diff --git a/.github/workflows/test-configs.yml b/.github/workflows/test-configs.yml index 91175ac56b..3a1ffe8f07 100644 --- a/.github/workflows/test-configs.yml +++ b/.github/workflows/test-configs.yml @@ -648,6 +648,24 @@ jobs: arch: aarch64 config-file: ./config/examples/zynqmp_sdcard.config + zynq7000_test: + uses: ./.github/workflows/test-build.yml + with: + arch: arm + config-file: ./config/examples/zynq7000.config + + zynq7000_linux_test: + uses: ./.github/workflows/test-build.yml + with: + arch: arm + config-file: ./config/examples/zynq7000_linux.config + + zc702_sdcard_test: + uses: ./.github/workflows/test-build.yml + with: + arch: arm + config-file: ./config/examples/zc702_sdcard.config + versal_vmk180_test: uses: ./.github/workflows/test-build-aarch64.yml with: diff --git a/Makefile b/Makefile index 2986d8db51..cd9a66e3f5 100644 --- a/Makefile +++ b/Makefile @@ -285,6 +285,10 @@ ifeq ($(TARGET),sama5d3) MAIN_TARGET:=wolfboot.bin test-app/image_v1_signed.bin endif +ifeq ($(TARGET),zynq7000) + MAIN_TARGET:=wolfboot.bin test-app/image_v1_signed.bin +endif + ifeq ($(TARGET),rp2350) MAIN_TARGET:=include/target.h keytools wolfboot_signing_private_key.der pico-sdk-info endif diff --git a/arch.mk b/arch.mk index 261ace49c4..a4fe6c88d1 100644 --- a/arch.mk +++ b/arch.mk @@ -303,6 +303,19 @@ ifeq ($(ARCH),ARM) CFLAGS+=-DWOLFBOOT_USE_STDLIBC endif + ifeq ($(TARGET),zynq7000) + # AMD/Xilinx Zynq-7000 (Cortex-A9, ARMv7-A) - ZC702 Evaluation Kit. + # Loaded by Xilinx FSBL into DDR; see hal/zynq7000.{c,h,ld}. + CORTEX_A9=1 + UPDATE_OBJS:=src/update_ram.o + CFLAGS+=-DWOLFBOOT_DUALBOOT -fno-builtin -ffreestanding + # Do NOT define WOLFBOOT_USE_STDLIBC: newlib's memcpy uses unaligned + # LDRs which fault on Cortex-A9 when MMU is off (FSBL leaves MMU off + # on Zynq-7000). Use wolfBoot's own aligned-safe memcpy from src/string.c. 
+ # U-Boot legacy header detection for Linux/U-Boot payloads (Milestone 5) + CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY + endif + ifeq ($(TARGET),va416x0) CFLAGS+=-I$(WOLFBOOT_ROOT)/hal/vorago/ \ -I$(VORAGO_SDK_DIR)/common/drivers/hdr/ \ @@ -344,6 +357,49 @@ ifeq ($(CORTEX_A5),1) -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON endif endif +else +ifeq ($(CORTEX_A9),1) + # Cortex-A9 (ARMv7-A, 32-bit) - Zynq-7000. + # Build in ARM state (-marm); reset vector lands in ARM mode after FSBL. + # Note: do not filter out -mthumb from CFLAGS/LDFLAGS - that converts the + # variables to simple-expansion flavor and breaks lazy $(LSCRIPT) expansion + # in test-app/Makefile. -marm appended later wins over -mthumb anyway. + FPU=-mfpu=vfp3-d16 + CFLAGS+=-mcpu=cortex-a9 -mtune=cortex-a9 -marm -static -z noexecstack \ + -mno-unaligned-access + LDFLAGS+=-mcpu=cortex-a9 -mtune=cortex-a9 -marm -static -z noexecstack + # Cortex-A9 uses the same generic ARMv7-A startup as Cortex-A5 + # (src/boot_arm32_start.S handles VBAR, per-mode stacks, cache + # invalidate, async-abort enable for any ARMv7-A target). + OBJS+=src/boot_arm32.o src/boot_arm32_start.o + # Linux/U-Boot payload: enable MMU + FDT codepaths in update_ram.c so DTBs + # can be loaded from a separate signed PART_DTS_BOOT partition. The MMU + # itself stays inherited from FSBL's flat 1:1 mapping; wolfBoot does not + # manage page tables on Cortex-A9. + ifeq ($(MMU),1) + CFLAGS+=-DMMU -DWOLFBOOT_FDT + OBJS+=src/fdt.o + endif + # SD card / eMMC boot: swap the update_ram loader for update_disk + GPT. + # The SDHCI HAL hooks live in hal/zynq7000.c and translate the generic + # Cadence-layout driver to the Arasan SDHCI v2.0 controller. 
+ ifneq ($(filter 1,$(DISK_SDCARD) $(DISK_EMMC)),) + CFLAGS+=-DWOLFBOOT_UPDATE_DISK -DMAX_DISKS=1 + UPDATE_OBJS:=src/update_disk.o + OBJS += src/gpt.o + OBJS += src/disk.o + endif + ifeq ($(NO_ASM),1) + MATH_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_c32.o + else + MATH_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_arm32.o + ifneq ($(NO_ARM_ASM),1) + OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o + OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o + CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \ + -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON + endif + endif else # All others use boot_arm.o OBJS+=src/boot_arm.o @@ -456,6 +512,7 @@ else endif endif endif +endif ## Renesas RX diff --git a/config/examples/zc702_sdcard.config b/config/examples/zc702_sdcard.config new file mode 100644 index 0000000000..3dc69d3879 --- /dev/null +++ b/config/examples/zc702_sdcard.config @@ -0,0 +1,65 @@ +ARCH?=ARM +TARGET?=zynq7000 +SIGN?=ECC256 +HASH?=SHA256 + +# Cortex-A9 ZC702 SD-card boot variant. Uses the generic SDHCI driver +# (src/sdhci.c) with HAL hooks in hal/zynq7000.c that translate between the +# driver's Cadence SD4HC register layout and the Arasan SDHCI v2.0 standard +# layout used by the Zynq-7000 controller (same IP family as ZynqMP's v3.0, +# just an older revision; the translation is reused from hal/zynq.c). +DEBUG?=0 +DEBUG_UART?=1 +V?=0 +SPMATH?=1 + +# SD card boot - swaps update_ram.o for update_disk.o + GPT/disk support. +DISK_SDCARD=1 +NO_XIP=1 + +# Stage payload at low DDR (clear of wolfBoot at 0x04000000-0x040FFFFF). +WOLFBOOT_LOAD_ADDRESS=0x10000000 + +# GPT/MBR partition layout on the SD card. +# GPT partition 0 (idx 0): FAT32 - holds BOOT.BIN for the BootROM. +# GPT partition 1 (idx 1): raw - signed boot image (BOOT_PART_A). +# GPT partition 2 (idx 2): raw - signed update image (BOOT_PART_B). 
+# tools/scripts/zc702/prepare_sdcard.sh lays this out; BOOT_PART_A/B tell +# update_disk.c which GPT entries to use for boot/update. +CFLAGS_EXTRA+=-DBOOT_PART_A=1 -DBOOT_PART_B=2 + +# Arasan SDHCI v2.0 on Zynq-7000 is 3.3V-only, no UHS-I. The generic +# driver tries to push the card to UHS-I SDR25 / 50 MHz / High Speed mode +# which is invalid for our v2.0 + 3.3V combo and causes DTOE on the first +# data transfer (MBR read). The HSE bit is masked in hal/zynq7000.c +# sdhci_reg_write so the controller stays in single-edge timing the card +# matches, and the post-init clock is capped well below default speed: +# Cap the post-init SDHCI clock at 6 MHz. The Arasan SDHCI v2.0 on +# Zynq-7000 has a clock-dependent state-cleanup issue: at 12 MHz multi- +# block reads (CMD18) work, but a single-block read (CMD17) issued +# immediately after a CMD18+CMD12 sequence times out (DTOE) on the first +# data block. At 24 MHz even the very first CMD17 fails. 6 MHz / 4-bit +# bus is plenty fast for boot-time loading (~3 MB/s) and is well below +# the v2.0 quirk threshold; raise this if a future fix in src/sdhci.c +# adds an explicit DAT-line reset between transfers. +CFLAGS_EXTRA+=-DSDHCI_CLK_50MHZ=6000 -DSDHCI_CLK_25MHZ=6000 + +# Uncomment to enable verbose SDHCI driver logging when bringing up +# new boards or debugging timing issues. +#CFLAGS_EXTRA+=-DDEBUG_SDHCI + +# Image-header partition addresses are unused for disk boot (kept for the +# Makefile sanity checks). update_disk.c finds images by GPT entry, not by +# memory address. +WOLFBOOT_PARTITION_BOOT_ADDRESS=0x00100000 +WOLFBOOT_PARTITION_UPDATE_ADDRESS=0x00700000 +WOLFBOOT_PARTITION_SWAP_ADDRESS=0x00D00000 +WOLFBOOT_PARTITION_SIZE=0x00600000 +# Sector size of WOLFBOOT_PARTITION (not the SD physical sector, which is +# always 512 B). Used as the smallest erase/copy unit for the BOOT/UPDATE +# partitions; must be > IMAGE_HEADER_SIZE. 
+WOLFBOOT_SECTOR_SIZE=0x1000 + +IMAGE_HEADER_SIZE=1024 + +CROSS_COMPILE=arm-none-eabi- diff --git a/config/examples/zynq7000.config b/config/examples/zynq7000.config new file mode 100644 index 0000000000..7a9ee2b459 --- /dev/null +++ b/config/examples/zynq7000.config @@ -0,0 +1,42 @@ +ARCH?=ARM +TARGET?=zynq7000 +SIGN?=ECC256 +HASH?=SHA256 + +# Cortex-A9 (Zynq-7000) - selected automatically via TARGET=zynq7000 in arch.mk +DEBUG?=0 +DEBUG_UART?=1 +V?=0 +SPMATH?=1 + +# wolfBoot itself is loaded by Xilinx FSBL to DDR at 0x04000000 (hal/zynq7000.ld). +# WOLFBOOT_LOAD_ADDRESS is the *app* staging address: where wolfBoot copies +# the verified signed image before do_boot. Must NOT overlap wolfBoot itself +# AND src/update_ram.c expects dst > wolfBoot's _end - so place it above the +# wolfBoot region (0x04000000-0x040FFFFF), at the 256 MB mark. +WOLFBOOT_LOAD_ADDRESS=0x10000000 + +# QSPI flash (16 MB N25Q128A on ZC702) via XQspiPs (hal/zynq7000.c). +# Override EXT_FLASH=0 on the make command line for JTAG-only dev builds. 
+EXT_FLASH?=1 +NO_XIP=1 + +# QSPI partition layout (16 MB total): +# 0x000000 - 0x0FFFFF BOOT.BIN (FSBL + wolfboot) +# 0x100000 - 0x6FFFFF BOOT_A (~6 MB primary) +# 0x700000 - 0xCFFFFF UPDATE_B (~6 MB update) +# 0xD00000 - 0xD0FFFF SWAP scratch (64 KB sector) +WOLFBOOT_PARTITION_BOOT_ADDRESS=0x00100000 +WOLFBOOT_PARTITION_UPDATE_ADDRESS=0x00700000 +WOLFBOOT_PARTITION_SWAP_ADDRESS=0x00D00000 +WOLFBOOT_PARTITION_SIZE=0x00600000 +WOLFBOOT_SECTOR_SIZE=0x10000 + +# DTS placeholders (used in Milestone 5 for Linux payload) +WOLFBOOT_LOAD_DTS_ADDRESS=0x00100000 +WOLFBOOT_DTS_BOOT_ADDRESS=0x00080000 +WOLFBOOT_DTS_UPDATE_ADDRESS=0x00680000 + +IMAGE_HEADER_SIZE=1024 + +CROSS_COMPILE=arm-none-eabi- diff --git a/config/examples/zynq7000_linux.config b/config/examples/zynq7000_linux.config new file mode 100644 index 0000000000..6058b44a12 --- /dev/null +++ b/config/examples/zynq7000_linux.config @@ -0,0 +1,53 @@ +ARCH?=ARM +TARGET?=zynq7000 +SIGN?=ECC256 +HASH?=SHA256 + +# Cortex-A9 Linux/U-Boot variant: enables MMU + WOLFBOOT_FDT codepaths so +# update_ram.c loads a signed DTB out of PART_DTS_BOOT, and switches do_boot +# to the ARM Linux boot ABI (r0=0, r1=~0, r2=DTB_phys_addr). +DEBUG?=0 +DEBUG_UART?=1 +V?=0 +SPMATH?=1 + +# Linux/U-Boot payload tells wolfBoot to use the ARM Linux boot ABI in +# do_boot (boot_arm32.c). MMU=1 enables the DTB load logic in update_ram.c +# and pulls in src/fdt.o. ELF=1 lets wolfBoot understand u-boot.elf or +# vmlinux ELF inputs and load only their LOAD segments. +LINUX_PAYLOAD=1 +MMU=1 +ELF=1 + +# wolfBoot itself is staged by FSBL to DDR at 0x04000000 (hal/zynq7000.ld); +# the kernel/U-Boot image is staged at WOLFBOOT_LOAD_ADDRESS, well clear of +# wolfBoot. 1 GB DDR3 on ZC702 starts at 0x00000000. +WOLFBOOT_LOAD_ADDRESS=0x10000000 + +# DTB load address - kernel reads it from r2. 16 MB clear of WOLFBOOT_LOAD. 
+WOLFBOOT_LOAD_DTS_ADDRESS=0x11000000 + +EXT_FLASH?=1 +NO_XIP=1 + +# QSPI partition layout (16 MB total) - wider partitions to hold a full +# Linux kernel + DTB. Adjust WOLFBOOT_PARTITION_SIZE to fit the largest +# signed kernel you ship. +# 0x000000 - 0x07FFFF BOOT.BIN (FSBL + wolfboot, 512 KB) +# 0x080000 - 0x0FFFFF DTS_BOOT (signed DTB, 512 KB) +# 0x100000 - 0x6FFFFF BOOT_A (~6 MB kernel) +# 0x700000 - 0x77FFFF DTS_UPD (signed update DTB, 512 KB) +# 0x780000 - 0xDFFFFF UPDATE_B (~6.5 MB update kernel) +# 0xE00000 - 0xE0FFFF SWAP (64 KB) +WOLFBOOT_PARTITION_BOOT_ADDRESS=0x00100000 +WOLFBOOT_PARTITION_UPDATE_ADDRESS=0x00780000 +WOLFBOOT_PARTITION_SWAP_ADDRESS=0x00E00000 +WOLFBOOT_PARTITION_SIZE=0x00600000 +WOLFBOOT_SECTOR_SIZE=0x10000 + +WOLFBOOT_DTS_BOOT_ADDRESS=0x00080000 +WOLFBOOT_DTS_UPDATE_ADDRESS=0x00700000 + +IMAGE_HEADER_SIZE=1024 + +CROSS_COMPILE=arm-none-eabi- diff --git a/docs/Targets.md b/docs/Targets.md index bbce15e5cb..db3761bf4f 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -53,6 +53,7 @@ This README describes configuration of supported targets. * [TI Hercules TMS570LC435](#ti-hercules-tms570lc435) * [Vorago VA416x0](#vorago-va416x0) * [Xilinx Zynq UltraScale](#xilinx-zynq-ultrascale) +* [Xilinx Zynq-7000 (ZC702)](#xilinx-zynq-7000-zc702) * [Versal Gen 1 VMK180](#versal-gen-1-vmk180) ## STM32F4 @@ -3391,6 +3392,268 @@ Entering idle loop... ``` +## Xilinx Zynq-7000 (ZC702) + +AMD/Xilinx Zynq-7000 (XC7Z020) on the ZC702 Evaluation Kit - dual ARM Cortex-A9 (ARMv7-A 32-bit), 1 GB DDR3, 16 MB QSPI NOR (N25Q128A), SDIO, dual UART. Older sibling of the ZynqMP family - distinct silicon, different controllers (`XQspiPs` not `XQspiPsu`, Arasan SDHCI v2.0 not v3.0, no CSU/PMU/PUF, PL310 L2). 
+ +wolfBoot is loaded by the Xilinx Zynq-7000 FSBL into DDR: +``` +BootROM -> FSBL -> wolfBoot -> signed app (or U-Boot/Linux) +``` + +The FSBL handles all PS init (DDR, MIO, clocks, QSPI ref clock); wolfBoot only initializes the UART and the QSPI controller, runs the verify/swap logic, and chain-loads the next stage. + +This target supports: +- **QSPI boot** (primary): `config/examples/zynq7000.config` +- **SD card boot** (Milestone 6): `config/examples/zc702_sdcard.config` +- **JTAG-loaded dev** via Platform Cable II + xsdb (no flash required) + +### Prerequisites + +1. **Toolchain**: `arm-none-eabi-gcc` (Arm bare-metal). Tested with 13.2. +2. **Xilinx Vitis** (provides `bootgen`, `xsdb`, and `program_flash`). Source the env once per shell: + ```sh + source /opt/Xilinx/2025.2/Vitis/settings64.sh + ``` + Vivado's `settings64.sh` works equivalently if you don't have Vitis installed. +3. **Platform Cable II USB drivers** (one-time, requires root). Without these the + cable enumerates as `03fd:0013` with empty descriptors and `xsdb` reports no + JTAG targets: + ```sh + sudo /opt/Xilinx/2025.2/Vitis/data/xicom/cable_drivers/lin64/install_script/install_drivers/install_drivers + ``` + Unplug/replug the cable afterward so udev can load the firmware. +4. **Pre-built ZC702 FSBL + DTB** (clone into the parent directory of your wolfBoot checkout; run the `export` from the wolfBoot directory): + ```sh + git clone https://github.com/wolfSSL/soc-prebuilt-firmware.git + export PREBUILT_DIR=$(pwd)/../soc-prebuilt-firmware/zc702-zynq + ls $PREBUILT_DIR/zynq_fsbl.elf # required + ``` +5. **Hardware**: ZC702 with Platform Cable II (USB JTAG) connected to J22 and powered. 
+ +### Configuration Options + +Key options in `config/examples/zynq7000.config`: + +- `ARCH=ARM` - 32-bit ARM +- `TARGET=zynq7000` - selects `hal/zynq7000.{c,h,ld}` and the `CORTEX_A9` arch.mk block +- `SIGN=ECC256` / `HASH=SHA256` - smaller and faster than RSA on Cortex-A9 +- `EXT_FLASH=1` - QSPI as external flash via `XQspiPs` +- `WOLFBOOT_LOAD_ADDRESS=0x10000000` - DDR offset 256 MB, where the verified app is staged before `do_boot`. Must be **above** wolfBoot's own region (`0x04000000`-`0x040FFFFF`) because `src/update_ram.c` enforces `dst > _end`. +- `WOLFBOOT_PARTITION_BOOT_ADDRESS=0x00100000` - 16 MB QSPI layout below +- `CROSS_COMPILE=arm-none-eabi-` + +DDR layout: + +| Region | Address range | Contents | +|---|---|---| +| App stage | `0x10000000`+ | Verified signed image, app text/data/bss/stack | +| Image header staging | `0x0FFFFC00`-`0x0FFFFFFF` | wolfBoot copies the 1 KB header here just before the load address | +| wolfBoot | `0x04000000`-`0x040FFFFF` | Loaded by FSBL, runs in place | +| FSBL/BootROM/OCM | `0x00000000`-`0x000FFFFF` | OCM low-mapped during boot | + +QSPI partition layout (16 MB on-board flash): + +| Offset | Size | Contents | +|-------------|---------|-----------------------------------| +| `0x000000` | ~512 KB | BOOT.BIN (FSBL + wolfboot) | +| `0x100000` | 6 MB | BOOT_A (signed primary image) | +| `0x700000` | 6 MB | UPDATE_B (signed update slot) | +| `0xD00000` | 64 KB | SWAP scratch sector | +| `0xD10000`+ | | reserved | + +### Building wolfBoot + +```sh +cp config/examples/zynq7000.config .config +make keysclean && make keytools +make TARGET=zynq7000 wolfboot.elf +``` + +The result is a 32-bit ARM ELF with entry point `0x04000000` and `.text` start at the same address (vector table at the load base). + +### Building BOOT.BIN (production QSPI boot) + +```sh +cp ${PREBUILT_DIR}/zynq_fsbl.elf . +bootgen -arch zynq -image tools/scripts/zc702/zc702_qspi.bif -w -o BOOT.BIN +``` + +`bootgen` ships with Vitis. 
The `.bif` template at `tools/scripts/zc702/zc702_qspi.bif` is the minimum bootable image; add `download.bit` and a DTB if you also need to load the PL bitstream and a Linux device tree (see Milestone 5). + +### Programming QSPI + +Set ZC702 boot mode straps to **JTAG** (SW16 all OFF) for programming, then either: +- Vitis: `program_flash -f BOOT.BIN -flash_type qspi-x4-single -fsbl ${PREBUILT_DIR}/zynq_fsbl.elf -target_id <id>` (use `program_flash -jtagtargets` to get the `arm_dap` target ID for your Platform Cable; `-target_id` is only needed when more than one cable is connected) +- Vivado Hardware Manager: Tools -> Add Configuration Memory Device -> select N25Q128 -> program with BOOT.BIN at offset 0. + +After programming, set boot mode to **QSPI** by turning **SW16-4 ON** (the four-position dip mapping is `SW16-4 = MIO[5]` MSB of the boot device strap, with SW16-1..3 = MIO[2..4]; per UG850 ch.1.2.4). Power-cycle the board (cold) so the BootROM re-samples the strap. Console comes up on UART1 (J17 USB-UART), 115200 8N1. + +### JTAG-loaded development (no flash) + +For driver bring-up or quick iteration, skip bootgen and load directly via Platform Cable II: + +```sh +source /opt/Xilinx/2025.2/Vitis/settings64.sh # once per shell +xsdb tools/scripts/zc702/jtag_load.tcl +``` + +The script runs the prebuilt FSBL (PS init: DDR/MIO/clocks/UART), then loads `wolfboot.elf` over the top, sets PC to `0x04000000` and CPSR to SVC with IRQ/FIQ masked, and resumes. Override paths via `FSBL_ELF=...` or `WOLFBOOT_ELF=...` env vars. 
+ +With a signed image programmed at QSPI offset `0x100000` (see "Building and flashing the signed test app" below), expected UART output is: + +``` +wolfBoot Zynq-7000 (ZC702) hal_init +Versions: Boot 1, Update 0 +Trying Boot partition at 0x100000 +Loading header 1024 bytes from 0x100000 to 0xFFFFC00 +Loading image 396 bytes from 0x100400 to 0x10000000...done +Boot partition: 0xFFFFC00 (sz 396, ver 0x1, type 0x201) +Checking integrity...done +Verifying signature...done +Successfully selected image in part: 0 +Firmware Valid +Booting at 0x10000000 + +=== ZC702 test-app: BOOT OK === +wolfBoot verified + chain-loaded this image +..... +``` + +On a **blank** QSPI (no signed image yet), wolfBoot prints `Versions: Boot 0, Update 0 / No valid image found! / wolfBoot: PANIC!` instead - that is correct behavior, not a bug. + +If `xsdb` reports `no targets found` or empty `jtag servers`, either: +- Cable USB drivers not installed - see step 3 of Prerequisites, OR +- A previous run left the CPU in a stuck JTAG state - power-cycle the ZC702 (SW10, the Pi4 GPIO 20 power relay, or your PSU control) and retry. + +A separate JTAG-only dev build (no QSPI driver) can be produced with `make EXT_FLASH=0`. + +### Building and flashing the signed test app + +A minimal Cortex-A9 test app lives at `test-app/app_zynq7000.c` (UART banner + heartbeat dots). The top-level `make` target produces both `wolfboot.elf` and `test-app/image_v1_signed.bin`, signed with the generated key `wolfboot_signing_private_key.der`: + +```sh +cp config/examples/zynq7000.config .config +make keysclean && make # builds wolfboot.elf + test-app/image_v1_signed.bin +``` + +Program the signed image to QSPI offset `0x100000` (the BOOT_A partition): + +```sh +program_flash -f test-app/image_v1_signed.bin \ + -fsbl ${PREBUILT_DIR}/zynq_fsbl.elf \ + -flash_type qspi-x4-single -offset 0x100000 +``` + +`program_flash` ships with Vitis. 
Then run wolfBoot via `xsdb tools/scripts/zc702/jtag_load.tcl` - it should verify and chain-load the test app, producing the heartbeat output above. + +### QSPI driver self-test (`TEST_EXT_FLASH`) + +To exercise the `XQspiPs` driver in isolation - read JEDEC ID, sector erase + page program + linear-mode read-back of a 256-byte pattern at `0x200000`: + +```sh +make CFLAGS_EXTRA=-DTEST_EXT_FLASH wolfboot.elf +xsdb tools/scripts/zc702/jtag_load.tcl +``` + +Expected output: + +``` +qspi: --- TEST_EXT_FLASH start --- +qspi: JEDEC ID = 0x20bb18 rc=00 <- Micron N25Q128 +qspi: read @0x100000 = 574f4c468c010000 <- "WOLF" magic from a programmed signed image +qspi: erase sector @ 0x00200000 ... +qspi: page program ... +qspi: post-program JEDEC = 0x20bb18 +qspi: rdback[0..7] = 0001020304050607 +qspi: --- TEST_EXT_FLASH PASS --- +``` + +### QSPI driver design + +The driver in `hal/zynq7000.c` splits read vs cmd-only paths similarly to how the ZynqMP HAL splits SDHCI CMD17 (single-block PIO) vs CMD18 (multi-block SDMA): + +| Operation | Path | Why | +|---|---|---| +| JEDEC ID, RDSR, WREN, sector erase, page program | I/O mode (TXD0/TXD1/2/3 + auto-start) | Short, command-shaped transactions; needs precise byte counts on MOSI | +| Bulk reads (signed image, partition headers) | Linear/XIP mode (`memcpy` from `0xFC000000+offset`) | Hardware-accelerated; controller drives cmd+addr+dummy and presents data through the AXI window | + +`qspi_linear_mode_setup()` configures `LQSPI_CR=0x8000010B` (single-bit `FAST_READ` 0x0B + 1 dummy byte) which avoids needing the flash QE bit set. A sacrificial first-byte read primes the linear-mode pipeline before the actual `memcpy`. + +For TX-only commands sent without RX capture, `qspi_xfer4` picks `TXD1`/`TXD2`/`TXD3` so the controller clocks exactly *N* bytes on the wire (no 4-byte padding that some flash interprets as additional commands - this caused our WREN to fail in an early iteration). 
+ +### Boot flow notes + +- **Cortex-A9 startup**: shared `src/boot_arm32_start.S` (generic ARMv7-A startup, also used by SAMA5D3) plus shared `src/boot_arm32.c` for `do_boot()`. Sets VBAR to wolfBoot's vector table at `0x04000000`, clears `SCTLR.{A,C,I,V}`, invalidates I-cache + branch predictor + TLB, sets stack pointers for IRQ/FIQ/ABT/UND/SVC modes, then unmasks async aborts and calls `main`. +- **MMU stays ON**, inheriting FSBL's flat 1:1 DDR mapping. Disabling the MMU on Cortex-A9 makes all memory Strongly-Ordered, which traps unaligned LDR/STR and breaks any ARMv7-A unrolled `memcpy`. +- **memcpy/memset**: do **not** define `WOLFBOOT_USE_STDLIBC` for this target. newlib's ARMv7-A `memcpy` uses unaligned word LDRs from arbitrary alignments and faults under any code path that runs without the MMU configured for Normal memory. wolfBoot's own byte-wise / aligned-word `memcpy` in `src/string.c` is used instead. +- **`ext_flash_read` returns bytes-read** (not 0 on success): `src/update_ram.c` checks `ret != IMAGE_HEADER_SIZE` for the header read and `ret < 0` for the body read. +- **Cache teardown** in `hal_prepare_boot()`: cleans+invalidates L1 D-cache by set/way, invalidates L1 I-cache and branch predictor, then disables MMU+caches via SCTLR before `do_boot()` performs `bx r4`. +- **Register handoff** (`do_boot` in `src/boot_arm32.c`): bare-metal apps get `r0 = dts_ptr`, `r1=r2=r3=0`, entry in `r4`. With `WOLFBOOT_LINUX_PAYLOAD=1` (set by `LINUX_PAYLOAD=1` in the config), `do_boot` follows the ARM Linux boot ABI: `r0 = 0`, `r1 = ~0` (no machine ID, use DTB), `r2 = DTB physical address`, `r3 = 0`, entry in `r4`. +- **L2 (PL310)**: not touched by wolfBoot. Stock ZC702 FSBLs do not enable PL310; if your customised FSBL does, extend `hal_prepare_boot()` with an L2x0 clean-invalidate + disable. 
+ +### SD card boot (Milestone 6) + +`config/examples/zc702_sdcard.config` enables SD-card boot via the generic +SDHCI driver (`src/sdhci.c`) with HAL hooks in `hal/zynq7000.c` that +translate the driver's Cadence SD4HC register layout to the Arasan +SDHCI v2.0 standard layout used by the Zynq-7000 controller. + +**Strap**: SW16-3 + SW16-4 ON (others OFF). `BOOT_MODE_REG = 0x5`. + +**Layout**: pure MBR (no GPT - the Zynq-7000 BootROM only accepts MBR-with-FAT32-Active for SD boot). + +| Partition | Type | Size | Contents | +|-----------|-----------|-------|---------------------------------------| +| p1 | 0x0C FAT32-LBA, Active | 64 MB | `BOOT.BIN` for the BootROM | +| p2 | 0x83 Linux raw | 16 MB | Signed boot image (`BOOT_PART_A=1`) | +| p3 | 0x83 Linux raw | 16 MB | Signed update image (`BOOT_PART_B=2`) | + +`tools/scripts/zc702/prepare_sdcard.sh` lays this out (parted msdos + +manual MBR type/active patch + dd of signed images). + +```sh +cp config/examples/zc702_sdcard.config .config +make +make test-app/image.bin +IMAGE_HEADER_SIZE=1024 ./tools/keytools/sign --ecc256 --sha256 \ + test-app/image.bin wolfboot_signing_private_key.der 1 +cp ${PREBUILT_DIR}/zynq_fsbl.elf . +bootgen -arch zynq -image tools/scripts/zc702/zc702_qspi.bif -w -o BOOT.BIN +sudo ./tools/scripts/zc702/prepare_sdcard.sh /dev/sdX +``` + +**Arasan SDHCI v2.0 quirks** (handled by the HAL/config): +- 3.3V-only, no UHS-I. The driver tries to enable UHS-I SDR25 / 1.8V + signaling on init; we mask out UMS, 1.8V Enable, sampling-clock, + HV4E, and A64 in `sdhci_reg_write` for SRS15. +- High Speed Enable (HSE bit in HostCtrl1) does not work reliably on + v2.0 with 3.3V cards that didn't switch to HS via CMD6 - we mask + HSE in HostCtrl1 writes. +- Post-init clock is capped at 6 MHz via `SDHCI_CLK_50MHZ=6000`. At + 12 MHz the first single-block CMD17 issued after a multi-block + CMD18+CMD12 sequence times out (DTOE) - looks like a state-cleanup + quirk specific to v2.0. 
At 24 MHz even the very first CMD17 fails. + 6 MHz / 4-bit yields ~3 MB/s, plenty for boot-time loads. +- Cortex-A9 Global Timer at PERIPHCLK = CPU_3x2x = 333.33 MHz on the + default ZC702 FSBL clock plan; `Z7_GTIMER_FREQ_HZ` in + `hal/zynq7000.h` defaults to that and feeds `hal_get_timer_us()` + used by the SDHCI driver's `udelay()`. + +### Differences from the ZynqMP port + +| Aspect | ZynqMP (`hal/zynq.c`) | Zynq-7000 (`hal/zynq7000.c`) | +|------------------|-------------------------------|------------------------------| +| CPU | Cortex-A53 quad, AArch64 | Cortex-A9 dual, ARMv7-A | +| QSPI controller | GQSPI (`XQspiPsu`) | Linear/Static (`XQspiPs`) | +| UART IP | XUartPs @ `0xFF000000` | XUartPs @ `0xE0001000` | +| SDHCI | Arasan v3.0 + Cadence shim | Arasan v2.0 + Cadence shim | +| Crypto HW | CSU (AES-GCM, SHA3, PUF) | none (DevC AES only) | +| Boot chain | FSBL + PMUFW + BL31 + wolfBoot| FSBL + wolfBoot | +| Linux EL | EL2 (hypervisor) | SVC (no exception levels) | +| `bootgen -arch` | `zynqmp` | `zynq` | + + ## Versal Gen 1 VMK180 AMD Versal Prime Series VMK180 Evaluation Kit - Versal Prime XCVM1802-2MSEVSVA2197 Adaptive SoC - Dual ARM Cortex-A72. diff --git a/hal/zynq7000.c b/hal/zynq7000.c new file mode 100644 index 0000000000..5f41657cac --- /dev/null +++ b/hal/zynq7000.c @@ -0,0 +1,1089 @@ +/* zynq7000.c + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef TARGET_zynq7000 + +#include +#include +#include +#include "image.h" +#include "printf.h" +#include "hal/zynq7000.h" + +#ifndef ARCH_ARM +# error "wolfBoot zynq7000 HAL: wrong architecture selected. Please compile with ARCH=ARM." +#endif + +#ifdef DEBUG_UART +void uart_init(void) +{ + /* Disable interrupts */ + Z7_UART_IDR = Z7_UART_ISR_MASK; + /* Disable TX/RX */ + Z7_UART_CR = (Z7_UART_CR_TX_DIS | Z7_UART_CR_RX_DIS); + /* Clear ISR */ + Z7_UART_ISR = Z7_UART_ISR_MASK; + + /* 8N1 */ + Z7_UART_MR = Z7_UART_MR_8N1; + + /* Half-FIFO trigger levels (XUartPs FIFO depth = 64) */ + Z7_UART_RXWM = 32; + Z7_UART_TXWM = 32; + + /* RX timeout disabled */ + Z7_UART_RXTOUT = 0; + + /* baud = ref_clk / (BR_GEN * (BR_DIV + 1)) */ + Z7_UART_BR_GEN = UART_CLK_REF / (DEBUG_UART_BAUD * (DEBUG_UART_DIV + 1)); + Z7_UART_BR_DIV = DEBUG_UART_DIV; + + /* Reset TX/RX paths */ + Z7_UART_CR = (Z7_UART_CR_TXRST | Z7_UART_CR_RXRST); + /* Enable TX/RX */ + Z7_UART_CR = (Z7_UART_CR_TX_EN | Z7_UART_CR_RX_EN); +} + +void uart_write(const char* buf, unsigned int sz) +{ + unsigned int pos = 0; + while (sz-- > 0) { + char c = buf[pos++]; + if (c == '\n') { + while (Z7_UART_SR & Z7_UART_SR_TXFULL) + ; + Z7_UART_FIFO = (uint32_t)'\r'; + } + while (Z7_UART_SR & Z7_UART_SR_TXFULL) + ; + Z7_UART_FIFO = (uint32_t)c; + } + while (!(Z7_UART_SR & Z7_UART_SR_TXEMPTY)) + ; +} +#endif /* DEBUG_UART */ + +#ifdef EXT_FLASH +/* ===================== QSPI flash driver (XQspiPs) ===================== + * Bare-metal driver for the Zynq-7000 "Linear/Static" QSPI controller. + * Used here in I/O mode (single-bit SPI) for read/write/erase. 
The Linear + * QSPI XIP window at 0xFC000000 serves bulk reads (qspi_linear_mode_setup); + * I/O mode through the controller's FIFO carries the short command-shaped + * transactions. NOTE(review): confirm against ext_flash_read. + * + * The driver assumes FSBL has already configured QSPI ref clock and MIO + * pins (typical when wolfBoot is loaded by FSBL). qspi_init() resets and + * reconfigures the controller itself. */ + +/* JEDEC SPI-NOR command codes (subset used by wolfBoot) */ +#define SPI_CMD_RDID 0x9F +#define SPI_CMD_RDSR 0x05 +#define SPI_CMD_WREN 0x06 +#define SPI_CMD_WRDI 0x04 +#define SPI_CMD_READ 0x03 +#define SPI_CMD_FAST_READ 0x0B /* requires 8 dummy clocks */ +#define SPI_CMD_PAGE_PROGRAM 0x02 +#define SPI_CMD_SECTOR_ERASE 0xD8 /* 64 KB erase */ +#define SPI_STATUS_WIP 0x01 /* write-in-progress */ +#define SPI_STATUS_WEL 0x02 /* write-enable latch */ + +#define SPI_NOR_PAGE_SIZE 256U +#define SPI_NOR_SECTOR_SIZE 0x10000U /* 64 KB */ + +static void qspi_drain_rxfifo(void) +{ + while (Z7_QSPI_ISR & Z7_QSPI_ISR_RXNEMPTY) + (void)Z7_QSPI_RXD; +} + +static void qspi_cs_assert(void) +{ + /* PCS [13:10] = 0b1110 -> CS0 asserted. */ + Z7_QSPI_CR = (Z7_QSPI_CR & ~Z7_QSPI_CR_PCS_MASK) | Z7_QSPI_CR_PCS_CS0; +} + +static void qspi_cs_release(void) +{ + /* PCS [13:10] = 0b1111 -> all CS deasserted. */ + Z7_QSPI_CR |= Z7_QSPI_CR_PCS_NONE; +} + +/* Transfer up to 4 bytes. Uses TXD(n) for partial sends WITHOUT RX (so the + * flash sees exactly n bytes clocked out on MOSI), and TXD0 (4-byte) when RX is + * needed (so the controller pushes a full 4-byte RX FIFO entry we can + * decode). Mirrors u-boot zynq_qspi.c (offsets[3] when rx_buf, offsets[len-1] + * otherwise). 
*/ +static void qspi_xfer4(const uint8_t *tx, uint8_t *rx, unsigned int nbytes) +{ + uint32_t txw = 0xFFFFFFFFU; + uint32_t rxw; + unsigned int i; + + if (nbytes > 4) + nbytes = 4; + if (nbytes == 0) + return; + + if (tx != NULL) { + for (i = 0; i < nbytes; i++) { + txw &= ~((uint32_t)0xFFU << (i * 8)); + txw |= ((uint32_t)tx[i]) << (i * 8); + } + } + + qspi_drain_rxfifo(); + + if (rx != NULL || nbytes == 4) { + /* Receive path or full 4-byte send: use TXD0. */ + Z7_QSPI_TXD0 = txw; + } else { + /* Send-only short transfer: pick TXD1/TXD2/TXD3 to clock exactly + * nbytes out the wire (no padding). The TX byte(s) are at the LSB. */ + switch (nbytes) { + case 1: Z7_QSPI_TXD1 = txw; break; + case 2: Z7_QSPI_TXD2 = txw; break; + case 3: Z7_QSPI_TXD3 = txw; break; + default: Z7_QSPI_TXD0 = txw; break; + } + } + + while (!(Z7_QSPI_ISR & Z7_QSPI_ISR_RXNEMPTY)) + ; + rxw = Z7_QSPI_RXD; + +#ifdef DEBUG_QSPI_BYTE + { + const char hex[] = "0123456789abcdef"; + char line[48]; + unsigned int p = 0; + line[p++] = '['; + for (i = 0; i < nbytes; i++) { + uint8_t b = (uint8_t)(txw >> (i * 8)); + line[p++] = hex[(b >> 4) & 0xF]; + line[p++] = hex[(b >> 0) & 0xF]; + } + line[p++] = ' '; + line[p++] = '/'; + line[p++] = ' '; + for (i = 0; i < nbytes; i++) { + uint8_t b = (uint8_t)(rxw >> (i * 8)); + line[p++] = hex[(b >> 4) & 0xF]; + line[p++] = hex[(b >> 0) & 0xF]; + } + line[p++] = ']'; + line[p++] = '\n'; + uart_write(line, p); + } +#endif + + if (rx != NULL) { + for (i = 0; i < nbytes; i++) + rx[i] = (uint8_t)(rxw >> (i * 8)); + } +} + +static int qspi_xfer(const uint8_t *tx, uint8_t *rx, unsigned int len) +{ + unsigned int off = 0; + unsigned int chunk; + + qspi_cs_assert(); + while (off < len) { + chunk = len - off; + if (chunk > 4) + chunk = 4; + qspi_xfer4((tx != NULL) ? &tx[off] : NULL, + (rx != NULL) ? 
&rx[off] : NULL, + chunk); + off += chunk; + } + qspi_cs_release(); + return 0; +} + +/* I/O mode: used for short cmd-only ops (JEDEC, RDSR, WREN, sector erase, + * page program initiation). Reads use Linear/XIP mode separately. */ +static void qspi_io_mode_setup(void) +{ + Z7_QSPI_EN = 0; + Z7_QSPI_IDR = Z7_QSPI_ISR_MASK; + qspi_drain_rxfifo(); + Z7_QSPI_ISR = Z7_QSPI_ISR_MASK; + Z7_QSPI_LQSPI_CR = 0; /* leave linear mode */ + Z7_QSPI_TXTHR = 1; + Z7_QSPI_RXTHR = 1; + Z7_QSPI_CR = Z7_QSPI_CR_IFMODE + | Z7_QSPI_CR_HOLD_B + | Z7_QSPI_CR_SSFORCE + | Z7_QSPI_CR_PCS_NONE + | Z7_QSPI_CR_FIFO_WIDTH + | Z7_QSPI_CR_BAUD_DIV_8 + | Z7_QSPI_CR_MSTREN; + Z7_QSPI_EN = Z7_QSPI_EN_VAL; +} + +/* Linear (XIP) mode: hardware-managed reads. Controller asserts CS, sends + * cmd+addr+dummy, returns data via memory-mapped accesses at 0xFC000000+. + * Matches XQspiPs_LinearInit() in qspips_v3_14/src/xqspips_hw.c. */ +static void qspi_linear_mode_setup(void) +{ + Z7_QSPI_EN = 0; + Z7_QSPI_IDR = Z7_QSPI_ISR_MASK; + qspi_drain_rxfifo(); + Z7_QSPI_ISR = Z7_QSPI_ISR_MASK; + + /* CR: IFMODE=1, FIFO=32-bit, MSTREN=1, SSFORCE=1, HOLD_B=1, /4 baud, + * MANSTRTEN=0 (auto-start), CPHA/CPOL=0, PCS bit 10 cleared (CS0 + * asserted - in linear mode the controller still wants this). */ + Z7_QSPI_CR = Z7_QSPI_CR_IFMODE + | Z7_QSPI_CR_HOLD_B + | Z7_QSPI_CR_SSFORCE + | Z7_QSPI_CR_FIFO_WIDTH + | Z7_QSPI_CR_BAUD_DIV_4 + | Z7_QSPI_CR_MSTREN; + /* Single-bit FAST_READ (0x0B) with 1 dummy byte. Avoids needing QE + * bit set in the flash status register. 
*/ + Z7_QSPI_LQSPI_CR = 0x8000010BU; + Z7_QSPI_EN = Z7_QSPI_EN_VAL; +} + +static void qspi_init(void) +{ + qspi_io_mode_setup(); +} + +static int spi_flash_read_id(uint8_t out[3]) +{ + uint8_t cmd[4] = { SPI_CMD_RDID, 0, 0, 0 }; + uint8_t rx[4] = { 0, 0, 0, 0 }; + int rc = qspi_xfer(cmd, rx, sizeof(cmd)); + if (rc == 0) { + out[0] = rx[1]; + out[1] = rx[2]; + out[2] = rx[3]; + } + return rc; +} + +static int spi_flash_status(uint8_t *status) +{ + uint8_t cmd[2] = { SPI_CMD_RDSR, 0 }; + uint8_t rx[2] = { 0, 0 }; + int rc = qspi_xfer(cmd, rx, sizeof(cmd)); + if (rc == 0) + *status = rx[1]; + return rc; +} + +static int spi_flash_wait_ready(void) +{ + uint8_t status = 0xFF; + /* Spin until WIP clears. No timeout: a stuck flash is a board issue. */ + do { + if (spi_flash_status(&status) != 0) + return -1; + } while ((status & SPI_STATUS_WIP) != 0); + return 0; +} + +static int spi_flash_write_enable(void) +{ + uint8_t cmd = SPI_CMD_WREN; + int rc; + rc = qspi_xfer(&cmd, NULL, 1); + if (rc != 0) + return rc; + /* Optional: confirm WEL bit set */ + { + uint8_t status = 0; + if (spi_flash_status(&status) != 0) + return -1; + if ((status & SPI_STATUS_WEL) == 0) + return -1; + } + return 0; +} + +static int spi_flash_sector_erase(uint32_t address) +{ + uint8_t cmd[4]; + int rc; + + rc = spi_flash_write_enable(); + if (rc != 0) + return rc; + + cmd[0] = SPI_CMD_SECTOR_ERASE; + cmd[1] = (uint8_t)((address >> 16) & 0xFFU); + cmd[2] = (uint8_t)((address >> 8) & 0xFFU); + cmd[3] = (uint8_t)((address >> 0) & 0xFFU); + rc = qspi_xfer(cmd, NULL, sizeof(cmd)); + if (rc != 0) + return rc; + + return spi_flash_wait_ready(); +} + +static int spi_flash_page_program(uint32_t address, + const uint8_t *data, + unsigned int len) +{ + /* len must be <= SPI_NOR_PAGE_SIZE and not cross a page boundary */ + uint8_t hdr[4]; + int rc; + + if (len == 0 || len > SPI_NOR_PAGE_SIZE) + return -1; + + rc = spi_flash_write_enable(); + if (rc != 0) + return rc; + + hdr[0] = SPI_CMD_PAGE_PROGRAM; + hdr[1] 
= (uint8_t)((address >> 16) & 0xFFU); + hdr[2] = (uint8_t)((address >> 8) & 0xFFU); + hdr[3] = (uint8_t)((address >> 0) & 0xFFU); + + qspi_cs_assert(); + qspi_xfer4(hdr, NULL, 4); + { + unsigned int off = 0; + while (off < len) { + unsigned int chunk = len - off; + if (chunk > 4) + chunk = 4; + qspi_xfer4(&data[off], NULL, chunk); + off += chunk; + } + } + qspi_cs_release(); + + return spi_flash_wait_ready(); +} + +/* Reads use Linear/XIP mode: switch the controller to linear mode, do a + * memcpy from the XIP window at 0xFC000000+offset, then return the + * controller to I/O mode. Mirrors how the ZynqMP HAL splits CMD17 (single + * block PIO) vs CMD18 (multi-block DMA) on SDHCI - here, short cmd ops use + * I/O mode and bulk reads use linear/XIP. */ +static int spi_flash_read(uint32_t address, uint8_t *data, unsigned int len) +{ + /* Issue 32-bit AXI reads to the XIP window. Single-byte loads are + * unreliable - the linear-mode controller is burst-aware and wants + * word transfers. Decompose each 32-bit word into bytes at the + * destination so unaligned addr / unaligned dst / unaligned len all + * work without faulting on devices with strict alignment. */ + const volatile uint32_t *xipw; + uint32_t aligned_addr; + uint32_t w; + unsigned int byte_off; + unsigned int i; + + if (len == 0) + return 0; + + qspi_linear_mode_setup(); + + aligned_addr = address & ~3U; + byte_off = address & 3U; + xipw = (const volatile uint32_t *)(Z7_QSPI_LINEAR_BASE + aligned_addr); + /* Sacrificial first 32-bit read primes the controller pipeline. 
*/ + (void)xipw[0]; + + i = 0; + if (byte_off != 0) { + w = *xipw++; + for (; byte_off < 4U && i < len; byte_off++, i++) + data[i] = (uint8_t)(w >> (byte_off * 8U)); + } + while (i + 4U <= len) { + w = *xipw++; + data[i++] = (uint8_t)(w >> 0); + data[i++] = (uint8_t)(w >> 8); + data[i++] = (uint8_t)(w >> 16); + data[i++] = (uint8_t)(w >> 24); + } + if (i < len) { + w = *xipw; + for (byte_off = 0; i < len; byte_off++, i++) + data[i] = (uint8_t)(w >> (byte_off * 8U)); + } + + qspi_io_mode_setup(); + return 0; +} +#if defined(TEST_EXT_FLASH) || defined(TEST_QSPI) +/* QSPI self-test (enable with -DTEST_EXT_FLASH or -DTEST_QSPI): + * 1) Read JEDEC ID and print it + * 2) Erase a 64 KB sector at TEST_EXT_FLASH_ADDR (default 2 MB offset) + * 3) Page-program a 256 B pattern (i & 0xFF) + * 4) Read back and verify + * Mirrors the existing src/spi_flash.c test_ext_flash() logic. Output via + * UART. Wired to fire from qspi_init() / hal_init() below. */ +#ifndef TEST_EXT_FLASH_ADDR +#define TEST_EXT_FLASH_ADDR (2U * 1024U * 1024U) +#endif + +static void qspi_print_hex_byte(uint8_t b) +{ + static const char hex[] = "0123456789abcdef"; + char buf[2]; + buf[0] = hex[(b >> 4) & 0xFU]; + buf[1] = hex[(b >> 0) & 0xFU]; + uart_write(buf, 2); +} + +static void qspi_print_hex32(uint32_t v) +{ + qspi_print_hex_byte((uint8_t)(v >> 24)); + qspi_print_hex_byte((uint8_t)(v >> 16)); + qspi_print_hex_byte((uint8_t)(v >> 8)); + qspi_print_hex_byte((uint8_t)(v >> 0)); +} + +static void qspi_selftest(void) +{ + static const uint8_t pattern[SPI_NOR_PAGE_SIZE] = { + /* zero-initialized, filled in below */ + 0 + }; + uint8_t rdback[SPI_NOR_PAGE_SIZE]; + uint8_t id[3] = { 0, 0, 0 }; + unsigned int i; + int rc; + /* Local mutable pattern buffer */ + uint8_t patbuf[SPI_NOR_PAGE_SIZE]; + + (void)pattern; + for (i = 0; i < SPI_NOR_PAGE_SIZE; i++) + patbuf[i] = (uint8_t)(i & 0xFFU); + + uart_write("qspi: --- TEST_EXT_FLASH start ---\n", 35); + + /* 1) JEDEC ID */ + rc = spi_flash_read_id(id); + 
uart_write("qspi: JEDEC ID = 0x", 19); + qspi_print_hex_byte(id[0]); + qspi_print_hex_byte(id[1]); + qspi_print_hex_byte(id[2]); + uart_write(" rc=", 5); + qspi_print_hex_byte((uint8_t)rc); + uart_write("\n", 1); + if (id[0] == 0x00 || id[0] == 0xFF) { + uart_write("qspi: JEDEC read returned blank - driver broken\n", 48); + return; + } + + /* 1b) Sanity read of known-programmed area at 0x100000 (signed image + * staged via Vitis program_flash). Should start with 'WOLF' magic. */ + { + uint8_t boot[8] = { 0 }; + spi_flash_read(0x00100000U, boot, sizeof(boot)); + uart_write("qspi: read @0x100000 = ", 23); + for (i = 0; i < 8; i++) qspi_print_hex_byte(boot[i]); + uart_write("\n", 1); + } + + /* 2) Erase */ + uart_write("qspi: erase sector @ 0x", 23); + qspi_print_hex32(TEST_EXT_FLASH_ADDR); + uart_write(" ...\n", 5); + rc = spi_flash_sector_erase(TEST_EXT_FLASH_ADDR); + if (rc != 0) { + uart_write("qspi: erase FAILED\n", 19); + return; + } + + /* 3) Page program */ + uart_write("qspi: page program ...\n", 23); + rc = spi_flash_page_program(TEST_EXT_FLASH_ADDR, patbuf, SPI_NOR_PAGE_SIZE); + if (rc != 0) { + uart_write("qspi: program FAILED\n", 21); + return; + } + + /* 4a) Re-read JEDEC ID after program to confirm controller is alive */ + { + uint8_t id2[3] = { 0, 0, 0 }; + spi_flash_read_id(id2); + uart_write("qspi: post-program JEDEC = 0x", 29); + qspi_print_hex_byte(id2[0]); + qspi_print_hex_byte(id2[1]); + qspi_print_hex_byte(id2[2]); + uart_write("\n", 1); + } + + /* 4b) Read back at TEST_EXT_FLASH_ADDR + compare */ + for (i = 0; i < SPI_NOR_PAGE_SIZE; i++) + rdback[i] = 0; + spi_flash_read(TEST_EXT_FLASH_ADDR, rdback, SPI_NOR_PAGE_SIZE); + uart_write("qspi: rdback[0..7] = ", 21); + for (i = 0; i < 8; i++) qspi_print_hex_byte(rdback[i]); + uart_write("\n", 1); + + /* 4c) Linear-mode XIP sanity check: read 32-bit words then decode bytes. + * Single-byte AXI accesses to the linear window confuse the controller - + * the burst-aware controller wants 32-bit reads. 
*/ + uart_write("qspi: xip32@0x200000 = ", 23); + { + volatile uint32_t *xipw; + unsigned int j; + uint32_t w; + Z7_QSPI_EN = 0; + Z7_QSPI_LQSPI_CR = 0x80000003U; /* LQ_MODE=1, INST=0x03 (READ), no dummy */ + Z7_QSPI_EN = Z7_QSPI_EN_VAL; + xipw = (volatile uint32_t*)(Z7_QSPI_LINEAR_BASE + TEST_EXT_FLASH_ADDR); + for (j = 0; j < 2; j++) { + w = xipw[j]; + qspi_print_hex_byte((uint8_t)(w >> 0)); + qspi_print_hex_byte((uint8_t)(w >> 8)); + qspi_print_hex_byte((uint8_t)(w >> 16)); + qspi_print_hex_byte((uint8_t)(w >> 24)); + } + uart_write(" xip32@0x100000 = ", 19); + xipw = (volatile uint32_t*)(Z7_QSPI_LINEAR_BASE + 0x100000U); + for (j = 0; j < 2; j++) { + w = xipw[j]; + qspi_print_hex_byte((uint8_t)(w >> 0)); + qspi_print_hex_byte((uint8_t)(w >> 8)); + qspi_print_hex_byte((uint8_t)(w >> 16)); + qspi_print_hex_byte((uint8_t)(w >> 24)); + } + uart_write("\n", 1); + /* Restore I/O mode for any later transfers. */ + Z7_QSPI_EN = 0; + Z7_QSPI_LQSPI_CR = 0; + Z7_QSPI_EN = Z7_QSPI_EN_VAL; + } + for (i = 0; i < SPI_NOR_PAGE_SIZE; i++) { + if (rdback[i] != patbuf[i]) { + uart_write("qspi: MISMATCH @ idx 0x", 23); + qspi_print_hex_byte((uint8_t)(i >> 8)); + qspi_print_hex_byte((uint8_t)(i & 0xFFU)); + uart_write(" got 0x", 8); + qspi_print_hex_byte(rdback[i]); + uart_write(" expected 0x", 12); + qspi_print_hex_byte(patbuf[i]); + uart_write("\n", 1); + return; + } + } + uart_write("qspi: --- TEST_EXT_FLASH PASS ---\n", 34); +} +#endif /* TEST_EXT_FLASH || TEST_QSPI */ + +#endif /* EXT_FLASH (qspi block) */ + +void hal_init(void) +{ +#ifdef DEBUG_UART + uart_init(); + { + const char banner[] = "wolfBoot Zynq-7000 (ZC702) hal_init\n"; + uart_write(banner, sizeof(banner) - 1); + } +#endif +#ifdef EXT_FLASH + qspi_init(); +#if defined(TEST_EXT_FLASH) || defined(TEST_QSPI) + qspi_selftest(); +#endif +#ifdef DEBUG_BOOTPART + /* Dump first 16 bytes of the BOOT partition so we can see if the + * QSPI driver is returning the signed-image header (magic 'WOLF'). 
*/ + { + uint8_t buf[16]; + const char hex[] = "0123456789abcdef"; + char line[3*16 + 2]; + unsigned int i; + spi_flash_read(0x00100000U, buf, sizeof(buf)); + for (i = 0; i < sizeof(buf); i++) { + line[i*3 + 0] = hex[(buf[i] >> 4) & 0xF]; + line[i*3 + 1] = hex[(buf[i] >> 0) & 0xF]; + line[i*3 + 2] = ' '; + } + line[sizeof(line) - 2] = '\n'; + line[sizeof(line) - 1] = 0; + uart_write("QSPI[0x100000]: ", 16); + uart_write(line, sizeof(line) - 1); + } +#endif +#endif +} + +/* Cortex-A9 cache teardown sequence used before do_boot(). FSBL hands off + * with MMU+L1+L2 enabled; we clean and disable them so the next stage sees + * a deterministic CPU state. Order follows ARM ARM B2.2.5: clean D-cache, + * disable MMU, invalidate I-cache, ISB. */ +static inline void z7_dsb(void) { __asm__ volatile("dsb sy" ::: "memory"); } +static inline void z7_isb(void) { __asm__ volatile("isb sy" ::: "memory"); } + +static void z7_l1_dcache_clean_invalidate_all(void) +{ + /* v7-A clean+invalidate by set/way - iterates the data cache levels in + * CLIDR and walks each (set, way) issuing DCCISW. Adapted from ARMv7-A + * Architecture Reference Manual B2.2.4 example. 
*/ + __asm__ volatile ( + "dmb sy \n" + "mrc p15, 1, r0, c0, c0, 1 \n" /* CLIDR */ + "ands r3, r0, #0x07000000 \n" + "mov r3, r3, lsr #23 \n" + "beq 5f \n" + "mov r10, #0 \n" + "1: \n" + "add r2, r10, r10, lsr #1 \n" + "mov r1, r0, lsr r2 \n" + "and r1, r1, #7 \n" + "cmp r1, #2 \n" + "blt 4f \n" + "mcr p15, 2, r10, c0, c0, 0\n" /* CSSELR */ + "isb \n" + "mrc p15, 1, r1, c0, c0, 0 \n" /* CCSIDR */ + "and r2, r1, #7 \n" + "add r2, r2, #4 \n" /* line size */ + "ldr r4, =0x3FF \n" + "ands r4, r4, r1, lsr #3 \n" /* assoc */ + "clz r5, r4 \n" + "ldr r7, =0x7FFF \n" + "ands r7, r7, r1, lsr #13 \n" /* num sets */ + "2: \n" + "mov r9, r4 \n" + "3: \n" + "orr r11, r10, r9, lsl r5 \n" + "orr r11, r11, r7, lsl r2 \n" + "mcr p15, 0, r11, c7, c14, 2 \n" /* DCCISW */ + "subs r9, r9, #1 \n" + "bge 3b \n" + "subs r7, r7, #1 \n" + "bge 2b \n" + "4: \n" + "add r10, r10, #2 \n" + "cmp r3, r10 \n" + "bgt 1b \n" + "5: \n" + "dsb sy \n" + "isb \n" + : + : + : "r0","r1","r2","r3","r4","r5","r7","r9","r10","r11","memory","cc" + ); +} + +static void z7_l1_icache_invalidate_all(void) +{ + /* ICIALLU + branch predictor invalidate */ + __asm__ volatile ( + "mov r0, #0 \n" + "mcr p15, 0, r0, c7, c5, 0 \n" /* ICIALLU */ + "mcr p15, 0, r0, c7, c5, 6 \n" /* BPIALL */ + "dsb sy \n" + "isb \n" + : : : "r0","memory" + ); +} + +static void z7_disable_mmu_and_caches(void) +{ + /* SCTLR: clear M (bit0), C (bit2), I (bit12). Leaves Z (branch predict) + * alone since we cleared BPIALL above. */ + __asm__ volatile ( + "mrc p15, 0, r0, c1, c0, 0 \n" + "bic r0, r0, #(1 << 0) \n" + "bic r0, r0, #(1 << 2) \n" + "bic r0, r0, #(1 << 12) \n" + "mcr p15, 0, r0, c1, c0, 0 \n" + "dsb sy \n" + "isb \n" + : : : "r0","memory" + ); +} + +void hal_prepare_boot(void) +{ + /* Disable IRQ + FIQ */ + __asm__ volatile("cpsid if" ::: "memory"); + z7_dsb(); + z7_l1_dcache_clean_invalidate_all(); + z7_disable_mmu_and_caches(); + z7_l1_icache_invalidate_all(); + z7_isb(); + /* PL310 L2: leave alone for first cut. 
FSBL on ZC702 typically does + * not enable PL310 unless explicitly configured; if your FSBL does, + * extend this routine with L2x0 clean-invalidate + disable. */ +} + +/* Internal flash operations are no-ops on Zynq-7000: + * QSPI is treated as external flash via ext_flash_*. */ +int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) +{ + (void)address; (void)data; (void)len; + return 0; +} + +int RAMFUNCTION hal_flash_erase(uint32_t address, int len) +{ + (void)address; (void)len; + return 0; +} + +void RAMFUNCTION hal_flash_unlock(void) { } +void RAMFUNCTION hal_flash_lock(void) { } + +#ifdef EXT_FLASH +int ext_flash_read(uintptr_t address, uint8_t *data, int len) +{ + if (len <= 0) + return 0; + if (spi_flash_read((uint32_t)address, data, (unsigned int)len) != 0) + return -1; + return len; /* wolfBoot's update_ram.c expects bytes-read on success */ +} + +int ext_flash_write(uintptr_t address, const uint8_t *data, int len) +{ + /* Split writes on SPI-NOR page boundaries (256 B). */ + uint32_t addr = (uint32_t)address; + unsigned int remain = (unsigned int)((len > 0) ? len : 0); + unsigned int off = 0; + + while (remain > 0) { + unsigned int page_off = addr & (SPI_NOR_PAGE_SIZE - 1U); + unsigned int chunk = SPI_NOR_PAGE_SIZE - page_off; + if (chunk > remain) + chunk = remain; + if (spi_flash_page_program(addr, data + off, chunk) != 0) + return -1; + addr += chunk; + off += chunk; + remain -= chunk; + } + return 0; +} + +int ext_flash_erase(uintptr_t address, int len) +{ + /* Erase whole sectors covering [address, address+len). The caller is + * expected to align to WOLFBOOT_SECTOR_SIZE (= SPI_NOR_SECTOR_SIZE). 
*/ + uint32_t addr = (uint32_t)address; + int remain = len; + while (remain > 0) { + if (spi_flash_sector_erase(addr) != 0) + return -1; + addr += SPI_NOR_SECTOR_SIZE; + remain -= (int)SPI_NOR_SECTOR_SIZE; + } + return 0; +} + +void ext_flash_lock(void) { } +void ext_flash_unlock(void) { } +#endif /* EXT_FLASH */ + +#ifdef MMU +/* Memory-mapped DTB fallback. Not used on Zynq-7000 with EXT_FLASH=1: the + * signed DTB is opened as PART_DTS_BOOT in update_ram.c and read out of + * QSPI via ext_flash_check_read. Return NULL so the fallback path is a + * no-op when the DTS partition is missing. */ +void *hal_get_dts_address(void) +{ + return NULL; +} + +void *hal_get_dts_update_address(void) +{ + return NULL; +} +#endif /* MMU */ + +/* Microsecond timer using the Cortex-A9 Global Timer at PERIPHBASE+0x200. + * 64-bit free-running counter, increments at PERIPHCLK (~166.67 MHz on + * ZC702 with the default FSBL clock plan: CPU_6x4x = 666.67 MHz). The + * Global Timer is started by the FSBL; if it isn't running yet we kick + * it here. */ +uint64_t hal_get_timer_us(void) +{ + uint32_t hi1, lo, hi2; + uint64_t count; + + /* If the Global Timer hasn't been enabled yet, enable it. The Control + * Register's bit 0 is the Timer Enable; one-time per power cycle. */ + if ((Z7_GTIMER_CTRL & Z7_GTIMER_CTRL_EN) == 0) { + Z7_GTIMER_CTRL = Z7_GTIMER_CTRL_EN; + } + + /* Read low/high atomically: read high, low, high again - if high changed, + * a wrap happened mid-read and we must retry. */ + do { + hi1 = Z7_GTIMER_HI; + lo = Z7_GTIMER_LO; + hi2 = Z7_GTIMER_HI; + } while (hi1 != hi2); + + count = ((uint64_t)hi2 << 32) | (uint64_t)lo; + /* Convert ticks to microseconds. PERIPHCLK is fixed (FSBL clock plan), + * so the divide is by a known constant - no 64x64 division needed. 
*/ + return (count * 1000000ULL) / (uint64_t)Z7_GTIMER_FREQ_HZ; +} + +#if defined(DISK_SDCARD) || defined(DISK_EMMC) +/* ============================================================================ + * SDHCI (SD Card / eMMC) Platform Support + * ============================================================================ + * The Zynq-7000 SDIO controller is an Arasan SDHCI v2.0, same IP family as + * the v3.0 used on ZynqMP - register layout matches the SD Host Controller + * Standard 1.00, mapped at 0xE0100000 (SD0) / 0xE0101000 (SD1). + * + * The generic SDHCI driver (src/sdhci.c) targets the Cadence SD4HC layout + * which adds a HRS register block at 0x000-0x01F and shifts the standard + * SRS registers to 0x200+. We translate between the two here, mirroring + * the ZynqMP HAL block in hal/zynq.c. + * + * Differences from ZynqMP's translation: + * - Base addr: SD0 at 0xE0100000 (ZC702 SD card slot) + * - Clock/reset is via SLCR.SDIO_{CLK,RST} (Z7) instead of CRL_APB (ZynqMP) + * - No HV4E mode (32-bit ARM, only legacy SDMA via SRS00) + * - DMA cache ops use ARMv7 CP15 (mcr p15,0,Rt,c7,c10,1 etc.) instead of + * AArch64 dc cvac. Caches are typically off in our hal_prepare_boot + * path, but if a future config keeps them on, the generic-SDMA + * coherency path needs the ops. + */ +#include "sdhci.h" + +#ifndef Z7_SDHCI_BASE +#define Z7_SDHCI_BASE Z7_SDIO0_BASE +#endif + +#define CADENCE_SRS_OFFSET 0x200 + +/* Standard SDHCI register offsets (byte addresses within the controller) + * matching the Arasan v2.0 register map. 
*/ +#define STD_SDHCI_SDMA_ADDR 0x00 /* SDMA System Address (32-bit) */ +#define STD_SDHCI_HOST_CTRL1 0x28 /* Host Control 1 (8-bit) */ +#define STD_SDHCI_POWER_CTRL 0x29 /* Power Control (8-bit) */ +#define STD_SDHCI_BLKGAP_CTRL 0x2A /* Block Gap Control (8-bit) */ +#define STD_SDHCI_WAKEUP_CTRL 0x2B /* Wakeup Control (8-bit) */ +#define STD_SDHCI_CLK_CTRL 0x2C /* Clock Control (16-bit) */ +#define STD_SDHCI_TIMEOUT_CTRL 0x2E /* Timeout Control (8-bit) */ +#define STD_SDHCI_SW_RESET 0x2F /* Software Reset (8-bit) */ +#define STD_SDHCI_HOST_CTRL2 0x3C /* Auto CMD Err(16) + Host Ctrl 2(16) */ + +/* Software Reset register bits (at offset 0x2F, 8-bit register) */ +#define STD_SDHCI_SRA 0x01 /* Software Reset for All */ + +/* Cadence HRS faux-read translation (the driver pokes HRS00/HRS04 during + * init even though there's no HRS block on Arasan; emulate just enough). */ +static uint32_t z7_sdhci_hrs_read(uint32_t hrs_offset) +{ + volatile uint8_t *base = (volatile uint8_t *)Z7_SDHCI_BASE; + switch (hrs_offset) { + case 0x000: { /* HRS00 - Software Reset (mirror std SRA) */ + uint8_t val = *((volatile uint8_t *)(base + STD_SDHCI_SW_RESET)); + return (val & STD_SDHCI_SRA) ? 1U : 0U; + } + case 0x010: /* HRS04 - PHY access ACK (Cadence-specific). Return ACK so + * the driver's wait-for-ack loops complete. */ + return (1U << 26); + default: + return 0; + } +} + +static void z7_sdhci_hrs_write(uint32_t hrs_offset, uint32_t val) +{ + volatile uint8_t *base = (volatile uint8_t *)Z7_SDHCI_BASE; + if (hrs_offset == 0x000 && (val & 1U)) { + /* Software Reset for All - byte write to std SRA. */ + *((volatile uint8_t *)(base + STD_SDHCI_SW_RESET)) = STD_SDHCI_SRA; + } + /* HRS01 (debounce), HRS02, HRS06 (eMMC mode) etc. - not applicable on + * Arasan; ignore. 
*/ + (void)val; +} + +uint32_t sdhci_reg_read(uint32_t offset) +{ + volatile uint8_t *base = (volatile uint8_t *)Z7_SDHCI_BASE; + + if (offset >= CADENCE_SRS_OFFSET) { + uint32_t std_off = offset - CADENCE_SRS_OFFSET; + + /* SRS22 (0x58) -> SRS00: Legacy SDMA address register */ + if (std_off == 0x58) { + return *((volatile uint32_t *)(base + STD_SDHCI_SDMA_ADDR)); + } + /* SRS23 (0x5C) -> 0: no 64-bit SDMA on Arasan v2.0 */ + if (std_off == 0x5C) { + return 0; + } + return *((volatile uint32_t *)(base + std_off)); + } + return z7_sdhci_hrs_read(offset); +} + +void sdhci_reg_write(uint32_t offset, uint32_t val) +{ + volatile uint8_t *base = (volatile uint8_t *)Z7_SDHCI_BASE; + + if (offset >= CADENCE_SRS_OFFSET) { + uint32_t std_off = offset - CADENCE_SRS_OFFSET; + + /* Cadence SRS10 = std 0x28..0x2B (HostCtrl1/PowerCtrl/BlkGap/Wakeup). + * Mask out HSE (High Speed Enable, bit 2) before writing HostCtrl1 + * - Arasan v2.0 + 3.3V SDHC cards stay in single-edge default-speed + * timing (max 25 MHz); the driver tries to push HSE+50MHz which the + * card can't follow, causing DTOE on data transfers. 
*/ + if (std_off == 0x28) { + uint8_t host_ctrl1 = (uint8_t)(val & 0xFF); + host_ctrl1 &= (uint8_t)~0x04U; /* clear HSE */ + *((volatile uint8_t *)(base + STD_SDHCI_HOST_CTRL1)) = host_ctrl1; + *((volatile uint8_t *)(base + STD_SDHCI_POWER_CTRL)) = + (uint8_t)((val >> 8) & 0xFF); + *((volatile uint8_t *)(base + STD_SDHCI_BLKGAP_CTRL)) = + (uint8_t)((val >> 16) & 0xFF); + *((volatile uint8_t *)(base + STD_SDHCI_WAKEUP_CTRL)) = + (uint8_t)((val >> 24) & 0xFF); + return; + } + /* Cadence SRS11 = std 0x2C..0x2F (ClkCtrl/TimeoutCtrl/SwReset) */ + if (std_off == 0x2C) { + *((volatile uint16_t *)(base + STD_SDHCI_CLK_CTRL)) = + (uint16_t)(val & 0xFFFF); + *((volatile uint8_t *)(base + STD_SDHCI_TIMEOUT_CTRL)) = + (uint8_t)((val >> 16) & 0xFF); + *((volatile uint8_t *)(base + STD_SDHCI_SW_RESET)) = + (uint8_t)((val >> 24) & 0xFF); + return; + } + /* SRS22 (0x58) -> SRS00: Legacy SDMA address register */ + if (std_off == 0x58) { + *((volatile uint32_t *)(base + STD_SDHCI_SDMA_ADDR)) = val; + return; + } + if (std_off == 0x5C) { + return; /* no 64-bit SDMA */ + } + /* SRS15 (0x3C): mask out v3-only bits the driver tries to set. + * Arasan SDHCI v2.0 (Zynq-7000) is 3.3V-only with no UHS-I + * support; writing UHS Mode Select / 1.8V Enable / Sampling Clock + * Select / HV4E / A64 either reserved-faults or puts the + * controller into an invalid signaling mode that breaks all + * subsequent data transfers (DTOE on the first PIO read). */ + if (std_off == STD_SDHCI_HOST_CTRL2) { + val &= ~((7U << 0) /* UMS[2:0] (UHS Mode Select) */ + | (1U << 3) /* 1.8V Signaling Enable */ + | (7U << 4) /* Driver Strength Select */ + | (1U << 7) /* Sampling Clock Select */ + | (1U << 12) /* HV4E (Host Version 4 Enable) */ + | (1U << 13)); /* A64 (64-bit Addressing) */ + } + *((volatile uint32_t *)(base + std_off)) = val; + return; + } + z7_sdhci_hrs_write(offset, val); +} + +/* SDHCI controller bring-up. 
ZC702 boots from SD with FSBL having already + * configured SDIO_CLK / pinmux / power, so this is mostly a no-op. For + * JTAG-loaded development we kick the SLCR clock + reset just in case. */ +void sdhci_platform_init(void) +{ + volatile int i; + + /* Unlock SLCR. */ + Z7_SLCR_UNLOCK = Z7_SLCR_UNLOCK_KEY; + + /* APER clock for SDIO0 (AHB bus clock to the controller). */ + Z7_SLCR_APER_CLK |= Z7_SLCR_APER_SDIO0; + + /* SDIO_CLK_CTRL: enable CLKACT0, IO_PLL source, divisor for ~50 MHz + * ref. IO_PLL=1 GHz default, /20 = 50 MHz. Skip if FSBL already set + * something sensible. */ + if ((Z7_SLCR_SDIO_CLK & Z7_SLCR_SDIO_CLK_ACT0) == 0) { + uint32_t v = Z7_SLCR_SDIO_CLK; + v &= ~Z7_SLCR_SDIO_CLK_DIV_MSK; + v |= (20U << Z7_SLCR_SDIO_CLK_DIV_SH); + v |= Z7_SLCR_SDIO_CLK_ACT0; + Z7_SLCR_SDIO_CLK = v; + } + + /* Pulse SDIO0 reset (REF + CPU) so the controller picks up clock and + * caps cleanly. */ + Z7_SLCR_SDIO_RST |= (Z7_SLCR_SDIO_RST_REF0 | Z7_SLCR_SDIO_RST_CPU0); + for (i = 0; i < 256; i++) { /* short delay */ } + Z7_SLCR_SDIO_RST &= ~(Z7_SLCR_SDIO_RST_REF0 | Z7_SLCR_SDIO_RST_CPU0); + for (i = 0; i < 1024; i++) { /* wait for controller ready */ } + + Z7_SLCR_LOCK = Z7_SLCR_LOCK_KEY; +} + +void sdhci_platform_irq_init(void) +{ + /* Polling-mode driver: nothing to do. */ +} + +void sdhci_platform_set_bus_mode(int is_emmc) +{ + (void)is_emmc; + /* SD vs eMMC bus mode: nothing extra needed for SD - the generic driver + * sets up SD bus width / clock via the standard SDHCI registers, which + * our reg_read/write translation handles transparently. */ +} + +/* DMA cache maintenance for SDMA. wolfBoot's hal_prepare_boot disables D-cache + * before do_boot, but SDHCI runs BEFORE that during the load+verify phase + * with caches potentially live. ARMv7 cache ops (clean / clean+invalidate + * by MVA) keep DMA buffers coherent. 
*/ +void sdhci_platform_dma_prepare(void *buf, uint32_t sz, int is_write) +{ + uintptr_t start = (uintptr_t)buf & ~31U; /* L1 D-cache line = 32B */ + uintptr_t end = ((uintptr_t)buf + sz + 31U) & ~31U; + uintptr_t addr; + + if (is_write) { + /* DCCMVAC - clean by MVA: dirty CPU lines flushed so DMA reads them */ + for (addr = start; addr < end; addr += 32U) { + __asm__ volatile("mcr p15, 0, %0, c7, c10, 1" : : "r"(addr) : "memory"); + } + } else { + /* DCCIMVAC - clean+invalidate by MVA: discard stale CPU lines so + * DMA write data shows up on subsequent CPU reads */ + for (addr = start; addr < end; addr += 32U) { + __asm__ volatile("mcr p15, 0, %0, c7, c14, 1" : : "r"(addr) : "memory"); + } + } + __asm__ volatile("dsb sy" : : : "memory"); +} + +void sdhci_platform_dma_complete(void *buf, uint32_t sz, int is_write) +{ + if (!is_write) { + uintptr_t start = (uintptr_t)buf & ~31U; + uintptr_t end = ((uintptr_t)buf + sz + 31U) & ~31U; + uintptr_t addr; + for (addr = start; addr < end; addr += 32U) { + __asm__ volatile("mcr p15, 0, %0, c7, c14, 1" : : "r"(addr) : "memory"); + } + __asm__ volatile("dsb sy" : : : "memory"); + } +} +#endif /* DISK_SDCARD || DISK_EMMC */ + +#endif /* TARGET_zynq7000 */ diff --git a/hal/zynq7000.h b/hal/zynq7000.h new file mode 100644 index 0000000000..cfbb46b946 --- /dev/null +++ b/hal/zynq7000.h @@ -0,0 +1,217 @@ +/* zynq7000.h + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Xilinx Zynq-7000 (Cortex-A9, ARMv7-A 32-bit) HAL register map. + * Reference: UG585 (Zynq-7000 TRM), UG821 (Zynq-7000 SW Dev Guide). + * Target board: ZC702 Evaluation Kit (XC7Z020). + */ + +#ifndef _ZYNQ7000_H_ +#define _ZYNQ7000_H_ + +#include + +/* DDR memory range (PS DDR3 on ZC702: 1 GB) */ +#define Z7_DDR_BASE 0x00000000UL +#define Z7_DDR_HIGH 0x3FFFFFFFUL + +/* On-chip memory (OCM, 256 KB at high alias when remapped) */ +#define Z7_OCM_BASE 0xFFFC0000UL + +/* SLCR (System Level Control Registers) - UG585 ch.4 */ +#define Z7_SLCR_BASE 0xF8000000UL +#define Z7_SLCR_UNLOCK (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x008))) +#define Z7_SLCR_LOCK (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x004))) +#define Z7_SLCR_UART_RST (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x228))) +#define Z7_SLCR_LQSPI_RST (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x204))) +#define Z7_SLCR_UART_CLK (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x154))) +#define Z7_SLCR_LQSPI_CLK (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x14C))) +#define Z7_SLCR_UNLOCK_KEY 0x0000DF0DUL +#define Z7_SLCR_LOCK_KEY 0x0000767BUL + +/* UART (XUartPs) - UG585 ch.19. Same IP as ZynqMP, different base. 
*/ +#define Z7_UART0_BASE 0xE0000000UL +#define Z7_UART1_BASE 0xE0001000UL + +#if defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 0 + #define DEBUG_UART_BASE Z7_UART0_BASE +#elif defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 1 + #define DEBUG_UART_BASE Z7_UART1_BASE +#endif +#ifndef DEBUG_UART_BASE + /* ZC702 console is wired to UART1 (MIO48/49) */ + #define DEBUG_UART_BASE Z7_UART1_BASE +#endif + +#define Z7_UART_CR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x00))) +#define Z7_UART_MR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x04))) +#define Z7_UART_IDR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x0C))) +#define Z7_UART_ISR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x14))) +#define Z7_UART_BR_GEN (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x18))) +#define Z7_UART_RXTOUT (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x1C))) +#define Z7_UART_RXWM (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x20))) +#define Z7_UART_SR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x2C))) +#define Z7_UART_FIFO (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x30))) +#define Z7_UART_BR_DIV (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x34))) +#define Z7_UART_TXWM (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x44))) + +#define Z7_UART_CR_TX_DIS 0x00000020U +#define Z7_UART_CR_TX_EN 0x00000010U +#define Z7_UART_CR_RX_DIS 0x00000008U +#define Z7_UART_CR_RX_EN 0x00000004U +#define Z7_UART_CR_TXRST 0x00000002U +#define Z7_UART_CR_RXRST 0x00000001U +#define Z7_UART_ISR_MASK 0x00003FFFU +#define Z7_UART_MR_8N1 0x00000020U /* parity none, 8 data, 1 stop */ +#define Z7_UART_SR_TXFULL 0x00000010U +#define Z7_UART_SR_TXEMPTY 0x00000008U + +/* PS UART_REF_CLK on ZC702 is 50 MHz (IO_PLL / 20). + * BR_GEN = ref / (baud * (BR_DIV + 1)). For 115200 with BR_DIV=6 -> BR_GEN=62. 
+ */ +#ifndef UART_CLK_REF + #define UART_CLK_REF 50000000U +#endif +#ifndef DEBUG_UART_BAUD + #define DEBUG_UART_BAUD 115200U + #define DEBUG_UART_DIV 6U +#endif + +/* QSPI controller (XQspiPs - the older "Linear/Static" QSPI on Z7, + * NOT the GQSPI on ZynqMP). UG585 ch.12. */ +#define Z7_QSPI_BASE 0xE000D000UL +#define Z7_QSPI_LINEAR_BASE 0xFC000000UL /* XIP window for linear-mode reads */ + +#define Z7_QSPI_CR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x00))) +#define Z7_QSPI_ISR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x04))) +#define Z7_QSPI_IER (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x08))) +#define Z7_QSPI_IDR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x0C))) +#define Z7_QSPI_IMR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x10))) +#define Z7_QSPI_EN (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x14))) +#define Z7_QSPI_DELAY (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x18))) +#define Z7_QSPI_TXD0 (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x1C))) +#define Z7_QSPI_RXD (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x20))) +#define Z7_QSPI_SICR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x24))) +#define Z7_QSPI_TXTHR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x28))) +#define Z7_QSPI_RXTHR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x2C))) +#define Z7_QSPI_GPIO (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x30))) +#define Z7_QSPI_LPBK (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x38))) +#define Z7_QSPI_TXD1 (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x80))) +#define Z7_QSPI_TXD2 (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x84))) +#define Z7_QSPI_TXD3 (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x88))) +#define Z7_QSPI_LQSPI_CR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0xA0))) +#define Z7_QSPI_LQSPI_STS (*((volatile uint32_t*)(Z7_QSPI_BASE + 0xA4))) +#define Z7_QSPI_MODID (*((volatile uint32_t*)(Z7_QSPI_BASE + 0xFC))) + +/* QSPI Config Register (CR) bits. + * PCS is a 4-bit slave-select decode field [13:10]: 0xF = all CS deasserted, + * 0xE = CS0 active. We mask the whole 4-bit field, not just bit 10. 
+ */
+#define Z7_QSPI_CR_IFMODE       0x80000000U /* flash mem interface mode */
+#define Z7_QSPI_CR_HOLD_B       0x00080000U /* drive HOLD high */
+#define Z7_QSPI_CR_MANSTRT      0x00010000U /* manual start command (kick) */
+#define Z7_QSPI_CR_MANSTRTEN    0x00008000U /* manual start enable */
+#define Z7_QSPI_CR_SSFORCE      0x00004000U /* manual CS control */
+#define Z7_QSPI_CR_PCS_MASK     0x00003C00U /* PCS field [13:10] */
+#define Z7_QSPI_CR_PCS_NONE     0x00003C00U /* all CS deasserted (0xF<<10) */
+#define Z7_QSPI_CR_PCS_CS0      0x00003800U /* CS0 asserted (0xE<<10) */
+#define Z7_QSPI_CR_REF_CLK      0x00000100U
+#define Z7_QSPI_CR_FIFO_WIDTH   0x000000C0U /* both bits must be set, 0b11 (32-bit) */
+#define Z7_QSPI_CR_BAUD_DIV_MSK 0x00000038U /* BAUDDIV field, bits [5:3] */
+/* BAUDDIV field is value N in bits[5:3]; clock = ref_clk / 2^(N+1).
+ * N=1 -> /4, N=2 -> /8, N=3 -> /16. */
+#define Z7_QSPI_CR_BAUD_DIV_4   0x00000008U /* /4 (BAUDDIV=1) */
+#define Z7_QSPI_CR_BAUD_DIV_8   0x00000010U /* /8 (BAUDDIV=2) */
+#define Z7_QSPI_CR_BAUD_DIV_16  0x00000018U /* /16 (BAUDDIV=3) */
+#define Z7_QSPI_CR_CPHA         0x00000004U
+#define Z7_QSPI_CR_CPOL         0x00000002U
+#define Z7_QSPI_CR_MSTREN       0x00000001U
+
+/* QSPI Interrupt Status Register (ISR) bits */
+#define Z7_QSPI_ISR_TXUF        0x00000040U /* TX underflow */
+#define Z7_QSPI_ISR_RXFULL      0x00000020U /* RX FIFO full */
+#define Z7_QSPI_ISR_RXNEMPTY    0x00000010U /* RX FIFO not empty */
+#define Z7_QSPI_ISR_TXFULL      0x00000008U /* TX FIFO full */
+#define Z7_QSPI_ISR_TXNFULL     0x00000004U /* TX FIFO not full (threshold) */
+#define Z7_QSPI_ISR_RXOVR       0x00000001U /* RX overrun */
+#define Z7_QSPI_ISR_MASK        0x0000007DU /* OR of the six ISR bits above */
+
+#define Z7_QSPI_EN_VAL          0x00000001U /* enable controller */
+
+/* SLCR clock/reset for QSPI (FSBL normally pre-configures these) */
+#define Z7_SLCR_LQSPI_CLK_DIV_MSK   0x00003F00U /* divisor field, bits [13:8] */
+#define Z7_SLCR_LQSPI_CLK_DIV_5     0x00000500U /* divisor field = 5 */
+#define Z7_SLCR_LQSPI_CLK_SRCSEL_M  0x00000030U /* source-select field, bits [5:4] */
+#define Z7_SLCR_LQSPI_CLK_CLKACT0   0x00000001U
+#define Z7_SLCR_LQSPI_RST_REF       0x00000002U
+#define Z7_SLCR_LQSPI_RST_CPU       0x00000001U
+
+/* SDIO (Arasan SDHCI v2.0). UG585 ch.10. */
+#define Z7_SDIO0_BASE           0xE0100000UL
+#define Z7_SDIO1_BASE           0xE0101000UL
+
+/* SDIO clock/reset via SLCR. UG585 ch.4. */
+#define Z7_SLCR_SDIO_CLK        (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x150)))
+#define Z7_SLCR_SDIO_RST        (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x218)))
+#define Z7_SLCR_APER_CLK        (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x12C)))
+/* SDIO_CLK_CTRL: CLKACT0/1 (bits 0/1), SRCSEL (bits 5:4 = 00 IO_PLL),
+ * DIVISOR (bits 13:8). For 50 MHz SDIO ref from 1 GHz IO_PLL, DIVISOR=20. */
+#define Z7_SLCR_SDIO_CLK_ACT0       0x00000001U
+#define Z7_SLCR_SDIO_CLK_ACT1       0x00000002U
+#define Z7_SLCR_SDIO_CLK_DIV_SH     8
+#define Z7_SLCR_SDIO_CLK_DIV_MSK    0x00003F00U
+#define Z7_SLCR_SDIO_RST_REF0       0x00000010U
+#define Z7_SLCR_SDIO_RST_REF1       0x00000020U
+#define Z7_SLCR_SDIO_RST_CPU0       0x00000001U
+#define Z7_SLCR_SDIO_RST_CPU1       0x00000002U
+#define Z7_SLCR_APER_SDIO0          0x00000400U /* SDIO0 AMBA APER clock enable */
+#define Z7_SLCR_APER_SDIO1          0x00000800U /* SDIO1 AMBA APER clock enable */
+
+/* Cortex-A9 Global Timer (64-bit, increments at PERIPHCLK = CPU_3x2x).
+ * UG585 ch.3.5.4. Z7_GTIMER_BASE is defined further down in this header. */
+#define Z7_GTIMER_LO            (*((volatile uint32_t*)(Z7_GTIMER_BASE + 0x00)))
+#define Z7_GTIMER_HI            (*((volatile uint32_t*)(Z7_GTIMER_BASE + 0x04)))
+#define Z7_GTIMER_CTRL          (*((volatile uint32_t*)(Z7_GTIMER_BASE + 0x08)))
+#define Z7_GTIMER_CTRL_EN       0x00000001U
+/* The Cortex-A9 Global Timer runs at PERIPHCLK, which on Zynq-7000 is the
+ * CPU_3x2x clock = CPU_6x4x / 2. With the default ZC702 FSBL clock plan
+ * (ARM_PLL = 1.333 GHz, CPU_6x4x = ARM_PLL/2 = 666.67 MHz), PERIPHCLK is
+ * 333.33 MHz. Override at compile time if you reclock the CPU. */
+#ifndef Z7_GTIMER_FREQ_HZ
+#define Z7_GTIMER_FREQ_HZ       333333333UL
+#endif
+
+/* DevC (Device Configuration: AES + bitstream loader). UG585 ch.6. */
+#define Z7_DEVC_BASE            0xF8007000UL
+
+/* GIC (ARM PL390, GIC architecture v1) - per-CPU interface and distributor. */
+#define Z7_GIC_CPUIF_BASE       0xF8F00100UL
+#define Z7_GIC_DIST_BASE        0xF8F01000UL
+
+/* PL310 L2 cache controller. UG585 ch.3.
*/
+#define Z7_PL310_BASE           0xF8F02000UL
+
+/* SCU, global timer and private timer/watchdog bases. UG585 ch.3. */
+#define Z7_SCU_BASE             0xF8F00000UL
+#define Z7_GTIMER_BASE          0xF8F00200UL
+#define Z7_PTIMER_BASE          0xF8F00600UL
+
+#endif /* _ZYNQ7000_H_ */
diff --git a/hal/zynq7000.ld b/hal/zynq7000.ld
new file mode 100644
index 0000000000..aeca136809
--- /dev/null
+++ b/hal/zynq7000.ld
@@ -0,0 +1,62 @@
+OUTPUT_FORMAT("elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+/* wolfBoot is loaded by Xilinx FSBL into DDR at 0x04000000.
+ * Reserve 1 MB for code/data/bss/stack (_stack_top is the end of this region). */
+MEMORY
+{
+    DDR_MEM(rwx): ORIGIN = 0x04000000, LENGTH = 0x00100000
+}
+
+ENTRY(reset_vector_entry)
+
+SECTIONS
+{
+    .text : {
+        _start_text = .;
+        KEEP(*(start))
+        *(.text)
+        *(.text.*)
+        *(.rodata)
+        *(.rodata*)
+        . = ALIGN(4);
+        *(.glue_7)
+        . = ALIGN(4);
+        *(.eh_frame)
+        . = ALIGN(4);
+        _end_text = .;
+    } > DDR_MEM
+
+    . = ALIGN(4);
+    .dummy : {
+        _edummy = .;
+    } > DDR_MEM
+
+    .data : AT (LOADADDR(.dummy)) {
+        _start_data = .;
+        *(.vectors)
+        *(.data)
+        *(.data.*)
+        _end_data = .;
+    } > DDR_MEM
+
+    .bss (NOLOAD) : {
+        .
= ALIGN(4);
+        _start_bss = .;
+        *(.bss)
+        *(.bss.*)
+        *(COMMON)
+        _end_bss = .;
+        _end = .;
+    } > DDR_MEM
+}
+
+kernel_addr = 0x00100000;
+update_addr = 0x00700000;
+dts_addr = 0x00000000;
+
+_romsize = _end_data - _start_text;
+_sramsize = _end_bss - _start_text;
+END_STACK = _start_text;
+_stack_top = ORIGIN(DDR_MEM) + LENGTH(DDR_MEM);
+end = .;
diff --git a/src/boot_arm32.c b/src/boot_arm32.c
index dedd1e4375..8b7747a716 100644
--- a/src/boot_arm32.c
+++ b/src/boot_arm32.c
@@ -61,23 +61,30 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset, const uint32_t* dts_offset)
 void RAMFUNCTION do_boot(const uint32_t *app_offset)
 #endif
 {
-  /* Set application address via r4 */
-  asm volatile("mov r4, %0" : : "r"(app_offset));
+    /* Set application address via r4 */
+    asm volatile("mov r4, %0" : : "r"(app_offset));

 #ifdef MMU
-  /* Move the dts pointer to r5 (as first argument) */
-  asm volatile("mov r5, %0" : : "r"(dts_offset));
+    /* Stage the dts pointer in r5; it is copied to r0 or r2 below */
+    asm volatile("mov r5, %0" : : "r"(dts_offset));
 #else
-  asm volatile("mov r5, 0");
+    asm volatile("mov r5, #0");
 #endif

-  /* Zero registers r1, r2, r3 */
-  asm volatile("mov r3, 0");
-  asm volatile("mov r2, 0");
-  asm volatile("mov r1, 0");
-
-  /* Move the dts pointer to r0 (as first argument) */
+#ifdef WOLFBOOT_LINUX_PAYLOAD
+    /* ARM Linux boot ABI: r0=0, r1=~0 (no machine ID, use DTB),
+     * r2=DTB physical address (taken from r5), r3=0. */
+    asm volatile("mvn r1, #0");
+    asm volatile("mov r2, r5");
+    asm volatile("mov r3, #0");
+    asm volatile("mov r0, #0");
+#else
+    /* wolfBoot legacy DTS handoff: r0 = dts pointer (from r5), r1=r2=r3=0. */
+    asm volatile("mov r3, #0");
+    asm volatile("mov r2, #0");
+    asm volatile("mov r1, #0");
     asm volatile("mov r0, r5");
+#endif

     /* Unconditionally jump to app_entry at r4 */
     asm volatile("bx r4");
diff --git a/src/boot_arm32_start.S b/src/boot_arm32_start.S
index fa3021232f..34ee186bdd 100644
--- a/src/boot_arm32_start.S
+++ b/src/boot_arm32_start.S
@@ -1,5 +1,25 @@
-/**
- * Arm32 (32bit Cortex-A) boot up
+/* boot_arm32_start.S
+ *
+ * Generic ARMv7-A 32-bit (Cortex-A5/A7/A8/A9/A15/A17) startup for wolfBoot.
+ * Performs the minimum CPU setup that every standalone image needs before
+ * running C code:
+ *
+ *   1. mask IRQ + FIQ, force SVC mode
+ *   2. set VBAR to wolfBoot's vector table (so aborts route to us, not
+ *      to whatever the bootloader/BootROM left vectors pointing at)
+ *   3. clear SCTLR V (high vectors), A (alignment fault), C (D-cache),
+ *      I (I-cache) bits. Leave M (MMU) alone so we inherit a flat 1:1
+ *      mapping from the bootloader if it set one up - disabling MMU on
+ *      ARMv7-A would treat all memory as Strongly-Ordered and fault on
+ *      unaligned accesses (e.g. C string ops on 2-byte aligned literals)
+ *   4. invalidate TLB, I-cache, branch predictor
+ *   5. set up per-mode stacks (SVC/IRQ/FIQ/ABT/UND) carved below _stack_top
+ *   6. copy .data, zero .bss, enable async aborts, jump to main
+ *
+ * Modeled after the Xilinx standalone BSP cortexa9/gcc/boot.S, generalized
+ * for any ARMv7-A target. Used by both SAMA5D3 (Cortex-A5) and Zynq-7000
+ * (Cortex-A9).
+ *
  * Copyright (C) 2026 wolfSSL Inc.
  *
  * This file is part of wolfBoot.
@@ -18,81 +38,137 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
-.section start
-  .text
-/* startup entry point */
-  .globl reset_vector_entry
-  .align 4
+    .arm
+    .section start, "ax"
+
+    .globl reset_vector_entry
+    .align 4
 reset_vector_entry:
-/* Exception vectors (should be a branch to be detected as a valid code by the rom */
-_exception_vectors:
-  b isr_reset /* reset */
-  b isr_empty /* Undefined Instruction */
-  b isr_swi /* Software Interrupt */
-  b isr_pabt /* Prefetch Abort */
-  b dabt_vector /* Data Abort */
-.word _romsize /* Size of the binary for ROMCode loading */
-  b isr_irq /* IRQ : read the AIC */
-  b isr_fiq /* FIQ */
-
-isr_empty:
-  b isr_empty
-isr_swi:
-  b isr_swi
-isr_pabt:
-  b isr_pabt
-dabt_vector:
-  subs pc, r14, #4 /* return */
-  nop
-isr_rsvd:
-  b isr_rsvd
-isr_irq:
-  b isr_irq
-isr_fiq:
-  b isr_fiq
-
-
-/* Reset handler procedure. Prepare the memory and call main() */
+_vector_table:
+    b isr_reset     /* 0x00 reset */
+    b isr_undef     /* 0x04 undefined */
+    b isr_swi       /* 0x08 swi */
+    b isr_pabt      /* 0x0C prefetch abort */
+    b isr_dabt      /* 0x10 data abort */
+    .word _romsize  /* 0x14 not a branch: image size word for ROM-code loading */
+    b isr_irq       /* 0x18 IRQ */
+    b isr_fiq       /* 0x1C FIQ */
+
+isr_undef: b isr_undef /* every handler from here to isr_fiq spins in place */
+isr_swi: b isr_swi
+isr_pabt: b isr_pabt
+isr_dabt: b isr_dabt
+isr_irq: b isr_irq
+isr_fiq: b isr_fiq
+
 isr_reset:
-  /* Initialize the stack pointer */
-  ldr sp,=_stack_top
-  /* Save BootROM supplied boot source information to stack */
-  push {r4}
-
-  /* Copy the data section */
-  ldr r2, =_lp_data
-  ldmia r2, {r1, r3, r4}
-  1:
-  cmp r3, r4
-  ldrcc r2, [r1], #4
-  strcc r2, [r3], #4
-  bcc 1b
-
-  /* Zero bss area */
-  adr r2, _lp_bss
-  ldmia r2, {r3, r4}
-  mov r2, #0
-  1:
-  cmp r3, r4
-  strcc r2, [r3], #4
-  bcc 1b
-
-  /* Jump to main() */
-  ldr r4, = main
+    /* 1. Mask IRQ + FIQ, force SVC mode.
*/
+    cpsid if
+    mrs r0, cpsr
+    bic r0, r0, #0x1f
+    orr r0, r0, #0x13 /* SVC mode */
+    msr cpsr_c, r0
+
+    /* 2. Set VBAR to our vector table (the load address). */
+    ldr r0, =_vector_table
+    mcr p15, 0, r0, c12, c0, 0
+
+    /* 3. Adjust SCTLR. Keep the MMU bit (M) as the bootloader left it: with
+     * a flat 1:1 mapping active, unaligned LDR/STR from C code doesn't fault
+     * (NOTE(review): arch.mk says FSBL hands off with the MMU off - confirm).
+     * Clear V (high vectors), A (alignment check), C (D-cache), I (I-cache). */
+    mrc p15, 0, r1, c1, c0, 0
+    bic r1, r1, #(1 << 13) /* V bit */
+    bic r1, r1, #(1 << 1) /* A bit */
+    bic r1, r1, #(1 << 2) /* C bit (D-cache) */
+    bic r1, r1, #(1 << 12) /* I bit (I-cache) */
+    mcr p15, 0, r1, c1, c0, 0
+    dsb
+    isb
+
+    /* 4. Invalidate TLB, I-cache, branch predictor. Leave D-cache alone -
+     * if the bootloader has dirty lines we'd need clean+invalidate by
+     * set/way (CSSELR/CCSIDR walk), which is risky in startup. */
+    mov r0, #0
+    mcr p15, 0, r0, c8, c7, 0 /* TLBIALL */
+    mcr p15, 0, r0, c7, c5, 0 /* ICIALLU */
+    mcr p15, 0, r0, c7, c5, 6 /* BPIALL */
+    dsb
+    isb
+
+    /* 5. Set stack pointers for IRQ/FIQ/ABT/UND/SVC modes. The exception
+     * modes get a 1 KB slice each at the very top of the stack region; the
+     * SVC stack starts below them so it can grow without overwriting them:
+     *   _stack_top - 0x000   IRQ
+     *   _stack_top - 0x400   FIQ
+     *   _stack_top - 0x800   ABT
+     *   _stack_top - 0xC00   UND
+     *   _stack_top - 0x1000  SVC - main wolfBoot stack (grows downward)
+     */
+    mrs r0, cpsr
+    bic r0, r0, #0x1f
+
+    orr r1, r0, #0x12 /* IRQ */
+    msr cpsr_c, r1
+    ldr sp, =_stack_top
+
+    orr r1, r0, #0x11 /* FIQ */
+    msr cpsr_c, r1
+    ldr sp, =(_stack_top - 0x400)
+
+    orr r1, r0, #0x17 /* ABT */
+    msr cpsr_c, r1
+    ldr sp, =(_stack_top - 0x800)
+
+    orr r1, r0, #0x1b /* UND */
+    msr cpsr_c, r1
+    ldr sp, =(_stack_top - 0xC00)
+
+    orr r1, r0, #0x13 /* SVC (where main runs) */
+    msr cpsr_c, r1
+    ldr sp, =(_stack_top - 0x1000)
+
+    /* Save BootROM r4 (boot source info on some platforms; ignored by
+     * platforms that don't use it). */
+    push {r4}
+
+    /* 6. Copy .data section (LMA -> VMA).
LMA == VMA in the standard
+     * wolfBoot linker scripts, so this is usually a no-op. NOTE(review): ldmia reads three words but _lp_data holds two, so r4 picks up _start_bss - confirm. */
+    ldr r2, =_lp_data
+    ldmia r2, {r1, r3, r4}
+1:  cmp r3, r4
+    ldrcc r2, [r1], #4
+    strcc r2, [r3], #4
+    bcc 1b
+
+    /* Zero .bss */
+    adr r2, _lp_bss
+    ldmia r2, {r3, r4}
+    mov r2, #0
+1:  cmp r3, r4
+    strcc r2, [r3], #4
+    bcc 1b
+
+    /* Enable async-abort delivery (clear A bit in CPSR) so we get an
+     * exception now if the bus throws one, rather than later when state
+     * is harder to recover. */
+    mrs r0, cpsr
+    bic r0, r0, #(1 << 8)
+    msr cpsr_xsf, r0
+
+    /* Jump to main(). */
+    ldr r4, =main
     mov lr, pc
     bx r4
-
-  /* main() should never return */
+
 _panic:
     b _panic

-.align
+    .align
 _lp_data:
-.word _start_data
-.word _end_data
-
+    .word _start_data
+    .word _end_data
 _lp_bss:
-.word _start_bss
-.word _end_bss
-
+    .word _start_bss
+    .word _end_bss
diff --git a/test-app/ARM-zynq7000.ld b/test-app/ARM-zynq7000.ld
new file mode 100644
index 0000000000..363d0cf6c4
--- /dev/null
+++ b/test-app/ARM-zynq7000.ld
@@ -0,0 +1,56 @@
+OUTPUT_FORMAT("elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+/* App is staged by wolfBoot to DDR at WOLFBOOT_LOAD_ADDRESS=0x10000000.
+ * Stack carved out from the half-MB region just above. */
+MEMORY
+{
+    DDR_MEM(rwx) : ORIGIN = 0x10000000, LENGTH = 0x00080000 /* 512 KB code/data/bss */
+    STACK_MEM(rw) : ORIGIN = 0x10080000, LENGTH = 0x00080000 /* 512 KB stack */
+}
+
+ENTRY(reset_vector_entry)
+SECTIONS
+{
+    .text : AT (ORIGIN(DDR_MEM)) {
+        _start_text = .;
+        *(.iv)
+        *(.text)
+        *(.text.*)
+        *(.rodata)
+        *(.rodata*)
+        . = ALIGN(4);
+        *(.glue_7)
+        . = ALIGN(4);
+        *(.eh_frame)
+        . = ALIGN(4);
+        _end_text = .;
+    }
+
+    . = ALIGN(4);
+    .dummy : {
+        _edummy = .;
+    }
+
+    .data : AT (LOADADDR(.dummy)) {
+        _start_data = .;
+        *(.vectors)
+        *(.data)
+        *(.data.*)
+        _end_data = .;
+    }
+
+    .bss (NOLOAD) : {
+        . = ALIGN(4);
+        _start_bss = .;
+        *(.bss)
+        *(.bss.*)
+        *(COMMON)
+        _end_bss = .;
+    }
+}
+_romsize = _end_data - _start_text;
+_sramsize = _end_bss - _start_text;
+END_STACK = _start_text;
+_stack_top = ORIGIN(STACK_MEM) + LENGTH(STACK_MEM);
+end = .;
diff --git a/test-app/Makefile b/test-app/Makefile
index e5b06108c3..17868942dc 100644
--- a/test-app/Makefile
+++ b/test-app/Makefile
@@ -413,6 +413,11 @@ ifeq ($(TARGET),sama5d3)
   LSCRIPT_TEMPLATE:=$(ARCH)-$(TARGET).ld
 endif

+ifeq ($(TARGET),zynq7000)
+  APP_OBJS+=./boot_arm32_start.o
+  LSCRIPT_TEMPLATE:=$(ARCH)-$(TARGET).ld
+endif
+
 ifeq ($(TARGET),stm32l4)
   APP_OBJS+=$(STM32CUBE)/Drivers/STM32L4xx_HAL_Driver/Src/stm32l4xx_hal_flash.o
   APP_OBJS+=$(STM32CUBE)/Drivers/STM32L4xx_HAL_Driver/Src/stm32l4xx_hal_flash_ex.o
diff --git a/test-app/app_zynq7000.c b/test-app/app_zynq7000.c
new file mode 100644
index 0000000000..5b765ad566
--- /dev/null
+++ b/test-app/app_zynq7000.c
@@ -0,0 +1,67 @@
+/* app_zynq7000.c
+ *
+ * Bare-metal Cortex-A9 test app for the Zynq-7000 ZC702. Prints a banner
+ * on UART1 and a heartbeat character so the user can see do_boot() landed.
+ *
+ * Copyright (C) 2026 wolfSSL Inc.
+ *
+ * This file is part of wolfBoot.
+ *
+ * wolfBoot is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfBoot is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#include <stdint.h>
+
+#ifdef TARGET_zynq7000
+
+#define UART1_FIFO (*(volatile uint32_t*)0xE0001030U)
+#define UART1_SR (*(volatile uint32_t*)0xE000102CU)
+#define UART_SR_TXFULL 0x10U
+#define UART_SR_TXEMPTY 0x08U
+
+static void uart_putc(char c)
+{
+    while (UART1_SR & UART_SR_TXFULL) /* busy-wait until the TX FIFO has room */
+        ;
+    UART1_FIFO = (uint32_t)(uint8_t)c;
+}
+
+static void uart_puts(const char *s)
+{
+    while (*s) {
+        if (*s == '\n') /* expand LF to CR LF for the terminal */
+            uart_putc('\r');
+        uart_putc(*s++);
+    }
+}
+
+static void delay(volatile uint32_t n)
+{
+    while (n--) { /* crude busy-wait: n loop iterations, not calibrated */
+        __asm__ volatile("nop");
+    }
+}
+
+void main(void)
+{
+    uart_puts("\n=== ZC702 test-app: BOOT OK ===\n");
+    uart_puts("wolfBoot verified + chain-loaded this image\n");
+    while (1) { /* heartbeat: one '.' per delay period, forever */
+        uart_putc('.');
+        delay(2000000);
+    }
+}
+
+#endif /* TARGET_zynq7000 */
diff --git a/tools/scripts/zc702/jtag_load.tcl b/tools/scripts/zc702/jtag_load.tcl
new file mode 100644
index 0000000000..f359d56f34
--- /dev/null
+++ b/tools/scripts/zc702/jtag_load.tcl
@@ -0,0 +1,76 @@
+# jtag_load.tcl - load wolfboot.elf onto a ZC702 via Xilinx Platform Cable II.
+#
+# Uses the prebuilt Zynq-7000 FSBL (zynq_fsbl.elf) to bring DDR / MIO /
+# clocks / UART up, then loads wolfboot.elf over the top and starts it.
+#
+# Usage:
+#   source /opt/Xilinx/2025.2/Vitis/settings64.sh
+#   xsdb tools/scripts/zc702/jtag_load.tcl
+#
+# Set the JTAG boot mode straps on the ZC702 (SW16 = all OFF) before use.
+# After this script runs the board may need a power-cycle to recover the
+# CPU into a JTAG-loadable state again.
+#
+# Override paths via env:
+#   FSBL_ELF=... FSBL ELF path
+#   WOLFBOOT_ELF=...
wolfboot ELF path
+
+set fsbl_default "$::env(HOME)/GitHub/soc-prebuilt-firmware/zc702-zynq/zynq_fsbl.elf"
+set wolfboot_default "[file dirname [info script]]/../../../wolfboot.elf"
+
+if {[info exists ::env(FSBL_ELF)]} { set fsbl_elf $::env(FSBL_ELF) } \
+    else { set fsbl_elf $fsbl_default }
+if {[info exists ::env(WOLFBOOT_ELF)]} { set wolfboot_elf $::env(WOLFBOOT_ELF) } \
+    else { set wolfboot_elf $wolfboot_default }
+
+if {![file exists $fsbl_elf]} {
+    puts "ERROR: FSBL not found at $fsbl_elf"
+    puts "Clone wolfSSL/soc-prebuilt-firmware next to wolfboot or set FSBL_ELF."
+    exit 1
+}
+if {![file exists $wolfboot_elf]} {
+    puts "ERROR: wolfboot.elf not found at $wolfboot_elf"
+    exit 1
+}
+
+connect
+
+# Sometimes the chain comes up empty if the previous run left the CPU in an
+# off-chain state (e.g. WFI with clock gated). Retry the target lookup.
+for {set i 0} {$i < 5} {incr i} {
+    catch {targets -set -filter {name =~ "ARM Cortex-A9 MPCore #0"}} rc
+    if {[string first "no targets" $rc] < 0} { break }
+    puts "Cortex-A9 not on chain yet, retry $i ..."
+    after 500
+}
+if {[string first "no targets" $rc] >= 0} {
+    puts "ERROR: no Cortex-A9 targets visible after retries."
+    puts "Power-cycle the ZC702 (SW10) and try again."
+    exit 1
+}
+
+# Full PS reset, then wait for BootROM to enter JTAG-mode poll loop.
+rst -system
+after 1500
+targets -set -filter {name =~ "ARM Cortex-A9 MPCore #0"}
+
+# Run FSBL to completion. It does ps7_init (DDR/MIO/clocks/UART), then
+# parks itself since no bundled second-stage exists. 2-3s is plenty.
+puts "Loading FSBL: $fsbl_elf"
+dow $fsbl_elf
+con
+after 3000
+
+# Stop where FSBL parked, but do NOT rst -processor here - that would drop
+# us back into BootROM and lose FSBL's PS state.
+stop
+
+# Load wolfBoot at its DDR address. xsdb's `dow` does NOT consistently set
+# PC after a second target dow, so set PC and CPSR explicitly.
+puts "Loading wolfBoot: $wolfboot_elf"
+dow $wolfboot_elf
+rwr pc 0x04000000 ;# must match ORIGIN(DDR_MEM) in hal/zynq7000.ld
+rwr cpsr 0xD3 ;# SVC mode, IRQ+FIQ masked
+
+puts "Resuming - watch UART1 (115200 8N1) for the wolfBoot banner."
+con
diff --git a/tools/scripts/zc702/prepare_linux.sh b/tools/scripts/zc702/prepare_linux.sh
new file mode 100755
index 0000000000..fa06698962
--- /dev/null
+++ b/tools/scripts/zc702/prepare_linux.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# Sign and stage a ZC702 Linux kernel for wolfBoot.
+#
+# Two modes (chosen by the APPENDED env var):
+#
+# APPENDED=1 (default, recommended) - appends the DTB to the zImage and
+#   signs the concatenation as one image. The kernel finds the DTB at the
+#   end of itself via CONFIG_ARM_APPENDED_DTB. Required because the ARMv7
+#   zImage decompressor is observed to lose r2 (the DTB physical pointer
+#   wolfBoot passed in) before it reaches the decompressed kernel head.S
+#   on Zynq-7000 - the kernel ends up with __atags_pointer = 0 and never
+#   parses chosen.bootargs / chosen.stdout-path. Appending the DTB is
+#   independent of r2.
+#
+# APPENDED=0 - signs the zImage alone and stages the DTB raw at
+#   WOLFBOOT_DTS_BOOT_ADDRESS. wolfBoot reads it via PART_DTS_BOOT and
+#   relocates to WOLFBOOT_LOAD_DTS_ADDRESS, then passes that pointer in
+#   r2 per the ARM Linux boot ABI. Useful for kernels/decompressors
+#   that do preserve r2 correctly.
+#
+# Inputs (env):
+#   ZIMAGE - path to ARM zImage (default: ../linux-xlnx/arch/arm/boot/zImage)
+#   DTB - path to .dtb (default: ../linux-xlnx/arch/arm/boot/dts/zynq-zc702.dtb)
+#   VERSION - image version (default: 1)
+#   APPENDED - 0 or 1 (default: 1)
+#
+# Kernel must be built with:
+#   APPENDED=1 -> CONFIG_ARM_APPENDED_DTB=y, CONFIG_ARM_ATAG_DTB_COMPAT=y
+#   APPENDED=0 -> bootargs / stdout-path baked into the DTB ahead of time
+
+set -e
+# Source .config (Makefile syntax: NAME ?= value). Values are eval'd by the shell, so .config must be trusted.
+eval "$(sed -e 's/?=/=/' -e 's/^\([A-Z_][A-Z_0-9]*\)=\(.*\)$/\1="\2"/' .config 2>/dev/null | grep -E '^[A-Z]')"
+
+SIGN_TOOL="./tools/keytools/sign"
+KEY="wolfboot_signing_private_key.der"
+
+ZIMAGE="${ZIMAGE:-../linux-xlnx/arch/arm/boot/zImage}"
+DTB="${DTB:-../linux-xlnx/arch/arm/boot/dts/zynq-zc702.dtb}"
+VERSION="${VERSION:-1}"
+APPENDED="${APPENDED:-1}"
+
+[ -f "$ZIMAGE" ] || { echo "ERROR: kernel not found at $ZIMAGE" >&2; exit 1; }
+[ -f "$DTB" ] || { echo "ERROR: dtb not found at $DTB" >&2; exit 1; }
+[ -f "$KEY" ] || { echo "ERROR: signing key $KEY not found" >&2; exit 1; }
+
+PSIZE=$((${WOLFBOOT_PARTITION_SIZE:-0x600000}))
+
+if [ "$APPENDED" = "1" ]; then
+    # Concatenate zImage + DTB, sign as single image.
+    KDTB=$(mktemp /tmp/zImage_dtb.XXXXXX)
+    trap 'rm -f "$KDTB"' EXIT    # single quotes: expand (and quote) the path at exit time
+    cat "$ZIMAGE" "$DTB" > "$KDTB"
+    SIZE=$(stat -c %s "$KDTB")
+
+    if [ "$SIZE" -gt "$PSIZE" ]; then
+        echo "ERROR: zImage+dtb ($SIZE bytes) exceeds WOLFBOOT_PARTITION_SIZE ($PSIZE)" >&2
+        exit 1
+    fi
+
+    echo "Mode : APPENDED (zImage + DTB concatenated, signed as one image)"
+    echo "zImage: $ZIMAGE"
+    echo "DTB : $DTB"
+    echo "Total : $SIZE bytes"
+    echo "Signing as PART_BOOT v$VERSION ..."
+    $SIGN_TOOL --ecc256 --sha256 "$KDTB" "$KEY" "$VERSION"
+
+    # Try the name with the input's last ".suffix" stripped first, then the unstripped name.
+    SIGNED_OUT="${KDTB%.*}_v${VERSION}_signed.bin"
+    [ -f "$SIGNED_OUT" ] || SIGNED_OUT="${KDTB}_v${VERSION}_signed.bin"
+    mv "$SIGNED_OUT" "image_v${VERSION}_signed.bin"
+
+    echo ""
+    echo "Outputs:"
+    ls -la "image_v${VERSION}_signed.bin"
+    echo ""
+    echo "Flash with (replace <fsbl.elf> and <id>):"
+    echo " program_flash -f image_v${VERSION}_signed.bin -offset ${WOLFBOOT_PARTITION_BOOT_ADDRESS} -flash_type qspi-x4-single -fsbl <fsbl.elf> -target_id <id>"
+    echo ""
+    echo "(No separate DTB programming needed - DTB is appended to zImage.)"
+else
+    # Sign zImage alone, copy DTB raw.
+    KSIZE=$(stat -c %s "$ZIMAGE")
+    DSIZE=$(stat -c %s "$DTB")
+
+    if [ "$KSIZE" -gt "$PSIZE" ]; then
+        echo "ERROR: zImage ($KSIZE bytes) exceeds WOLFBOOT_PARTITION_SIZE ($PSIZE)" >&2
+        exit 1
+    fi
+
+    echo "Mode : RAW DTB (zImage signed alone, DTB staged separately at PART_DTS_BOOT)"
+    echo "zImage: $ZIMAGE ($KSIZE bytes)"
+    echo "DTB : $DTB ($DSIZE bytes)"
+    echo "Signing kernel as PART_BOOT v$VERSION ..."
+    $SIGN_TOOL --ecc256 --sha256 "$ZIMAGE" "$KEY" "$VERSION"
+
+    SIGNED_OUT="${ZIMAGE%.*}_v${VERSION}_signed.bin"
+    [ -f "$SIGNED_OUT" ] || SIGNED_OUT="${ZIMAGE}_v${VERSION}_signed.bin"
+    mv "$SIGNED_OUT" "image_v${VERSION}_signed.bin"
+
+    cp "$DTB" dtb.bin
+
+    echo ""
+    echo "Outputs:"
+    ls -la "image_v${VERSION}_signed.bin" dtb.bin
+    echo ""
+    echo "Flash with (replace <fsbl.elf> and <id>):"
+    echo " program_flash -f image_v${VERSION}_signed.bin -offset ${WOLFBOOT_PARTITION_BOOT_ADDRESS} -flash_type qspi-x4-single -fsbl <fsbl.elf> -target_id <id>"
+    echo " program_flash -f dtb.bin -offset ${WOLFBOOT_DTS_BOOT_ADDRESS} -flash_type qspi-x4-single -fsbl <fsbl.elf> -target_id <id>"
+fi
diff --git a/tools/scripts/zc702/prepare_sdcard.sh b/tools/scripts/zc702/prepare_sdcard.sh
new file mode 100755
index 0000000000..e74b8b7c38
--- /dev/null
+++ b/tools/scripts/zc702/prepare_sdcard.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# Lay out a ZC702 wolfBoot SD card.
+#
+# Pure MBR layout (no GPT). The Zynq-7000 BootROM (UG821 ch.6.3) requires
+# an MBR with the first partition as FAT32, type 0x0C (FAT32-LBA), with the
+# Active flag (0x80) set, and BOOT.BIN as a regular file in that FAT32
+# root. wolfBoot's disk.c reads MBR partitions when no protective GPT entry
+# is found (src/disk.c:disk_open_mbr).
+#
+# Layout:
+#   MBR p1  64 MB  FAT32-LBA (0x0C) Active - holds BOOT.BIN for BootROM
+#                                            (>= 33 MB so mkfs.vfat creates a
+#                                            standard-cluster FAT32 the
+#                                            BootROM accepts)
+#   MBR p2  16 MB  Linux raw (0x83)        - signed boot image (BOOT_PART_A=1)
+#   MBR p3  16 MB  Linux raw (0x83)        - signed update image (BOOT_PART_B=2)
+#
+# wolfBoot indexes MBR partitions starting at 0, so partition p1=idx0,
+# p2=idx1, p3=idx2 - matching BOOT_PART_A=1 and BOOT_PART_B=2 in the config.
+#
+# Usage:
+#   sudo ./tools/scripts/zc702/prepare_sdcard.sh /dev/sdX [signed_image]
+
+set -e
+RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m'
+
+DEV="$1"
+SIGNED="${2:-test-app/image_v1_signed.bin}"
+BOOTBIN="${BOOT_BIN:-./BOOT.BIN}"
+
+[ -n "$DEV" ] || { echo -e "${RED}usage:${NC} sudo $0 /dev/sdX [signed_image]" >&2; exit 1; }
+[ "$EUID" = 0 ] || { echo -e "${RED}must run as root${NC}" >&2; exit 1; }
+
+case "$DEV" in
+    /dev/sda|/dev/nvme*) echo -e "${RED}refusing $DEV${NC}" >&2; exit 1 ;;
+    /dev/sd[b-z]|/dev/mmcblk[0-9]) ;;
+    *) echo -e "${RED}unsupported device: $DEV${NC}" >&2; exit 1 ;;
+esac
+
+[ -b "$DEV" ] || { echo -e "${RED}$DEV not a block device${NC}" >&2; exit 1; }
+mount | grep -q "^${DEV}" && { echo -e "${RED}unmount $DEV partitions first${NC}" >&2; mount | grep "^${DEV}" >&2; exit 1; }
+[ -f "$BOOTBIN" ] || { echo -e "${RED}BOOT.BIN not found at $BOOTBIN${NC}" >&2; exit 1; }
+[ -f "$SIGNED" ] || { echo -e "${RED}signed image not found at $SIGNED${NC}" >&2; exit 1; }
+
+case "$DEV" in
+    /dev/mmcblk*) P1="${DEV}p1"; P2="${DEV}p2"; P3="${DEV}p3" ;;
+    *) P1="${DEV}1"; P2="${DEV}2"; P3="${DEV}3" ;;
+esac
+
+echo -e "${YELLOW}Target:${NC}"
+lsblk -o NAME,SIZE,MODEL,VENDOR,TRAN "$DEV" 2>/dev/null | head -5
+echo -e "${YELLOW}BOOT.BIN:${NC} $BOOTBIN ($(stat -c %s "$BOOTBIN") bytes)"
+echo -e "${YELLOW}signed:${NC} $SIGNED ($(stat -c %s "$SIGNED") bytes)"
+echo
+read -p "Type 'yes' to wipe $DEV: " CONFIRM
+[ "$CONFIRM" = yes ] || { echo "Aborted."; exit 1; }
+
+echo -e "${GREEN}1.${NC} Wiping head + tail to remove any existing GPT/MBR..."
+wipefs --all --force "$DEV" >/dev/null 2>&1 || true
+dd if=/dev/zero of="$DEV" bs=1M count=8 conv=fsync status=none
+DEV_SECTORS=$(blockdev --getsz "$DEV")
+dd if=/dev/zero of="$DEV" bs=512 \
+    seek=$((DEV_SECTORS - 2048)) count=2048 conv=fsync status=none 2>/dev/null || true
+sync
+
+echo -e "${GREEN}2.${NC} Writing pure MBR (parted msdos label) with 3 primary partitions..."
+parted "$DEV" --script -- \
+    mklabel msdos \
+    mkpart primary fat32 1MiB 65MiB \
+    mkpart primary 65MiB 81MiB \
+    mkpart primary 81MiB 97MiB \
+    set 1 boot on
+sync; partprobe "$DEV"; sleep 1
+
+echo -e "${GREEN}3.${NC} Patching MBR types: p1=0x0C (FAT32-LBA), p2/p3=0x83 (Linux)..."
+# parted leaves p1 as 0x0C already when fat32 is requested; force in case.
+# Each MBR partition entry is 16 bytes starting at 0x1BE.
+# p1 entry: offset 0x1BE, type byte at 0x1BE+4 = 0x1C2
+# p2 entry: offset 0x1CE, type byte at 0x1CE+4 = 0x1D2
+# p3 entry: offset 0x1DE, type byte at 0x1DE+4 = 0x1E2
+printf '\x0C' | dd of="$DEV" bs=1 seek=$((0x1C2)) count=1 conv=notrunc status=none
+printf '\x83' | dd of="$DEV" bs=1 seek=$((0x1D2)) count=1 conv=notrunc status=none
+printf '\x83' | dd of="$DEV" bs=1 seek=$((0x1E2)) count=1 conv=notrunc status=none
+# Active flag on p1 (parted's `set 1 boot on` should have done this)
+printf '\x80' | dd of="$DEV" bs=1 seek=$((0x1BE)) count=1 conv=notrunc status=none
+sync; partprobe "$DEV"; sleep 1
+
+echo -e "${GREEN}4.${NC} Formatting $P1 as FAT32 (label BOOT)..."
+mkfs.vfat -F 32 -n BOOT "$P1" >/dev/null
+
+echo -e "${GREEN}5.${NC} Copying BOOT.BIN to $P1..."
+MNT=$(mktemp -d)
+mount "$P1" "$MNT"
+cp "$BOOTBIN" "$MNT/BOOT.BIN"
+sync
+umount "$MNT"
+rmdir "$MNT"
+
+echo -e "${GREEN}6.${NC} Writing signed image to $P2 (BOOT_A) and $P3 (BOOT_B)..."
+dd if="$SIGNED" of="$P2" bs=512 conv=fsync status=none
+dd if="$SIGNED" of="$P3" bs=512 conv=fsync status=none
+sync
+
+echo
+echo -e "${GREEN}MBR partition entries (offset 0x1BE):${NC}"
+dd if="$DEV" bs=1 skip=$((0x1BE)) count=64 status=none 2>/dev/null | xxd | head -4
+echo
+echo -e "${GREEN}Done.${NC} Insert into J64, set SW16-3 + SW16-4 ON for SD boot,"
+echo -e "and power-cycle. Console on UART1 @ 115200."
diff --git a/tools/scripts/zc702/zc702_qspi.bif b/tools/scripts/zc702/zc702_qspi.bif
new file mode 100644
index 0000000000..cefcfcb0bd
--- /dev/null
+++ b/tools/scripts/zc702/zc702_qspi.bif
@@ -0,0 +1,22 @@
+// bootgen image descriptor for ZC702 QSPI boot.
+//
+// Pairs the prebuilt Zynq-7000 FSBL (zynq_fsbl.elf, copied into the current
+// directory from e.g. ../soc-prebuilt-firmware/zc702-zynq - see Usage)
+// with wolfboot.elf produced by `make TARGET=zynq7000`.
+//
+// Usage (two separate commands, so ${PREBUILT_DIR} is set before it is expanded):
+//   PREBUILT_DIR=$HOME/GitHub/soc-prebuilt-firmware/zc702-zynq
+//   cp ${PREBUILT_DIR}/zynq_fsbl.elf .
+//   bootgen -arch zynq -image tools/scripts/zc702/zc702_qspi.bif -w -o BOOT.BIN
+//
+// Then program BOOT.BIN to QSPI offset 0 with `program_flash` (Vitis) or
+// Vivado Hardware Manager (program in JTAG strap mode: SW16 all OFF). Then set
+// the QSPI boot strap by turning SW16-4 ON (MIO[5], MSB of the boot device
+// field; per UG850 ch.1.2.4) and cold power-cycle so the BootROM re-samples
+// the strap.
+
+the_ROM_image:
+{
+    [bootloader] zynq_fsbl.elf
+    wolfboot.elf
+}