From 38bf8c3d1387767571d838b7bbc341fbb863f631 Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 20 Mar 2026 14:40:39 -0700 Subject: [PATCH 1/7] Support for MAX32666 --- arch.mk | 4 + config/examples/max32666.config | 16 ++ docs/Targets.md | 106 ++++++++ hal/max32666.c | 438 ++++++++++++++++++++++++++++++++ hal/max32666.h | 376 +++++++++++++++++++++++++++ hal/max32666.ld | 73 ++++++ test-app/app_max32666.c | 40 +++ tools/openocd/max32665.cfg | 23 ++ 8 files changed, 1076 insertions(+) create mode 100644 config/examples/max32666.config create mode 100644 hal/max32666.c create mode 100644 hal/max32666.h create mode 100644 hal/max32666.ld create mode 100644 test-app/app_max32666.c create mode 100644 tools/openocd/max32665.cfg diff --git a/arch.mk b/arch.mk index 2292a90084..739757510a 100644 --- a/arch.mk +++ b/arch.mk @@ -187,6 +187,10 @@ ifeq ($(ARCH),ARM) SPI_TARGET=stm32 endif + ifeq ($(TARGET),max32666) + ARCH_FLASH_OFFSET=0x10000000 + endif + ifeq ($(TARGET),pic32cz) ARCH_FLASH_OFFSET=0x08000000 CORTEX_M7=1 diff --git a/config/examples/max32666.config b/config/examples/max32666.config new file mode 100644 index 0000000000..02dffdb258 --- /dev/null +++ b/config/examples/max32666.config @@ -0,0 +1,16 @@ +ARCH?=ARM +TARGET?=max32666 +SIGN?=ECC256 +HASH?=SHA256 +VTOR?=1 +NO_ASM?=0 +SPMATH?=1 +RAM_CODE?=1 +NVM_FLASH_WRITEONCE?=1 +NO_MPU?=1 +DEBUG_UART?=1 +WOLFBOOT_SECTOR_SIZE?=0x2000 +WOLFBOOT_PARTITION_SIZE?=0x40000 +WOLFBOOT_PARTITION_BOOT_ADDRESS?=0x10008000 +WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0x10048000 +WOLFBOOT_PARTITION_SWAP_ADDRESS?=0x10088000 diff --git a/docs/Targets.md b/docs/Targets.md index d2634cb372..28ff1431ac 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -5,6 +5,7 @@ This README describes configuration of supported targets. 
## Supported Targets * [Simulated](#simulated) +* [Analog Devices MAX32666](#analog-devices-max32666) * [Cortex-A53 / Raspberry PI 3](#cortex-a53--raspberry-pi-3-experimental) * [Cypress PSoC-6](#cypress-psoc-6) * [Infineon AURIX TC3xx](#infineon-aurix-tc3xx) @@ -8491,3 +8492,108 @@ Number of public keys: 1 11 0C FA F6 B5 F9 59 BA B9 A5 8E 34 4A CD C5 83 7E 43 EF 61 6E C4 15 88 3C FE D6 76 47 D9 82 A4 ``` + +## Analog Devices MAX32666 + +The Analog Devices MAX32665/MAX32666 family features a dual Cortex-M4 at 96 MHz +with 1MB internal flash (2 x 512KB banks), 560KB SRAM, and BLE 5. + +wolfBoot has been tested on the MAX32666FTHR board with a MAX32625PICO debug adapter. + +**Key Features:** +- ARM Cortex-M4 core at 96 MHz (HIRC96M oscillator) +- 1MB Flash: 8KB page erase, 128-bit (16-byte) write unit, dual-bank (FLC0/FLC1) +- 560KB SRAM +- Bare-metal implementation (no MSDK required for boot) +- UART1 debug output (P1.12 RX, P1.13 TX on FTHR board, routed through the PICO adapter) + +### MAX32666: Memory Layout + +Internal-flash-only layout (default): + +| Region | Address Range | Size | +|--------|---------------|------| +| Bootloader | 0x10000000 - 0x10007FFF | 32 KB | +| Boot Partition | 0x10008000 - 0x10047FFF | 256 KB | +| Update Partition | 0x10048000 - 0x10087FFF | 256 KB | +| Swap Sector | 0x10088000 - 0x10089FFF | 8 KB | + +### MAX32666: Building + +```sh +cp config/examples/max32666.config .config +make clean +make keysclean +make +``` + +Expected wolfBoot size: ~25KB (ECC256 + SHA256 with Cortex-M4 ASM). + +### MAX32666: Flashing + +The MAX32666FTHR board uses an external MAX32625PICO debug adapter (CMSIS-DAP). +An OpenOCD target config is provided at `tools/openocd/max32665.cfg`. + +**Important:** If multiple CMSIS-DAP probes are connected, specify the PICO's +serial number with `cmsis_dap_serial`. 
Find it with: +`ls /dev/serial/by-id/ | grep DAPLink` + +```sh +# Flash the factory image (wolfBoot + signed test-app) +openocd -f interface/cmsis-dap.cfg \ + -c "cmsis_dap_serial <serial>" \ + -f tools/openocd/max32665.cfg \ + -c "adapter speed 1000" \ + -c "program factory.bin 0x10000000 verify reset exit" +``` + +### MAX32666: UART Console + +The FTHR board routes UART1 (P1.12 RX, P1.13 TX) through the PICO adapter's +CDC serial interface. The serial port appears as the DAPLink's `-if01` interface: + +```sh +# Find the serial port +ls /dev/serial/by-id/ | grep DAPLink + +# Connect (typically /dev/ttyACMx) +minicom -D /dev/ttyACM2 -b 115200 +# or +screen /dev/serial/by-id/usb-ARM_DAPLink_CMSIS-DAP_*-if01 115200 +``` + +Expected output on first boot: + +``` +wolfBoot Version: X.Y.Z (date time) +``` + +Followed by the test application: + +``` +MAX32666 Test App v1 +Boot success marked. Version: 1 +``` + +### MAX32666: Configuration Options + +| Option | Description | +|--------|-------------| +| `NVM_FLASH_WRITEONCE` | **Required.** Flash can only be written once between erases. | +| `RAM_CODE` | **Required.** Run flash erase/write from RAM (executing from same flash). | +| `DEBUG_UART` | Enable debug UART output (UART1 by default, see `DEBUG_UART_NUM`; 115200 baud, 8N1). | +| `EXT_FLASH` | Enable external flash support (for QSPI NAND configuration). | +| `FLAGS_HOME` | Keep boot flags in internal flash (required when `EXT_FLASH=1`). | + +### MAX32666: External QSPI NAND Configuration + +A separate configuration is provided for external QSPI NAND flash (Micron +MT29F8G01ADBFD12) as firmware update storage: + +```sh +cp config/examples/max32666-nand.config .config +make clean +make +``` + +See [config/examples/max32666-nand.config](/config/examples/max32666-nand.config) for details. diff --git a/hal/max32666.c b/hal/max32666.c new file mode 100644 index 0000000000..b321f79798 --- /dev/null +++ b/hal/max32666.c @@ -0,0 +1,438 @@ +/* max32666.c + * + * Copyright (C) 2025 wolfSSL Inc. 
+ * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * + * HAL for Analog Devices MAX32665/MAX32666 + * Tested on MAX32666FTHR: Cortex-M4 @ 96MHz, 1MB Flash, 560KB SRAM + */ + +#include <stdint.h> +#include <string.h> +#include "image.h" +#include "hal.h" +#include "printf.h" + +/* Override RAMFUNCTION for test-app when RAM_CODE is set */ +#if defined(RAM_CODE) && !defined(__WOLFBOOT) + #undef RAMFUNCTION + #define RAMFUNCTION __attribute__((used,section(".ramcode"),long_call)) +#endif + +/* Assembly helpers */ +#define DMB() __asm__ volatile ("dmb") +#define DSB() __asm__ volatile ("dsb") +#define ISB() __asm__ volatile ("isb") + +#define __disable_irq() __asm__ volatile ("cpsid i" ::: "memory") +#define __enable_irq() __asm__ volatile ("cpsie i" ::: "memory") + +#include "max32666.h" + +/* Helper to access FLC registers by base + offset */ +#define FLC_REG(base, off) (*(volatile uint32_t *)((uint32_t)(base) + (off))) + +/* ============== Flash Bank Helper ============== */ + +/* Determine which FLC bank controls the given address */ +static volatile uint32_t* flc_base_for_addr(uint32_t address) +{ + if (address < (FLASH_BASE + (FLASH_SIZE / 2))) { + return (volatile uint32_t*)FLC0_BASE; + } + return (volatile uint32_t*)FLC1_BASE; +} + +/* ============== Watchdog Functions 
============== */ + +static void watchdog_disable(void) +{ + /* Disable WDT0 */ + WDT0_CTRL &= ~(WDT_CTRL_EN | WDT_CTRL_RST_EN | WDT_CTRL_INT_EN); +} + +/* ============== Clock Configuration ============== */ + +static void clock_init(void) +{ + /* Enable HIRC96M (96 MHz) */ + GCR_CLKCN |= GCR_CLKCN_HIRC96M_EN; + + /* Wait for HIRC96M to be ready */ + while (!(GCR_CLKCN & GCR_CLKCN_HIRC96M_RDY)) {} + + /* Select HIRC96 as system clock, no prescaler (PSC=0 = div1) */ + GCR_CLKCN = (GCR_CLKCN & ~(GCR_CLKCN_CLKSEL_MASK | GCR_CLKCN_PSC_MASK)) | + GCR_CLKCN_CLKSEL_HIRC96 | + GCR_CLKCN_HIRC96M_EN | + GCR_CLKCN_HIRC8M_EN; + + /* Wait for clock switch to complete */ + while (!(GCR_CLKCN & GCR_CLKCN_CKRDY)) {} + + /* Enable HIRC8M (7.3728 MHz) for UART baud rate generation */ + GCR_CLKCN |= GCR_CLKCN_HIRC8M_EN; + while (!(GCR_CLKCN & GCR_CLKCN_HIRC8M_RDY)) {} +} + +/* ============== ICC (Instruction Cache) Functions ============== */ + +static void RAMFUNCTION icc_disable(void) +{ + ICC0_CTRL &= ~ICC_CTRL_EN; +} + +static void RAMFUNCTION icc_enable(void) +{ + /* Invalidate and re-enable cache */ + ICC0_INVALIDATE = 1; + ICC0_CTRL |= ICC_CTRL_EN; + while (!(ICC0_CTRL & ICC_CTRL_RDY)) {} +} + +/* ============== Flash Functions ============== */ + +static void RAMFUNCTION flc_unlock(volatile uint32_t *flc_base) +{ + FLC_REG(flc_base, FLC_ACNTL_OFF) = FLC_ACNTL_UNLOCK_KEY1; + FLC_REG(flc_base, FLC_ACNTL_OFF) = FLC_ACNTL_UNLOCK_KEY2; + + /* Set unlock bits in CN register */ + FLC_REG(flc_base, FLC_CN_OFF) = + (FLC_REG(flc_base, FLC_CN_OFF) & ~FLC_CN_UNLOCK_MASK) | + FLC_CN_UNLOCK_UNLOCKED; +} + +static void RAMFUNCTION flc_lock(volatile uint32_t *flc_base) +{ + FLC_REG(flc_base, FLC_ACNTL_OFF) = 0; +} + +static void RAMFUNCTION flc_wait_done(volatile uint32_t *flc_base) +{ + /* Wait for any pending operation */ + while (FLC_REG(flc_base, FLC_CN_OFF) & + (FLC_CN_WR | FLC_CN_PGE | FLC_CN_ME)) {} + + /* Check done flag */ + if (FLC_REG(flc_base, FLC_INTR_OFF) & FLC_INTR_DONE) { + 
/* Clear done flag (write 1 to clear) */ + FLC_REG(flc_base, FLC_INTR_OFF) = FLC_INTR_DONE; + } +} + +static int RAMFUNCTION flc_write128(uint32_t address, const uint32_t *data, + volatile uint32_t *flc_base) +{ + /* Skip if data is all 0xFF (erased) */ + if (data[0] == 0xFFFFFFFF && data[1] == 0xFFFFFFFF && + data[2] == 0xFFFFFFFF && data[3] == 0xFFFFFFFF) { + return 0; + } + + flc_unlock(flc_base); + flc_wait_done(flc_base); + + /* Clear any previous errors */ + if (FLC_REG(flc_base, FLC_INTR_OFF) & FLC_INTR_AF) { + FLC_REG(flc_base, FLC_INTR_OFF) = FLC_INTR_AF; + } + + /* Set flash clock divider for 1 MHz operation */ + FLC_REG(flc_base, FLC_CLKDIV_OFF) = FLC_CLKDIV_VALUE; + + /* Set address */ + FLC_REG(flc_base, FLC_ADDR_OFF) = address; + + /* Load 128-bit data (4 x 32-bit words) */ + *(volatile uint32_t *)((uint32_t)flc_base + FLC_DATA_OFF + 0x00) = data[0]; + *(volatile uint32_t *)((uint32_t)flc_base + FLC_DATA_OFF + 0x04) = data[1]; + *(volatile uint32_t *)((uint32_t)flc_base + FLC_DATA_OFF + 0x08) = data[2]; + *(volatile uint32_t *)((uint32_t)flc_base + FLC_DATA_OFF + 0x0C) = data[3]; + + /* Trigger 128-bit write */ + DSB(); + FLC_REG(flc_base, FLC_CN_OFF) |= FLC_CN_WR; + + /* Wait for completion */ + flc_wait_done(flc_base); + + flc_lock(flc_base); + + /* Check for access fault */ + if (FLC_REG(flc_base, FLC_INTR_OFF) & FLC_INTR_AF) { + FLC_REG(flc_base, FLC_INTR_OFF) = FLC_INTR_AF; + return -1; + } + + return 0; +} + +static int RAMFUNCTION flc_page_erase(uint32_t address, + volatile uint32_t *flc_base) +{ + flc_unlock(flc_base); + flc_wait_done(flc_base); + + /* Clear any previous errors */ + if (FLC_REG(flc_base, FLC_INTR_OFF) & FLC_INTR_AF) { + FLC_REG(flc_base, FLC_INTR_OFF) = FLC_INTR_AF; + } + + /* Set flash clock divider */ + FLC_REG(flc_base, FLC_CLKDIV_OFF) = FLC_CLKDIV_VALUE; + + /* Set address (any address within the page) */ + FLC_REG(flc_base, FLC_ADDR_OFF) = address; + + /* Set erase code and trigger page erase */ + FLC_REG(flc_base, 
FLC_CN_OFF) = + (FLC_REG(flc_base, FLC_CN_OFF) & ~FLC_CN_ERASE_CODE_MASK) | + FLC_CN_ERASE_CODE_PGE; + DSB(); + FLC_REG(flc_base, FLC_CN_OFF) |= FLC_CN_PGE; + + /* Wait for completion */ + flc_wait_done(flc_base); + + /* Clear erase bits */ + FLC_REG(flc_base, FLC_CN_OFF) &= + ~(FLC_CN_PGE | FLC_CN_ERASE_CODE_MASK); + + flc_lock(flc_base); + + /* Check for access fault */ + if (FLC_REG(flc_base, FLC_INTR_OFF) & FLC_INTR_AF) { + FLC_REG(flc_base, FLC_INTR_OFF) = FLC_INTR_AF; + return -1; + } + + return 0; +} + +/* ============== UART Functions ============== */ + +#ifdef DEBUG_UART + +void uart_init(void) +{ + /* Enable GPIO1 clock for pin muxing */ + GCR_PERCKCN0 &= ~GCR_PERCKCN0_GPIO1D; + + /* Enable UART peripheral clock (clear disable bit) */ + GCR_PERCKCN0 &= ~DEBUG_UART_PCLKDIS; + + /* Enable HIRC8M for UART baud rate clock */ + GCR_CLKCN |= GCR_CLKCN_HIRC8M_EN; + while (!(GCR_CLKCN & GCR_CLKCN_HIRC8M_RDY)) {} + +#if DEBUG_UART_NUM == 1 + /* UART1 MAP_B: P1.12 (RX), P1.13 (TX) = AF3 + * AF3: EN0=0, EN1=0, EN2=1 (per MSDK gpio_reva.c) */ + GPIO1_EN0_CLR = UART1B_PINS; + GPIO1_EN1_CLR = UART1B_PINS; + *(volatile uint32_t *)(GPIO1_BASE + GPIO_EN2_SET_OFF) = UART1B_PINS; +#endif + + /* Disable UART before configuration */ + DEBUG_UART_CTRL = 0; + + /* Configure: 8-bit, no parity, 1 stop bit, HIRC8M clock source */ + DEBUG_UART_CTRL = UART_CTRL_CHAR_SZ_8 | UART_CTRL_CLKSEL; + + /* Set baud rate using HIRC8M (7.3728 MHz) + * baud = clk / (IBAUD * (128 >> FACTOR)) + * For 115200 with FACTOR=2 (div 32): IBAUD = 7372800 / (115200*32) = 2 + */ + DEBUG_UART_BAUD0 = (2UL << UART_BAUD0_IBAUD_SHIFT) | + UART_BAUD0_FACTOR_32; + DEBUG_UART_BAUD1 = 0; /* No fractional adjustment */ + + /* Disable all interrupts */ + DEBUG_UART_INT_EN = 0; + + /* Clear any pending interrupt flags */ + DEBUG_UART_INT_FL = DEBUG_UART_INT_FL; + + /* Enable UART */ + DEBUG_UART_CTRL |= UART_CTRL_ENABLE; +} + +void RAMFUNCTION uart_write(const char* buf, unsigned int sz) +{ + unsigned int i; + for 
(i = 0; i < sz; i++) { + if (buf[i] == '\n') { + while (DEBUG_UART_STATUS & UART_STATUS_TX_FULL) {} + DEBUG_UART_FIFO = '\r'; + } + while (DEBUG_UART_STATUS & UART_STATUS_TX_FULL) {} + DEBUG_UART_FIFO = buf[i]; + } + /* Wait for transmission complete */ + while (DEBUG_UART_STATUS & UART_STATUS_TX_BUSY) {} +} + +int RAMFUNCTION uart_read(char* c) +{ + if (DEBUG_UART_STATUS & UART_STATUS_RX_EMPTY) { + return 0; + } + *c = (char)(DEBUG_UART_FIFO & 0xFF); + return 1; +} + +#endif /* DEBUG_UART */ + +/* ============== HAL Interface Functions ============== */ + +void hal_init(void) +{ + /* Disable watchdog first */ + watchdog_disable(); + + /* Initialize clocks to 96 MHz */ + clock_init(); + + /* Set FLC clock dividers for both banks */ + FLC0_CLKDIV = FLC_CLKDIV_VALUE; + FLC1_CLKDIV = FLC_CLKDIV_VALUE; + + /* Enable instruction cache */ + icc_enable(); + +#ifdef DEBUG_UART + uart_init(); + +#ifdef __WOLFBOOT +#ifdef WOLFBOOT_REPRODUCIBLE_BUILD + wolfBoot_printf("wolfBoot Version: %s\n", LIBWOLFBOOT_VERSION_STRING); +#else + wolfBoot_printf("wolfBoot Version: %s (%s %s)\n", + LIBWOLFBOOT_VERSION_STRING, __DATE__, __TIME__); +#endif +#endif /* __WOLFBOOT */ +#endif /* DEBUG_UART */ +} + +void hal_prepare_boot(void) +{ +#ifdef DEBUG_UART + /* Wait for UART to finish transmitting */ + while (DEBUG_UART_STATUS & UART_STATUS_TX_BUSY) {} + + /* Disable UART for clean handoff to application */ + DEBUG_UART_CTRL = 0; +#endif +} + +int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) +{ + int ret; + int i = 0; + uint32_t write_buf[4]; /* 128-bit (16-byte) write buffer */ + volatile uint32_t *flc_base; + + icc_disable(); + + while (len > 0) { + flc_base = flc_base_for_addr(address); + + if ((len < FLASH_WRITE_SIZE) || (address & (FLASH_WRITE_SIZE - 1))) { + /* Handle unaligned start or partial write with RMW */ + uint32_t aligned_addr = address & ~(FLASH_WRITE_SIZE - 1); + uint32_t offset = address - aligned_addr; + int bytes_to_copy = FLASH_WRITE_SIZE 
- offset; + if (bytes_to_copy > len) + bytes_to_copy = len; + + memcpy(write_buf, (void*)aligned_addr, FLASH_WRITE_SIZE); + memcpy((uint8_t*)write_buf + offset, data + i, bytes_to_copy); + + ret = flc_write128(aligned_addr, write_buf, flc_base); + if (ret != 0) { + icc_enable(); + return ret; + } + + address += bytes_to_copy; + i += bytes_to_copy; + len -= bytes_to_copy; + } else { + /* Write full 128-bit aligned words */ + while (len >= FLASH_WRITE_SIZE) { + flc_base = flc_base_for_addr(address); + memcpy(write_buf, data + i, FLASH_WRITE_SIZE); + + ret = flc_write128(address, write_buf, flc_base); + if (ret != 0) { + icc_enable(); + return ret; + } + + address += FLASH_WRITE_SIZE; + i += FLASH_WRITE_SIZE; + len -= FLASH_WRITE_SIZE; + } + } + } + + icc_enable(); + return 0; +} + +int RAMFUNCTION hal_flash_erase(uint32_t address, int len) +{ + int ret; + volatile uint32_t *flc_base; + + /* Align to page boundary */ + if (address & (FLASH_PAGE_SIZE - 1)) { + address &= ~(FLASH_PAGE_SIZE - 1); + } + + icc_disable(); + + while (len > 0) { + flc_base = flc_base_for_addr(address); + + ret = flc_page_erase(address, flc_base); + if (ret != 0) { + icc_enable(); + return ret; + } + + address += FLASH_PAGE_SIZE; + len -= FLASH_PAGE_SIZE; + } + + icc_enable(); + return 0; +} + +void RAMFUNCTION hal_flash_unlock(void) +{ + flc_unlock((volatile uint32_t*)FLC0_BASE); + flc_unlock((volatile uint32_t*)FLC1_BASE); +} + +void RAMFUNCTION hal_flash_lock(void) +{ + flc_lock((volatile uint32_t*)FLC0_BASE); + flc_lock((volatile uint32_t*)FLC1_BASE); +} diff --git a/hal/max32666.h b/hal/max32666.h new file mode 100644 index 0000000000..80185243ee --- /dev/null +++ b/hal/max32666.h @@ -0,0 +1,376 @@ +/* max32666.h + * + * Copyright (C) 2025 wolfSSL Inc. + * + * This file is part of wolfBoot. 
+ * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * + * Hardware register definitions for Analog Devices MAX32665/MAX32666 + * MAX32666: Dual Cortex-M4 @ 96 MHz, 1MB Flash, 560KB SRAM, BLE 5 + * + * Register offsets verified against the Analog Devices MSDK headers in: + * Libraries/CMSIS/Device/Maxim/MAX32665/Include/ + */ + +#ifndef MAX32666_H +#define MAX32666_H + +#include <stdint.h> + +/* ============== Memory Map ============== */ +#define FLASH_BASE 0x10000000UL +#define FLASH_SIZE (1024 * 1024) /* 1MB */ +#define FLASH_PAGE_SIZE 8192 /* 8KB page erase */ +#define FLASH_WRITE_SIZE 16 /* 128-bit write unit */ + +#define SRAM_BASE 0x20000000UL +#define SRAM_SIZE (560 * 1024) /* 560KB */ + +/* ============== GCR - Global Control Registers ============== */ +/* MSDK: gcr_regs.h */ +#define GCR_BASE 0x40000000UL +#define GCR_SCON (*(volatile uint32_t *)(GCR_BASE + 0x00UL)) +#define GCR_RSTR0 (*(volatile uint32_t *)(GCR_BASE + 0x04UL)) +#define GCR_CLKCN (*(volatile uint32_t *)(GCR_BASE + 0x08UL)) +#define GCR_PM (*(volatile uint32_t *)(GCR_BASE + 0x0CUL)) +#define GCR_PCKDIV (*(volatile uint32_t *)(GCR_BASE + 0x18UL)) +#define GCR_PERCKCN0 (*(volatile uint32_t *)(GCR_BASE + 0x24UL)) +#define GCR_MEMCKCN (*(volatile uint32_t *)(GCR_BASE + 0x28UL)) +#define GCR_MEMZCN (*(volatile uint32_t *)(GCR_BASE + 0x2CUL)) +#define 
GCR_SYSST (*(volatile uint32_t *)(GCR_BASE + 0x40UL)) +#define GCR_RSTR1 (*(volatile uint32_t *)(GCR_BASE + 0x44UL)) +#define GCR_PERCKCN1 (*(volatile uint32_t *)(GCR_BASE + 0x48UL)) +#define GCR_EVENTEN (*(volatile uint32_t *)(GCR_BASE + 0x4CUL)) +#define GCR_REVISION (*(volatile uint32_t *)(GCR_BASE + 0x50UL)) +#define GCR_SYSSIE (*(volatile uint32_t *)(GCR_BASE + 0x54UL)) + +/* GCR_CLKCN fields (MSDK: CLKCN register) */ +#define GCR_CLKCN_PSC_SHIFT 6 /* Prescaler: bits [8:6] */ +#define GCR_CLKCN_PSC_MASK (0x7UL << 6) +#define GCR_CLKCN_CLKSEL_SHIFT 9 /* Clock source: bits [11:9] */ +#define GCR_CLKCN_CLKSEL_MASK (0x7UL << 9) +#define GCR_CLKCN_CKRDY (1UL << 13) /* Clock ready */ +#define GCR_CLKCN_X32M_EN (1UL << 16) /* 32 MHz XTAL enable */ +#define GCR_CLKCN_X32K_EN (1UL << 17) /* 32.768 kHz XTAL enable */ +#define GCR_CLKCN_HIRC_EN (1UL << 18) /* 60 MHz HIRC enable */ +#define GCR_CLKCN_HIRC96M_EN (1UL << 19) /* 96 MHz HIRC enable */ +#define GCR_CLKCN_HIRC8M_EN (1UL << 20) /* 7.3728 MHz HIRC8M enable */ +#define GCR_CLKCN_HIRC8M_VS (1UL << 21) /* HIRC8M valid status */ +#define GCR_CLKCN_X32M_RDY (1UL << 24) /* 32 MHz XTAL ready */ +#define GCR_CLKCN_X32K_RDY (1UL << 25) /* 32.768 kHz XTAL ready */ +#define GCR_CLKCN_HIRC_RDY (1UL << 26) /* 60 MHz HIRC ready */ +#define GCR_CLKCN_HIRC96M_RDY (1UL << 27) /* 96 MHz HIRC ready */ +#define GCR_CLKCN_HIRC8M_RDY (1UL << 28) /* 7.3728 MHz HIRC8M ready */ + +/* Clock source selection values for CLKSEL field */ +#define GCR_CLKCN_CLKSEL_HIRC (0UL << 9) /* 60 MHz HIRC */ +#define GCR_CLKCN_CLKSEL_XTAL32M (2UL << 9) /* 32 MHz XTAL */ +#define GCR_CLKCN_CLKSEL_LIRC8 (3UL << 9) /* 8 kHz LIRC */ +#define GCR_CLKCN_CLKSEL_HIRC96 (4UL << 9) /* 96 MHz HIRC */ +#define GCR_CLKCN_CLKSEL_HIRC8 (5UL << 9) /* 7.3728 MHz HIRC8M */ +#define GCR_CLKCN_CLKSEL_XTAL32K (6UL << 9) /* 32.768 kHz XTAL */ + +/* GCR_PERCKCN0 bits (1 = clock disabled) - MSDK: PERCKCN0 register */ +#define GCR_PERCKCN0_GPIO0D (1UL << 0) +#define 
GCR_PERCKCN0_GPIO1D (1UL << 1) +#define GCR_PERCKCN0_USBD (1UL << 3) +#define GCR_PERCKCN0_DMAD (1UL << 5) +#define GCR_PERCKCN0_SPI1D (1UL << 6) +#define GCR_PERCKCN0_SPI2D (1UL << 7) +#define GCR_PERCKCN0_UART0D (1UL << 9) +#define GCR_PERCKCN0_UART1D (1UL << 10) +#define GCR_PERCKCN0_I2C0D (1UL << 13) +#define GCR_PERCKCN0_CRYPTOD (1UL << 14) /* TPU/Crypto engine */ +#define GCR_PERCKCN0_TIMER0D (1UL << 15) +#define GCR_PERCKCN0_TIMER1D (1UL << 16) +#define GCR_PERCKCN0_TIMER2D (1UL << 17) +#define GCR_PERCKCN0_I2C1D (1UL << 28) + +/* GCR_PERCKCN1 bits */ +#define GCR_PERCKCN1_UART2D (1UL << 1) +#define GCR_PERCKCN1_FLCD (1UL << 3) +#define GCR_PERCKCN1_ICCD (1UL << 11) +#define GCR_PERCKCN1_WDT0D (1UL << 27) +#define GCR_PERCKCN1_WDT1D (1UL << 28) + +/* ============== FLC - Flash Controller ============== */ +/* Bank 0: 0x10000000 - 0x1007FFFF (512KB) */ +/* Bank 1: 0x10080000 - 0x100FFFFF (512KB) */ +/* MSDK: flc_regs.h */ +#define FLC0_BASE 0x40029000UL +#define FLC1_BASE 0x40029400UL + +/* FLC register offsets */ +#define FLC_ADDR_OFF 0x00UL +#define FLC_CLKDIV_OFF 0x04UL +#define FLC_CN_OFF 0x08UL +#define FLC_INTR_OFF 0x024UL +#define FLC_DATA_OFF 0x030UL /* DATA[0..3] = 128-bit write data */ +#define FLC_ACNTL_OFF 0x040UL + +/* FLC0 registers */ +#define FLC0_ADDR (*(volatile uint32_t *)(FLC0_BASE + FLC_ADDR_OFF)) +#define FLC0_CLKDIV (*(volatile uint32_t *)(FLC0_BASE + FLC_CLKDIV_OFF)) +#define FLC0_CN (*(volatile uint32_t *)(FLC0_BASE + FLC_CN_OFF)) +#define FLC0_INTR (*(volatile uint32_t *)(FLC0_BASE + FLC_INTR_OFF)) +#define FLC0_ACNTL (*(volatile uint32_t *)(FLC0_BASE + FLC_ACNTL_OFF)) + +/* FLC1 registers */ +#define FLC1_ADDR (*(volatile uint32_t *)(FLC1_BASE + FLC_ADDR_OFF)) +#define FLC1_CLKDIV (*(volatile uint32_t *)(FLC1_BASE + FLC_CLKDIV_OFF)) +#define FLC1_CN (*(volatile uint32_t *)(FLC1_BASE + FLC_CN_OFF)) +#define FLC1_INTR (*(volatile uint32_t *)(FLC1_BASE + FLC_INTR_OFF)) +#define FLC1_ACNTL (*(volatile uint32_t *)(FLC1_BASE + 
FLC_ACNTL_OFF)) + +/* FLC_CN (Control) register bits */ +#define FLC_CN_WR (1UL << 0) /* Start write */ +#define FLC_CN_ME (1UL << 1) /* Mass erase */ +#define FLC_CN_PGE (1UL << 2) /* Page erase */ +#define FLC_CN_ERASE_CODE_SHIFT 8 /* Erase code: bits [15:8] */ +#define FLC_CN_ERASE_CODE_MASK (0xFFUL << 8) +#define FLC_CN_ERASE_CODE_PGE (0x55UL << 8) /* Page erase code */ +#define FLC_CN_ERASE_CODE_ME (0xAAUL << 8) /* Mass erase code */ +#define FLC_CN_PEND (1UL << 24) /* Operation pending */ +#define FLC_CN_UNLOCK_SHIFT 28 /* Unlock: bits [31:28] */ +#define FLC_CN_UNLOCK_MASK (0xFUL << 28) +#define FLC_CN_UNLOCK_UNLOCKED (0x2UL << 28) /* Unlock value */ +#define FLC_CN_UNLOCK_LOCKED (0x3UL << 28) /* Lock value */ + +/* FLC_INTR register bits */ +#define FLC_INTR_DONE (1UL << 0) /* Operation done */ +#define FLC_INTR_AF (1UL << 1) /* Access fault */ +#define FLC_INTR_DONEIE (1UL << 8) /* Done interrupt enable */ +#define FLC_INTR_AFIE (1UL << 9) /* Fault interrupt enable */ + +/* FLC unlock sequence (ACNTL register) */ +#define FLC_ACNTL_UNLOCK_KEY1 0x3A7F5200UL +#define FLC_ACNTL_UNLOCK_KEY2 0xA1E34F20UL + +/* FLC clock divider: flash requires 1 MHz clock + * CLKDIV = SystemClock / 1MHz + * At 96 MHz: CLKDIV = 96 + */ +#define FLC_CLKDIV_VALUE 96 + +/* ============== ICC - Internal Cache Controller ============== */ +/* MSDK: icc_regs.h */ +#define ICC0_BASE 0x4002A000UL +#define ICC0_INFO (*(volatile uint32_t *)(ICC0_BASE + 0x0000UL)) +#define ICC0_SZ (*(volatile uint32_t *)(ICC0_BASE + 0x0004UL)) +#define ICC0_CTRL (*(volatile uint32_t *)(ICC0_BASE + 0x0100UL)) +#define ICC0_INVALIDATE (*(volatile uint32_t *)(ICC0_BASE + 0x0700UL)) + +#define ICC_CTRL_EN (1UL << 0) /* Cache enable */ +#define ICC_CTRL_RDY (1UL << 16) /* Cache ready */ + +/* ============== WDT - Watchdog Timer ============== */ +/* MSDK: wdt_regs.h */ +#define WDT0_BASE 0x40003000UL +#define WDT0_CTRL (*(volatile uint32_t *)(WDT0_BASE + 0x00UL)) +#define WDT0_RST (*(volatile uint32_t 
*)(WDT0_BASE + 0x04UL)) + +/* WDT_CTRL bits */ +#define WDT_CTRL_INT_PERIOD_SHIFT 0 +#define WDT_CTRL_RST_PERIOD_SHIFT 4 +#define WDT_CTRL_EN (1UL << 8) +#define WDT_CTRL_INT_FLAG (1UL << 9) +#define WDT_CTRL_INT_EN (1UL << 10) +#define WDT_CTRL_RST_EN (1UL << 11) +#define WDT_CTRL_RST_FLAG (1UL << 31) + +/* WDT reset sequence */ +#define WDT_RST_SEQ1 0x00A5 +#define WDT_RST_SEQ2 0x005A + +/* ============== UART ============== */ +/* MSDK: uart_regs.h */ +#define UART0_BASE 0x40042000UL +#define UART1_BASE 0x40043000UL +#define UART2_BASE 0x40044000UL + +/* UART register offsets (verified against MSDK) */ +#define UART_CTRL_OFF 0x00UL +#define UART_THRESH_CTRL_OFF 0x04UL +#define UART_STATUS_OFF 0x08UL +#define UART_INT_EN_OFF 0x0CUL +#define UART_INT_FL_OFF 0x10UL +#define UART_BAUD0_OFF 0x14UL +#define UART_BAUD1_OFF 0x18UL +#define UART_FIFO_OFF 0x1CUL +#define UART_DMA_OFF 0x20UL +#define UART_TX_FIFO_OFF 0x24UL + +/* UART0 registers */ +#define UART0_CTRL (*(volatile uint32_t *)(UART0_BASE + UART_CTRL_OFF)) +#define UART0_STATUS (*(volatile uint32_t *)(UART0_BASE + UART_STATUS_OFF)) +#define UART0_INT_EN (*(volatile uint32_t *)(UART0_BASE + UART_INT_EN_OFF)) +#define UART0_INT_FL (*(volatile uint32_t *)(UART0_BASE + UART_INT_FL_OFF)) +#define UART0_BAUD0 (*(volatile uint32_t *)(UART0_BASE + UART_BAUD0_OFF)) +#define UART0_BAUD1 (*(volatile uint32_t *)(UART0_BASE + UART_BAUD1_OFF)) +#define UART0_FIFO (*(volatile uint32_t *)(UART0_BASE + UART_FIFO_OFF)) + +/* UART1 registers */ +#define UART1_CTRL (*(volatile uint32_t *)(UART1_BASE + UART_CTRL_OFF)) +#define UART1_STATUS (*(volatile uint32_t *)(UART1_BASE + UART_STATUS_OFF)) +#define UART1_INT_EN (*(volatile uint32_t *)(UART1_BASE + UART_INT_EN_OFF)) +#define UART1_INT_FL (*(volatile uint32_t *)(UART1_BASE + UART_INT_FL_OFF)) +#define UART1_BAUD0 (*(volatile uint32_t *)(UART1_BASE + UART_BAUD0_OFF)) +#define UART1_BAUD1 (*(volatile uint32_t *)(UART1_BASE + UART_BAUD1_OFF)) +#define UART1_FIFO (*(volatile 
uint32_t *)(UART1_BASE + UART_FIFO_OFF)) + +/* UART_CTRL fields */ +#define UART_CTRL_ENABLE (1UL << 0) +#define UART_CTRL_PARITY_EN (1UL << 1) +#define UART_CTRL_TX_FLUSH (1UL << 5) +#define UART_CTRL_RX_FLUSH (1UL << 6) +#define UART_CTRL_CHAR_SZ_SHIFT 8 /* Character size: bits [9:8] */ +#define UART_CTRL_CHAR_SZ_8 (3UL << 8) /* 8-bit characters */ +#define UART_CTRL_STOPBITS (1UL << 10) /* 1=2 stop bits */ +#define UART_CTRL_FLOW_CTRL (1UL << 11) +#define UART_CTRL_CLKSEL (1UL << 15) /* 1=HIRC8M (7.3728MHz) as baud clock */ + +/* UART_STATUS fields */ +#define UART_STATUS_TX_BUSY (1UL << 0) +#define UART_STATUS_RX_BUSY (1UL << 1) +#define UART_STATUS_RX_EMPTY (1UL << 4) /* RX FIFO empty */ +#define UART_STATUS_RX_FULL (1UL << 5) /* RX FIFO full */ +#define UART_STATUS_TX_EMPTY (1UL << 6) /* TX FIFO empty */ +#define UART_STATUS_TX_FULL (1UL << 7) /* TX FIFO full */ + +/* UART_BAUD0 fields */ +#define UART_BAUD0_IBAUD_SHIFT 0 /* Integer baud divisor: bits [11:0] */ +#define UART_BAUD0_IBAUD_MASK (0xFFFUL << 0) +#define UART_BAUD0_FACTOR_SHIFT 16 /* Baud factor: bits [17:16] */ +#define UART_BAUD0_FACTOR_MASK (0x3UL << 16) +#define UART_BAUD0_FACTOR_128 (0UL << 16) /* Oversampling 128 */ +#define UART_BAUD0_FACTOR_64 (1UL << 16) /* Oversampling 64 */ +#define UART_BAUD0_FACTOR_32 (2UL << 16) /* Oversampling 32 */ +#define UART_BAUD0_FACTOR_16 (3UL << 16) /* Oversampling 16 */ + +/* Baud rate: baud = uart_clk / (IBAUD * (128 >> FACTOR)) + * Using HIRC8M (7.3728 MHz) for exact standard baud rates. 
+ * For 115200 with FACTOR=2 (div 32): IBAUD = 7372800 / (115200 * 32) = 2 + * For 115200 with FACTOR=3 (div 16): IBAUD = 7372800 / (115200 * 16) = 4 + */ +#define HIRC8M_FREQ 7372800UL +#define HIRC96M_FREQ 96000000UL +#define HIRC_FREQ 60000000UL + +#ifndef UART_BAUDRATE +#define UART_BAUDRATE 115200 +#endif + +/* Select debug UART instance + * MAX32666FTHR uses UART1 MAP_B (P1.12 RX, P1.13 TX) through PICO adapter + */ +#ifndef DEBUG_UART_NUM +#define DEBUG_UART_NUM 1 +#endif + +#if DEBUG_UART_NUM == 0 + #define DEBUG_UART_CTRL UART0_CTRL + #define DEBUG_UART_STATUS UART0_STATUS + #define DEBUG_UART_INT_EN UART0_INT_EN + #define DEBUG_UART_INT_FL UART0_INT_FL + #define DEBUG_UART_BAUD0 UART0_BAUD0 + #define DEBUG_UART_BAUD1 UART0_BAUD1 + #define DEBUG_UART_FIFO UART0_FIFO + #define DEBUG_UART_PCLKDIS GCR_PERCKCN0_UART0D +#elif DEBUG_UART_NUM == 1 + #define DEBUG_UART_CTRL UART1_CTRL + #define DEBUG_UART_STATUS UART1_STATUS + #define DEBUG_UART_INT_EN UART1_INT_EN + #define DEBUG_UART_INT_FL UART1_INT_FL + #define DEBUG_UART_BAUD0 UART1_BAUD0 + #define DEBUG_UART_BAUD1 UART1_BAUD1 + #define DEBUG_UART_FIFO UART1_FIFO + #define DEBUG_UART_PCLKDIS GCR_PERCKCN0_UART1D +#endif + +/* ============== GPIO ============== */ +/* MSDK: gpio_regs.h */ +#define GPIO0_BASE 0x40008000UL +#define GPIO1_BASE 0x40009000UL + +/* GPIO register offsets */ +#define GPIO_EN0_OFF 0x00UL /* GPIO function enable (1=GPIO, 0=AF) */ +#define GPIO_EN0_SET_OFF 0x04UL +#define GPIO_EN0_CLR_OFF 0x08UL +#define GPIO_OUT_EN_OFF 0x0CUL /* Output enable */ +#define GPIO_OUT_EN_SET_OFF 0x10UL +#define GPIO_OUT_EN_CLR_OFF 0x14UL +#define GPIO_OUT_OFF 0x18UL /* Output data */ +#define GPIO_OUT_SET_OFF 0x1CUL +#define GPIO_OUT_CLR_OFF 0x20UL +#define GPIO_IN_OFF 0x24UL /* Input data */ +#define GPIO_EN1_OFF 0x68UL /* Alternate function select */ +#define GPIO_EN1_SET_OFF 0x6CUL +#define GPIO_EN1_CLR_OFF 0x70UL +#define GPIO_EN2_OFF 0x74UL +#define GPIO_EN2_SET_OFF 0x78UL +#define GPIO_EN2_CLR_OFF 
0x7CUL + +/* GPIO0 registers */ +#define GPIO0_EN0 (*(volatile uint32_t *)(GPIO0_BASE + GPIO_EN0_OFF)) +#define GPIO0_EN0_SET (*(volatile uint32_t *)(GPIO0_BASE + GPIO_EN0_SET_OFF)) +#define GPIO0_EN0_CLR (*(volatile uint32_t *)(GPIO0_BASE + GPIO_EN0_CLR_OFF)) +#define GPIO0_EN1 (*(volatile uint32_t *)(GPIO0_BASE + GPIO_EN1_OFF)) +#define GPIO0_EN1_SET (*(volatile uint32_t *)(GPIO0_BASE + GPIO_EN1_SET_OFF)) +#define GPIO0_EN1_CLR (*(volatile uint32_t *)(GPIO0_BASE + GPIO_EN1_CLR_OFF)) +#define GPIO0_OUT_EN (*(volatile uint32_t *)(GPIO0_BASE + GPIO_OUT_EN_OFF)) +#define GPIO0_OUT_SET (*(volatile uint32_t *)(GPIO0_BASE + GPIO_OUT_SET_OFF)) +#define GPIO0_OUT_CLR (*(volatile uint32_t *)(GPIO0_BASE + GPIO_OUT_CLR_OFF)) +#define GPIO0_IN (*(volatile uint32_t *)(GPIO0_BASE + GPIO_IN_OFF)) + +/* GPIO1 registers */ +#define GPIO1_EN0 (*(volatile uint32_t *)(GPIO1_BASE + GPIO_EN0_OFF)) +#define GPIO1_EN0_SET (*(volatile uint32_t *)(GPIO1_BASE + GPIO_EN0_SET_OFF)) +#define GPIO1_EN0_CLR (*(volatile uint32_t *)(GPIO1_BASE + GPIO_EN0_CLR_OFF)) +#define GPIO1_EN1 (*(volatile uint32_t *)(GPIO1_BASE + GPIO_EN1_OFF)) +#define GPIO1_EN1_SET (*(volatile uint32_t *)(GPIO1_BASE + GPIO_EN1_SET_OFF)) +#define GPIO1_EN1_CLR (*(volatile uint32_t *)(GPIO1_BASE + GPIO_EN1_CLR_OFF)) +#define GPIO1_OUT_EN (*(volatile uint32_t *)(GPIO1_BASE + GPIO_OUT_EN_OFF)) +#define GPIO1_OUT_SET (*(volatile uint32_t *)(GPIO1_BASE + GPIO_OUT_SET_OFF)) +#define GPIO1_OUT_CLR (*(volatile uint32_t *)(GPIO1_BASE + GPIO_OUT_CLR_OFF)) +#define GPIO1_IN (*(volatile uint32_t *)(GPIO1_BASE + GPIO_IN_OFF)) + +/* Pin definitions for MAX32666FTHR: + * UART1 MAP_B: P1.12 = RX, P1.13 = TX (AF3: EN0=0, EN1=0, EN2=1) + * UART0 MAP_A: P0.0 = TX, P0.1 = RX (AF1: EN0=0, EN1=0, default) + */ +#define UART1B_TX_PIN (1UL << 13) /* P1.13 */ +#define UART1B_RX_PIN (1UL << 12) /* P1.12 */ +#define UART1B_PINS (UART1B_TX_PIN | UART1B_RX_PIN) + +/* ============== ARM Cortex-M4 System Registers ============== */ +#define SCB_BASE 
0xE000ED00UL +#define SCB_CPUID (*(volatile uint32_t *)(SCB_BASE + 0x00UL)) +#define SCB_ICSR (*(volatile uint32_t *)(SCB_BASE + 0x04UL)) +#define SCB_VTOR (*(volatile uint32_t *)(SCB_BASE + 0x08UL)) +#define SCB_AIRCR (*(volatile uint32_t *)(SCB_BASE + 0x0CUL)) + +#define AIRCR_VECTKEY (0x05FAUL << 16) +#define AIRCR_SYSRESETREQ (1UL << 2) + +/* ============== Function Declarations ============== */ + +#ifdef DEBUG_UART +void uart_init(void); +void uart_write(const char* buf, unsigned int sz); +int uart_read(char* c); +#endif + +#endif /* MAX32666_H */ diff --git a/hal/max32666.ld b/hal/max32666.ld new file mode 100644 index 0000000000..6741c2fc6c --- /dev/null +++ b/hal/max32666.ld @@ -0,0 +1,73 @@ +/* + * Linker script for wolfBoot on MAX32665/MAX32666 + * + * MAX32666: 1MB Flash (0x10000000 - 0x100FFFFF), 560KB SRAM + * + * Flash Memory Layout: + * 0x10000000 - 0x10007FFF: Bootloader (32 KB) + * 0x10008000 - 0x10047FFF: Boot Partition (256 KB) + * 0x10048000 - 0x10087FFF: Update Partition (256 KB) + * 0x10088000 - 0x10089FFF: Swap Sector (8 KB) + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = @ARCH_FLASH_OFFSET@, LENGTH = @BOOTLOADER_PARTITION_SIZE@ + RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 0x8C000 /* 560KB SRAM */ +} + +SECTIONS +{ + .text : + { + _start_text = .; + KEEP(*(.isr_vector)) + . = ALIGN(4); + *(.text*) + *(.rodata*) + *(.init*) + *(.fini*) + . = ALIGN(4); + _end_text = .; + } > FLASH + + .edidx : + { + . = ALIGN(4); + *(.ARM.exidx*) + } > FLASH + + _stored_data = .; + + .data : AT (_stored_data) + { + _start_data = .; + KEEP(*(.data*)) + . = ALIGN(4); + KEEP(*(.ramcode)) + . = ALIGN(4); + _end_data = .; + } > RAM + + .bss (NOLOAD) : + { + _start_bss = .; + __bss_start__ = .; + *(.bss*) + *(COMMON) + . = ALIGN(4); + _end_bss = .; + __bss_end__ = .; + _end = .; + } > RAM + . 
= ALIGN(4); +} + +/* Partition addresses for wolfBoot */ +_wolfboot_partition_boot_address = @WOLFBOOT_PARTITION_BOOT_ADDRESS@; +_wolfboot_partition_size = @WOLFBOOT_PARTITION_SIZE@; +_wolfboot_partition_update_address = @WOLFBOOT_PARTITION_UPDATE_ADDRESS@; +_wolfboot_partition_swap_address = @WOLFBOOT_PARTITION_SWAP_ADDRESS@; + +/* Stack at end of SRAM (grows down) */ +END_STACK = ORIGIN(RAM) + LENGTH(RAM); diff --git a/test-app/app_max32666.c b/test-app/app_max32666.c new file mode 100644 index 0000000000..f768603a2c --- /dev/null +++ b/test-app/app_max32666.c @@ -0,0 +1,40 @@ +/* app_max32666.c + * + * Test application for wolfBoot on MAX32666FTHR + * + * Copyright (C) 2025 wolfSSL Inc. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include "hal.h" +#include "wolfboot/wolfboot.h" +#include "printf.h" + +void main(void) +{ + uint32_t version; + + hal_init(); + + version = wolfBoot_current_firmware_version(); + + wolfBoot_printf("MAX32666 Test App v%lu\n", (unsigned long)version); + + /* Mark boot successful to prevent rollback */ + wolfBoot_success(); + + wolfBoot_printf("Boot success marked. 
Version: %lu\n", + (unsigned long)version); + + /* Main loop */ + while (1) { + __asm__ volatile ("wfi"); + } +} diff --git a/tools/openocd/max32665.cfg b/tools/openocd/max32665.cfg new file mode 100644 index 0000000000..6f19d0faee --- /dev/null +++ b/tools/openocd/max32665.cfg @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# Analog Devices MAX32665/MAX32666 - Dual Arm Cortex-M4F @ 96MHz +# 1MB Flash (2 x 512KB banks), 560KB SRAM + +# Set the reset pin configuration +reset_config srst_only + +# Flash Bank 0: 512KB at 0x10000000, FLC0 at 0x40029000 +set FLASH_BASE 0x10000000 +set FLASH_SIZE 0x80000 +set FLC_BASE 0x40029000 +set FLASH_SECTOR 0x2000 +set FLASH_CLK 96 +set FLASH_OPTIONS 0x00 + +# Use Serial Wire Debug +transport select swd + +source [find target/max32xxx_common.cfg] + +# Flash Bank 1: 512KB at 0x10080000, FLC1 at 0x40029400 +flash bank $_CHIPNAME.flash1 max32xxx 0x10080000 0x80000 0 0 $_CHIPNAME.cpu \ +0x40029400 0x2000 96 0x00 From ef61efc8082235e3e774a7bf906da0a0868761a0 Mon Sep 17 00:00:00 2001 From: Mattia Moffa Date: Wed, 25 Mar 2026 23:49:41 +0100 Subject: [PATCH 2/7] Fix UART; support FTHR2 board --- arch.mk | 3 +++ hal/max32666.c | 49 ++++++++++++++++++++++------------------- hal/max32666.h | 29 ++++++++++++++++++------ test-app/app_max32666.c | 20 ++++++++++++++++- 4 files changed, 70 insertions(+), 31 deletions(-) diff --git a/arch.mk b/arch.mk index 739757510a..89f8aa02cb 100644 --- a/arch.mk +++ b/arch.mk @@ -189,6 +189,9 @@ ifeq ($(ARCH),ARM) ifeq ($(TARGET),max32666) ARCH_FLASH_OFFSET=0x10000000 + ifeq ($(MAX32666_FTHR2),1) + CFLAGS+=-DMAX32666_FTHR2 + endif endif ifeq ($(TARGET),pic32cz) diff --git a/hal/max32666.c b/hal/max32666.c index b321f79798..516c09b52f 100644 --- a/hal/max32666.c +++ b/hal/max32666.c @@ -79,15 +79,10 @@ static void clock_init(void) /* Select HIRC96 as system clock, no prescaler (PSC=0 = div1) */ GCR_CLKCN = (GCR_CLKCN & ~(GCR_CLKCN_CLKSEL_MASK | GCR_CLKCN_PSC_MASK)) | GCR_CLKCN_CLKSEL_HIRC96 | - 
GCR_CLKCN_HIRC96M_EN | - GCR_CLKCN_HIRC8M_EN; + GCR_CLKCN_HIRC96M_EN; /* Wait for clock switch to complete */ while (!(GCR_CLKCN & GCR_CLKCN_CKRDY)) {} - - /* Enable HIRC8M (7.3728 MHz) for UART baud rate generation */ - GCR_CLKCN |= GCR_CLKCN_HIRC8M_EN; - while (!(GCR_CLKCN & GCR_CLKCN_HIRC8M_RDY)) {} } /* ============== ICC (Instruction Cache) Functions ============== */ @@ -231,19 +226,20 @@ static int RAMFUNCTION flc_page_erase(uint32_t address, void uart_init(void) { - /* Enable GPIO1 clock for pin muxing */ - GCR_PERCKCN0 &= ~GCR_PERCKCN0_GPIO1D; - /* Enable UART peripheral clock (clear disable bit) */ GCR_PERCKCN0 &= ~DEBUG_UART_PCLKDIS; - /* Enable HIRC8M for UART baud rate clock */ - GCR_CLKCN |= GCR_CLKCN_HIRC8M_EN; - while (!(GCR_CLKCN & GCR_CLKCN_HIRC8M_RDY)) {} - -#if DEBUG_UART_NUM == 1 +#if DEBUG_UART_NUM == 0 + /* UART0 MAP_A: P0.9 (TX), P0.10 (RX) = AF3 + * AF3: EN0=0, EN1=0, EN2=1 */ + GCR_PERCKCN0 &= ~GCR_PERCKCN0_GPIO0D; + GPIO0_EN0_CLR = UART0A_PINS; + GPIO0_EN1_CLR = UART0A_PINS; + *(volatile uint32_t *)(GPIO0_BASE + GPIO_EN2_SET_OFF) = UART0A_PINS; +#elif DEBUG_UART_NUM == 1 /* UART1 MAP_B: P1.12 (RX), P1.13 (TX) = AF3 * AF3: EN0=0, EN1=0, EN2=1 (per MSDK gpio_reva.c) */ + GCR_PERCKCN0 &= ~GCR_PERCKCN0_GPIO1D; GPIO1_EN0_CLR = UART1B_PINS; GPIO1_EN1_CLR = UART1B_PINS; *(volatile uint32_t *)(GPIO1_BASE + GPIO_EN2_SET_OFF) = UART1B_PINS; @@ -252,16 +248,23 @@ void uart_init(void) /* Disable UART before configuration */ DEBUG_UART_CTRL = 0; - /* Configure: 8-bit, no parity, 1 stop bit, HIRC8M clock source */ - DEBUG_UART_CTRL = UART_CTRL_CHAR_SZ_8 | UART_CTRL_CLKSEL; - - /* Set baud rate using HIRC8M (7.3728 MHz) - * baud = clk / (IBAUD * (128 >> FACTOR)) - * For 115200 with FACTOR=2 (div 32): IBAUD = 7372800 / (115200*32) = 2 + /* Configure: 8-bit, no parity, 1 stop bit, PCLK clock source. + * Per errata #8: do NOT use HIRC8M (CLKSEL=1) for TX, + * it generates a spurious pulse that corrupts framing. 
*/ + DEBUG_UART_CTRL = UART_CTRL_CHAR_SZ_8; + + /* Set baud rate using PCLK (SYSCLK/2 = 48 MHz @ 96 MHz sys clock). + * baud = clk / ((IBAUD + DBAUD/128) * (128 >> FACTOR)) + * For 115200 with FACTOR=0 (prescale 128): + * 48000000 / 115200 = 416.667 + * IBAUD = floor(416.667/128) = 3 + * DBAUD = round((416.667/128 - 3) * 128) = round(32.667) = 33 + * ME10-650 DBAUD workaround (per MSDK uart_reva.c): 33 - 3 = 30 + * Effective baud = 48000000 / ((3 + 33/128) * 128) = 115107.9 (-0.08%) */ - DEBUG_UART_BAUD0 = (2UL << UART_BAUD0_IBAUD_SHIFT) | - UART_BAUD0_FACTOR_32; - DEBUG_UART_BAUD1 = 0; /* No fractional adjustment */ + DEBUG_UART_BAUD0 = (3UL << UART_BAUD0_IBAUD_SHIFT) | + UART_BAUD0_FACTOR_128; + DEBUG_UART_BAUD1 = 30; /* Disable all interrupts */ DEBUG_UART_INT_EN = 0; diff --git a/hal/max32666.h b/hal/max32666.h index 80185243ee..91ec3e4668 100644 --- a/hal/max32666.h +++ b/hal/max32666.h @@ -260,10 +260,15 @@ #define UART_BAUD0_FACTOR_32 (2UL << 16) /* Oversampling 32 */ #define UART_BAUD0_FACTOR_16 (3UL << 16) /* Oversampling 16 */ -/* Baud rate: baud = uart_clk / (IBAUD * (128 >> FACTOR)) - * Using HIRC8M (7.3728 MHz) for exact standard baud rates. - * For 115200 with FACTOR=2 (div 32): IBAUD = 7372800 / (115200 * 32) = 2 - * For 115200 with FACTOR=3 (div 16): IBAUD = 7372800 / (115200 * 16) = 4 +/* Baud rate: baud = uart_clk / ((IBAUD + DBAUD/128) * (128 >> FACTOR)) + * + * Per errata #8, HIRC8M causes a spurious TX pulse. + * Use PCLK (SYSCLK/2 = 48 MHz) instead. 
+ * + * For 115200 with PCLK=48MHz, FACTOR=0 (prescale 128): + * divisor = 48000000/115200 = 416.667 + * IBAUD = 416/128 = 3, remainder = 416.667 - 3*128 = 32.667 + * DBAUD = 33, adjusted for ME10-650 errata: 33 - 3 = 30 */ #define HIRC8M_FREQ 7372800UL #define HIRC96M_FREQ 96000000UL @@ -274,11 +279,16 @@ #endif /* Select debug UART instance - * MAX32666FTHR uses UART1 MAP_B (P1.12 RX, P1.13 TX) through PICO adapter + * MAX32666FTHR uses UART1 MAP_B (P1.12 RX, P1.13 TX) through external PICO + * MAX32666FTHR2 uses UART0 MAP_A (P0.9 TX, P0.10 RX) through onboard DAPLINK */ #ifndef DEBUG_UART_NUM +#ifdef MAX32666_FTHR2 +#define DEBUG_UART_NUM 0 +#else #define DEBUG_UART_NUM 1 #endif +#endif #if DEBUG_UART_NUM == 0 #define DEBUG_UART_CTRL UART0_CTRL @@ -348,9 +358,14 @@ #define GPIO1_IN (*(volatile uint32_t *)(GPIO1_BASE + GPIO_IN_OFF)) /* Pin definitions for MAX32666FTHR: - * UART1 MAP_B: P1.12 = RX, P1.13 = TX (AF3: EN0=1, EN1=1) - * UART0 MAP_A: P0.0 = TX, P0.1 = RX (AF1: EN0=0, EN1=0, default) + * UART1 MAP_B: P1.13 = TX, P1.12 = RX (AF3: EN0=0, EN1=0, EN2=1) + * + * Pin definitions for MAX32666FTHR2: + * UART0 MAP_A: P0.9 = TX, P0.10 = RX (AF3: EN0=0, EN1=0, EN2=1) */ +#define UART0A_TX_PIN (1UL << 9) /* P0.9 */ +#define UART0A_RX_PIN (1UL << 10) /* P0.10 */ +#define UART0A_PINS (UART0A_TX_PIN | UART0A_RX_PIN) #define UART1B_TX_PIN (1UL << 13) /* P1.13 */ #define UART1B_RX_PIN (1UL << 12) /* P1.12 */ #define UART1B_PINS (UART1B_TX_PIN | UART1B_RX_PIN) diff --git a/test-app/app_max32666.c b/test-app/app_max32666.c index f768603a2c..c12b282bd7 100644 --- a/test-app/app_max32666.c +++ b/test-app/app_max32666.c @@ -16,6 +16,11 @@ #include "hal.h" #include "wolfboot/wolfboot.h" #include "printf.h" +#include "hal/max32666.h" + +#define LED_RED_PIN (1UL << 29) +#define LED_BLUE_PIN (1UL << 30) +#define LED_GREEN_PIN (1UL << 31) void main(void) { @@ -23,8 +28,21 @@ void main(void) hal_init(); + version = wolfBoot_current_firmware_version(); if (version == 1) { + /* Turn on blue LED 
*/ + GPIO0_EN0_SET = LED_BLUE_PIN; /* configure as GPIO */ + GPIO0_OUT_EN |= LED_BLUE_PIN; /* enable output */ + GPIO0_OUT_CLR = LED_BLUE_PIN; /* drive low (LED on) */ + } else { + /* Turn on green LED */ + GPIO0_EN0_SET = LED_GREEN_PIN; /* configure as GPIO */ + GPIO0_OUT_EN |= LED_GREEN_PIN; /* enable output */ + GPIO0_OUT_CLR = LED_GREEN_PIN; /* drive low (LED on) */ + } + wolfBoot_printf("MAX32666 Test App v%lu\n", (unsigned long)version); /* Mark boot successful to prevent rollback */ @@ -35,6 +53,6 @@ void main(void) /* Main loop */ while (1) { - __asm__ volatile ("wfi"); + __asm__ volatile ("nop"); } } From 31f912c53d1372997244e038bc630284d27b8ca5 Mon Sep 17 00:00:00 2001 From: Mattia Moffa Date: Thu, 26 Mar 2026 01:12:16 +0100 Subject: [PATCH 3/7] Implementation of test-app crypto test --- arch.mk | 2 +- docs/Targets.md | 7 +- hal/max32666.h | 1 + include/user_settings.h | 12 +- src/string.c | 26 +++- test-app/ARM-max32666.ld | 59 +++++++++ test-app/Makefile | 44 +++++++ test-app/app_max32666.c | 252 ++++++++++++++++++++++++++++++++++++++- test-app/syscalls.c | 50 +++++++- 9 files changed, 438 insertions(+), 15 deletions(-) create mode 100644 test-app/ARM-max32666.ld diff --git a/arch.mk b/arch.mk index 89f8aa02cb..a3a0cacb03 100644 --- a/arch.mk +++ b/arch.mk @@ -140,7 +140,7 @@ endif ## ARM Cortex-M ifeq ($(ARCH),ARM) CROSS_COMPILE?=arm-none-eabi- - CFLAGS+=-DARCH_ARM + CFLAGS+=-DARCH_ARM -DFAST_MEMCPY CFLAGS+=-mthumb -mlittle-endian LDFLAGS+=-mthumb -mlittle-endian ifeq ($(USE_GCC),1) diff --git a/docs/Targets.md b/docs/Targets.md index 28ff1431ac..97532a2fe9 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -8498,14 +8498,17 @@ Number of public keys: 1 The Analog Devices MAX32665/MAX32666 family features a dual Cortex-M4 at 96 MHz with 1MB internal flash (2 x 512KB banks), 560KB SRAM, and BLE 5. -wolfBoot has been tested on the MAX32666FTHR board with a MAX32625PICO debug adapter. 
+wolfBoot has been tested on the MAX32666FTHR board with a MAX32625PICO debug adapter, +as well as the MAX32666FTHR2 board with the onboard debugger. **Key Features:** - ARM Cortex-M4 core at 96 MHz (HIRC96M oscillator) - 1MB Flash: 8KB page erase, 128-bit (16-byte) write unit, dual-bank (FLC0/FLC1) - 560KB SRAM - Bare-metal implementation (no MSDK required for boot) -- UART0 debug output (P0.0 TX, P0.1 RX on FTHR board) +- UART0 debug output: + - P1.13 TX, P1.12 RX on FTHR board + - P0.9 TX, P0.10 RX on FTHR2 board ### MAX32666: Memory Layout diff --git a/hal/max32666.h b/hal/max32666.h index 91ec3e4668..6ce038a66e 100644 --- a/hal/max32666.h +++ b/hal/max32666.h @@ -101,6 +101,7 @@ /* GCR_PERCKCN1 bits */ #define GCR_PERCKCN1_UART2D (1UL << 1) +#define GCR_PERCKCN1_TRNGD (1UL << 2) #define GCR_PERCKCN1_FLCD (1UL << 3) #define GCR_PERCKCN1_ICCD (1UL << 11) #define GCR_PERCKCN1_WDT0D (1UL << 27) diff --git a/include/user_settings.h b/include/user_settings.h index c5daa79b3e..15dbf28106 100644 --- a/include/user_settings.h +++ b/include/user_settings.h @@ -513,7 +513,8 @@ extern int tolower(int c); #endif #if !defined(WOLFCRYPT_SECURE_MODE) && !defined(WOLFBOOT_TPM_PARMENC) && \ - !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) + !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) && \ + !defined(WOLFCRYPT_MAX32666_TEST) #if !(defined(WOLFBOOT_ENABLE_WOLFHSM_CLIENT) && \ defined(WOLFBOOT_SIGN_ML_DSA)) && \ !defined(WOLFBOOT_ENABLE_WOLFHSM_SERVER) @@ -561,12 +562,14 @@ extern int tolower(int c); #endif #if !defined(WOLFBOOT_TPM) && !defined(WOLFCRYPT_SECURE_MODE) && \ - !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) + !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) && \ + !defined(WOLFCRYPT_MAX32666_TEST) # define NO_HMAC #endif #if !defined(WOLFBOOT_TPM) && !defined(WOLFCRYPT_SECURE_MODE) && \ - !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) + !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) && \ + 
!defined(WOLFCRYPT_MAX32666_TEST) # if !(defined(WOLFBOOT_ENABLE_WOLFHSM_CLIENT) && \ defined(WOLFBOOT_SIGN_ML_DSA)) && \ !defined(WOLFBOOT_ENABLE_WOLFHSM_SERVER) @@ -706,7 +709,8 @@ extern int tolower(int c); # endif # if !defined(SECURE_PKCS11) && !defined(WOLFCRYPT_TZ_PSA) && \ !defined(WOLFBOOT_ENABLE_WOLFHSM_SERVER) && \ - !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) + !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) && \ + !defined(WOLFCRYPT_MAX32666_TEST) # define NO_WOLFSSL_MEMORY # define WOLFSSL_NO_MALLOC # endif diff --git a/src/string.c b/src/string.c index 82a1012cfc..c4894adaf6 100644 --- a/src/string.c +++ b/src/string.c @@ -43,6 +43,7 @@ #endif #include +#include #if !defined(TARGET_library) && defined(__STDC_HOSTED__) && __STDC_HOSTED__ \ && !defined(__CCRX__) #include @@ -98,9 +99,32 @@ int isalpha(int c) void *memset(void *s, int c, size_t n) { unsigned char *d = (unsigned char *)s; + unsigned char uc = (unsigned char)c; + +#if defined(ARCH_ARM) || defined(ARCH_AARCH64) + /* Use word-sized writes when aligned — required for MMIO peripheral + * registers on ARM (APB bus does not support sub-word writes). 
*/ + + /* Write bytes until the pointer is 4-byte aligned */ + while (n > 0 && ((uintptr_t)d & 3U)) { + *d++ = uc; + n--; + } + + if (n >= 4) { + uint32_t w = ((uint32_t)uc) | ((uint32_t)uc << 8) | + ((uint32_t)uc << 16) | ((uint32_t)uc << 24); + volatile uint32_t *dw = (volatile uint32_t *)d; + while (n >= 4) { + *dw++ = w; + n -= 4; + } + d = (unsigned char *)dw; + } +#endif /* ARCH_ARM || ARCH_AARCH64 */ while (n--) { - *d++ = (unsigned char)c; + *d++ = uc; } return s; diff --git a/test-app/ARM-max32666.ld b/test-app/ARM-max32666.ld new file mode 100644 index 0000000000..1fda65429e --- /dev/null +++ b/test-app/ARM-max32666.ld @@ -0,0 +1,59 @@ +MEMORY +{ + FLASH (rx) : ORIGIN = @WOLFBOOT_TEST_APP_ADDRESS@, LENGTH = @WOLFBOOT_TEST_APP_SIZE@ + RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 0x8C000 /* 560KB SRAM */ +} + +_Min_Heap_Size = 0x2000; /* 8K heap for wolfCrypt MAA */ + +SECTIONS +{ + .text : + { + _start_text = .; + KEEP(*(.isr_vector)) + *(.init) + *(.fini) + *(.text*) + KEEP(*(.rodata*)) + . = ALIGN(4); + _end_text = .; + } > FLASH + + .ARM : + { + __exidx_start = .; + *(.ARM.exidx*) + __exidx_end = .; + } > FLASH + + _stored_data = .; + + .data : AT (_stored_data) + { + _start_data = .; + KEEP(*(.data*)) + . = ALIGN(4); + KEEP(*(.ramcode)) + . = ALIGN(4); + _end_data = .; + } > RAM + + .bss : + { + _start_bss = .; + *(.bss*) + *(COMMON) + . 
= ALIGN(4); + _end_bss = .; + _end = .; + } > RAM +} + +_wolfboot_partition_boot_address = @WOLFBOOT_PARTITION_BOOT_ADDRESS@; +_wolfboot_partition_size = @WOLFBOOT_PARTITION_SIZE@; +_wolfboot_partition_update_address = @WOLFBOOT_PARTITION_UPDATE_ADDRESS@; +_wolfboot_partition_swap_address = @WOLFBOOT_PARTITION_SWAP_ADDRESS@; + +PROVIDE(_start_heap = _end); +PROVIDE(_end_stack = ORIGIN(RAM) + LENGTH(RAM)); diff --git a/test-app/Makefile b/test-app/Makefile index a52ef53fa8..2f63f12225 100644 --- a/test-app/Makefile +++ b/test-app/Makefile @@ -97,6 +97,13 @@ else endif endif +# Disable Thumb2 ASM for MAX32666 before arch.mk: hardware TPU handles AES +ifeq ($(TARGET),max32666) + ifeq ($(WOLFCRYPT_MAX32666_TEST),1) + NO_ARM_ASM=1 + endif +endif + include ../arch.mk ifeq ($(USE_CLANG),1) @@ -136,6 +143,30 @@ endif # wolfCrypt Test and Benchmark Support WOLFCRYPT_SUPPORT=0 +# MAX32666 hardware crypto test (MSDK TPU/MAA/TRNG acceleration) +ifeq ($(TARGET),max32666) + ifeq ($(WOLFCRYPT_MAX32666_TEST),1) + MSDK_DIR?=../../msdk + CFLAGS += -DWOLFCRYPT_MAX32666_TEST + CFLAGS += -DWOLFSSL_MAX3266X + CFLAGS += -DMAX3266X_RNG -DMAX3266X_AES -DMAX3266X_SHA -DMAX3266X_MATH + CFLAGS += -DHAVE_ECC_CDH -DHAVE_ECC_DHE + CFLAGS += -DHAVE_AESGCM -DGCM_TABLE + CFLAGS += -DENCRYPT_WITH_AES256 + CFLAGS += -DWOLFSSL_SP_MATH_ALL + CFLAGS += -DTARGET=MAX32665 -DTARGET_REV=0x4131 + CFLAGS += -ffunction-sections -fdata-sections + CFLAGS += \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Include/MAX32665/ \ + -I$(MSDK_DIR)/Libraries/CMSIS/Device/Maxim/MAX32665/Include/ \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/ \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TRNG/ \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/ \ + -I$(MSDK_DIR)/Libraries/CMSIS/Include/ + WOLFCRYPT_SUPPORT=1 + endif +endif + ifeq ($(WOLFCRYPT_TEST),1) CFLAGS+=-DWOLFCRYPT_TEST WOLFCRYPT_SUPPORT=1 @@ -562,6 +593,19 @@ ifeq ($(TARGET),nrf5340_net) LSCRIPT_TEMPLATE=ARM-nrf5340_net.ld endif +ifeq ($(TARGET),max32666) + 
LSCRIPT_TEMPLATE=ARM-max32666.ld + ifeq ($(WOLFCRYPT_MAX32666_TEST),1) + MSDK_DIR?=../../msdk + APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_me14.o + APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_reva.o + APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TRNG/trng_reva.o + APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/sys_me14.o + APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/mxc_delay.o + APP_OBJS += $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/maxim/max3266x.o + endif +endif + ifeq ($(TARGET),ti_hercules) LSCRIPT_TEMPLATE=ARM-r5be.ld # Override linker flags diff --git a/test-app/app_max32666.c b/test-app/app_max32666.c index c12b282bd7..0ee35f6f7e 100644 --- a/test-app/app_max32666.c +++ b/test-app/app_max32666.c @@ -18,6 +18,245 @@ #include "printf.h" #include "hal/max32666.h" +#ifdef WOLFCRYPT_MAX32666_TEST +/* MSDK's mxc_delay.c references SystemCoreClock (CMSIS). + * wolfBoot configures HIRC96M = 96 MHz in hal_init(). */ +uint32_t SystemCoreClock = 96000000; + +/* MAX32665/MAX32666 TRNG does not implement on-demand health test in hardware. + * The MSDK trng_revb driver assumes a 3-register layout (ctrl/status/data) but + * the actual silicon only has 2 registers (cn/data). Provide a stub so that + * wolfSSL's wc_GenerateSeed() can proceed. 
*/ +int MXC_TRNG_HealthTest(void) { return 0; } + +#include +#include +#include +#include +#include + +/* + * AES-GCM 256-bit known-answer test + * Test Case 16 from McGrew & Viega GCM spec (same as wolfcrypt test.c) + */ +static int test_aes_gcm_256(void) +{ + Aes aes; + int ret; + byte resultC[60]; + byte resultT[16]; + byte resultP[60]; + + static const byte key[] = { + 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, + 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08, + 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, + 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08 + }; + static const byte iv[] = { + 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad, + 0xde, 0xca, 0xf8, 0x88 + }; + static const byte plain[] = { + 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, + 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, + 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, + 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, + 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, + 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, + 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, + 0xba, 0x63, 0x7b, 0x39 + }; + static const byte aad[] = { + 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, + 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, + 0xab, 0xad, 0xda, 0xd2 + }; + static const byte expC[] = { + 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, + 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d, + 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, + 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, + 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d, + 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, + 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, + 0xbc, 0xc9, 0xf6, 0x62 + }; + static const byte expT[] = { + 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68, + 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b + }; + + wolfBoot_printf("AES-GCM-256 test: "); + + memset(&aes, 0, sizeof(aes)); + ret = wc_AesGcmSetKey(&aes, key, sizeof(key)); + if (ret != 0) { + wolfBoot_printf("FAIL (SetKey: %d)\n", ret); + 
return ret; + } + + memset(resultC, 0, sizeof(resultC)); + memset(resultT, 0, sizeof(resultT)); + ret = wc_AesGcmEncrypt(&aes, resultC, plain, sizeof(plain), + iv, sizeof(iv), resultT, sizeof(resultT), + aad, sizeof(aad)); + if (ret != 0) { + wolfBoot_printf("FAIL (Encrypt: %d)\n", ret); + return ret; + } + if (memcmp(resultC, expC, sizeof(expC)) != 0) { + wolfBoot_printf("FAIL (ciphertext mismatch)\n"); + return -1; + } + if (memcmp(resultT, expT, sizeof(expT)) != 0) { + wolfBoot_printf("FAIL (tag mismatch)\n"); + return -1; + } + + memset(resultP, 0, sizeof(resultP)); + ret = wc_AesGcmDecrypt(&aes, resultP, resultC, sizeof(resultC), + iv, sizeof(iv), resultT, sizeof(resultT), + aad, sizeof(aad)); + if (ret != 0) { + wolfBoot_printf("FAIL (Decrypt: %d)\n", ret); + return ret; + } + if (memcmp(resultP, plain, sizeof(plain)) != 0) { + wolfBoot_printf("FAIL (plaintext mismatch)\n"); + return -1; + } + + wolfBoot_printf("PASS\n"); + return 0; +} + +/* + * ECDHE P-256 known-answer test + * Two fixed key pairs; verify shared secret matches expected value. + * Keys from NIST ECDH test vectors (KAS_ECC_CDH_PrimitiveTest). 
+ */ +static int test_ecdhe_p256(void) +{ + ecc_key keyA, keyB; + WC_RNG rng; + int ret; + byte secretA[32], secretB[32]; + word32 secretALen = sizeof(secretA); + word32 secretBLen = sizeof(secretB); + int i; + + /* Party A key pair (dA, QAx, QAy) where QA = dA * G */ + static const char dA[] = + "c88f01f510d9ac3f70a292daa2316de544e9aab8afe84049c62a9c57862d1433"; + static const char QAx[] = + "dad0b65394221cf9b051e1feca5787d098dfe637fc90b9ef945d0c3772581180"; + static const char QAy[] = + "5271a0461cdb8252d61f1c456fa3e59ab1f45b33accf5f58389e0577b8990bb3"; + + /* Party B key pair (dB, QBx, QBy) where QB = dB * G */ + static const char dB[] = + "c6ef9c5d78ae012a011164acb397ce2088685d8f06bf9be0b283ab46476bee53"; + static const char QBx[] = + "d12dfb5289c8d4f81208b70270398c342296970a0bccb74c736fc7554494bf63"; + static const char QBy[] = + "56fbf3ca366cc23e8157854c13c58d6aac23f046ada30f8353e74f33039872ab"; + + /* Expected shared secret: dA * QB = dB * QA */ + static const byte expSecret[] = { + 0xd6, 0x84, 0x0f, 0x6b, 0x42, 0xf6, 0xed, 0xaf, + 0xd1, 0x31, 0x16, 0xe0, 0xe1, 0x25, 0x65, 0x20, + 0x2f, 0xef, 0x8e, 0x9e, 0xce, 0x7d, 0xce, 0x03, + 0x81, 0x24, 0x64, 0xd0, 0x4b, 0x94, 0x42, 0xde, + }; + + wolfBoot_printf("ECDHE P-256 test: "); + + /* Enable TRNG peripheral clock before wc_InitRng seeds from hardware */ + GCR_PERCKCN1 &= ~GCR_PERCKCN1_TRNGD; + + ret = wc_InitRng(&rng); + if (ret != 0) { + wolfBoot_printf("FAIL (InitRng: %d)\n", ret); + return ret; + } + + ret = wc_ecc_init(&keyA); + if (ret != 0) { + wolfBoot_printf("FAIL (init A: %d)\n", ret); + wc_FreeRng(&rng); + return ret; + } + ret = wc_ecc_init(&keyB); + if (ret != 0) { + wolfBoot_printf("FAIL (init B: %d)\n", ret); + wc_ecc_free(&keyA); + wc_FreeRng(&rng); + return ret; + } + + wc_ecc_set_rng(&keyA, &rng); + wc_ecc_set_rng(&keyB, &rng); + + /* Import key A (private + public) */ + ret = wc_ecc_import_raw(&keyA, QAx, QAy, dA, "SECP256R1"); + if (ret != 0) { + wolfBoot_printf("FAIL (import A: %d)\n", ret); 
+ goto cleanup; + } + + /* Import key B (private + public) */ + ret = wc_ecc_import_raw(&keyB, QBx, QBy, dB, "SECP256R1"); + if (ret != 0) { + wolfBoot_printf("FAIL (import B: %d)\n", ret); + goto cleanup; + } + + /* A computes shared secret: dA * QB */ + ret = wc_ecc_shared_secret(&keyA, &keyB, secretA, &secretALen); + if (ret != 0) { + wolfBoot_printf("FAIL (shared A: %d)\n", ret); + goto cleanup; + } + + /* B computes shared secret: dB * QA */ + ret = wc_ecc_shared_secret(&keyB, &keyA, secretB, &secretBLen); + if (ret != 0) { + wolfBoot_printf("FAIL (shared B: %d)\n", ret); + goto cleanup; + } + + /* Both sides must agree */ + if (secretALen != secretBLen || + memcmp(secretA, secretB, secretALen) != 0) { + wolfBoot_printf("FAIL (secrets differ)\n"); + ret = -1; + goto cleanup; + } + + /* Verify against expected value */ + if (secretALen != sizeof(expSecret) || + memcmp(secretA, expSecret, sizeof(expSecret)) != 0) { + wolfBoot_printf("FAIL (expected mismatch)\n"); + wolfBoot_printf(" got: "); + for (i = 0; i < (int)secretALen; i++) + wolfBoot_printf("%02x", secretA[i]); + wolfBoot_printf("\n"); + ret = -1; + goto cleanup; + } + + wolfBoot_printf("PASS\n"); + ret = 0; + +cleanup: + wc_ecc_free(&keyB); + wc_ecc_free(&keyA); + wc_FreeRng(&rng); + return ret; +} +#endif /* WOLFCRYPT_MAX32666_TEST */ + #define LED_RED_PIN (1UL << 29) #define LED_BLUE_PIN (1UL << 30) #define LED_GREEN_PIN (1UL << 31) @@ -28,7 +267,6 @@ void main(void) hal_init(); - version = wolfBoot_current_firmware_version(); if (version == 1) { @@ -51,6 +289,18 @@ void main(void) wolfBoot_printf("Boot success marked. 
Version: %lu\n", (unsigned long)version); +#ifdef WOLFCRYPT_MAX32666_TEST + /* Initialize MAX32666 TPU hardware */ + if (wc_MXC_TPU_Init() != 0) { + wolfBoot_printf("TPU init failed!\n"); + } else { + wolfBoot_printf("TPU initialized.\n"); + test_aes_gcm_256(); + test_ecdhe_p256(); + wc_MXC_TPU_Shutdown(); + } +#endif + /* Main loop */ while (1) { __asm__ volatile ("nop"); diff --git a/test-app/syscalls.c b/test-app/syscalls.c index 77e0cbe5c3..94091e815c 100644 --- a/test-app/syscalls.c +++ b/test-app/syscalls.c @@ -461,30 +461,68 @@ void *__memcpy_chk(void *dst, const void *src, unsigned int len, } /* ========== Heap allocator (bare-metal) ========== - * Simple allocator using _sbrk. Replaces glibc malloc/free/realloc - * which require TLS and internal glibc state not available bare-metal. */ + * Bump allocator using _sbrk with top-of-heap reclaim. When the most + * recent allocation is freed, the heap pointer is moved back so the + * memory can be reused. This is critical for MAX3266X_SHA: the wolfSSL + * TPU SHA port allocates a msg buffer (malloc), grows it (realloc), and + * frees it (free) for every SHA operation. The HASHDRBG performs dozens + * of SHA-256 calls during wc_InitRng and ECC blinding, which would + * exhaust the heap without reclaim. */ + +static void *last_alloc_ptr; +static unsigned int last_alloc_size; void *malloc(unsigned int size) { - void *p = _sbrk((int)size); + void *p; + /* Round up to 4-byte alignment so that all allocations are naturally + * aligned. Required by peripherals such as the MAX32666 MAA, which + * perform word-wide DMA copies and fault on misaligned buffers. 
*/ + size = (size + 3u) & ~3u; + p = _sbrk((int)size); if (p == (void *)-1) return (void *)0; + last_alloc_ptr = p; + last_alloc_size = size; return p; } void free(void *ptr) { - (void)ptr; /* no-op: bare-metal bump allocator doesn't reclaim */ + if (ptr != (void *)0 && ptr == last_alloc_ptr) { + /* Reclaim the top-of-heap allocation */ + _sbrk(-((int)last_alloc_size)); + last_alloc_ptr = (void *)0; + last_alloc_size = 0; + } + /* Non-top allocations cannot be reclaimed in a bump allocator */ } void *realloc(void *ptr, unsigned int size) { + unsigned int new_size; void *newp; + if (!ptr) return malloc(size); - newp = malloc(size); + + new_size = (size + 3u) & ~3u; + + /* If this is the most recent allocation, extend in place */ + if (ptr == last_alloc_ptr) { + if (new_size <= last_alloc_size) + return ptr; /* shrink or same size: nothing to do */ + /* Grow: request the additional bytes */ + if (_sbrk((int)(new_size - last_alloc_size)) == (void *)-1) + return (void *)0; + last_alloc_size = new_size; + return ptr; + } + + /* Not the most recent allocation: allocate new and copy */ + newp = malloc(new_size); if (newp) - memcpy(newp, ptr, size); /* may over-copy, but safe for bump alloc */ + memcpy(newp, ptr, new_size); /* may over-copy, safe for bump alloc */ return newp; } From c3557bcaea034b67632f908a1a6933715e5edfc3 Mon Sep 17 00:00:00 2001 From: Mattia Moffa Date: Thu, 9 Apr 2026 18:31:51 +0200 Subject: [PATCH 4/7] SHA256 acceleration for bootloader --- arch.mk | 26 ++++++++++++++++++++++++++ config/examples/max32666.config | 2 ++ hal/max32666.c | 6 ++++++ hal/max32666.h | 4 ++++ include/user_settings.h | 14 ++++++++++---- test-app/Makefile | 18 +++++------------- tools/keytools/Makefile | 2 +- 7 files changed, 54 insertions(+), 18 deletions(-) diff --git a/arch.mk b/arch.mk index a3a0cacb03..5dd3f74638 100644 --- a/arch.mk +++ b/arch.mk @@ -192,6 +192,32 @@ ifeq ($(ARCH),ARM) ifeq ($(MAX32666_FTHR2),1) CFLAGS+=-DMAX32666_FTHR2 endif + # MAX3266X TPU hardware 
SHA256 acceleration (requires MSDK_DIR) + ifeq ($(MAX3266X_TPU),1) + NO_ARM_ASM=1 + CFLAGS+=-DWOLFSSL_MAX3266X -DMAX3266X_SHA + CFLAGS+=-DTARGET=MAX32665 -DTARGET_REV=0x4131 + CFLAGS+=-ffunction-sections -fdata-sections + MAX3266X_CFLAGS:= \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Include/MAX32665/ \ + -I$(MSDK_DIR)/Libraries/CMSIS/Device/Maxim/MAX32665/Include/ \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/ \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/ \ + -I$(MSDK_DIR)/Libraries/CMSIS/Include/ + CFLAGS+=$(MAX3266X_CFLAGS) + OBJS+=$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_me14.o \ + $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_reva.o \ + $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/sys_me14.o \ + $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/mxc_delay.o + WOLFCRYPT_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/maxim/max3266x.o + endif + endif + + # max3266x.c requires WOLFSSL_SP_MATH_ALL for MXC_WORD_SIZE, but only the + # MAA math code (MAX3266X_MATH) actually uses it. Add SP_MATH_ALL to just + # this object to satisfy the header, without bloating the bootloader. + ifeq ($(MAX3266X_TPU),1) +$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/maxim/max3266x.o: CFLAGS+=-DWOLFSSL_SP_MATH_ALL endif ifeq ($(TARGET),pic32cz) diff --git a/config/examples/max32666.config b/config/examples/max32666.config index 02dffdb258..7b09039283 100644 --- a/config/examples/max32666.config +++ b/config/examples/max32666.config @@ -9,6 +9,8 @@ RAM_CODE?=1 NVM_FLASH_WRITEONCE?=1 NO_MPU?=1 DEBUG_UART?=1 +MAX3266X_TPU?=1 +MSDK_DIR?=$(PWD)/../msdk WOLFBOOT_SECTOR_SIZE?=0x2000 WOLFBOOT_PARTITION_SIZE?=0x40000 WOLFBOOT_PARTITION_BOOT_ADDRESS?=0x10008000 diff --git a/hal/max32666.c b/hal/max32666.c index 516c09b52f..847e9dbdf9 100644 --- a/hal/max32666.c +++ b/hal/max32666.c @@ -44,6 +44,12 @@ #include "max32666.h" +/* CMSIS variable required by MSDK drivers (mxc_delay.c). + * Only define in bootloader; test-app provides its own. 
*/ +#if defined(WOLFSSL_MAX3266X) && defined(__WOLFBOOT) +uint32_t SystemCoreClock = 96000000; +#endif + /* Helper to access FLC registers by base + offset */ #define FLC_REG(base, off) (*(volatile uint32_t *)((uint32_t)(base) + (off))) diff --git a/hal/max32666.h b/hal/max32666.h index 6ce038a66e..31efb1f526 100644 --- a/hal/max32666.h +++ b/hal/max32666.h @@ -273,7 +273,9 @@ */ #define HIRC8M_FREQ 7372800UL #define HIRC96M_FREQ 96000000UL +#ifndef HIRC_FREQ #define HIRC_FREQ 60000000UL +#endif #ifndef UART_BAUDRATE #define UART_BAUDRATE 115200 @@ -372,7 +374,9 @@ #define UART1B_PINS (UART1B_TX_PIN | UART1B_RX_PIN) /* ============== ARM Cortex-M4 System Registers ============== */ +#ifndef SCB_BASE #define SCB_BASE 0xE000ED00UL +#endif #define SCB_CPUID (*(volatile uint32_t *)(SCB_BASE + 0x00UL)) #define SCB_ICSR (*(volatile uint32_t *)(SCB_BASE + 0x04UL)) #define SCB_VTOR (*(volatile uint32_t *)(SCB_BASE + 0x08UL)) diff --git a/include/user_settings.h b/include/user_settings.h index 15dbf28106..d5cd3b173a 100644 --- a/include/user_settings.h +++ b/include/user_settings.h @@ -157,10 +157,16 @@ extern int tolower(int c); # endif /* SP MATH */ -# if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH_ALL) -# define WOLFSSL_SP_MATH -# define WOLFSSL_SP_SMALL -# define WOLFSSL_HAVE_SP_ECC +# if !defined(USE_FAST_MATH) +# if !defined(WOLFSSL_SP_MATH_ALL) +# define WOLFSSL_SP_MATH +# endif +# ifndef WOLFSSL_SP_SMALL +# define WOLFSSL_SP_SMALL +# endif +# ifndef WOLFSSL_HAVE_SP_ECC +# define WOLFSSL_HAVE_SP_ECC +# endif # endif #define WOLFSSL_PUBLIC_MP diff --git a/test-app/Makefile b/test-app/Makefile index 2f63f12225..e886b337e8 100644 --- a/test-app/Makefile +++ b/test-app/Makefile @@ -144,25 +144,18 @@ endif WOLFCRYPT_SUPPORT=0 # MAX32666 hardware crypto test (MSDK TPU/MAA/TRNG acceleration) +# Base TPU/SHA support and MSDK paths are inherited from arch.mk +# when MAX3266X_TPU=1. 
This block adds the extra features for the +# full crypto test (AES, RNG, MAA math, ECDHE). ifeq ($(TARGET),max32666) ifeq ($(WOLFCRYPT_MAX32666_TEST),1) - MSDK_DIR?=../../msdk CFLAGS += -DWOLFCRYPT_MAX32666_TEST - CFLAGS += -DWOLFSSL_MAX3266X - CFLAGS += -DMAX3266X_RNG -DMAX3266X_AES -DMAX3266X_SHA -DMAX3266X_MATH + CFLAGS += -DMAX3266X_RNG -DMAX3266X_AES -DMAX3266X_MATH CFLAGS += -DHAVE_ECC_CDH -DHAVE_ECC_DHE CFLAGS += -DHAVE_AESGCM -DGCM_TABLE CFLAGS += -DENCRYPT_WITH_AES256 CFLAGS += -DWOLFSSL_SP_MATH_ALL - CFLAGS += -DTARGET=MAX32665 -DTARGET_REV=0x4131 - CFLAGS += -ffunction-sections -fdata-sections - CFLAGS += \ - -I$(MSDK_DIR)/Libraries/PeriphDrivers/Include/MAX32665/ \ - -I$(MSDK_DIR)/Libraries/CMSIS/Device/Maxim/MAX32665/Include/ \ - -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/ \ - -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TRNG/ \ - -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/ \ - -I$(MSDK_DIR)/Libraries/CMSIS/Include/ + CFLAGS += -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TRNG/ WOLFCRYPT_SUPPORT=1 endif endif @@ -596,7 +589,6 @@ endif ifeq ($(TARGET),max32666) LSCRIPT_TEMPLATE=ARM-max32666.ld ifeq ($(WOLFCRYPT_MAX32666_TEST),1) - MSDK_DIR?=../../msdk APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_me14.o APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_reva.o APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TRNG/trng_reva.o diff --git a/tools/keytools/Makefile b/tools/keytools/Makefile index d3cbd35838..3ae5480a51 100644 --- a/tools/keytools/Makefile +++ b/tools/keytools/Makefile @@ -22,7 +22,7 @@ OBJDIR = ./ LIBS = ML_DSA_LEVEL?=2 -CFLAGS+=-DML_DSA_LEVEL=$(ML_DSA_LEVEL) +CFLAGS+=-DML_DSA_LEVEL=$(ML_DSA_LEVEL) -DWOLFSSL_DILITHIUM_NO_CTX LMS_LEVELS?=1 LMS_HEIGHT?=10 From cf31371bf5c995c9b3ec9be6b1a2c1b1a02ab447 Mon Sep 17 00:00:00 2001 From: Mattia Moffa Date: Fri, 19 Jun 2026 19:33:18 +0200 Subject: [PATCH 5/7] Use bank-relative offsets instead of addresses in FLC registers --- hal/max32666.c | 10 
++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hal/max32666.c b/hal/max32666.c index 847e9dbdf9..adce76f6a2 100644 --- a/hal/max32666.c +++ b/hal/max32666.c @@ -64,6 +64,12 @@ static volatile uint32_t* flc_base_for_addr(uint32_t address) return (volatile uint32_t*)FLC1_BASE; } +/* Convert memory-mapped address into physical bank-relative offset */ +static uint32_t flc_phys_addr(uint32_t address) +{ + return address & ((FLASH_SIZE / 2) - 1); +} + /* ============== Watchdog Functions ============== */ static void watchdog_disable(void) @@ -158,7 +164,7 @@ static int RAMFUNCTION flc_write128(uint32_t address, const uint32_t *data, FLC_REG(flc_base, FLC_CLKDIV_OFF) = FLC_CLKDIV_VALUE; /* Set address */ - FLC_REG(flc_base, FLC_ADDR_OFF) = address; + FLC_REG(flc_base, FLC_ADDR_OFF) = flc_phys_addr(address); /* Load 128-bit data (4 x 32-bit words) */ *(volatile uint32_t *)((uint32_t)flc_base + FLC_DATA_OFF + 0x00) = data[0]; @@ -199,7 +205,7 @@ static int RAMFUNCTION flc_page_erase(uint32_t address, FLC_REG(flc_base, FLC_CLKDIV_OFF) = FLC_CLKDIV_VALUE; /* Set address (any address within the page) */ - FLC_REG(flc_base, FLC_ADDR_OFF) = address; + FLC_REG(flc_base, FLC_ADDR_OFF) = flc_phys_addr(address); /* Set erase code and trigger page erase */ FLC_REG(flc_base, FLC_CN_OFF) = From b488c7cb4fd4361cf1ae3cfb73c1a68908eb37b3 Mon Sep 17 00:00:00 2001 From: Mattia Moffa Date: Fri, 19 Jun 2026 21:14:23 +0200 Subject: [PATCH 6/7] Support older Maxim SDK ("LPSDK") --- arch.mk | 39 ++++++++++++++++++++++----------- config/examples/max32666.config | 1 + docs/Targets.md | 2 ++ 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/arch.mk b/arch.mk index 5dd3f74638..60d389bde6 100644 --- a/arch.mk +++ b/arch.mk @@ -195,20 +195,33 @@ ifeq ($(ARCH),ARM) # MAX3266X TPU hardware SHA256 acceleration (requires MSDK_DIR) ifeq ($(MAX3266X_TPU),1) NO_ARM_ASM=1 - CFLAGS+=-DWOLFSSL_MAX3266X -DMAX3266X_SHA - CFLAGS+=-DTARGET=MAX32665 -DTARGET_REV=0x4131 + 
CFLAGS+=-DMAX3266X_SHA CFLAGS+=-ffunction-sections -fdata-sections - MAX3266X_CFLAGS:= \ - -I$(MSDK_DIR)/Libraries/PeriphDrivers/Include/MAX32665/ \ - -I$(MSDK_DIR)/Libraries/CMSIS/Device/Maxim/MAX32665/Include/ \ - -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/ \ - -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/ \ - -I$(MSDK_DIR)/Libraries/CMSIS/Include/ - CFLAGS+=$(MAX3266X_CFLAGS) - OBJS+=$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_me14.o \ - $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_reva.o \ - $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/sys_me14.o \ - $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/mxc_delay.o + ifeq ($(MAX3266X_OLD),1) + # Older Maxim SDK tree (flat MAX32665PeriphDriver layout) + CFLAGS+=-DWOLFSSL_MAX3266X_OLD + MAX3266X_CFLAGS:= \ + -I$(MSDK_DIR)/Libraries/MAX32665PeriphDriver/Include/ \ + -I$(MSDK_DIR)/Libraries/CMSIS/Device/Maxim/MAX32665/Include/ \ + -I$(MSDK_DIR)/Libraries/CMSIS/Include/ + CFLAGS+=$(MAX3266X_CFLAGS) + OBJS+=$(MSDK_DIR)/Libraries/MAX32665PeriphDriver/Source/mxc_sys.o \ + $(MSDK_DIR)/Libraries/MAX32665PeriphDriver/Source/mxc_delay.o + else + CFLAGS+=-DWOLFSSL_MAX3266X + CFLAGS+=-DTARGET=MAX32665 -DTARGET_REV=0x4131 + MAX3266X_CFLAGS:= \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Include/MAX32665/ \ + -I$(MSDK_DIR)/Libraries/CMSIS/Device/Maxim/MAX32665/Include/ \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/ \ + -I$(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/ \ + -I$(MSDK_DIR)/Libraries/CMSIS/Include/ + CFLAGS+=$(MAX3266X_CFLAGS) + OBJS+=$(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_me14.o \ + $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_reva.o \ + $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/sys_me14.o \ + $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/mxc_delay.o + endif WOLFCRYPT_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/maxim/max3266x.o endif endif diff --git a/config/examples/max32666.config b/config/examples/max32666.config index 7b09039283..198e521cf4 100644 --- 
a/config/examples/max32666.config +++ b/config/examples/max32666.config @@ -10,6 +10,7 @@ NVM_FLASH_WRITEONCE?=1 NO_MPU?=1 DEBUG_UART?=1 MAX3266X_TPU?=1 +MAX3266X_OLD?=0 MSDK_DIR?=$(PWD)/../msdk WOLFBOOT_SECTOR_SIZE?=0x2000 WOLFBOOT_PARTITION_SIZE?=0x40000 diff --git a/docs/Targets.md b/docs/Targets.md index 97532a2fe9..375910b13b 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -8587,6 +8587,8 @@ Boot success marked. Version: 1 | `DEBUG_UART` | Enable UART0 debug output (115200 baud, 8N1). | | `EXT_FLASH` | Enable external flash support (for QSPI NAND configuration). | | `FLAGS_HOME` | Keep boot flags in internal flash (required when `EXT_FLASH=1`). | +| `MAX3266X_TPU` | Enable TPU hardware SHA256 acceleration (requires `MSDK_DIR`). | +| `MAX3266X_OLD` | Build TPU acceleration against the older, deprecated Maxim SDK tree instead of the modern MSDK. | ### MAX32666: External QSPI NAND Configuration From a700ab3098e57fe4c309ae29f0cf93c3d8f07acb Mon Sep 17 00:00:00 2001 From: Mattia Moffa Date: Sat, 20 Jun 2026 05:20:41 +0200 Subject: [PATCH 7/7] Clean up build flag usage, use trng healthtest in test-app --- arch.mk | 11 ++--------- config/examples/max32666.config | 2 +- src/string.c | 7 ++----- test-app/Makefile | 2 ++ test-app/app_max32666.c | 6 ------ tools/keytools/Makefile | 2 +- 6 files changed, 8 insertions(+), 22 deletions(-) diff --git a/arch.mk b/arch.mk index 60d389bde6..35f71fef5a 100644 --- a/arch.mk +++ b/arch.mk @@ -140,7 +140,7 @@ endif ## ARM Cortex-M ifeq ($(ARCH),ARM) CROSS_COMPILE?=arm-none-eabi- - CFLAGS+=-DARCH_ARM -DFAST_MEMCPY + CFLAGS+=-DARCH_ARM CFLAGS+=-mthumb -mlittle-endian LDFLAGS+=-mthumb -mlittle-endian ifeq ($(USE_GCC),1) @@ -195,7 +195,7 @@ ifeq ($(ARCH),ARM) # MAX3266X TPU hardware SHA256 acceleration (requires MSDK_DIR) ifeq ($(MAX3266X_TPU),1) NO_ARM_ASM=1 - CFLAGS+=-DMAX3266X_SHA + CFLAGS+=-DMAX3266X_SHA -DFAST_MEMCPY CFLAGS+=-ffunction-sections -fdata-sections ifeq ($(MAX3266X_OLD),1) # Older Maxim SDK tree (flat 
MAX32665PeriphDriver layout) @@ -226,13 +226,6 @@ ifeq ($(ARCH),ARM) endif endif - # max3266x.c requires WOLFSSL_SP_MATH_ALL for MXC_WORD_SIZE, but only the - # MAA math code (MAX3266X_MATH) actually uses it. Add SP_MATH_ALL to just - # this object to satisfy the header, without bloating the bootloader. - ifeq ($(MAX3266X_TPU),1) -$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/maxim/max3266x.o: CFLAGS+=-DWOLFSSL_SP_MATH_ALL - endif - ifeq ($(TARGET),pic32cz) ARCH_FLASH_OFFSET=0x08000000 CORTEX_M7=1 diff --git a/config/examples/max32666.config b/config/examples/max32666.config index 198e521cf4..681f1eb392 100644 --- a/config/examples/max32666.config +++ b/config/examples/max32666.config @@ -9,7 +9,7 @@ RAM_CODE?=1 NVM_FLASH_WRITEONCE?=1 NO_MPU?=1 DEBUG_UART?=1 -MAX3266X_TPU?=1 +MAX3266X_TPU?=0 MAX3266X_OLD?=0 MSDK_DIR?=$(PWD)/../msdk WOLFBOOT_SECTOR_SIZE?=0x2000 diff --git a/src/string.c b/src/string.c index c4894adaf6..b9c94491ca 100644 --- a/src/string.c +++ b/src/string.c @@ -101,10 +101,7 @@ void *memset(void *s, int c, size_t n) unsigned char *d = (unsigned char *)s; unsigned char uc = (unsigned char)c; -#if defined(ARCH_ARM) || defined(ARCH_AARCH64) - /* Use word-sized writes when aligned — required for MMIO peripheral - * registers on ARM (APB bus does not support sub-word writes). 
*/ - +#ifdef FAST_MEMCPY /* Write bytes until the pointer is 4-byte aligned */ while (n > 0 && ((uintptr_t)d & 3U)) { *d++ = uc; @@ -121,7 +118,7 @@ void *memset(void *s, int c, size_t n) } d = (unsigned char *)dw; } -#endif /* ARCH_ARM || ARCH_AARCH64 */ +#endif /* FAST_MEMCPY */ while (n--) { *d++ = uc; diff --git a/test-app/Makefile b/test-app/Makefile index e886b337e8..61dc833054 100644 --- a/test-app/Makefile +++ b/test-app/Makefile @@ -592,6 +592,8 @@ ifeq ($(TARGET),max32666) APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_me14.o APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TPU/tpu_reva.o APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TRNG/trng_reva.o + APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TRNG/trng_me14.o + APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/TRNG/trng_revb.o APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/sys_me14.o APP_OBJS += $(MSDK_DIR)/Libraries/PeriphDrivers/Source/SYS/mxc_delay.o APP_OBJS += $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/maxim/max3266x.o diff --git a/test-app/app_max32666.c b/test-app/app_max32666.c index 0ee35f6f7e..10110b2bce 100644 --- a/test-app/app_max32666.c +++ b/test-app/app_max32666.c @@ -23,12 +23,6 @@ * wolfBoot configures HIRC96M = 96 MHz in hal_init(). */ uint32_t SystemCoreClock = 96000000; -/* MAX32665/MAX32666 TRNG does not implement on-demand health test in hardware. - * The MSDK trng_revb driver assumes a 3-register layout (ctrl/status/data) but - * the actual silicon only has 2 registers (cn/data). Provide a stub so that - * wolfSSL's wc_GenerateSeed() can proceed. 
*/ -int MXC_TRNG_HealthTest(void) { return 0; } - #include #include #include diff --git a/tools/keytools/Makefile b/tools/keytools/Makefile index 3ae5480a51..d3cbd35838 100644 --- a/tools/keytools/Makefile +++ b/tools/keytools/Makefile @@ -22,7 +22,7 @@ OBJDIR = ./ LIBS = ML_DSA_LEVEL?=2 -CFLAGS+=-DML_DSA_LEVEL=$(ML_DSA_LEVEL) -DWOLFSSL_DILITHIUM_NO_CTX +CFLAGS+=-DML_DSA_LEVEL=$(ML_DSA_LEVEL) LMS_LEVELS?=1 LMS_HEIGHT?=10