From bfd38cd1e2282d37e942efddcbef4d587131c03e Mon Sep 17 00:00:00 2001
From: Angus Gratton
Date: Mon, 10 Aug 2015 11:51:46 +1000
Subject: [PATCH] Experimental support for storing const strings in SPI flash or IRAM

As discussed in #11

Known limitations:
- Only supports l8ui/l16ui
- Unoptimised load routine
- Not called from DoubleExceptionHandler but should be
- Doesn't restore state properly when falling through to fatal exception handler
---
 core/exception_vectors.S                     | 128 ++++++++++++++++++
 examples/experiments/unaligned_load/Makefile |   2 +
 .../unaligned_load/unaligned_load.c          | 117 ++++++++++++++++
 3 files changed, 247 insertions(+)
 create mode 100644 examples/experiments/unaligned_load/Makefile
 create mode 100644 examples/experiments/unaligned_load/unaligned_load.c

diff --git a/core/exception_vectors.S b/core/exception_vectors.S
index 43dbd12..3b4a559 100644
--- a/core/exception_vectors.S
+++ b/core/exception_vectors.S
@@ -173,12 +173,14 @@ CallNMIExceptionHandler:
 
 /* Some UserException causes, see table Table 4–64 in ISA reference */
 #define CAUSE_SYSCALL 1
+#define CAUSE_LOADSTORE 3
 #define CAUSE_LVL1INT 4
 
         .type CallUserExceptionHandler, @function
 CallUserExceptionHandler:
         rsr.exccause a0
         beqi a0, CAUSE_SYSCALL, UserSyscallHandler
+        beqi a0, CAUSE_LOADSTORE, UserLoadStoreExceptionHandler
         mov a0, sp
         addi sp, sp, -0x50
         s32i a0, sp, 0x10
@@ -242,6 +244,132 @@ UserSyscallHandler:
         rsr.excsave1 a0
         rfe
 
+
+        .section .bss
+        .global LoadStoreErrorFlag
+        .align 4
+LoadStoreErrorFlag:
+        .long 0
+        .long 0
+        .long 0
+        .long 0
+
+        .section .data
+        .align 4
+PRINT_ADDR:
+        .string "0x%08lx '%c'\r\n"
+PRINT_MULTI:
+        .string "a3=0x%08lx a4=0x%08lx a5=0x%08lx a6=0x%08lx a7=0x%08lx\r\n"
+
+        .text
+        .section .vecbase.text
+        .literal_position
+        .align 4
+        .global UserLoadStoreExceptionHandler
+/* "Fix" LoadStoreException exceptions that are l8ui/l16ui reads from an instruction region (0x4xxxxxxx) */
+UserLoadStoreExceptionHandler:
+        addi sp, sp, -0x40 /* 16-word frame: slot N*4 holds register aN */
+        s32i a2, sp, 0x08
+        rsr.excsave1 a2 /* a0 value */
+        s32i a2, sp, 0x00
+        addi a2, sp, 0x40
+        s32i a2, sp, 0x04 /* original sp value */
+        s32i a3, sp, 0x0c
+        s32i a4, sp, 0x10
+        s32i a5, sp, 0x14
+        s32i a6, sp, 0x18
+        s32i a7, sp, 0x1c
+        s32i a8, sp, 0x20
+        s32i a9, sp, 0x24
+        s32i a10, sp, 0x28
+        s32i a11, sp, 0x2c
+        s32i a12, sp, 0x30
+        s32i a13, sp, 0x34
+        s32i a14, sp, 0x38
+        s32i a15, sp, 0x3c
+
+        /* Check the top nibble of the faulting address is 4, otherwise
+           we can't help out here */
+        rsr.excvaddr a2
+        extui a2, a2, 28, 4
+        bnei a2, 0x4, .Lcant_fix
+
+        /* Load the instruction we failed to execute */
+        rsr.epc1 a3
+        movi a4, ~3
+        and a2, a3, a4 /* a2 is the word aligned address containing epc1 */
+        l32i a4, a2, 0
+        l32i a5, a2, 4
+        rsr.sar a6 /* save the interrupted code's SAR before ssa8l clobbers it */
+        ssa8l a3
+        src a4, a5, a4
+        /* a4 is now the instruction that failed */
+
+        /* example l8ui instr 040c72 */
+        movi a2, 0x00F00F /* l8ui/l16ui opcode mask */
+        and a3, a4, a2
+        movi a8, 0xFF
+        movi a5, 0x000002 /* l8ui opcode after masking */
+        beq a3, a5, .Lcan_fix
+
+        movi a8, 0xFFFF
+        movi a5, 0x001002 /* l16ui opcode after masking */
+        beq a3, a5, .Lcan_fix
+
+.Lcant_fix:
+        /* not an l8ui or an l16ui, or not in the instruction space, so bomb out
+           TODO: the exception dump will have some wrong values in it */
+        movi a2, PRINT_ADDR
+        movi a3, 0xafafafaf
+        call0 printf
+        call0 sdk_user_fatal_exception_handler
+.Lcan_fix:
+        /* verified an 8- or 16-bit read in an instruction address space.
+
+           a4 holds instruction, a8 holds mask, a6 holds the saved SAR
+        */
+        extui a2, a4, 4, 4 /* a2 is destination register 0-15 */
+        slli a2, a2, 2 /* a2 is now offset of destination register, relative to stack pointer */
+
+        rsr.excvaddr a3
+        ssa8l a3 /* sar is the shift to extract a3's byte */
+        movi a4, ~3
+        and a4, a3, a4 /* a4 is word aligned read address */
+
+        l32i a5, a4, 0 /* perform the actual read */
+        srl a5, a5
+        and a5, a5, a8 /* mask off bits we need for an l8/l16 */
+
+        wsr.sar a6 /* put back the SAR value saved before the first ssa8l */
+
+        add a6, sp, a2
+        s32i a5, a6, 0 /* overwrite correct value on register slot @ stack+a2 */
+
+        /* Footer */
+        // Increment PC past the emulated instruction (l8ui/l16ui are 3 bytes long)
+        rsr.epc1 a2
+        addi a3, a2, 0x3
+        wsr.epc1 a3
+
+        // Restore registers (the destination register's slot now holds the loaded value)
+        l32i a0, sp, 0x00
+        l32i a2, sp, 0x08
+        l32i a3, sp, 0x0c
+        l32i a4, sp, 0x10
+        l32i a5, sp, 0x14
+        l32i a6, sp, 0x18
+        l32i a7, sp, 0x1c
+        l32i a8, sp, 0x20
+        l32i a9, sp, 0x24
+        l32i a10, sp, 0x28
+        l32i a11, sp, 0x2c
+        l32i a12, sp, 0x30
+        l32i a13, sp, 0x34
+        l32i a14, sp, 0x38
+        l32i a15, sp, 0x3c
+        l32i sp, sp, 0x04
+        rfe
+
         .global _xt_user_exit
         .type _xt_user_exit, @function
 _xt_user_exit:
diff --git a/examples/experiments/unaligned_load/Makefile b/examples/experiments/unaligned_load/Makefile
new file mode 100644
index 0000000..158bfb3
--- /dev/null
+++ b/examples/experiments/unaligned_load/Makefile
@@ -0,0 +1,2 @@
+PROGRAM=unaligned_load
+include ../../../common.mk
diff --git a/examples/experiments/unaligned_load/unaligned_load.c b/examples/experiments/unaligned_load/unaligned_load.c
new file mode 100644
index 0000000..033c7be
--- /dev/null
+++ b/examples/experiments/unaligned_load/unaligned_load.c
@@ -0,0 +1,117 @@
+/* Times common string operations on the same const string placed in DRAM,
+ * IRAM and flash sections, printing the per-call cost of each operation. */
+#include "esp/rom.h"
+#include "espressif/esp_common.h"
+#include "espressif/sdk_private.h"
+#include "FreeRTOS.h"
+#include "task.h"
+#include "queue.h"
+
+#include "string.h"
+#include "strings.h"
+
+#define TESTSTRING "O hai there! %d %d %d"
+
+const char *dramtest = TESTSTRING;
+const __attribute__((section(".iram1.notrodata"))) char iramtest[] = TESTSTRING;
+const __attribute__((section(".text.notrodata"))) char iromtest[] = TESTSTRING;
+
+INLINED uint32_t get_ccount (void) /* reads the CCOUNT special register */
+{
+    uint32_t ccount;
+    asm volatile ("rsr.ccount %0" : "=a" (ccount));
+    return ccount;
+}
+
+typedef void (* test_with_fn_t)(const char *string); /* signature shared by every test_* below */
+
+char buf[64]; /* destination buffer written by every test */
+
+void test_memcpy_aligned(const char *string)
+{
+    memcpy(buf, string, 16);
+}
+
+void test_memcpy_unaligned(const char *string)
+{
+    memcpy(buf, string, 15);
+}
+
+void test_memcpy_unaligned2(const char *string)
+{
+    memcpy(buf, string+1, 15);
+}
+
+void test_strcpy(const char *string)
+{
+    strcpy(buf, string);
+}
+
+void test_sprintf(const char *string)
+{
+    sprintf(buf, string, 1, 2, 3); /* string is the format: TESTSTRING has three %d */
+}
+
+void test_sprintf_arg(const char *string)
+{
+    sprintf(buf, "%s", string);
+}
+
+void test_naive_strcpy(const char *string)
+{
+    char *to = buf;
+    while((*to++ = *string++))
+        ;
+}
+
+#define TEST_REPEATS 1000
+
+void test_noop(const char *string) /* baseline: measures the loop and call overhead only */
+{
+
+}
+
+uint32_t IRAM run_test(const char *string, test_with_fn_t testfn, const char *testfn_label, uint32_t nullvalue, bool evict_cache)
+{
+    printf(" .. against %30s: ", testfn_label);
+    vPortEnterCritical();
+    uint32_t before = get_ccount();
+    for(int i = 0; i < TEST_REPEATS; i++) {
+        testfn(string);
+        if(evict_cache) {
+            Cache_Read_Disable();
+            Cache_Read_Enable(0,0,1);
+        }
+    }
+    uint32_t after = get_ccount();
+    vPortExitCritical();
+    uint32_t instructions = (after-before)/TEST_REPEATS - nullvalue; /* mean CCOUNT delta per call, minus baseline */
+    printf("%5ld instructions\r\n", (long)instructions); /* cast so the argument matches %ld */
+    return instructions;
+}
+
+void test_string(const char *string, const char *label, bool evict_cache)
+{
+    printf("Testing %s (%p) '%s'\r\n", label, string, string);
+    printf("Formats as: '");
+    printf(string, 1, 2, 3); /* deliberate: the test string is itself a format string */
+    printf("'\r\n");
+    uint32_t nullvalue = run_test(string, test_noop, "null op", 0, evict_cache);
+    run_test(string, test_memcpy_aligned, "memcpy - aligned len", nullvalue, evict_cache);
+    run_test(string, test_memcpy_unaligned, "memcpy - unaligned len", nullvalue, evict_cache);
+    run_test(string, test_memcpy_unaligned2, "memcpy - unaligned start&len", nullvalue, evict_cache);
+    run_test(string, test_strcpy, "strcpy", nullvalue, evict_cache);
+    run_test(string, test_naive_strcpy, "naive strcpy", nullvalue, evict_cache);
+    run_test(string, test_sprintf, "sprintf", nullvalue, evict_cache);
+    run_test(string, test_sprintf_arg, "sprintf format arg", nullvalue, evict_cache);
+}
+
+void user_init(void)
+{
+    sdk_uart_div_modify(0, UART_CLK_FREQ / 115200);
+    printf("\r\n\r\nSDK version:%s\r\n", sdk_system_get_sdk_version());
+    test_string(dramtest, "DRAM", 0);
+    test_string(iramtest, "IRAM", 0);
+    test_string(iromtest, "Cached flash", 0);
+    test_string(iromtest, "'Uncached' flash", 1);
+}