From 7f92bf64be5655725471ffb33d3c326242dd8092 Mon Sep 17 00:00:00 2001
From: Johny Mattsson
Date: Thu, 26 May 2016 15:19:20 +1000
Subject: [PATCH] Completely reworked user exception handler.

Incidentally now also a whole lot faster.
---
 app/Makefile               |   2 +-
 app/user/user_exceptions.c | 145 +++++++++++++++++--------------------
 app/user/user_main.c       |   4 -
 ld/nodemcu.ld              |  14 +---
 4 files changed, 70 insertions(+), 95 deletions(-)

diff --git a/app/Makefile b/app/Makefile
index 5f28ffad..9631a025 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -109,7 +109,6 @@ USED_SDK_LIBS= \
     phy \
     pp \
     smartconfig \
-    ssc \
     ssl \
     wpa \
     wps \
@@ -125,6 +124,7 @@ LINKFLAGS_eagle.app.v6 = \
     -Wl,--wrap=_xtos_set_exception_handler \
     -Wl,-static \
     $(addprefix -u , $(SELECTED_MODULE_SYMS)) \
+    -u _UserExceptionVectorOverride \
     -Wl,--start-group \
     -lhal \
     $(addprefix -l,$(USED_SDK_LIBS)) \
diff --git a/app/user/user_exceptions.c b/app/user/user_exceptions.c
index 74cb9097..d2af9ea0 100644
--- a/app/user/user_exceptions.c
+++ b/app/user/user_exceptions.c
@@ -31,82 +31,71 @@
  * @author Johny Mattsson
  */
-#include "user_exceptions.h"
-
-#define LOAD_MASK 0x00f00fu
-#define L8UI_MATCH 0x000002u
-#define L16UI_MATCH 0x001002u
-#define L16SI_MATCH 0x009002u
-
-
-void load_non_32_wide_handler (struct exception_frame *ef, uint32_t cause)
-{
-  /* If this is not EXCCAUSE_LOAD_STORE_ERROR you're doing it wrong! */
-  (void)cause;
-
-  uint32_t epc1 = ef->epc;
-  uint32_t excvaddr;
-  uint32_t insn;
-  asm (
-    "rsr %0, EXCVADDR;" /* read out the faulting address */
-    "movi a4, ~3;"      /* prepare a mask for the EPC */
-    "and a4, a4, %2;"   /* apply mask for 32bit aligned base */
-    "l32i a5, a4, 0;"   /* load part 1 */
-    "l32i a6, a4, 4;"   /* load part 2 */
-    "ssa8l %2;"         /* set up shift register for src op */
-    "src %1, a6, a5;"   /* right shift to get faulting instruction */
-    :"=r"(excvaddr), "=r"(insn)
-    :"r"(epc1)
-    :"a4", "a5", "a6"
-  );
-
-  uint32_t valmask = 0;
-  uint32_t what = insn & LOAD_MASK;
-
-  if (what == L8UI_MATCH)
-    valmask = 0xffu;
-  else if (what == L16UI_MATCH || what == L16SI_MATCH)
-    valmask = 0xffffu;
-  else
-  {
-die:
-    /* Turns out we couldn't fix this, trigger a system break instead
-     * and hang if the break doesn't get handled. This is effectively
-     * what would happen if the default handler was installed. */
-    asm ("break 1, 1");
-    while (1) {}
-  }
-
-  /* Load, shift and mask down to correct size */
-  uint32_t val = (*(uint32_t *)(excvaddr & ~0x3));
-  val >>= (excvaddr & 0x3) * 8;
-  val &= valmask;
-
-  /* Sign-extend for L16SI, if applicable */
-  if (what == L16SI_MATCH && (val & 0x8000))
-    val |= 0xffff0000;
-
-  int regno = (insn & 0x0000f0u) >> 4;
-  if (regno == 1)
-    goto die;   /* we can't support loading into a1, just die */
-  else if (regno != 0)
-    --regno;    /* account for skipped a1 in exception_frame */
-
-  ef->a_reg[regno] = val; /* carry out the load */
-  ef->epc += 3;           /* resume at following instruction */
-}
-
-
-/**
- * The SDK's user_main function installs a debugging handler regardless
- * of whether there's a proper handler installed for EXCCAUSE_LOAD_STORE_ERROR,
- * which of course breaks everything if we allow that to go through. As such,
- * we use the linker to wrap that call and stop the SDK from shooting itself in
- * its proverbial foot.
+/* Minimal handler function for 8/16bit loads from the mapped SPI flash.
+ * Called from the overridden UserExceptionVector on exception cause 3.
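+ * (Cause 3 is EXCCAUSE_LOAD_STORE_ERROR: an 8/16bit load or store to an
+ * address range that only supports 32bit access, such as the flash mapping.)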
  */
-exception_handler_fn TEXT_SECTION_ATTR
-__wrap__xtos_set_exception_handler (uint32_t cause, exception_handler_fn fn)
-{
-  if (cause != EXCCAUSE_LOAD_STORE_ERROR)
-    __real__xtos_set_exception_handler (cause, fn);
-}
+asm(
+".section \".iram0.text\"\n"
+" .align 4\n"
+"store_mask:\n"
+" .word 0x004000\n"       /* bit 14 is set on store instructions (see note ^) */
+"align_mask:\n"
+" .word ~3\n"             /* mask to get 32bit alignment of addresses */
+".type cause3_handler,@function\n"
+"cause3_handler:\n"
+" wsr a2, EXCSAVE2\n"     /* free up a2 for use too */
+" rsr a2, EPC1\n"         /* get the program counter that caused the exception */
+" ssa8l a2\n"             /* prepare to extract the (unaligned) instruction */
+" l32r a0, align_mask\n"  /* prepare mask for 32bit alignment */
+" and a2, a2, a0\n"       /* get aligned base address of instruction */
+" l32i a0, a2, 0\n"       /* load first part */
+" l32i a2, a2, 4\n"       /* load second part */
+" src a0, a2, a0\n"       /* faulting instruction now in a0 */
+" l32r a2, store_mask\n"
+" and a0, a0, a2\n"       /* test for store bit */
+" bnez a0, 1f\n"          /* it's a store, we don't do stores, get out */
+
+""                        /* As it turns out, the ESP8266 happily does 8/16bit */
+                          /* loads from the mapped SPI flash, but then raises an */
+                          /* exception anyway; we can simply increment the */
+                          /* program counter and be on our merry way, safe in */
+                          /* the knowledge that the loads have already been done. */
+                          /* Note that this only applies to the SPI flash, not */
+                          /* internal ROM or IRAM - if we ever want to support */
+                          /* those we'll need to add the appropriate loading */
+                          /* logic here. For now I see no need for such support. */
+
+" rsr a2, EPC1\n"         /* read the program counter again */
+" addi a2, a2, 3\n"       /* advance program counter past faulting instruction */
+" wsr a2, EPC1\n"         /* and store it back */
+" rsr a2, EXCSAVE2\n"     /* restore a2 */
+" rsr a0, EXCSAVE1\n"     /* restore a0 */
+" rfe\n"                  /* and done! */
+"1:rsr a2, EXCSAVE2\n"    /* we're about to chain, so restore the a2 we clobbered */
+" ret\n"                  /* and hop back into the exception vector code */
+);
+/* Note ^) Except for the S32E instruction, but that's not applicable here,
+ * so we can happily ignore it.
+ */
+
+
+/* Our sneaky override of the UserExceptionVector to allow us to handle 8/16bit
+ * loads from SPI flash. MUST BE <= 32 bytes when compiled, as the next vector
+ * starts there.
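+ * (The next vector is the DoubleExceptionVector, which the linker script in
+ * ld/nodemcu.ld places immediately after this section.)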
+ */ +asm( +".section \".UserExceptionVectorOverride.text\"\n" +".type _UserExceptionVectorOverride,@function\n" +".globl _UserExceptionVectorOverride\n" +"_UserExceptionVectorOverride:\n" +" wsr a0, EXCSAVE1\n" /* free up a0 for a while */ +" rsr a0, EXCCAUSE\n" /* get the exception cause */ +" bnei a0, 3, 2f\n" /* if not EXCCAUSE_LOAD_STORE_ERROR, chain to rtos */ +" j 1f\n" /* jump past noncode bytes for cause3_handler addr */ +" .align 4\n" /* proper alignment for literals */ +" .literal_position\n" /* the linker will put cause3_handler addr here */ +"1:call0 cause3_handler\n" /* handle loads and rfe, stores will return here */ +"2:rsr a0, EXCSAVE1\n" /* restore a0 before we chain */ +" j _UserExceptionVector\n" /* and off we go to rtos */ +); diff --git a/app/user/user_main.c b/app/user/user_main.c index 0e28c1f8..1c53e4dd 100644 --- a/app/user/user_main.c +++ b/app/user/user_main.c @@ -39,9 +39,6 @@ static os_event_t *taskQueue; */ void TEXT_SECTION_ATTR user_start_trampoline (void) { - __real__xtos_set_exception_handler ( - EXCCAUSE_LOAD_STORE_ERROR, load_non_32_wide_handler); - #ifdef LUA_USE_MODULES_RTCTIME // Note: Keep this as close to call_user_start() as possible, since it // is where the cpu clock actually gets bumped to 80MHz. @@ -113,7 +110,6 @@ void nodemcu_init(void) fs_mount(); // test_spiffs(); #endif - // endpoint_setup(); task_post_low(task_get_id(start_lua),'s'); } diff --git a/ld/nodemcu.ld b/ld/nodemcu.ld index 2113076c..69ac944d 100644 --- a/ld/nodemcu.ld +++ b/ld/nodemcu.ld @@ -159,13 +159,6 @@ SECTIONS _rodata_end = ABSOLUTE(.); } >dram0_0_seg :dram0_0_phdr - .UserExceptionVector.literal : AT(LOADADDR(.rodata) + (ADDR(.UserExceptionVector.literal) - ADDR(.rodata))) ALIGN(4) - { - _UserExceptionVector_literal_start = ABSOLUTE(.); - *(.UserExceptionVector.literal) - _UserExceptionVector_literal_end = ABSOLUTE(.); - } >dram0_0_seg :dram0_0_phdr - .bss ALIGN(8) (NOLOAD) : ALIGN(4) { . = ALIGN (8); @@ -206,11 +199,7 @@ SECTIONS LONG(0) LONG(0) . = ALIGN(16); - *(.UserExceptionVector.text) - LONG(0) - LONG(0) - LONG(0) - LONG(0) + KEEP(*(.UserExceptionVectorOverride.text)) . = ALIGN(16); *(.DoubleExceptionVector.text) LONG(0) @@ -218,6 +207,7 @@ SECTIONS LONG(0) LONG(0) . = ALIGN (16); + KEEP(*(.UserExceptionVector.text)) *(.entry.text) *(.init.literal) *(.init)