diff --git a/core/exception_vectors.S b/core/exception_vectors.S
index df04ab3..a390cd3 100644
--- a/core/exception_vectors.S
+++ b/core/exception_vectors.S
@@ -44,6 +44,8 @@ LoadStoreErrorHandlerStack:
     .word 0 # a2
     .word 0 # a3
     .word 0 # a4
+    .word 0 # a5
+    .word 0 # a6
 
     .balign 4
     .global debug_saved_ctx
@@ -123,7 +125,11 @@ DoubleExceptionVector:
  *
  * Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause
  *
- * (Fast path (no branches) is for L8UI)
+ * Accesses can be assumed aligned here, as unaligned accesses would have generated
+ * an unaligned exception (9) before getting here.
+ *
+ * Fast path (no branches) is for L8UI from low registers 0, 2-5, and the fast
+ * store path is S8I for low registers 0, 2-6.
  */
 
     .literal_position
@@ -141,6 +147,8 @@ LoadStoreErrorHandler:
     s32i a4, sp, 0x10
     rsr a0, sar         # Save SAR in a0 to restore later
 
+    # led_on a2, a3
+
     /* Examine the opcode which generated the exception */
     /* Note: Instructions are in this order to avoid pipeline stalls. */
     rsr a2, epc1
@@ -185,6 +193,8 @@ LoadStoreErrorHandler:
     addi a3, a3, 0x3
     wsr a3, epc1
 
+    # led_off a0, a3
+
     /* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
      * per entry (so we can avoid a second jump by just doing a RFE inside
      * each entry). Unfortunately, however, Xtensa doesn't have an addx16
@@ -214,25 +224,11 @@ LoadStoreErrorHandler:
     rsr a1, excsave1    # restore a1 saved by UserExceptionVector
     rfe
 
-.LSE_assign_reg:
-    /* At this point, a2 contains the register number times 2, a4 is the
-     * read value. */
-
-    /* Calculate the jumptable address, and restore all regs except a2 and
-     * a4 so we have less to do after jumping. */
-    /* Note: Instructions are in this order to avoid pipeline stalls. */
-    movi a3, .LSE_jumptable_base
-    l32i a0, sp, 0
-    addx8 a2, a2, a3    # a2 is now the address to jump to
-    l32i a3, sp, 0x0c
-
-    jx a2
-
     .balign 4
 .LSE_check_l16:
     /* At this point, a2 contains the opcode, a3 is masked opcode */
     movi a4, 0x001002   # l16si or l16ui opcode after masking
-    bne a3, a4, .LSE_wrong_opcode
+    bne a3, a4, .LSE_check_s8i
 
     /* Note: At this point, the opcode could be one of two things:
      *     xx1xx2 (L16UI)
@@ -255,32 +251,91 @@ LoadStoreErrorHandler:
     or a4, a3, a4       # set 32-bit sign bits
     j .LSE_post_fetch
 
-.LSE_wrong_opcode:
-    /* If we got here it's not an opcode we can try to fix, so bomb out.
-     * Restore registers so any dump the fatal exception routine produces
-     * will have correct values */
-    wsr a0, sar
-    l32i a0, sp, 0
-    /*l32i a2, sp, 0x08*/
-    l32i a3, sp, 0x0c
-    l32i a4, sp, 0x10
-    rsr a1, excsave1
-    mov a2, a1
-    movi a3, 0
-    call0 fatal_exception_handler
-
     .balign 4
-.LSE_assign_a1:
-    /* a1 is saved in excsave1, so just update that with the value, */
-    wsr a4, excsave1
-    /* Then restore all regs and return */
+.LSE_check_s8i:
+    /* At this point, a2 contains the opcode */
+    movi a3, 0x00F00F   # opcode mask for s8i/s16i
+    s32i a5, sp, 0x14   # Save a5, needed for store op
+    and a3, a2, a3      # a3 is masked instruction
+    movi a4, 0x004002   # s8i opcode after masking
+    s32i a6, sp, 0x18   # Save a6, needed for store op
+    bne a3, a4, .LSE_check_s16i
+
+    /* Note: At this point, the opcode is s8i */
+    movi a5, 0x000000ff # source mask
+.LSE_store:
+    /* We jump here for either S8I or S16I to get the address and load
+     * and mask the current contents. */
+    movi a4, ~3
+    rsr a3, excvaddr    # read faulting address
+    and a4, a3, a4      # a4 now word aligned address
+    ssa8b a3            # sar is now left shift amount
+    sll a3, a5
+    movi a6, 0xffffffff
+    xor a6, a6, a3      # a6 now has the word mask
+    l32i a3, a4, 0      # read the current word
+    and a3, a3, a6      # a3 now has the masked word
+    extui a2, a2, 4, 4  # a2 is now source register 0-15
+
+    /* At this point, a2 contains the source register 0-15, a3 contains the
+     * masked memory contents, a4 contains the address, a5 contains the source
+     * mask, and sar contains the left shift amount. */
+    bgei a2, 7, .LSE_load_reg   # a7..a15 use jumptable
+    beqi a2, 1, .LSE_load_a1    # a1 uses a special routine
+
+    /* We're loading from a0 or a2..a6, which are all saved in our "stack"
+     * area. Calculate the correct address and load the value there. */
+    addx4 a2, a2, sp
+    l32i a2, a2, 0
+
+.LSE_store_apply:
+    and a2, a2, a5      # mask the source
+    sll a2, a2          # shift the source
+    or a3, a3, a2       # combine with the masked memory contents
+    s32i a3, a4, 0      # write back to memory
+
+    /* Note: Instructions are in this order to avoid pipeline stalls */
+    rsr a3, epc1
+    wsr a0, sar
+    addi a3, a3, 0x3
+    wsr a3, epc1
+
+    # led_off a2, a3
+
+    /* Restore all regs and return */
     l32i a0, sp, 0
     l32i a2, sp, 0x08
     l32i a3, sp, 0x0c
     l32i a4, sp, 0x10
-    rsr a1, excsave1
+    l32i a5, sp, 0x14
+    l32i a6, sp, 0x18
+    rsr a1, excsave1    # restore a1 saved by UserExceptionVector
     rfe
 
+    .balign 4
+.LSE_check_s16i:
+    /* At this point, a2 contains the opcode */
+    movi a4, 0x005002   # s16i opcode after masking
+    bne a3, a4, .LSE_wrong_opcode
+    /* Note: At this point, the opcode is s16i */
+    movi a5, 0x0000ffff # source mask
+    j .LSE_store
+
+    .balign 4
+.LSE_assign_reg:
+    /* At this point, a2 contains the register number times 2, a4 is the
+     * read value. */
+
+    /* Calculate the jumptable address, and restore all regs except a2 and
+     * a4 so we have less to do after jumping. */
+    /* Note: Instructions are in this order to avoid pipeline stalls. */
+    movi a3, .LSE_jumptable_base
+    l32i a0, sp, 0
+    addx8 a2, a2, a3    # a2 is now the address to jump to
+    l32i a3, sp, 0x0c
+
+    jx a2
+
     .balign 4
 .LSE_jumptable:
     /* The first 5 entries (80 bytes) of this table are unused (registers
@@ -366,6 +421,81 @@ LoadStoreErrorHandler:
     rsr a1, excsave1
     rfe
 
+    .balign 4
+.LSE_assign_a1:
+    /* a1 is saved in excsave1, so just update that with the value, */
+    wsr a4, excsave1
+    /* Then restore all regs and return */
+    l32i a0, sp, 0
+    l32i a2, sp, 0x08
+    l32i a3, sp, 0x0c
+    l32i a4, sp, 0x10
+    rsr a1, excsave1
+    rfe
+
+    .balign 4
+.LSE_load_reg:
+    /* Calculate the jumptable address. */
+    movi a6, .LSE_store_jumptable_base
+    addx8 a2, a2, a6    # a2 is now the address to jump to
+    jx a2
+
+    .balign 4
+.LSE_store_jumptable:
+    /* The first 7 entries (56 bytes) of this table are unused (registers
+     * a0..a6 are handled separately above). Rather than have a whole bunch
+     * of wasted space, we just pretend that the table starts 56 bytes
+     * earlier in memory. */
+    .set .LSE_store_jumptable_base, .LSE_store_jumptable - (8 * 7)
+
+    mov a2, a7
+    j .LSE_store_apply
+    .balign 4
+    mov a2, a8
+    j .LSE_store_apply
+    .balign 4
+    mov a2, a9
+    j .LSE_store_apply
+    .balign 4
+    mov a2, a10
+    j .LSE_store_apply
+    .balign 4
+    mov a2, a11
+    j .LSE_store_apply
+    .balign 4
+    mov a2, a12
+    j .LSE_store_apply
+    .balign 4
+    mov a2, a13
+    j .LSE_store_apply
+    .balign 4
+    mov a2, a14
+    j .LSE_store_apply
+    .balign 4
+    mov a2, a15
+    j .LSE_store_apply
+    .balign 4
+
+.LSE_load_a1:
+    /* a1 is saved in excsave1, so just read the value, */
+    rsr a2, excsave1
+    j .LSE_store_apply
+
+    .balign 4
+.LSE_wrong_opcode:
+    /* If we got here it's not an opcode we can try to fix, so bomb out.
+     * Restore registers so any dump the fatal exception routine produces
+     * will have correct values */
+    wsr a0, sar
+    l32i a0, sp, 0
+    /*l32i a2, sp, 0x08*/
+    l32i a3, sp, 0x0c
+    l32i a4, sp, 0x10
+    rsr a1, excsave1
+    mov a2, a1
+    movi a3, 0
+    call0 fatal_exception_handler
+
 /*************************** Debug exception handler *************************/
 
 .section .vecbase.text, "x"
@@ -422,7 +552,7 @@ call_user_start:
 NMIExceptionHandler:
     .type NMIExceptionHandler, @function
 
-        wsr sp, excsave3 # excsave3 holds user stack
+    wsr sp, excsave3    # excsave3 holds user stack
     movi sp, .NMIHandlerStackTop - 0x40
     s32i a0, sp, 0x00
     s32i a2, sp, 0x04
@@ -460,28 +590,28 @@ NMIExceptionHandler:
     movi a0, NMIHandlerStack
     l32i a3, a0, 0
     movi a2, NMI_STACK_CANARY
-        bne a3, a2, .NMIFatalStackOverflow
+    bne a3, a2, .NMIFatalStackOverflow
 
-        l32i a0, sp, 0x3c
-        wsr a0, sar
-        l32i a0, sp, 0x38
-        wsr a0, excvaddr
-        l32i a0, sp, 0x34
-        wsr a0, excsave1
-        l32i a0, sp, 0x30
-        wsr a0, exccause
-        l32i a0, sp, 0x2c
-        wsr a0, epc1
-        l32i a11, sp, 0x28
-        l32i a10, sp, 0x24
-        l32i a9, sp, 0x20
-        l32i a8, sp, 0x1c
-        l32i a7, sp, 0x18
-        l32i a6, sp, 0x14
-        l32i a5, sp, 0x10
-        l32i a4, sp, 0x0c
-        l32i a3, sp, 0x08
-        movi a0, 0x33 # Reset PS
+    l32i a0, sp, 0x3c
+    wsr a0, sar
+    l32i a0, sp, 0x38
+    wsr a0, excvaddr
+    l32i a0, sp, 0x34
+    wsr a0, excsave1
+    l32i a0, sp, 0x30
+    wsr a0, exccause
+    l32i a0, sp, 0x2c
+    wsr a0, epc1
+    l32i a11, sp, 0x28
+    l32i a10, sp, 0x24
+    l32i a9, sp, 0x20
+    l32i a8, sp, 0x1c
+    l32i a7, sp, 0x18
+    l32i a6, sp, 0x14
+    l32i a5, sp, 0x10
+    l32i a4, sp, 0x0c
+    l32i a3, sp, 0x08
+    movi a0, 0x33       # Reset PS
     wsr a0, ps
     rsync
     /* set dport nmi status to 1 (wDev_ProcessFiq clears bit 0 and verifies it
@@ -491,10 +621,10 @@
     movi a0, 0x3ff00000
     movi a2, 0x1
     s32i a2, a0, 0
-        l32i a2, sp, 0x04
-        l32i a0, sp, 0x00
-        movi a1, 0x0
-        xsr a1, excsave3 # Load stack back from excsave3, clear excsave3
+    l32i a2, sp, 0x04
+    l32i a0, sp, 0x00
+    movi a1, 0x0
+    xsr a1, excsave3    # Load stack back from excsave3, clear excsave3
     rfi 3
 
 .section .rodata
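Note on the new store path: .LSE_store / .LSE_store_apply emulate S8I and S16I to memory that only accepts 32-bit accesses (e.g. byte writes to IRAM on the ESP8266) by rewriting the word that contains the target byte or halfword: load the aligned word, clear the target lane with a shifted mask, merge in the masked source value, write the word back, then advance EPC1 by 3 past the faulting opcode. The C sketch below is only an illustration of that read-modify-write sequence, assuming a little-endian core and an access that is already naturally aligned; the names emulate_store and source_mask are hypothetical and do not appear in the handler.

    #include <stdint.h>

    /* Illustration of the technique only, not the handler itself.
     * 'addr' plays the role of EXCVADDR, 'value' is the source register,
     * and 'source_mask' is 0xff for S8I or 0xffff for S16I. */
    static void emulate_store(uintptr_t addr, uint32_t value, uint32_t source_mask)
    {
        volatile uint32_t *word = (volatile uint32_t *)(addr & ~(uintptr_t)3);
        unsigned shift = (addr & 3) * 8;          /* same effect as the ssa8b/sll pair */
        uint32_t keep = ~(source_mask << shift);  /* clear the target byte/halfword lane */

        /* Merge in a register, then issue a single 32-bit write back. */
        *word = (*word & keep) | ((value & source_mask) << shift);
    }

Doing the merge in registers and issuing one 32-bit store is what allows narrow stores to work against memory regions that ignore or fault on sub-word accesses, which is exactly the case this handler exists to cover.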