I-RAM memory store support for 8-bit and 16-bit stores

This commit is contained in:
Our Air Quality 2017-12-19 14:27:49 +11:00
parent a8c60e0960
commit 8ea1774e1d

View file

@ -44,6 +44,8 @@ LoadStoreErrorHandlerStack:
.word 0 # a2 .word 0 # a2
.word 0 # a3 .word 0 # a3
.word 0 # a4 .word 0 # a4
.word 0 # a5
.word 0 # a6
.balign 4 .balign 4
.global debug_saved_ctx .global debug_saved_ctx
@ -123,7 +125,11 @@ DoubleExceptionVector:
* *
* Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause * Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause
* *
* (Fast path (no branches) is for L8UI) * Accesses can be assumed aligned here as unaligned accesses would have generated
* an unaligned exception (9) before getting here.
*
* Fast path (no branches) is for L8UI from low registers 0, 2-5, and the fast
* store path is S8I for low registers 0, 2-7.
*/ */
.literal_position .literal_position
@ -141,6 +147,8 @@ LoadStoreErrorHandler:
s32i a4, sp, 0x10 s32i a4, sp, 0x10
rsr a0, sar # Save SAR in a0 to restore later rsr a0, sar # Save SAR in a0 to restore later
# led_on a2, a3
/* Examine the opcode which generated the exception */ /* Examine the opcode which generated the exception */
/* Note: Instructions are in this order to avoid pipeline stalls. */ /* Note: Instructions are in this order to avoid pipeline stalls. */
rsr a2, epc1 rsr a2, epc1
@ -185,6 +193,8 @@ LoadStoreErrorHandler:
addi a3, a3, 0x3 addi a3, a3, 0x3
wsr a3, epc1 wsr a3, epc1
# led_off a0, a3
/* Stupid opcode tricks: The jumptable we use later on needs 16 bytes /* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
* per entry (so we can avoid a second jump by just doing a RFE inside * per entry (so we can avoid a second jump by just doing a RFE inside
* each entry). Unfortunately, however, Xtensa doesn't have an addx16 * each entry). Unfortunately, however, Xtensa doesn't have an addx16
@ -214,25 +224,11 @@ LoadStoreErrorHandler:
rsr a1, excsave1 # restore a1 saved by UserExceptionVector rsr a1, excsave1 # restore a1 saved by UserExceptionVector
rfe rfe
.LSE_assign_reg:
/* At this point, a2 contains the register number times 2, a4 is the
* read value. */
/* Calculate the jumptable address, and restore all regs except a2 and
* a4 so we have less to do after jumping. */
/* Note: Instructions are in this order to avoid pipeline stalls. */
movi a3, .LSE_jumptable_base
l32i a0, sp, 0
addx8 a2, a2, a3 # a2 is now the address to jump to
l32i a3, sp, 0x0c
jx a2
.balign 4 .balign 4
.LSE_check_l16: .LSE_check_l16:
/* At this point, a2 contains the opcode, a3 is masked opcode */ /* At this point, a2 contains the opcode, a3 is masked opcode */
movi a4, 0x001002 # l16si or l16ui opcode after masking movi a4, 0x001002 # l16si or l16ui opcode after masking
bne a3, a4, .LSE_wrong_opcode bne a3, a4, .LSE_check_s8i
/* Note: At this point, the opcode could be one of two things: /* Note: At this point, the opcode could be one of two things:
* xx1xx2 (L16UI) * xx1xx2 (L16UI)
@ -255,32 +251,91 @@ LoadStoreErrorHandler:
or a4, a3, a4 # set 32-bit sign bits or a4, a3, a4 # set 32-bit sign bits
j .LSE_post_fetch j .LSE_post_fetch
.LSE_wrong_opcode:
/* If we got here it's not an opcode we can try to fix, so bomb out.
* Restore registers so any dump the fatal exception routine produces
* will have correct values */
wsr a0, sar
l32i a0, sp, 0
/*l32i a2, sp, 0x08*/
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
mov a2, a1
movi a3, 0
call0 fatal_exception_handler
.balign 4 .balign 4
.LSE_assign_a1: .LSE_check_s8i:
/* a1 is saved in excsave1, so just update that with the value. */ /* At this point, a2 contains the opcode */
wsr a4, excsave1 movi a3, 0x00F00F # opcode mask for s8i/s16i
/* Then restore all regs and return */ s32i a5, sp, 0x14 # Save a5, needed for store op
and a3, a2, a3 # a3 is masked instruction
movi a4, 0x004002 # s8i opcode after masking
s32i a6, sp, 0x18 # Save a6, needed for store op
bne a3, a4, .LSE_check_s16i
/* Note: At this point, the opcode is s8i */
movi a5, 0x000000ff # source mask
.LSE_store:
/* We jump here for either S8I or S16I to compute the target word address
* and to load and mask the current memory contents. */
movi a4, ~3
rsr a3, excvaddr # read faulting address
and a4, a3, a4 # a4 now word aligned address
ssa8b a3 # sar is now left shift amount
sll a3, a5
movi a6, 0xffffffff
xor a6, a6, a3 # a6 now has the word mask
l32i a3, a4, 0 # read the current word
and a3, a3, a6 # a3 now has the masked word
extui a2, a2, 4, 4 # a2 is now source register 0-15
/* At this point, a2 contains the source register 0-15, a3 contains the
* masked memory contents, a4 contains the address, a5 contains the source
* mask, and sar contains the left shift amount. */
bgei a2, 7, .LSE_load_reg # a7..a15 use jumptable
beqi a2, 1, .LSE_load_a1 # a1 uses a special routine
/* We're loading from a0 or a2..a6, which are all saved in our "stack"
* area. Calculate the correct address and load the value there. */
addx4 a2, a2, sp
l32i a2, a2, 0
.LSE_store_apply:
and a2, a2, a5 # mask the source
sll a2, a2 # shift the source
or a3, a3, a2 # combine with the masked memory contents
s32i a3, a4, 0 # write back to memory
/* Note: Instructions are in this order to avoid pipeline stalls */
rsr a3, epc1
wsr a0, sar
addi a3, a3, 0x3
wsr a3, epc1
# led_off a2, a3
/* Restore all regs and return */
l32i a0, sp, 0 l32i a0, sp, 0
l32i a2, sp, 0x08 l32i a2, sp, 0x08
l32i a3, sp, 0x0c l32i a3, sp, 0x0c
l32i a4, sp, 0x10 l32i a4, sp, 0x10
rsr a1, excsave1 l32i a5, sp, 0x14
l32i a6, sp, 0x18
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
rfe rfe
.balign 4
.LSE_check_s16i:
/* At this point, a2 contains the opcode */
movi a4, 0x005002 # s16i opcode after masking
bne a3, a4, .LSE_wrong_opcode
/* Note: At this point, the opcode is s16i */
movi a5, 0x0000ffff # source mask
j .LSE_store
.balign 4
.LSE_assign_reg:
/* At this point, a2 contains the register number times 2, a4 is the
* read value. */
/* Calculate the jumptable address, and restore all regs except a2 and
* a4 so we have less to do after jumping. */
/* Note: Instructions are in this order to avoid pipeline stalls. */
movi a3, .LSE_jumptable_base
l32i a0, sp, 0
addx8 a2, a2, a3 # a2 is now the address to jump to
l32i a3, sp, 0x0c
jx a2
.balign 4 .balign 4
.LSE_jumptable: .LSE_jumptable:
/* The first 5 entries (80 bytes) of this table are unused (registers /* The first 5 entries (80 bytes) of this table are unused (registers
@ -366,6 +421,81 @@ LoadStoreErrorHandler:
rsr a1, excsave1 rsr a1, excsave1
rfe rfe
.balign 4
.LSE_assign_a1:
/* a1 is saved in excsave1, so just update that with the value. */
wsr a4, excsave1
/* Then restore all regs and return */
l32i a0, sp, 0
l32i a2, sp, 0x08
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
rfe
.balign 4
.LSE_load_reg:
/* Calculate the jumptable address. */
movi a6, .LSE_store_jumptable_base
addx8 a2, a2, a6 # a2 is now the address to jump to
jx a2
.balign 4
.LSE_store_jumptable:
/* The first 7 entries (56 bytes) of this table are unused (registers
* a0..a6 are handled separately above). Rather than have a whole bunch
* of wasted space, we just pretend that the table starts 56 bytes
* earlier in memory. */
.set .LSE_store_jumptable_base, .LSE_store_jumptable - (8 * 7)
mov a2, a7
j .LSE_store_apply
.balign 4
mov a2, a8
j .LSE_store_apply
.balign 4
mov a2, a9
j .LSE_store_apply
.balign 4
mov a2, a10
j .LSE_store_apply
.balign 4
mov a2, a11
j .LSE_store_apply
.balign 4
mov a2, a12
j .LSE_store_apply
.balign 4
mov a2, a13
j .LSE_store_apply
.balign 4
mov a2, a14
j .LSE_store_apply
.balign 4
mov a2, a15
j .LSE_store_apply
.balign 4
.LSE_load_a1:
/* a1 is saved in excsave1, so just read the value. */
rsr a2, excsave1
j .LSE_store_apply
.balign 4
.LSE_wrong_opcode:
/* If we got here it's not an opcode we can try to fix, so bomb out.
* Restore registers so any dump the fatal exception routine produces
* will have correct values */
wsr a0, sar
l32i a0, sp, 0
/*l32i a2, sp, 0x08*/
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
mov a2, a1
movi a3, 0
call0 fatal_exception_handler
/*************************** Debug exception handler *************************/ /*************************** Debug exception handler *************************/
.section .vecbase.text, "x" .section .vecbase.text, "x"
@ -422,7 +552,7 @@ call_user_start:
NMIExceptionHandler: NMIExceptionHandler:
.type NMIExceptionHandler, @function .type NMIExceptionHandler, @function
wsr sp, excsave3 # excsave3 holds user stack wsr sp, excsave3 # excsave3 holds user stack
movi sp, .NMIHandlerStackTop - 0x40 movi sp, .NMIHandlerStackTop - 0x40
s32i a0, sp, 0x00 s32i a0, sp, 0x00
s32i a2, sp, 0x04 s32i a2, sp, 0x04
@ -460,28 +590,28 @@ NMIExceptionHandler:
movi a0, NMIHandlerStack movi a0, NMIHandlerStack
l32i a3, a0, 0 l32i a3, a0, 0
movi a2, NMI_STACK_CANARY movi a2, NMI_STACK_CANARY
bne a3, a2, .NMIFatalStackOverflow bne a3, a2, .NMIFatalStackOverflow
l32i a0, sp, 0x3c l32i a0, sp, 0x3c
wsr a0, sar wsr a0, sar
l32i a0, sp, 0x38 l32i a0, sp, 0x38
wsr a0, excvaddr wsr a0, excvaddr
l32i a0, sp, 0x34 l32i a0, sp, 0x34
wsr a0, excsave1 wsr a0, excsave1
l32i a0, sp, 0x30 l32i a0, sp, 0x30
wsr a0, exccause wsr a0, exccause
l32i a0, sp, 0x2c l32i a0, sp, 0x2c
wsr a0, epc1 wsr a0, epc1
l32i a11, sp, 0x28 l32i a11, sp, 0x28
l32i a10, sp, 0x24 l32i a10, sp, 0x24
l32i a9, sp, 0x20 l32i a9, sp, 0x20
l32i a8, sp, 0x1c l32i a8, sp, 0x1c
l32i a7, sp, 0x18 l32i a7, sp, 0x18
l32i a6, sp, 0x14 l32i a6, sp, 0x14
l32i a5, sp, 0x10 l32i a5, sp, 0x10
l32i a4, sp, 0x0c l32i a4, sp, 0x0c
l32i a3, sp, 0x08 l32i a3, sp, 0x08
movi a0, 0x33 # Reset PS movi a0, 0x33 # Reset PS
wsr a0, ps wsr a0, ps
rsync rsync
/* set dport nmi status to 1 (wDev_ProcessFiq clears bit 0 and verifies it /* set dport nmi status to 1 (wDev_ProcessFiq clears bit 0 and verifies it
@ -491,10 +621,10 @@ NMIExceptionHandler:
movi a0, 0x3ff00000 movi a0, 0x3ff00000
movi a2, 0x1 movi a2, 0x1
s32i a2, a0, 0 s32i a2, a0, 0
l32i a2, sp, 0x04 l32i a2, sp, 0x04
l32i a0, sp, 0x00 l32i a0, sp, 0x00
movi a1, 0x0 movi a1, 0x0
xsr a1, excsave3 # Load stack back from excsave3, clear excsave3 xsr a1, excsave3 # Load stack back from excsave3, clear excsave3
rfi 3 rfi 3
.section .rodata .section .rodata