I-RAM memory store support for 8- and 16-bit stores

This commit is contained in:
Our Air Quality 2017-12-19 14:27:49 +11:00
parent a8c60e0960
commit 8ea1774e1d

View file

@ -44,6 +44,8 @@ LoadStoreErrorHandlerStack:
.word 0 # a2
.word 0 # a3
.word 0 # a4
.word 0 # a5
.word 0 # a6
.balign 4
.global debug_saved_ctx
@ -123,7 +125,11 @@ DoubleExceptionVector:
*
* Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause
*
* (Fast path (no branches) is for L8UI)
 * Accesses can be assumed aligned here, as unaligned accesses would have generated
 * an unaligned exception (9) before getting here.
*
 * Fast path (no branches) is for L8UI from low registers 0, 2-5, and the fast
 * store path is S8I for low registers 0, 2-6 (a7..a15 go via the jumptable).
*/
.literal_position
@ -141,6 +147,8 @@ LoadStoreErrorHandler:
s32i a4, sp, 0x10
rsr a0, sar # Save SAR in a0 to restore later
# led_on a2, a3
/* Examine the opcode which generated the exception */
/* Note: Instructions are in this order to avoid pipeline stalls. */
rsr a2, epc1
@ -185,6 +193,8 @@ LoadStoreErrorHandler:
addi a3, a3, 0x3
wsr a3, epc1
# led_off a0, a3
/* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
* per entry (so we can avoid a second jump by just doing a RFE inside
* each entry). Unfortunately, however, Xtensa doesn't have an addx16
@ -214,25 +224,11 @@ LoadStoreErrorHandler:
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
rfe
.LSE_assign_reg:
/* At this point, a2 contains the register number times 2, a4 is the
* read value. */
/* Calculate the jumptable address, and restore all regs except a2 and
* a4 so we have less to do after jumping. */
/* Note: Instructions are in this order to avoid pipeline stalls. */
movi a3, .LSE_jumptable_base
l32i a0, sp, 0
addx8 a2, a2, a3 # a2 is now the address to jump to
l32i a3, sp, 0x0c
jx a2
.balign 4
.LSE_check_l16:
/* At this point, a2 contains the opcode, a3 is masked opcode */
movi a4, 0x001002 # l16si or l16ui opcode after masking
bne a3, a4, .LSE_wrong_opcode
bne a3, a4, .LSE_check_s8i
/* Note: At this point, the opcode could be one of two things:
* xx1xx2 (L16UI)
@ -255,32 +251,91 @@ LoadStoreErrorHandler:
or a4, a3, a4 # set 32-bit sign bits
j .LSE_post_fetch
.LSE_wrong_opcode:
/* If we got here it's not an opcode we can try to fix, so bomb out.
* Restore registers so any dump the fatal exception routine produces
* will have correct values */
wsr a0, sar
l32i a0, sp, 0
/*l32i a2, sp, 0x08*/
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
mov a2, a1
movi a3, 0
call0 fatal_exception_handler
.balign 4
.LSE_assign_a1:
/* a1 is saved in excsave1, so just update that with the value, */
wsr a4, excsave1
/* Then restore all regs and return */
.LSE_check_s8i:
/* At this point, a2 contains the opcode */
movi a3, 0x00F00F # opcode mask for s8i/s16i
s32i a5, sp, 0x14 # Save a5, needed for store op
and a3, a2, a3 # a3 is masked instruction
movi a4, 0x004002 # s8i opcode after masking
s32i a6, sp, 0x18 # Save a6, needed for store op
bne a3, a4, .LSE_check_s16i
/* Note: At this point, the opcode is s8i */
movi a5, 0x000000ff # source mask
.LSE_store:
/* We jump here for either S8I or S16I to get the address and load
* and mask the current contents. */
movi a4, ~3
rsr a3, excvaddr # read faulting address
and a4, a3, a4 # a4 now word aligned address
ssa8b a3 # sar is now left shift amount
sll a3, a5
movi a6, 0xffffffff
xor a6, a6, a3 # a6 now has the word mask
l32i a3, a4, 0 # read the current word
and a3, a3, a6 # a3 now has the masked word
extui a2, a2, 4, 4 # a2 is now source register 0-15
/* At this point, a2 contains the source register 0-15, a3 contains the
* masked memory contents, a4 contains the address, a5 contains the source
* mask, and sar contains the left shift amount. */
bgei a2, 7, .LSE_load_reg # a7..a15 use jumptable
beqi a2, 1, .LSE_load_a1 # a1 uses a special routine
/* We're loading from a0 or a2..a6, which are all saved in our "stack"
* area. Calculate the correct address and load the value there. */
addx4 a2, a2, sp
l32i a2, a2, 0
.LSE_store_apply:
and a2, a2, a5 # mask the source
sll a2, a2 # shift the source
or a3, a3, a2 # combine with the masked memory contents
s32i a3, a4, 0 # write back to memory
/* Note: Instructions are in this order to avoid pipeline stalls */
rsr a3, epc1
wsr a0, sar
addi a3, a3, 0x3
wsr a3, epc1
# led_off a2, a3
/* Restore all regs and return */
l32i a0, sp, 0
l32i a2, sp, 0x08
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
l32i a5, sp, 0x14
l32i a6, sp, 0x18
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
rfe
.balign 4
.LSE_check_s16i:
/* At this point, a2 contains the opcode */
movi a4, 0x005002 # s16i opcode after masking
bne a3, a4, .LSE_wrong_opcode
/* Note: At this point, the opcode is s16i */
movi a5, 0x0000ffff # source mask
j .LSE_store
.balign 4
.LSE_assign_reg:
/* At this point, a2 contains the register number times 2, a4 is the
* read value. */
/* Calculate the jumptable address, and restore all regs except a2 and
* a4 so we have less to do after jumping. */
/* Note: Instructions are in this order to avoid pipeline stalls. */
movi a3, .LSE_jumptable_base
l32i a0, sp, 0
addx8 a2, a2, a3 # a2 is now the address to jump to
l32i a3, sp, 0x0c
jx a2
.balign 4
.LSE_jumptable:
/* The first 5 entries (80 bytes) of this table are unused (registers
@ -366,6 +421,81 @@ LoadStoreErrorHandler:
rsr a1, excsave1
rfe
.balign 4
.LSE_assign_a1:
/* a1 is saved in excsave1, so just update that with the value, */
wsr a4, excsave1
/* Then restore all regs and return */
l32i a0, sp, 0
l32i a2, sp, 0x08
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
rfe
.balign 4
.LSE_load_reg:
/* Calculate the jumptable address. */
movi a6, .LSE_store_jumptable_base
addx8 a2, a2, a6 # a2 is now the address to jump to
jx a2
.balign 4
.LSE_store_jumptable:
/* The first 7 entries (56 bytes) of this table are unused (registers
* a0..a6 are handled separately above). Rather than have a whole bunch
* of wasted space, we just pretend that the table starts 56 bytes
* earlier in memory. */
.set .LSE_store_jumptable_base, .LSE_store_jumptable - (8 * 7)
mov a2, a7
j .LSE_store_apply
.balign 4
mov a2, a8
j .LSE_store_apply
.balign 4
mov a2, a9
j .LSE_store_apply
.balign 4
mov a2, a10
j .LSE_store_apply
.balign 4
mov a2, a11
j .LSE_store_apply
.balign 4
mov a2, a12
j .LSE_store_apply
.balign 4
mov a2, a13
j .LSE_store_apply
.balign 4
mov a2, a14
j .LSE_store_apply
.balign 4
mov a2, a15
j .LSE_store_apply
.balign 4
.LSE_load_a1:
/* a1 is saved in excsave1, so just read the value, */
rsr a2, excsave1
j .LSE_store_apply
.balign 4
.LSE_wrong_opcode:
/* If we got here it's not an opcode we can try to fix, so bomb out.
* Restore registers so any dump the fatal exception routine produces
* will have correct values */
wsr a0, sar
l32i a0, sp, 0
/*l32i a2, sp, 0x08*/
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
mov a2, a1
movi a3, 0
call0 fatal_exception_handler
/*************************** Debug exception handler *************************/
.section .vecbase.text, "x"