I-RAM memory store support for 8- and 16-bit stores
This commit is contained in:
parent
a8c60e0960
commit
8ea1774e1d
1 changed files with 191 additions and 61 deletions
|
@ -44,6 +44,8 @@ LoadStoreErrorHandlerStack:
|
||||||
.word 0 # a2
|
.word 0 # a2
|
||||||
.word 0 # a3
|
.word 0 # a3
|
||||||
.word 0 # a4
|
.word 0 # a4
|
||||||
|
.word 0 # a5
|
||||||
|
.word 0 # a6
|
||||||
|
|
||||||
.balign 4
|
.balign 4
|
||||||
.global debug_saved_ctx
|
.global debug_saved_ctx
|
||||||
|
@ -123,7 +125,11 @@ DoubleExceptionVector:
|
||||||
*
|
*
|
||||||
* Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause
|
* Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause
|
||||||
*
|
*
|
||||||
* (Fast path (no branches) is for L8UI)
|
* Accesses can be assumed aligned here as unaligned accesses would have generated
|
||||||
|
* an unaligned exception (9) before getting here.
|
||||||
|
*
|
||||||
|
* Fast path (no branches) is for L8UI to low registers 0, 2-4, and the fast
|
||||||
|
* store path is S8I from low registers 0, 2-6 (a1 and a7-a15 take a branch).
|
||||||
*/
|
*/
|
||||||
.literal_position
|
.literal_position
|
||||||
|
|
||||||
|
@ -141,6 +147,8 @@ LoadStoreErrorHandler:
|
||||||
s32i a4, sp, 0x10
|
s32i a4, sp, 0x10
|
||||||
rsr a0, sar # Save SAR in a0 to restore later
|
rsr a0, sar # Save SAR in a0 to restore later
|
||||||
|
|
||||||
|
# led_on a2, a3
|
||||||
|
|
||||||
/* Examine the opcode which generated the exception */
|
/* Examine the opcode which generated the exception */
|
||||||
/* Note: Instructions are in this order to avoid pipeline stalls. */
|
/* Note: Instructions are in this order to avoid pipeline stalls. */
|
||||||
rsr a2, epc1
|
rsr a2, epc1
|
||||||
|
@ -185,6 +193,8 @@ LoadStoreErrorHandler:
|
||||||
addi a3, a3, 0x3
|
addi a3, a3, 0x3
|
||||||
wsr a3, epc1
|
wsr a3, epc1
|
||||||
|
|
||||||
|
# led_off a0, a3
|
||||||
|
|
||||||
/* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
|
/* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
|
||||||
* per entry (so we can avoid a second jump by just doing a RFE inside
|
* per entry (so we can avoid a second jump by just doing a RFE inside
|
||||||
* each entry). Unfortunately, however, Xtensa doesn't have an addx16
|
* each entry). Unfortunately, however, Xtensa doesn't have an addx16
|
||||||
|
@ -214,25 +224,11 @@ LoadStoreErrorHandler:
|
||||||
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
|
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
|
||||||
rfe
|
rfe
|
||||||
|
|
||||||
.LSE_assign_reg:
|
|
||||||
/* At this point, a2 contains the register number times 2, a4 is the
|
|
||||||
* read value. */
|
|
||||||
|
|
||||||
/* Calculate the jumptable address, and restore all regs except a2 and
|
|
||||||
* a4 so we have less to do after jumping. */
|
|
||||||
/* Note: Instructions are in this order to avoid pipeline stalls. */
|
|
||||||
movi a3, .LSE_jumptable_base
|
|
||||||
l32i a0, sp, 0
|
|
||||||
addx8 a2, a2, a3 # a2 is now the address to jump to
|
|
||||||
l32i a3, sp, 0x0c
|
|
||||||
|
|
||||||
jx a2
|
|
||||||
|
|
||||||
.balign 4
|
.balign 4
|
||||||
.LSE_check_l16:
|
.LSE_check_l16:
|
||||||
/* At this point, a2 contains the opcode, a3 is masked opcode */
|
/* At this point, a2 contains the opcode, a3 is masked opcode */
|
||||||
movi a4, 0x001002 # l16si or l16ui opcode after masking
|
movi a4, 0x001002 # l16si or l16ui opcode after masking
|
||||||
bne a3, a4, .LSE_wrong_opcode
|
bne a3, a4, .LSE_check_s8i
|
||||||
|
|
||||||
/* Note: At this point, the opcode could be one of two things:
|
/* Note: At this point, the opcode could be one of two things:
|
||||||
* xx1xx2 (L16UI)
|
* xx1xx2 (L16UI)
|
||||||
|
@ -255,32 +251,91 @@ LoadStoreErrorHandler:
|
||||||
or a4, a3, a4 # set 32-bit sign bits
|
or a4, a3, a4 # set 32-bit sign bits
|
||||||
j .LSE_post_fetch
|
j .LSE_post_fetch
|
||||||
|
|
||||||
.LSE_wrong_opcode:
|
|
||||||
/* If we got here it's not an opcode we can try to fix, so bomb out.
|
|
||||||
* Restore registers so any dump the fatal exception routine produces
|
|
||||||
* will have correct values */
|
|
||||||
wsr a0, sar
|
|
||||||
l32i a0, sp, 0
|
|
||||||
/*l32i a2, sp, 0x08*/
|
|
||||||
l32i a3, sp, 0x0c
|
|
||||||
l32i a4, sp, 0x10
|
|
||||||
rsr a1, excsave1
|
|
||||||
mov a2, a1
|
|
||||||
movi a3, 0
|
|
||||||
call0 fatal_exception_handler
|
|
||||||
|
|
||||||
.balign 4
|
.balign 4
|
||||||
.LSE_assign_a1:
|
.LSE_check_s8i:
|
||||||
/* a1 is saved in excsave1, so just update that with the value, */
|
/* At this point, a2 contains the opcode */
|
||||||
wsr a4, excsave1
|
movi a3, 0x00F00F # opcode mask for s8i/s16i
|
||||||
/* Then restore all regs and return */
|
s32i a5, sp, 0x14 # Save a5, needed for store op
|
||||||
|
and a3, a2, a3 # a3 is masked instruction
|
||||||
|
movi a4, 0x004002 # s8i opcode after masking
|
||||||
|
s32i a6, sp, 0x18 # Save a6, needed for store op
|
||||||
|
bne a3, a4, .LSE_check_s16i
|
||||||
|
|
||||||
|
/* Note: At this point, the opcode is s8i */
|
||||||
|
movi a5, 0x000000ff # source mask
|
||||||
|
.LSE_store:
|
||||||
|
/* We jump here for either S8I or S16I to get the address and load
|
||||||
|
* and mask the current contents. */
|
||||||
|
movi a4, ~3
|
||||||
|
rsr a3, excvaddr # read faulting address
|
||||||
|
and a4, a3, a4 # a4 now word aligned address
|
||||||
|
ssa8b a3 # sar is now left shift amount
|
||||||
|
sll a3, a5
|
||||||
|
movi a6, 0xffffffff
|
||||||
|
xor a6, a6, a3 # a6 now has the word mask
|
||||||
|
l32i a3, a4, 0 # read the current word
|
||||||
|
and a3, a3, a6 # a3 now has the masked word
|
||||||
|
extui a2, a2, 4, 4 # a2 is now source register 0-15
|
||||||
|
|
||||||
|
/* At this point, a2 contains the source register 0-15, a3 contains the
|
||||||
|
* masked memory contents, a4 contains the address, a5 contains the source
|
||||||
|
* mask, and sar contains the left shift amount. */
|
||||||
|
bgei a2, 7, .LSE_load_reg # a7..a15 use jumptable
|
||||||
|
beqi a2, 1, .LSE_load_a1 # a1 uses a special routine
|
||||||
|
|
||||||
|
/* We're loading from a0 or a2..a6, which are all saved in our "stack"
|
||||||
|
* area. Calculate the correct address and load the value there. */
|
||||||
|
addx4 a2, a2, sp
|
||||||
|
l32i a2, a2, 0
|
||||||
|
|
||||||
|
.LSE_store_apply:
|
||||||
|
and a2, a2, a5 # mask the source
|
||||||
|
sll a2, a2 # shift the source
|
||||||
|
or a3, a3, a2 # combine with the masked memory contents
|
||||||
|
s32i a3, a4, 0 # write back to memory
|
||||||
|
|
||||||
|
/* Note: Instructions are in this order to avoid pipeline stalls */
|
||||||
|
rsr a3, epc1
|
||||||
|
wsr a0, sar
|
||||||
|
addi a3, a3, 0x3
|
||||||
|
wsr a3, epc1
|
||||||
|
|
||||||
|
# led_off a2, a3
|
||||||
|
|
||||||
|
/* Restore all regs and return */
|
||||||
l32i a0, sp, 0
|
l32i a0, sp, 0
|
||||||
l32i a2, sp, 0x08
|
l32i a2, sp, 0x08
|
||||||
l32i a3, sp, 0x0c
|
l32i a3, sp, 0x0c
|
||||||
l32i a4, sp, 0x10
|
l32i a4, sp, 0x10
|
||||||
rsr a1, excsave1
|
l32i a5, sp, 0x14
|
||||||
|
l32i a6, sp, 0x18
|
||||||
|
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
|
||||||
rfe
|
rfe
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.LSE_check_s16i:
|
||||||
|
/* At this point, a2 contains the opcode, a3 is the opcode masked with 0x00F00F */
|
||||||
|
movi a4, 0x005002 # s16i opcode after masking
|
||||||
|
bne a3, a4, .LSE_wrong_opcode
|
||||||
|
/* Note: At this point, the opcode is s16i */
|
||||||
|
movi a5, 0x0000ffff # source mask
|
||||||
|
j .LSE_store
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.LSE_assign_reg:
|
||||||
|
/* At this point, a2 contains the register number times 2, a4 is the
|
||||||
|
* read value. */
|
||||||
|
|
||||||
|
/* Calculate the jumptable address, and restore all regs except a2 and
|
||||||
|
* a4 so we have less to do after jumping. */
|
||||||
|
/* Note: Instructions are in this order to avoid pipeline stalls. */
|
||||||
|
movi a3, .LSE_jumptable_base
|
||||||
|
l32i a0, sp, 0
|
||||||
|
addx8 a2, a2, a3 # a2 is now the address to jump to
|
||||||
|
l32i a3, sp, 0x0c
|
||||||
|
|
||||||
|
jx a2
|
||||||
|
|
||||||
.balign 4
|
.balign 4
|
||||||
.LSE_jumptable:
|
.LSE_jumptable:
|
||||||
/* The first 5 entries (80 bytes) of this table are unused (registers
|
/* The first 5 entries (80 bytes) of this table are unused (registers
|
||||||
|
@ -366,6 +421,81 @@ LoadStoreErrorHandler:
|
||||||
rsr a1, excsave1
|
rsr a1, excsave1
|
||||||
rfe
|
rfe
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.LSE_assign_a1:
|
||||||
|
/* a1 is saved in excsave1, so just update that with the value, */
|
||||||
|
wsr a4, excsave1
|
||||||
|
/* Then restore all regs and return */
|
||||||
|
l32i a0, sp, 0
|
||||||
|
l32i a2, sp, 0x08
|
||||||
|
l32i a3, sp, 0x0c
|
||||||
|
l32i a4, sp, 0x10
|
||||||
|
rsr a1, excsave1
|
||||||
|
rfe
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.LSE_load_reg:
|
||||||
|
/* Calculate the jumptable address. */
|
||||||
|
movi a6, .LSE_store_jumptable_base
|
||||||
|
addx8 a2, a2, a6 # a2 is now the address to jump to
|
||||||
|
jx a2
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.LSE_store_jumptable:
|
||||||
|
/* The first 7 entries (56 bytes) of this table are unused (registers
|
||||||
|
* a0..a6 are handled separately above). Rather than have a whole bunch
|
||||||
|
* of wasted space, we just pretend that the table starts 56 bytes
|
||||||
|
* earlier in memory. */
|
||||||
|
.set .LSE_store_jumptable_base, .LSE_store_jumptable - (8 * 7)
|
||||||
|
|
||||||
|
mov a2, a7
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
mov a2, a8
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
mov a2, a9
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
mov a2, a10
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
mov a2, a11
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
mov a2, a12
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
mov a2, a13
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
mov a2, a14
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
mov a2, a15
|
||||||
|
j .LSE_store_apply
|
||||||
|
.balign 4
|
||||||
|
|
||||||
|
.LSE_load_a1:
|
||||||
|
/* a1 is saved in excsave1, so just read the value from there. */
|
||||||
|
rsr a2, excsave1
|
||||||
|
j .LSE_store_apply
|
||||||
|
|
||||||
|
.balign 4
|
||||||
|
.LSE_wrong_opcode:
|
||||||
|
/* If we got here it's not an opcode we can try to fix, so bomb out.
|
||||||
|
* Restore registers so any dump the fatal exception routine produces
|
||||||
|
* will have correct values */
|
||||||
|
wsr a0, sar
|
||||||
|
l32i a0, sp, 0
|
||||||
|
/*l32i a2, sp, 0x08*/
|
||||||
|
l32i a3, sp, 0x0c
|
||||||
|
l32i a4, sp, 0x10
|
||||||
|
rsr a1, excsave1
|
||||||
|
mov a2, a1
|
||||||
|
movi a3, 0
|
||||||
|
call0 fatal_exception_handler
|
||||||
|
|
||||||
/*************************** Debug exception handler *************************/
|
/*************************** Debug exception handler *************************/
|
||||||
|
|
||||||
.section .vecbase.text, "x"
|
.section .vecbase.text, "x"
|
||||||
|
|
Loading…
Reference in a new issue