532 lines
17 KiB
ArmAsm
532 lines
17 KiB
ArmAsm
/* Xtensa Exception (ie interrupt) Vectors & low-level handler code
|
||
*
|
||
* Core exception handler code is placed in the .vecbase section, which gets
|
||
* picked up specially in the linker script and placed at beginning of IRAM.
|
||
*
|
||
* The actual VecBase symbol should be the first thing in .vecbase (this is not
|
||
* strictly important as it gets set by symbol lookup not by hardcoded address,
|
||
* but having it at 0x40100000 means that the exception vectors have memorable
|
||
* offsets, which match the default Boot ROM vector offsets. So convenient for
|
||
* human understanding.
|
||
*
|
||
* Part of esp-open-rtos
|
||
* Original vector contents Copyright (C) 2014-2015 Espressif Systems
|
||
* Additions Copyright (C) Superhouse Automation Pty Ltd and Angus Gratton
|
||
* BSD Licensed as described in the file LICENSE
|
||
*/
|
||
|
||
#include "led_debug.s"
|
||
|
||
/* Some UserException causes, see table Table 4–64 in ISA reference */
|
||
|
||
#define CAUSE_SYSCALL 1
|
||
#define CAUSE_LOADSTORE 3
|
||
#define CAUSE_LVL1INT 4
|
||
|
||
.section .bss
|
||
|
||
/* Stack space for NMI handler
|
||
|
||
NMI handler stack high water mark measured at 0x134 bytes. Any use
|
||
of the NMI timer callback will add stack overhead as well.
|
||
|
||
The NMI handler does a basic check for stack overflow
|
||
*/
|
||
.balign 16
|
||
NMIHandlerStack:
|
||
.skip 0x200
|
||
.NMIHandlerStackTop:
|
||
|
||
.balign 16
|
||
LoadStoreErrorHandlerStack:
|
||
.word 0 # a0
|
||
.word 0 # (unused)
|
||
.word 0 # a2
|
||
.word 0 # a3
|
||
.word 0 # a4
|
||
|
||
/***************************** Exception Vectors *****************************/
|
||
|
||
.section .vecbase.text, "x"
|
||
|
||
/* Note: Exception vectors must be aligned on a 256-byte (0x100) boundary or
|
||
* they will not function properly. (This is taken care of in the linker
|
||
* script by ensuring .vecbase.text is aligned properly, and putting VecBase
|
||
* right at the beginning of .vecbase.text) */
|
||
.org 0
|
||
VecBase:
|
||
.global VecBase
|
||
/* IMPORTANT: exception vector literals will go here, but we
|
||
* can't have more than 4 otherwise we push DebugExceptionVector past
|
||
* offset 0x10 relative to VecBase. There should be ways to avoid this,
|
||
* and also keep the VecBase offsets easy to read, but this works for
|
||
* now. */
|
||
.literal_position
|
||
|
||
.org VecBase + 0x10
|
||
DebugExceptionVector:
|
||
.type DebugExceptionVector, @function
|
||
|
||
wsr a0, excsave2
|
||
call0 sdk_user_fatal_exception_handler
|
||
rfi 2
|
||
|
||
.org VecBase + 0x20
|
||
NMIExceptionVector:
|
||
.type NMIExceptionVector, @function
|
||
j NMIExceptionHandler
|
||
|
||
.org VecBase + 0x30
|
||
KernelExceptionVector:
|
||
.type KernelExceptionVector, @function
|
||
|
||
break 1, 0
|
||
call0 sdk_user_fatal_exception_handler
|
||
rfe
|
||
|
||
.org VecBase + 0x50
|
||
UserExceptionVector:
|
||
.type UserExceptionVector, @function
|
||
|
||
wsr a1, excsave1
|
||
rsr a1, exccause
|
||
beqi a1, CAUSE_LOADSTORE, LoadStoreErrorHandler
|
||
j UserExceptionHandler
|
||
|
||
.org VecBase + 0x70
|
||
DoubleExceptionVector:
|
||
.type DoubleExceptionVector, @function
|
||
|
||
break 1, 4
|
||
call0 sdk_user_fatal_exception_handler
|
||
|
||
/* Reset vector at offset 0x80 is unused, as vecbase gets reset to mask ROM
|
||
* vectors on chip reset. */
|
||
|
||
/*************************** LoadStoreError Handler **************************/
|
||
|
||
.section .vecbase.text, "x"
|
||
|
||
/* Xtensa "Load/Store Exception" handler:
|
||
* Completes L8/L16 load instructions from Instruction address space, for which
|
||
* the architecture only supports 32-bit reads.
|
||
*
|
||
* Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause
|
||
*
|
||
* (Fast path (no branches) is for L8UI)
|
||
*/
|
||
.literal_position
|
||
|
||
.balign 4
|
||
LoadStoreErrorHandler:
|
||
.type LoadStoreErrorHandler, @function
|
||
|
||
/* Registers are saved in the address corresponding to their register
|
||
* number times 4. This allows a quick and easy mapping later on when
|
||
* needing to store the value to a particular register number. */
|
||
movi sp, LoadStoreErrorHandlerStack
|
||
s32i a0, sp, 0
|
||
s32i a2, sp, 0x08
|
||
s32i a3, sp, 0x0c
|
||
s32i a4, sp, 0x10
|
||
rsr a0, sar # Save SAR in a0 to restore later
|
||
|
||
/* Examine the opcode which generated the exception */
|
||
/* Note: Instructions are in this order to avoid pipeline stalls. */
|
||
rsr a2, epc1
|
||
movi a3, ~3
|
||
ssa8l a2 # sar is now correct shift for aligned read
|
||
and a2, a2, a3 # a2 now 4-byte aligned address of instruction
|
||
l32i a4, a2, 0
|
||
l32i a2, a2, 4
|
||
movi a3, 0x00700F # opcode mask for l8ui/l16si/l16ui
|
||
src a2, a2, a4 # a2 now instruction that failed
|
||
and a3, a2, a3 # a3 is masked instruction
|
||
bnei a3, 0x000002, .LSE_check_l16
|
||
|
||
/* Note: At this point, opcode could technically be one of two things:
|
||
* xx0xx2 (L8UI)
|
||
* xx8xx2 (Reserved (invalid) opcode)
|
||
* It is assumed that we'll never get to this point from an illegal
|
||
* opcode, so we don't bother to check for that case and presume this
|
||
* is always an L8UI. */
|
||
|
||
movi a4, ~3
|
||
rsr a3, excvaddr # read faulting address
|
||
and a4, a3, a4 # a4 now word aligned read address
|
||
|
||
l32i a4, a4, 0 # perform the actual read
|
||
ssa8l a3 # sar is now shift to extract a3's byte
|
||
srl a3, a4 # shift right correct distance
|
||
extui a4, a3, 0, 8 # mask off bits we need for an l8
|
||
|
||
.LSE_post_fetch:
|
||
/* We jump back here after either the L8UI or the L16*I routines do the
|
||
* necessary work to read the value from memory.
|
||
* At this point, a2 holds the faulting instruction and a4 holds the
|
||
* correctly read value.
|
||
|
||
* Restore original SAR value (saved in a0) and update EPC so we'll
|
||
* return back to the instruction following the one we just emulated */
|
||
|
||
/* Note: Instructions are in this order to avoid pipeline stalls */
|
||
rsr a3, epc1
|
||
wsr a0, sar
|
||
addi a3, a3, 0x3
|
||
wsr a3, epc1
|
||
|
||
/* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
|
||
* per entry (so we can avoid a second jump by just doing a RFE inside
|
||
* each entry). Unfortunately, however, Xtensa doesn't have an addx16
|
||
* operation to make that easy for us. Luckily, all of the faulting
|
||
* opcodes we're processing are guaranteed to have bit 3 be zero, which
|
||
* means if we just shift the register bits of the opcode down by 3
|
||
* instead of 4, we will get the register number multiplied by 2. This
|
||
* combined with an addx8 will give us an effective addx16 without
|
||
* needing any extra shift operations. */
|
||
extui a2, a2, 3, 5 # a2 is now destination register 0-15 times 2
|
||
|
||
bgei a2, 10, .LSE_assign_reg # a5..a15 use jumptable
|
||
beqi a2, 2, .LSE_assign_a1 # a1 uses a special routine
|
||
|
||
/* We're storing into a0 or a2..a4, which are all saved in our "stack"
|
||
* area. Calculate the correct address and stick the value in there,
|
||
* then just do our normal restore and RFE (no jumps required, which
|
||
* actually makes a0..a4 substantially faster). */
|
||
addx2 a2, a2, sp
|
||
s32i a4, a2, 0
|
||
|
||
/* Restore all regs and return */
|
||
l32i a0, sp, 0
|
||
l32i a2, sp, 0x08
|
||
l32i a3, sp, 0x0c
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
|
||
rfe
|
||
|
||
.LSE_assign_reg:
|
||
/* At this point, a2 contains the register number times 2, a4 is the
|
||
* read value. */
|
||
|
||
/* Calculate the jumptable address, and restore all regs except a2 and
|
||
* a4 so we have less to do after jumping. */
|
||
/* Note: Instructions are in this order to avoid pipeline stalls. */
|
||
movi a3, .LSE_jumptable_base
|
||
l32i a0, sp, 0
|
||
addx8 a2, a2, a3 # a2 is now the address to jump to
|
||
l32i a3, sp, 0x0c
|
||
|
||
jx a2
|
||
|
||
.balign 4
|
||
.LSE_check_l16:
|
||
/* At this point, a2 contains the opcode, a3 is masked opcode */
|
||
movi a4, 0x001002 # l16si or l16ui opcode after masking
|
||
bne a3, a4, .LSE_wrong_opcode
|
||
|
||
/* Note: At this point, the opcode could be one of two things:
|
||
* xx1xx2 (L16UI)
|
||
* xx9xx2 (L16SI)
|
||
* Both of these we can handle. */
|
||
|
||
movi a4, ~3
|
||
rsr a3, excvaddr # read faulting address
|
||
and a4, a3, a4 # a4 now word aligned read address
|
||
|
||
l32i a4, a4, 0 # perform the actual read
|
||
ssa8l a3 # sar is now shift to extract a3's bytes
|
||
srl a3, a4 # shift right correct distance
|
||
extui a4, a3, 0, 16 # mask off bits we need for an l16
|
||
|
||
bbci a2, 15, .LSE_post_fetch # Not a signed op
|
||
bbci a4, 15, .LSE_post_fetch # Value does not need sign-extension
|
||
|
||
movi a3, 0xFFFF0000
|
||
or a4, a3, a4 # set 32-bit sign bits
|
||
j .LSE_post_fetch
|
||
|
||
.LSE_wrong_opcode:
|
||
/* If we got here it's not an opcode we can try to fix, so bomb out.
|
||
* Restore registers so any dump the fatal exception routine produces
|
||
* will have correct values */
|
||
wsr a0, sar
|
||
l32i a0, sp, 0
|
||
l32i a2, sp, 0x08
|
||
l32i a3, sp, 0x0c
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
call0 sdk_user_fatal_exception_handler
|
||
|
||
.balign 4
|
||
.LSE_assign_a1:
|
||
/* a1 is saved in excsave1, so just update that with the value, */
|
||
wsr a4, excsave1
|
||
/* Then restore all regs and return */
|
||
l32i a0, sp, 0
|
||
l32i a2, sp, 0x08
|
||
l32i a3, sp, 0x0c
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.balign 4
|
||
.LSE_jumptable:
|
||
/* The first 5 entries (80 bytes) of this table are unused (registers
|
||
* a0..a4 are handled separately above). Rather than have a whole bunch
|
||
* of wasted space, we just pretend that the table starts 80 bytes
|
||
* earlier in memory. */
|
||
.set .LSE_jumptable_base, .LSE_jumptable - (16 * 5)
|
||
|
||
.org .LSE_jumptable_base + (16 * 5)
|
||
mov a5, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 6)
|
||
mov a6, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 7)
|
||
mov a7, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 8)
|
||
mov a8, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 9)
|
||
mov a9, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 10)
|
||
mov a10, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 11)
|
||
mov a11, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 12)
|
||
mov a12, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 13)
|
||
mov a13, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 14)
|
||
mov a14, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
.org .LSE_jumptable_base + (16 * 15)
|
||
mov a15, a4
|
||
l32i a2, sp, 0x08
|
||
l32i a4, sp, 0x10
|
||
rsr a1, excsave1
|
||
rfe
|
||
|
||
/****************************** call_user_start ******************************/
|
||
|
||
.section .vecbase.text, "x"
|
||
|
||
/* This is the first entrypoint called from the ROM after loading the image
|
||
* into IRAM. It just sets up the VECBASE register to point at our own
|
||
* exception vectors and then calls sdk_user_start() */
|
||
|
||
.literal_position
|
||
|
||
.balign 4
|
||
call_user_start:
|
||
.global call_user_start
|
||
.type call_user_start, @function
|
||
|
||
movi a2, VecBase
|
||
wsr a2, vecbase
|
||
call0 sdk_user_start
|
||
|
||
/*************************** NMI Exception Handler ***************************/
|
||
|
||
#define NMI_STACK_CANARY 0xABBABABA
|
||
|
||
.section .vecbase.text, "x"
|
||
|
||
.literal_position
|
||
.balign 16
|
||
NMIExceptionHandler:
|
||
.type NMIExceptionHandler, @function
|
||
|
||
wsr sp, excsave3 # excsave3 holds user stack
|
||
movi sp, .NMIHandlerStackTop - 0x40
|
||
s32i a0, sp, 0x00
|
||
s32i a2, sp, 0x04
|
||
s32i a3, sp, 0x08
|
||
s32i a4, sp, 0x0c
|
||
s32i a5, sp, 0x10
|
||
s32i a6, sp, 0x14
|
||
s32i a7, sp, 0x18
|
||
s32i a8, sp, 0x1c
|
||
s32i a9, sp, 0x20
|
||
s32i a10, sp, 0x24
|
||
s32i a11, sp, 0x28
|
||
rsr a0, epc1
|
||
s32i a0, sp, 0x2c
|
||
rsr a0, exccause
|
||
s32i a0, sp, 0x30
|
||
rsr a0, excsave1
|
||
s32i a0, sp, 0x34
|
||
rsr a0, excvaddr
|
||
s32i a0, sp, 0x38
|
||
rsr a0, sar
|
||
s32i a0, sp, 0x3c
|
||
movi a0, 0x23 # Override PS for NMI handler
|
||
wsr a0, ps
|
||
rsync
|
||
|
||
/* mark the stack overflow point before we call the actual NMI handler */
|
||
movi a0, NMIHandlerStack
|
||
movi a2, NMI_STACK_CANARY
|
||
s32i a2, a0, 0x00
|
||
|
||
call0 sdk_wDev_ProcessFiq
|
||
|
||
/* verify we didn't overflow */
|
||
movi a0, NMIHandlerStack
|
||
l32i a3, a0, 0
|
||
movi a2, NMI_STACK_CANARY
|
||
bne a3, a2, .NMIFatalStackOverflow
|
||
|
||
l32i a0, sp, 0x3c
|
||
wsr a0, sar
|
||
l32i a0, sp, 0x38
|
||
wsr a0, excvaddr
|
||
l32i a0, sp, 0x34
|
||
wsr a0, excsave1
|
||
l32i a0, sp, 0x30
|
||
wsr a0, exccause
|
||
l32i a0, sp, 0x2c
|
||
wsr a0, epc1
|
||
l32i a11, sp, 0x28
|
||
l32i a10, sp, 0x24
|
||
l32i a9, sp, 0x20
|
||
l32i a8, sp, 0x1c
|
||
l32i a7, sp, 0x18
|
||
l32i a6, sp, 0x14
|
||
l32i a5, sp, 0x10
|
||
l32i a4, sp, 0x0c
|
||
l32i a3, sp, 0x08
|
||
movi a0, 0x33 # Reset PS
|
||
wsr a0, ps
|
||
rsync
|
||
/* set dport nmi status to 1 (wDev_ProcessFiq clears bit 0 and verifies it
|
||
* stays cleared, see
|
||
* http://esp8266-re.foogod.com/wiki/WDev_ProcessFiq_%28IoT_RTOS_SDK_0.9.9%29)
|
||
*/
|
||
movi a0, 0x3ff00000
|
||
movi a2, 0x1
|
||
s32i a2, a0, 0
|
||
l32i a2, sp, 0x04
|
||
l32i a0, sp, 0x00
|
||
movi a1, 0x0
|
||
xsr a1, excsave3 # Load stack back from excsave3, clear excsave3
|
||
rfi 3
|
||
|
||
.section .rodata
|
||
|
||
.NMIStackOverflowErrorMsg:
|
||
.string "\nFATAL: NMI Stack Overflow\n"
|
||
|
||
.section .vecbase.text, "x"
|
||
|
||
.NMIFatalStackOverflow:
|
||
movi a2, .NMIStackOverflowErrorMsg
|
||
call0 printf
|
||
.NMIInfiniteLoop:
|
||
j .NMIInfiniteLoop /* TODO: replace with call to abort() */
|
||
|
||
/*********************** General UserException Handler ***********************/
|
||
|
||
.section .vecbase.text, "x"
|
||
|
||
/* Called by UserExceptionVector if EXCCAUSE is anything other than
|
||
* LoadStoreCause. */
|
||
|
||
.literal_position
|
||
.balign 4
|
||
UserExceptionHandler:
|
||
.type UserExceptionHandler, @function
|
||
xsr a0, excsave1 # a0 now contains sp
|
||
mov sp, a0
|
||
addi sp, sp, -0x50
|
||
s32i a0, sp, 0x10
|
||
rsr a0, ps
|
||
s32i a0, sp, 0x08
|
||
rsr a0, epc1
|
||
s32i a0, sp, 0x04
|
||
rsr a0, excsave1
|
||
s32i a0, sp, 0x0c
|
||
movi a0, _xt_user_exit
|
||
s32i a0, sp, 0x0
|
||
call0 sdk__xt_int_enter
|
||
movi a0, 0x23
|
||
wsr a0, ps
|
||
rsync
|
||
rsr a2, exccause
|
||
/* Any UserException cause other than a level 1 interrupt is fatal */
|
||
bnei a2, CAUSE_LVL1INT, .UserFailOtherExceptionCause
|
||
.UserHandleInterrupt:
|
||
rsil a0, 1
|
||
rsr a2, intenable
|
||
rsr a3, interrupt
|
||
movi a4, 0x3fff
|
||
and a2, a2, a3
|
||
and a2, a2, a4 # a2 = 0x3FFF & INTENABLE & INTERRUPT
|
||
call0 _xt_isr_handler
|
||
j sdk__xt_int_exit # once finished, jumps to _xt_user_exit via stack
|
||
|
||
.literal_position
|
||
.UserFailOtherExceptionCause:
|
||
break 1, 1
|
||
call0 sdk_user_fatal_exception_handler
|
||
|
||
/* _xt_user_exit is pushed onto the stack as part of the user exception handler,
|
||
restores same set registers which were saved there and returns from exception */
|
||
_xt_user_exit:
|
||
.global _xt_user_exit
|
||
.type _xt_user_exit, @function
|
||
l32i a0, sp, 0x8
|
||
wsr a0, ps
|
||
l32i a0, sp, 0x4
|
||
wsr a0, epc1
|
||
l32i a0, sp, 0xc
|
||
l32i sp, sp, 0x10
|
||
rsync
|
||
rfe
|