commit 66796e43df by Our Air Quality, 2019-04-08 02:49:21 +00:00 (committed by GitHub)
20 changed files with 366 additions and 131 deletions


@ -137,6 +137,9 @@ static void IRAM default_putc(char c) {
void init_newlib_locks(void);
extern uint8_t sdk_wDevCtrl[];
void nano_malloc_insert_chunk(void *start, size_t size);
extern uint8_t _heap_start[];
extern uint8_t _text_end[];
extern uint8_t enable_low_icache;
// .text+0x258
void IRAM sdk_user_start(void) {
@ -209,8 +212,13 @@ void IRAM sdk_user_start(void) {
cksum_value = buf32[5 + boot_slot];
ic_flash_addr = (flash_sectors - 3 + boot_slot) * sdk_flashchip.sector_size;
sdk_SPIRead(ic_flash_addr, buf32, sizeof(struct sdk_g_ic_saved_st));
#ifdef ESP8266_ENABLE_LOW_ICACHE
enable_low_icache = ESP8266_ENABLE_LOW_ICACHE;
#endif
Cache_Read_Enable(0, 0, 1);
zero_bss();
sdk_os_install_putc1(default_putc);
/* HACK Reclaim a region of unused bss from wdev.o. This would not be
@ -219,6 +227,26 @@ void IRAM sdk_user_start(void) {
* it is in very useful dram. */
nano_malloc_insert_chunk((void *)(sdk_wDevCtrl + 0x2190), 8000);
/* Use all the unused DRAM for the dynamic heap. */
nano_malloc_insert_chunk(_heap_start, 0x3FFFC000 - (uintptr_t)_heap_start);
/* Add unused IRAM to the malloc free list. */
if (enable_low_icache) {
/* The memory region 0x40108000 to 0x4010C000 is used for the icache so it
* cannot be used here, but there might still be some unused IRAM below it. */
nano_malloc_insert_chunk(_text_end, 0x40108000 - (uintptr_t)_text_end);
} else {
/* The memory region 0x40108000 to 0x4010C000 is not used as part of the
* instruction cache and is usable as extra IRAM. */
nano_malloc_insert_chunk(_text_end, 0x4010C000 - (uintptr_t)_text_end);
}
/* The preferred memory region in which to allocate early data. If the app
* has ample memory then use the DRAM; otherwise, if the app is running low
* on DRAM, it might help to allocate to the IRAM when possible. */
set_malloc_regions(MALLOC_MASK_PREFER_DRAM);
//set_malloc_regions(MALLOC_MASK_PREFER_IRAM);
init_newlib_locks();
if (cksum_magic == 0xffffffff) {
@ -368,6 +396,7 @@ void sdk_user_init_task(void *params) {
/* The start-up stack is not used after scheduling has started, so all of
* the top area of RAM that was the stack can be used for the dynamic heap. */
xPortSupervisorStackPointer = (void *)0x40000000;
nano_malloc_insert_chunk((void *)0x3FFFC000, 0x4000);
sdk_ets_timer_init();
printf("\nESP-Open-SDK ver: %s compiled @ %s %s\n", OS_VERSION_STR, __DATE__, __TIME__);


@ -23,7 +23,7 @@
#define CAUSE_LOADSTORE 3
#define CAUSE_LVL1INT 4
.section .bss
.section .data
/* Stack space for NMI handler
@ -37,13 +37,26 @@ NMIHandlerStack:
.skip 0x200
.NMIHandlerStackTop:
/* The Load/Store exception handler uses a separate stack to store the
* interruptee registers. It does not appear to be practical to use the
* interruptee stack, which may be invalid at some points. This exception is
* synchronous and the handler does not call back into itself. However, it may
* be interrupted by an NMI which in turn may re-enter this exception
* handler. The NMI handler is responsible for switching the stack pointer used
* by this exception handler. Room is allocated for up to 3 stack frames: a
* base frame and two NMI re-entry frames, each 7 words wide.
*/
#define LoadStoreErrorHandlerStackFrameSize (7 * 4)
.balign 16
.global LoadStoreErrorHandlerStack
LoadStoreErrorHandlerStack:
.word 0 # a0
.word 0 # (unused)
.word 0 # a2
.word 0 # a3
.word 0 # a4
.skip LoadStoreErrorHandlerStackFrameSize * 3
.balign 4
.global LoadStoreErrorHandlerStackPointer
LoadStoreErrorHandlerStackPointer:
.word 0
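A rough C view of one handler frame, as implied by the save/restore offsets used below (an illustrative sketch; this struct does not exist in the source):

    /* One load/store handler frame: 7 words (28 bytes).  Register aN is saved
     * at offset N * 4, so a0 is at 0x00, a2 at 0x08, ... a6 at 0x18; the a1
     * slot at 0x04 stays unused because a1 is kept in excsave1.  The NMI
     * handler advances LoadStoreErrorHandlerStackPointer by one frame on entry
     * and moves it back on exit, which is why three frames are reserved: one
     * base frame plus two NMI re-entries. */
    struct lse_frame {
        uint32_t a0;        /* 0x00 */
        uint32_t unused_a1; /* 0x04 (a1 is held in excsave1) */
        uint32_t a2;        /* 0x08 */
        uint32_t a3;        /* 0x0c */
        uint32_t a4;        /* 0x10 */
        uint32_t a5;        /* 0x14, saved only on the store path */
        uint32_t a6;        /* 0x18, saved only on the store path */
    };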
.balign 4
.global debug_saved_ctx
@ -123,7 +136,11 @@ DoubleExceptionVector:
*
* Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause
*
* (Fast path (no branches) is for L8UI)
* Accesses can be assumed to be aligned here, as unaligned accesses would have
* generated an unaligned exception (cause 9) before getting here.
*
* The fast path (no branches) is L8UI from the low registers 0 and 2-5, and the
* fast store path is S8I from the low registers 0 and 2-7.
*/
.literal_position
@ -131,16 +148,28 @@ DoubleExceptionVector:
LoadStoreErrorHandler:
.type LoadStoreErrorHandler, @function
/* Registers are saved in the address corresponding to their register
* number times 4. This allows a quick and easy mapping later on when
* needing to store the value to a particular register number. */
movi sp, LoadStoreErrorHandlerStack
/* Registers are saved in stack frame offsets corresponding to their
* register number times 4. This allows a quick and easy mapping later
* on when needing to store the value to a particular register
* number.
*
* This handler may be interrupted asynchronously by the NMI. The NMI
* handler is responsible for switching the load/store handler stack
* pointer, which avoids that overhead here. This handler is
* synchronous, so the NMI handler can safely modify and restore the
* load/store stack pointer.
*/
movi sp, LoadStoreErrorHandlerStackPointer
l32i sp, sp, 0
s32i a0, sp, 0
s32i a2, sp, 0x08
s32i a3, sp, 0x0c
s32i a4, sp, 0x10
rsr a0, sar # Save SAR in a0 to restore later
# led_on a2, a3
/* Examine the opcode which generated the exception */
/* Note: Instructions are in this order to avoid pipeline stalls. */
rsr a2, epc1
@ -185,6 +214,8 @@ LoadStoreErrorHandler:
addi a3, a3, 0x3
wsr a3, epc1
# led_off a0, a3
/* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
* per entry (so we can avoid a second jump by just doing a RFE inside
* each entry). Unfortunately, however, Xtensa doesn't have an addx16
@ -214,25 +245,11 @@ LoadStoreErrorHandler:
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
rfe
.LSE_assign_reg:
/* At this point, a2 contains the register number times 2, a4 is the
* read value. */
/* Calculate the jumptable address, and restore all regs except a2 and
* a4 so we have less to do after jumping. */
/* Note: Instructions are in this order to avoid pipeline stalls. */
movi a3, .LSE_jumptable_base
l32i a0, sp, 0
addx8 a2, a2, a3 # a2 is now the address to jump to
l32i a3, sp, 0x0c
jx a2
.balign 4
.LSE_check_l16:
/* At this point, a2 contains the opcode, a3 is masked opcode */
movi a4, 0x001002 # l16si or l16ui opcode after masking
bne a3, a4, .LSE_wrong_opcode
bne a3, a4, .LSE_check_s8i
/* Note: At this point, the opcode could be one of two things:
* xx1xx2 (L16UI)
@ -255,32 +272,91 @@ LoadStoreErrorHandler:
or a4, a3, a4 # set 32-bit sign bits
j .LSE_post_fetch
.LSE_wrong_opcode:
/* If we got here it's not an opcode we can try to fix, so bomb out.
* Restore registers so any dump the fatal exception routine produces
* will have correct values */
wsr a0, sar
l32i a0, sp, 0
/*l32i a2, sp, 0x08*/
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
mov a2, a1
movi a3, 0
call0 fatal_exception_handler
.balign 4
.LSE_assign_a1:
/* a1 is saved in excsave1, so just update that with the value. */
wsr a4, excsave1
/* Then restore all regs and return */
.LSE_check_s8i:
/* At this point, a2 contains the opcode */
movi a3, 0x00F00F # opcode mask for s8i/s16i
s32i a5, sp, 0x14 # Save a5, needed for store op
and a3, a2, a3 # a3 is masked instruction
movi a4, 0x004002 # s8i opcode after masking
s32i a6, sp, 0x18 # Save a6, needed for store op
bne a3, a4, .LSE_check_s16i
/* Note: At this point, the opcode is s8i */
movi a5, 0x000000ff # source mask
.LSE_store:
/* We jump here for either S8I or S16I to get the address and load
* and mask the current contents. */
movi a4, ~3
rsr a3, excvaddr # read faulting address
and a4, a3, a4 # a4 now word aligned address
ssa8b a3 # sar is now left shift amount
sll a3, a5
movi a6, 0xffffffff
xor a6, a6, a3 # a6 now has the word mask
l32i a3, a4, 0 # read the current word
and a3, a3, a6 # a3 now has the masked word
extui a2, a2, 4, 4 # a2 is now source register 0-15
/* At this point, a2 contains the source register 0-15, a3 contains the
* masked memory contents, a4 contains the address, a5 contains the source
* mask, and sar contains the left shift amount. */
bgei a2, 7, .LSE_load_reg # a7..a15 use jumptable
beqi a2, 1, .LSE_load_a1 # a1 uses a special routine
/* We're loading from a0 or a2..a6, which are all saved in our "stack"
* area. Calculate the correct address and load the value there. */
addx4 a2, a2, sp
l32i a2, a2, 0
.LSE_store_apply:
and a2, a2, a5 # mask the source
sll a2, a2 # shift the source
or a3, a3, a2 # combine with the masked memory contents
s32i a3, a4, 0 # write back to memory
/* Note: Instructions are in this order to avoid pipeline stalls */
rsr a3, epc1
wsr a0, sar
addi a3, a3, 0x3
wsr a3, epc1
# led_off a2, a3
/* Restore all regs and return */
l32i a0, sp, 0
l32i a2, sp, 0x08
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
l32i a5, sp, 0x14
l32i a6, sp, 0x18
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
rfe
.balign 4
.LSE_check_s16i:
/* At this point, a2 contains the opcode */
movi a4, 0x005002 # s16i opcode after masking
bne a3, a4, .LSE_wrong_opcode
/* Note: At this point, the opcode is s16i */
movi a5, 0x0000ffff # source mask
j .LSE_store
.balign 4
.LSE_assign_reg:
/* At this point, a2 contains the register number times 2, a4 is the
* read value. */
/* Calculate the jumptable address, and restore all regs except a2 and
* a4 so we have less to do after jumping. */
/* Note: Instructions are in this order to avoid pipeline stalls. */
movi a3, .LSE_jumptable_base
l32i a0, sp, 0
addx8 a2, a2, a3 # a2 is now the address to jump to
l32i a3, sp, 0x0c
jx a2
.balign 4
.LSE_jumptable:
/* The first 5 entries (80 bytes) of this table are unused (registers
@ -366,6 +442,81 @@ LoadStoreErrorHandler:
rsr a1, excsave1
rfe
.balign 4
.LSE_assign_a1:
/* a1 is saved in excsave1, so just update that with the value. */
wsr a4, excsave1
/* Then restore all regs and return */
l32i a0, sp, 0
l32i a2, sp, 0x08
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
rfe
.balign 4
.LSE_load_reg:
/* Calculate the jumptable address. */
movi a6, .LSE_store_jumptable_base
addx8 a2, a2, a6 # a2 is now the address to jump to
jx a2
.balign 4
.LSE_store_jumptable:
/* The first 7 entries (56 bytes) of this table are unused (registers
* a0..a6 are handled separately above). Rather than have a whole bunch
* of wasted space, we just pretend that the table starts 56 bytes
* earlier in memory. */
.set .LSE_store_jumptable_base, .LSE_store_jumptable - (8 * 7)
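/* Worked example (illustrative): entries are 8 bytes each, so with the base
 * set to the table minus 56 bytes, register a7 jumps to base + 7*8 = the
 * first real entry, and a15 jumps to base + 15*8 = the last entry. */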
mov a2, a7
j .LSE_store_apply
.balign 4
mov a2, a8
j .LSE_store_apply
.balign 4
mov a2, a9
j .LSE_store_apply
.balign 4
mov a2, a10
j .LSE_store_apply
.balign 4
mov a2, a11
j .LSE_store_apply
.balign 4
mov a2, a12
j .LSE_store_apply
.balign 4
mov a2, a13
j .LSE_store_apply
.balign 4
mov a2, a14
j .LSE_store_apply
.balign 4
mov a2, a15
j .LSE_store_apply
.balign 4
.LSE_load_a1:
/* a1 is saved in excsave1, so just read the value. */
rsr a2, excsave1
j .LSE_store_apply
.balign 4
.LSE_wrong_opcode:
/* If we got here it's not an opcode we can try to fix, so bomb out.
* Restore registers so any dump the fatal exception routine produces
* will have correct values */
wsr a0, sar
l32i a0, sp, 0
/*l32i a2, sp, 0x08*/
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
mov a2, a1
movi a3, 0
call0 fatal_exception_handler
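The effect of the fix-up paths above can be sketched in C (illustration only, assuming the little-endian byte order of the ESP8266; the function names are hypothetical, and the source mask is 0xff for S8I or 0xffff for S16I/L16UI):

    #include <stdint.h>

    /* Read-modify-write emulation of a narrow store to a region that only
     * supports 32-bit accesses: word-align the faulting address, build a mask
     * from the source width and the byte offset, merge, and write the word
     * back.  This mirrors the .LSE_store path above. */
    static void emulate_narrow_store(uintptr_t addr, uint32_t value, uint32_t src_mask)
    {
        volatile uint32_t *word = (volatile uint32_t *)(addr & ~(uintptr_t)3);
        uint32_t shift = (addr & 3) * 8;          /* bit offset within the word */
        uint32_t mask  = src_mask << shift;       /* bits being replaced */
        *word = (*word & ~mask) | ((value & src_mask) << shift);
    }

    /* The load path is the mirror image: read the aligned word and extract the
     * byte or halfword; L16SI additionally sign-extends the result. */
    static uint32_t emulate_narrow_load(uintptr_t addr, uint32_t src_mask)
    {
        uint32_t word  = *(volatile uint32_t *)(addr & ~(uintptr_t)3);
        uint32_t shift = (addr & 3) * 8;
        return (word >> shift) & src_mask;
    }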
/*************************** Debug exception handler *************************/
.section .vecbase.text, "x"
@ -407,6 +558,12 @@ call_user_start:
.global call_user_start
.type call_user_start, @function
/* Initialize the load/store error handler stack pointer. There are no
* load/store exceptions before this point. */
movi a2, LoadStoreErrorHandlerStackPointer
movi a3, LoadStoreErrorHandlerStack
s32i a3, a2, 0
movi a2, VecBase
wsr a2, vecbase
call0 sdk_user_start
@ -422,7 +579,10 @@ call_user_start:
NMIExceptionHandler:
.type NMIExceptionHandler, @function
wsr sp, excsave3 # excsave3 holds user stack
/* Load the NMI handler stack pointer which is already offset by -0x40
* to create a frame to store the interruptee state. */
movi sp, .NMIHandlerStackTop - 0x40
s32i a0, sp, 0x00
s32i a2, sp, 0x04
@ -449,39 +609,51 @@ NMIExceptionHandler:
wsr a0, ps
rsync
/* Mark the stack overflow point before we call the actual NMI handler */
movi a0, NMIHandlerStack
movi a2, NMI_STACK_CANARY
s32i a2, a0, 0x00
/* Switch the load/store error handler stack. */
movi a2, LoadStoreErrorHandlerStackPointer
l32i a3, a2, 0
addi a3, a3, LoadStoreErrorHandlerStackFrameSize
s32i a3, a2, 0
call0 sdk_wDev_ProcessFiq
/* Verify we didn't overflow */
movi a0, NMIHandlerStack
l32i a3, a0, 0
movi a2, NMI_STACK_CANARY
bne a3, a2, .NMIFatalStackOverflow
/* Restore the load/store error handler stack. */
movi a2, LoadStoreErrorHandlerStackPointer
l32i a3, a2, 0
addi a3, a3, -LoadStoreErrorHandlerStackFrameSize
s32i a3, a2, 0
l32i a0, sp, 0x3c
wsr a0, sar
l32i a0, sp, 0x38
wsr a0, excvaddr
l32i a0, sp, 0x34
wsr a0, excsave1
l32i a0, sp, 0x30
wsr a0, exccause
l32i a0, sp, 0x2c
wsr a0, epc1
l32i a11, sp, 0x28
l32i a10, sp, 0x24
l32i a9, sp, 0x20
l32i a8, sp, 0x1c
l32i a7, sp, 0x18
l32i a6, sp, 0x14
l32i a5, sp, 0x10
l32i a4, sp, 0x0c
l32i a3, sp, 0x08
movi a0, 0x33 # Reset PS
wsr a0, ps
rsync
/* set dport nmi status to 1 (wDev_ProcessFiq clears bit 0 and verifies it
@ -491,10 +663,10 @@ NMIExceptionHandler:
movi a0, 0x3ff00000
movi a2, 0x1
s32i a2, a0, 0
l32i a2, sp, 0x04
l32i a0, sp, 0x00
movi a1, 0x0
xsr a1, excsave3 # Load stack back from excsave3, clear excsave3
rfi 3
.section .rodata


@ -117,5 +117,10 @@
#define IROM __attribute__((section(".irom0.literal"))) const
#endif
uint32_t set_malloc_regions(uint32_t mask);
#define MALLOC_MASK_PREFER_IRAM 0xfffdfffc
#define MALLOC_MASK_PREFER_DRAM 0
#define MALLOC_MASK_DRAM 0xfffffffe
#define MALLOC_MASK_IRAM 0xfffffffd
#endif


@ -34,39 +34,30 @@
#error Too many lwip sockets for the FD_SETSIZE.
#endif
extern void *xPortSupervisorStackPointer;
IRAM void *_sbrk_r (struct _reent *r, ptrdiff_t incr)
void *_sbrk_r (struct _reent *r, ptrdiff_t incr)
{
extern char _heap_start; /* linker script defined */
static char * heap_end;
char * prev_heap_end;
if (heap_end == NULL)
heap_end = &_heap_start;
prev_heap_end = heap_end;
intptr_t sp = (intptr_t)xPortSupervisorStackPointer;
if(sp == 0) /* scheduler not started */
SP(sp);
if ((intptr_t)heap_end + incr >= sp)
{
r->_errno = ENOMEM;
return (caddr_t)-1;
}
heap_end += incr;
return (caddr_t) prev_heap_end;
r->_errno = ENOMEM;
return (caddr_t)-1;
}
/* If there is a restriction on DRAM usage then skip this chunk if it is in
* DRAM, and if there is a restriction on IRAM usage then skip this chunk if
* it is in IRAM. */
IRAM int _malloc_region_masked(void *r, unsigned int mask)
{
if ( ((mask & 1) && (uint32_t)r < 0x40000000) ||
((mask & 2) && (uint32_t)r >= 0x40100000) ) {
return 1;
}
/* Insert a disjoint region into the nano malloc pool. Create a malloc chunk,
* filling in the size field as newlib nano malloc expects, and then free it. */
void nano_malloc_insert_chunk(void *start, size_t size) {
*(uint32_t *)start = size;
free(start + sizeof(size_t));
return 0;
}
uint32_t set_malloc_regions(uint32_t mask)
{
uint32_t malloc_mask = _REENT->malloc_region_mask;
_REENT->malloc_region_mask = mask;
return malloc_mask;
}
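As a sketch of how the mask bits relate to the address map (illustrative only, not part of this change; it assumes the MALLOC_MASK_* definitions from the header hunk above are in scope): bit 0 of the mask excludes chunks below 0x40000000 (DRAM) and bit 1 excludes chunks at or above 0x40100000 (IRAM), so:

    #include <assert.h>

    extern int _malloc_region_masked(void *r, unsigned int mask);

    /* Hypothetical self-check of the mask semantics above. */
    static void check_region_masks(void)
    {
        assert(_malloc_region_masked((void *)0x3FFF0000, MALLOC_MASK_IRAM) == 1);        /* DRAM chunk skipped */
        assert(_malloc_region_masked((void *)0x40108000, MALLOC_MASK_DRAM) == 1);        /* IRAM chunk skipped */
        assert(_malloc_region_masked((void *)0x40108000, MALLOC_MASK_PREFER_DRAM) == 0); /* nothing excluded */
    }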
/* syscall implementation for stdio write to UART */


@ -22,6 +22,10 @@
rboot_megabyte:
.byte RBOOT_MEGABYTE_DEFAULT
.global enable_low_icache
enable_low_icache:
.byte 1
.section .data
.local cache_return_save
.align 4
@ -44,7 +48,8 @@ Cache_Read_Enable:
/* map the first megabyte of flash */
movi a2, 0
movi a3, 0
movi a4, 1
movi a4, enable_low_icache
l8ui a4, a4, 0
call0 rom_Cache_Read_Enable
movi a3, RBOOT_CONFIG_BASE
@ -67,7 +72,8 @@ Cache_Read_Enable:
l32i a4, a4, 0
extui a2, a4, 0, 1 /* a2 is now lsb of a4 (odd/even) */
srli a3, a4, 1 /* a3 is half value of mb */
movi a4, 1
movi a4, enable_low_icache
l8ui a4, a4, 0
call0 rom_Cache_Read_Enable
movi a0, cache_return_save /* restore a0 return address */
l32i a0, a0, 0