From ed8470631f8940922ae3d3a432de5978bbaca7d2 Mon Sep 17 00:00:00 2001
From: Angus Gratton <gus@projectgus.com>
Date: Thu, 24 Sep 2015 08:58:34 +1000
Subject: [PATCH 1/8] Consolidate interrupt management in core as
 esp/interrupts.h & esp_interrupts.c

---
 FreeRTOS/Source/portable/esp8266/port.c       | 57 ------------------
 FreeRTOS/Source/portable/esp8266/portmacro.h  |  4 +-
 core/esp_interrupts.c                         | 60 +++++++++++++++++++
 core/include/esp/cpu.h                        | 33 ----------
 core/include/esp/gpio.h                       |  3 +-
 .../include/esp/interrupts.h                  | 19 +++++-
 core/include/esp/timer.h                      |  3 +-
 core/include/esp8266.h                        |  2 +-
 8 files changed, 83 insertions(+), 98 deletions(-)
 create mode 100644 core/esp_interrupts.c
 delete mode 100644 core/include/esp/cpu.h
 rename include/xtensa_interrupts.h => core/include/esp/interrupts.h (79%)

diff --git a/FreeRTOS/Source/portable/esp8266/port.c b/FreeRTOS/Source/portable/esp8266/port.c
index 71c6658..89c95a1 100644
--- a/FreeRTOS/Source/portable/esp8266/port.c
+++ b/FreeRTOS/Source/portable/esp8266/port.c
@@ -265,60 +265,3 @@ void IRAM vPortExitCritical( void )
 	portENABLE_INTERRUPTS();
 }
 
-/*-----------------------------------------------------------*/
-
-/* Main ISR handler for FreeRTOS side of the ESP libs?
-
-   As far as I can tell, the "real" Xtensa ISRs ("Exceptions") are
-   handled in libmain.a (xtensa_vectors.o) which then can call into here
-   passing an interrupt mask.
-*/
-
-_xt_isr isr[16];
-
-void IRAM _xt_isr_attach(uint8_t i, _xt_isr func)
-{
-    isr[i] = func;
-}
-
-uint16_t IRAM _xt_isr_handler(uint16_t i)
-{
-    uint8_t index;
-
-    /* I think this is implementing some kind of interrupt priority or
-       short-circuiting an expensive ffs for most common interrupts - ie
-       WDT And GPIO are common or high priority, then remaining flags.
-    */
-    if (i & (1 << INUM_WDT)) {
-	index = INUM_WDT;
-    }
-    else if (i & (1 << INUM_GPIO)) {
-	index = INUM_GPIO;
-    }else {
-	index = __builtin_ffs(i) - 1;
-
-	if (index == INUM_MAX) {
-	    /* I don't understand what happens here. INUM_MAX is not
-	       the highest interrupt number listed (and the isr array
-	       has 16 entries).
-
-	       Clearing that flag and then setting index to
-	       __builtin_ffs(i)-1 may result in index == 255 if no
-	       higher flags are set, unless this is guarded against
-	       somehow by the caller?
-
-	       I also don't understand why the code is written like
-	       this in esp_iot_rtos_sdk instead of just putting the i
-	       &= line near the top... Probably no good reason?
-	    */
-	    i &= ~(1 << INUM_MAX);
-	    index = __builtin_ffs(i) - 1;
-	}
-    }
-
-    _xt_clear_ints(1<<index);
-
-    isr[index]();
-
-    return i & ~(1 << index);
-}
diff --git a/FreeRTOS/Source/portable/esp8266/portmacro.h b/FreeRTOS/Source/portable/esp8266/portmacro.h
index da68720..3345ab5 100644
--- a/FreeRTOS/Source/portable/esp8266/portmacro.h
+++ b/FreeRTOS/Source/portable/esp8266/portmacro.h
@@ -73,8 +73,8 @@ extern "C" {
 #include "esp8266.h"
 #include "espressif/esp8266/ets_sys.h"
 #include <stdint.h>
-#include    "xtensa_rtos.h"
-#include "xtensa_interrupts.h"
+#include "xtensa_rtos.h"
+#include <esp/interrupts.h>
 
 /*-----------------------------------------------------------
  * Port specific definitions for ESP8266
diff --git a/core/esp_interrupts.c b/core/esp_interrupts.c
new file mode 100644
index 0000000..481011a
--- /dev/null
+++ b/core/esp_interrupts.c
@@ -0,0 +1,60 @@
+/* ESP8266 Xtensa interrupt management functions
+ *
+ *
+ * Part of esp-open-rtos
+ * Copyright (C) 2015 Angus Gratton
+ * BSD Licensed as described in the file LICENSE
+ */
+#include <esp/interrupts.h>
+
+_xt_isr isr[16];
+
+void IRAM _xt_isr_attach(uint8_t i, _xt_isr func)
+{
+    isr[i] = func;
+}
+
+/* This ISR handler is taken directly from the FreeRTOS port and
+   probably could use a cleanup.
+*/
+uint16_t IRAM _xt_isr_handler(uint16_t i)
+{
+    uint8_t index;
+
+    /* I think this is implementing some kind of interrupt priority or
+       short-circuiting an expensive ffs for most common interrupts - ie
+       WDT And GPIO are common or high priority, then remaining flags.
+    */
+    if (i & (1 << INUM_WDT)) {
+	index = INUM_WDT;
+    }
+    else if (i & (1 << INUM_GPIO)) {
+	index = INUM_GPIO;
+    }else {
+	index = __builtin_ffs(i) - 1;
+
+	if (index == INUM_MAX) {
+	    /* I don't understand what happens here. INUM_MAX is not
+	       the highest interrupt number listed (and the isr array
+	       has 16 entries).
+
+	       Clearing that flag and then setting index to
+	       __builtin_ffs(i)-1 may result in index == 255 if no
+	       higher flags are set, unless this is guarded against
+	       somehow by the caller?
+
+	       I also don't understand why the code is written like
+	       this in esp_iot_rtos_sdk instead of just putting the i
+	       &= line near the top... Probably no good reason?
+	    */
+	    i &= ~(1 << INUM_MAX);
+	    index = __builtin_ffs(i) - 1;
+	}
+    }
+
+    _xt_clear_ints(1<<index);
+
+    isr[index]();
+
+    return i & ~(1 << index);
+}
diff --git a/core/include/esp/cpu.h b/core/include/esp/cpu.h
deleted file mode 100644
index 923e142..0000000
--- a/core/include/esp/cpu.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* esp/cpu.h
- *
- * Details relating to the ESP8266 Xtensa core.
- *
- */
-#ifndef _ESP_CPU_H
-#define _ESP_CPU_H
-#include <stdbool.h>
-
-/* Interrupt numbers for level 1 exception handler.
- *
- * Currently the UserExceptionVector calls down to _xt_isr_handler,
- * defined in port.c, for at least some of these interrupts. Some are handled
- * on the SDK side, though.
- */
-typedef enum {
-    INUM_SPI = 2,
-    INUM_GPIO = 4,
-    INUM_UART = 5,
-    INUM_MAX = 6, /* in some places this is documented as timer0 CCOMPARE0 interrupt */
-    INUM_SOFT = 7,
-    INUM_WDT = 8,
-    INUM_TIMER_FRC1 = 9,
-
-    /* FRC2 default handler. Configured by sdk_ets_timer_init, which
-       runs as part of default libmain.a startup code, assigns
-       interrupt handler to sdk_vApplicationTickHook+0x68
-     */
-    INUM_TIMER_FRC2 = 10,
-} xt_isr_num_t;
-
-#endif
-
diff --git a/core/include/esp/gpio.h b/core/include/esp/gpio.h
index 4f431f2..da7bcfa 100644
--- a/core/include/esp/gpio.h
+++ b/core/include/esp/gpio.h
@@ -11,8 +11,7 @@
 #include <stdbool.h>
 #include "esp/gpio_regs.h"
 #include "esp/iomux.h"
-#include "esp/cpu.h"
-#include "xtensa_interrupts.h"
+#include "esp/interrupts.h"
 
 typedef enum {
     GPIO_INPUT,
diff --git a/include/xtensa_interrupts.h b/core/include/esp/interrupts.h
similarity index 79%
rename from include/xtensa_interrupts.h
rename to core/include/esp/interrupts.h
index d06e2b9..f3d8279 100644
--- a/include/xtensa_interrupts.h
+++ b/core/include/esp/interrupts.h
@@ -1,4 +1,4 @@
-/* Xtensa interrupt management functions
+/* ESP8266 Xtensa interrupt management functions
  *
  * Some (w/ sdk_ prefix) are implemented in binary libs, rest are
  * inlines replacing functions in the binary libraries.
@@ -15,6 +15,23 @@
 #include <xtensa/hal.h>
 #include <common_macros.h>
 
+/* Interrupt numbers for level 1 exception handler. */
+typedef enum {
+    INUM_SPI = 2,
+    INUM_GPIO = 4,
+    INUM_UART = 5,
+    INUM_MAX = 6, /* in some places this is documented as timer0 CCOMPARE0 interrupt */
+    INUM_SOFT = 7,
+    INUM_WDT = 8,
+    INUM_TIMER_FRC1 = 9,
+
+    /* FRC2 default handler. Configured by sdk_ets_timer_init, which
+       runs as part of default libmain.a startup code, assigns
+       interrupt handler to sdk_vApplicationTickHook+0x68
+     */
+    INUM_TIMER_FRC2 = 10,
+} xt_isr_num_t;
+
 void sdk__xt_int_exit (void);
 void _xt_user_exit (void);
 void sdk__xt_tick_timer_init (void);
diff --git a/core/include/esp/timer.h b/core/include/esp/timer.h
index b83a0ee..29c4a5b 100644
--- a/core/include/esp/timer.h
+++ b/core/include/esp/timer.h
@@ -10,9 +10,8 @@
 #define _ESP_TIMER_H
 
 #include <stdbool.h>
-#include <xtensa_interrupts.h>
 #include "esp/timer_regs.h"
-#include "esp/cpu.h"
+#include "esp/interrupts.h"
 
 typedef enum {
     FRC1 = 0,
diff --git a/core/include/esp8266.h b/core/include/esp8266.h
index 968a522..d30ec0e 100644
--- a/core/include/esp8266.h
+++ b/core/include/esp8266.h
@@ -13,7 +13,7 @@
 
 #include "common_macros.h"
 #include "esp/registers.h"
-#include "esp/cpu.h"
+#include "esp/interrupts.h"
 #include "esp/iomux.h"
 #include "esp/gpio.h"
 #include "esp/timer.h"

From 89c481c606b0afaab2098473ad7d8dd96a54d47b Mon Sep 17 00:00:00 2001
From: Angus Gratton <gus@projectgus.com>
Date: Thu, 24 Sep 2015 10:17:07 +1000
Subject: [PATCH 2/8] Simplify interrupt and RTOS timer tick handlers

RTOS Timer tick handler is now the same as any other ISR.

This causes a few subtle behaviour changes that seem OK but are worth noting:

* RTOS tick handler sdk__xt_timer_int() is now called from one stack
  frame deeper (inside _xt_isr_handler()), whereas before it was called
  from the level above in UserHandleInterrupt. I can't see any way that
  the extra ~40 bytes of stack use here hurt, though.

* sdk__xt_timer_int() was previous called after all other interrupts
  flagged in the handler, now it's called before the TIMER FRC1 & FRC2
  handlers. The tick handler doesn't appear to do anything particularly
  timing intensive, though.

* GPIO interrupt (value 3) is now lower priority than the SPI
  interrupt (value 2), whereas before it would have been called before
  SPI if both interrupts triggered at once.
---
 FreeRTOS/Source/portable/esp8266/port.c |  2 +
 core/esp_interrupts.c                   | 55 ++++++++-----------------
 core/exception_vectors.S                | 39 +++++-------------
 core/include/esp/interrupts.h           |  3 +-
 4 files changed, 31 insertions(+), 68 deletions(-)

diff --git a/FreeRTOS/Source/portable/esp8266/port.c b/FreeRTOS/Source/portable/esp8266/port.c
index 89c95a1..fc452f0 100644
--- a/FreeRTOS/Source/portable/esp8266/port.c
+++ b/FreeRTOS/Source/portable/esp8266/port.c
@@ -198,6 +198,8 @@ portBASE_TYPE xPortStartScheduler( void )
     }
 
     /* Initialize system tick timer interrupt and schedule the first tick. */
+    _xt_isr_attach(INUM_TICK, sdk__xt_timer_int);
+    _xt_isr_unmask(BIT(INUM_TICK));
     sdk__xt_tick_timer_init();
 
     vTaskSwitchContext();
diff --git a/core/esp_interrupts.c b/core/esp_interrupts.c
index 481011a..4fd68b7 100644
--- a/core/esp_interrupts.c
+++ b/core/esp_interrupts.c
@@ -14,47 +14,26 @@ void IRAM _xt_isr_attach(uint8_t i, _xt_isr func)
     isr[i] = func;
 }
 
-/* This ISR handler is taken directly from the FreeRTOS port and
-   probably could use a cleanup.
+/* Generic ISR handler.
+
+   Handles all flags set for interrupts in 'intset'.
 */
-uint16_t IRAM _xt_isr_handler(uint16_t i)
+uint16_t IRAM _xt_isr_handler(uint16_t intset)
 {
-    uint8_t index;
-
-    /* I think this is implementing some kind of interrupt priority or
-       short-circuiting an expensive ffs for most common interrupts - ie
-       WDT And GPIO are common or high priority, then remaining flags.
-    */
-    if (i & (1 << INUM_WDT)) {
-	index = INUM_WDT;
-    }
-    else if (i & (1 << INUM_GPIO)) {
-	index = INUM_GPIO;
-    }else {
-	index = __builtin_ffs(i) - 1;
-
-	if (index == INUM_MAX) {
-	    /* I don't understand what happens here. INUM_MAX is not
-	       the highest interrupt number listed (and the isr array
-	       has 16 entries).
-
-	       Clearing that flag and then setting index to
-	       __builtin_ffs(i)-1 may result in index == 255 if no
-	       higher flags are set, unless this is guarded against
-	       somehow by the caller?
-
-	       I also don't understand why the code is written like
-	       this in esp_iot_rtos_sdk instead of just putting the i
-	       &= line near the top... Probably no good reason?
-	    */
-	    i &= ~(1 << INUM_MAX);
-	    index = __builtin_ffs(i) - 1;
-	}
+    /* WDT has highest priority (occasional WDT resets otherwise) */
+    if(intset & BIT(INUM_WDT)) {
+        _xt_clear_ints(BIT(INUM_WDT));
+        isr[INUM_WDT]();
+        intset -= BIT(INUM_WDT);
     }
 
-    _xt_clear_ints(1<<index);
+    while(intset) {
+        uint8_t index = __builtin_ffs(intset) - 1;
+        uint16_t mask = BIT(index);
+        _xt_clear_ints(mask);
+        isr[index]();
+        intset -= mask;
+    }
 
-    isr[index]();
-
-    return i & ~(1 << index);
+    return 0;
 }
diff --git a/core/exception_vectors.S b/core/exception_vectors.S
index 5e7f20e..c201a35 100644
--- a/core/exception_vectors.S
+++ b/core/exception_vectors.S
@@ -469,7 +469,6 @@ CallNMIExceptionHandler:
  * LoadStoreCause. */
 
         .literal_position
-
         .balign  4
 UserExceptionHandler:
         .type   UserExceptionHandler, @function
@@ -490,46 +489,28 @@ UserExceptionHandler:
         wsr     a0, ps
         rsync
         rsr     a2, exccause
-        beqi    a2, CAUSE_LVL1INT, UserHandleInterrupt
-        /* Any UserException cause other than level 1 interrupt should panic */
-UserFailOtherExceptionCause:
-        break   1, 1
-        call0   sdk_user_fatal_exception_handler
-UserHandleInterrupt:
+        /* Any UserException cause other than a level 1 interrupt is fatal */
+        bnei    a2, CAUSE_LVL1INT, .UserFailOtherExceptionCause
+.UserHandleInterrupt:
         rsil    a0, 1
         rsr     a2, intenable
         rsr     a3, interrupt
         movi    a4, 0x3fff
         and     a2, a2, a3
-        and     a2, a2, a4      # a2 = 0x3FFF & INTENABLE & INTERRUPT
-UserHandleTimer:
-        movi    a3, 0xffbf
-        and     a3, a2, a3      # a3 = a2 with bit 6 cleared
-        bnez    a3, UserTimerDone   # If any non-timer interrupt bits set
-        movi    a3, 0x40
-        sub     a12, a2, a3     # a12 = a2 - 0x40 -- Will be zero if bit 6 set
-        call0   sdk__xt_timer_int  # tick timer interrupt
-        mov     a2, a12         # restore a2 from a12, ie zero
-        beqz    a2, UserIntDone
-UserTimerDone:
+        and     a2, a2, a4       # a2 = 0x3FFF & INTENABLE & INTERRUPT
         call0   _xt_isr_handler
-        bnez    a2, UserHandleTimer
-UserIntDone:
-        beqz    a2, UserIntExit
-        /* FIXME: this code will never be reached */
+        j       sdk__xt_int_exit # once finished, jumps to _xt_user_exit via stack
+
+        .literal_position
+.UserFailOtherExceptionCause:
         break   1, 1
         call0   sdk_user_fatal_exception_handler
-UserIntExit:
-        call0   sdk__xt_int_exit  # jumps to _xt_user_exit. Never returns here
 
-        .section .text
-
-/* _xt_user_exit is used to exit interrupt context. */
-/* TODO: Find a better place for this to live. */
+/* _xt_user_exit is pushed onto the stack as part of the user exception handler,
+   restores same set registers which were saved there and returns from exception */
 _xt_user_exit:
         .global _xt_user_exit
         .type _xt_user_exit, @function
-
         l32i    a0, sp, 0x8
         wsr     a0, ps
         l32i    a0, sp, 0x4
diff --git a/core/include/esp/interrupts.h b/core/include/esp/interrupts.h
index f3d8279..18d6303 100644
--- a/core/include/esp/interrupts.h
+++ b/core/include/esp/interrupts.h
@@ -20,7 +20,7 @@ typedef enum {
     INUM_SPI = 2,
     INUM_GPIO = 4,
     INUM_UART = 5,
-    INUM_MAX = 6, /* in some places this is documented as timer0 CCOMPARE0 interrupt */
+    INUM_TICK = 6, /* RTOS timer tick, possibly xtensa CPU CCOMPARE0(?) */
     INUM_SOFT = 7,
     INUM_WDT = 8,
     INUM_TIMER_FRC1 = 9,
@@ -35,6 +35,7 @@ typedef enum {
 void sdk__xt_int_exit (void);
 void _xt_user_exit (void);
 void sdk__xt_tick_timer_init (void);
+void sdk__xt_timer_int(void);
 void sdk__xt_timer_int1(void);
 
 INLINED uint32_t _xt_get_intlevel(void)

From c595562343d2d064d95f82425b64de738fa85d42 Mon Sep 17 00:00:00 2001
From: Angus Gratton <gus@projectgus.com>
Date: Tue, 29 Sep 2015 13:25:42 +1000
Subject: [PATCH 3/8] Add assembly code style to README & emacs dir-locals

---
 .dir-locals.el | 16 ++++++++++++++++
 README.md      | 12 +++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/.dir-locals.el b/.dir-locals.el
index d14fc88..6eccfa9 100644
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -6,4 +6,20 @@
   (c-file-style . "bsd")
   (c-basic-offset . 4)
   )
+ (asm-mode
+  (indent-tabs-mode . nil)
+  ; this is basically a hack so asm-mode indents with spaces not tabs
+  ; taken from http://stackoverflow.com/questions/2668563/emacs-indentation-in-asm-mode
+  ; (moving to gas-mode may be a better choice)
+  (tab-stop-list (quote (4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 84 88 92 96 100 104 108 112 116 120)))
+  (asm-comment-char . "#")
+  )
  )
+
+; IMPORTANT: If you want to write assembly and have indenting to not be infuriating,
+; you probably also want this in your .emacs file:
+;
+; (add-hook 'asm-mode-hook '(lambda () (setq indent-line-function 'indent-relative)))
+;
+; This is not safe to set as a local variable.
+
diff --git a/README.md b/README.md
index 144ea33..fc2c70b 100644
--- a/README.md
+++ b/README.md
@@ -134,7 +134,17 @@ The best way to write suitable code is to first add documentation somewhere like
 
 ## Coding Style
 
-For new contributions, please use BSD style and indent using 4 spaces. If you're an emacs user then there is a .dir-locals.el file in the root which configures cc-mode.
+For new contributions in C, please use BSD style and indent using 4 spaces.
+
+For assembly, please use the following:
+* Instructions indented using 8 spaces.
+* Inline comments use `#` as a comment delimiter.
+* Comments on their own line(s) use `/*`..`*/`.
+* First operand of each instruction should be vertically aligned where possible.
+* For xtensa special registers, prefer `wsr aX, SR` over `wsr.SR aX`
+
+If you're an emacs user then there is a .dir-locals.el file in the root which configures cc-mode and asm-mode (you will need to approve some variable values as safe). See also
+the additional comments in .dir-locals.el, if you're editing assembly code.
 
 Upstream code is left with the indentation and style of the upstream project.
 

From 4448380c5edb31b06a8a2121e5563a068a056e1b Mon Sep 17 00:00:00 2001
From: Angus Gratton <gus@projectgus.com>
Date: Tue, 29 Sep 2015 14:37:33 +1000
Subject: [PATCH 4/8] NMIExceptionHandler: Clean up, refactor to use excsave3
 for stack pointer

---
 core/exception_vectors.S | 154 +++++++++++++++++----------------------
 1 file changed, 68 insertions(+), 86 deletions(-)

diff --git a/core/exception_vectors.S b/core/exception_vectors.S
index c201a35..13dc8fb 100644
--- a/core/exception_vectors.S
+++ b/core/exception_vectors.S
@@ -27,9 +27,7 @@
 
 NMIHandlerStack:                # stack space for NMI handler
         .skip   4*0x100
-.LNMIHandlerStackTop:
-NMIRegisterSaved:               # register space for saving NMI registers
-        .skip   4*(16 + 6)
+.NMIHandlerStackTop:
 
 LoadStoreErrorHandlerStack:
         .word   0       # a0
@@ -67,10 +65,7 @@ DebugExceptionVector:
         .org    VecBase + 0x20
 NMIExceptionVector:
         .type   NMIExceptionVector, @function
-
-        wsr     a0, excsave3
-        call0   CallNMIExceptionHandler
-        rfi     3  # Should never be reached
+        j   NMIExceptionHandler
 
         .org    VecBase + 0x30
 KernelExceptionVector:
@@ -373,92 +368,80 @@ call_user_start:
 
         .section .vecbase.text, "x"
 
-/* Save register relative to a0 */
-.macro SAVE_REG register, regnum
-        s32i \register, a0, (4 * (\regnum + 6))
-.endm
-
-/* Load register relative to sp */
-.macro LOAD_REG register, regnum
-        l32i \register, sp, (4 * (\regnum + 6))
-.endm
-
         .literal_position
-
         .balign  16
-CallNMIExceptionHandler:
-        .type   CallNMIExceptionHandler, @function
+NMIExceptionHandler:
+        .type   NMIExceptionHandler, @function
 
-        movi    a0, NMIRegisterSaved
-        SAVE_REG a2, 2
-        SAVE_REG sp, 1
-        SAVE_REG a3, 3
-        rsr     a2, excsave3    # a2 is now former a0
-        SAVE_REG a4, 4
-        SAVE_REG a2, 0
-        rsr     a3, epc1
-        rsr     a4, exccause
-        SAVE_REG a3, -5
-        SAVE_REG a4, -4
-        rsr     a3, excvaddr
-        SAVE_REG a3, -3
-        rsr     a3, excsave1
-        SAVE_REG a3, -2
-        SAVE_REG a5, 5
-        SAVE_REG a6, 6
-        SAVE_REG a7, 7
-        SAVE_REG a8, 8
-        SAVE_REG a9, 9
-        SAVE_REG a10, 10
-        SAVE_REG a11, 11
-        SAVE_REG a12, 12
-        SAVE_REG a13, 13
-        SAVE_REG a14, 14
-        SAVE_REG a15, 15
-        movi    sp, .LNMIHandlerStackTop
-        movi    a0, 0
-        movi    a2, 0x23        # argument for handler
-        wsr     a2, ps
+        wsr     sp, excsave3	# excsave3 holds user stack
+	movi	sp, .NMIHandlerStackTop - 0x54
+        s32i    a0, sp, 0x00
+        s32i    a2, sp, 0x08
+        s32i    a3, sp, 0x0c
+        s32i    a4, sp, 0x10
+        s32i    a5, sp, 0x14
+        s32i    a6, sp, 0x18
+        s32i    a7, sp, 0x1c
+        s32i    a8, sp, 0x20
+        s32i    a9, sp, 0x24
+        s32i    a10, sp, 0x28
+        s32i    a11, sp, 0x2c
+        s32i    a12, sp, 0x30
+        s32i    a13, sp, 0x34
+        s32i    a14, sp, 0x38
+        s32i    a15, sp, 0x3c
+        rsr     a0, epc1
+        s32i    a0, sp, 0x40
+        rsr     a0, exccause
+        s32i    a0, sp, 0x44
+        rsr     a0, excsave1
+        s32i    a0, sp, 0x48
+        rsr     a0, excvaddr
+        s32i    a0, sp, 0x4c
+        rsr     a0, sar
+        s32i    a0, sp, 0x50
+        movi    a0, 0x23        # Override PS for NMI handler
+        wsr     a0, ps
         rsync
-        rsr     a14, sar
-        s32i    a14, sp, 0      # this is also NMIRegisterSaved+0
         call0   sdk_wDev_ProcessFiq
-        l32i    a15, sp, 0
-        wsr     a15, sar
-        movi    a2, 0x33
-        wsr     a2, ps
+
+	l32i 	a0, sp, 0x50
+	wsr	a0, sar
+	l32i	a0, sp, 0x4c
+	wsr	a0, excvaddr
+	l32i	a0, sp, 0x48
+	wsr	a0, excsave1
+	l32i	a0, sp, 0x44
+	wsr	a0, exccause
+	l32i	a0, sp, 0x40
+	wsr	a0, epc1
+	l32i	a15, sp, 0x3c
+	l32i	a14, sp, 0x38
+	l32i	a13, sp, 0x34
+	l32i	a12, sp, 0x30
+	l32i	a11, sp, 0x2c
+	l32i 	a10, sp, 0x28
+	l32i	a9, sp, 0x24
+	l32i	a8, sp, 0x20
+	l32i	a7, sp, 0x1c
+	l32i 	a6, sp, 0x18
+	l32i	a5, sp, 0x14
+	l32i	a4, sp, 0x10
+	l32i	a3, sp, 0x0c
+        movi    a0, 0x33    	  # Reset PS
+        wsr     a0, ps
         rsync
-        LOAD_REG a4, 4
-        LOAD_REG a5, 5
-        LOAD_REG a6, 6
-        LOAD_REG a7, 7
-        LOAD_REG a8, 8
-        LOAD_REG a9, 9
-        LOAD_REG a10, 10
-        LOAD_REG a11, 11
-        LOAD_REG a12, 12
-        LOAD_REG a13, 13
-        LOAD_REG a14, 14
-        LOAD_REG a15, 15
-        LOAD_REG a2, -5
-        LOAD_REG a3, -4
-        wsr     a2, epc1
-        wsr     a3, exccause
-        LOAD_REG a2, -3
-        LOAD_REG a3, -2
-        wsr     a2, excvaddr
-        wsr     a3, excsave1
-        LOAD_REG a0, 0
-        /* set dport nmi status bit 0 (wDev_ProcessFiq clears & verifies this
-         * bit stays cleared, see
+        /* set dport nmi status to 1 (wDev_ProcessFiq clears bit 0 and verifies it
+         * stays cleared, see
          * http://esp8266-re.foogod.com/wiki/WDev_ProcessFiq_%28IoT_RTOS_SDK_0.9.9%29)
          */
-        movi    a2, 0x3ff00000
-        movi    a3, 0x1
-        s32i    a3, a2, 0
-        LOAD_REG a2, 2
-        LOAD_REG a3, 3
-        LOAD_REG a1, 1
+        movi    a0, 0x3ff00000
+        movi    a2, 0x1
+        s32i    a2, a0, 0
+	l32i	a2, sp, 0x08
+	l32i	a0, sp, 0x00
+	movi	a1, 0x0
+	xsr	a1, excsave3       # Load stack back from excsave3, clear excsave3
         rfi     3
 
 /*********************** General UserException Handler ***********************/
@@ -519,4 +502,3 @@ _xt_user_exit:
         l32i    sp, sp, 0x10
         rsync
         rfe
-

From 697d5b72bc94ac3e0021a9b29df6ac933e16b618 Mon Sep 17 00:00:00 2001
From: Angus Gratton <gus@projectgus.com>
Date: Tue, 29 Sep 2015 14:51:24 +1000
Subject: [PATCH 5/8] NMIExceptionHandler: Don't save callee-saved registers,
 fix stack alignment

NMI routine sdk_wDev_ProcessFiq seems to be written in C, meets ABI
calling conventions for callee-saved registers. Not sure why SDK
handlers saved them.

NMI handler now also meets the ABI requirement that stack is 16 byte
aligned (doesn't seem strictly necessary, but can't hurt.)
---
 core/exception_vectors.S | 72 ++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/core/exception_vectors.S b/core/exception_vectors.S
index 13dc8fb..fed2f67 100644
--- a/core/exception_vectors.S
+++ b/core/exception_vectors.S
@@ -25,10 +25,12 @@
 
         .section .bss
 
+        .balign 16
 NMIHandlerStack:                # stack space for NMI handler
         .skip   4*0x100
 .NMIHandlerStackTop:
 
+        .balign 16
 LoadStoreErrorHandlerStack:
         .word   0       # a0
         .word   0       # (unused)
@@ -374,60 +376,52 @@ NMIExceptionHandler:
         .type   NMIExceptionHandler, @function
 
         wsr     sp, excsave3	# excsave3 holds user stack
-	movi	sp, .NMIHandlerStackTop - 0x54
+        movi    sp, .NMIHandlerStackTop - 0x40
         s32i    a0, sp, 0x00
-        s32i    a2, sp, 0x08
-        s32i    a3, sp, 0x0c
-        s32i    a4, sp, 0x10
-        s32i    a5, sp, 0x14
-        s32i    a6, sp, 0x18
-        s32i    a7, sp, 0x1c
-        s32i    a8, sp, 0x20
-        s32i    a9, sp, 0x24
-        s32i    a10, sp, 0x28
-        s32i    a11, sp, 0x2c
-        s32i    a12, sp, 0x30
-        s32i    a13, sp, 0x34
-        s32i    a14, sp, 0x38
-        s32i    a15, sp, 0x3c
+        s32i    a2, sp, 0x04
+        s32i    a3, sp, 0x08
+        s32i    a4, sp, 0x0c
+        s32i    a5, sp, 0x10
+        s32i    a6, sp, 0x14
+        s32i    a7, sp, 0x18
+        s32i    a8, sp, 0x1c
+        s32i    a9, sp, 0x20
+        s32i    a10, sp, 0x24
+        s32i    a11, sp, 0x28
         rsr     a0, epc1
-        s32i    a0, sp, 0x40
+        s32i    a0, sp, 0x2c
         rsr     a0, exccause
-        s32i    a0, sp, 0x44
+        s32i    a0, sp, 0x30
         rsr     a0, excsave1
-        s32i    a0, sp, 0x48
+        s32i    a0, sp, 0x34
         rsr     a0, excvaddr
-        s32i    a0, sp, 0x4c
+        s32i    a0, sp, 0x38
         rsr     a0, sar
-        s32i    a0, sp, 0x50
+        s32i    a0, sp, 0x3c
         movi    a0, 0x23        # Override PS for NMI handler
         wsr     a0, ps
         rsync
         call0   sdk_wDev_ProcessFiq
 
-	l32i 	a0, sp, 0x50
+	l32i 	a0, sp, 0x3c
 	wsr	a0, sar
-	l32i	a0, sp, 0x4c
+	l32i	a0, sp, 0x38
 	wsr	a0, excvaddr
-	l32i	a0, sp, 0x48
+	l32i	a0, sp, 0x34
 	wsr	a0, excsave1
-	l32i	a0, sp, 0x44
+	l32i	a0, sp, 0x30
 	wsr	a0, exccause
-	l32i	a0, sp, 0x40
+	l32i	a0, sp, 0x2c
 	wsr	a0, epc1
-	l32i	a15, sp, 0x3c
-	l32i	a14, sp, 0x38
-	l32i	a13, sp, 0x34
-	l32i	a12, sp, 0x30
-	l32i	a11, sp, 0x2c
-	l32i 	a10, sp, 0x28
-	l32i	a9, sp, 0x24
-	l32i	a8, sp, 0x20
-	l32i	a7, sp, 0x1c
-	l32i 	a6, sp, 0x18
-	l32i	a5, sp, 0x14
-	l32i	a4, sp, 0x10
-	l32i	a3, sp, 0x0c
+	l32i	a11, sp, 0x28
+	l32i 	a10, sp, 0x24
+	l32i	a9, sp, 0x20
+	l32i	a8, sp, 0x1c
+	l32i	a7, sp, 0x18
+	l32i 	a6, sp, 0x14
+	l32i	a5, sp, 0x10
+	l32i	a4, sp, 0x0c
+	l32i	a3, sp, 0x08
         movi    a0, 0x33    	  # Reset PS
         wsr     a0, ps
         rsync
@@ -438,7 +432,7 @@ NMIExceptionHandler:
         movi    a0, 0x3ff00000
         movi    a2, 0x1
         s32i    a2, a0, 0
-	l32i	a2, sp, 0x08
+	l32i	a2, sp, 0x04
 	l32i	a0, sp, 0x00
 	movi	a1, 0x0
 	xsr	a1, excsave3       # Load stack back from excsave3, clear excsave3

From d39b3f6bb6cdd874a5128f05fafd7601cd9a1c3f Mon Sep 17 00:00:00 2001
From: Angus Gratton <gus@projectgus.com>
Date: Tue, 29 Sep 2015 17:31:38 +1000
Subject: [PATCH 6/8] NMI Handler: Save 512 bytes of RAM via NMI stack space.
 Add stack overflow detection.

---
 core/exception_vectors.S | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/core/exception_vectors.S b/core/exception_vectors.S
index fed2f67..0dba785 100644
--- a/core/exception_vectors.S
+++ b/core/exception_vectors.S
@@ -25,9 +25,16 @@
 
         .section .bss
 
+/* Stack space for NMI handler
+
+   NMI handler stack high water mark measured at 0x134 bytes. Any use
+   of the NMI timer callback will add stack overhead as well.
+
+   The NMI handler does a basic check for stack overflow
+*/
         .balign 16
-NMIHandlerStack:                # stack space for NMI handler
-        .skip   4*0x100
+NMIHandlerStack:
+        .skip 0x200
 .NMIHandlerStackTop:
 
         .balign 16
@@ -368,6 +375,8 @@ call_user_start:
 
 /*************************** NMI Exception Handler ***************************/
 
+#define NMI_STACK_CANARY 0xABBABABA
+
         .section .vecbase.text, "x"
 
         .literal_position
@@ -401,8 +410,20 @@ NMIExceptionHandler:
         movi    a0, 0x23        # Override PS for NMI handler
         wsr     a0, ps
         rsync
+
+        /* mark the stack overflow point before we call the actual NMI handler */
+        movi    a0, NMIHandlerStack
+        movi    a2, NMI_STACK_CANARY
+        s32i    a2, a0, 0x00
+
         call0   sdk_wDev_ProcessFiq
 
+        /* verify we didn't overflow */
+        movi    a0, NMIHandlerStack
+        l32i    a3, a0, 0
+        movi    a2, NMI_STACK_CANARY
+        bne    a3, a2, .NMIFatalStackOverflow
+
 	l32i 	a0, sp, 0x3c
 	wsr	a0, sar
 	l32i	a0, sp, 0x38
@@ -438,6 +459,19 @@ NMIExceptionHandler:
 	xsr	a1, excsave3       # Load stack back from excsave3, clear excsave3
         rfi     3
 
+        .section .rodata
+
+.NMIStackOverflowErrorMsg:
+        .string "\nFATAL: NMI Stack Overflow\n"
+
+        .section .vecbase.text, "x"
+
+.NMIFatalStackOverflow:
+        movi a2, .NMIStackOverflowErrorMsg
+        call0 printf
+.NMIInfiniteLoop:
+        j .NMIInfiniteLoop /* TODO: replace with call to abort() */
+
 /*********************** General UserException Handler ***********************/
 
         .section .vecbase.text, "x"

From 0d35c0c02d7ee08603e210414cc80b3388aae003 Mon Sep 17 00:00:00 2001
From: Angus Gratton <gus@projectgus.com>
Date: Tue, 6 Oct 2015 18:24:35 +1100
Subject: [PATCH 7/8] exception_vectors.S: Use .Lnnn syntax for local labels,
 as noted by @foogod

---
 core/exception_vectors.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/core/exception_vectors.S b/core/exception_vectors.S
index 0dba785..64c1919 100644
--- a/core/exception_vectors.S
+++ b/core/exception_vectors.S
@@ -501,8 +501,8 @@ UserExceptionHandler:
         rsync
         rsr     a2, exccause
         /* Any UserException cause other than a level 1 interrupt is fatal */
-        bnei    a2, CAUSE_LVL1INT, .UserFailOtherExceptionCause
-.UserHandleInterrupt:
+        bnei    a2, CAUSE_LVL1INT, .LUserFailOtherExceptionCause
+.LUserHandleInterrupt:
         rsil    a0, 1
         rsr     a2, intenable
         rsr     a3, interrupt
@@ -513,7 +513,7 @@ UserExceptionHandler:
         j       sdk__xt_int_exit # once finished, jumps to _xt_user_exit via stack
 
         .literal_position
-.UserFailOtherExceptionCause:
+.LUserFailOtherExceptionCause:
         break   1, 1
         call0   sdk_user_fatal_exception_handler
 

From 12399c05ef003ec38831eab840ef2358c5a289a2 Mon Sep 17 00:00:00 2001
From: Angus Gratton <gus@projectgus.com>
Date: Tue, 6 Oct 2015 18:25:48 +1100
Subject: [PATCH 8/8] exception_vectors: Use call0 for sdk__xt_int_exit, in
 case it needs a literal

---
 core/exception_vectors.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/exception_vectors.S b/core/exception_vectors.S
index 64c1919..c1f1761 100644
--- a/core/exception_vectors.S
+++ b/core/exception_vectors.S
@@ -510,7 +510,7 @@ UserExceptionHandler:
         and     a2, a2, a3
         and     a2, a2, a4       # a2 = 0x3FFF & INTENABLE & INTERRUPT
         call0   _xt_isr_handler
-        j       sdk__xt_int_exit # once finished, jumps to _xt_user_exit via stack
+        call0   sdk__xt_int_exit # once finished, jumps to _xt_user_exit via stack
 
         .literal_position
 .LUserFailOtherExceptionCause: