diff --git a/core/esp_spi.c b/core/esp_spi.c
index 320716c..d91e1b2 100644
--- a/core/esp_spi.c
+++ b/core/esp_spi.c
@@ -72,19 +72,32 @@ bool spi_init(uint8_t bus, spi_mode_t mode, uint32_t freq_divider, bool msb, spi
 
 void spi_set_mode(uint8_t bus, spi_mode_t mode)
 {
+    bool cpha = (uint8_t)mode & 1;  // bit 0 of mode selects the clock phase
+    bool cpol = (uint8_t)mode & 2;  // bit 1 of mode selects the clock polarity
+    if (cpol)
+        cpha = !cpha;  // CPHA must be inverted when CPOL = 1 (reason unknown); spi_get_mode() undoes this
+
     // CPHA
-    if ((uint8_t)mode & 1)
+    if (cpha)
         SPI(bus).USER0 |= SPI_USER0_CLOCK_OUT_EDGE;
     else
         SPI(bus).USER0 &= ~SPI_USER0_CLOCK_OUT_EDGE;
 
     // CPOL - see http://bbs.espressif.com/viewtopic.php?t=342#p5384
-    if ((uint8_t)mode & 2)
+    if (cpol)
         SPI(bus).PIN |= SPI_PIN_IDLE_EDGE;
     else
         SPI(bus).PIN &= ~SPI_PIN_IDLE_EDGE;
 }
 
+spi_mode_t spi_get_mode(uint8_t bus)
+{
+    // Mode bit 0 is the CLOCK_OUT_EDGE flag, flipped when IDLE_EDGE is set (see spi_set_mode)
+    bool out_edge = (SPI(bus).USER0 & SPI_USER0_CLOCK_OUT_EDGE) != 0;
+    bool idle_edge = (SPI(bus).PIN & SPI_PIN_IDLE_EDGE) != 0;
+    return (spi_mode_t)((idle_edge ? 2 : 0) | (out_edge != idle_edge ? 1 : 0));
+}
+
 void spi_set_msb(uint8_t bus, bool msb)
 {
     if (msb)
@@ -103,17 +116,15 @@ void spi_set_endianness(uint8_t bus, spi_endianness_t endianness)
 
 void spi_set_frequency_div(uint8_t bus, uint32_t divider)
 {
-    uint32_t predivider = divider & 0xffff;
-    uint32_t count = divider >> 16;
-    if (count > 1 || divider > 1)
+    uint32_t predivider = (divider & 0xffff) - 1;
+    uint32_t count = (divider >> 16) - 1;
+    if (count || predivider)
     {
-        predivider = predivider > SPI_CLOCK_DIV_PRE_M + 1 ? SPI_CLOCK_DIV_PRE_M + 1 : predivider;
-        count = count > SPI_CLOCK_COUNT_NUM_M + 1 ? SPI_CLOCK_COUNT_NUM_M + 1 : count;
         IOMUX.CONF &= ~(bus == 0 ? IOMUX_CONF_SPI0_CLOCK_EQU_SYS_CLOCK : IOMUX_CONF_SPI1_CLOCK_EQU_SYS_CLOCK);
-        SPI(bus).CLOCK = (((predivider - 1) & SPI_CLOCK_DIV_PRE_M) << SPI_CLOCK_DIV_PRE_S) |
-            (((count - 1)     & SPI_CLOCK_COUNT_NUM_M)  << SPI_CLOCK_COUNT_NUM_S) |
-            (((count / 2 - 1) & SPI_CLOCK_COUNT_HIGH_M) << SPI_CLOCK_COUNT_HIGH_S) |
-            (((count - 1)     & SPI_CLOCK_COUNT_LOW_M)  << SPI_CLOCK_COUNT_LOW_S);
+        SPI(bus).CLOCK = VAL2FIELD_M(SPI_CLOCK_DIV_PRE, predivider) |
+                         VAL2FIELD_M(SPI_CLOCK_COUNT_NUM, count) |
+                         VAL2FIELD_M(SPI_CLOCK_COUNT_HIGH, count / 2) |
+                         VAL2FIELD_M(SPI_CLOCK_COUNT_LOW, count);
     }
     else
     {
@@ -124,11 +135,9 @@ void spi_set_frequency_div(uint8_t bus, uint32_t divider)
 
 inline static void _set_size(uint8_t bus, uint8_t bytes)
 {
-    uint16_t bits = ((uint16_t)bytes << 3) - 1;
-    const uint32_t mask = ~((SPI_USER1_MOSI_BITLEN_M << SPI_USER1_MOSI_BITLEN_S) |
-        (SPI_USER1_MISO_BITLEN_M << SPI_USER1_MISO_BITLEN_S));
-    SPI(bus).USER1 = (SPI(bus).USER1 & mask) | (bits << SPI_USER1_MOSI_BITLEN_S) |
-        (bits << SPI_USER1_MISO_BITLEN_S);
+    uint32_t bits = ((uint32_t)bytes << 3) - 1;  // transfer length in bits, minus one
+    SPI(bus).USER1 = SET_FIELD(SPI(bus).USER1, SPI_USER1_MISO_BITLEN, bits);  // same length for the MISO field...
+    SPI(bus).USER1 = SET_FIELD(SPI(bus).USER1, SPI_USER1_MOSI_BITLEN, bits);  // ...and for the MOSI field
 }
 
 inline static void _wait(uint8_t bus)
@@ -142,71 +151,95 @@ inline static void _start(uint8_t bus)
     SPI(bus).CMD |= SPI_CMD_USR;
 }
 
-inline static uint32_t _reverse_bytes(uint32_t value)
+inline static uint32_t _swap_bytes(uint32_t value)  // reverses the order of the four bytes in value
 {
     return (value << 24) | ((value << 8) & 0x00ff0000) | ((value >> 8) & 0x0000ff00) | (value >> 24);
 }
 
-static uint32_t _spi_single_transfer (uint8_t bus, uint32_t data, uint8_t len)
+inline static uint32_t _swap_words(uint32_t value)
 {
-    _wait(bus);
-    _set_size(bus, len);
-    spi_endianness_t e = spi_get_endianness(bus);
-    SPI(bus).W0 = e == SPI_BIG_ENDIAN ? _reverse_bytes(data) : data;
-    _start(bus);
-    _wait(bus);
-    return e == SPI_BIG_ENDIAN ? _reverse_bytes(SPI(bus).W0) : SPI(bus).W0;
+    return (value << 16) | (value >> 16);  // exchange the upper and lower 16-bit halves
 }
 
-// works properly only with little endian byte order
-static void _spi_buf_transfer (uint8_t bus, const uint8_t *out_data, uint8_t *in_data, size_t len)
+static void _prepare_buffer(uint8_t bus, size_t len, spi_endianness_t e, spi_word_size_t word_size)
+{
+    // Reorders the words in the data registers (starting at W0) in place for
+    // big endian transfers; little endian and 32-bit words need no software swap.
+    if (e == SPI_LITTLE_ENDIAN || word_size == SPI_32BIT) return;
+
+    // len counts words, but the loop below walks whole 32-bit registers, each
+    // holding two 16-bit words or four 8-bit words. Round up so a partly
+    // filled last register is swapped too, and never step past the data.
+    size_t regs = word_size == SPI_16BIT ? (len + 1) / 2 : (len + 3) / 4;
+
+    uint32_t *data = (uint32_t *)&SPI(bus).W0;
+    for (size_t i = 0; i < regs; i ++)
+    {
+        data[i] = word_size == SPI_16BIT
+            ? _swap_words(data[i])
+            : _swap_bytes(data[i]);
+    }
+}
+
+static void _spi_buf_transfer(uint8_t bus, const void *out_data, void *in_data,
+    size_t len, spi_endianness_t e, spi_word_size_t word_size)
 {
     _wait(bus);
-    _set_size(bus, len);
-    memcpy((void *)&SPI(bus).W0, out_data, len);
+    size_t bytes = len * (uint8_t)word_size;  // len is in words; the calls below need a byte count
+    _set_size(bus, bytes);
+    memcpy((void *)&SPI(bus).W0, out_data, bytes);
+    _prepare_buffer(bus, len, e, word_size);  // reorder the outgoing words for big endian if needed
     _start(bus);
     _wait(bus);
     if (in_data)
-        memcpy(in_data, (void *)&SPI(bus).W0, len);
+    {
+        _prepare_buffer(bus, len, e, word_size);  // apply the same reordering to the received words
+        memcpy(in_data, (void *)&SPI(bus).W0, bytes);
+    }
 }
 
 uint8_t spi_transfer_8(uint8_t bus, uint8_t data)
 {
-    return _spi_single_transfer(bus, data, sizeof(data));
+    spi_endianness_t order = spi_get_endianness(bus);
+    _spi_buf_transfer(bus, &data, &data, 1, order, SPI_8BIT);  // data is sent, then overwritten with the reply
+    return data;
 }
 
 uint16_t spi_transfer_16(uint8_t bus, uint16_t data)
 {
-    return _spi_single_transfer(bus, data, sizeof(data));
+    spi_endianness_t order = spi_get_endianness(bus);
+    _spi_buf_transfer(bus, &data, &data, 1, order, SPI_16BIT);  // data is sent, then overwritten with the reply
+    return data;
 }
 
 uint32_t spi_transfer_32(uint8_t bus, uint32_t data)
 {
-    return _spi_single_transfer(bus, data, sizeof(data));
+    spi_endianness_t order = spi_get_endianness(bus);
+    _spi_buf_transfer(bus, &data, &data, 1, order, SPI_32BIT);  // data is sent, then overwritten with the reply
+    return data;
 }
 
-void spi_transfer(uint8_t bus, const void *out_data, void *in_data, size_t len)
+size_t spi_transfer(uint8_t bus, const void *out_data, void *in_data, size_t len, spi_word_size_t word_size)
 {
-    if (!out_data || !len) return;
+    if (!out_data || !len) return 0;  // nothing to send: report zero words transferred
 
-    _wait(bus);
     spi_endianness_t e = spi_get_endianness(bus);
-    spi_set_endianness(bus, SPI_LITTLE_ENDIAN);
+    uint8_t buf_size = _SPI_BUF_SIZE / (uint8_t)word_size;  // words per _SPI_BUF_SIZE-byte block
 
-    size_t blocks = len / _SPI_BUF_SIZE;
+    size_t blocks = len / buf_size;  // number of full blocks; len and buf_size are both in words
     for (size_t i = 0; i < blocks; i++)
     {
         size_t offset = i * _SPI_BUF_SIZE;
         _spi_buf_transfer(bus, (const uint8_t *)out_data + offset,
-            in_data ? (uint8_t *)in_data + offset : NULL, _SPI_BUF_SIZE);
+            in_data ? (uint8_t *)in_data + offset : NULL, buf_size, e, word_size);
     }
 
-    uint8_t tail = len % _SPI_BUF_SIZE;
+    uint8_t tail = len % buf_size;  // words left over after the full blocks
     if (tail)
     {
         _spi_buf_transfer(bus, (const uint8_t *)out_data + blocks * _SPI_BUF_SIZE,
-            in_data ? (uint8_t *)in_data + blocks * _SPI_BUF_SIZE : NULL, tail);
+            in_data ? (uint8_t *)in_data + blocks * _SPI_BUF_SIZE : NULL, tail, e, word_size);
     }
 
-    spi_set_endianness(bus, e);
+    return len;  // the requested word count is returned unchanged
 }
diff --git a/core/include/esp/spi.h b/core/include/esp/spi.h
index 5de966c..ec86e9d 100644
--- a/core/include/esp/spi.h
+++ b/core/include/esp/spi.h
@@ -49,10 +49,16 @@ typedef enum _spi_mode_t {
 } spi_mode_t;
 
 typedef enum _spi_endianness_t {
-    SPI_LITTLE_ENDIAN,
+    SPI_LITTLE_ENDIAN = 0,
     SPI_BIG_ENDIAN
 } spi_endianness_t;
 
+typedef enum _spi_word_size_t {
+    SPI_8BIT  = 1,  ///< 1 byte per word, no endian swapping
+    SPI_16BIT = 2,  ///< 2 bytes per word, 16-bit values are swapped in SPI_BIG_ENDIAN mode
+    SPI_32BIT = 4   ///< 4 bytes per word, 32-bit values are swapped in SPI_BIG_ENDIAN mode
+} spi_word_size_t;  ///< Word size for spi_transfer(); each value equals the word size in bytes
+
 /**
  * \brief Initalize SPI bus
  * Initalize specified SPI bus and setup appropriate pins:
@@ -90,10 +96,7 @@ void spi_set_mode(uint8_t bus, spi_mode_t mode);
  * \param bus Bus ID: 0 - system, 1 - user
  * \return Bus mode
  */
-inline spi_mode_t spi_get_mode(uint8_t bus)
-{
-    return (spi_mode_t)((SPI(bus).PIN & SPI_PIN_IDLE_EDGE ? 2 : 0) | (SPI(bus).USER0 & SPI_USER0_CLOCK_OUT_EDGE ? 1 : 0));
-}
+spi_mode_t spi_get_mode(uint8_t bus);
 
 /**
  * \brief Set SPI bus frequency
@@ -120,8 +123,8 @@ void spi_set_frequency_div(uint8_t bus, uint32_t divider);
 inline uint32_t spi_get_frequency_hz(uint8_t bus)
 {
     return APB_CLK_FREQ /
-        (((SPI(bus).CLOCK >> SPI_CLOCK_DIV_PRE_S) & SPI_CLOCK_DIV_PRE_M) + 1) /
-        (((SPI(bus).CLOCK >> SPI_CLOCK_COUNT_NUM_S) & SPI_CLOCK_COUNT_NUM_M) + 1);
+        (FIELD2VAL(SPI_CLOCK_DIV_PRE, SPI(bus).CLOCK) + 1) /    // pre-divider field holds divider - 1
+        (FIELD2VAL(SPI_CLOCK_COUNT_NUM, SPI(bus).CLOCK) + 1);   // count field holds count - 1
 }
 
 /**
@@ -155,39 +158,48 @@ void spi_set_endianness(uint8_t bus, spi_endianness_t endianness);
 inline spi_endianness_t spi_get_endianness(uint8_t bus)
 {
     return SPI(bus).USER0 & (SPI_USER0_WR_BYTE_ORDER | SPI_USER0_RD_BYTE_ORDER)
-    	? SPI_BIG_ENDIAN
-    	: SPI_LITTLE_ENDIAN;
+        ? SPI_BIG_ENDIAN
+        : SPI_LITTLE_ENDIAN;
 }
 
 /**
- * \brief Transfer byte over SPI
+ * \brief Transfer 8 bits over SPI
  * \param bus Bus ID: 0 - system, 1 - user
  * \param data Byte to send
  * \return Received byte
  */
 uint8_t spi_transfer_8(uint8_t bus, uint8_t data);
 /**
- * \brief Transfer word over SPI
+ * \brief Transfer 16 bits over SPI
  * \param bus Bus ID: 0 - system, 1 - user
  * \param data Word to send
  * \return Received word
  */
 uint16_t spi_transfer_16(uint8_t bus, uint16_t data);
 /**
- * \brief Transfer dword over SPI
+ * \brief Transfer 32 bits over SPI
  * \param bus Bus ID: 0 - system, 1 - user
  * \param data dword to send
  * \return Received dword
  */
 uint32_t spi_transfer_32(uint8_t bus, uint32_t data);
 /**
- * \brief Transfer buffer over SPI
+ * \brief Transfer buffer of words over SPI
+ * Please note that the buffer size is in words, not in bytes!
+ * Example:
+ *    const uint16_t out_buf[4] = { 0xa0b0, 0xa1b1, 0xa2b2, 0xa3b3 };
+ *    uint16_t in_buf[4];
+ *    spi_init(1, SPI_MODE1, SPI_FREQ_DIV_4M, true, SPI_BIG_ENDIAN, true);
+ *    spi_transfer(1, out_buf, in_buf, 4, SPI_16BIT); // len = 4 words = 8 bytes
+ *
  * \param bus Bus ID: 0 - system, 1 - user
  * \param out_data Data to send.
  * \param in_data Receive buffer. If NULL, received data will be lost.
- * \param len Buffer size
+ * \param len Buffer size in words
+ * \param word_size Size of the word
+ * \return Transmitted/received words count
  */
-void spi_transfer(uint8_t bus, const void *out_data, void *in_data, size_t len);
+size_t spi_transfer(uint8_t bus, const void *out_data, void *in_data, size_t len, spi_word_size_t word_size);
 
 #ifdef __cplusplus
 }