From 38cccbd456182c6d30fad2e031fc1aeb8293fa75 Mon Sep 17 00:00:00 2001 From: sheinz Date: Thu, 21 Jul 2016 16:36:55 +0300 Subject: [PATCH] SPIFFS: Optimized SPI data read/write. Unaligned read/write from/to SPI data registers is rewritten in assembler to improve performance. --- extras/spiffs/esp_spiffs_flash.c | 72 ++++++++++---------- extras/spiffs/unaligned_memcpy.S | 112 +++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 34 deletions(-) create mode 100644 extras/spiffs/unaligned_memcpy.S diff --git a/extras/spiffs/esp_spiffs_flash.c b/extras/spiffs/esp_spiffs_flash.c index fafd167..f0a6049 100644 --- a/extras/spiffs/esp_spiffs_flash.c +++ b/extras/spiffs/esp_spiffs_flash.c @@ -45,40 +45,53 @@ * called where it needed and not. */ -#define SPI_WRITE_MAX_SIZE 32 -#define SPI_READ_MAX_SIZE 32 +#define SPI_WRITE_MAX_SIZE 64 + +// A 64-byte read causes a hang, so reads are limited to 60 bytes +// http://bbs.espressif.com/viewtopic.php?f=6&t=2439 +#define SPI_READ_MAX_SIZE 60 + + +/** + * Copy unaligned data to 4-byte aligned destination buffer. + * + * @param words Number of 4-byte words to write to dst. + * + * @see unaligned_memcpy.S + */ +void memcpy_unaligned_src(volatile uint32_t *dst, uint8_t *src, uint8_t words); + +/** + * Copy 4-byte aligned source data to unaligned destination buffer. + * + * @param bytes Number of bytes to copy to dst. + * + * @see unaligned_memcpy.S + */ +void memcpy_unaligned_dst(uint8_t *dst, volatile uint32_t *src, uint8_t bytes); + /** * Low level SPI flash write. Write block of data up to 64 bytes.
*/ -static inline uint32_t IRAM spi_write_data(sdk_flashchip_t *chip, uint32_t addr, +static inline void IRAM spi_write_data(sdk_flashchip_t *chip, uint32_t addr, uint8_t *buf, uint32_t size) { - Wait_SPI_Idle(chip); // wait for previous write to finish - - SPI(0).ADDR = (addr & 0x00FFFFFF) | (size << 24); - uint32_t words = size >> 2; if (size & 0b11) { words++; } - uint32_t data = 0; - for (uint32_t i = 0; i < (words << 2); i++) { - data >>= 8; - data |= (uint32_t)buf[i] << 24; - if (i & 0b11) { - SPI(0).W[i >> 2] = data; - } - } - if (SPI_write_enable(chip)) { - return ESP_SPIFFS_FLASH_ERROR; - } + Wait_SPI_Idle(chip); // wait for previous write to finish + + SPI(0).ADDR = (addr & 0x00FFFFFF) | (size << 24); + + memcpy_unaligned_src(SPI(0).W, buf, words); /* words = size rounded up to whole 4-byte words */ + + SPI_write_enable(chip); /* NOTE(review): result now ignored; the removed code returned ESP_SPIFFS_FLASH_ERROR when it was non-zero */ SPI(0).CMD = SPI_CMD_PP; while (SPI(0).CMD) {} - - return ESP_SPIFFS_FLASH_OK; } /** @@ -97,9 +110,7 @@ static uint32_t IRAM spi_write_page(sdk_flashchip_t *flashchip, uint32_t dest_ad } while (size >= SPI_WRITE_MAX_SIZE) { - if (spi_write_data(flashchip, dest_addr, buf, SPI_WRITE_MAX_SIZE)) { - return ESP_SPIFFS_FLASH_ERROR; - } + spi_write_data(flashchip, dest_addr, buf, SPI_WRITE_MAX_SIZE); /* returns void after this patch, so there is no status to check */ size -= SPI_WRITE_MAX_SIZE; dest_addr += SPI_WRITE_MAX_SIZE; @@ -110,9 +121,7 @@ static uint32_t IRAM spi_write_page(sdk_flashchip_t *flashchip, uint32_t dest_ad } } - if (spi_write_data(flashchip, dest_addr, buf, size)) { - return ESP_SPIFFS_FLASH_ERROR; - } + spi_write_data(flashchip, dest_addr, buf, size); /* write the remaining size bytes */ return ESP_SPIFFS_FLASH_OK; } @@ -185,15 +194,10 @@ static inline void IRAM read_block(sdk_flashchip_t *chip, uint32_t addr, { SPI(0).ADDR = (addr & 0x00FFFFFF) | (size << 24); SPI(0).CMD = SPI_CMD_READ; + while (SPI(0).CMD) {}; /* spin until CMD reads back as zero */ - uint32_t data = 0; - for (uint32_t i = 0; i < size; i++) { - if (!(i & 0b11)) { - data = SPI(0).W[i>>2]; - } - buf[i] = 0xFF & data; - data >>= 8; - } + + memcpy_unaligned_dst(buf, SPI(0).W, size); /* copy size bytes from SPI(0).W into buf */ } /** diff --git a/extras/spiffs/unaligned_memcpy.S
b/extras/spiffs/unaligned_memcpy.S
new file mode 100644
index 0000000..b96c92c
--- /dev/null
+++ b/extras/spiffs/unaligned_memcpy.S
@@ -0,0 +1,112 @@
+/**
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2016 sheinz (https://github.com/sheinz)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+    .text
+    .section .iram1.text, "x"
+    .literal_position
+
+/**
+ * Copy whole 32-bit words from a source of any byte alignment to a 4-byte aligned buffer.
+ */
+    .align  4
+    .global memcpy_unaligned_src
+    .type   memcpy_unaligned_src, @function
+memcpy_unaligned_src:
+/* a2: dst (4-byte aligned), a3: src (any alignment), a4: number of 32-bit words to store */
+    ssa8l   a3                      /* SAR = 8 * (src & 3), used by the funnel shift below */
+    srli    a3, a3, 2               /* clear the two low address bits ... */
+    slli    a3, a3, 2               /* ... so a3 points at the aligned word holding src[0] */
+    beqz    a4, u_src_end           /* nothing to copy */
+    l32i    a6, a3, 0               /* a6 = aligned word containing the first source byte */
+u_src_loop:
+    l32i    a7, a3, 4               /* a7 = following aligned word; with an aligned src the final load reads a word that is never stored */
+    src     a8, a7, a6              /* a8 = low 32 bits of (a7:a6) >> SAR */
+    memw                            /* memory barrier before the store to dst */
+    s32i    a8, a2, 0               /* store one assembled word */
+    mov     a6, a7                  /* the upper word becomes the lower word of the next pair */
+    addi    a3, a3, 4               /* advance source by one word */
+    addi    a2, a2, 4               /* advance destination by one word */
+    addi    a4, a4, -1              /* one word fewer to go */
+    bnez    a4, u_src_loop
+u_src_end:
+    movi    a2, 0                   /* a2 = 0; the C prototype in esp_spiffs_flash.c returns void */
+    ret.n
+
+
+/**
+ * Copy data from 4-byte aligned source to unaligned destination buffer.
+ */
+    .align  4
+    .global memcpy_unaligned_dst
+    .type   memcpy_unaligned_dst, @function
+memcpy_unaligned_dst:
+/* a2: dst (any alignment), a3: src (4-byte aligned), a4: size in bytes */
+    beqz.n  a4, u_dst_end           /* nothing to copy */
+    or      a5, a2, a4              /* word stores are only valid when dst AND size are multiples of 4 */
+    extui   a5, a5, 0, 2            /* a5 = (dst | size) & 3 */
+    beqz.n  a5, aligned_dst_loop    /* both aligned: take the word-copy fast path */
+u_dst_loop:
+    /* Load data word */
+    memw
+    l32i.n  a5, a3, 0
+
+    /* Save byte number 0 */
+    s8i     a5, a2, 0
+    addi.n  a4, a4, -1
+    beqz    a4, u_dst_end           /* size exhausted in the middle of a word */
+    addi.n  a2, a2, 1
+
+    /* Shift and save byte number 1 */
+    srli    a5, a5, 8
+    s8i     a5, a2, 0
+    addi.n  a4, a4, -1
+    beqz    a4, u_dst_end
+    addi.n  a2, a2, 1
+
+    /* Shift and save byte number 2 */
+    srli    a5, a5, 8
+    s8i     a5, a2, 0
+    addi.n  a4, a4, -1
+    beqz    a4, u_dst_end
+    addi.n  a2, a2, 1
+
+    /* Shift and save byte number 3 */
+    srli    a5, a5, 8
+    s8i     a5, a2, 0
+    addi.n  a4, a4, -1
+    addi.n  a2, a2, 1
+
+    /* Next word */
+    addi.n  a3, a3, 4
+    bnez.n  a4, u_dst_loop
+    ret.n
+aligned_dst_loop:                   /* reached only with dst and size both multiples of 4 */
+    memw
+    l32i    a5, a3, 0
+    s32i    a5, a2, 0
+    addi.n  a3, a3, 4
+    addi.n  a2, a2, 4
+    addi.n  a4, a4, -4              /* size is a non-zero multiple of 4 here, so this reaches exactly 0 */
+    bnez.n  a4, aligned_dst_loop
+u_dst_end:  ret.n