diff --git a/units/usart/_dmas.c b/units/usart/_dmas.c index 0b9659a..eba54d0 100644 --- a/units/usart/_dmas.c +++ b/units/usart/_dmas.c @@ -1,10 +1,9 @@ // // Created by MightyPork on 2018/01/14. // -#include -#include -#include + #include "platform.h" +#include "irq_dispatcher.h" #include "unit_base.h" #define UUSART_INTERNAL @@ -13,6 +12,12 @@ static void UUSART_DMA_RxHandler(void *arg); static void UUSART_DMA_TxHandler(void *arg); +#if UUSART_DEBUG +#define dbg_uusart(fmt, ...) dbg(fmt, ##__VA_ARGS__) +#else +#define dbg_uusart(fmt, ...) +#endif + error_t UUSART_ClaimDMAs(Unit *unit) { error_t rv; @@ -110,7 +115,7 @@ error_t UUSART_ClaimDMAs(Unit *unit) trap("Missing DMA mapping for USART%d", (int)priv->periph_num); } -// dbg("USART %d - selected DMA ch Tx(%d), Rx(%d)", priv->periph_num, priv->dma_tx_chnum, priv->dma_rx_chnum); + dbg_uusart("USART %d - selected DMA ch Tx(%d), Rx(%d)", priv->periph_num, priv->dma_tx_chnum, priv->dma_rx_chnum); return E_SUCCESS; } @@ -127,6 +132,11 @@ error_t UUSART_SetupDMAs(Unit *unit) priv->tx_buffer = malloc_ck(UUSART_TXBUF_LEN); if (NULL == priv->tx_buffer) return E_OUT_OF_MEM; + // Those must be aligned to a word boundary for the DMAs to work. + // Any well-behaved malloc impl should do this correctly. + assert_param(((uint32_t)priv->rx_buffer & 3) == 0); + assert_param(((uint32_t)priv->tx_buffer & 3) == 0); + priv->rx_buf_readpos = 0; LL_DMA_InitTypeDef init; @@ -231,35 +241,41 @@ static void UUSART_DMA_RxHandler(void *arg) */ static void UUSART_DMA_TxStart(struct priv *priv) { - dbg("DMA_TxStart (nr %d, nw %d)", (int)priv->tx_buf_nr, (int)priv->tx_buf_nw); - + priv->tx_dma_busy = true; assert_param(priv->dma_tx->CNDTR == 0); + + dbg_uusart("DMA_TxStart (nr %d, nw %d)", (int)priv->tx_buf_nr, (int)priv->tx_buf_nw); + uint16_t nr = priv->tx_buf_nr; uint16_t nw = priv->tx_buf_nw; -// if (nr == nw-1 || nr==0&&nw==UUSART_TXBUF_LEN-1) { -// dbg("FULL buf, cant start") -// } - if (nr == nw) { - dbg("remain=0,do nothing"); + dbg_uusart("remain=0,do nothing"); return; } // do nothing if we're done - uint16_t chunk = 0; - if (nr < nw) { - // linear forward - chunk = nw - nr; - } else { - // wrapped - chunk = (uint16_t) (UUSART_TXBUF_LEN - nr); + uint8_t chunk = priv->tx_buffer[nr++]; + //nr += (uint16_t) (4 - (nr & 0b11)); + if (chunk == 0) { + // wrap-around + chunk = priv->tx_buffer[0]; + nr = 1; + assert_param(nr < nw); } - dbg("chunk %d", (int)chunk); - priv->tx_buf_chunk = chunk; + // nr was advanced by the lpad preamble + priv->tx_buf_nr = nr; + priv->tx_buf_chunk = chunk; // will be further moved by 'chunk' bytes when dma completes + + dbg_uusart("# TX: chunk start %d, len %d", (int)nr, (int)chunk); +#if UUSART_DEBUG + PUTS(">"); PUTSN((char *) (priv->tx_buffer + nr), chunk); PUTS("<"); + PUTNL(); +#endif LL_DMA_DisableChannel(priv->dma, priv->dma_tx_chnum); { + LL_DMA_ClearFlags(priv->dma, priv->dma_tx_chnum); LL_DMA_SetMemoryAddress(priv->dma, priv->dma_tx_chnum, (uint32_t) (priv->tx_buffer + nr)); LL_DMA_SetDataLength(priv->dma, priv->dma_tx_chnum, chunk); LL_USART_ClearFlag_TC(priv->periph); @@ -267,6 +283,8 @@ static void UUSART_DMA_TxStart(struct priv *priv) LL_DMA_EnableChannel(priv->dma, priv->dma_tx_chnum); } +COMPILER_ASSERT(UUSART_TXBUF_LEN <= 256); // more would break the "len tag" algorithm + /** * Put data on the queue. Only a part may be sent due to a buffer size limit. * @@ -280,12 +298,13 @@ uint16_t UUSART_DMA_TxQueue(struct priv *priv, const uint8_t *buffer, uint16_t l const uint16_t nr = priv->tx_buf_nr; uint16_t nw = priv->tx_buf_nw; + // shortcut for checking a completely full buffer if (nw == nr-1 || (nr==0&&nw==UUSART_TXBUF_LEN-1)) { - dbg("Buffer full, cant queue"); + dbg_uusart("Buffer full, cant queue"); return 0; } - dbg("\r\nQueue.."); + dbg_uusart("\r\nQueue.."); uint16_t used = 0; if (nr == nw) { @@ -300,42 +319,75 @@ uint16_t UUSART_DMA_TxQueue(struct priv *priv, const uint8_t *buffer, uint16_t l used = (uint16_t) ((UUSART_TXBUF_LEN - nr) + nw); } - dbg("Trying to send buffer of len %d", (int)len); + dbg_uusart("Trying to send buffer of len %d", (int)len); uint16_t avail = (const uint16_t) (UUSART_TXBUF_LEN - 1 - used); - dbg("nr %d, nw %d, used %d, avail %d", (int)nr, (int)nw, (int)used, (int)avail); + dbg_uusart("nr %d, nw %d, used %d, avail %d", (int)nr, (int)nw, (int)used, (int)avail); - uint16_t towrite = MIN(avail, len); - const uint16_t towrite_orig = towrite; + // hack to avoid too large chunks (we use 1 byte to store chunk size) + if (avail > 255) avail = 255; - uint32_t cnt = 0; - while (towrite > 0) { - // this should run max 2x - assert_param(cnt < 2); - cnt++; + uint8_t written = 0; - uint16_t chunk = (uint16_t) MIN(towrite, UUSART_TXBUF_LEN - nw); - memcpy((uint8_t *) (priv->tx_buffer + nw), buffer, chunk); - dbg("- memcpy %d bytes at %d", (int)chunk, (int)nw); - nw += chunk; - towrite -= chunk; + if (avail <= 5) { + dbg_uusart("No space (only %d)", (int) avail); + return written; + } + + while (avail > 0 && written < len) { + // Padding with chunk information (1 byte: length) + const uint8_t lpad = 1; + + // Chunk can go max to the end of the buffer + uint8_t chunk = (uint8_t) MIN((len-written) + lpad, UUSART_TXBUF_LEN - nw); + if (chunk > avail) chunk = (uint8_t) avail; - if (nw == UUSART_TXBUF_LEN) { + dbg_uusart("nw %d, raw available chunk %d", (int) nw, (int)chunk); + if (chunk <= lpad + 1) { + // write 0 to indicate a wrap-around + dbg_uusart("Wrap-around marker at offset %d", (int) nw); + priv->tx_buffer[nw] = 0; nw = 0; } + else { + // enough space for a preamble + some data + dbg_uusart("Preamble of %d bytes at offset %d", (int) lpad, (int) nw); + priv->tx_buffer[nw] = (uint8_t) (chunk - lpad); + nw += lpad; + uint8_t datachunk = (uint8_t) (chunk - lpad); + dbg_uusart("Datachunk len %d at offset %d", (int) datachunk, (int) nw); +#if UUSART_DEBUG + PUTS("mcpy src >"); PUTSN((char *) (buffer), datachunk); PUTS("<\r\n"); +#endif + memcpy((uint8_t *) (priv->tx_buffer + nw), buffer, datachunk); +#if UUSART_DEBUG + PUTS("mcpy dst >"); PUTSN((char *) (priv->tx_buffer + nw), datachunk); PUTS("<\r\n"); +#endif + buffer += datachunk; + nw += datachunk; + written += datachunk; + if (nw == UUSART_TXBUF_LEN) nw = 0; + } + avail -= chunk; + dbg_uusart(". end of loop, avail is %d", (int)avail); } - priv->tx_buf_nw = nw; - dbg("Written. -> nr %d, nw %d, used %d, avail %d", (int)nr, (int)nw, (int)used, (int)avail); + { + dbg_uusart("Write done -> nr %d, nw %d", (int) nr, (int) nw); + + // FIXME a potential race condition can happen here + + priv->tx_buf_nw = nw; - // start the DMA if it's idle - if (priv->dma_tx->CNDTR == 0) { - dbg("Write done, requesting DMA."); - UUSART_DMA_TxStart(priv); - } else { - dbg("DMA in progress, not requesting"); + if (!priv->tx_dma_busy) { + dbg_uusart("Write done, requesting DMA."); + UUSART_DMA_TxStart(priv); + } + else { + dbg_uusart("DMA in progress, not requesting"); + } } - return towrite_orig; + return written; } /** @@ -352,9 +404,8 @@ static void UUSART_DMA_TxHandler(void *arg) uint32_t isrsnapshot = priv->dma->ISR; if (LL_DMA_IsActiveFlag_TC(isrsnapshot, priv->dma_tx_chnum)) { // chunk Tx is finished - dbg("DMA_TxHandler, lr %d, nw %d, chunk %d", (int)priv->tx_buf_nr, (int)priv->tx_buf_nw, (int)priv->tx_buf_chunk); + dbg_uusart("~ DMA tx done, nr %d, nw %d, chunk %d", (int)priv->tx_buf_nr, (int)priv->tx_buf_nw, (int)priv->tx_buf_chunk); -// dbg("StartPos advance..."); priv->tx_buf_nr += priv->tx_buf_chunk; if (UUSART_TXBUF_LEN == priv->tx_buf_nr) priv->tx_buf_nr = 0; priv->tx_buf_chunk = 0; @@ -362,12 +413,14 @@ static void UUSART_DMA_TxHandler(void *arg) LL_DMA_ClearFlag_TC(priv->dma, priv->dma_tx_chnum); // Wait for TC - while (!LL_USART_IsActiveFlag_TC(priv->periph)); + while (!LL_USART_IsActiveFlag_TC(priv->periph)); // TODO add a timeout here!!! // start the next chunk if (priv->tx_buf_nr != priv->tx_buf_nw) { - dbg("Flag cleared ... asking for more. lr %d, nw %d", (int)priv->tx_buf_nr, (int)priv->tx_buf_nw); + dbg_uusart(" Asking for more, if any"); UUSART_DMA_TxStart(priv); + } else { + priv->tx_dma_busy = false; } } } diff --git a/units/usart/_internal.h b/units/usart/_internal.h index b9132fc..21e1e9d 100644 --- a/units/usart/_internal.h +++ b/units/usart/_internal.h @@ -50,11 +50,13 @@ struct priv { // DMA stuff volatile uint8_t *rx_buffer; - volatile uint8_t *tx_buffer; volatile uint16_t rx_buf_readpos; + + volatile uint8_t *tx_buffer; volatile uint16_t tx_buf_nr; volatile uint16_t tx_buf_nw; volatile uint16_t tx_buf_chunk; + volatile bool tx_dma_busy; }; /** Allocate data structure and set defaults */