diff --git a/units/usart/_dmas.c b/units/usart/_dmas.c
index 0b9659a..00910a8 100644
--- a/units/usart/_dmas.c
+++ b/units/usart/_dmas.c
@@ -127,6 +127,11 @@ error_t UUSART_SetupDMAs(Unit *unit)
     priv->tx_buffer = malloc_ck(UUSART_TXBUF_LEN);
     if (NULL == priv->tx_buffer) return E_OUT_OF_MEM;
 
+    // These buffers must be word-aligned for the DMA to work.
+    // Any well-behaved malloc implementation should guarantee this.
+    assert_param(((uint32_t)priv->rx_buffer & 3) == 0);
+    assert_param(((uint32_t)priv->tx_buffer & 3) == 0);
+
     priv->rx_buf_readpos = 0;
 
     LL_DMA_InitTypeDef init;
@@ -237,29 +242,31 @@ static void UUSART_DMA_TxStart(struct priv *priv)
     uint16_t nr = priv->tx_buf_nr;
     uint16_t nw = priv->tx_buf_nw;
 
-//    if (nr == nw-1 || nr==0&&nw==UUSART_TXBUF_LEN-1) {
-//        dbg("FULL buf, cant start")
-//    }
-
     if (nr == nw) { dbg("remain=0,do nothing"); return; } // do nothing if we're done
 
-    uint16_t chunk = 0;
-    if (nr < nw) {
-        // linear forward
-        chunk = nw - nr;
-    } else {
-        // wrapped
-        chunk = (uint16_t) (UUSART_TXBUF_LEN - nr);
+    uint8_t chunk = priv->tx_buffer[nr];
+    nr += (uint16_t) (4 - (nr & 0b11));
+    if (chunk == 0) {
+        // wrap-around
+        chunk = priv->tx_buffer[0];
+        nr = 4;
+        assert_param(nr < nw);
     }
 
-    dbg("chunk %d", (int)chunk);
-    priv->tx_buf_chunk = chunk;
+    // nr was advanced past the lpad preamble
+    priv->tx_buf_nr = nr;
+    priv->tx_buf_chunk = chunk; // will be advanced by 'chunk' bytes when the DMA completes
+
+    dbg("# TX: chunk start %d, len %d", (int)nr, (int)chunk);
+    PUTS(">"); PUTSN((char *) (priv->tx_buffer + nr), chunk); PUTS("<");
+    PUTNL();
 
     LL_DMA_DisableChannel(priv->dma, priv->dma_tx_chnum);
     {
+        LL_DMA_ClearFlags(priv->dma, priv->dma_tx_chnum);
         LL_DMA_SetMemoryAddress(priv->dma, priv->dma_tx_chnum, (uint32_t) (priv->tx_buffer + nr));
         LL_DMA_SetDataLength(priv->dma, priv->dma_tx_chnum, chunk);
         LL_USART_ClearFlag_TC(priv->periph);
@@ -267,6 +274,8 @@ static void UUSART_DMA_TxStart(struct priv *priv)
         LL_DMA_EnableChannel(priv->dma, priv->dma_tx_chnum);
     }
 }
 
+COMPILER_ASSERT(UUSART_TXBUF_LEN <= 256); // a larger buffer would break the "len tag" algorithm
+
 /**
  * Put data on the queue. Only a part may be sent due to a buffer size limit.
  *
@@ -280,6 +289,7 @@ uint16_t UUSART_DMA_TxQueue(struct priv *priv, const uint8_t *buffer, uint16_t len)
     const uint16_t nr = priv->tx_buf_nr;
     uint16_t nw = priv->tx_buf_nw;
 
+    // shortcut check for a completely full buffer
    if (nw == nr-1 || (nr==0&&nw==UUSART_TXBUF_LEN-1)) {
        dbg("Buffer full, cant queue");
        return 0;
@@ -304,28 +314,53 @@ uint16_t UUSART_DMA_TxQueue(struct priv *priv, const uint8_t *buffer, uint16_t len)
     uint16_t avail = (const uint16_t) (UUSART_TXBUF_LEN - 1 - used);
 
     dbg("nr %d, nw %d, used %d, avail %d", (int)nr, (int)nw, (int)used, (int)avail);
 
-    uint16_t towrite = MIN(avail, len);
-    const uint16_t towrite_orig = towrite;
+    // hack to avoid too large chunks - XXX this is not ideal
+    if (avail > 255) avail = 255;
 
-    uint32_t cnt = 0;
-    while (towrite > 0) {
-        // this should run max 2x
-        assert_param(cnt < 2);
-        cnt++;
+    uint8_t written = 0;
 
-        uint16_t chunk = (uint16_t) MIN(towrite, UUSART_TXBUF_LEN - nw);
-        memcpy((uint8_t *) (priv->tx_buffer + nw), buffer, chunk);
-        dbg("- memcpy %d bytes at %d", (int)chunk, (int)nw);
-        nw += chunk;
-        towrite -= chunk;
+    if (avail <= 10) {
+        dbg("No space (only %d)", (int) avail);
+        return written;
+    }
+
+    while (avail > 0 && written < len) {
+        // DMA must start at a word boundary, so each chunk is prefixed with its length (1 byte, padded up to the next word boundary)
+        uint8_t lpad = (uint8_t) (4 - (nw & 0b11));
+
+        // Chunk can extend at most to the end of the buffer
+        uint8_t chunk = (uint8_t) MIN((len-written) + lpad, UUSART_TXBUF_LEN - nw);
+        if (chunk > avail) chunk = (uint8_t) avail;
 
-        if (nw == UUSART_TXBUF_LEN) {
+        dbg("nw %d, raw available chunk %d", (int) nw, (int)chunk);
+
+        if (chunk <= lpad + 1) {
+            // write 0 to indicate a wrap-around
+            dbg("Wrap-around marker at offset %d", (int) nw);
+            priv->tx_buffer[nw] = 0;
             nw = 0;
         }
+        else {
+            // enough space for a preamble + some data
+            dbg("Preamble of %d bytes at offset %d", (int) lpad, (int) nw);
+            priv->tx_buffer[nw] = (uint8_t) (chunk - lpad);
+            nw += lpad;
+            uint8_t datachunk = (uint8_t) (chunk - lpad);
+            dbg("Datachunk len %d at offset %d", (int) datachunk, (int) nw);
+            PUTS("mcpy src >"); PUTSN((char *) (buffer), datachunk); PUTS("<\r\n");
+            memcpy((uint8_t *) (priv->tx_buffer + nw), buffer, datachunk);
+            PUTS("mcpy dst >"); PUTSN((char *) (priv->tx_buffer + nw), datachunk); PUTS("<\r\n");
+            buffer += datachunk;
+            nw += datachunk;
+            written += datachunk;
+            if (nw == UUSART_TXBUF_LEN) nw = 0;
+        }
+        avail -= chunk;
+        dbg(". end of loop, avail is %d", (int)avail);
     }
 
+    priv->tx_buf_nw = nw;
+
-    dbg("Written. -> nr %d, nw %d, used %d, avail %d", (int)nr, (int)nw, (int)used, (int)avail);
+    dbg("Write done -> nr %d, nw %d", (int)nr, (int)nw);
 
     // start the DMA if it's idle
     if (priv->dma_tx->CNDTR == 0) {
@@ -335,7 +370,7 @@ uint16_t UUSART_DMA_TxQueue(struct priv *priv, const uint8_t *buffer, uint16_t len)
         dbg("DMA in progress, not requesting");
     }
 
-    return towrite_orig;
+    return written;
 }
 
 /**
@@ -352,7 +387,7 @@ static void UUSART_DMA_TxHandler(void *arg)
     uint32_t isrsnapshot = priv->dma->ISR;
 
     if (LL_DMA_IsActiveFlag_TC(isrsnapshot, priv->dma_tx_chnum)) {
         // chunk Tx is finished
-        dbg("DMA_TxHandler, lr %d, nw %d, chunk %d", (int)priv->tx_buf_nr, (int)priv->tx_buf_nw, (int)priv->tx_buf_chunk);
+        dbg("~ DMA tx done, nr %d, nw %d, chunk %d", (int)priv->tx_buf_nr, (int)priv->tx_buf_nw, (int)priv->tx_buf_chunk);
 
//        dbg("StartPos advance...");
         priv->tx_buf_nr += priv->tx_buf_chunk;
@@ -366,7 +401,7 @@ static void UUSART_DMA_TxHandler(void *arg)
 
         // start the next chunk
         if (priv->tx_buf_nr != priv->tx_buf_nw) {
-            dbg("Flag cleared ... asking for more. lr %d, nw %d", (int)priv->tx_buf_nr, (int)priv->tx_buf_nw);
+            dbg(" Asking for more, if any");
             UUSART_DMA_TxStart(priv);
         }
     }
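
Note: the framing this patch introduces is easier to follow outside the driver. Below is a minimal, compilable host-side sketch of the same "len tag" scheme: each queued chunk is prefixed with a one-byte length tag plus padding up to the next word boundary, so the payload (and thus the DMA source address) is always word-aligned, and a zero tag marks a wrap-around back to offset 0. All names here (TXBUF_LEN, tag_write, tag_take_chunk) are invented for the sketch and do not exist in the codebase, and tag_take_chunk merges what the driver splits between UUSART_DMA_TxStart and the transfer-complete interrupt:

/*
 * Standalone sketch of the "len tag" ring buffer framing, for review only.
 * Build: cc -std=c99 -Wall lentag_demo.c && ./a.out
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TXBUF_LEN 64u                   /* must stay <= 256: length fits one byte */

static uint8_t  buf[TXBUF_LEN];
static uint16_t nr, nw;                 /* read/write positions; nr == nw means empty */

/* Queue data, framing each chunk as: 1 length byte, padding up to the next
 * word boundary, then the payload. Returns how many payload bytes fit. */
static uint16_t tag_write(const uint8_t *data, uint16_t len)
{
    uint16_t used  = (uint16_t) ((nw >= nr) ? (nw - nr) : (TXBUF_LEN - nr + nw));
    uint16_t avail = (uint16_t) (TXBUF_LEN - 1 - used); /* keep 1 byte so full != empty */
    uint16_t written = 0;

    while (written < len) {
        uint8_t  lpad = (uint8_t)  (4 - (nw & 3));      /* 1..4 bytes: tag + padding   */
        uint16_t room = (uint16_t) (TXBUF_LEN - nw);    /* linear space to buffer end  */

        if (avail <= (uint16_t) (lpad + 1)) break;      /* no room for tag + any data  */

        if (room <= (uint16_t) (lpad + 1)) {            /* tail too short for a chunk  */
            if (avail < (uint16_t) (room + 6)) break;   /* a chunk must fit at offset 0 */
            buf[nw] = 0;                                /* length 0 = wrap-around mark  */
            avail  -= room;                             /* skipped tail stays dead      */
            nw      = 0;
            continue;
        }

        uint16_t space   = (room < avail) ? room : avail;
        uint16_t want    = (uint16_t) (len - written + lpad);
        uint16_t chunk   = (want < space) ? want : space;
        uint8_t  datalen = (uint8_t) (chunk - lpad);    /* >= 1 by the checks above     */

        buf[nw] = datalen;                              /* the length tag itself        */
        memcpy(&buf[nw + lpad], data + written, datalen);
        nw = (uint16_t) ((nw + lpad + datalen) % TXBUF_LEN);
        written += datalen;
        avail   -= chunk;
    }
    return written;
}

/* Take the next contiguous chunk, i.e. what one DMA transfer would send.
 * Returns its length and stores the word-aligned payload offset in *pos. */
static uint8_t tag_take_chunk(uint16_t *pos)
{
    if (nr == nw) return 0;                             /* queue is empty */

    uint8_t chunklen = buf[nr];
    if (chunklen == 0) {                                /* wrap marker: restart at 0    */
        nr = 0;
        chunklen = buf[0];
    }
    *pos = (uint16_t) (nr + (4 - (nr & 3)));            /* skip tag + padding           */
    nr = (uint16_t) ((*pos + chunklen) % TXBUF_LEN);    /* the driver does this in irq  */
    return chunklen;
}

int main(void)
{
    const char *msg = "hello length-tagged ring";
    for (int i = 0; i < 3; i++) {
        uint16_t queued = tag_write((const uint8_t *) msg, (uint16_t) strlen(msg));
        printf("write %d: queued %u bytes\n", i, (unsigned) queued);

        uint16_t pos;
        uint8_t n;
        while ((n = tag_take_chunk(&pos)) != 0)
            printf("  chunk at %2u, len %2u: %.*s\n", (unsigned) pos, (unsigned) n,
                   (int) n, (const char *) &buf[pos]);
    }
    return 0;
}

The third write in the demo lands near the buffer end, so the message is split into two tagged chunks across the wrap, mirroring what UUSART_DMA_TxStart sends as two separate DMA transfers. The sketch also makes the rationale of COMPILER_ASSERT(UUSART_TXBUF_LEN <= 256) visible: the tag is a single byte, so no chunk may carry more than 255 payload bytes.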