Dear experts,
I have a question about using AXI-Stream with a struct payload. I have seen several posts related to this (post1, post2), but none of them has a full example or tutorial code, so I am trying to make one from scratch (as simple as streaming in two INTs, adding them, and streaming out one INT). But I have run into a problem: the axi_dma.recvchannel.wait() call in the notebook never returns.
The tools I use:
PYNQ 3.0.1, custom board (zcu15eg), Vivado 2024.02 (the base overlay and image were built with 2022.01, and the official PYNQ AXI-stream example runs perfectly)
The code I use:
#include "ap_axi_sdata.h"
#include "hls_stream.h"
/// Payload of one input stream beat: the two operands to be added.
/// Two plain `int` fields and nothing else, in this order.
struct vin_t {
    int input1;  ///< first operand
    int input2;  ///< second operand
};
// AXI4-Stream beat types. The three zeros are the remaining hls::axis
// template arguments (all 0 here); only the payload type differs.
using data_t_in = hls::axis<vin_t, 0, 0, 0>;  // input beat: payload is a vin_t
using data_t_out = hls::axis<int, 0, 0, 0>;   // output beat: payload is one int

// Stream channels carrying those beats.
using istrm_t = hls::stream<data_t_in>;
using ostrm_t = hls::stream<data_t_out>;
/**
 * Streaming adder.
 *
 * Reads beats from A until one arrives with `last` set. For every input beat
 * it writes one beat to B whose data is input1 + input2 and whose `last`
 * flag is copied from the input beat, so the output packet has the same
 * number of beats as the input packet.
 *
 * @param A input stream; each beat carries a vin_t (two ints).
 * @param B output stream; each beat carries the int sum.
 */
void example(istrm_t &A,
             ostrm_t &B) {
#pragma HLS INTERFACE axis port = A
#pragma HLS INTERFACE axis port = B
#pragma hls interface s_axilite port=return
    data_t_in i;
    data_t_out o;

    // `o` is declared without an initialiser, so any field that is never
    // assigned is written to the stream with an indeterminate value.
    // Previously only data and last were assigned; mark every byte of every
    // output beat as valid so the receiver sees well-defined side-channels.
    // NOTE(review): an unset keep is a commonly reported reason for an AXI DMA
    // receive transfer not completing -- confirm on hardware that this resolves
    // the hang.
    o.keep = -1;  // all ones: every byte lane holds valid data
    o.strb = -1;  // all ones: every byte lane is a data byte

    while (1) {
        i = A.read();
        o.data = i.data.input1 + i.data.input2;
        o.last = i.last;  // propagate end-of-packet to the output
        B.write(o);
        if (i.last) {
            break;  // the final beat of the packet has been forwarded
        }
    }
}
The block design I use (I am following the PYNQ AXI stream tutorial and only replaced the interconnect with a SmartConnect, as suggested by AMD. The same design works perfectly with an HLS IP that takes an INT in and gives an INT out.)
The notebook I use:
from pynq import Overlay
# Load the bitstream; the overlay object exposes the design's IPs as attributes.
ol = Overlay("./dmas.bit")
# Handles to the AXI DMA and to its send / receive channels (used by later cells).
dma = ol.axi_dma
dma_send = ol.axi_dma.sendchannel
dma_recv = ol.axi_dma.recvchannel
# Handle to the HLS IP, which appears in the overlay as `example_0`.
hls_ip = ol.example_0
# Bare expression: the notebook displays the IP's register map as the cell output.
hls_ip.register_map
# ----------------------------------------
RegisterMap {
CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
GIER = Register(Enable=0, RESERVED=0),
IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0)
}
# ----------------------------------------
# Offset of the HLS IP's CTRL register and the two CTRL bits set below.
CONTROL_REGISTER = 0x0
AP_START = 0x01      # bit 0
AUTO_RESTART = 0x80  # bit 7
# 0x01 | 0x80 == 0x81: sets AP_START and AUTO_RESTART in a single write.
hls_ip.write(CONTROL_REGISTER, AP_START | AUTO_RESTART)
# Display the register map again to confirm the new CTRL value.
hls_ip.register_map
# ----------------------------------------
RegisterMap {
CTRL = Register(AP_START=1, AP_DONE=0, AP_IDLE=0, AP_READY=0, RESERVED_1=0, AUTO_RESTART=1, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
GIER = Register(Enable=0, RESERVED=0),
IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0)
}
# ----------------------------------------
from pynq import allocate
import numpy as np
# Input element: two int32 fields named like the members of the HLS vin_t struct.
# (A single array field, np.dtype([('input', np.int32, 2)]), was also tested.)
vin_t = np.dtype([('input1', np.int32), ('input2', np.int32)])
vout_t = np.int32

n_samples = 5
i_buffer = allocate(shape=(n_samples,), dtype=vin_t)
o_buffer = allocate(shape=(n_samples,), dtype=vout_t)

# Element k of the input buffer becomes (k, k + 1).
sample_idx = np.arange(n_samples, dtype=np.int32)
i_buffer['input1'] = sample_idx
i_buffer['input2'] = sample_idx + 1

# Bare expression: show both buffers as the cell output.
i_buffer,o_buffer
# ----------------------------------------
(PynqBuffer([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)],
dtype=[('input1', '<i4'), ('input2', '<i4')]),
PynqBuffer([0, 0, 0, 0, 0], dtype=int32))
# ----------------------------------------
# Send: start the DMA send channel on i_buffer, then block until it finishes.
dma_send.transfer(i_buffer)
dma_send.wait()
# ----------------------------------------
# ----------------------------------------
# Receive: start the DMA receive channel into o_buffer, block until it
# finishes, then print the result.
# NOTE(review): the receive channel is only started after the send has already
# completed; consider starting both transfers before waiting on either --
# confirm against the PYNQ DMA tutorial.
dma_recv.transfer(o_buffer)
dma_recv.wait()
print(o_buffer)
# ----------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Input In [5], in <cell line: 3>()
1 # recieve
2 dma_recv.transfer(o_buffer)
----> 3 dma_recv.wait()
4 print(o_buffer)
File /usr/local/share/pynq-venv/lib/python3.10/site-packages/pynq/lib/dma.py:171, in _SDMAChannel.wait(self)
169 raise RuntimeError("DMA channel not started")
170 while True:
--> 171 error = self._mmio.read(self._offset + 4)
172 if self.error:
173 if error & 0x10:
File /usr/local/share/pynq-venv/lib/python3.10/site-packages/pynq/mmio.py:121, in MMIO.read(self, offset, length, word_order)
118 raise MemoryError("Unaligned read: offset must be multiple of 4.")
120 # Read data out
--> 121 lsb = int(self.array[idx])
122 if length == 8:
123 if word_order == "little":
KeyboardInterrupt:
The problem: as shown above, the recv wait() never returns, and if I interrupt it, execution stops inside MMIO.read.
So I was wondering whether there is anything wrong in what I am doing, or whether there is any AXI-stream struct example for PYNQ that could be used as a starting point. Thank you!
Best,
Q.
# DMA register map AFTER the interrupt, in case it helps
RegisterMap {
MM2S_DMACR = Register(RS=1, Reset=0, Keyhole=0, Cyclic_BD_Enable=0, IOC_IrqEn=0, Dly_IrqEn=0, Err_IrqEn=0, IRQThreshold=1, IRQDelay=0),
MM2S_DMASR = Register(Halted=0, Idle=1, SGIncld=0, DMAIntErr=0, DMASlvErr=0, DMADecErr=0, SGIntErr=0, SGSlvErr=0, SGDecErr=0, IOC_Irq=1, Dly_Irq=0, Err_Irq=0, IRQThresholdSts=0, IRQDelaySts=0),
MM2S_CURDESC = Register(Current_Descriptor_Pointer=0),
MM2S_CURDESC_MSB = Register(Current_Descriptor_Pointer=0),
MM2S_TAILDESC = Register(Tail_Descriptor_Pointer=0),
MM2S_TAILDESC_MSB = Register(Tail_Descriptor_Pointer=0),
MM2S_SA = Register(Source_Address=25165824),
MM2S_SA_MSB = Register(Source_Address=0),
MM2S_LENGTH = Register(Length=40),
SG_CTL = Register(SG_CACHE=0, SG_USER=0),
S2MM_DMACR = Register(RS=1, Reset=0, Keyhole=0, Cyclic_BD_Enable=0, IOC_IrqEn=0, Dly_IrqEn=0, Err_IrqEn=0, IRQThreshold=1, IRQDelay=0),
S2MM_DMASR = Register(Halted=0, Idle=0, SGIncld=0, DMAIntErr=0, DMASlvErr=0, DMADecErr=0, SGIntErr=0, SGSlvErr=0, SGDecErr=0, IOC_Irq=0, Dly_Irq=0, Err_Irq=0, IRQThresholdSts=0, IRQDelaySts=0),
S2MM_CURDESC = Register(Current_Descriptor_Pointer=0),
S2MM_CURDESC_MSB = Register(Current_Descriptor_Pointer=0),
S2MM_TAILDESC = Register(Tail_Descriptor_Pointer=0),
S2MM_TAILDESC_MSB = Register(Tail_Descriptor_Pointer=0),
S2MM_DA = Register(Destination_Address=25169920),
S2MM_DA_MSB = Register(Destination_Address=0),
S2MM_LENGTH = Register(Length=20)
}