I am using an Ultra96v2 and am trying out my own HLS IP, but the code in my Jupyter notebook always gets stuck at recvchannel.wait(). How can I deal with this?
Here is my Block design, the HLS IP is called HLS_accel_0.
The HLS code is attached below, and the main interface function is defined as
// Top-level HLS function: copies IN0_ROW*IN0_COL words from stream_in to
// stream_out through an internal stream, and stores a + b in c.
//   stream_in  - input stream, exposed as an axis interface
//   stream_out - output stream, exposed as an axis interface
//   a, b       - scalar inputs, exposed on the s_axilite interface
//   c          - scalar output on the s_axilite interface; receives a + b
void Hls_accel(hls::stream<AXI_DMA_IO> &stream_in, hls::stream<AXI_DMA_IO> &stream_out,
int a, int b, int &c){
// NOTE(review): `port=return` on s_axilite presumably places the block-level
// control signals (ap_start/ap_done/...) in an AXI4-Lite control register, in
// which case the host has to set ap_start before the core moves any stream
// data - confirm against the HLS interface documentation.
#pragma HLS INTERFACE s_axilite register port=return
#pragma HLS INTERFACE s_axilite port=b
#pragma HLS INTERFACE s_axilite port=a
#pragma HLS INTERFACE s_axilite port=c
#pragma HLS INTERFACE axis register both port=stream_in
#pragma HLS INTERFACE axis register both port=stream_out
// Scalar side computation; independent of the streaming path below.
c = a + b;
#pragma HLS DATAFLOW
// Internal stream connecting the reader stage to the writer stage.
stream<ap_uint<IN0_CH*ACT_BW_D> > img_in("img_in_stream");
// Stage 1: strip each incoming AXI_DMA_IO beat down to its data field.
strm_image<IN0_ROW,IN0_COL,IN0_CH,ACT_BW_D>(stream_in, img_in);
// Stage 2: re-wrap the data into AXI_DMA_IO beats, flagging the final one.
strm_out<IN0_ROW, IN0_COL, IN0_CH, ACT_BW_D>(img_in, stream_out);
}
AXI_DMA_IO is defined in main.h.
// One beat of the streams that Hls_accel reads from and writes to.
// NOTE(review): presumably `data`/`last` are meant to map onto the AXI4-Stream
// TDATA/TLAST signals seen by the DMA - confirm for the HLS version in use.
struct AXI_DMA_IO{
// 64-bit payload word.
ap_uint<64> data;
// 1-bit flag; strm_out sets it to 1 only on the final beat it writes.
ap_uint<1> last;
};
strm_image and strm_out are defined in function.h
// Reader stage: pulls IN_ROW*IN_COL beats from the AXI_DMA_IO input stream and
// forwards only the data field of each beat to the internal stream `in`.
//   stream_in - source of AXI_DMA_IO beats; the `last` field is ignored here
//   in        - destination stream of ap_uint<IN_CH*BW> words
// The 64-bit data field is converted implicitly to ap_uint<IN_CH*BW>; no
// width check is performed.
template <unsigned IN_ROW, unsigned IN_COL, unsigned IN_CH, unsigned BW>
void strm_image(hls::stream<AXI_DMA_IO> &stream_in, hls::stream<ap_uint<IN_CH*BW> > &in){
    // Total number of beats to move.
    const unsigned beat_count = IN_ROW * IN_COL;

    for (unsigned beat_idx = 0; beat_idx < beat_count; ++beat_idx) {
#pragma HLS PIPELINE
        const AXI_DMA_IO beat = stream_in.read();
        in.write(beat.data);
    }
}
// Writer stage: takes IN_ROW*IN_COL words from the internal stream, wraps each
// one in an AXI_DMA_IO beat and pushes it to the output stream.
//   img_out    - source stream of ap_uint<IN_CH*BW> words
//   stream_out - destination of AXI_DMA_IO beats
// The `last` field is 1 on the final beat only and 0 on every other beat.
template <unsigned IN_ROW, unsigned IN_COL, unsigned IN_CH, unsigned BW>
void strm_out(hls::stream<ap_uint<IN_CH*BW> > & img_out, hls::stream<AXI_DMA_IO>& stream_out){
    const unsigned beat_count = IN_ROW * IN_COL;
    // Index of the beat that must carry last = 1.
    const unsigned final_idx = beat_count - 1;

    AXI_DMA_IO beat;
    for (unsigned beat_idx = 0; beat_idx < beat_count; ++beat_idx) {
#pragma HLS PIPELINE
        beat.data = img_out.read();
        beat.last = (beat_idx == final_idx) ? 1 : 0;
        stream_out.write(beat);
    }
}
hls_code.zip (2.4 KB)
The Jupyter code is
from pynq import Overlay
import pynq
import numpy as np
# Load the bitstream and grab both channels of the AXI DMA.
overlay = Overlay('./design_1.bit')
dma_x = overlay.axi_dma_0.sendchannel
dma_y = overlay.axi_dma_0.recvchannel

# Start the HLS core before moving any data.
# `#pragma HLS INTERFACE s_axilite ... port=return` puts the core's block-level
# control (ap_start/ap_done/ap_idle) in an AXI4-Lite register at offset 0x00.
# Until ap_start is set the core neither reads nor writes its streams, so the
# receive channel never sees data and recvchannel.wait() blocks forever.
# 0x81 = ap_start (bit 0) | auto_restart (bit 7), so it also re-arms itself.
# NOTE: assumes the IP appears in the overlay under its block-design name
# `HLS_accel_0`; adjust the attribute if it sits inside a hierarchy.
hls_ip = overlay.HLS_accel_0
hls_ip.write(0x00, 0x81)

# 4x4 = 16 words of 64 bits each.
# NOTE: the core emits `last` after IN0_ROW*IN0_COL words, so this count is
# expected to match that product - verify against the HLS constants.
x = np.random.randint(0, 10, size=(4,4), dtype=np.uint64)
# Use the same dtype as `x` and `buff_y` (the original mixed int64 and uint64).
buff_x = pynq.allocate(shape=(4,4), dtype=np.uint64)
buff_x[:] = x
buff_y = pynq.allocate(shape=(4,4), dtype=np.uint64)

# Queue both transfers, then block until each channel reports completion.
dma_x.transfer(buff_x)
dma_y.transfer(buff_y)
dma_x.wait()
dma_y.wait()