Hello
I am trying to implement the Sobel filter using Vitis Vision Library 2021.2, Vitis HLS and Vivado 2021.2 on the Pynq-Z1 board.
Based on the Pynq-Hello example, I tried to modify the Sobel L1 C++ example file to use DMA. The code is shown below. It runs correctly, and the IP and the log file are attached:
#include "hls_stream.h"
#include "common/xf_common.hpp"
#include "common/xf_infra.hpp"
#include "imgproc/xf_sobel.hpp"
#include "xf_config_params.h"
#include "ap_int.h"
///////
#include "xf_sobel_config.h"
///////
// Width in bits of the data field of the AXI4-Stream beats used by this kernel
// (see stream_t below). NOTE(review): presumably 3 channels x 8 bits to match
// XF_8UC3 -- confirm if TYPE or NPIX is changed.
#define DATA_WIDTH 24
// Pixels-per-cycle setting handed to the Vitis Vision templates.
#define NPIX XF_NPPC1
/* set the height and width */
// WIDTH and HEIGHT are the compile-time maximum frame dimensions; the actual
// frame size is supplied at run time.
#define WIDTH 3840
#define HEIGHT 2160
// Sobel kernel size.
#define FILTER_WIDTH 3
// Pixel type of the input and gradient images.
// NOTE(review): TYPE is a very generic macro name; any later identifier spelled
// TYPE (for example a template parameter) is textually replaced by XF_8UC3.
// Consider a more specific name.
#define TYPE XF_8UC3
// Defined with no value. NOTE(review): no active code in this listing appears
// to use it -- the Sobel call passes `false` for its last template argument.
#define XF_USE_URAM
// Stream type of the kernel ports: ap_axiu with DATA_WIDTH data bits and the
// three remaining width parameters set to 1.
typedef hls::stream<ap_axiu<DATA_WIDTH,1,1,1>> stream_t;
/**
 * Copies one frame from an AXI4-Stream into an xf::cv::Mat.
 *
 * Exactly img.rows * (img.cols >> XF_BITSHIFT(NPPC)) beats are read from the
 * stream; the lower m_pix_width bits of each beat are stored in row-major
 * order. TLAST and the other side-channel signals of the incoming beats are
 * not inspected.
 *
 * @tparam W        Data width in bits of the AXI4-Stream beat.
 * @tparam PIXEL_T  Vitis Vision pixel type of the image (e.g. XF_8UC3).
 * @tparam ROWS     Compile-time maximum number of rows.
 * @tparam COLS     Compile-time maximum number of columns.
 * @tparam NPPC     Pixels processed per cycle.
 * @param AXI_video_strm  Input stream carrying the pixel data.
 * @param img             Destination image; its rows/cols fields give the frame size.
 */
template <int W, int PIXEL_T, int ROWS, int COLS, int NPPC>
void axis2xfMat(hls::stream<ap_axiu<W, 1, 1, 1> >& AXI_video_strm, xf::cv::Mat<PIXEL_T, ROWS, COLS, NPPC>& img) {
    // The pixel-type parameter is deliberately not called TYPE: a file-level
    // macro of that name would be substituted into the template header.
    ap_axiu<W, 1, 1, 1> axi;
    const int m_pix_width = XF_PIXELWIDTH(PIXEL_T, NPPC) * XF_NPIXPERCYCLE(NPPC);
    int rows = img.rows;
    int cols = img.cols >> XF_BITSHIFT(NPPC);
    assert(img.rows <= ROWS);
    assert(img.cols <= COLS);
    // Running row-major index, equal to i * cols + j. The previous
    // `i * rows + j` used the row count as the stride, which scrambles (or
    // overruns) the image whenever rows != cols.
    int idx = 0;
loop_row_axi2mat:
    for (int i = 0; i < rows; i++) {
    loop_col_zxi2mat:
        for (int j = 0; j < cols; j++) {
#pragma HLS loop_flatten off
#pragma HLS pipeline II=1
            AXI_video_strm.read(axi);
            img.write(idx++, axi.data(m_pix_width - 1, 0));
        }
    }
}
/**
 * Streams one frame from an xf::cv::Mat out over an AXI4-Stream.
 *
 * Emits img.rows * (img.cols >> XF_BITSHIFT(NPPC)) beats in row-major order.
 * The pixel word occupies the lower m_pix_width bits of TDATA and the
 * remaining bits are zero. TLAST is asserted only on the final beat of the
 * frame.
 *
 * @tparam W        Data width in bits of the AXI4-Stream beat.
 * @tparam PIXEL_T  Vitis Vision pixel type of the image (e.g. XF_8UC3).
 * @tparam ROWS     Compile-time maximum number of rows.
 * @tparam COLS     Compile-time maximum number of columns.
 * @tparam NPPC     Pixels processed per cycle.
 * @param img  Source image; its rows/cols fields give the frame size.
 * @param dst  Output stream receiving the pixel data.
 */
template <int W, int PIXEL_T, int ROWS, int COLS, int NPPC>
void xfMat2axis(xf::cv::Mat<PIXEL_T, ROWS, COLS, NPPC>& img, hls::stream<ap_axiu<W, 1, 1, 1> >& dst) {
    // The pixel-type parameter is deliberately not called TYPE: a file-level
    // macro of that name would be substituted into the template header.
    ap_axiu<W, 1, 1, 1> axi;
    int rows = img.rows;
    int cols = img.cols >> XF_BITSHIFT(NPPC);
    assert(img.rows <= ROWS);
    assert(img.cols <= COLS);
    const int m_pix_width = XF_PIXELWIDTH(PIXEL_T, NPPC) * XF_NPIXPERCYCLE(NPPC);

    // Side-channel fields that are the same for every beat. They were
    // previously left unassigned (except keep); give them defined values.
    // NOTE(review): a consumer that expects a start-of-frame marker on TUSER
    // would need different handling -- confirm against the downstream IP.
    axi.keep = -1;  // all byte lanes valid
    axi.strb = -1;
    axi.user = 0;
    axi.id = 0;
    axi.dest = 0;

    // Running row-major index, equal to i * cols + j. The previous
    // `i * rows + j` used the row count as the stride, which reads the wrong
    // pixels (or overruns) whenever rows != cols.
    int idx = 0;
loop_row_mat2axi:
    for (int i = 0; i < rows; i++) {
    loop_col_mat2axi:
        for (int j = 0; j < cols; j++) {
#pragma HLS loop_flatten off
#pragma HLS pipeline II = 1
            /* Assert last only on the last pixel of the frame */
            if ((j == cols - 1) && (i == rows - 1)) {
                axi.last = 1;
            } else {
                axi.last = 0;
            }
            axi.data = 0;
            axi.data(m_pix_width - 1, 0) = img.read(idx++);
            dst.write(axi);
        }
    }
}
/**
 * Top-level HLS kernel: streaming 3x3 Sobel filter.
 *
 * One frame is read from img_inp, passed through xf::cv::Sobel, and the two
 * resulting gradient images are written to img_out1 and img_out2.
 *
 * @param img_inp   AXI4-Stream input carrying the source frame.
 * @param img_out1  AXI4-Stream output fed from Sobel's first destination image.
 * @param img_out2  AXI4-Stream output fed from Sobel's second destination image.
 * @param rows      Frame height (AXI4-Lite register).
 * @param cols      Frame width (AXI4-Lite register).
 *
 * NOTE(review): both outputs are written for every frame; presumably each one
 * needs its own stream consumer (e.g. a separate DMA receive channel) or the
 * kernel will stall -- confirm in the block design.
 * NOTE(review): rows and cols presumably have to be written over AXI4-Lite
 * before the kernel is started -- confirm in the host code.
 */
void sobel_accel(stream_t& img_inp, stream_t& img_out1, stream_t& img_out2, int rows, int cols)
{
    // clang-format off
    // Pixel data uses AXI4-Stream ports (an earlier m_axi variant was abandoned).
#pragma HLS INTERFACE axis register both port=img_inp
#pragma HLS INTERFACE axis register both port=img_out1
#pragma HLS INTERFACE axis register both port=img_out2
    // Frame size and block-level control are on AXI4-Lite (default bundle).
#pragma HLS INTERFACE s_axilite port=rows
#pragma HLS INTERFACE s_axilite port=cols
#pragma HLS INTERFACE s_axilite port=return

    // Intermediate images linking the dataflow stages.
    xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> src_mat(rows, cols);
    xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> grad_x(rows, cols);
    xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> grad_y(rows, cols);
#pragma HLS DATAFLOW

    // Stage 1: input stream -> source image.
    axis2xfMat<DATA_WIDTH, TYPE, HEIGHT, WIDTH, NPIX>(img_inp, src_mat);

    // Stage 2: Sobel with constant border; last template argument (URAM) is off.
    xf::cv::Sobel<XF_BORDER_CONSTANT, FILTER_WIDTH, TYPE, TYPE, HEIGHT, WIDTH, NPIX, false>(src_mat, grad_x, grad_y);

    // Stage 3: gradient images -> output streams.
    xfMat2axis<DATA_WIDTH, TYPE, HEIGHT, WIDTH, NPIX>(grad_x, img_out1);
    xfMat2axis<DATA_WIDTH, TYPE, HEIGHT, WIDTH, NPIX>(grad_y, img_out2);
}
I have a problem with the Vivado design. I have attached my TCL file below. I do not know how to connect the generated Y-gradient output image to a DMA.
After copying the generated bit file and hwh file to the board, executing the attached Python script to get the X gradient output raises the error below when running the kernel.
Could someone help me achieve this implementation?
I can upload the whole project if needed.
I really appreciate any help you can provide.
RuntimeError Traceback (most recent call last)
<ipython-input-13-86fae4039038> in <module>
----> 1 run_kernel()
2 edge_image = Image.fromarray(out_buffer)
<ipython-input-11-0f054f9b8a08> in run_kernel()
3 dma.recvchannel.transfer(out_buffer)
4 sobel.write(0x00,0x81) # start
----> 5 dma.sendchannel.wait()
6 dma.recvchannel.wait()
/usr/local/share/pynq-venv/lib/python3.8/site-packages/pynq/lib/dma.py in wait(self)
214 'DMA Slave Error (cannot access memory map interface)')
215 if error & 0x40:
--> 216 raise RuntimeError(
217 'DMA Decode Error (invalid address)')
218 if self.idle:
RuntimeError: DMA Decode Error (invalid address)
Sobel_pl.ipynb (1002.0 KB)
sobel.tcl (46.3 KB)
solution1.log (177.2 KB)