Hi,
I’ve been looking at the Composable Pipeline GitHub repository and tried to replicate the IP core generation for one function from the Vitis Vision library using Vitis HLS. I followed the .cpp and .tcl files in the Composable Pipeline src folder and replaced the xfOpenCV function with the one I am interested in, which is threshold. I have already implemented the same function using an AXI memory-mapped interface, and now I am trying to use an AXI stream interface. To do that, I adapted the .cpp code and generated the HLS IP successfully. Then I created a design in Vivado using the Zynq PS, the HLS threshold IP, and one DMA. The design validates successfully in Vivado. Finally, when I move to the PYNQ-Z2 and run the notebook, the DMA receive channel hangs forever in the wait() call.
.cpp file:
#include "hls_stream.h"
#include "ap_int.h"
#include "common/xf_common.hpp"
#include "common/xf_infra.hpp"
#include "imgproc/xf_threshold.hpp"
/* Thresholding mode, passed as the first template argument of xf::cv::Threshold */
#define THRESH_TYPE XF_THRESHOLD_TYPE_BINARY
/* Compile-time height and width used as the xf::cv::Mat template bounds.
 * The actual frame size is supplied at run time through rows/cols;
 * presumably it must not exceed these values - confirm against xf::cv::Mat. */
#define HEIGHT 2496
#define WIDTH 3360
/* Pixels per clock: XF_NPPC8 for 8 pixels per clock, XF_NPPC1 for 1 pixel per clock */
#define NPIX XF_NPPC1
/* Pixel type shared by the input and output Mat */
#define TYPE XF_8UC1
/* First template argument of ap_axiu and of the AXI<->Mat converters;
 * presumably the stream data width in bits (pixel width x NPIX) - confirm. */
#define DATA_WIDTH 8
/* One AXI stream beat: DATA_WIDTH data bits plus 1-bit user/id/dest side channels */
typedef xf::cv::ap_axiu<DATA_WIDTH,1,1,1> interface_t;
/* Stream of beats used for both the input and the output port of the kernel */
typedef hls::stream<interface_t> stream_t;
/**
 * HLS top-level function: reads an image from an AXI stream, applies
 * xf::cv::Threshold with mode THRESH_TYPE, and writes the result to an
 * AXI stream.
 *
 * @param img_inp  input AXI stream (axis port)
 * @param img_out  output AXI stream (axis port)
 * @param thresh   threshold value forwarded to xf::cv::Threshold (s_axilite, bundle "control")
 * @param maxval   maximum value forwarded to xf::cv::Threshold (s_axilite, bundle "control")
 * @param rows     run-time image height used to size both Mats (s_axilite, bundle "control")
 * @param cols     run-time image width used to size both Mats (s_axilite, bundle "control")
 *
 * NOTE(review): AXIvideo2xfMat / xfMat2AXIvideo appear to follow the
 * AXI4-Stream Video convention (TUSER marks start of frame, TLAST marks the
 * end of every line) - confirm against common/xf_infra.hpp. If so, a source
 * that never asserts TUSER (such as a plain AXI DMA) would leave the input
 * converter waiting for start-of-frame, and the per-line TLAST on the output
 * would not match a single whole-frame DMA receive.
 */
void threshold_accel(stream_t& img_inp,
stream_t& img_out,
unsigned char thresh,
unsigned char maxval,
int rows,
int cols) {
// Both image ports are AXI stream interfaces, registered on both sides.
#pragma HLS INTERFACE axis register both port=img_inp
#pragma HLS INTERFACE axis register both port=img_out
// Scalar arguments and the block-level control (port=return) share one
// AXI4-Lite bundle named "control".
#pragma HLS INTERFACE s_axilite port=thresh bundle=control
#pragma HLS INTERFACE s_axilite port=maxval bundle=control
#pragma HLS INTERFACE s_axilite port=rows bundle=control
#pragma HLS INTERFACE s_axilite port=cols bundle=control
#pragma HLS INTERFACE s_axilite port=return bundle=control
// Intermediate images, sized at run time by rows/cols within the
// HEIGHT x WIDTH template bounds.
xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> in_mat(rows, cols);
xf::cv::Mat<TYPE, HEIGHT, WIDTH, NPIX> out_mat(rows, cols);
// Implement the Mat storage as streams so the three stages below can be
// chained under DATAFLOW.
#pragma HLS stream variable=in_mat.data
#pragma HLS stream variable=out_mat.data
#pragma HLS DATAFLOW
// Convert the input AXI stream to xf::cv::Mat
xf::cv::AXIvideo2xfMat<DATA_WIDTH, TYPE, HEIGHT, WIDTH, NPIX>(img_inp, in_mat);
// Run xfOpenCV kernel
xf::cv::Threshold<THRESH_TYPE, TYPE, HEIGHT, WIDTH, NPIX>(in_mat, out_mat, thresh, maxval);
// Convert xf::cv::Mat back to the output AXI stream
xf::cv::xfMat2AXIvideo<DATA_WIDTH, TYPE, HEIGHT, WIDTH, NPIX>(out_mat, img_out);
}
.tcl file:
# Copyright 2019 Xilinx, Inc.
# Vitis HLS batch script: synthesizes threshold_accel and exports it as a
# Vivado IP-catalog core.
# Set project name
set PROJ "threshold.vhlsprj"
# Root of the local sources. Note the trailing "/": paths built below as
# "${XF_PROJ_ROOT}/..." therefore contain a double slash.
set XF_PROJ_ROOT "/home/USER/Documents/PYNQ/src/"
# Set FPGA part
set XPART "xc7z020clg400-1"
# Set clock period (passed to create_clock -period; presumably nanoseconds - confirm)
set CLKP "10"
# Set Vitis vision library include path.
# NOTE(review): this lives under a different home directory than XF_PROJ_ROOT
# (pemp vs USER) - confirm both paths exist on the build machine.
set VITIS_INCLUDE "/home/pemp/Documents/Vitis_Libraries/vision/L1/include/"
# Set device macro (passed to the compiler as -D_ZYNQ_)
set DEVICE_MACRO "_ZYNQ_"
# Create project (-reset discards any existing project with the same name)
open_project -reset $PROJ
# Add source file, set top, set FPGA part, set clock period and name solution
set_top threshold_accel
# NOTE(review): the -csimflags include paths point under XF_PROJ_ROOT rather
# than VITIS_INCLUDE, and the second -I is followed by a space - confirm these
# are intended; no csim_design step is run in this script.
add_files "${XF_PROJ_ROOT}/threshold_axis/xf_threshold_accel.cpp" -cflags "-I${VITIS_INCLUDE} -D${DEVICE_MACRO} -std=c++14" -csimflags "-I${XF_PROJ_ROOT}/L1/include -I ${XF_PROJ_ROOT}/L1/examples/"
open_solution -reset "solution1" -flow_target vivado
set_part $XPART
create_clock -period $CLKP
# Synthesize and export IP using Vivado flow
config_export -format ip_catalog -rtl verilog
csynth_design
# Alternative export that also requests -flow impl (disabled):
#export_design -flow impl -rtl verilog -format ip_catalog
export_design -rtl verilog -format ip_catalog
exit
vivado design:
notebook:
from pynq import Overlay, allocate
import cv2 as cv
import numpy as np
import time
# Load the bitstream; download=True programs the FPGA when the Overlay is created.
overlay = Overlay('/home/xilinx/pynq/overlays/threshold_dma/threshold_dma.bit',download=True)
img_path = '/home/xilinx/jupyter_notebooks/img.png'
# Read the image with flag -1.
img = cv.imread(img_path,-1)
# Scale by 1/256 and cast to uint8 to match the 8-bit kernel;
# presumably the source image is 16-bit - confirm.
img = (img/256).astype(np.uint8)
# Unpacking two values requires a 2-D (single-channel) array.
height, width = img.shape
# Non-cacheable buffers for the DMA, one byte per pixel.
in_buffer = allocate(shape=(height,width), dtype=np.uint8, cacheable=False)
out_buffer = allocate(shape=(height,width), dtype=np.uint8, cacheable=False)
# Copy the image into the DMA input buffer.
in_buffer[:] = img
threshold = 100
max_value = 255
# Kernel arguments over AXI4-Lite. The offsets are presumably those generated
# by HLS for thresh, maxval, rows and cols - verify against the generated
# driver header (x*_hw.h) for this IP.
overlay.threshold_accel_0.write(0x10,threshold)
overlay.threshold_accel_0.write(0x18,max_value)
overlay.threshold_accel_0.write(0x20,height)
overlay.threshold_accel_0.write(0x28,width)
def run_kernel():
    """Queue both DMA transfers, start the threshold IP, and block until both finish.

    Uses the module-level ``overlay``, ``in_buffer`` and ``out_buffer``.
    Returns nothing; the result is left in ``out_buffer``.
    """
    # Queue the transfers before starting the IP.
    overlay.axi_dma_0.sendchannel.transfer(in_buffer)
    overlay.axi_dma_0.recvchannel.transfer(out_buffer)
    # Write 0x81 to the control register at offset 0x00 to start the IP.
    # NOTE(review): 0x81 sets bits 0 and 7, presumably ap_start and
    # auto_restart - confirm against the HLS-generated register map.
    overlay.threshold_accel_0.write(0x00,0x81) # start
    # Block until each channel reports completion.
    overlay.axi_dma_0.sendchannel.wait()
    overlay.axi_dma_0.recvchannel.wait()


run_kernel()