The Vitis threshold IP was generated using the following .cpp
#include "xf_threshold_config.h"
static constexpr int __XF_DEPTH = (HEIGHT * WIDTH * (XF_PIXELWIDTH(XF_8UC1, NPIX)) / 8) / (INPUT_PTR_WIDTH / 8);
void threshold_accel(ap_uint<INPUT_PTR_WIDTH>* img_inp,
ap_uint<OUTPUT_PTR_WIDTH>* img_out,
unsigned char thresh,
unsigned char maxval,
int rows,
int cols) {
// clang-format off
#pragma HLS INTERFACE m_axi port=img_inp offset=slave bundle=gmem1 depth=__XF_DEPTH
#pragma HLS INTERFACE m_axi port=img_out offset=slave bundle=gmem2 depth=__XF_DEPTH
#pragma HLS INTERFACE s_axilite port=thresh bundle=control
#pragma HLS INTERFACE s_axilite port=maxval bundle=control
#pragma HLS INTERFACE s_axilite port=rows bundle=control
#pragma HLS INTERFACE s_axilite port=cols bundle=control
#pragma HLS INTERFACE s_axilite port=return bundle=control
// clang-format on
const int pROWS = HEIGHT;
const int pCOLS = WIDTH;
const int pNPC1 = NPIX;
xf::cv::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> in_mat(rows, cols);
// clang-format off
// clang-format on
xf::cv::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> out_mat(rows, cols);
// clang-format off
// clang-format on
// clang-format off
#pragma HLS DATAFLOW
// clang-format on
xf::cv::Array2xfMat<INPUT_PTR_WIDTH, XF_8UC1, HEIGHT, WIDTH, NPIX>(img_inp, in_mat);
xf::cv::Threshold<THRESH_TYPE, XF_8UC1, HEIGHT, WIDTH, NPIX>(in_mat, out_mat, thresh, maxval);
xf::cv::xfMat2Array<OUTPUT_PTR_WIDTH, XF_8UC1, HEIGHT, WIDTH, NPIX>(out_mat, img_out);
}
and .h file
#ifndef _XF_THRESHOLD_CONFIG_H_
#define _XF_THRESHOLD_CONFIG_H_
#include "hls_stream.h"
#include "ap_int.h"
#include "common/xf_common.hpp"
#include "common/xf_utility.hpp"
#include "imgproc/xf_threshold.hpp"
#include "xf_config_params.h"
typedef ap_uint<8> ap_uint8_t;
typedef ap_uint<64> ap_uint64_t;
/* set the height and weight */
#define HEIGHT 2496
#define WIDTH 3360
#if RO
#define NPIX XF_NPPC8
#endif
#if NO
#define NPIX XF_NPPC1
#endif
#define INPUT_PTR_WIDTH 8
#define OUTPUT_PTR_WIDTH 8
void threshold_accel(ap_uint<INPUT_PTR_WIDTH>* img_inp,
ap_uint<OUTPUT_PTR_WIDTH>* img_out,
unsigned char thresh,
unsigned char maxval,
int rows,
int cols);
#endif // end of _XF_THRESHOLD_CONFIG_H_
with RO set to 1 and NO set to 0 in config file.
Vivado design looks like this,
with S_AXI_HP0 in 64 bits mode.
The hardware drivers for control registers into the xthreshold_accel_hw.h file look like this,
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - enable ap_done interrupt (Read/Write)
// bit 1 - enable ap_ready interrupt (Read/Write)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - ap_done (COR/TOW)
// bit 1 - ap_ready (COR/TOW)
// others - reserved
// 0x10 : Data signal of thresh
// bit 7~0 - thresh[7:0] (Read/Write)
// others - reserved
// 0x14 : reserved
// 0x18 : Data signal of maxval
// bit 7~0 - maxval[7:0] (Read/Write)
// others - reserved
// 0x1c : reserved
// 0x20 : Data signal of rows
// bit 31~0 - rows[31:0] (Read/Write)
// 0x24 : reserved
// 0x28 : Data signal of cols
// bit 31~0 - cols[31:0] (Read/Write)
// 0x2c : reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
#define XTHRESHOLD_ACCEL_CONTROL_ADDR_AP_CTRL 0x00
#define XTHRESHOLD_ACCEL_CONTROL_ADDR_GIE 0x04
#define XTHRESHOLD_ACCEL_CONTROL_ADDR_IER 0x08
#define XTHRESHOLD_ACCEL_CONTROL_ADDR_ISR 0x0c
#define XTHRESHOLD_ACCEL_CONTROL_ADDR_THRESH_DATA 0x10
#define XTHRESHOLD_ACCEL_CONTROL_BITS_THRESH_DATA 8
#define XTHRESHOLD_ACCEL_CONTROL_ADDR_MAXVAL_DATA 0x18
#define XTHRESHOLD_ACCEL_CONTROL_BITS_MAXVAL_DATA 8
#define XTHRESHOLD_ACCEL_CONTROL_ADDR_ROWS_DATA 0x20
#define XTHRESHOLD_ACCEL_CONTROL_BITS_ROWS_DATA 32
#define XTHRESHOLD_ACCEL_CONTROL_ADDR_COLS_DATA 0x28
#define XTHRESHOLD_ACCEL_CONTROL_BITS_COLS_DATA 32
// control_r
// 0x00 : reserved
// 0x04 : reserved
// 0x08 : reserved
// 0x0c : reserved
// 0x10 : Data signal of img_inp
// bit 31~0 - img_inp[31:0] (Read/Write)
// 0x14 : Data signal of img_inp
// bit 31~0 - img_inp[63:32] (Read/Write)
// 0x18 : reserved
// 0x1c : Data signal of img_out
// bit 31~0 - img_out[31:0] (Read/Write)
// 0x20 : Data signal of img_out
// bit 31~0 - img_out[63:32] (Read/Write)
// 0x24 : reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
#define XTHRESHOLD_ACCEL_CONTROL_R_ADDR_IMG_INP_DATA 0x10
#define XTHRESHOLD_ACCEL_CONTROL_R_BITS_IMG_INP_DATA 64
#define XTHRESHOLD_ACCEL_CONTROL_R_ADDR_IMG_OUT_DATA 0x1c
#define XTHRESHOLD_ACCEL_CONTROL_R_BITS_IMG_OUT_DATA 64
Finally, the notebook looks like this,
from pynq import Overlay, allocate
import cv2 as cv
import numpy as np
import time
overlay = Overlay('/home/xilinx/pynq/overlays/threshold_8pix/threshold.bit',download=True)
img_path = '/home/xilinx/jupyter_notebooks/image.png'
img = cv.imread(img_path,-1)
img = (img/256).astype(np.uint8)
height, width = img.shape
in_buffer = allocate(shape=(height,width), dtype=np.uint8)
out_buffer = allocate(shape=(height,width), dtype=np.uint8)
in_buffer[:] = img
overlay.threshold_accel_0.s_axi_control.write(0x00,2)
overlay.threshold_accel_0.s_axi_control.write(0x10,75)
overlay.threshold_accel_0.s_axi_control.write(0x18,255)
overlay.threshold_accel_0.s_axi_control.write(0x20,height)
overlay.threshold_accel_0.s_axi_control.write(0x28,width)
overlay.threshold_accel_0.s_axi_control_r.write(0x10,in_buffer.device_address)
overlay.threshold_accel_0.s_axi_control_r.write(0x1c,out_buffer.device_address)
overlay.threshold_accel_0.s_axi_control.write(0x00,0x1)
The height and width of the image are exactly the same as I defined in the .h file from the HLS IP generation.
When I do this with the 1 pixel per clock mode, the threshold IP runs with success and the control register stays in IDLE mode. When I do the same with 8 pixel per clock, the register remains always with AP_START set to one and I have to restart my board before I could run again the 1 ppc example successfully. It seems like there could be some denied memory access that “crash” the system independently of reprogramming the FPGA by downloading the bitstream.
Thanks for the support,
Pedro