Hello,
I created a custom IP for the Ultra96 board starting from the FAST algorithm example provided here using Vivado HLS. The creation of the IP is successful, the obtained scheme is the one attached below:
I want to test the IP on the Ultra96 using Pynq.
The problem that I have is that the output image is all zeros, it seems that it is not processed by the PL.
A similar problem is discussed here, without any useful suggestions.
The Python3 code of the Jupyter notebook is the following:
from pynq import Overlay
from cffi import FFI
from pynq import Xlnk
import numpy as np
from PIL import Image
import pynq
# utilities functions
CONTROL_ADDR = 0x00
def start(ip):
data = ip.read(CONTROL_ADDR) & 0x80
ip.write(CONTROL_ADDR, data | 0x01)
def enable_auto_restart(ip):
ip.write(CONTROL_ADDR, 0x80)
def is_done(ip):
data = ip.read(CONTROL_ADDR)
return (data >> 1) & 0x1
def is_idle(ip):
data = ip.read(CONTROL_ADDR)
return (data >> 2) & 0x1;
def is_ready(ip):
data = ip.read(CONTROL_ADDR)
return (data >> 2) & 0x1;
def set_img1(ip, image_buffer, input_image: bool):
addr = 0x10 if input_image else 0x24
print('Image addr', hex(addr))
print('writing', hex(image_buffer.physical_address))
ip.write(addr, image_buffer.physical_address)
def get_img(ip, input_image: bool):
# input/output image reg addr
addr = 0x10 if input_image else 0x24
print('Image addr', addr)
data_0 = ip.read(addr)
#data_1 = ip.read(addr + 4) << 32
data_1 = 0
data = int('{:32b}'.format(data_0 + data_1), 2)
print('read ', hex(data))
return data
# rows, cols, threshold
def set_params(ip, rows, cols, threshold=20):
ip.write(0x1c, threshold)
ip.write(0x30, rows)
ip.write(0x38, cols)
------------------------------------------------------------------
overlay = Overlay("/home/xilinx/pynq/overlays/FAST/FAST_new.bit")
# Load fast IP
fast_ip = overlay.fast_accel_0
ffi = FFI()
IMAGE_PATH = '/home/xilinx/lena256.png'
# prepare input/output image
COLS = 256
ROWS = 256
CHANNELS = 1
# load original image + grayscale conversion
original_image = Image.open(IMAGE_PATH).convert('L')
original_image.load()
# display origina image
display(original_image, 'input image')
# to numpy array
gray_input_array = np.array(original_image)
newgraynp = gray_input_array.reshape(gray_input_array.shape[0],gray_input_array.shape[1], CHANNELS)
# allocate memory buffer
xlnk = Xlnk()
image_buffer = xlnk.cma_array(shape=(ROWS,COLS,CHANNELS), dtype=np.uint8, cacheable=1)
return_buffer = xlnk.cma_array(shape=(ROWS,COLS,CHANNELS), dtype=np.uint8, cacheable=1)
# copy input image to memory buffer
image_buffer[0:ROWS * COLS * CHANNELS] = newgraynp
return_buffer[0:ROWS * COLS * CHANNELS] = 0
# input/output pointers
#image_pointer = ffi.cast("uint8_t *", ffi.from_buffer(image_buffer))
#return_pointer = ffi.cast("uint8_t *", ffi.from_buffer(return_buffer))
# start image processing
enable_auto_restart(fast_ip)
# write values in ip registers
set_params(fast_ip, 256, 256, 20)
set_img1(fast_ip, image_buffer, input_image=True)
set_img1(fast_ip, return_buffer, input_image=False)
# start the ip
start(fast_ip)
#fast_ip.write(0x00, 0x01)
# the return buffer is not modified, it is all zeros!
np.unique(return_buffer)
The addresses of the registers are the following:
// ==============================================================
// Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2020.1 (64-bit)
// Copyright 1986-2020 Xilinx, Inc. All Rights Reserved.
// ==============================================================
// control
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - enable ap_done interrupt (Read/Write)
// bit 1 - enable ap_ready interrupt (Read/Write)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - ap_done (COR/TOW)
// bit 1 - ap_ready (COR/TOW)
// others - reserved
// 0x10 : Data signal of img_in
// bit 31~0 - img_in[31:0] (Read/Write)
// 0x14 : Data signal of img_in
// bit 31~0 - img_in[63:32] (Read/Write)
// 0x18 : reserved
// 0x1c : Data signal of threshold
// bit 31~0 - threshold[31:0] (Read/Write)
// 0x20 : reserved
// 0x24 : Data signal of img_out
// bit 31~0 - img_out[31:0] (Read/Write)
// 0x28 : Data signal of img_out
// bit 31~0 - img_out[63:32] (Read/Write)
// 0x2c : reserved
// 0x30 : Data signal of rows
// bit 31~0 - rows[31:0] (Read/Write)
// 0x34 : reserved
// 0x38 : Data signal of cols
// bit 31~0 - cols[31:0] (Read/Write)
// 0x3c : reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
Any help would be greatly appreciated!
Thank you!