I am trying to use a Vitis accumulate core via AXI interface. The block design is as shown below.
When trying to get the output via the following call:
c.sync_from_device()
it’s all zeros.
The IP passes CSIM and COSIM , so this looks like a PYNQ allocate issue since a custom matrix multiplication core works correctly using the same procedure.
Any ideas?accum.tcl (53.9 KB) AccumulateAXI.ipynb (13.8 KB)
xilinx_com_hls_accumulate_accel_1_0.zip (324.6 KB)
#!/usr/bin/env python
# coding: utf-8
# # Accumulate IP in AXI mode
# In[2]:
import datetime
from pynq import Overlay
from pynq import DefaultIP
from pynq import DefaultHierarchy
from pynq import allocate
from pynq import MMIO
from pynq.pl import *
import pynq.lib.dma
import numpy as np
import time
XACCUMULATE_ACCEL_CONTROL_ADDR_AP_CTRL = 0x00
XACCUMULATE_ACCEL_CONTROL_ADDR_GIE = 0x04
XACCUMULATE_ACCEL_CONTROL_ADDR_IER = 0x08
XACCUMULATE_ACCEL_CONTROL_ADDR_ISR = 0x0c
XACCUMULATE_ACCEL_CONTROL_ADDR_IMG_IN1_V_DATA = 0x10
XACCUMULATE_ACCEL_CONTROL_BITS_IMG_IN1_V_DATA = 32
XACCUMULATE_ACCEL_CONTROL_ADDR_IMG_IN2_V_DATA = 0x18
XACCUMULATE_ACCEL_CONTROL_BITS_IMG_IN2_V_DATA = 32
XACCUMULATE_ACCEL_CONTROL_ADDR_IMG_OUT_V_DATA = 0x20
XACCUMULATE_ACCEL_CONTROL_BITS_IMG_OUT_V_DATA = 32
XACCUMULATE_ACCEL_CONTROL_ADDR_HEIGHT_DATA = 0x28
XACCUMULATE_ACCEL_CONTROL_BITS_HEIGHT_DATA = 32
XACCUMULATE_ACCEL_CONTROL_ADDR_WIDTH_DATA = 0x30
XACCUMULATE_ACCEL_CONTROL_BITS_WIDTH_DATA = 32
# In[16]:
#------------------------Address Info-------------------
# 0x00 : Control signals
# bit 0 - ap_start (Read/Write/COH)
# bit 1 - ap_done (Read/COR)
# bit 2 - ap_idle (Read)
# bit 3 - ap_ready (Read)
# bit 7 - auto_restart (Read/Write)
# others - reserved
# 0x04 : Global Interrupt Enable Register
# bit 0 - Global Interrupt Enable (Read/Write)
# others - reserved
# 0x08 : IP Interrupt Enable Register (Read/Write)
# bit 0 - Channel 0 (ap_done)
# bit 1 - Channel 1 (ap_ready)
# others - reserved
# 0x0c : IP Interrupt Status Register (Read/TOW)
# bit 0 - Channel 0 (ap_done)
# bit 1 - Channel 1 (ap_ready)
# others - reserved
# 0x10 : Data signal of img_in1_V
# bit 31~0 - img_in1_V[31:0] (Read/Write)
# 0x18 : Data signal of img_in2_V
# bit 31~0 - img_in2_V[31:0] (Read/Write)
# 0x1c : reserved
# 0x20 : Data signal of img_out_V
# bit 31~0 - img_out_V[31:0] (Read/Write)
# 0x24 : reserved
# 0x28 : Data signal of height
# bit 31~0 - height[31:0] (Read/Write)
# 0x2c : reserved
# 0x30 : Data signal of width
# bit 31~0 - width[31:0] (Read/Write)
# 0x34 : reserved
# (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
# In[17]:
ol = Overlay("accum.bit")
# In[18]:
get_ipython().run_line_magic('pinfo', 'ol')
# In[19]:
ip = ol.accumulate_accel_0
# In[20]:
DIM = 128
a = allocate(shape=((DIM, DIM)), dtype=np.uint8, cacheable=True)
b = allocate(shape=((DIM, DIM)), dtype=np.uint8, cacheable=True)
c = allocate(shape=((DIM, DIM)), dtype=np.uint16, cacheable=True)
a[:] = np.ones((DIM,DIM)).astype('int') * 11
b[:] = np.ones((DIM,DIM)).astype('int') * 23
c[:] = np.zeros((DIM,DIM)).astype('int')
ip.write(XACCUMULATE_ACCEL_CONTROL_ADDR_HEIGHT_DATA, DIM) # dst rows
ip.write(XACCUMULATE_ACCEL_CONTROL_ADDR_WIDTH_DATA, DIM) # dst cols
ip.write(0x00, 4)
fpga_state = ip.read(0x00)
print(fpga_state)
a_p_ptr = a.physical_address
b_p_ptr = b.physical_address
c_p_ptr = c.physical_address
ip.write(0x00, 4)
if fpga_state == 4:
ip.write(XACCUMULATE_ACCEL_CONTROL_ADDR_IMG_IN1_V_DATA, a_p_ptr)
ip.write(XACCUMULATE_ACCEL_CONTROL_ADDR_IMG_IN2_V_DATA, b_p_ptr)
ip.write(XACCUMULATE_ACCEL_CONTROL_ADDR_IMG_OUT_V_DATA, c_p_ptr)
else:
print("Can't write values, must be in IDLE state")
raise KeyboardInterrupt
#get_ipython().run_cell_magic('timeit', '', '\nip.write(0x00, 0x81)\nfpga_state = ip.read(0x00)\n\nmax_try = 100\nwhile fpga_state != 6 and fpga_state != 4:\n fpga_state = ip.read(0x00)\n max_try = max_try -1\n if max_try == 0:\n print("ERROR: Can\'t go ahead")\n ip.write(0x00, 4)\n raise KeyboardInterrupt\n \nip.write(0x00, 4)')
c.sync_from_device()
print(c)