Hello, I tried to create a Vitis HLS IP as follows:
z_kernel.hpp
#ifndef _Z_KERNEL_H_
#define _Z_KERNEL_H_
#include "ap_axi_sdata.h"
#include "ap_int.h"
#include "hls_stream.h"

// Problem dimensions: the kernel consumes M rows of N words from A,
// N words from X, M words from Y, and produces M words on Z.
#define M 64
#define N 512

// AXI4-Stream beat with a 32-bit data field and zero-width TUSER/TID/TDEST.
typedef ap_axiu<32, 0, 0, 0> axis_t;

// Streaming kernel: for each row m, Z[m] = sum_n(A[m][n] * X[n]) - Y[m].
//
// The streams are taken by reference so that this prototype matches the
// definition in the .cpp file; declaring them as pointers would introduce a
// different overload and leave callers with an unresolved symbol.
void z_kernel(hls::stream<axis_t> &A, hls::stream<axis_t> &X, hls::stream<axis_t> &Y, hls::stream<axis_t> &Z);
#endif
kernel.cpp
#include "z_kernel.hpp"
#include "hls_stream.h"
void z_kernel(hls::stream<axis_t> &A, hls::stream<axis_t> &X, hls::stream<axis_t> &Y, hls::stream<axis_t> &Z)
{
#pragma HLS INTERFACE s_axilite port=return bundle=control
#pragma HLS INTERFACE axis port=A
#pragma HLS INTERFACE axis port=X
#pragma HLS INTERFACE axis port=Y
#pragma HLS INTERFACE axis port=Z
float x[N];
#pragma HLS ARRAY_PARTITION variable=x dim=1 complete
ld_X: for(int n=0;n<N;n++)
{
axis_t tx = X.read();
x[n] = tx.data;
}
l1: for(int m=0;m<M;m++)
{
float sum = 0;
l2: for(int n=0;n<N;n++)
{
axis_t ta = A.read();
sum += ta.data * x[n];
}
axis_t ty = Y.read();
ty.data = sum - ty.data;
Z.write(ty);
}
}
Then I created the following block design (BD) in Vivado and generated the bitstream without any critical warnings:
On the PYNQ board, I ran the following:
import time
from pynq import Overlay
import pynq.lib.dma
from pynq import Xlnk
import numpy as np
from pynq import MMIO
import random
# Load the overlay and look up the DMA blocks and the HLS IP by the
# attribute names they have on the overlay object.
ol = Overlay('z_accel.bit')
dma_A = ol.dma_A
dma_X = ol.dma_X
# A single DMA serves both Y (send channel) and Z (receive channel);
# see run_kernel().
dma_YZ = ol.dma_YZ
z_ip = ol.z_kernel_0
xlnk = Xlnk()
# Define dimensions
# NOTE(review): these presumably must match M and N in the HLS header - confirm.
M = 64
N = 512
# Allocate memory for DMA transfers
# All buffers are float32 and allocated with cacheable=False.
A_buffer = xlnk.cma_array(shape=(M,N), dtype=np.float32, cacheable=False)
X_buffer = xlnk.cma_array(shape=(N), dtype=np.float32, cacheable=False)
Y_buffer = xlnk.cma_array(shape=(M), dtype=np.float32, cacheable=False)
Z_buffer = xlnk.cma_array(shape=(M), dtype=np.float32, cacheable=False)
def run_kernel():
    """Queue all DMA transfers, start the HLS IP, and block until they finish.

    A, X and Y are pushed through the send channels of their DMAs, and Z is
    collected on the receive channel of the Y/Z DMA. Uses the module-level
    DMA handles, buffers and ``z_ip``.
    """
    send_channels = (dma_A.sendchannel, dma_X.sendchannel, dma_YZ.sendchannel)
    send_buffers = (A_buffer, X_buffer, Y_buffer)
    result_channel = dma_YZ.recvchannel

    # Arm every transfer before the IP is started.
    for channel, buffer in zip(send_channels, send_buffers):
        channel.transfer(buffer)
    result_channel.transfer(Z_buffer)

    # Write bits 0 and 7 of the register at offset 0x00.
    # NOTE(review): in the HLS s_axilite control register these are presumably
    # ap_start and auto_restart - confirm against the generated driver header.
    z_ip.write(0x00, ((1<<0) | (1<<7)))

    # Wait for the inputs to drain, then for the result to arrive.
    for channel in send_channels:
        channel.wait()
    result_channel.wait()
# Build random float32 test inputs with the same shapes as the DMA buffers.
A = np.random.rand(M, N).astype(dtype=np.float32)
X = np.random.rand(N).astype(dtype=np.float32)
Y = np.random.rand(M).astype(dtype=np.float32)
# Copy the inputs into the DMA buffers (slice assignment keeps the
# allocated buffer objects rather than rebinding the names).
A_buffer[:] = A
X_buffer[:] = X
Y_buffer[:] = Y
# Run one pass; the result is left in Z_buffer.
run_kernel()
However, after the run, Z_buffer contains only zeros.
What could be the problem? Any help would be appreciated.