Hi @briansune @cathalmccabe
Thanks for guiding me.
I’m working with Vivado 2019.2 and the ZCU111 board.
I started from this example and modified it to fit my model: https://github.com/ZhaoqxCN/PYNQ-CNN-ATTEMPT/tree/master/Minst-CNN/CNN HLS
Yes, I changed the HLS code of the convolution and FC layers (input dimensions, kernel dimensions, input and output channels), as well as the parameters in AXI Master.h, AXI slave.h, and config.h. However, no pragmas were changed.
@briansune, what do you mean by a sanity test on HLS?
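If you mean a quick check that data actually flows through the IP before debugging the full model, this is the kind of minimal DMA test I could try first (a sketch only: 'cnn.bit' is a placeholder for my bitstream, and the IP would need to accept this stream, e.g. a valid 9-word header, for the waits to complete):

import numpy as np
from pynq import Overlay, allocate

ol = Overlay('cnn.bit')     # placeholder bitstream name
dma = ol.axi_dma_0          # assumes the DMA is reachable at the top level

in_buf = allocate(shape=(64,), dtype=np.int16)
out_buf = allocate(shape=(64,), dtype=np.int16)
in_buf[:] = np.arange(64, dtype=np.int16)   # known test pattern

dma.sendchannel.transfer(in_buf)
dma.recvchannel.transfer(out_buf)
dma.sendchannel.wait()    # blocks forever if the IP never consumes the stream
dma.recvchannel.wait()    # blocks forever if the IP never asserts TLAST
print(np.array(out_buf))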
MNIST example notebook: FPGA_CNN (2).ipynb (23.5 KB)
My Python code:
import time
import numpy as np
from pynq import DefaultHierarchy, allocate

class Convolutional_Neural_Network(DefaultHierarchy):
    def __init__(self, description):
        super().__init__(description)

    def loadweight(self, W, index, IFMDim, OFMDim, IFMDim1, OFMDim1):
        KerDim = W.shape[2]
        IFMCH = W.shape[1]
        OFMCH = W.shape[0]
        # Scale the float weights to fixed point and prepend the 9-word header
        kernel_val = W.ravel() * 43000
        kernel = np.append([index, 0, KerDim, IFMCH,
                            IFMDim, OFMDim, OFMCH, IFMDim1, OFMDim1], kernel_val)
        print('kernel = ', ([index, 0, KerDim, IFMCH,
                             IFMDim, OFMDim, OFMCH, IFMDim1, OFMDim1], kernel_val))
        in_buffer = allocate(shape=(kernel.shape[0],), dtype=np.int16)
        out_buffer = allocate(shape=(kernel.shape[0],), dtype=np.int16)
        print('input buffer = ', kernel.shape[0])
        print('output buffer = ', kernel.shape[0])
        for i, v in enumerate(kernel):
            in_buffer[i] = v
        self.axi_dma_0.sendchannel.transfer(in_buffer)
        print('finished sendchannel.transfer')
        self.axi_dma_0.recvchannel.transfer(out_buffer)
        print('finished recvchannel.transfer')
        self.axi_dma_0.sendchannel.wait()
        print('finished sendchannel.wait')
        self.axi_dma_0.recvchannel.wait()
        print('finished recvchannel.wait')

    def execute(self, test_data, batch_size, input_ch, input_dim, input_dim1,
                output_ch, output_dim, output_dim1):
        input_mat = test_data[0:batch_size]
        print('finished input_mat')
        print('input_mat = ', test_data[0:batch_size])
        # Prepend the 9-word command header to the flattened input batch
        input_val = np.append([0, batch_size, 0, input_ch, input_dim, input_dim1,
                               output_ch, output_dim, output_dim1], input_mat.ravel())
        print('finished input_val')
        print('input_val = ', [0, batch_size, 0, input_ch, input_dim, input_dim1,
                               output_ch, output_dim, output_dim1], input_mat.ravel())
        in_buffer = allocate(shape=input_val.shape, dtype=np.int16)
        out_buffer = allocate(
            shape=(9 + output_ch * batch_size * output_dim * output_dim1,),
            dtype=np.int16)
        # np.copyto(in_buffer, input_val.astype(np.int16))
        for i, v in enumerate(input_val):
            in_buffer[i] = v
        print('input buffer = ', input_val.shape)
        print('output buffer = ', 9 + output_ch * batch_size * output_dim * output_dim1)
        start_time = time.process_time()
        print('finished copying')
        self.axi_dma_0.sendchannel.transfer(in_buffer)
        print('finished sendchannel.transfer')
        self.axi_dma_0.recvchannel.transfer(out_buffer)
        print('finished recvchannel.transfer')
        self.axi_dma_0.sendchannel.wait()
        print('finished sendchannel.wait')
        self.axi_dma_0.recvchannel.wait()
        print('finished recvchannel.wait')
        end_time = time.process_time()
        print("Elapsed Test Time: ", end_time - start_time)
        # Skip the first 9 words (header) of the returned stream, then reshape per image
        output_mat = out_buffer[9:].reshape(batch_size, -1).astype(np.float32)
        print('finished output_mat')
        return output_mat

    @staticmethod
    def checkhierarchy(description):
        # Bind this driver to any hierarchy containing an 'axi_dma_0' instance
        if 'axi_dma_0' in description['ip']:
            return True
        return False
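For reference, this is roughly how I call the driver from the notebook, checking one small layer against a software golden model (a sketch: 'cnn.bit' and the hierarchy attribute 'CNN' are placeholders for my design, scipy's correlate2d stands in for the exact HLS arithmetic, and the *43000 fixed-point scaling still has to be matched on the software side):

import numpy as np
from scipy.signal import correlate2d
from pynq import Overlay

ol = Overlay('cnn.bit')   # placeholder bitstream name
cnn = ol.CNN              # placeholder: the hierarchy that contains axi_dma_0

# One small layer: 8 filters of 3x3 over a single-channel 28x28 input
W = np.random.uniform(-0.5, 0.5, (8, 1, 3, 3)).astype(np.float32)
x = np.random.randint(0, 64, (1, 1, 28, 28)).astype(np.float32)

# Software golden model: 'valid' cross-correlation, summed over input channels
ref = np.stack([sum(correlate2d(x[0, c], W[k, c], mode='valid')
                    for c in range(W.shape[1]))
                for k in range(W.shape[0])])

cnn.loadweight(W, 0, 28, 26, 28, 26)          # index 0, 28x28 in, 26x26 out
hw = cnn.execute(x, 1, 1, 28, 28, 8, 26, 26)  # batch 1, 1 -> 8 channels
print('max abs diff:', np.abs(hw.reshape(ref.shape) - ref).max())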