I want to drive a custom IP from PYNQ. My idea is to transfer data from PS memory to the PL over the AXI_HP interface. The specific steps are as follows:
def pl_conv(in_buffer, weights_buffer, bias_buffer, out_buffer):
    """Run the convolution IP once and block until it reports completion.

    The physical address of each buffer is written into the IP's argument
    registers, the start bit is set, and the control register is polled
    until the done bit is observed.

    Args:
        in_buffer: Contiguous buffer holding the input data.
        weights_buffer: Contiguous buffer holding the weights.
        bias_buffer: Contiguous buffer holding the biases.
        out_buffer: Contiguous buffer the IP writes its result into.

    Every buffer must expose a ``physical_address`` attribute.
    """
    # Assumes the standard Vivado/Vitis HLS block-level control register at
    # offset 0x00 (bit 0 = ap_start, bit 1 = ap_done) -- TODO confirm
    # against the generated driver header for this IP.
    ap_start = 0x01
    ap_done = 0x02

    # Setting IP core arguments using physical addresses
    # NOTE(review): offsets 0x10, 0x18 and 0x20 all receive the input buffer
    # address, exactly as before; verify against the IP's register map that
    # these three arguments are really meant to share one buffer.
    conv.write(0x10, in_buffer.physical_address)
    conv.write(0x18, in_buffer.physical_address)
    conv.write(0x20, in_buffer.physical_address)
    conv.write(0x28, weights_buffer.physical_address)
    conv.write(0x30, bias_buffer.physical_address)
    conv.write(0x38, out_buffer.physical_address)

    # Start the IP core
    conv.write(0x00, ap_start)

    # Wait for completion. Only the done bit is tested: the register is
    # already non-zero while the core is running (the start bit stays set),
    # so comparing the whole register against 0 would return immediately,
    # before the output buffer has been written.
    while not (conv.read(0x00) & ap_done):
        pass
def SpaceAllocateq(netParam, num_class):
    """Allocate a 1-D int16 contiguous buffer for every network parameter.

    Buffers are created with ``xlnk.cma_array`` and stored on the head
    section, the 16 bottleneck sections and the tail section of
    ``netParam``.

    Args:
        netParam: Parameter container with ``headParam``, ``bottleParam``
            (indexable, 16 entries used) and ``tailParam`` attributes.
        num_class: Number of classes; sizes the final ``Wf1``/``bf1`` buffers.
    """
    def _alloc(count):
        # One flat int16 buffer of `count` elements.
        return xlnk.cma_array(shape=(count,), dtype='int16')

    # Head section
    head = netParam.headParam
    head.Wc1 = _alloc(32*3*3*3)
    head.bc1 = _alloc(32)
    head.Wc2 = _alloc(32*1*3*3)
    head.bc2 = _alloc(32)
    head.Wc3 = _alloc(32*16*1*1)
    head.bc3 = _alloc(16)

    # Bottleneck sections, sized from the per-section channel configuration
    for idx in range(16):
        layer_cfg = cfg[idx]
        block = netParam.bottleParam[idx]
        block.Wc1 = _alloc(layer_cfg.ch_in*layer_cfg.ch_mid*1*1)
        block.bc1 = _alloc(layer_cfg.ch_mid)
        block.Wc2 = _alloc(layer_cfg.ch_mid*3*3)
        block.bc2 = _alloc(layer_cfg.ch_mid)
        block.Wc3 = _alloc(layer_cfg.ch_out*layer_cfg.ch_mid*1*1)
        block.bc3 = _alloc(layer_cfg.ch_out)

    # Tail section
    tail = netParam.tailParam
    tail.Wc1 = _alloc(1280*320*1*1)
    tail.bc1 = _alloc(1280)
    tail.Wf1 = _alloc(1280*num_class)
    tail.bf1 = _alloc(num_class)
def ReadParamq(paramDir, netParam, num_class):
    """Fill the pre-allocated parameter buffers from files under ``paramDir``.

    Each buffer on ``netParam`` is overwritten in place (``np.copyto``) with
    the array returned by ``read_param_q`` for the matching ``.bin`` file.

    Args:
        paramDir: Directory containing the parameter ``.bin`` files.
        netParam: Parameter container whose buffers were already allocated.
        num_class: Number of classes; sizes the final ``Wf1``/``bf1`` reads.
    """
    def _load(dst, filename, count):
        # presumably `count` is the number of elements to read -- verify
        # against read_param_q.
        np.copyto(dst, read_param_q(paramDir+filename, count))

    # Head section
    head = netParam.headParam
    _load(head.Wc1, "/Head.Wc1.bin", 32*3*3*3)
    _load(head.bc1, "/Head.bc1.bin", 32)
    _load(head.Wc2, "/Head.Wc2.bin", 32*3*3)
    _load(head.bc2, "/Head.bc2.bin", 32)
    _load(head.Wc3, "/Head.Wc3.bin", 32*16)
    _load(head.bc3, "/Head.bc3.bin", 16)

    # Bottleneck sections
    for i in range(16):
        prestr = f"/FoldedInvertedResidual.{i}"
        layer_cfg = cfg[i]
        block = netParam.bottleParam[i]
        _load(block.Wc1, f"{prestr}.Wc1.bin", layer_cfg.ch_in*layer_cfg.ch_mid)
        _load(block.bc1, f"{prestr}.bc1.bin", layer_cfg.ch_mid)
        _load(block.Wc2, f"{prestr}.Wc2.bin", layer_cfg.ch_mid*9)
        _load(block.bc2, f"{prestr}.bc2.bin", layer_cfg.ch_mid)
        _load(block.Wc3, f"{prestr}.Wc3.bin", layer_cfg.ch_out*layer_cfg.ch_mid)
        # bc3 is allocated with ch_out elements in SpaceAllocateq, so it is
        # read with ch_out here as well (this was ch_mid before).
        _load(block.bc3, f"{prestr}.bc3.bin", layer_cfg.ch_out)

    # Tail section
    tail = netParam.tailParam
    _load(tail.Wc1, "/Tail.Wc1.bin", 1280*320)
    _load(tail.bc1, "/Tail.bc1.bin", 1280)
    _load(tail.Wf1, "/Tail.Wf1.bin", 1280*num_class)
    _load(tail.bf1, "/Tail.bf1.bin", num_class)
# Class count passed to both the allocation and the loading step.
num_class = 3765

# Allocate every parameter buffer, then fill them from the 'weight' directory.
netParam = NetParam_q()
SpaceAllocateq(netParam, num_class)
ReadParamq('weight', netParam, num_class)

# Input: flat int16 contiguous buffer filled from weight/image.bin.
image_len = 100*3*224*224
tmp_buffer = xlnk.cma_array(shape=(image_len,), dtype='int16')
np.copyto(tmp_buffer, read_param_q("weight/image.bin", image_len))

# Output: flat int16 contiguous buffer the IP writes into.
tmp1_buffer = xlnk.cma_array(shape=(1500000,), dtype='int16')

# Run the IP with the head section's first weight/bias pair.
pl_conv(tmp_buffer, netParam.headParam.Wc1, netParam.headParam.bc1, tmp1_buffer)
I allocate memory through xlnk, copy the data into the allocated temporary buffers, pass those buffers to the pl_conv function (which sends their physical addresses to the IP), and start the computation. The problem is that the PL reports a non-zero working time, which indicates that the IP loaded into the PL has started to work, but the output data is all 0. Can you see what is wrong? Thank you.