Hi, I am using a PYNQ-Z1 and I am running into a weird problem. I created a system that uses DMA. Every time I try to complete an operation using one of my functions (I wrote functions that utilize the PL resources), it seems like the `time` library needs to be imported all over again. Moreover, the problem only occurs for the `time` library. Does anyone know why this is happening? Here is one of the functions that I wrote:
def Conv1D(channel_in, channel_out, frame_size_x, frame_size_y, frame_length, data, weight, bias):
    """
    Perform a 1D convolution on the PL, one (input channel, output channel) pair per DMA run.

    The data should comply with the Conv2D output structure. Every output
    channel starts at its bias value, accumulates the partial sum received
    from the PL for each input channel, and is finally clipped at zero (ReLU).

    :param channel_in: Number of input channels
    :param channel_out: Number of output channels
    :param frame_size_x: The size of data in x axis (must be even and equal to frame_size_y)
    :param frame_size_y: The size of data in y axis (must be even and equal to frame_size_x)
    :param frame_length: Data depth (must be even)
    :param data: Input data, scaled to 16 dec fixed point, shape (channel_in, channel_in_size)
    :param weight: Weight values, scaled to 16 dec fixed point, shape (3, 1, 1, channel_in, channel_out)
    :param bias: Bias values, scaled to 16 dec fixed point, shape (channel_out,)
    :return: Tuple ``(output, time_consumed)``: the clipped result with shape
        (channel_out, channel_out_size) and the accumulated wall-clock time, in
        seconds, spent in the per-channel-pair processing loop.
    :raises AssertionError: If an argument fails the dimension checks, or if the
        DMA channels are not both idle within 2 seconds of starting a transfer.
    """
    # Bind ``time`` inside the function so the timing calls below never depend
    # on the module-level name. If a caller reuses ``time`` as a variable name
    # (for example to hold the elapsed time returned here), a global lookup of
    # ``time.perf_counter`` would fail on the next call.
    import time

    # Check whether the data dimension is valid
    assert (frame_size_x%2 == 0), "frame_size_x is odd. Size must be an even number. Pad the data if possible."
    assert (frame_size_y%2 == 0), "frame_size_y is odd. Size must be an even number. Pad the data if possible."
    assert (frame_size_x == frame_size_y), "frame_size_x does not comply with frame_size_y."
    assert (frame_length%2 == 0), "frame_length is odd. Size must be an even number. Pad the data if possible."
    assert (weight.shape == (3,1,1,channel_in,channel_out)), f"weight structure does not match the data. Given: {weight.shape}, must be: (3,1,1,{channel_in},{channel_out})."
    assert (bias.shape == (channel_out,)), f"Bias structure does not match the data. Given: {bias.shape}, must be: ({channel_out})."

    # Calculate memory allocation size. The frame size handed to the PL is
    # shared by both buffer sizes, so compute it once.
    frame_size = (frame_size_x // 2 - 1) * (frame_size_y // 2 - 1)
    channel_in_size = 4 * frame_size * frame_length
    channel_out_size = 4 * frame_size * (frame_length - 2)
    assert (data.shape == (channel_in, channel_in_size)), f"Data dimension does not match the describing arguments. Given: ({data.shape[0]},{data.shape[1]}), must be: ({channel_in}, {channel_in_size})."

    # Initialize PL for 1D process
    set_PL_1D(frame_size=frame_size, frame_length=frame_length)

    # Allocate the DMA buffers and the host-side accumulator
    input_buffer = allocate(shape=(channel_in_size,), dtype=np.int32)
    output_list = np.zeros(shape=(channel_out, channel_out_size), dtype=np.int32)
    psum_buffer = allocate(shape=(channel_out_size,), dtype=np.int32)

    # Initialize output values with biases
    for ch_out in range(channel_out):
        output_list[ch_out] += bias[ch_out]
    print("Data and buffer initialization done. Process is starting ...")

    time_consumed = 0
    for ch_in in range(channel_in):
        # Fetch input data; the same input channel is reused for every output channel
        input_buffer[:] = data[ch_in][:]
        for ch_out in range(channel_out):
            # Start timer
            tic = time.perf_counter()
            # Load the three weight taps for this (input, output) channel pair
            W_1D_array[:] = [np.int32(weight[tap][0][0][ch_in][ch_out]) for tap in range(3)]
            # Start routine
            err_time = time.perf_counter()
            dma_send.transfer(input_buffer)
            dma_recv.transfer(psum_buffer)
            # Busy-wait until both DMA channels report idle; abort after 2 seconds
            while not (dma.register_map.MM2S_DMASR.Idle & dma.register_map.S2MM_DMASR.Idle):
                assert ((time.perf_counter() - err_time) < 2.), f"DMA Timeout. send_idle:{dma.register_map.MM2S_DMASR.Idle}, recv_idle: {dma.register_map.S2MM_DMASR.Idle}"
            # Accumulate Partial Sum
            output_list[ch_out] += psum_buffer
            # Stop timer: the measured span covers weight load, transfer and accumulation
            time_consumed += time.perf_counter() - tic
        print(f"Channel input:{ch_in}, process time consumed:{time_consumed}")

    # Apply ReLU at the end of the process.
    # NOTE(review): the lower bound is a float literal, so NumPy may promote the
    # result to a floating dtype; confirm whether callers expect int32 here.
    output_list = np.clip(output_list, 0., None)

    # Drop the local references to the DMA buffers before returning
    del input_buffer, psum_buffer
    return (output_list, time_consumed)