Hi everyone, I am new to FPGAs and am currently struggling with an issue using DMA with my custom HLS IP: the DMA gets stuck on recvchannel.wait(). I am aware that there are several posts on this issue, and I have tried some of the common fixes (ensuring TLAST is asserted in the HLS code, power cycling the board, and checking that the DMA is configured correctly), but to no avail. I have been stuck on this issue for too long and have no idea how to proceed. My HLS IP is a Gaussian elimination implementation that takes in a flattened matrix plus a vector and calculates a solution vector.

Any help would be appreciated, and I would be glad to provide more details.

#ifndef _GAUSSIAN_ELIM_
#define _GAUSSIAN_ELIM_
#include "ap_axi_sdata.h"
#include "ap_int.h"
#include <inttypes.h>
// Problem size: the kernel solves an N x N system.
#define N 128
// Element count of the flattened square matrix. Derived from N and
// parenthesized so the macro expands safely inside larger expressions
// (e.g. `x % N2` or `sizeof(T) * N2`).
#define N2 (N * N)
// Width in bits of the TDATA field of the AXI4-Stream ports.
#define DWIDTH 512
// One AXI4-Stream beat: DWIDTH data bits, with zero-width user/id/dest fields.
typedef ap_axiu<DWIDTH, 0, 0, 0> axis_t;
typedef ap_uint<512> uint512_t;
typedef float DataType; // Data type for matrix elements
// Size of DataType in bits.
const int DataTypeSize = sizeof(DataType) * 8;
// Unsigned integer with the same bit width as DataType.
typedef ap_uint<DataTypeSize> DataTypeInt;
// Union for converting between DataType and its raw 32-bit integer
// representation (used to move float bit patterns through the stream).
typedef union converter {
DataType d;
uint32_t i;
} converter_t;
// Gaussian elimination kernel: takes the flattened N x N matrix (N2
// elements) and a length-N vector, and writes a length-N result.
// Keep this prototype in sync with the definition in the kernel source.
template <typename T> void gaussian_elimination(T matrix[N2],T vector[N] ,T result[N]);
#endif // _GAUSSIAN_ELIM_

HLS source code

/**
 * Solves the dense linear system A * x = b using Gaussian elimination with
 * partial (row) pivoting, followed by back substitution.
 *
 * Rows are never physically swapped: a permutation table records the pivot
 * order and every row access goes through it.
 *
 * @tparam T   Element type of the matrix and vectors.
 * @param A    Row-major N x N coefficient matrix flattened to N2 elements.
 *             Modified in place by the elimination.
 * @param b    Right-hand-side vector of length N. Modified in place.
 * @param out  Receives the length-N solution vector.
 *
 * No singularity check is performed: a zero pivot results in a division by
 * zero whose result propagates into the output.
 */
template <typename T> void gaussian_elimination(T A[N2], T b[N], T out[N]) {
    // perm[r] is the physical row of A / b currently acting as logical row r.
    int perm[N];
    for (int i = 0; i < N; i++) {
        perm[i] = i;
    }

    // Forward elimination with partial pivoting.
    //
    // The PIPELINE pragmas are placed on the innermost loops only. Pipelining
    // the outer k loop would ask HLS to fully unroll every loop nested inside
    // it, and those loops have k-dependent trip counts. If II=1 cannot be met
    // for a loop, the tool falls back to the lowest achievable II.
    for (int k = 0; k < N - 1; k++) {
        // Find the pivot (largest absolute value) in column k, rows k..N-1.
        T max_val = 0;
        int pivot_row = k;
        for (int i = k; i < N; i++) {
#pragma HLS PIPELINE
            const T magnitude = fabs(A[perm[i] * N + k]);
            if (magnitude > max_val) {
                max_val = magnitude;
                pivot_row = i;
            }
        }

        // Bring the pivot row into logical position k by swapping table
        // entries instead of moving matrix data.
        if (pivot_row != k) {
            const int swapped = perm[k];
            perm[k] = perm[pivot_row];
            perm[pivot_row] = swapped;
        }

        // The pivot row is not written while eliminating column k, so its
        // base offset and diagonal element are loop-invariant.
        const int pivot_base = perm[k] * N;
        const T pivot = A[pivot_base + k];

        // Eliminate column k from every row below the pivot.
        for (int i = k + 1; i < N; i++) {
            const int row_base = perm[i] * N;
            const T factor = A[row_base + k] / pivot;
            for (int j = k; j < N; j++) {
#pragma HLS PIPELINE
                A[row_base + j] -= factor * A[pivot_base + j];
            }
            b[perm[i]] -= factor * b[perm[k]];
        }
    }

    // Back substitution, from the last logical row upwards. The running
    // value is kept in a scalar so out[] is written once per row.
    for (int i = N - 1; i >= 0; i--) {
        const int row_base = perm[i] * N;
        T acc = b[perm[i]];
        for (int j = i + 1; j < N; j++) {
#pragma HLS PIPELINE
            acc -= A[row_base + j] * out[j];
        }
        out[i] = acc / A[row_base + i];
    }
}
// AXI4-Stream top-level function for the Gaussian elimination accelerator.
extern "C" {
/**
 * Reads a flattened N x N matrix followed by a length-N vector from `in`,
 * solves the system with gaussian_elimination(), and writes the length-N
 * solution to `out`.
 *
 * Stream protocol as implemented here:
 *   - Input:  exactly N*N + N beats are consumed. Each beat carries ONE
 *             value: only the low 32 bits of the DWIDTH-bit data field are
 *             used (the assignment to the 32-bit union member truncates).
 *   - Output: exactly N beats are produced, one value per beat in the low
 *             32 bits, with TLAST asserted on the final beat.
 *
 * NOTE(review): in.read() blocks until a beat is available. If the sender
 * packs values densely (DWIDTH / 32 values per beat), the kernel receives
 * fewer beats than it waits for, never reaches the output loop and never
 * asserts TLAST, so a receiver waiting on `out` would wait indefinitely.
 * Confirm that the sender's beat count and data width match this protocol.
 *
 * @param in   Input AXI4-Stream (matrix elements first, then the vector).
 * @param out  Output AXI4-Stream carrying the solution vector.
 */
void gaussian_elim_accel(hls::stream<axis_t> &in, hls::stream<axis_t> &out) {
#pragma HLS INTERFACE axis port=in
#pragma HLS INTERFACE axis port=out
#pragma HLS INTERFACE s_axilite port=return bundle=CTRL_BUS
    DataType A[N2]; // Flattened coefficient matrix
    DataType B[N];  // Right-hand-side vector
    DataType X[N];  // Solution vector

    // Receive the N*N matrix elements, one per beat.
    for (int idx = 0; idx < N * N; ++idx) {
        axis_t beat = in.read();
        converter_t bits;
        bits.i = beat.data; // keeps the low 32 bits of the data field
        A[idx] = bits.d;
    }

    // Receive the N right-hand-side elements, one per beat.
    for (int idx = 0; idx < N; ++idx) {
        axis_t beat = in.read();
        converter_t bits;
        bits.i = beat.data; // keeps the low 32 bits of the data field
        B[idx] = bits.d;
    }

    // Solve the system.
    gaussian_elimination<DataType>(A, B, X);

    // Send the N solution elements, one per beat.
    for (int idx = 0; idx < N; ++idx) {
        converter_t bits;
        bits.d = X[idx];

        axis_t beat;
        beat.data = bits.i;
        // Mark every byte lane as enabled. strb is set explicitly so that
        // no side-channel field of the beat is written uninitialised.
        beat.keep = -1;
        beat.strb = -1;
        // TLAST marks the end of the packet on the final beat only.
        beat.last = (idx == N - 1) ? 1 : 0;
        out.write(beat);
    }
}
}
For the custom IP