PYNQ: PYTHON PRODUCTIVITY FOR ZYNQ

Testing pyrDown function with Pynq

Hello, I am using the HLS source code of the “PYNQ Hello World” example from the PYNQ community.

I have replaced the function “xf::resize” with “xf::pyrDown”:

// Fragment of the accelerator's top-level function body (the enclosing
// function and the declarations of _src/_dst are not shown in this post).
#pragma HLS dataflow

// Presumably unpacks the incoming AXI stream `src` into the xf::Mat `_src`
// using the given source dimensions — confirm against the helper.
axis2xfMat(_src, src, src_rows, src_cols);

// Destination size is half the source size in each dimension, rounded up.
// NOTE(review): dst_rows/dst_cols are overwritten here; how `_dst` was
// constructed is not visible, so verify it was sized to match.
dst_rows= (_src.rows+1) >> 1;
dst_cols= (_src.cols+1) >> 1;

// Pyramid down-sampling from _src into _dst.
// NOTE(review): per the reply further down in this thread, XF_8UC1 is the
// only pixel type xf::pyrDown supports — check what TYPE is set to.
xf::pyrDown <TYPE, HEIGHT, WIDTH, NPC1>(_src,_dst);

// Presumably packs `_dst` back into the outgoing AXI stream `dst` — confirm.
xfMat2axis(_dst, dst, dst_rows, dst_cols);

I also adapted the code in the Jupyter notebook, and I get the following result:

Do you know why this is happening? I ran a test with the software function and the output is as expected.

Thank you in advance

Check

On page 163,
Pixel type. XF_8UC1 is the only supported pixel type

So you only see the red component of each 32-bit value [R, G, B, 8-bit padding].

I suspected that when I read it, but I thought it might be my mistake. It seemed strange that Xilinx created this function for only one channel when the original OpenCV function supports three.

Taking advantage of this thread: do you think it is possible to synthesize the function cv::pyrDown(InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) by modifying only part of the code, or is that unfeasible? pyrDown() is just an example; I would need to do the same with other functions too. Below is part of the pyrDown code:

// Public entry point: down-samples _src into _dst.
//
// _src        input image
// _dst        output image; (re)created below with size `dsz` and the same
//             type as the source
// _dsz        requested output size; when empty, ((cols+1)/2, (rows+1)/2) of
//             the source is used
// borderType  border extrapolation mode; BORDER_CONSTANT is rejected
//
// Several accelerated back-ends are attempted first through project macros
// (OpenCL, OpenVX, HAL, Tegra, IPP); their expansions are not visible here —
// presumably each returns early when it handles the call. Otherwise the work
// is done by a pyrDown_ instantiation selected from the source depth.
void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
CV_INSTRUMENT_REGION();

// Constant borders are not supported by this function.
CV_Assert(borderType != BORDER_CONSTANT);

// OpenCL path: only considered for <= 2-D input and a UMat destination.
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
           ocl_pyrDown(_src, _dst, _dsz, borderType))

// OpenVX path: only considered for <= 2-D input.
CV_OVX_RUN(_src.dims() <= 2,
           openvx_pyrDown(_src, _dst, _dsz, borderType))

Mat src = _src.getMat();
// Default destination size: half the source size, rounded up.
Size dsz = _dsz.empty() ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
_dst.create( dsz, src.type() );
Mat dst = _dst.getMat();
int depth = src.depth();

// Hardware-abstraction-layer hook, given raw data pointers, steps and sizes.
CALL_HAL(pyrDown, cv_hal_pyrdown, src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, depth, src.channels(), borderType);

#ifdef HAVE_TEGRA_OPTIMIZATION
// Tegra path: used only with the default border; done if it reports success.
if(borderType == BORDER_DEFAULT && tegra::useTegra() && tegra::pyrDown(src, dst))
return;
#endif

#ifdef HAVE_IPP
// Split the ISOLATED flag from the rest of the border mode for the IPP check.
bool isolated = (borderType & BORDER_ISOLATED) != 0;
int borderTypeNI = borderType & ~BORDER_ISOLATED;
#endif
// NOTE(review): `isolated`/`borderTypeNI` only exist under HAVE_IPP, so
// CV_IPP_RUN presumably discards its arguments when IPP is absent — confirm.
// The IPP path requires the default border, a non-submatrix (or isolated)
// source, and the default half-size destination.
CV_IPP_RUN(borderTypeNI == BORDER_DEFAULT && (!_src.isSubmatrix() || isolated) && dsz == Size((_src.cols() + 1)/2, (_src.rows() + 1)/2),
ipp_pyrdown( _src, _dst, _dsz, borderType));

// Generic fallback: choose the pyrDown_ instantiation matching the element
// depth (fixed-point cast for integer depths, float cast for floating point).
PyrFunc func = 0;
if( depth == CV_8U )
    func = pyrDown_< FixPtCast<uchar, 8> >;
else if( depth == CV_16S )
    func = pyrDown_< FixPtCast<short, 8> >;
else if( depth == CV_16U )
    func = pyrDown_< FixPtCast<ushort, 8> >;
else if( depth == CV_32F )
    func = pyrDown_< FltCast<float, 8> >;
else if( depth == CV_64F )
    func = pyrDown_< FltCast<double, 8> >;
else
    CV_Error( CV_StsUnsupportedFormat, "" );

func( src, dst, borderType );

}

// Generic down-sampling kernel: a 5-tap filter with weights 1,4,6,4,1 is
// applied horizontally and then vertically, keeping every second column and
// every second row.
//
// _src        source image (must be non-empty)
// _dst        destination image, already allocated; its width/height must be
//             within 2 of half the source size (asserted below)
// borderType  passed to borderInterpolate() for out-of-range rows/columns
//
// NOTE(review): this excerpt has no template header or return type, yet the
// body uses `CastOp` and the caller instantiates pyrDown_<...>; a line such as
// `template<class CastOp> void` appears to be missing from the paste — confirm
// against the original OpenCV source.
pyrDown_( const Mat& _src, Mat& _dst, int borderType )
{
// Filter length (number of taps) and number of rows kept in the ring buffer.
const int PD_SZ = 5;
// WT: intermediate (working) element type; T: source/destination element type.
typedef typename CastOp::type1 WT;
typedef typename CastOp::rtype T;

CV_Assert( !_src.empty() );
Size ssize = _src.size(), dsize = _dst.size();
int cn = _src.channels();
// Length of one buffered row in elements, rounded up via alignSize(..., 16).
int bufstep = (int)alignSize(dsize.width*cn, 16);
// Storage for PD_SZ horizontally filtered rows, plus slack for alignPtr().
AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
WT* buf = alignPtr((WT*)_buf.data(), 16);
// Source element indices used for the left (tabL) and right (tabR) borders.
int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
// For each destination element, the index of its centre source element.
AutoBuffer<int> _tabM(dsize.width*cn);
int* tabM = _tabM.data();
WT* rows[PD_SZ];
CastOp castOp;

CV_Assert( ssize.width > 0 && ssize.height > 0 &&
           std::abs(dsize.width*2 - ssize.width) <= 2 &&
           std::abs(dsize.height*2 - ssize.height) <= 2 );
// sy0: first source row index needed (-2); sy: next source row to filter.
// width0: destination column (in pixels here) where the direct-indexing
// interior loop stops; columns from width0 onwards go through tabR.
int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);

// Build the border tables: map tap positions around the first destination
// pixel (tabL) and around pixel width0 (tabR) to valid source element indices.
for( x = 0; x <= PD_SZ+1; x++ )
{
    int sx0 = borderInterpolate(x - PD_SZ/2, ssize.width, borderType)*cn;
    int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, ssize.width, borderType)*cn;
    for( k = 0; k < cn; k++ )
    {
        tabL[x*cn + k] = sx0 + k;
        tabR[x*cn + k] = sx1 + k;
    }
}

// From here on widths are counted in elements (pixels * channels).
ssize.width *= cn;
dsize.width *= cn;
width0 *= cn;

// Destination element x (pixel x/cn, channel x%cn) is centred on source
// pixel 2*(x/cn), same channel.
for( x = 0; x < dsize.width; x++ )
    tabM[x] = (x/cn)*2*cn + x % cn;

for( int y = 0; y < dsize.height; y++ )
{
    T* dst = _dst.ptr<T>(y);
    WT *row0, *row1, *row2, *row3, *row4;

    // fill the ring buffer (horizontal convolution and decimation)
    // Source rows up to y*2 + 2 are needed for destination row y; rows already
    // filtered for earlier destination rows are not recomputed.
    for( ; sy <= y*2 + 2; sy++ )
    {
        // Ring-buffer slot for this source row.
        WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
        // Map an out-of-range row index to a valid source row.
        int _sy = borderInterpolate(sy, ssize.height, borderType);
        const T* src = _src.ptr<T>(_sy);
        // First pass of the loop below covers the first pixel (cn elements)
        // using the left border table.
        int limit = cn;
        const int* tab = tabL;

        // Two-pass loop: (1) left border via tabL, then the interior up to
        // width0; (2) right border via tabR up to dsize.width, then break.
        for( x = 0;;)
        {
            // Border elements: every tap is looked up through `tab`.
            for( ; x < limit; x++ )
            {
                row[x] = src[tab[x+cn*2]]*6 + (src[tab[x+cn]] + src[tab[x+cn*3]])*4 +
                    src[tab[x]] + src[tab[x+cn*4]];
            }

            if( x == dsize.width )
                break;

            // Interior elements: taps are read directly from `src`, with a
            // specialised loop per channel count. PyrDownVecH's return value is
            // added to x — presumably the number of elements it handled; the
            // scalar loop finishes the remainder.
            if( cn == 1 )
            {
                x += PyrDownVecH<T, WT, 1>(src + x * 2 - 2, row + x, width0 - x);
                for( ; x < width0; x++ )
                    row[x] = src[x*2]*6 + (src[x*2 - 1] + src[x*2 + 1])*4 +
                        src[x*2 - 2] + src[x*2 + 2];
            }
            else if( cn == 2 )
            {
                x += PyrDownVecH<T, WT, 2>(src + x * 2 - 4, row + x, width0 - x);
                for( ; x < width0; x += 2 )
                {
                    const T* s = src + x*2;
                    WT t0 = s[0] * 6 + (s[-2] + s[2]) * 4 + s[-4] + s[4];
                    WT t1 = s[1] * 6 + (s[-1] + s[3]) * 4 + s[-3] + s[5];
                    row[x] = t0; row[x + 1] = t1;
                }
            }
            else if( cn == 3 )
            {
                x += PyrDownVecH<T, WT, 3>(src + x * 2 - 6, row + x, width0 - x);
                for( ; x < width0; x += 3 )
                {
                    const T* s = src + x*2;
                    WT t0 = s[0]*6 + (s[-3] + s[3])*4 + s[-6] + s[6];
                    WT t1 = s[1]*6 + (s[-2] + s[4])*4 + s[-5] + s[7];
                    WT t2 = s[2]*6 + (s[-1] + s[5])*4 + s[-4] + s[8];
                    row[x] = t0; row[x+1] = t1; row[x+2] = t2;
                }
            }
            else if( cn == 4 )
            {
                x += PyrDownVecH<T, WT, 4>(src + x * 2 - 8, row + x, width0 - x);
                for( ; x < width0; x += 4 )
                {
                    const T* s = src + x*2;
                    WT t0 = s[0]*6 + (s[-4] + s[4])*4 + s[-8] + s[8];
                    WT t1 = s[1]*6 + (s[-3] + s[5])*4 + s[-7] + s[9];
                    row[x] = t0; row[x+1] = t1;
                    t0 = s[2]*6 + (s[-2] + s[6])*4 + s[-6] + s[10];
                    t1 = s[3]*6 + (s[-1] + s[7])*4 + s[-5] + s[11];
                    row[x+2] = t0; row[x+3] = t1;
                }
            }
            else
            {
                // Any other channel count: centre index comes from tabM.
                for( ; x < width0; x++ )
                {
                    int sx = tabM[x];
                    row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
                        src[sx - cn*2] + src[sx + cn*2];
                }
            }

            // Second pass: run to the end of the row using the right border
            // table. `tab` is offset by -x so that tab[x + j] reads tabR[j].
            limit = dsize.width;
            tab = tabR - x;
        }
    }

    // do vertical convolution and decimation and write the result to the destination image
    // rows[0..4] are the buffered rows for source rows y*2-2 .. y*2+2.
    for( k = 0; k < PD_SZ; k++ )
        rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;
    row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];

    // PyrDownVecV returns the index at which the scalar loop resumes.
    x = PyrDownVecV<WT, T>(rows, dst, dsize.width);
    // castOp converts the WT weighted sum back to the destination type T.
    for( ; x < dsize.width; x++ )
        dst[x] = castOp(row2[x]*6 + (row1[x] + row3[x])*4 + row0[x] + row4[x]);
}

}

As far as I can tell, it is not feasible to copy the code (it has so many references to other libraries) and modify parts of it so that Vivado HLS can synthesize it. Would you suggest writing my own code instead?
Thank you so much for your reply.

I think that if you really need your own pyrDown to work, you will probably need to rewrite parts of the code. However, I am not sure you need to do so, since the resizer is already working.