#include "my_chacha_hls.hpp"

/*
 * chachaCore - generate ChaCha output blocks for one request word.
 *
 * ps2ipIntFifo : input stream. When it holds a word, exactly one word is
 *                read per call and sliced as follows:
 *                  data[255:0]   -> key
 *                  data[319:256] -> nonce
 *                  data[383:320] -> initial block counter
 *                  data[447:384] -> block limit
 * masterOut    : output stream. One chachaOut word is written per generated
 *                block.
 *
 * When the input stream is empty the call produces no output.
 *
 * Blocks are produced while the iteration index is below the block limit,
 * and never for more than 1000001 iterations (fixed loop bound). The block
 * counter starts at the value taken from the request and is incremented
 * after every block.
 *
 * NOTE(review): the number of blocks is governed by the limit alone, while
 * the "last" flag is forwarded only on the iteration where the running
 * counter equals the limit. With an initial counter of 0 that equality is
 * never reached inside the loop - confirm this matches what the producer
 * of the request word expects.
 */
void chachaCore(stream &ps2ipIntFifo, stream &masterOut)
{
    /* Only a single state is defined; the static persists across calls. */
    static enum inStates { IN_STATE_IDLE = 0 } curState;

    /* instate: block input matrix, state: working copy mixed by the rounds. */
    uint32 instate[16], state[16];
#pragma HLS array_partition variable=instate complete
#pragma HLS array_partition variable=state complete

    if (curState != IN_STATE_IDLE) {
        return;
    }

    if (!ps2ipIntFifo.empty()) {
        axiWord   request = {0,0};
        uint256   keyBits = 0;
        uint64    nonceBits;
        uint64    blockCounter = 0;
        uint64    blockLimit = 0;
        uint64    blockIdx;
        uint32    sum;
        uint5     w = 0;
        keepstate qr;

        /* Fetch the request and split it into its fields. */
        ps2ipIntFifo.read(request);
        keyBits      = request.data(255, 0);
        nonceBits    = request.data(319, 256);
        blockCounter = request.data(383, 320);
        blockLimit   = request.data(447, 384);

        /* Words 0..3: the fixed constant "expand 32-byte k". */
        instate[0] = 0x61707865; /* "expa" */
        instate[1] = 0x3320646e; /* "nd 3" */
        instate[2] = 0x79622d32; /* "2-by" */
        instate[3] = 0x6b206574; /* "te k" */

        /*
         * Words 4..11: the key, consumed from its most significant 32 bits
         * downwards. Reading a fixed bit range and shifting the key left is
         * used instead of a variable range (original note: fewer LUTs).
         */
        LOOP_KEY:
        for (w = 4; w < 12; ++w) {
#pragma HLS pipeline
            instate[w] = LITTLE_INT32(keyBits.range(255, 224));
            keyBits <<= 32;
        } /* End of LOOP_KEY */

        /* Words 12 and 13 hold the block counter and are set per block. */
        /* Words 14 and 15: upper, then lower half of the nonce. */
        instate[14] = LITTLE_INT32(nonceBits.range(63, 32));
        instate[15] = LITTLE_INT32(nonceBits.range(31, 0));

        LOOP_CHACHA:
        for (blockIdx = 0; blockIdx <= 1000000; ++blockIdx) {
#pragma HLS pipeline
            /* Stop as soon as the requested number of blocks is reached. */
            if (blockIdx >= blockLimit) {
                break;
            }

            chachaOut outWord = {0,0};

            /* Counter: low half in word 12, high half in word 13. */
            instate[12] = blockCounter.range(31, 0);
            instate[13] = blockCounter.range(63, 32);

            /* Start every block from a fresh copy of the input matrix. */
            LOOP_INIT:
            for (w = 0; w < 16; ++w) {
                state[w] = instate[w];
            } /* End of LOOP_INIT */

            /* Each pass performs one column round and one diagonal round. */
            LOOP_QR:
            for (w = 0; w < ROUNDS; w += 2) {
                /* Column round */
                qr = QR(state[0], state[4], state[8], state[12]);   /* column 0 */
                state[0]  = qr.stat[0];
                state[4]  = qr.stat[1];
                state[8]  = qr.stat[2];
                state[12] = qr.stat[3];

                qr = QR(state[1], state[5], state[9], state[13]);   /* column 1 */
                state[1]  = qr.stat[0];
                state[5]  = qr.stat[1];
                state[9]  = qr.stat[2];
                state[13] = qr.stat[3];

                qr = QR(state[2], state[6], state[10], state[14]);  /* column 2 */
                state[2]  = qr.stat[0];
                state[6]  = qr.stat[1];
                state[10] = qr.stat[2];
                state[14] = qr.stat[3];

                qr = QR(state[3], state[7], state[11], state[15]);  /* column 3 */
                state[3]  = qr.stat[0];
                state[7]  = qr.stat[1];
                state[11] = qr.stat[2];
                state[15] = qr.stat[3];

                /* Diagonal round */
                qr = QR(state[0], state[5], state[10], state[15]);  /* main diagonal */
                state[0]  = qr.stat[0];
                state[5]  = qr.stat[1];
                state[10] = qr.stat[2];
                state[15] = qr.stat[3];

                qr = QR(state[1], state[6], state[11], state[12]);  /* diagonal 2 */
                state[1]  = qr.stat[0];
                state[6]  = qr.stat[1];
                state[11] = qr.stat[2];
                state[12] = qr.stat[3];

                qr = QR(state[2], state[7], state[8], state[13]);   /* diagonal 3 */
                state[2]  = qr.stat[0];
                state[7]  = qr.stat[1];
                state[8]  = qr.stat[2];
                state[13] = qr.stat[3];

                qr = QR(state[3], state[4], state[9], state[14]);   /* diagonal 4 */
                state[3]  = qr.stat[0];
                state[4]  = qr.stat[1];
                state[9]  = qr.stat[2];
                state[14] = qr.stat[3];
            } /* End of LOOP_QR */

            /*
             * Add the input matrix back onto the mixed state and pack the 16
             * results into the output word. Every new word enters at bits
             * [31:0] after the previous content has been shifted up by 32,
             * so word 0 ends up furthest from bit 0.
             * MOD_Ov comes from the project header - presumably it wraps the
             * sum to 32 bits; confirm there.
             */
            LOOP_OUT:
            for (w = 0; w < 16; ++w) {
                sum = MOD_Ov(instate[w] + state[w]);
                outWord.datstream <<= 32;
                outWord.datstream.range(31, 0) = LITTLE_INT32(sum);
            } /* End of LOOP_OUT */

            /* Forward the request's "last" flag only when counter == limit. */
            if (blockCounter == blockLimit) {
                outWord.last = request.last;
            }

            ++blockCounter;
            masterOut.write(outWord);
        } /* End of LOOP_CHACHA */
    }

    curState = IN_STATE_IDLE;
}