Newer
Older
#ifndef _WINO_BUFFER_HPP_
#define _WINO_BUFFER_HPP_
#include <ap_int.h>
#include <hls_stream.h>
xliu79
committed
// input_feed_underconstruction
//
// Reads 16-bit values out of the on-chip input_buffer, assembles one
// WINO_DOMAIN_SIZE x WINO_DOMAIN_SIZE tile per Winograd column and pushes that
// tile into input_tile_stream[i] on every iteration of the pipelined loop.
// Positions whose row or column index is outside [0, inwidth) are written as 0.
//
// NOTE(review): this listing contains residue from a web "blame" view (author /
// "committed" markers and a run of bare line numbers) and several braces and
// source lines are missing. The comments below describe only the statements
// that are visible here; confirm against the real file before relying on them.
//
// Parameters (as used by the visible statements):
//   input_buffer      - source memory, completely partitioned on dims 1 and 2.
//   input_tile_stream - one output stream per Winograd column.
//   inwidth           - exclusive upper bound for both legal row and legal
//                       column indices (presumably a square input - TODO confirm).
//   pad_size          - subtracted when the starting column indices are formed.
//   weightbuffer_load_outdepth_number - trip count of the outer loop.
//   wino_output_tile_size - per-column offset multiplier and row step.
//   input_buffer_feeding_loop_bound   - trip count of the pipelined inner loop.
//   loop_wino_tile_row_reset_cycle / loop_wino_tile_col_reset_cycle -
//                       counter values at which row / column state is reset.
//   buffer_address_mid_minitile_depth_step - increment of the mid-address offset.
//   wino_out_size_by_wino_width - column step between consecutive tiles.
//   row_address_bitnumber_flag  - selects how the top two address bits are built.
//   start_row_idx_minus_pad_size - initial (possibly negative) head row index.
void input_feed_underconstruction(
ap_uint<16> input_buffer[INBUFFER_HEIGHT][INBUFFER_WIDTH][INPUT_BUFFER_DEPTH],
hls::stream< ap_uint<8*BATCH_SIZE*WINO_DOMAIN_SIZE_SQUARE> > input_tile_stream[WINO_WIDTH],
ap_uint<16> inwidth,
xliu79
committed
ap_uint<16> pad_size,
ap_uint<16> weightbuffer_load_outdepth_number,
ap_uint<16> wino_output_tile_size,
ap_uint<32> input_buffer_feeding_loop_bound,
ap_uint<16> loop_wino_tile_row_reset_cycle,
ap_uint<16> loop_wino_tile_col_reset_cycle,
ap_uint<10> buffer_address_mid_minitile_depth_step,
xliu79
committed
ap_uint<16> wino_out_size_by_wino_width,
ap_uint<1> row_address_bitnumber_flag,
ap_int<16> start_row_idx_minus_pad_size
xliu79
committed
)
{
printf("---input_feed_underconstruction---\n");fflush(stdout);
// row_selection preparation
#pragma HLS array_partition variable=input_buffer dim=1 complete
#pragma HLS array_partition variable=input_buffer dim=2 complete
// Signed index registers: values may be negative inside the padding region.
ap_int<10> input_row_idx[WINO_DOMAIN_SIZE];
#pragma HLS array_partition variable=input_row_idx complete
ap_int<10> input_col_idx[WINO_WIDTH][WINO_DOMAIN_SIZE];
#pragma HLS array_partition variable=input_col_idx dim=1 complete
#pragma HLS array_partition variable=input_col_idx dim=2 complete
// Column offset of each Winograd column: i * wino_output_tile_size.
ap_uint<16> wino_col_offset_constant[WINO_WIDTH];
#pragma HLS array_partition variable=wino_col_offset_constant complete
for(int i=0;i<WINO_WIDTH;i++)
{
#pragma HLS unroll
wino_col_offset_constant[i]=wino_output_tile_size*i;
}
// NOTE(review): first_col_idx is not referenced again in the visible lines.
ap_uint<16> first_col_idx=0;
ap_uint<INDEPTH_MINITILE_SIZE_BITWIDTH> loop_indepth_minitile_idx=0;
// Both counters start at 1 and are compared against their *_reset_cycle values.
ap_uint<16> loop_wino_tile_row_cnt=1;
ap_uint<16> loop_wino_tile_col_cnt=1;
xliu79
committed
// loop_wino_tile_col_reset_cycle =conv_desc.wino_tile_number_in_outwidth*conv_desc.weightbuffer_outdepth_minitile_number*INDEPTH_MINITILE_SIZE;
ap_uint<INBUFFER_MID_ADDR_BITWIDTH> buffer_address_mid_minitile_depth_offset=0;
// NOTE(review): buffer_address_mid_buffertile_depth_offset is not used in the visible lines.
ap_uint<INBUFFER_MID_ADDR_BITWIDTH> buffer_address_mid_buffertile_depth_offset=0;
// Initial row state: head row plus WINO_DOMAIN_SIZE consecutive rows.
ap_int<10> input_head_row_idx=start_row_idx_minus_pad_size;
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
{
input_row_idx[i]=start_row_idx_minus_pad_size+i;
}
// Initial column state: each Winograd column starts at its offset minus pad_size.
ap_int<16> input_head_col_idx=wino_col_offset_constant[0]-pad_size;
for(int wino_array_col=0;wino_array_col<WINO_WIDTH;wino_array_col++)
{
#pragma HLS unroll
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
{
#pragma HLS unroll
input_col_idx[wino_array_col][i]=i-pad_size+wino_col_offset_constant[wino_array_col];
}
}
for(ap_uint<16> outdepth_buffertile_idx=0;outdepth_buffertile_idx<weightbuffer_load_outdepth_number;outdepth_buffertile_idx++)
{
xliu79
committed
// The mid-address depth offset restarts from 0 for every outer iteration.
buffer_address_mid_minitile_depth_offset = 0;
xliu79
committed
for(int counter=0;counter<input_buffer_feeding_loop_bound;counter++ )
{
#pragma HLS pipeline II =1
// it is a flattened loop which does following
// for(ap_uint<16> indepth_buffertile_baseidx=0;indepth_buffertile_baseidx<weightbuffer_load_indepth_number;indepth_buffertile_baseidx++)
// for( int indepth_minitile_baseidx=0;indepth_minitile_baseidx<weightbuffer_indepth_minitile_number; indepth_minitile_baseidx ++)
// for( int wino_row_cnt;wino_row_cnt <conv_desc.wino_tile_number_in_outwidth;wino_row_cnt++)
xliu79
committed
// for(int wino_tile_col_idx =1; wino_tile_col_idx < wino_tile_number_in_outwidth+1 ; wino_tile_col_idx++)
// for(ap_uint<3> indepth_minitile_idx=0; indepth_minitile_idx< INDEPTH_MINITILE_SIZE; indepth_minitile_idx++)
// NOTE(review): the bare numbers below are not code; they look like line
// numbers left behind where the original source lines were lost.
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// A row is legal when its index lies in [0, inwidth).
ap_uint<1> row_legal_flag[WINO_DOMAIN_SIZE];
#pragma HLS array_partition variable=row_legal_flag complete
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
{
#pragma HLS unroll
row_legal_flag[i] = ( input_row_idx[i] >=0 && input_row_idx[i] < inwidth);
}
ap_uint<1> row_address_offset_bit1[INBUFFER_HEIGHT];
#pragma HLS array_partition variable=row_address_offset_bit1 complete
ap_uint<2> row_address_offset_bit2[INBUFFER_HEIGHT];
#pragma HLS array_partition variable=row_address_offset_bit2 complete
// Split the head row index: the low INBUFFER_HEIGHT_BITWIDTH bits give the
// breakpoint, the next two bits give the base row address offset.
ap_uint<INBUFFER_HEIGHT_BITWIDTH> row_breakpoint=input_head_row_idx.range(INBUFFER_HEIGHT_BITWIDTH-1,0);
ap_uint<2> input_head_row_address_offset=input_head_row_idx.range(INBUFFER_HEIGHT_BITWIDTH+1,INBUFFER_HEIGHT_BITWIDTH);
// Buffer rows at or past the breakpoint use the base offset; rows before it
// use base offset + 1. The 1-bit variant keeps only the low bit of that value.
for(int i=0;i<INBUFFER_HEIGHT;i++)
{
if(i>=row_breakpoint)
{
row_address_offset_bit1[i]=input_head_row_address_offset;
row_address_offset_bit2[i]=input_head_row_address_offset;
}
else
{
row_address_offset_bit1[i]=input_head_row_address_offset+1;
row_address_offset_bit2[i]=input_head_row_address_offset+1;
}
}
xliu79
committed
// A column is legal when its index lies in [0, inwidth).
ap_uint<1> col_legal_flag[WINO_WIDTH][WINO_DOMAIN_SIZE];
#pragma HLS array_partition variable=col_legal_flag complete
for(int wino_array_col=0;wino_array_col<WINO_WIDTH;wino_array_col++)
{
#pragma HLS unroll
xliu79
committed
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
xliu79
committed
#pragma HLS unroll
col_legal_flag[wino_array_col][i]= ( input_col_idx[wino_array_col][i] >=0 && input_col_idx[wino_array_col][i] < inwidth);
xliu79
committed
}
xliu79
committed
ap_uint<INBUFFER_MID_ADDR_BITWIDTH> col_pix_address_offset[INBUFFER_WIDTH];
#pragma HLS array_partition variable=col_pix_address_offset complete
xliu79
committed
// Same breakpoint scheme for columns: the low bits pick the breakpoint, the
// upper bits plus the current depth offset form the base address offset.
ap_uint<INBUFFER_WIDTH_BITWIDTH> col_breakpoint=input_head_col_idx.range(INBUFFER_WIDTH_BITWIDTH-1,0);
xliu79
committed
ap_uint<INBUFFER_MID_ADDR_BITWIDTH> input_head_col_address_offset;
input_head_col_address_offset= input_head_col_idx.range(INBUFFER_WIDTH_BITWIDTH+INBUFFER_MID_ADDR_BITWIDTH-1,INBUFFER_WIDTH_BITWIDTH)
+ buffer_address_mid_minitile_depth_offset;
xliu79
committed
for(int i=0;i<INBUFFER_WIDTH;i++)
{
if(i>=col_breakpoint)
col_pix_address_offset[i] = input_head_col_address_offset;
else
col_pix_address_offset[i] = input_head_col_address_offset+1;
}
// Per-bank read address for every (row, column) bank of input_buffer.
ap_uint<INPUT_BUFFER_DEPTH_BITWIDTH> buffer_address[INBUFFER_HEIGHT][INBUFFER_WIDTH];
#pragma HLS array_partition variable=buffer_address complete dim=2
#pragma HLS array_partition variable=buffer_address complete dim=1
xliu79
committed
for(int i=0;i<INBUFFER_HEIGHT; i++)
{
#pragma HLS unroll
for(int j=0;j<INBUFFER_WIDTH;j++)
{
#pragma HLS unroll
// (a,b) on ap_uint values is bit concatenation, not the built-in comma operator.
// Low part: truncated column offset followed by the in-depth minitile index.
ap_uint<INPUT_BUFFER_DEPTH_BITWIDTH-2> common=((ap_uint<INBUFFER_MID_ADDR_BITWIDTH-1> )col_pix_address_offset[j],loop_indepth_minitile_idx);
// Top two bits: either {1-bit row offset, MSB of column offset} or the
// 2-bit row offset, chosen by row_address_bitnumber_flag.
ap_uint<2> headbits=row_address_bitnumber_flag?
(row_address_offset_bit1[i],col_pix_address_offset[j][INBUFFER_MID_ADDR_BITWIDTH-1]):
row_address_offset_bit2[i];
buffer_address[i][j]=(headbits,common);
xliu79
committed
}
xliu79
committed
xliu79
committed
// Read one value from every bank at its computed address.
ap_uint<16> input_buffer_val[INBUFFER_HEIGHT][INBUFFER_WIDTH];
#pragma HLS array_partition variable=input_buffer_val complete dim=1
#pragma HLS array_partition variable=input_buffer_val complete dim=2
xliu79
committed
for(int i=0;i<INBUFFER_HEIGHT; i++)
{
#pragma HLS unroll
for(int j=0;j<INBUFFER_WIDTH;j++)
{
#pragma HLS unroll
xliu79
committed
input_buffer_val[i][j]=input_buffer[i][j][buffer_address[i][j]];
xliu79
committed
}
xliu79
committed
// Row selection: for each tile row pick the bank row addressed by the low
// bits of input_row_idx[i]; illegal rows are filled with 0.
ap_uint<16> input_plane_tile_row[WINO_DOMAIN_SIZE][INBUFFER_WIDTH];
#pragma HLS array_partition variable=input_plane_tile_row dim=1 complete
#pragma HLS array_partition variable=input_plane_tile_row dim=2 complete
xliu79
committed
for(int j=0;j<INBUFFER_WIDTH;j++)
{
#pragma HLS unroll
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
{
#pragma HLS unroll
xliu79
committed
if(row_legal_flag[i])
input_plane_tile_row[i][j]=input_buffer_val[ (ap_uint<INBUFFER_HEIGHT_BITWIDTH>) input_row_idx[i].range(INBUFFER_HEIGHT_BITWIDTH-1,0) ][j];
xliu79
committed
}
else
{
input_plane_tile_row[i][j]=0;
xliu79
committed
}
// Column selection and packing: element (j,k) of Winograd column i occupies
// bits [(j*WINO_DOMAIN_SIZE+k)*16+15 : (j*WINO_DOMAIN_SIZE+k)*16].
ap_uint<16*WINO_DOMAIN_SIZE_SQUARE> input_plane_tile[WINO_WIDTH];
#pragma HLS array_partition variable=input_plane_tile complete dim=1
xliu79
committed
for(int i=0;i<WINO_WIDTH;i++)
{
#pragma HLS unroll
for(int j=0;j<WINO_DOMAIN_SIZE;j++)
{
#pragma HLS unroll
xliu79
committed
for(int k=0;k<WINO_DOMAIN_SIZE;k++)
{
#pragma HLS unroll
xliu79
committed
if(col_legal_flag[i][k])
input_plane_tile[i].range((j*WINO_DOMAIN_SIZE+k)*16+15,(j*WINO_DOMAIN_SIZE+k)*16)=input_plane_tile_row[j][ (ap_uint<INBUFFER_WIDTH_BITWIDTH>) input_col_idx[i][k].range(INBUFFER_WIDTH_BITWIDTH-1,0) ];
xliu79
committed
else
input_plane_tile[i].range((j*WINO_DOMAIN_SIZE+k)*16+15,(j*WINO_DOMAIN_SIZE+k)*16)=0;
xliu79
committed
}
// Emit one packed tile per Winograd column.
// NOTE(review): the stream element is declared 8*BATCH_SIZE*WINO_DOMAIN_SIZE_SQUARE
// bits wide while the tile is 16*WINO_DOMAIN_SIZE_SQUARE bits - confirm they match.
for(int i=0;i<WINO_WIDTH;i++)
{
#pragma HLS unroll
input_tile_stream[i]<<input_plane_tile[i];
}
xliu79
committed
#if DEBUG_FILE_PRINT
int indepth = buffer_address_mid_minitile_depth_offset/buffer_address_mid_minitile_depth_step*INDEPTH_MINITILE_SIZE
+loop_indepth_minitile_idx;
attach_streaming_content<WINO_WIDTH,WINO_DOMAIN_SIZE>(input_plane_tile, start_row_idx_minus_pad_size+pad_size, input_head_col_idx+pad_size, indepth, (char*) "instream.txt");
xliu79
committed
#endif
// ---- state update for the next iteration ----
// Advance the depth offset once a full row cycle has completed.
if(loop_wino_tile_row_cnt == loop_wino_tile_row_reset_cycle)
xliu79
committed
{
buffer_address_mid_minitile_depth_offset += buffer_address_mid_minitile_depth_step;
}
xliu79
committed
// Columns: rewind to the starting columns at the end of a column cycle,
// otherwise step by wino_out_size_by_wino_width after the last minitile index.
if(loop_wino_tile_col_cnt == loop_wino_tile_col_reset_cycle)
{
input_head_col_idx=wino_col_offset_constant[0]-pad_size;
for(int wino_array_col=0;wino_array_col<WINO_WIDTH;wino_array_col++)
xliu79
committed
#pragma HLS unroll
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
{
#pragma HLS unroll
xliu79
committed
input_col_idx[wino_array_col][i]=i-pad_size+wino_col_offset_constant[wino_array_col];
xliu79
committed
}
else if(loop_indepth_minitile_idx==INDEPTH_MINITILE_SIZE-1)
xliu79
committed
{
input_head_col_idx+=wino_out_size_by_wino_width;
for(int wino_array_col=0;wino_array_col<WINO_WIDTH;wino_array_col++)
xliu79
committed
#pragma HLS unroll
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
{
#pragma HLS unroll
xliu79
committed
input_col_idx[wino_array_col][i]+=wino_out_size_by_wino_width;
xliu79
committed
}
// Rows: rewind to the starting rows at the end of a row cycle, otherwise
// step down by wino_output_tile_size whenever a column cycle finishes.
if(loop_wino_tile_row_cnt == loop_wino_tile_row_reset_cycle)
xliu79
committed
{
input_head_row_idx=start_row_idx_minus_pad_size;
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
{
#pragma HLS unroll
input_row_idx[i]=start_row_idx_minus_pad_size+i;
}
xliu79
committed
}
else if(loop_wino_tile_col_cnt == loop_wino_tile_col_reset_cycle)
xliu79
committed
{
input_head_row_idx+=wino_output_tile_size;
for(int i=0;i<WINO_DOMAIN_SIZE;i++)
{
#pragma HLS unroll
input_row_idx[i]+=wino_output_tile_size;
}
xliu79
committed
}
// Counters wrap back to 1 (not 0) when they reach their reset value.
if(loop_wino_tile_row_cnt==loop_wino_tile_row_reset_cycle)
xliu79
committed
{
loop_wino_tile_row_cnt=1;
xliu79
committed
}
else
{
loop_wino_tile_row_cnt++;
}
if(loop_wino_tile_col_cnt == loop_wino_tile_col_reset_cycle)
{
loop_wino_tile_col_cnt=1;
}
else
{
loop_wino_tile_col_cnt++;
xliu79
committed
// NOTE(review): the closing brace of the else above is missing here, so it is
// unclear whether this increment is unconditional - presumably it is, relying
// on the fixed-width counter wrapping; confirm against the real file.
loop_indepth_minitile_idx++;
xliu79
committed
}
xliu79
committed
// Fragment of load_weight_ddr_one_port (name taken from the printf below).
//
// NOTE(review): the function header line, the opening brace, the first
// parameters and the "#if" that pairs with the "#endif" below are missing from
// this listing, as are several braces inside the body. weightDDR_port_burst_length
// and port_load_cnt_x2 are used but not declared in the visible lines.
//
// What the visible statements do: unless skip_flag is set, read 128-bit words
// from weight_DDR starting at ddr_address_offset, repack each word into four
// W_WIDTH*4-bit words, and write them into weight_buff at an address whose top
// bit is pingpong.
ap_uint<128>* weight_DDR,
ap_uint<W_WIDTH*4> weight_buff[WEIGHT_FEED_NUMBER_PER_PORT][WINO_DOMAIN_SIZE_SQUARE*INDEPTH_MINITILE_SIZE/4][WEIGHT_BUFFER_DEPTH],
xliu79
committed
ap_uint<16> weightDDR_buffer_burst_length,
xliu79
committed
ap_uint<32> ddr_address_offset,
ap_uint<1> pingpong,
ap_uint<1> skip_flag
xliu79
committed
,ConvDesc_t conv_desc
#endif
)
printf("\t^^load_weight_ddr_one_port^^\n");fflush(stdout);
#pragma HLS array_partition variable = weight_buff dim=1 complete
#pragma HLS array_partition variable = weight_buff dim=2 complete
xliu79
committed
// Nothing is loaded when the caller asks to skip this transfer.
if(skip_flag)
return;
xliu79
committed
ap_uint<128>* offseted_weight_DDR=weight_DDR+ddr_address_offset;
// The *_x2 counters appear to count in half-steps: later lines use counter_x2/2
// and drop bit 0 of buffer_address_offset_x2 - TODO confirm, the increments
// are not visible here.
ap_uint<WEIGHTDDR_INDEPTH_MINITILE_128BIT_STEP_BITWIDTH+1> counter_x2=0;
xliu79
committed
ap_uint<WEIGHT_BUFFER_DEPTH_BITWIDTH> buffer_address_offset_x2=0;
xliu79
committed
// NOTE(review): buffer_idx is left uninitialized in the bitwidth==0 branch; in
// that configuration the store below indexes weight_buff[0] instead.
#if WEIGHT_FEED_NUMBER_PER_PORT_BITWIDTH == 0
ap_uint<1> buffer_idx;
#else
ap_uint<WEIGHT_FEED_NUMBER_PER_PORT_BITWIDTH> buffer_idx=0;
xliu79
committed
#endif
for(int address = 0; address<weightDDR_port_burst_length; address++)
xliu79
committed
xliu79
committed
ap_uint<128> temp128 = offseted_weight_DDR[address];
ap_uint<W_WIDTH*4> temp32[4];
xliu79
committed
#pragma HLS array_partition variable = temp32 complete
xliu79
committed
// Take the low W_WIDTH bits of each of the 16 bytes of temp128 and pack four
// of them into each temp32[i] (byte j of 32-bit group i -> field j of temp32[i]).
for(int i=0;i<4;i++)
for(int j=0;j<4;j++)
{
#pragma HLS unroll
temp32[i].range(j*W_WIDTH+W_WIDTH-1,j*W_WIDTH) =temp128.range(i*32+j*8+W_WIDTH-1,i*32+j*8);
}
}
// Write address: pingpong concatenated (ap_uint comma operator) with the
// offset shifted right by one bit.
ap_uint<WEIGHT_BUFFER_DEPTH_BITWIDTH> buffer_address = (pingpong,buffer_address_offset_x2.range(WEIGHT_BUFFER_DEPTH_BITWIDTH-1,1));
// printf("\taddress: %d, port_idx: %d, bufferaddr:%d\n", (int)address,(int) buffer_idx,(int) buffer_address );
xliu79
committed
// NOTE(review): as shown, every second-dimension bank i receives temp32[i%4];
// any write-enable that restricts this to a subset of banks is not visible.
for(int i=0;i<WINO_DOMAIN_SIZE_SQUARE*INDEPTH_MINITILE_SIZE/4;i++)
xliu79
committed
#pragma HLS unroll
xliu79
committed
{
#if WEIGHT_FEED_NUMBER_PER_PORT_BITWIDTH == 0
xliu79
committed
weight_buff[0][i][buffer_address]=temp32[i%4];
#else
weight_buff[buffer_idx][i][buffer_address]=temp32[i%4];
#endif
}
}
// NOTE(review): the four conditions below have lost their bodies in this
// listing; the counter/offset updates they guarded cannot be reconstructed here.
if(port_load_cnt_x2/2==weightDDR_buffer_burst_length)
else if(counter_x2/2== WEIGHTDDR_INDEPTH_MINITILE_128BIT_STEP-1)
if(port_load_cnt_x2/2==weightDDR_buffer_burst_length)
if(counter_x2/2==WEIGHTDDR_INDEPTH_MINITILE_128BIT_STEP-1)
xliu79
committed
// weight_streamer
//
// On each iteration of the pipelined loop, reads one entry from every bank of
// weight_buff at a common address, packs the banks of each buffer_idx into one
// W_WIDTH*INDEPTH_MINITILE_SIZE*WINO_DOMAIN_SIZE_SQUARE-bit word and pushes it
// to weight_stream[buffer_idx]. The address is pingpong concatenated with
// (indepth_minitile_addr_offset + outdepth_minitile_addr_offset).
//
// NOTE(review): this listing is missing parameters (loop_weight_feed_bound and
// weightbuffer_outdepth_minitile_number are used but not declared in view), the
// "#if" matching the "#endif" in the parameter list, the loop-body braces and
// the end of the function. Comments cover only the visible statements.
void weight_streamer(
ap_uint<W_WIDTH*4> weight_buff[WEIGHT_FEED_NUMBER_PER_PORT][WINO_DOMAIN_SIZE_SQUARE*INDEPTH_MINITILE_SIZE/4][WEIGHT_BUFFER_DEPTH],
hls::stream<ap_uint<W_WIDTH*INDEPTH_MINITILE_SIZE*WINO_DOMAIN_SIZE_SQUARE> > weight_stream[WEIGHT_FEED_NUMBER_PER_PORT],
ap_uint<16> loop_outdepth_minitile_baseidx_reset_cycle_minus1,
ap_uint<16> loop_start_output_baserowcol_reset_cycle,
xliu79
committed
ap_uint<1> pingpong
xliu79
committed
,ConvDesc_t conv_desc
#endif
)
{
#pragma HLS array_partition variable = weight_buff dim=1 complete
#pragma HLS array_partition variable = weight_buff dim=2 complete
xliu79
committed
// int weight_feed_total_size_by2 = weight_feed_total_size/2;
xliu79
committed
// Address offsets are one bit narrower than the buffer address; pingpong
// supplies the remaining top bit.
ap_uint<WEIGHT_BUFFER_DEPTH_BITWIDTH -1> outdepth_minitile_addr_offset=0;
ap_uint<WEIGHT_BUFFER_DEPTH_BITWIDTH -1> indepth_minitile_addr_offset=0;
xliu79
committed
// NOTE(review): loop_outdepth_minitile_baseidx_cnt is not used in the visible lines.
ap_uint<16> loop_outdepth_minitile_baseidx_cnt=1;
ap_uint<16> loop_start_output_baserowcol_cnt=1;
xliu79
committed
// loop_outdepth_minitile_baseidx_reset_cycle_minus1=conv_desc.weightbuffer_outdepth_minitile_number-1;
// loop_start_output_baserowcol_reset_cycle=conv_desc.weightbuffer_outdepth_minitile_number * conv_desc.wino_tile_number_in_outwidth;
// int loop_weight_feed_bound = conv_desc.weightbuffer_indepth_minitile_number * conv_desc.weightbuffer_outdepth_minitile_number * conv_desc.wino_tile_number_in_outwidth;
xliu79
committed
for(ap_uint<32> cycle=0;cycle < loop_weight_feed_bound; cycle++)
#pragma HLS pipeline
// for(int indepth_minitile_baseidx=0;indepth_minitile_baseidx<conv_desc.weightbuffer_load_indepth_step; indepth_minitile_baseidx += INDEPTH_MINITILE_SIZE)
// for(int start_output_baserowcol =0; start_output_baserowcol < conv_desc.outwidth *conv_desc.wino_out_tile_in_rowstep; start_output_baserowcol+=conv_desc.wino_output_tile_size*WINO_WIDTH)
// for(int outdepth_minitile_baseidx=0;outdepth_minitile_baseidx<conv_desc.weightbuffer_load_outdepth_step; outdepth_minitile_baseidx += OUTDEPTH_MINITILE_SIZE)
// Read address = {pingpong, indepth offset + outdepth offset}.
ap_uint<WEIGHT_BUFFER_DEPTH_BITWIDTH-1> weight_buffer_address_right=indepth_minitile_addr_offset+outdepth_minitile_addr_offset;
ap_uint<WEIGHT_BUFFER_DEPTH_BITWIDTH> weight_buffer_address = (pingpong,weight_buffer_address_right);
xliu79
committed
// Stage 1: read every bank at the shared address.
ap_uint<W_WIDTH*4> temp18[WEIGHT_FEED_NUMBER_PER_PORT][WINO_DOMAIN_SIZE_SQUARE*INDEPTH_MINITILE_SIZE/4];
#pragma HLS array_partition variable = temp18 complete
xliu79
committed
for(int buffer_idx =0; buffer_idx< WEIGHT_FEED_NUMBER_PER_PORT; buffer_idx++)
{
#pragma HLS unroll
for(int j18=0;j18<WINO_DOMAIN_SIZE_SQUARE*INDEPTH_MINITILE_SIZE/4;j18++)
{
#pragma HLS unroll
temp18[buffer_idx][j18]=weight_buff[buffer_idx][j18][weight_buffer_address];
}
}
xliu79
committed
// Stage 2: concatenate the per-bank words; bank j18 lands in bits
// [j18*W_WIDTH*4 + W_WIDTH*4 - 1 : j18*W_WIDTH*4].
ap_uint<W_WIDTH*INDEPTH_MINITILE_SIZE*WINO_DOMAIN_SIZE_SQUARE> temp16x36[WEIGHT_FEED_NUMBER_PER_PORT];
#pragma HLS array_partition variable = temp16x36 complete
xliu79
committed
for(int buffer_idx =0; buffer_idx< WEIGHT_FEED_NUMBER_PER_PORT; buffer_idx++)
{
#pragma HLS unroll
for(int j18=0;j18<WINO_DOMAIN_SIZE_SQUARE*INDEPTH_MINITILE_SIZE/4;j18++)
{
#pragma HLS unroll
temp16x36[buffer_idx].range(j18*W_WIDTH*4+W_WIDTH*4-1,j18*W_WIDTH*4)=temp18[buffer_idx][j18];
}
}
// Stage 3: push one packed word per output stream.
for(int buffer_idx =0; buffer_idx< WEIGHT_FEED_NUMBER_PER_PORT; buffer_idx++)
{
#pragma HLS unroll
weight_stream[buffer_idx]<<temp16x36[buffer_idx];
}
xliu79
committed
// Address bookkeeping: the in-depth offset advances once per row/col cycle;
// the out-depth offset counts 0..reset_cycle_minus1 and wraps.
if(loop_start_output_baserowcol_cnt==loop_start_output_baserowcol_reset_cycle){
indepth_minitile_addr_offset+=weightbuffer_outdepth_minitile_number;
}
if(outdepth_minitile_addr_offset==loop_outdepth_minitile_baseidx_reset_cycle_minus1){
outdepth_minitile_addr_offset=0;
}
else{
outdepth_minitile_addr_offset++;
}
xliu79
committed
// NOTE(review): the "} else {" between the next two statements and all closing
// braces of this function are missing from the listing; presumably the counter
// is reset to 1 on a match and incremented otherwise - confirm.
if(loop_start_output_baserowcol_cnt==loop_start_output_baserowcol_reset_cycle){
loop_start_output_baserowcol_cnt=1;
loop_start_output_baserowcol_cnt++;
// weight_feed_one_port
//
// Owns a static weight_buff and alternates between loading it
// (load_weight_ddr_one_port) and streaming it out (weight_streamer). Each loop
// iteration loads with ~pingpong, streams with pingpong, then toggles pingpong.
// DDR_offset, DDR_load_cnt and pingpong are static, so they persist across calls.
//
// NOTE(review): this listing is missing parameters (weightDDR_buffer_burst_length
// and weightDDR_port_burst_length are used but not declared in view), the end of
// the parameter list, the function's opening brace, several call arguments and
// the "#if" lines matching the visible "#endif"s. Comments cover only what is
// visible.
template<int dummy> // the dummy template parameter makes sure each module has an independent weight buffer
xliu79
committed
void weight_feed_one_port(
hls::stream<ap_uint<W_WIDTH*INDEPTH_MINITILE_SIZE*WINO_DOMAIN_SIZE_SQUARE> > weight_stream[WEIGHT_FEED_NUMBER_PER_PORT],
xliu79
committed
ap_uint<16> weightDDR_burst_number,
ap_uint<16> loop_outdepth_minitile_baseidx_reset_cycle_minus1,
ap_uint<16> loop_start_output_baserowcol_reset_cycle,
xliu79
committed
ap_uint<1> first_flag,
ap_uint<1> last_flag
xliu79
committed
,ConvDesc_t conv_desc
printf("---weight_feed_one_port---\n");fflush(stdout);
// Static storage: one buffer per template instantiation, kept between calls.
static ap_uint<W_WIDTH*4> weight_buff[WEIGHT_FEED_NUMBER_PER_PORT][WINO_DOMAIN_SIZE_SQUARE*INDEPTH_MINITILE_SIZE/4][WEIGHT_BUFFER_DEPTH];
#pragma HLS array_partition variable=weight_buff complete dim=1
#pragma HLS array_partition variable=weight_buff complete dim=2
#pragma HLS resource variable=weight_buff core=RAM_S2P_BRAM
static ap_uint<24> DDR_offset;
static ap_uint<16> DDR_load_cnt;
static ap_uint<1> pingpong;
// Reset the DDR bookkeeping on the first call.
// NOTE(review): braces are not visible; presumably both resets are guarded by
// first_flag - confirm. pingpong is not reset in the visible lines.
if(first_flag)
xliu79
committed
DDR_offset=0;
DDR_load_cnt=0;
xliu79
committed
// Initial load before the loop; the visible skip argument is ~first_flag, so
// this load is skipped on every call except the first.
// NOTE(review): the DDR pointer and weight_buff arguments are missing from
// this call in the listing.
load_weight_ddr_one_port(
weightDDR_buffer_burst_length,
weightDDR_port_burst_length,
xliu79
committed
0,
xliu79
committed
~first_flag
xliu79
committed
,conv_desc
#endif
);
// #if DEBUG_CONV_DESC
// char filename[100];
// sprintf(filename,"weightbuffer_content%d.txt",dummy);
// attach_weight_buffer_content_uniformed<W_WIDTH,WINO_DOMAIN_SIZE,INDEPTH_MINITILE_SIZE,WEIGHT_FEED_NUMBER_PER_PORT,WEIGHT_BUFFER_DEPTH>
// (weight_buff,0,filename);
// #endif
xliu79
committed
for(ap_uint<16> cnt=0;cnt< weightDDR_burst_number ;cnt++)
xliu79
committed
// Advance to the next DDR burst, wrapping both the count and the offset back
// to 0 after the last burst.
if(DDR_load_cnt == weightDDR_burst_number-1)
{
DDR_load_cnt = 0;
DDR_offset = 0;
}
else
{
DDR_load_cnt+=1;
DDR_offset+=weightDDR_port_burst_length;
}
xliu79
committed
// Load the next burst with ~pingpong while pingpong is streamed below; the
// load is skipped on the last call once DDR_load_cnt has wrapped to 0.
load_weight_ddr_one_port(
weightDDR_buffer_burst_length,
weightDDR_port_burst_length,
xliu79
committed
~pingpong,
last_flag & (DDR_load_cnt==0)
xliu79
committed
,conv_desc
xliu79
committed
);
// #if DEBUG_CONV_DESC
// attach_weight_buffer_content_uniformed<W_WIDTH,WINO_DOMAIN_SIZE,INDEPTH_MINITILE_SIZE,WEIGHT_FEED_NUMBER_PER_PORT,WEIGHT_BUFFER_DEPTH>
// (weight_buff,cnt+1,filename);
// #endif
xliu79
committed
// Stream out using the current pingpong value.
weight_streamer(
weight_buff,
weight_stream,
loop_outdepth_minitile_baseidx_reset_cycle_minus1,
loop_start_output_baserowcol_reset_cycle,
xliu79
committed
pingpong
xliu79
committed
,conv_desc
#endif
);
// Toggle pingpong for the next iteration.
pingpong = ~pingpong;
}
xliu79
committed
xliu79
committed