// (web blame-view navigation residue removed)
#include "wino_macro.h"
#include <ap_int.h>
#include <hls_stream.h>
// (web blame-view commit markers removed)
#include "wino_cell.cpp"
// (web blame-view gutter line numbers removed)
// Reads the 128-word parameter block at mem_params into a local array and
// unpacks words 0..68 into the fields of conv_desc, one word per field.
// Words 69..127 are copied into the local array but not used here.
//
//   mem_params : source parameter block; exactly 128 words are read.
//   conv_desc  : descriptor populated by this call.
void load_params(
ap_int<32>* mem_params,
ConvDesc_t &conv_desc
)
{
    ap_int<32> word[128];

    // Stage the whole block locally first (pipelined copy loop).
    for(int idx=0;idx<128;idx++)
    {
        #pragma HLS pipeline
        word[idx]=mem_params[idx];
    }

    // ---- layer geometry as given ----
    conv_desc.inheight    = word[0];
    conv_desc.inwidth     = word[1];
    conv_desc.indepth     = word[2];
    conv_desc.outheight   = word[3];
    conv_desc.outwidth    = word[4];
    conv_desc.outdepth    = word[5];
    conv_desc.kernel_size = word[6];
    conv_desc.pad_size    = word[7];
    conv_desc.stride      = word[8];

    // ---- wino related ----
    conv_desc.wino5x5_flag          = word[9];
    // 1: 3x3, 0:5x5
    conv_desc.wino_output_tile_size = word[10];

    // ---- input buffer related ----
    conv_desc.indepth_align_minitile_size               = word[11];
    conv_desc.indepth_align8                            = word[12];
    conv_desc.indepth_ceildiv8                          = word[13];
    conv_desc.inwidth_ceildiv_inbufferwidth             = word[14];
    conv_desc.inwidth_align8                            = word[15];
    conv_desc.group_indepth_offset                      = word[16];
    conv_desc.group_indepth                             = word[17];
    conv_desc.input_ddr_bytes                           = word[18];
    conv_desc.input_ddr_128bits                         = word[19];
    conv_desc.group_indepth_x_inwidth_align8_by8        = word[20];
    conv_desc.group_indepth_offset_x_inwidth_align8_by8 = word[21];
    conv_desc.input_load_burst_length                   = word[22];
    conv_desc.buffer_address_mid_increment_step         = word[23];
    conv_desc.row_address_bitnumber_flag                = word[24];

    // ---- output buffer related ----
    conv_desc.outwidth_align8              = word[25];
    conv_desc.outdepth_align8              = word[26];
    conv_desc.outheight_align4             = word[27];
    conv_desc.outdepth_align_minitile_size = word[28];
    conv_desc.group_outdepth_offset        = word[29];
    conv_desc.group_outdepth               = word[30];
    conv_desc.output_ddr_bytes             = word[31];
    conv_desc.output_ddr_128bits           = word[32];

    // ---- weight related ----
    conv_desc.weightbuffer_load_indepth_number      = word[33];
    conv_desc.weightbuffer_load_indepth_step        = word[34];
    conv_desc.weightbuffer_load_outdepth_number     = word[35];
    conv_desc.weightbuffer_load_outdepth_step       = word[36];
    conv_desc.weightbuffer_indepth_minitile_number  = word[37];
    conv_desc.weightbuffer_outdepth_minitile_number = word[38];
    conv_desc.weightbuffer_total_load_number        = word[39];

    // ---- weight load hardware ----
    conv_desc.weightDDR_buffer_burst_length                     = word[40];
    conv_desc.weightDDR_port_burst_length                       = word[41];
    conv_desc.weightDDR_burst_number                            = word[42];
    conv_desc.loop_outdepth_minitile_baseidx_reset_cycle_minus1 = word[43];
    conv_desc.loop_start_output_baserowcol_reset_cycle          = word[44];
    conv_desc.loop_weight_feed_bound                            = word[45];

    // ---- input buffer feeding related ----
    conv_desc.wino_out_size_by_wino_width                = word[46];
    conv_desc.wino_tile_number_in_outwidth               = word[47];
    conv_desc.loop_outdepth_minitile_baseidx_reset_cycle = word[48];
    conv_desc.loop_wino_tile_col_reset_cycle             = word[49];
    conv_desc.loop_wino_tile_row_reset_cycle             = word[50];
    conv_desc.buffer_address_mid_minitile_depth_step     = word[51];
    conv_desc.input_buffer_feeding_loop_bound            = word[52];
    conv_desc.input_transform_feeding_loop_bound         = word[53];

    // ---- row-tile calculation; these parameters have to be solved after
    //      the weight parameters are decided ----
    conv_desc.out_rowstep                     = word[54];
    conv_desc.wino_tile_number_in_out_rowstep = word[55];

    // ---- wino computation ----
    conv_desc.total_input_stream_tile                     = word[56];
    conv_desc.loop_omini_base_reset_cycle                 = word[57];
    conv_desc.loop_wino_cell_bound                        = word[58];
    conv_desc.loop_wino_tile_rowcol_self_reset_cycle_min1 = word[59];
    conv_desc.loop_iload_reset_cycle                      = word[60];
    conv_desc.outbuffer_oload_increment_step              = word[61];
    conv_desc.outbuffer_omini_increment_step              = word[62];

    // ---- output write back ----
    conv_desc.outdepth_ceildiv8        = word[63];
    conv_desc.output_burst_length      = word[64];
    conv_desc.write_back_flag          = word[65];
    conv_desc.wino_col_pix_upper_bound = word[66];
    conv_desc.wino_tile_number_rowcol  = word[67];
    conv_desc.out_ddr_increment_step   = word[68];
}
// Compute stage for one row tile, written as an HLS dataflow region.
//
// Stages invoked, in source order:
//   1. input_feed_underconstruction : input_buffer -> input_tile_stream[]
//   2. input_transform (one call per WINO_WIDTH lane) :
//        input_tile_stream[i] -> input_tile_transformed_stream[i]
//   3. weight_feed_one_port<0..3>   : weight_DDRk -> weight_stream[k]
//   4. wino_stream_block            : transformed input + weight streams, with out_buffer
//
// Parameters:
//   weight_DDR0..3               : one weight pointer per weight_feed_one_port instance.
//   input_buffer                 : on-chip input tile storage (partitioned on dims 1 and 2).
//   out_buffer                   : on-chip output storage (partitioned on dims 1-4);
//                                  presumably written by wino_stream_block -- TODO confirm.
//   start_output_row             : not referenced in the visible body.
//   start_row_idx_minus_pad_size : forwarded to input_feed_underconstruction.
//   first_flag, last_flag        : forwarded to every weight_feed_one_port call.
//   conv_desc                    : layer descriptor, declared ap_stable.
//   ap_clk_div2                  : forwarded to wino_stream_block.
//
// NOTE(review): each "#endif" inside the call argument lists below has no
// visible matching "#if"; the opening directive lines appear to be missing
// from this copy of the file (compare the "#if DEBUG_CONV_DESC" guards used
// around ",conv_desc" elsewhere in the file) -- confirm against the original.
void wino_systolic_kernel(
ap_uint<128> *weight_DDR0,
ap_uint<128> *weight_DDR1,
ap_uint<128> *weight_DDR2,
ap_uint<128> *weight_DDR3,
ap_uint<16> input_buffer[INBUFFER_HEIGHT][INBUFFER_WIDTH][INPUT_BUFFER_DEPTH],
ap_uint<OUT_WIDTH*2> out_buffer[WINO_OUT_SIZE][WINO_OUT_SIZE][OUTDEPTH_MINITILE_SIZE][WINO_WIDTH][OUTPUT_BUFFER_DEPTH],
ap_uint<16> start_output_row,
ap_int<16> start_row_idx_minus_pad_size,
ap_uint<1> first_flag,
ap_uint<1> last_flag,
ConvDesc_t conv_desc,
ap_uint<1> ap_clk_div2
)
{
// Trace banner, printed on every call.
printf("****wino_systolic_kernel****\n");
#pragma HLS interface ap_stable port=conv_desc
#pragma HLS array_partition variable =input_buffer dim=1 complete
#pragma HLS array_partition variable =input_buffer dim=2 complete
#pragma HLS array_partition variable =out_buffer dim=4 complete
#pragma HLS array_partition variable =out_buffer dim=3 complete
#pragma HLS array_partition variable =out_buffer dim=2 complete
#pragma HLS array_partition variable =out_buffer dim=1 complete
#pragma HLS dataflow
// Streams connecting the stages: raw input tiles, transformed input tiles
// (explicit depth 1), and weights indexed [port][feed].
hls::stream< ap_uint<8*BATCH_SIZE*WINO_DOMAIN_SIZE_SQUARE> > input_tile_stream[WINO_WIDTH];
hls::stream< ap_uint<BTB_WIDTH*BATCH_SIZE*WINO_DOMAIN_SIZE_SQUARE> > input_tile_transformed_stream[WINO_WIDTH];
#pragma HLS stream variable=input_tile_transformed_stream depth=1
hls::stream<ap_uint<W_WIDTH*INDEPTH_MINITILE_SIZE*WINO_DOMAIN_SIZE_SQUARE> > weight_stream[4][WEIGHT_FEED_NUMBER_PER_PORT];
// Stage 1: feed tiles from input_buffer into input_tile_stream.
input_feed_underconstruction(
input_buffer,
input_tile_stream,
// hls::stream< ap_uint<16*BATCH_SIZE*36> > &input_tile_stream1,
// hls::stream< ap_uint<16*BATCH_SIZE*36> > &input_tile_stream2,
conv_desc.inwidth,
conv_desc.pad_size,
conv_desc.weightbuffer_load_outdepth_number,
conv_desc.wino_output_tile_size,
conv_desc.input_buffer_feeding_loop_bound,
conv_desc.loop_wino_tile_row_reset_cycle,
conv_desc.loop_wino_tile_col_reset_cycle,
conv_desc.buffer_address_mid_minitile_depth_step,
conv_desc.wino_out_size_by_wino_width,
conv_desc.row_address_bitnumber_flag,
start_row_idx_minus_pad_size
);
// Stage 2: one input_transform call per lane.
// NOTE(review): the argument list ends with a trailing comma, so further
// argument line(s) appear to be missing here -- confirm.
for(int i=0;i<WINO_WIDTH;i++)
{
input_transform(
input_tile_stream[i],
input_tile_transformed_stream[i],
);
}
// Stage 3: four weight feeders, identical apart from the template index,
// the DDR pointer and the weight_stream row.
weight_feed_one_port<0>(
weight_DDR0,
weight_stream[0],
conv_desc.weightDDR_burst_number,
conv_desc.weightDDR_buffer_burst_length,
conv_desc.weightDDR_port_burst_length,
conv_desc.loop_outdepth_minitile_baseidx_reset_cycle_minus1,
conv_desc.loop_start_output_baserowcol_reset_cycle,
conv_desc.loop_weight_feed_bound,
first_flag,
last_flag
,conv_desc
#endif
);
weight_feed_one_port<1>(
weight_DDR1,
weight_stream[1],
conv_desc.weightDDR_burst_number,
conv_desc.weightDDR_buffer_burst_length,
conv_desc.weightDDR_port_burst_length,
conv_desc.loop_outdepth_minitile_baseidx_reset_cycle_minus1,
conv_desc.loop_start_output_baserowcol_reset_cycle,
conv_desc.loop_weight_feed_bound,
first_flag,
last_flag
,conv_desc
#endif
);
weight_feed_one_port<2>(
weight_DDR2,
weight_stream[2],
conv_desc.weightDDR_burst_number,
conv_desc.weightDDR_buffer_burst_length,
conv_desc.weightDDR_port_burst_length,
conv_desc.loop_outdepth_minitile_baseidx_reset_cycle_minus1,
conv_desc.loop_start_output_baserowcol_reset_cycle,
conv_desc.loop_weight_feed_bound,
first_flag,
last_flag
,conv_desc
#endif
);
weight_feed_one_port<3>(
weight_DDR3,
weight_stream[3],
conv_desc.weightDDR_burst_number,
conv_desc.weightDDR_buffer_burst_length,
conv_desc.weightDDR_port_burst_length,
conv_desc.loop_outdepth_minitile_baseidx_reset_cycle_minus1,
conv_desc.loop_start_output_baserowcol_reset_cycle,
conv_desc.loop_weight_feed_bound,
first_flag,
last_flag
,conv_desc
#endif
);
// Stage 4: compute block taking the transformed input streams, all weight
// streams and out_buffer.
wino_stream_block(
input_tile_transformed_stream,
weight_stream,
out_buffer,
conv_desc.weightbuffer_outdepth_minitile_number,
conv_desc.total_input_stream_tile,
conv_desc.loop_omini_base_reset_cycle,
conv_desc.loop_wino_tile_rowcol_self_reset_cycle_min1,
conv_desc.loop_iload_reset_cycle,
conv_desc.loop_wino_cell_bound,
conv_desc.outbuffer_oload_increment_step,
conv_desc.outbuffer_omini_increment_step,
conv_desc.wino5x5_flag
,conv_desc
#endif
,ap_clk_div2
);
}
// (web blame-view gutter line numbers removed)
// Top-level accelerator entry point.
//
// The SDS pragmas below mark every DDR pointer argument as zero_copy with an
// explicit element range and bind it to the system port named in the matching
// sys_port pragma.
//
// Visible flow of the body:
//   1. Declare the on-chip input buffer and two output buffers
//      (output_buffer0 / output_buffer1).
//   2. Call the clear_* helpers on all three buffers.
//   3. Loop over compute_start_row in steps of conv_desc.out_rowstep. Depending
//      on `pingpong`, wino_systolic_kernel is given one output buffer while
//      write_output_to_DDR is given the other, and the argument list of
//      load_input_rowtile_from_ddr carries next_start_row.
//   4. After the loop, one more write_output_to_DDR on the buffer selected by
//      `pingpong`.
//
// NOTE(review): this copy of the function is damaged by extraction. The
// opening "{", the declarations of `pingpong` and `write_start_row`, the
// brace opening the for-loop body, and at least two
// "load_input_rowtile_from_ddr(" call heads are not visible, and stray
// "xliu79" / "committed" / bare-number lines are interleaved with the code.
// Restore from the original file before building.
#pragma SDS data zero_copy(input_DDR0[0:FEATURE_PORT_DEPTH])
#pragma SDS data zero_copy(input_DDR1[0:FEATURE_PORT_DEPTH])
#pragma SDS data zero_copy(input_DDR2[0:FEATURE_PORT_DEPTH])
#pragma SDS data zero_copy(input_DDR3[0:FEATURE_PORT_DEPTH])
#pragma SDS data zero_copy(weight_DDR0[0:WEIGHT_PORT_DEPTH])
#pragma SDS data zero_copy(weight_DDR1[0:WEIGHT_PORT_DEPTH])
#pragma SDS data zero_copy(weight_DDR2[0:WEIGHT_PORT_DEPTH])
#pragma SDS data zero_copy(weight_DDR3[0:WEIGHT_PORT_DEPTH])
#pragma SDS data zero_copy(output_DDR0[0:FEATURE_PORT_DEPTH])
#pragma SDS data zero_copy(output_DDR1[0:FEATURE_PORT_DEPTH])
#pragma SDS data zero_copy(output_DDR2[0:FEATURE_PORT_DEPTH])
#pragma SDS data zero_copy(output_DDR3[0:FEATURE_PORT_DEPTH])
#pragma SDS data zero_copy(mem_params[0:128])
#pragma SDS data sys_port(input_DDR0:ps_e_S_AXI_HP0_FPD)
#pragma SDS data sys_port(input_DDR1:ps_e_S_AXI_HP1_FPD)
#pragma SDS data sys_port(input_DDR2:ps_e_S_AXI_HP2_FPD)
#pragma SDS data sys_port(input_DDR3:ps_e_S_AXI_HP3_FPD)
#pragma SDS data sys_port(weight_DDR0:ps_e_S_AXI_HP0_FPD)
#pragma SDS data sys_port(weight_DDR1:ps_e_S_AXI_HP1_FPD)
#pragma SDS data sys_port(weight_DDR2:ps_e_S_AXI_HP2_FPD)
#pragma SDS data sys_port(weight_DDR3:ps_e_S_AXI_HP3_FPD)
#pragma SDS data sys_port(output_DDR0:ps_e_S_AXI_HP0_FPD)
#pragma SDS data sys_port(output_DDR1:ps_e_S_AXI_HP1_FPD)
#pragma SDS data sys_port(output_DDR2:ps_e_S_AXI_HP2_FPD)
#pragma SDS data sys_port(output_DDR3:ps_e_S_AXI_HP3_FPD)
#pragma SDS data sys_port(mem_params:ps_e_S_AXI_HP0_FPD)
void wino_systolic_top(
ap_uint<128> *input_DDR0,
ap_uint<128> *input_DDR1,
ap_uint<128> *input_DDR2,
ap_uint<128> *input_DDR3,
ap_uint<128> *weight_DDR0,
ap_uint<128> *weight_DDR1,
ap_uint<128> *weight_DDR2,
ap_uint<128> *weight_DDR3,
ap_uint<ODDR_WIDTH*BATCH_SIZE*8> *output_DDR0,
ap_uint<ODDR_WIDTH*BATCH_SIZE*8> *output_DDR1,
ap_uint<ODDR_WIDTH*BATCH_SIZE*8> *output_DDR2,
ap_uint<ODDR_WIDTH*BATCH_SIZE*8> *output_DDR3,
ap_int<32> *mem_params
#ifdef __SDSVHLS__
,ap_uint<1> ap_clk_div2
#endif
xliu79
committed
)
// NOTE(review): the "{" opening the function body is not visible after ")".
// Disabled m_axi interface pragmas, kept for reference:
// #pragma HLS interface m_axi port= input_DDR3 depth=65535
// #pragma HLS interface m_axi port= input_DDR2 depth=65535
// #pragma HLS interface m_axi port= input_DDR1 depth=65535
// #pragma HLS interface m_axi port= input_DDR0 depth=65535
// #pragma HLS interface m_axi port= output_DDR3 depth=65535
// #pragma HLS interface m_axi port= output_DDR2 depth=65535
// #pragma HLS interface m_axi port= output_DDR1 depth=65535
// #pragma HLS interface m_axi port= output_DDR0 depth=65535
// #pragma HLS interface m_axi port= weight_DDR3 depth=65535
// #pragma HLS interface m_axi port= weight_DDR2 depth=65535
// #pragma HLS interface m_axi port= weight_DDR1 depth=65535
// #pragma HLS interface m_axi port= weight_DDR0 depth=65535
// #pragma HLS interface m_axi port= mem_params depth=128
xliu79
committed
// On-chip storage: one input buffer and two output buffers, each partitioned
// completely on dims 1 and 2; the output buffers request RAM_T2P_BRAM.
ap_uint<16> input_buffer[INBUFFER_HEIGHT][INBUFFER_WIDTH][INPUT_BUFFER_DEPTH];
#pragma HLS array_partition variable=input_buffer complete dim=1
#pragma HLS array_partition variable=input_buffer complete dim=2
ap_uint<OUT_WIDTH*2> output_buffer0[WINO_OUT_SIZE][WINO_OUT_SIZE][OUTDEPTH_MINITILE_SIZE][WINO_WIDTH][OUTPUT_BUFFER_DEPTH];
#pragma HLS array_partition variable=output_buffer0 complete dim=1
#pragma HLS array_partition variable=output_buffer0 complete dim=2
#pragma HLS resource variable=output_buffer0 core=RAM_T2P_BRAM
ap_uint<OUT_WIDTH*2> output_buffer1[WINO_OUT_SIZE][WINO_OUT_SIZE][OUTDEPTH_MINITILE_SIZE][WINO_WIDTH][OUTPUT_BUFFER_DEPTH];
#pragma HLS array_partition variable=output_buffer1 complete dim=1
#pragma HLS array_partition variable=output_buffer1 complete dim=2
#pragma HLS resource variable=output_buffer1 core=RAM_T2P_BRAM
ConvDesc_t conv_desc;
// NOTE(review): no call that fills conv_desc (e.g. load_params(mem_params,
// conv_desc)) is visible before the reads below; presumably it was lost in
// extraction -- confirm. These int copies are not referenced again in the
// visible body; the later calls read conv_desc fields directly.
int inheight=conv_desc.inheight;
int inwidth=conv_desc.inwidth;
int stride=conv_desc.stride;
int pad_size=conv_desc.pad_size;
int inwidth_align8=conv_desc.inwidth_align8;
int indepth_align8=conv_desc.indepth_align8;
int group_indepth_x_inwidth_align8_by8=conv_desc.group_indepth_x_inwidth_align8_by8;
int group_indepth_offset_x_inwidth_align8_by8=conv_desc.group_indepth_offset_x_inwidth_align8_by8;
int inwidth_ceildiv_inbufferwidth=conv_desc.inwidth_ceildiv_inbufferwidth;
int buffer_address_mid_increment_step=conv_desc.buffer_address_mid_increment_step;
int input_load_burst_length=conv_desc.input_load_burst_length;
int row_address_bitnumber_flag=conv_desc.row_address_bitnumber_flag;
int out_rowstep=conv_desc.out_rowstep;
xliu79
committed
xliu79
committed
// Clear helpers are called on all three on-chip buffers before the first load.
clear_buffer_content<INBUFFER_HEIGHT,INBUFFER_WIDTH, INPUT_BUFFER_DEPTH>(input_buffer);
clear_output_buffer_content_uniformed_hw<OUT_WIDTH,BATCH_SIZE,WINO_HEIGHT,WINO_WIDTH,WINO_OUT_SIZE,OUTPUT_BUFFER_DEPTH>(output_buffer0);
clear_output_buffer_content_uniformed_hw<OUT_WIDTH,BATCH_SIZE,WINO_HEIGHT,WINO_WIDTH,WINO_OUT_SIZE,OUTPUT_BUFFER_DEPTH>(output_buffer1);
// NOTE(review): the lines below look like the argument list of an initial
// load_input_rowtile_from_ddr(...) call (same arguments as the call inside
// the loop, with a start row of 0); the call head and its closing ");" are
// not visible -- confirm.
input_DDR0,
input_DDR1,
input_DDR2,
input_DDR3,
input_buffer,
xliu79
committed
conv_desc.inheight,
conv_desc.inwidth,
conv_desc.stride,
conv_desc.pad_size,
conv_desc.inwidth_align8,
conv_desc.indepth_align8,
conv_desc.group_indepth_x_inwidth_align8_by8,
conv_desc.group_indepth_offset_x_inwidth_align8_by8,
conv_desc.inwidth_ceildiv_inbufferwidth,
conv_desc.buffer_address_mid_increment_step,
xliu79
committed
conv_desc.input_load_burst_length,
conv_desc.row_address_bitnumber_flag,
conv_desc.out_rowstep,
0,
// NOTE(review): the same dump call near the end of the loop is wrapped in
// "#if DEBUG_FILE_PRINT"; this one has no visible guard -- confirm.
attach_input_buffer_content_uniformed<INBUFFER_HEIGHT,INBUFFER_WIDTH, INPUT_BUFFER_DEPTH>(input_buffer,0,(char*) "input_buffer_content.txt");
// next_start_row starts one row step ahead of compute_start_row.
ap_int<16> next_start_row= conv_desc.out_rowstep;
// Main row-tile loop.
// NOTE(review): no "{" is visible after the for header.
for( ap_int<16> compute_start_row =0; compute_start_row < conv_desc.outheight; compute_start_row+=conv_desc.out_rowstep)
// Declared unsigned here, while the kernel parameter of the same name is
// ap_int<16>; the value is negative when pad_size exceeds compute_start_row.
ap_uint<16> start_row_idx_minus_pad_size=compute_start_row-conv_desc.pad_size;
#pragma HLS DEPENDENCE variable=input_buffer inter false
#pragma HLS DEPENDENCE variable=input_buffer intra false
// pingpong branch: kernel gets output_buffer0, write_output_to_DDR gets
// output_buffer1.
if(pingpong )
{
wino_systolic_kernel(
weight_DDR0,
weight_DDR1,
weight_DDR2,
weight_DDR3,
input_buffer,
output_buffer0,
compute_start_row,
start_row_idx_minus_pad_size,
compute_start_row==0,
// NOTE(review): last_flag. If next_start_row stays one out_rowstep ahead of
// compute_start_row, ">" is never true when outheight is an exact multiple
// of out_rowstep -- confirm whether ">=" was intended.
next_start_row > conv_desc.outheight,
// NOTE(review): the bare numbers below look like line-number residue from
// extraction, not code.
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
conv_desc,
ap_clk_div2
);
#if DEBUG_FILE_PRINT
char outfilename[100];
sprintf(outfilename,"outbuffer.txt");
attach_output_buffer_content_uniformed_hw<OUT_WIDTH,BATCH_SIZE,WINO_HEIGHT,WINO_WIDTH,WINO_OUT_SIZE,OUTPUT_BUFFER_DEPTH>(
output_buffer0,0,outfilename);
#endif
// Write-back call on the other buffer (output_buffer1), at write_start_row.
write_output_to_DDR(
output_DDR0,
output_DDR1,
output_DDR2,
output_DDR3,
output_buffer1,
conv_desc.outheight,
conv_desc.outwidth_align8,
conv_desc.wino_output_tile_size,
conv_desc.wino_tile_number_in_outwidth,
conv_desc.wino_tile_number_in_out_rowstep,
conv_desc.wino_col_pix_upper_bound,
conv_desc.wino_tile_number_rowcol,
conv_desc.output_burst_length,
conv_desc.out_ddr_increment_step,
write_start_row,
write_start_row==0
#if DEBUG_CONV_DESC
,conv_desc
#endif
);
// Input load call for the row tile starting at next_start_row.
load_input_rowtile_from_ddr(
input_DDR0,
input_DDR1,
input_DDR2,
input_DDR3,
input_buffer,
conv_desc.inheight,
conv_desc.inwidth,
conv_desc.stride,
conv_desc.pad_size,
conv_desc.inwidth_align8,
conv_desc.indepth_align8,
conv_desc.group_indepth_x_inwidth_align8_by8,
conv_desc.group_indepth_offset_x_inwidth_align8_by8,
conv_desc.inwidth_ceildiv_inbufferwidth,
conv_desc.buffer_address_mid_increment_step,
conv_desc.input_load_burst_length,
conv_desc.row_address_bitnumber_flag,
conv_desc.out_rowstep,
next_start_row,
0);
}
else
{
// Other branch: kernel gets output_buffer1, write_output_to_DDR gets
// output_buffer0.
wino_systolic_kernel(
weight_DDR0,
weight_DDR1,
weight_DDR2,
weight_DDR3,
input_buffer,
output_buffer1,
compute_start_row,
start_row_idx_minus_pad_size,
compute_start_row==0,
next_start_row > conv_desc.outheight,
// NOTE(review): bare numbers below look like line-number residue, not code.
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
conv_desc,
ap_clk_div2
);
#if DEBUG_FILE_PRINT
char outfilename[100];
sprintf(outfilename,"outbuffer.txt");
attach_output_buffer_content_uniformed_hw<OUT_WIDTH,BATCH_SIZE,WINO_HEIGHT,WINO_WIDTH,WINO_OUT_SIZE,OUTPUT_BUFFER_DEPTH>(
output_buffer1,0,outfilename);
#endif
write_output_to_DDR(
output_DDR0,
output_DDR1,
output_DDR2,
output_DDR3,
output_buffer0,
conv_desc.outheight,
conv_desc.outwidth_align8,
conv_desc.wino_output_tile_size,
conv_desc.wino_tile_number_in_outwidth,
conv_desc.wino_tile_number_in_out_rowstep,
conv_desc.wino_col_pix_upper_bound,
conv_desc.wino_tile_number_rowcol,
conv_desc.output_burst_length,
conv_desc.out_ddr_increment_step,
write_start_row,
write_start_row==0
#if DEBUG_CONV_DESC
,conv_desc
#endif
);
// NOTE(review): the "load_input_rowtile_from_ddr(" head, the trailing
// arguments, the ");" and the "}" closing this else-branch are not visible
// around the argument lines below -- confirm.
input_DDR0,
input_DDR1,
input_DDR2,
input_DDR3,
input_buffer,
conv_desc.inheight,
conv_desc.inwidth,
conv_desc.stride,
conv_desc.pad_size,
conv_desc.inwidth_align8,
conv_desc.indepth_align8,
conv_desc.group_indepth_x_inwidth_align8_by8,
conv_desc.group_indepth_offset_x_inwidth_align8_by8,
conv_desc.inwidth_ceildiv_inbufferwidth,
conv_desc.buffer_address_mid_increment_step,
conv_desc.input_load_burst_length,
conv_desc.row_address_bitnumber_flag,
conv_desc.out_rowstep,
// Per-iteration bookkeeping: invert pingpong, then advance the write and
// next-load start rows by one row step.
pingpong =~pingpong;
write_start_row+=conv_desc.out_rowstep;
next_start_row+=conv_desc.out_rowstep;
#if DEBUG_FILE_PRINT
attach_input_buffer_content_uniformed<INBUFFER_HEIGHT,INBUFFER_WIDTH, INPUT_BUFFER_DEPTH>(input_buffer,0,(char*) "input_buffer_content.txt");
#endif
xliu79
committed
}
// NOTE(review): bare numbers below look like line-number residue, not code.
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
// Final write-back, using the same pingpong-to-buffer pairing as the
// write_output_to_DDR calls inside the loop.
if(pingpong )
{
write_output_to_DDR(
output_DDR0,
output_DDR1,
output_DDR2,
output_DDR3,
output_buffer1,
conv_desc.outheight,
conv_desc.outwidth_align8,
conv_desc.wino_output_tile_size,
conv_desc.wino_tile_number_in_outwidth,
conv_desc.wino_tile_number_in_out_rowstep,
conv_desc.wino_col_pix_upper_bound,
conv_desc.wino_tile_number_rowcol,
conv_desc.output_burst_length,
conv_desc.out_ddr_increment_step,
write_start_row,
write_start_row==0
#if DEBUG_CONV_DESC
,conv_desc
#endif
);
}
else
{
write_output_to_DDR(
output_DDR0,
output_DDR1,
output_DDR2,
output_DDR3,
output_buffer0,
conv_desc.outheight,
conv_desc.outwidth_align8,
conv_desc.wino_output_tile_size,
conv_desc.wino_tile_number_in_outwidth,
conv_desc.wino_tile_number_in_out_rowstep,
conv_desc.wino_col_pix_upper_bound,
conv_desc.wino_tile_number_rowcol,
conv_desc.output_burst_length,
conv_desc.out_ddr_increment_step,
write_start_row,
write_start_row==0
#if DEBUG_CONV_DESC
,conv_desc
#endif
);
}
xliu79
committed
xliu79
committed
}