Skip to content
Snippets Groups Projects
wino.cpp 7.69 KiB
Newer Older
  • Learn to ignore specific revisions
  • #include <ap_int.h>
    #include <hls_stream.h>
    
    
    xliu79's avatar
    xliu79 committed
    #include "wino_IO.cpp"
    
    xliu79's avatar
    xliu79 committed
    
    
    
    
    
    void wino_systolic_top(
        ap_uint<128> *input_DDR0,
        ap_uint<128> *input_DDR1,
        ap_uint<128> *input_DDR2,
        ap_uint<128> *input_DDR3,
        ap_uint<128> *weight_DDR0,
        ap_uint<128> *weight_DDR1,
        ap_uint<128> *weight_DDR2,
        ap_uint<128> *weight_DDR3,
        ap_uint<128> *output_DDR0,
        ap_uint<128> *output_DDR1,
        ap_uint<128> *output_DDR2,
        ap_uint<128> *output_DDR3,
    
    #pragma HLS interface m_axi port= input_DDR3 depth=65535
    #pragma HLS interface m_axi port= input_DDR2 depth=65535
    #pragma HLS interface m_axi port= input_DDR1 depth=65535
    #pragma HLS interface m_axi port= input_DDR0 depth=65535
    #pragma HLS interface m_axi port= output_DDR3 depth=65535
    #pragma HLS interface m_axi port= output_DDR2 depth=65535
    #pragma HLS interface m_axi port= output_DDR1 depth=65535
    #pragma HLS interface m_axi port= output_DDR0 depth=65535
    #pragma HLS interface m_axi port= weight_DDR3 depth=65535
    #pragma HLS interface m_axi port= weight_DDR2 depth=65535
    #pragma HLS interface m_axi port= weight_DDR1 depth=65535
    #pragma HLS interface m_axi port= weight_DDR0 depth=65535
    
        ap_uint<16> input_buffer[INBUFFER_HEIGHT][INBUFFER_WIDTH][INPUT_BUFFER_DEPTH];
    #pragma HLS array_partition variable=input_buffer complete dim=1 
    #pragma HLS array_partition variable=input_buffer complete dim=2 
    
        ap_uint<36> output_buffer0[16][16][OUTPUT_BUFFER_DEPTH];
    
    #pragma HLS array_partition variable=output_buffer0 complete dim=1 
    #pragma HLS array_partition variable=output_buffer0 complete dim=2 
    
        ap_uint<36> output_buffer1[16][16][OUTPUT_BUFFER_DEPTH];
    
    #pragma HLS array_partition variable=output_buffer1 complete dim=1 
    #pragma HLS array_partition variable=output_buffer1 complete dim=2 
    
        ap_uint<1> pingpong;
    
    
        #if DEBUG_FILE_PRINT
    
            clear_buffer_content<INBUFFER_HEIGHT,INBUFFER_WIDTH, INPUT_BUFFER_DEPTH>(input_buffer);
    
    xliu79's avatar
    xliu79 committed
        load_input_rowtile_from_ddr(
    
            input_DDR0,
            input_DDR1,
            input_DDR2,
            input_DDR3,
            input_buffer,
    
    		conv_desc.inheight,
    		conv_desc.inwidth,
    		conv_desc.stride,
            conv_desc.pad_size,
    		conv_desc.inwidth_align8,
    		conv_desc.indepth_align8,
    		conv_desc.group_indepth_x_inwidth_align8_by8,
    		conv_desc.group_indepth_offset_x_inwidth_align8_by8,
    		conv_desc.input_load_burst_length,
            conv_desc.wino_output_tile_size,
    
            1);
    
        #if DEBUG_FILE_PRINT
    
            attach_input_buffer_content_uniformed<INBUFFER_HEIGHT,INBUFFER_WIDTH, INPUT_BUFFER_DEPTH>(input_buffer,0,"input_buffer_content.txt");
    
        ap_uint<16> start_output_row =0;
        for( ; start_output_row < conv_desc.outheight; start_output_row+=conv_desc.wino_output_tile_size)
    
            input_DDR0,
            input_DDR1,
            input_DDR2,
            input_DDR3,
            input_buffer,
    
    		conv_desc.inheight,
    		conv_desc.inwidth,
    		conv_desc.stride,
            conv_desc.pad_size,
    		conv_desc.inwidth_align8,
    		conv_desc.indepth_align8,
    		conv_desc.group_indepth_x_inwidth_align8_by8,
    		conv_desc.group_indepth_offset_x_inwidth_align8_by8,
    		conv_desc.input_load_burst_length,
            start_output_row + conv_desc.wino_output_tile_size,
    
            0);
            #if DEBUG_FILE_PRINT
    
                attach_input_buffer_content_uniformed<INBUFFER_HEIGHT,INBUFFER_WIDTH, INPUT_BUFFER_DEPTH>(input_buffer,0,"input_buffer_content.txt");
    
        //     if(pingpong)
        //     {
        //         wino_systolic(
        //         input_buffer,
        //         output_buffer0,
        //         weight_DDR0,
        //         weight_DDR1,
        //         weight_DDR2,
        //         weight_DDR3,
        //         input_height,
        //         input_width,
        //         input_depth,
        //         input_width_ceildiv_16,
        //         input_depth_align8,
        //         output_height,
        //         output_width,
        //         output_depth,
        //         kernel_window_size,
        //         pad_size,
        //         weight_indepth_load_number,
        //         weight_outdepth_load_number,
        //         weight_outdepth_feed_size,
        //         start_output_row,
        //         weight_total_load_number,
        //         weight_total_feed_size,
        //         ddr_load_length,
        //         ddr_load_length_per_feed,
        //         row_repeat_times,
        //         (start_output_row==0) ,
        //         (start_output_row+wino_output_tile_size >= output_height));
        //     #if DEBUG_FILE_PRINT
        //         attach_output_buffer_content<0>(output_buffer0,"output_buffer_content.txt");
        //         #endif
    
        //         write_output_to_DDR(
        //         output_DDR0,
        //         output_DDR1,
        //         output_DDR2,
        //         output_DDR3,
        //         output_buffer1,
        //         outdepth_ceil_div8,
        //         start_output_row,
        //         start_output_row+4,
        //         output_height,
        //         output_width,
        //         wino_output_tile_size,
        //         row_repeat_times,
        //         1,
        //         0);
    
        //         pingpong=0;
        //     }
        //     else
        //     {
        //         wino_systolic(
        //         input_buffer,
        //         output_buffer1,
        //         weight_DDR0,
        //         weight_DDR1,
        //         weight_DDR2,
        //         weight_DDR3,
        //         input_height,
        //         input_width,
        //         input_depth,
        //         input_width_ceildiv_16,
        //         input_depth_align8,
        //         output_height,
        //         output_width,
        //         output_depth,
        //         kernel_window_size,
        //         pad_size,
        //         weight_indepth_load_number,
        //         weight_outdepth_load_number,
        //         weight_outdepth_feed_size,
        //         start_output_row,
        //         weight_total_load_number,
        //         weight_total_feed_size,
        //         ddr_load_length,
        //         ddr_load_length_per_feed,
        //         row_repeat_times,
        //         (start_output_row==0) ,
        //         (start_output_row+wino_output_tile_size >= output_height));
        //     #if DEBUG_FILE_PRINT
        //         attach_output_buffer_content<0>(output_buffer0,"output_buffer_content.txt");
        //         #endif
    
        //         write_output_to_DDR(
        //         output_DDR0,
        //         output_DDR1,
        //         output_DDR2,
        //         output_DDR3,
        //         output_buffer0,
        //         outdepth_ceil_div8,
        //         start_output_row,
        //         start_output_row+4,
        //         output_height,
        //         output_width,
        //         wino_output_tile_size,
        //         row_repeat_times,
        //         1,
        //         0);
        //         pingpong=1;
        //     }
    
        // if(pingpong)
        // {
        //         write_output_to_DDR(
        //         output_DDR0,
        //         output_DDR1,
        //         output_DDR2,
        //         output_DDR3,
        //         output_buffer1,
        //         outdepth_ceil_div8,
        //         start_output_row,
        //         start_output_row+4,
        //         output_height,
        //         output_width,
        //         wino_output_tile_size,
        //         row_repeat_times,
        //         1,
        //         0);
        // }
        // else
        // {
        //                write_output_to_DDR(
        //         output_DDR0,
        //         output_DDR1,
        //         output_DDR2,
        //         output_DDR3,
        //         output_buffer0,
        //         outdepth_ceil_div8,
        //         start_output_row,
        //         start_output_row+4,
        //         output_height,
        //         output_width,
        //         wino_output_tile_size,
        //         row_repeat_times,
        //         1,
        //         0);
        // }