Skip to content
Snippets Groups Projects
gpu.sch 1.86 KiB
Newer Older
  • Learn to ignore specific revisions
  • rarbore2's avatar
    rarbore2 committed
    // simpl!(X): cleanup macro invoked repeatedly throughout this schedule.
    // It expands to the seven passes below, each applied to the selection X,
    // in exactly the order listed.
    // NOTE(review): the pass names suggest constant propagation, CFG
    // simplification, value numbering, phi elimination, dead-code elimination
    // and schedule inference -- confirm against the scheduler's pass list.
    macro simpl!(X) {
      ccp(X);
      simplify-cfg(X);
      lift-dc-math(X);
      gvn(X);
      phi-elim(X);
      dce(X);
      infer-schedules(X);
    }
    
    
    rarbore2's avatar
    rarbore2 committed
    // Apply no-memset to the target srad@scratch.
    // NOTE(review): presumably `scratch` is a label inside function `srad`
    // whose buffer does not need to be cleared -- confirm.
    no-memset(srad@scratch);
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    // Run phi-elim on the `*` selection (presumably everything) before any
    // outlining is done below.
    phi-elim(*);
    
    rarbore2's avatar
    rarbore2 committed
    // Outline srad@loop1 and bind the result to `sum_loop`; outline the
    // combined selection srad@loop2 | srad@loop3 and bind it to `main_loops`.
    // Both names are scheduled individually later in this file.
    let sum_loop = outline(srad@loop1);
    let main_loops = outline(srad@loop2 | srad@loop3);
    // NOTE(review): `extract` and `compress` are not bound anywhere in the
    // visible schedule -- presumably they name functions of the source
    // program; confirm.
    gpu(main_loops, extract, compress);
    simpl!(*);
    // First const-inline call uses the flag `true`; a later call in this
    // schedule uses `false`.
    const-inline[true](*);
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    // Run crc and then slf over the `*` selection.
    // NOTE(review): semantics of both passes are not visible here -- see the
    // scheduler documentation.
    crc(*);
    slf(*);
    
    rarbore2's avatar
    rarbore2 committed
    // Stage 1: write-predication once, then predication twice, with a simpl!
    // cleanup after every step.
    write-predication(*);
    simpl!(*);
    predication(*);
    simpl!(*);
    predication(*);
    simpl!(*);
    // Stage 2: forkify / fork-guard-elim / fork-coalesce inside a `fixpoint`
    // block. NOTE(review): presumably the block repeats until none of the
    // passes changes anything -- confirm.
    fixpoint {
      forkify(*);
      fork-guard-elim(*);
      fork-coalesce(*);
    }
    simpl!(*);
    // Stage 3: the three slf variants (reduce-slf, array-slf, slf), each
    // followed by a simpl! cleanup.
    reduce-slf(*);
    simpl!(*);
    array-slf(*);
    simpl!(*);
    slf(*);
    simpl!(*);
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    
    
    rarbore2's avatar
    rarbore2 committed
    // First round of scheduling for `sum_loop`: merge fork dimensions, clean
    // up, tile with the arguments [32, 0, false, true], then split.
    // NOTE(review): the meaning of each fork-tile argument is not visible
    // here; 32 is presumably the tile size -- confirm.
    fork-dim-merge(sum_loop);
    simpl!(sum_loop);
    fork-tile[32, 0, false, true](sum_loop);
    // The result of fork-split is bound to `out`; the next fork-fission
    // reads out.srad_0.fj0.
    let out = fork-split(sum_loop);
    clean-monoid-reduces(sum_loop);
    simpl!(sum_loop);
    
    rarbore2's avatar
    rarbore2 committed
    
    // Second round: fission `sum_loop` at out.srad_0.fj0 (from the preceding
    // fork-split) and bind the result to `fission1`.
    let fission1 = fork-fission[out.srad_0.fj0](sum_loop);
    simpl!(sum_loop);
    // Tile and split only the fj_bottom part of the fission result.
    fork-tile[32, 0, false, true](fission1.srad_0.fj_bottom);
    // Rebinds `out`: from here on it refers to this split, not the earlier one.
    let out = fork-split(fission1.srad_0.fj_bottom);
    clean-monoid-reduces(sum_loop);
    simpl!(sum_loop);
    
    // Third round: fission `sum_loop` again, this time at the fj0 of the most
    // recent `out` binding, and bind the result to `fission2`.
    let fission2 = fork-fission[out.srad_0.fj0](sum_loop);
    
    rarbore2's avatar
    rarbore2 committed
    // Cleanup of `sum_loop` after the fork-fission that produced `fission2`.
    simpl!(sum_loop);
    
    rarbore2's avatar
    rarbore2 committed
    // Tile and split the fj_bottom part of `fission2`, using the same
    // fork-tile arguments as the earlier rounds.
    fork-tile[32, 0, false, true](fission2.srad_0.fj_bottom);
    // Final rebinding of `out`; this is the value outlined as `third` below.
    let out = fork-split(fission2.srad_0.fj_bottom);
    
    rarbore2's avatar
    rarbore2 committed
    // Same clean-monoid-reduces + simpl! pair that follows each earlier
    // fork-split of `sum_loop`.
    clean-monoid-reduces(sum_loop);
    simpl!(sum_loop);
    
    rarbore2's avatar
    rarbore2 committed
    
    // Outline three pieces of the fissioned `sum_loop`: the fj_top of each
    // fission result and the fj0 of the last fork-split. All three are then
    // passed to gpu(...).
    let first = outline(fission1.srad_0.fj_top);
    let second = outline(fission2.srad_0.fj_top);
    let third = outline(out.srad_0.fj0);
    gpu(first, second, third);
    // Second const-inline call, this time with the flag `false`.
    const-inline[false](*);
    
    rarbore2's avatar
    rarbore2 committed
    // ip-sroa, then sroa, then a simpl! cleanup, all over the `*` selection.
    // NOTE(review): presumably interprocedural / intraprocedural scalar
    // replacement of aggregates -- confirm.
    ip-sroa(*);
    sroa(*);
    simpl!(*);
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    
    
    rarbore2's avatar
    rarbore2 committed
    // Schedule `main_loops`: start with fork-interchange using indices 0 and 1.
    fork-interchange[0, 1](main_loops);
    
    rarbore2's avatar
    rarbore2 committed
    // Merge, tile, dce, split and clean up `main_loops`. The identical
    // sequence is applied to `extract` and `compress` later in this file.
    fork-dim-merge(main_loops);
    fork-tile[32, 0, false, true](main_loops);
    dce(main_loops);
    // Unlike the `sum_loop` splits, the fork-split result is not bound here.
    fork-split(main_loops);
    simpl!(main_loops);
    
    
    rarbore2's avatar
    rarbore2 committed
    // Schedule `extract`: start with fork-interchange using indices 0 and 1.
    fork-interchange[0, 1](extract);
    
    rarbore2's avatar
    rarbore2 committed
    // Merge, tile, dce, split and clean up `extract`, the same sequence used
    // for `main_loops`; the fork-split result is not bound.
    fork-dim-merge(extract);
    fork-tile[32, 0, false, true](extract);
    dce(extract);
    fork-split(extract);
    simpl!(extract);
    
    
    rarbore2's avatar
    rarbore2 committed
    // Schedule `compress`: start with fork-interchange using indices 0 and 1.
    fork-interchange[0, 1](compress);
    
    rarbore2's avatar
    rarbore2 committed
    // Merge, tile, dce, split and clean up `compress`, the same sequence used
    // for `main_loops` and `extract`; the fork-split result is not bound.
    fork-dim-merge(compress);
    fork-tile[32, 0, false, true](compress);
    dce(compress);
    fork-split(compress);
    simpl!(compress);
    
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    // Last pass in the visible schedule: gcm over the `*` selection.
    // NOTE(review): presumably global code motion -- confirm.
    gcm(*);