Skip to content
Snippets Groups Projects

Fork reshape

Merged Aaron Councilman requested to merge fork-reshape into main
1 file
+ 63
0
Compare changes
  • Side-by-side
  • Inline
+ 63
0
 
// General cleanup pipeline applied to the selection X.
// Runs gvn, phi-elim and dce, then ip-sroa and sroa followed by dce, and
// finally a second round of gvn, phi-elim and dce.
// NOTE(review): the second gvn/phi-elim/dce round presumably cleans up what
// the sroa passes expose -- confirm against the pass documentation.
macro optimize!(X) {
 
gvn(X);
 
phi-elim(X);
 
dce(X);
 
ip-sroa(X);
 
sroa(X);
 
dce(X);
 
gvn(X);
 
phi-elim(X);
 
dce(X);
 
}
 
 
// Final pass sequence run before code generation: gcm, float-collections,
// dce, then gcm again, each applied to the selection X.
// The passes use the macro argument X (instead of a hard-coded `*`) so that
// the selection given by the caller is honored, matching how optimize!
// threads its argument through. The existing call `codegen!(*)` behaves
// exactly as before.
macro codegen!(X) {
  gcm(X);
  float-collections(X);
  dce(X);
  // gcm is run a second time after float-collections and dce.
  gcm(X);
}
 
 
// Run the optimize! pipeline on everything (`*`) before any loop transformation.
optimize!(*);
 
 
// Repeat forkify and fork-guard-elim on matmul until nothing changes.
// NOTE(review): `panic after 20` presumably aborts scheduling if no fixpoint
// is reached within 20 iterations -- confirm against the scheduler docs.
fixpoint panic after 20 {
 
forkify(matmul);
 
fork-guard-elim(matmul);
 
}
 
 
// Mark the whole loop nest as associative: any order of iterations is
// equivalent.
 
associative(matmul@outer);
 
 
// Tile the outer 2 loops to create 16 parallel threads (each responsible for
 
// computing one block of the output).
 
// `par` is what is labeled matmul@outer with what is labeled matmul@inner
// removed (`\`).
let par = matmul@outer \ matmul@inner;
 
// NOTE(review): the first fork-tile argument (4) is presumably the tile size;
// the meaning of the remaining arguments is not visible here -- confirm.
fork-tile[4, 0, false, true](par);
 
fork-coalesce(par);
 
// Three interchanges reorder the dimensions of the coalesced fork.
// NOTE(review): presumably this moves the two tile dimensions next to each
// other -- confirm the resulting dimension order.
fork-interchange[0, 1](par);
 
fork-interchange[2, 3](par);
 
fork-interchange[1, 2](par);
 
 
// Split the forks everywhere and keep the result so the individual pieces
// (split.matmul.fj0, split.matmul.fj2, ...) can be referred to by name.
let split = fork-split(*);
 
// Coalesce and then parallelize the part of fj0 that is not in fj2.
fork-coalesce(split.matmul.fj0 \ split.matmul.fj2);
 
parallel-fork(split.matmul.fj0 \ split.matmul.fj2);
 
 
// Pull the body of the parallel loop out into its own device function.
// `body` names the outlined function and is used by all later steps.
 
let body = outline(split.matmul.fj2);
 
// Assign the outlined function to the cpu device.
cpu(body);
 
 
// Tile the loop nest for cache performance; 16x16x16 tile
 
fork-tile[16, 0, false, true](body);
 
// Coalesce forks in body repeatedly until nothing changes.
fixpoint { fork-coalesce(body); }
 
 
// Reorder the dimensions of the coalesced fork with three interchanges.
// NOTE(review): presumably this groups the tile dimensions together, as in
// the earlier parallel tiling step -- confirm the resulting order.
fork-interchange[1, 2](body);
 
fork-interchange[3, 4](body);
 
fork-interchange[2, 3](body);
 
 
// Clean up everything after the tiling transformations.
optimize!(*);
 
 
// Split the forks in body (the result is not bound to a name), run
// reduce-slf on everything, then unforkify body.
// NOTE(review): unforkify presumably turns the remaining forks in body back
// into sequential loops -- confirm against the pass documentation.
fork-split(body);
 
reduce-slf(*);
 
unforkify(body);
 
 
// Clean up once more after unforkify.
optimize!(*);
 
 
// Run the final codegen! pass sequence on everything.
codegen!(*);
Loading