Skip to content
Snippets Groups Projects
Commit 199a8a80 authored by Aaron Councilman's avatar Aaron Councilman
Browse files

Manual gpu schedule

parent 972c1e6e
No related branches found
No related tags found
1 merge request!205Fork reshape
Pipeline #201825 failed
phi-elim(*); macro optimize!(X) {
gvn(X);
phi-elim(X);
dce(X);
ip-sroa(X);
sroa(X);
dce(X);
gvn(X);
phi-elim(X);
dce(X);
}
macro codegen!(X) {
gcm(*);
float-collections(*);
dce(*);
gcm(*);
}
forkify(*); optimize!(*);
fork-guard-elim(*);
dce(*);
fixpoint { fixpoint panic after 20 {
reduce-slf(*); forkify(matmul);
slf(*); fork-guard-elim(matmul);
infer-schedules(*);
} }
fork-coalesce(*);
infer-schedules(*); optimize!(*);
dce(*);
rewrite(*); fixpoint panic after 20 {
fixpoint { reduce-slf(matmul);
simplify-cfg(*); slf(matmul);
dce(*); infer-schedules(matmul);
} }
dce(matmul);
ip-sroa(*); // Tile outer and middle loops into 32x32 sized blocks
sroa(*); fork-tile[32, 0, false, true](matmul@outer \ matmul@inner);
dce(*); // Merge outer and middle loops and interchange so blocks are first
fork-coalesce(matmul@outer \ matmul@inner);
fork-interchange[1, 2](matmul@outer \ matmul@inner);
// Split forks
let split = fork-split(matmul);
// Join the threads and then blocks into a single fork each
fork-coalesce(split.matmul.fj2 \ matmul@inner);
fork-coalesce(split.matmul.fj0 \ split.matmul.fj2);
let auto = auto-outline(*);
float-collections(*); float-collections(*);
gcm(*); gpu(auto.matmul);
codegen!(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment