From 199a8a80e72e8377faedea785a879847edf11ff7 Mon Sep 17 00:00:00 2001
From: Aaron Councilman <aaronjc4@illinois.edu>
Date: Sat, 22 Feb 2025 18:44:44 -0600
Subject: [PATCH] Manual gpu schedule

---
Review notes (free text between the separator above and the diffstat is
not part of the diff itself):

 * Replaces the generic `*`-wide schedule for the matmul sample with a
   hand-written one: two helper macros (optimize!, codegen!), forkify /
   fork-guard-elim and the slf passes scoped to `matmul` inside
   `fixpoint panic after 20` blocks, then explicit fork-tile /
   fork-coalesce / fork-interchange / fork-split steps on the
   matmul@outer and matmul@inner labels, followed by auto-outline and
   gpu(auto.matmul).
 * NOTE(review): codegen!(X) declares a parameter X, but every pass in
   its body is applied to `*`, so the argument is ignored. The only call
   site passes `*`, so the result is the same today - confirm whether
   this is intentional before reusing the macro with a narrower selector.
 * NOTE(review): float-collections(*) is run once before gpu(auto.matmul)
   and again inside codegen!(*) - presumably deliberate; confirm.
 * NOTE(review): the two-space indentation inside `{ ... }` bodies below
   was reconstructed from a whitespace-collapsed copy of this patch;
   verify it against gpu.sch before applying.

 juno_samples/matmul/src/gpu.sch | 63 +++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 19 deletions(-)

diff --git a/juno_samples/matmul/src/gpu.sch b/juno_samples/matmul/src/gpu.sch
index 76808149..effdc6b2 100644
--- a/juno_samples/matmul/src/gpu.sch
+++ b/juno_samples/matmul/src/gpu.sch
@@ -1,26 +1,51 @@
-phi-elim(*);
+macro optimize!(X) {
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+  ip-sroa(X);
+  sroa(X);
+  dce(X);
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+}
+
+macro codegen!(X) {
+  gcm(*);
+  float-collections(*);
+  dce(*);
+  gcm(*);
+}
 
-forkify(*);
-fork-guard-elim(*);
-dce(*);
+optimize!(*);
 
-fixpoint {
-  reduce-slf(*);
-  slf(*);
-  infer-schedules(*);
+fixpoint panic after 20 {
+  forkify(matmul);
+  fork-guard-elim(matmul);
 }
-fork-coalesce(*);
-infer-schedules(*);
-dce(*);
-rewrite(*);
-fixpoint {
-  simplify-cfg(*);
-  dce(*);
+
+optimize!(*);
+
+fixpoint panic after 20 {
+  reduce-slf(matmul);
+  slf(matmul);
+  infer-schedules(matmul);
 }
+dce(matmul);
 
-ip-sroa(*);
-sroa(*);
-dce(*);
+// Tile outer and middle loops into 32x32 sized blocks
+fork-tile[32, 0, false, true](matmul@outer \ matmul@inner);
+// Merge outer and middle loops and interchange so blocks are first
+fork-coalesce(matmul@outer \ matmul@inner);
+fork-interchange[1, 2](matmul@outer \ matmul@inner);
+// Split forks
+let split = fork-split(matmul);
+// Join the threads and then blocks into a single fork each
+fork-coalesce(split.matmul.fj2 \ matmul@inner);
+fork-coalesce(split.matmul.fj0 \ split.matmul.fj2);
 
+let auto = auto-outline(*);
 float-collections(*);
-gcm(*);
+gpu(auto.matmul);
+
+codegen!(*);
-- 
GitLab