Skip to content
Snippets Groups Projects

Fix backprop and matmul cpu schedules

Merged Aaron Councilman requested to merge fix-cpu-tilings into main
2 files
+ 12
6
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -25,6 +25,10 @@ macro forkify!(X) {
}
}
// fork-chunk![n](X): shorthand that applies the `fork-tile` pass to X with the
// arguments [n, 0, false, false].
// Only the last argument differs from the sibling `fork-tile!` macro (false
// here, true there).
// NOTE(review): presumably `n` is the tile/chunk factor, `0` the fork
// dimension, and the trailing flag selects chunked vs. interleaved ordering;
// confirm against the fork-tile pass definition.
macro fork-chunk![n](X) {
fork-tile[n, 0, false, false](X);
}
// fork-tile![n](X): shorthand that applies the `fork-tile` pass to X with the
// arguments [n, 0, false, true].
// Only the last argument differs from the sibling `fork-chunk!` macro (true
// here, false there).
// NOTE(review): the meaning of the positional arguments (factor, dimension,
// and the two boolean flags) is not visible in this file; confirm against the
// fork-tile pass definition.
macro fork-tile![n](X) {
fork-tile[n, 0, false, true](X);
}
@@ -66,8 +70,8 @@ if feature("cuda") {
// Parallelize by computing output array as 16 chunks
let par = matmul@outer \ matmul@inner;
fork-tile![4](par);
let (outer, inner, _) = fork-reshape[[1, 3], [0], [2]](par);
fork-chunk![4](par);
let (outer, inner, _) = fork-reshape[[0, 2], [1], [3]](par);
parallelize!(outer \ inner);
let body = outline(inner);
Loading