Skip to content
Snippets Groups Projects

GPU backend

Merged prathi3 requested to merge gpu-cg into main
1 file
+ 0
45
Compare changes
  • Side-by-side
  • Inline
+ 0
45
rarbore2
Last comment by prathi3
// Tiled matrix multiply of `a` (1024 x m) by `b` (m x l) using 64 x 64 tiles.
// Instead of returning the product matrix, the function returns the sum of
// all elements of the product, accumulated block by block into `res`.
//
// The row dimension is fixed at 1024, which is covered by 16 row blocks of
// 64. The number of column blocks is `l / 64` and the number of reduction
// tiles is `m / 64`, so any part of `l` or `m` beyond the last full multiple
// of 64 is never visited.
// NOTE(review): presumably callers only pass multiples of 64 - confirm
// against the test harness.
#[entry]
fn tiled_64_matmul_with_n_1024<m : usize, l : usize>(a : i32[1024, m], b : i32[m, l]) -> i32 {
  let res = 0;

  for bi = 0 to 16 {
    for bk = 0 to l / 64 {
      // TODO: make these all the same size, clone analysis should undo GVN's
      // combining of these three arrays.
      // Only the leading 64 x 64 region of each tile is read or written.
      let atile : i32[66, 64];
      let btile : i32[65, 64];
      let ctile : i32[64, 64];

      // TODO: remove setting ctile to zero explicitly, clone analysis
      // should see a lack of a phi for ctile in the block loops and
      // induce a copy of an initial value of ctile (all zeros) on each
      // iteration of the block loops.
      //
      // The accumulator is cleared once per (bi, bk) output block, before the
      // reduction over tile_idx. Clearing it inside the tile_idx loop would
      // throw away the partial products of every tile except the last one
      // whenever m > 64.
      for ti = 0 to 64 {
        for tk = 0 to 64 {
          ctile[ti, tk] = 0;
        }
      }

      for tile_idx = 0 to m / 64 {
        // Stage the current 64 x 64 tiles of a and b into the local arrays.
        for ti = 0 to 64 {
          for tk = 0 to 64 {
            atile[ti, tk] = a[bi * 64 + ti, tile_idx * 64 + tk];
            btile[ti, tk] = b[tile_idx * 64 + ti, bk * 64 + tk];
          }
        }

        // Add this tile pair's product onto the running output tile.
        for ti = 0 to 64 {
          for tk = 0 to 64 {
            let c_acc = ctile[ti, tk];
            for inner_idx = 0 to 64 {
              c_acc += atile[ti, inner_idx] * btile[inner_idx, tk];
            }
            ctile[ti, tk] = c_acc;
          }
        }
      }

      // Fold the finished output tile into the scalar result.
      for ti = 0 to 64 {
        for tk = 0 to 64 {
          res += ctile[ti, tk];
        }
      }
    }
  }

  return res;
}
Loading