Skip to content
Snippets Groups Projects
Commit 25db4a50 authored by Aaron Councilman's avatar Aaron Councilman Committed by rarbore2
Browse files

Fix backprop and matmul cpu schedules

parent 8e7b89c5
No related branches found
No related tags found
1 merge request!223Fix backprop and matmul cpu schedules
......@@ -25,6 +25,10 @@ macro forkify!(X) {
}
}
macro fork-chunk![n](X) {
fork-tile[n, 0, false, false](X);
}
macro fork-tile![n](X) {
fork-tile[n, 0, false, true](X);
}
......@@ -66,8 +70,8 @@ if feature("cuda") {
// Parallelize by computing output array as 16 chunks
let par = matmul@outer \ matmul@inner;
fork-tile![4](par);
let (outer, inner, _) = fork-reshape[[1, 3], [0], [2]](par);
fork-chunk![4](par);
let (outer, inner, _) = fork-reshape[[0, 2], [1], [3]](par);
parallelize!(outer \ inner);
let body = outline(inner);
......
......@@ -38,8 +38,8 @@ let forward_input = outline(backprop@forward_input);
let forward_hidden = outline(backprop@forward_hidden);
if !feature("seq") {
fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop);
let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop);
fork-tile[16, 0, false, false](forward_input@outer_loop \ forward_input@inner_loop);
let (outer, inner) = fork-reshape[[0], [1]](forward_input@outer_loop \ forward_input@inner_loop);
forward_input = outline(inner);
inline(backprop@forward_input);
}
......@@ -53,8 +53,8 @@ let adjust_hidden = outline(backprop@adjust_hidden);
let adjust_input = outline(backprop@adjust_input);
if !feature("seq") {
fork-tile[16, 0, false, true](adjust_input);
let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
fork-tile[16, 0, false, false](adjust_input);
let (outer, inner) = fork-reshape[[0], [1, 2]](adjust_input);
adjust_input = outline(inner);
inline(backprop@adjust_input);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment