Skip to content
Snippets Groups Projects

More optimizations

Merged rarbore2 requested to merge more_opt3 into main
1 file
+ 14
1
Compare changes
  • Side-by-side
  • Inline
@@ -42,12 +42,25 @@ let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_i
// Outline `inner` (bound by the fork-reshape statement just before this
// point), rebind the name forward_input to the outlined result, and then
// inline at backprop@forward_input.
// NOTE(review): presumably `inner` is the inner fork group produced by that
// fork-reshape — confirm against the preceding statement.
let forward_input = outline(inner);
inline(backprop@forward_input);
// The first call to adjust_weights has total loop dimensions of 1 * 17, so it
// is not worth parallelizing (given that the body is trivial).
// The second call to adjust_weights has a total dimension of 16 * (input + 1),
// which is worth parallelizing; we'll do it by 16.
//
// Inline at both labelled call sites, then outline each site again under its
// own name (adjust_hidden, adjust_input). Only adjust_input is transformed
// further below; adjust_hidden is left as outlined.
inline(backprop@adjust_hidden, backprop@adjust_input);
let adjust_hidden = outline(backprop@adjust_hidden);
let adjust_input = outline(backprop@adjust_input);
// Tile adjust_input; the leading 16 is presumably the tile size ("by 16"
// above).
// NOTE(review): the meaning of the remaining fork-tile arguments
// (0, false, true) is not visible from this script — confirm.
fork-tile[16, 0, false, true](adjust_input);
// Reshape the tiled forks of adjust_input into the two groups bound to
// `outer` and `inner`, outline `inner`, rebind adjust_input to the outlined
// result, and inline at backprop@adjust_input (the same outline-then-inline
// sequence used for forward_input).
// NOTE(review): the index lists [[1], [0, 2]] presumably select which fork
// dimensions go to `outer` and which to `inner` — confirm.
let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
let adjust_input = outline(inner);
inline(backprop@adjust_input);
// Final pass sequence, run in exactly this order after the transformations
// above.
// NOTE(review): `*` presumably selects every function in the module —
// confirm.
delete-uncalled(*);
const-inline(*);
simpl!(*);
fork-split(*);
// NOTE(review): the two unforkify calls below differ only in that the second
// names adjust_hidden and adjust_input where the first names adjust_weights.
// The first looks like a pre-change version of the same line (this schedule
// now works with adjust_hidden/adjust_input rather than adjust_weights) —
// confirm whether it should remain.
unforkify(output_error, hidden_error, adjust_weights, forward_hidden, forward_input);
unforkify(output_error, hidden_error, adjust_hidden, adjust_input, forward_hidden, forward_input);
simpl!(*);
gcm(*);
Loading