Skip to content
Snippets Groups Projects
gpu.sch 1.40 KiB
// simpl!(X): reusable cleanup pipeline applied to the selection X.
// Runs, in this exact order: ccp, simplify-cfg, lift-dc-math, gvn, phi-elim,
// dce, and finally infer-schedules. The rest of this schedule invokes it after
// most structural transformations.
// NOTE(review): going by the pass names this is constant propagation, CFG
// simplification, value numbering and dead-code elimination followed by
// schedule inference -- confirm against the scheduler's pass documentation.
macro simpl!(X) {
  ccp(X);
  simplify-cfg(X);
  lift-dc-math(X);
  gvn(X);
  phi-elim(X);
  dce(X);
  infer-schedules(X);
}

// Stage 1: outline the labelled error loops and pick what is handed to gpu().
// no-memset is applied to the `res` label in each of the three functions.
// NOTE(review): presumably tells the compiler those results need no
// zero-initialisation -- confirm.
no-memset(layer_forward@res, output_error@res, hidden_error@res);
phi-elim(*);
// Outline loop1/loop2 of output_error and hidden_error; each result is bound
// to a name so it can be listed in the gpu() call below.
let output_loop1 = outline(output_error@loop1);
let output_loop2 = outline(output_error@loop2);
let hidden_loop1 = outline(hidden_error@loop1);
let hidden_loop2 = outline(hidden_error@loop2);
simpl!(*);
// inline is applied to layer_forward and to the output_error / hidden_error
// labels inside backprop; delete-uncalled then runs over everything.
inline(layer_forward, backprop@output_error, backprop@hidden_error);
delete-uncalled(*);
// gpu() receives layer_forward, the four outlined loops and adjust_weights.
// NOTE(review): presumably selects the GPU backend for exactly these
// functions -- confirm.
gpu(layer_forward, output_loop1, output_loop2, hidden_loop1, hidden_loop2, adjust_weights);
const-inline(*);

// Stage 2: whole-program loop canonicalisation and fork formation.
// lift-dc-math runs both before loop-bound-canon and again after simpl!, so
// it is deliberately repeated around the cleanup.
lift-dc-math(*);
loop-bound-canon(*);
simpl!(*);
lift-dc-math(*);
slf(*);
// The three fork passes run together inside a fixpoint block.
// NOTE(review): presumably the block repeats until none of the passes changes
// anything -- confirm the termination rule.
fixpoint {
  forkify(*);
  fork-guard-elim(*);
  fork-coalesce(*);
}
// reduce-slf runs once after the fixpoint block, followed by the usual cleanup.
reduce-slf(*);
simpl!(*);

// Stage 3: restructure the fork labelled inner_loop in layer_forward.
// fork-extend and the first fork-tile both use 32768; the second fork-tile
// uses 1024 with 1 as its second argument.
// NOTE(review): the meaning of the [size, index, bool, bool] tile arguments is
// not visible in this file -- confirm against the fork-tile documentation.
fork-extend[32768](layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
simpl!(layer_forward);
fork-tile[32768, 0, false, true](layer_forward@inner_loop);
fork-tile[1024, 1, false, true](layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
// The result of fork-split is kept because fork-fission below selects its
// target through the `_1_layer_forward.fj0` field of `out`.
let out = fork-split(layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
simpl!(layer_forward);
// NOTE(review): `fission` is bound but not referenced anywhere in the visible
// part of this schedule.
let fission = fork-fission[out._1_layer_forward.fj0](layer_forward);
simpl!(layer_forward);

// Stage 4: restructure the forks in adjust_weights.
// fork-dim-merge runs first, then the same extend / tile / split sequence as
// for layer_forward, here with 32 for both fork-extend and fork-tile.
fork-dim-merge(adjust_weights);
simpl!(adjust_weights);
fork-extend[32](adjust_weights);
fork-tile[32, 0, false, true](adjust_weights);
// Unlike in layer_forward, the result of fork-split is not bound to a name.
fork-split(adjust_weights);
simpl!(adjust_weights);

// Final step: gcm over every function; it is the last pass in this schedule.
// NOTE(review): presumably global code motion, which would need to stay after
// all other transformations -- confirm.
gcm(*);