// gpu.sch (1.40 KiB)
// simpl!(FUNCS): runs a fixed sequence of seven passes, in the order listed,
// over whatever selection is supplied as the macro argument.
macro simpl!(FUNCS) {
  ccp(FUNCS);
  simplify-cfg(FUNCS);
  lift-dc-math(FUNCS);
  gvn(FUNCS);
  phi-elim(FUNCS);
  dce(FUNCS);
  infer-schedules(FUNCS);
}
// Top-level schedule. Statements run in the order written; the meaning of each
// pass (and of its bracketed arguments) is defined by the scheduler, not by
// this file, so the notes below describe only what is visible here.
//
// Stage 1: apply no-memset to the `res` label of three functions, then run
// phi-elim over everything.
no-memset(layer_forward@res, output_error@res, hidden_error@res);
phi-elim(*);
// Stage 2: outline the two labelled loops of output_error and hidden_error and
// bind each result to a name; these names are referenced by the gpu(...) call.
let output_loop1 = outline(output_error@loop1);
let output_loop2 = outline(output_error@loop2);
let hidden_loop1 = outline(hidden_error@loop1);
let hidden_loop2 = outline(hidden_error@loop2);
simpl!(*);
// Stage 3: inline layer_forward and the labelled output_error / hidden_error
// call sites inside backprop, then drop whatever is left uncalled.
inline(layer_forward, backprop@output_error, backprop@hidden_error);
delete-uncalled(*);
// Stage 4: apply the gpu pass to layer_forward, the four outlined loops and
// adjust_weights.
// NOTE(review): presumably this assigns those functions to the GPU device - confirm.
gpu(layer_forward, output_loop1, output_loop2, hidden_loop1, hidden_loop2, adjust_weights);
// Stage 5: whole-program cleanup before fork/join formation.
const-inline(*);
lift-dc-math(*);
loop-bound-canon(*);
simpl!(*);
lift-dc-math(*);
slf(*);
// Stage 6: repeat the three fork passes inside a fixpoint block.
// NOTE(review): presumably iterates until no pass makes further changes - confirm.
fixpoint {
forkify(*);
fork-guard-elim(*);
fork-coalesce(*);
}
reduce-slf(*);
simpl!(*);
// Stage 7: shape layer_forward@inner_loop. fork-extend and the first fork-tile
// both use 32768; the second fork-tile uses 1024 with second argument 1.
// NOTE(review): the second bracket argument looks like a dimension index and
// the two booleans are tiling options - confirm against the fork-tile pass.
fork-extend[32768](layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
simpl!(layer_forward);
fork-tile[32768, 0, false, true](layer_forward@inner_loop);
fork-tile[1024, 1, false, true](layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
// The fork-split result is kept in `out` because the fork-fission argument
// below selects one of its members (out._1_layer_forward.fj0).
let out = fork-split(layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
simpl!(layer_forward);
// NOTE(review): `fission` is bound here but never referenced in the visible
// part of this file.
let fission = fork-fission[out._1_layer_forward.fj0](layer_forward);
simpl!(layer_forward);
// Stage 8: shape adjust_weights: merge fork dimensions, extend and tile by 32,
// then split. The fork-split result is discarded here.
fork-dim-merge(adjust_weights);
simpl!(adjust_weights);
fork-extend[32](adjust_weights);
fork-tile[32, 0, false, true](adjust_weights);
fork-split(adjust_weights);
simpl!(adjust_weights);
// Stage 9: final pass over everything.
gcm(*);