Skip to content
Snippets Groups Projects
cpu.sch 1.88 KiB
Newer Older
  • Learn to ignore specific revisions
  • macro simpl!(X) {
      // Reusable pass pipeline: runs the seven passes below, in exactly this
      // order, on whatever target X the caller supplies. Judging by the name
      // and the pass names this is a general simplification/cleanup sequence
      // -- confirm against the pass documentation.
      ccp(X);
      simplify-cfg(X);
      lift-dc-math(X);
      gvn(X);
      phi-elim(X);
      dce(X);
      // infer-schedules runs last, i.e. after the other six passes.
      infer-schedules(X);
    }
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    
    
    // Run the simpl! pipeline on `*` (presumably every function -- confirm),
    // then apply inline to layer_forward.
    simpl!(*);
    inline(layer_forward);
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    // Runs directly after layer_forward is inlined; presumably removes
    // functions that are left without callers -- confirm.
    delete-uncalled(*);
    
    
    rarbore2's avatar
    rarbore2 committed
    // Apply no-memset to the `@res` targets of layer_forward, output_error and
    // hidden_error (presumably suppresses zero-initialisation of those results
    // -- TODO confirm), then run lift-dc-math and loop-bound-canon over `*`.
    no-memset(layer_forward@res, output_error@res, hidden_error@res);
    
    lift-dc-math(*);
    loop-bound-canon(*);
    
    rarbore2's avatar
    rarbore2 committed
    // Re-run the simpl! pipeline on `*` after loop-bound-canon.
    simpl!(*);
    
    Xavier Routh's avatar
    Xavier Routh committed
    // NOTE(review): simpl! already contains lift-dc-math as its third pass;
    // this extra run happens after the remaining simpl! passes (gvn, phi-elim,
    // dce, infer-schedules) have finished -- presumably intentional, confirm.
    lift-dc-math(*);
    
    rarbore2's avatar
    rarbore2 committed
    // Run slf over `*` ahead of the fork passes below.
    slf(*);
    
    // Run the three fork passes, in this order, inside a fixpoint block
    // (presumably repeated until an iteration makes no further change --
    // confirm against the scheduler documentation).
    fixpoint {
      forkify(*);
      fork-guard-elim(*);
      fork-coalesce(*);
    }
    
    rarbore2's avatar
    rarbore2 committed
    // After the fork fixpoint, run reduce-slf over `*` and then the simpl!
    // pipeline again.
    reduce-slf(*);
    simpl!(*);
    
    rarbore2's avatar
    rarbore2 committed
    // Apply fork-interchange with arguments [0, 1] to adjust_weights
    // (presumably swaps fork dimensions 0 and 1 -- confirm), then clean up.
    fork-interchange[0, 1](adjust_weights);
    simpl!(*);
    
    // NOTE(review): simpl! already ends with infer-schedules(X), so this call
    // repeats that pass immediately after simpl!(*) -- confirm it is needed.
    infer-schedules(*);
    
    // The first call to layer_forward can be parallelized by 16 (the size of the
    // hidden layer) and the second can't be parallelized at all (the size of the
    // output layer is 1)
    //
    // Inline at backprop@forward_input and backprop@forward_hidden, then outline
    // each of those two targets and bind the results to forward_input and
    // forward_hidden; both names are referenced again later in this schedule.
    inline(backprop@forward_input, backprop@forward_hidden);
    let forward_input = outline(backprop@forward_input);
    let forward_hidden = outline(backprop@forward_hidden);
    
    
    rarbore2's avatar
    rarbore2 committed
    // Only taken when the "seq" feature is NOT enabled.
    if !feature("seq") {
      // Tile with a leading argument of 16, the factor named in the comment on
      // the first layer_forward call. The meaning of the remaining arguments
      // (0, false, true) is not visible here -- see the fork-tile
      // documentation. The target is forward_input@outer_loop `\`
      // forward_input@inner_loop (presumably the outer loop excluding the
      // inner loop -- confirm).
      fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop);
      // fork-reshape returns a pair; only `inner` is used below, `outer` is
      // bound but never referenced.
      let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop);
      // Rebind forward_input to the outlined `inner` part, then inline at
      // backprop@forward_input.
      forward_input = outline(inner);
      inline(backprop@forward_input);
    }
    
    rarbore2's avatar
    rarbore2 committed
    
    // The first call to adjust_weights has total loop dimensions of 1 * 17, so
    // it is not worth parallelizing (given that the body is trivial).
    // The second call to adjust_weights has a total dimension of
    // 16 * (input + 1), which is worth parallelizing; we'll do it by 16.
    //
    // Inline at backprop@adjust_hidden and backprop@adjust_input, then outline
    // each of those two targets and bind the results to adjust_hidden and
    // adjust_input; both names are referenced again later in this schedule.
    inline(backprop@adjust_hidden, backprop@adjust_input);
    let adjust_hidden = outline(backprop@adjust_hidden);
    let adjust_input = outline(backprop@adjust_input);
    
    
    rarbore2's avatar
    rarbore2 committed
    // Only taken when the "seq" feature is NOT enabled.
    if !feature("seq") {
      // Tile adjust_input with a leading argument of 16, the factor named in
      // the comment on the second adjust_weights call. The meaning of the
      // remaining arguments (0, false, true) is not visible here -- see the
      // fork-tile documentation.
      fork-tile[16, 0, false, true](adjust_input);
      // fork-reshape returns a pair; only `inner` is used below, `outer` is
      // bound but never referenced.
      let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
      // Rebind adjust_input to the outlined `inner` part, then inline at
      // backprop@adjust_input.
      adjust_input = outline(inner);
      inline(backprop@adjust_input);
    }
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    
    
    rarbore2's avatar
    rarbore2 committed
    // Cleanup after the inline/outline steps: delete-uncalled and const-inline
    // over `*`, followed by the simpl! pipeline.
    delete-uncalled(*);
    const-inline(*);
    
    simpl!(*);
    
    Xavier Routh's avatar
    Xavier Routh committed
    // Run fork-split over `*` (presumably splits multi-dimensional forks into
    // separate single-dimension forks -- TODO confirm).
    fork-split(*);
    
    rarbore2's avatar
    rarbore2 committed
    // Apply unforkify to six targets: output_error, hidden_error, and the four
    // outlined handles bound earlier (adjust_hidden, adjust_input,
    // forward_hidden, forward_input). Presumably this turns the forks left in
    // those functions back into sequential loops -- confirm. Then clean up.
    unforkify(output_error, hidden_error, adjust_hidden, adjust_input, forward_hidden, forward_input);
    simpl!(*);
    
    
    Aaron Councilman's avatar
    Aaron Councilman committed
    // Last step visible in this schedule: gcm over `*` (presumably global code
    // motion -- confirm against the pass documentation).
    gcm(*);