Compare revisions

rarbore2 · rarbore2 · 4a0f20de · 4a0f20de · 4a0f20de · 4a0f20de
--- a/hercules_ir/src/collections.rs
+++ b/hercules_ir/src/collections.rs
-use std::collections::{BTreeMap, BTreeSet, HashMap};
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
 use std::iter::{once, repeat, zip};

 use either::Either;
@@ -432,20 +432,8 @@ pub fn no_reset_constant_collections(
                init: _,
                reduct: _,
            } => {
-                // If the einsum for this reduce node is a full array
-                // comprehension, then every array element is written to, and
-                // the empty indices set (the whole collection) is considered as
-                // written to.
-                let (env, exprs) = reduce_einsum;
-                if let Some(expr) = exprs.get(&id)
-                    && let MathExpr::Comprehension(_, _) = env[expr.idx()]
-                {
-                    ZeroLattice::top()
-                }
-                // Otherwise, meet the `init` and `reduct` inputs.
+                // Meet the `init` and `reduct` inputs.
-                else {
-                    ZeroLattice::meet(&inputs[0], &inputs[1])
-                }
+                ZeroLattice::meet(&inputs[0], &inputs[1])
            }
            Node::Write {
                collect: _,

--- a/hercules_ir/src/einsum.rs
+++ b/hercules_ir/src/einsum.rs
@@ -400,6 +400,35 @@ impl<'a> EinsumContext<'a> {
    }
 }

+/// Collects every `NodeID` wrapped in a `MathExpr::OpaqueNode` reachable from
+/// `id` by walking sub-expressions of `env` with an explicit worklist.
+pub fn opaque_nodes_in_expr(env: &MathEnv, id: MathID) -> HashSet<NodeID> {
+    let mut opaque = HashSet::new();
+    let mut worklist = vec![id];
+    while let Some(expr) = worklist.pop() {
+        match env[expr.idx()] {
+            // Leaves: nothing to collect and nothing further to traverse.
+            MathExpr::Zero(_) | MathExpr::One(_) | MathExpr::ThreadID(_) => {}
+            MathExpr::OpaqueNode(node) => {
+                opaque.insert(node);
+            }
+            MathExpr::SumReduction(body, _) | MathExpr::Comprehension(body, _) => {
+                worklist.push(body);
+            }
+            MathExpr::Read(base, ref indices) => {
+                worklist.push(base);
+                worklist.extend(indices);
+            }
+            MathExpr::Add(lhs, rhs)
+            | MathExpr::Sub(lhs, rhs)
+            | MathExpr::Mul(lhs, rhs)
+            | MathExpr::Div(lhs, rhs)
+            | MathExpr::Rem(lhs, rhs) => worklist.extend([lhs, rhs]),
+        }
+    }
+    opaque
+}
+
 fn representable(op: BinaryOperator) -> bool {
    match op {
        BinaryOperator::Add

--- a/hercules_samples/matmul/src/gpu.sch
+++ b/hercules_samples/matmul/src/gpu.sch
+no-memset(matmul@c);
+
 gvn(*);
 phi-elim(*);
 dce(*);

--- a/juno_samples/fork_join_tests/src/fork_join_tests.jn
+++ b/juno_samples/fork_join_tests/src/fork_join_tests.jn
 #[entry]
 fn test1(input : i32) -> i32[4, 4] {
-  let arr : i32[4, 4];
+  @const let arr : i32[4, 4];
  for i = 0 to 4 {
    for j = 0 to 4 {
      arr[i, j] = input;
@@ -24,19 +24,19 @@ fn test2(input : i32) -> i32[4, 4] {

 #[entry]
 fn test3(input : i32) -> i32[3, 3] {
-  let arr1 : i32[3, 3];
+  @const1 let arr1 : i32[3, 3];
  for i = 0 to 3 {
    for j = 0 to 3 {
      arr1[i, j] = (i + j) as i32 + input;
    }
  }
-  let arr2 : i32[3, 3];
+  @const2 let arr2 : i32[3, 3];
  for i = 0 to 3 {
    for j = 0 to 3 {
      arr2[i, j] = arr1[2 - i, 2 - j];
    }
  }
-  let arr3 : i32[3, 3];
+  @const3 let arr3 : i32[3, 3];
  for i = 0 to 3 {
    for j = 0 to 3 {
      arr3[i, j] = arr2[i, j] + 7;
@@ -54,7 +54,7 @@ fn test4(input : i32) -> i32[4, 4] {
      for k = 0 to 7 {
        acc += input;
      }
-      arr[i, j] = acc;
+      @reduce arr[i, j] = acc;
    }
  }
  return arr;
@@ -62,7 +62,7 @@ fn test4(input : i32) -> i32[4, 4] {

 #[entry]
 fn test5(input : i32) -> i32[4] {
-  @cons let arr1 : i32[4];
+  let arr1 : i32[4];
  for i = 0 to 4 {
    let red = arr1[i];
    for k = 0 to 3 {

--- a/juno_samples/fork_join_tests/src/gpu.sch
+++ b/juno_samples/fork_join_tests/src/gpu.sch
-no-memset(test5@cons);
+parallel-reduce(test4@reduce);
 parallel-reduce(test5@reduce);
+no-memset(test1@const);
+no-memset(test3@const1);
+no-memset(test3@const2);
+no-memset(test3@const3);

 gvn(*);
 phi-elim(*);
@@ -33,5 +37,5 @@ fixpoint panic after 20 {
  infer-schedules(*);
 }

-float-collections(test2, out.test2, test4, out.test4);
+float-collections(test2, out.test2, test4, out.test4, test5, out.test5);
 gcm(*);
--- a/juno_scheduler/src/pm.rs
+++ b/juno_scheduler/src/pm.rs
@@ -1329,7 +1329,7 @@ fn run_pass(
    pm: &mut PassManager,
    pass: Pass,
    args: Vec<Value>,
-    mut selection: Option<Vec<CodeLocation>>,
+    selection: Option<Vec<CodeLocation>>,
 ) -> Result<(Value, bool), SchedulerError> {
    let mut result = Value::Record {
        fields: HashMap::new(),
No results found