diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index b2f9767c9cd2eb56b698f05d7b93dbba59c8681e..ae8c813d47e5192c2aa1937917a88ef9f0e83e8c 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -324,6 +324,12 @@ fn basic_blocks( // 3: If the node producing the collection is a reduce node, then any read // users that aren't in the reduce's cycle shouldn't anti-depend user any // mutators in the reduce cycle. + // + // Because we do a liveness analysis based spill of collections, anti- + // dependencies can be best effort. Thus, when we encounter a read and + // mutator where the read doesn't dominate the mutator, but an anti-dependence + // edge is derived for the pair, we just don't draw the edge since it would + // break the scheduler. let mut antideps = BTreeSet::new(); for id in reverse_postorder.iter() { // Find a terminating read node and the collections it reads. @@ -385,6 +391,7 @@ fn basic_blocks( .get(root) .map(|cycle| cycle.contains(mutator)) .unwrap_or(false)) + && dom.does_dom(schedule_early[id.idx()].unwrap(), mutator_early) { antideps.insert((*id, *mutator)); } diff --git a/juno_samples/antideps/src/antideps.jn b/juno_samples/antideps/src/antideps.jn index 85c8b9d487175d01c51f6c4b2c1dfb4d756be86e..23532cc9d7b6e2f10efafec3d91a58db705c0e13 100644 --- a/juno_samples/antideps/src/antideps.jn +++ b/juno_samples/antideps/src/antideps.jn @@ -135,3 +135,29 @@ fn array_of_structs(input: i32) -> i32 { arr[0].1 = 99; return result + sub.1 - arr[0].1; } + +#[entry] +fn issue_21<n: usize>() -> i32 { + let visited: bool[n]; + let cost: i32[n]; + + for i = 0 to n { + if visited[i] { + cost[i] = cost[i] + 1; + } + } + + for i = 0 to n { + visited[i] = true; + } + + let total : i32; + for i = 0 to n { + if visited[i] { + total += cost[i] + 2; + } + } + + return total; +} + diff --git a/juno_samples/antideps/src/gpu.sch b/juno_samples/antideps/src/gpu.sch index e166515dc5562f2e142792229fea92309a42e526..7e049268bdd31e2ac2e10a468e66db9740fab2f4 
100644 --- a/juno_samples/antideps/src/gpu.sch +++ b/juno_samples/antideps/src/gpu.sch @@ -3,7 +3,7 @@ phi-elim(*); dce(*); let out = auto-outline(*); -gpu(out.simple_antideps, out.loop_antideps, out.complex_antideps1, out.complex_antideps2, out.very_complex_antideps, out.read_chains, out.array_of_structs); +gpu(out.simple_antideps, out.loop_antideps, out.complex_antideps1, out.complex_antideps2, out.very_complex_antideps, out.read_chains, out.array_of_structs, out.issue_21); ip-sroa(*); sroa(*); diff --git a/juno_samples/antideps/src/main.rs b/juno_samples/antideps/src/main.rs index 9c37bd0148e1b4dbf53705c03db1f47346359684..d311ae484a0f5b734dc7053930c61d1f1fd5037e 100644 --- a/juno_samples/antideps/src/main.rs +++ b/juno_samples/antideps/src/main.rs @@ -40,6 +40,11 @@ fn main() { let output = r.run(2).await; println!("{}", output); assert_eq!(output, 14); + + let mut r = runner!(issue_21); + let output = r.run(42).await; + println!("{}", output); + assert_eq!(output, 42 * 2); }); }