From 3f8acd3bb4d0b7388a74c7dc24e3d1e4ad9771b1 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 27 Feb 2025 22:44:17 -0600 Subject: [PATCH] misc. progress for bfs gpu --- hercules_cg/src/rt.rs | 2 +- hercules_opt/src/gcm.rs | 70 ++++++++++++++++------------ juno_samples/rodinia/bfs/src/gpu.sch | 52 ++++++++++++++------- 3 files changed, 76 insertions(+), 48 deletions(-) diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs index 6981a3da..d94f0e19 100644 --- a/hercules_cg/src/rt.rs +++ b/hercules_cg/src/rt.rs @@ -938,7 +938,7 @@ impl<'a> RTContext<'a> { let dst_device = self.node_colors.0[&collect]; write!( block, - "::hercules_rt::__copy_{}_to_{}({}.byte_add({} as usize).0, {}.0, {});", + "::hercules_rt::__copy_{}_to_{}({}.byte_add({} as usize).0, {}.0, {} as usize);", src_device.name(), dst_device.name(), self.get_value(collect, bb, false), diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index c612acac..3f326051 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -1304,37 +1304,49 @@ enum UTerm { fn unify( mut equations: VecDeque<(UTerm, UTerm)>, -) -> Result<BTreeMap<NodeID, Device>, BTreeMap<NodeID, Device>> { +) -> Result<BTreeMap<NodeID, Device>, (NodeID, NodeID)> { let mut theta = BTreeMap::new(); + // First, assign devices to nodes when a rule directly says to. + for _ in 0..equations.len() { + let (l, r) = equations.pop_front().unwrap(); + match (l, r) { + (UTerm::Node(n), UTerm::Device(d)) | (UTerm::Device(d), UTerm::Node(n)) => { + theta.insert(n, d); + } + _ => equations.push_back((l, r)), + } + } + + // Second, iterate the rest of the rules until... + // 1. The rules are exhausted. All the nodes have device assignments. + // 2. No progress is being made. Some nodes may not have device assignments. + // 3. An inconsistency has been found. The inconsistency is returned. let mut no_progress_iters = 0; while no_progress_iters <= equations.len() && let Some((l, r)) = equations.pop_front() { - match (l, r) { - (UTerm::Node(_), UTerm::Node(_)) => { - if l != r { - equations.push_back((l, r)); - } - no_progress_iters += 1; - } - (UTerm::Node(n), UTerm::Device(d)) | (UTerm::Device(d), UTerm::Node(n)) => { - theta.insert(n, d); - for (l, r) in equations.iter_mut() { - if *l == UTerm::Node(n) { - *l = UTerm::Device(d); - } - if *r == UTerm::Node(n) { - *r = UTerm::Device(d); - } + let (UTerm::Node(l), UTerm::Node(r)) = (l, r) else { + panic!(); + }; + + match (theta.get(&l), theta.get(&r)) { + (Some(ld), Some(rd)) => { + if ld != rd { + return Err((l, r)); + } else { + no_progress_iters = 0; } - no_progress_iters = 0; } - (UTerm::Device(d1), UTerm::Device(d2)) if d1 == d2 => { + (Some(d), None) | (None, Some(d)) => { + let d = *d; + theta.insert(l, d); + theta.insert(r, d); no_progress_iters = 0; } - _ => { - return Err(theta); + (None, None) => { + equations.push_back((UTerm::Node(l), UTerm::Node(r))); + no_progress_iters += 1; } } } @@ -1377,8 +1389,8 @@ fn color_nodes( } if !editor.get_type(typing[id.idx()]).is_primitive() => { // Every input to a phi needs to be on the same device. The // phi itself is also on this device. - for (l, r) in zip(data.into_iter(), data.into_iter().skip(1).chain(once(&id))) { - equations.push((UTerm::Node(*l), UTerm::Node(*r))); + for data in data { + equations.push((UTerm::Node(*data), UTerm::Node(id))); } } Node::Reduce { @@ -1394,7 +1406,7 @@ fn color_nodes( } if !editor.get_type(typing[id.idx()]).is_primitive() => { // Every input to the reduce, and the reduce itself, are on // the same device. - equations.push((UTerm::Node(first), UTerm::Node(second))); + equations.push((UTerm::Node(first), UTerm::Node(id))); equations.push((UTerm::Node(second), UTerm::Node(id))); } Node::Constant { id: _ } @@ -1533,12 +1545,12 @@ fn color_nodes( } Some(func_colors) } - Err(progress) => { + Err(inconsistency) => { // If unification failed, then there's some node using a node in - // `progress` that's expecting a different type than what it got. - // Pick one and add potentially inter-device copies on each def-use - // edge. We'll clean these up later. - let (id, _) = progress.into_iter().next().unwrap(); + // that's expecting a different type than what it got. Pick one and + // add potentially inter-device copies on each def-use edge. We'll + // clean these up later. + let id = inconsistency.0; let users: Vec<_> = editor.get_users(id).collect(); let success = editor.edit(|mut edit| { let cons = edit.add_zero_constant(typing[id.idx()]); diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch index 0a3f4d77..49d44b98 100644 --- a/juno_samples/rodinia/bfs/src/gpu.sch +++ b/juno_samples/rodinia/bfs/src/gpu.sch @@ -1,23 +1,39 @@ -gvn(*); -phi-elim(*); -dce(*); +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + dce(X); + infer-schedules(X); +} -let outline = auto-outline(bfs); -gpu(outline.bfs); +phi-elim(bfs); +no-memset(bfs@cost); +let cost_init = outline(bfs@cost_init); +let loop1 = outline(bfs@loop1); +let loop2 = outline(bfs@loop2); +gpu(cost_init, loop1, loop2); -ip-sroa(*); -sroa(*); -dce(*); -gvn(*); -phi-elim(*); -dce(*); +simpl!(*); +predication(*); +const-inline(*); +simpl!(*); +fixpoint { + forkify(*); + fork-guard-elim(*); +} +simpl!(*); +predication(*); +simpl!(*); -//forkify(*); -infer-schedules(*); +parallel-reduce(loop1); +forkify(*); +fork-guard-elim(*); +simpl!(*); +predication(*); +reduce-slf(*); +simpl!(*); gcm(*); -fixpoint { - float-collections(*); - dce(*); - gcm(*); -} +xdot[true](*); -- GitLab