From dc0ebc25fadb92e6872667b66d1e4064a8b2e304 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 4 Mar 2025 17:51:37 -0600 Subject: [PATCH 1/2] fix bfs --- hercules_opt/src/gcm.rs | 25 +++++++++++++++++++++++++ juno_samples/rodinia/bfs/src/cpu.sch | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index 4a6365c8..4cee5e83 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -221,6 +221,31 @@ fn preliminary_fixups( } } + // Add region nodes between join nodes and loop headers to aid in block + // placement. + for (_, join) in fork_join_map { + let control_user = editor + .get_users(*join) + .filter(|id| nodes[id.idx()].is_control()) + .next() + .unwrap(); + if nodes[control_user.idx()].is_fork() + || nodes[control_user.idx()] + .try_region() + .map(|preds| preds.len() > 1) + .unwrap_or(false) + { + let success = editor.edit(|mut edit| { + let region = edit.add_node(Node::Region { + preds: Box::new([*join]), + }); + edit.replace_all_uses_where(*join, region, |id| *id == control_user) + }); + assert!(success); + return true; + } + } + false } diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch index 339782d6..2bd762b2 100644 --- a/juno_samples/rodinia/bfs/src/cpu.sch +++ b/juno_samples/rodinia/bfs/src/cpu.sch @@ -50,7 +50,7 @@ fork-tile[32, 0, false, true](init); let (outer, inner) = fork-reshape[[1], [0]](init); let init_body = outline(inner); -inline(bfs@loop1, bfs@loop2); +inline(bfs@cost_init, bfs@loop1, bfs@loop2); delete-uncalled(*); const-inline(*); simpl!(*); -- GitLab From 3530e5ab8ac647e713243e55ee8836f2c2a92c81 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 4 Mar 2025 18:52:10 -0600 Subject: [PATCH 2/2] Spin barrier that might be useful later --- hercules_rt/src/lib.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/hercules_rt/src/lib.rs 
b/hercules_rt/src/lib.rs
index a5954ca0..9265808b 100644
--- a/hercules_rt/src/lib.rs
+++ b/hercules_rt/src/lib.rs
@@ -5,6 +5,7 @@ use std::future::Future;
 use std::marker::PhantomData;
 use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
 use std::slice::{from_raw_parts, from_raw_parts_mut};
+use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::OnceLock;
 
 /*
@@ -928,3 +929,58 @@ unsafe impl GlobalAlloc for AlignedAlloc {
 
 #[global_allocator]
 static A: AlignedAlloc = AlignedAlloc;
+
+/// A reusable busy-waiting barrier for a fixed number of threads.
+///
+/// Every participant calls [`SpinBarrier::wait`]; callers spin until `num`
+/// of them have arrived, at which point all are released and the barrier is
+/// ready for the next round.
+#[derive(Debug)]
+pub struct SpinBarrier {
+    /// Number of `wait` calls that complete one round (never 0).
+    num: usize,
+    /// Number of threads that have arrived in the current round.
+    waiting: AtomicUsize,
+    /// Round counter; the last arriver bumps it to release the spinners.
+    /// Not named `gen` because that is a reserved keyword in Rust 2024.
+    generation: AtomicUsize,
+}
+
+impl SpinBarrier {
+    /// Creates a barrier that opens once `num` threads have called `wait`.
+    ///
+    /// A `num` of 0 is treated as 1, so `wait` returns immediately instead
+    /// of spinning forever on a count that can never be reached.
+    pub const fn new(num: usize) -> Self {
+        SpinBarrier {
+            num: if num == 0 { 1 } else { num },
+            waiting: AtomicUsize::new(0),
+            generation: AtomicUsize::new(0),
+        }
+    }
+
+    /// Busy-waits until `num` threads, the caller included, have called
+    /// `wait` for the current round.
+    ///
+    /// Writes made by any participant before its call are visible to every
+    /// participant once its own call has returned.
+    pub fn wait(&self) {
+        let old_generation = self.generation.load(Ordering::Acquire);
+        // AcqRel, not Relaxed: the release half publishes this thread's
+        // earlier writes to the last arriver, whose acquire half collects
+        // them before it releases everyone through `generation`.
+        let old_waiting = self.waiting.fetch_add(1, Ordering::AcqRel);
+        if old_waiting + 1 == self.num {
+            // Reset the count before bumping the generation; the Release
+            // below makes the reset visible to threads entering the next
+            // round.
+            self.waiting.store(0, Ordering::Relaxed);
+            self.generation.fetch_add(1, Ordering::Release);
+        } else {
+            while old_generation == self.generation.load(Ordering::Acquire) {
+                // Tell the CPU this is a spin-wait loop.
+                std::hint::spin_loop();
+            }
+        }
+    }
+}
-- 
GitLab