From 6382ef4263b16f54a8d3b4d5e3a795c9c9e11013 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 3 Feb 2025 15:44:25 -0600 Subject: [PATCH] fix ups --- Cargo.lock | 11 -- hercules_cg/src/fork_tree.rs | 19 +- hercules_opt/src/fork_guard_elim.rs | 19 +- hercules_opt/src/fork_transforms.rs | 166 +++++++++++------- hercules_opt/src/forkify.rs | 61 ++++--- hercules_samples/dot/build.rs | 6 +- hercules_samples/dot/src/main.rs | 2 +- hercules_samples/matmul/build.rs | 6 +- hercules_samples/matmul/src/main.rs | 6 +- .../hercules_interpreter/src/interpreter.rs | 76 ++++---- .../hercules_interpreter/src/value.rs | 10 +- .../hercules_tests/tests/loop_tests.rs | 40 ++--- juno_frontend/src/semant.rs | 11 +- juno_samples/cava/src/main.rs | 45 ++--- juno_samples/concat/src/main.rs | 4 +- juno_samples/edge_detection/src/main.rs | 11 +- juno_samples/matmul/src/main.rs | 18 +- juno_samples/nested_ccp/src/main.rs | 2 +- juno_samples/patterns/src/main.rs | 2 +- juno_samples/schedule_test/build.rs | 6 +- juno_samples/schedule_test/src/main.rs | 13 +- juno_samples/simple3/src/main.rs | 2 +- juno_scheduler/src/compile.rs | 2 +- juno_scheduler/src/ir.rs | 2 +- juno_scheduler/src/pm.rs | 31 ++-- 25 files changed, 336 insertions(+), 235 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a70825a..af7902c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1236,17 +1236,6 @@ dependencies = [ "with_builtin_macros", ] -[[package]] -name = "juno_test" -version = "0.1.0" -dependencies = [ - "async-std", - "hercules_rt", - "juno_build", - "rand", - "with_builtin_macros", -] - [[package]] name = "juno_utils" version = "0.1.0" diff --git a/hercules_cg/src/fork_tree.rs b/hercules_cg/src/fork_tree.rs index 64a93160..c048f7e3 100644 --- a/hercules_cg/src/fork_tree.rs +++ b/hercules_cg/src/fork_tree.rs @@ -9,11 +9,16 @@ use crate::*; * c) no domination by any other fork that's also dominated by F, where we do count self-domination * Here too we include the non-fork start node, as key for all controls outside any fork. */ -pub fn fork_control_map(fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>) -> HashMap<NodeID, HashSet<NodeID>> { +pub fn fork_control_map( + fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>, +) -> HashMap<NodeID, HashSet<NodeID>> { let mut fork_control_map = HashMap::new(); for (control, forks) in fork_join_nesting { let fork = forks.first().copied().unwrap_or(NodeID::new(0)); - fork_control_map.entry(fork).or_insert_with(HashSet::new).insert(*control); + fork_control_map + .entry(fork) + .or_insert_with(HashSet::new) + .insert(*control); } fork_control_map } @@ -24,13 +29,19 @@ pub fn fork_control_map(fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>) -> Has * c) no domination by any other fork that's also dominated by F, where we don't count self-domination * Note that the fork_tree also includes the non-fork start node, as unique root node. */ -pub fn fork_tree(function: &Function, fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>) -> HashMap<NodeID, HashSet<NodeID>> { +pub fn fork_tree( + function: &Function, + fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>, +) -> HashMap<NodeID, HashSet<NodeID>> { let mut fork_tree = HashMap::new(); for (control, forks) in fork_join_nesting { if function.nodes[control.idx()].is_fork() { fork_tree.entry(*control).or_insert_with(HashSet::new); let nesting_fork = forks.get(1).copied().unwrap_or(NodeID::new(0)); - fork_tree.entry(nesting_fork).or_insert_with(HashSet::new).insert(*control); + fork_tree + .entry(nesting_fork) + .or_insert_with(HashSet::new) + .insert(*control); } } fork_tree diff --git a/hercules_opt/src/fork_guard_elim.rs b/hercules_opt/src/fork_guard_elim.rs index f6914b74..df40e60f 100644 --- a/hercules_opt/src/fork_guard_elim.rs +++ b/hercules_opt/src/fork_guard_elim.rs @@ -76,13 +76,16 @@ fn guarded_fork( }; // Filter out any terms which are just 1s - let non_ones = xs.iter().filter(|i| { - if let DynamicConstant::Constant(1) = editor.get_dynamic_constant(**i).deref() { - false - } else { - true - } - }).collect::<Vec<_>>(); + let non_ones = xs + .iter() + .filter(|i| { + if let DynamicConstant::Constant(1) = editor.get_dynamic_constant(**i).deref() { + false + } else { + true + } + }) + .collect::<Vec<_>>(); // If we're left with just one term x, we had max { 1, x } if non_ones.len() == 1 { Factor::Max(idx, *non_ones[0]) @@ -326,7 +329,7 @@ pub fn fork_guard_elim(editor: &mut FunctionEditor, fork_join_map: &HashMap<Node guard_if, guard_join_region, } in guard_info - { + { let Some(guard_pred) = editor.get_uses(guard_if).next() else { unreachable!() }; diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs index 190dbd25..ed6283fd 100644 --- a/hercules_opt/src/fork_transforms.rs +++ b/hercules_opt/src/fork_transforms.rs @@ -708,14 +708,13 @@ pub fn chunk_all_forks_unguarded( Ok(edit) }); - for (fork, _ ) in fork_join_map { + for (fork, _) in fork_join_map { chunk_fork_unguarded(editor, *fork, dim_idx, dc_id); } - } -// Splits a dimension of a single fork join into multiple. -// Iterates an outer loop original_dim / tile_size times -// adds a tile_size loop as the inner loop +// Splits a dimension of a single fork join into multiple. +// Iterates an outer loop original_dim / tile_size times +// adds a tile_size loop as the inner loop // Assumes that tile size divides original dim evenly. pub fn chunk_fork_unguarded( editor: &mut FunctionEditor, @@ -724,42 +723,68 @@ pub fn chunk_fork_unguarded( tile_size: DynamicConstantID, ) -> () { // tid_dim_idx = tid_dim_idx * tile_size + tid_(dim_idx + 1) - - - let Node::Fork { control: old_control, factors: ref old_factors} = *editor.node(fork) else {return}; - - if dim_idx >= old_factors.len() { - return; // FIXME Error here? - } - + let Node::Fork { + control: old_control, + factors: ref old_factors, + } = *editor.node(fork) + else { + return; + }; + assert!(dim_idx < old_factors.len()); let mut new_factors: Vec<_> = old_factors.to_vec(); - - let fork_users: Vec<_> = editor.get_users(fork).map(|f| (f, editor.node(f).clone())).collect(); + let fork_users: Vec<_> = editor + .get_users(fork) + .map(|f| (f, editor.node(f).clone())) + .collect(); editor.edit(|mut edit| { let outer = DynamicConstant::div(new_factors[dim_idx], tile_size); new_factors.insert(dim_idx + 1, tile_size); new_factors[dim_idx] = edit.add_dynamic_constant(outer); - let new_fork = Node::Fork { control: old_control, factors: new_factors.into() }; + let new_fork = Node::Fork { + control: old_control, + factors: new_factors.into(), + }; let new_fork = edit.add_node(new_fork); edit = edit.replace_all_uses(fork, new_fork)?; for (tid, node) in fork_users { - let Node::ThreadID { control: _, dimension: tid_dim } = node else {continue}; + let Node::ThreadID { + control: _, + dimension: tid_dim, + } = node + else { + continue; + }; if tid_dim > dim_idx { - let new_tid = Node::ThreadID { control: new_fork, dimension: tid_dim + 1 }; + let new_tid = Node::ThreadID { + control: new_fork, + dimension: tid_dim + 1, + }; let new_tid = edit.add_node(new_tid); edit = edit.replace_all_uses(tid, new_tid)?; + edit = edit.delete_node(tid)?; } else if tid_dim == dim_idx { - let tile_tid = Node::ThreadID { control: new_fork, dimension: tid_dim + 1 }; + let tile_tid = Node::ThreadID { + control: new_fork, + dimension: tid_dim + 1, + }; let tile_tid = edit.add_node(tile_tid); - + let tile_size = edit.add_node(Node::DynamicConstant { id: tile_size }); - let mul = edit.add_node(Node::Binary { left: tid, right: tile_size, op: BinaryOperator::Mul }); - let add = edit.add_node(Node::Binary { left: mul, right: tile_tid, op: BinaryOperator::Add }); - edit = edit.replace_all_uses_where(tid, add, |usee| *usee != mul )?; + let mul = edit.add_node(Node::Binary { + left: tid, + right: tile_size, + op: BinaryOperator::Mul, + }); + let add = edit.add_node(Node::Binary { + left: mul, + right: tile_tid, + op: BinaryOperator::Add, + }); + edit = edit.replace_all_uses_where(tid, add, |usee| *usee != mul)?; } } edit = edit.delete_node(fork)?; @@ -767,13 +792,13 @@ pub fn chunk_fork_unguarded( }); } - -pub fn merge_all_fork_dims( - editor: &mut FunctionEditor, - fork_join_map: &HashMap<NodeID, NodeID>, -) { +pub fn merge_all_fork_dims(editor: &mut FunctionEditor, fork_join_map: &HashMap<NodeID, NodeID>) { for (fork, _) in fork_join_map { - let Node::Fork { control: _, factors: dims } = editor.node(fork) else { + let Node::Fork { + control: _, + factors: dims, + } = editor.node(fork) + else { unreachable!(); }; @@ -786,10 +811,6 @@ pub fn merge_all_fork_dims( } } -// Splits a dimension of a single fork join into multiple. -// Iterates an outer loop original_dim / tile_size times -// adds a tile_size loop as the inner loop -// Assumes that tile size divides original dim evenly. pub fn fork_dim_merge( editor: &mut FunctionEditor, fork: NodeID, @@ -806,61 +827,85 @@ pub fn fork_dim_merge( } else { (dim_idx1, dim_idx2) }; - - let Node::Fork { control: old_control, factors: ref old_factors} = *editor.node(fork) else {return fork}; - + let Node::Fork { + control: old_control, + factors: ref old_factors, + } = *editor.node(fork) + else { + return fork; + }; let mut new_factors: Vec<_> = old_factors.to_vec(); - - - let fork_users: Vec<_> = editor.get_users(fork).map(|f| (f, editor.node(f).clone())).collect(); - + let fork_users: Vec<_> = editor + .get_users(fork) + .map(|f| (f, editor.node(f).clone())) + .collect(); let mut new_nodes = vec![]; - let outer_dc_id = new_factors[outer_idx]; let inner_dc_id = new_factors[inner_idx]; - - let mut new_fork_id = NodeID::new(0); + let mut new_fork = NodeID::new(0); editor.edit(|mut edit| { - new_factors[outer_idx] = edit.add_dynamic_constant(DynamicConstant::mul(new_factors[outer_idx], new_factors[inner_idx])); + new_factors[outer_idx] = edit.add_dynamic_constant(DynamicConstant::mul( + new_factors[outer_idx], + new_factors[inner_idx], + )); new_factors.remove(inner_idx); - - let new_fork = Node::Fork { control: old_control, factors: new_factors.into() }; - let new_fork = edit.add_node(new_fork); - new_fork_id = new_fork; - + new_fork = edit.add_node(Node::Fork { + control: old_control, + factors: new_factors.into(), + }); edit.sub_edit(fork, new_fork); - edit = edit.replace_all_uses(fork, new_fork)?; edit = edit.delete_node(fork)?; for (tid, node) in fork_users { - // FIXME: DO we want sub edits in this? - - let Node::ThreadID { control: _, dimension: tid_dim } = node else { continue }; + let Node::ThreadID { + control: _, + dimension: tid_dim, + } = node + else { + continue; + }; if tid_dim > inner_idx { - let new_tid = Node::ThreadID { control: new_fork_id, dimension: tid_dim - 1 }; + let new_tid = Node::ThreadID { + control: new_fork_id, + dimension: tid_dim - 1, + }; let new_tid = edit.add_node(new_tid); edit = edit.replace_all_uses(tid, new_tid)?; edit.sub_edit(tid, new_tid); } else if tid_dim == outer_idx { - let outer_tid = Node::ThreadID { control: new_fork_id, dimension: outer_idx }; + let outer_tid = Node::ThreadID { + control: new_fork_id, + dimension: outer_idx, + }; let outer_tid = edit.add_node(outer_tid); let outer_dc = edit.add_node(Node::DynamicConstant { id: outer_dc_id }); new_nodes.push(outer_tid); // inner_idx % dim(outer_idx) - let rem = edit.add_node(Node::Binary { left: outer_tid, right: outer_dc, op: BinaryOperator::Rem}); + let rem = edit.add_node(Node::Binary { + left: outer_tid, + right: outer_dc, + op: BinaryOperator::Rem, + }); edit.sub_edit(tid, rem); edit = edit.replace_all_uses(tid, rem)?; } else if tid_dim == inner_idx { - let outer_tid = Node::ThreadID { control: new_fork_id, dimension: outer_idx }; + let outer_tid = Node::ThreadID { + control: new_fork_id, + dimension: outer_idx, + }; let outer_tid = edit.add_node(outer_tid); let outer_dc = edit.add_node(Node::DynamicConstant { id: outer_dc_id }); // inner_idx / dim(outer_idx) - let div = edit.add_node(Node::Binary { left: outer_tid, right: outer_dc, op: BinaryOperator::Div}); + let div = edit.add_node(Node::Binary { + left: outer_tid, + right: outer_dc, + op: BinaryOperator::Div, + }); edit.sub_edit(tid, div); edit = edit.replace_all_uses(tid, div)?; } @@ -868,6 +913,5 @@ pub fn fork_dim_merge( Ok(edit) }); - return new_fork_id; - -} \ No newline at end of file + new_fork +} diff --git a/hercules_opt/src/forkify.rs b/hercules_opt/src/forkify.rs index 0a2d5601..f6db06ca 100644 --- a/hercules_opt/src/forkify.rs +++ b/hercules_opt/src/forkify.rs @@ -300,32 +300,33 @@ pub fn forkify_loop( // Start failable edit: - let redcutionable_phis_and_init: Vec<(_, NodeID)> = - reductionable_phis.iter().map(|reduction_phi| { - - let LoopPHI::Reductionable { - phi, - data_cycle: _, - continue_latch, - is_associative: _, - } = reduction_phi - else { - panic!(); - }; + let redcutionable_phis_and_init: Vec<(_, NodeID)> = reductionable_phis + .iter() + .map(|reduction_phi| { + let LoopPHI::Reductionable { + phi, + data_cycle: _, + continue_latch, + is_associative: _, + } = reduction_phi + else { + panic!(); + }; - let function = editor.func(); + let function = editor.func(); - let init = *zip( - editor.get_uses(l.header), - function.nodes[phi.idx()].try_phi().unwrap().1.iter(), - ) - .filter(|(c, _)| *c == loop_pred) - .next() - .unwrap() - .1; + let init = *zip( + editor.get_uses(l.header), + function.nodes[phi.idx()].try_phi().unwrap().1.iter(), + ) + .filter(|(c, _)| *c == loop_pred) + .next() + .unwrap() + .1; - (reduction_phi, init) - }).collect(); + (reduction_phi, init) + }) + .collect(); editor.edit(|mut edit| { let thread_id = Node::ThreadID { @@ -351,14 +352,13 @@ pub fn forkify_loop( else { panic!(); }; - + let reduce = Node::Reduce { control: join_id, init, reduct: continue_latch, }; - - + let reduce_id = edit.add_node(reduce); if (!edit.get_node(init).is_reduce() @@ -387,7 +387,6 @@ pub fn forkify_loop( !loop_nodes.contains(usee) && *usee != reduce_id })?; edit = edit.delete_node(phi)? - } edit = edit.replace_all_uses(l.header, fork_id)?; @@ -401,7 +400,7 @@ pub fn forkify_loop( edit = edit.delete_node(l.header)?; Ok(edit) }); - + return true; } @@ -538,7 +537,11 @@ pub fn analyze_phis<'a>( // by the time the reduce is triggered (at the end of the loop's internal control). // If anything in the intersection is a phi (that isn't this own phi), then the reduction cycle depends on control. // Which is not allowed. - if intersection.iter().any(|cycle_node| editor.node(cycle_node).is_phi() && *cycle_node != *phi) || editor.node(loop_continue_latch).is_phi() { + if intersection + .iter() + .any(|cycle_node| editor.node(cycle_node).is_phi() && *cycle_node != *phi) + || editor.node(loop_continue_latch).is_phi() + { return LoopPHI::ControlDependant(*phi); } diff --git a/hercules_samples/dot/build.rs b/hercules_samples/dot/build.rs index 8657fdc1..c8de7e90 100644 --- a/hercules_samples/dot/build.rs +++ b/hercules_samples/dot/build.rs @@ -4,7 +4,11 @@ fn main() { JunoCompiler::new() .ir_in_src("dot.hir") .unwrap() - .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src(if cfg!(feature = "cuda") { + "gpu.sch" + } else { + "cpu.sch" + }) .unwrap() .build() .unwrap(); diff --git a/hercules_samples/dot/src/main.rs b/hercules_samples/dot/src/main.rs index 8862c11a..7f5b453a 100644 --- a/hercules_samples/dot/src/main.rs +++ b/hercules_samples/dot/src/main.rs @@ -1,8 +1,8 @@ #![feature(concat_idents)] -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("dot"); diff --git a/hercules_samples/matmul/build.rs b/hercules_samples/matmul/build.rs index 735458c0..ed92e022 100644 --- a/hercules_samples/matmul/build.rs +++ b/hercules_samples/matmul/build.rs @@ -4,7 +4,11 @@ fn main() { JunoCompiler::new() .ir_in_src("matmul.hir") .unwrap() - .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src(if cfg!(feature = "cuda") { + "gpu.sch" + } else { + "cpu.sch" + }) .unwrap() .build() .unwrap(); diff --git a/hercules_samples/matmul/src/main.rs b/hercules_samples/matmul/src/main.rs index abd25ec9..5c879915 100644 --- a/hercules_samples/matmul/src/main.rs +++ b/hercules_samples/matmul/src/main.rs @@ -2,9 +2,9 @@ use rand::random; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("matmul"); @@ -36,7 +36,9 @@ fn main() { let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; + let c = r + .run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()) + .await; let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); c.to_cpu_ref(&mut c_cpu); assert_eq!(&*c_cpu, &*correct_c); diff --git a/hercules_test/hercules_interpreter/src/interpreter.rs b/hercules_test/hercules_interpreter/src/interpreter.rs index 22ef062a..2e352644 100644 --- a/hercules_test/hercules_interpreter/src/interpreter.rs +++ b/hercules_test/hercules_interpreter/src/interpreter.rs @@ -69,18 +69,18 @@ pub fn dyn_const_value( match dc { DynamicConstant::Constant(v) => *v, DynamicConstant::Parameter(v) => dyn_const_params[*v], - DynamicConstant::Add(xs) => { - xs.iter().map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) - .fold(0, |s, v| s + v) - } + DynamicConstant::Add(xs) => xs + .iter() + .map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) + .fold(0, |s, v| s + v), DynamicConstant::Sub(a, b) => { dyn_const_value(a, dyn_const_values, dyn_const_params) - dyn_const_value(b, dyn_const_values, dyn_const_params) } - DynamicConstant::Mul(xs) => { - xs.iter().map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) - .fold(1, |p, v| p * v) - } + DynamicConstant::Mul(xs) => xs + .iter() + .map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) + .fold(1, |p, v| p * v), DynamicConstant::Div(a, b) => { dyn_const_value(a, dyn_const_values, dyn_const_params) / dyn_const_value(b, dyn_const_values, dyn_const_params) @@ -89,28 +89,28 @@ pub fn dyn_const_value( dyn_const_value(a, dyn_const_values, dyn_const_params) % dyn_const_value(b, dyn_const_values, dyn_const_params) } - DynamicConstant::Max(xs) => { - xs.iter().map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) - .fold(None, |m, v| { - if let Some(m) = m { - Some(max(m, v)) - } else { - Some(v) - } - }) - .unwrap() - } - DynamicConstant::Min(xs) => { - xs.iter().map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) - .fold(None, |m, v| { - if let Some(m) = m { - Some(min(m, v)) - } else { - Some(v) - } - }) - .unwrap() - } + DynamicConstant::Max(xs) => xs + .iter() + .map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) + .fold(None, |m, v| { + if let Some(m) = m { + Some(max(m, v)) + } else { + Some(v) + } + }) + .unwrap(), + DynamicConstant::Min(xs) => xs + .iter() + .map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) + .fold(None, |m, v| { + if let Some(m) = m { + Some(min(m, v)) + } else { + Some(v) + } + }) + .unwrap(), } } @@ -775,15 +775,13 @@ impl<'a> FunctionExecutionState<'a> { // panic!("multi-dimensional forks unimplemented") // } - let factors = factors - .iter() - .map(|f| { - dyn_const_value( - &f, - &self.module.dynamic_constants, - &self.dynamic_constant_params, - ) - }); + let factors = factors.iter().map(|f| { + dyn_const_value( + &f, + &self.module.dynamic_constants, + &self.dynamic_constant_params, + ) + }); let n_tokens: usize = factors.clone().product(); diff --git a/hercules_test/hercules_interpreter/src/value.rs b/hercules_test/hercules_interpreter/src/value.rs index adbed6e6..4a802f7a 100644 --- a/hercules_test/hercules_interpreter/src/value.rs +++ b/hercules_test/hercules_interpreter/src/value.rs @@ -156,7 +156,15 @@ impl<'a> InterpreterVal { Constant::Float64(v) => Self::Float64(v), Constant::Product(ref type_id, ref constant_ids) => { - let contents = constant_ids.iter().map(|const_id| InterpreterVal::from_constant(&constants[const_id.idx()], constants, types, dynamic_constants, dynamic_constant_params)); + let contents = constant_ids.iter().map(|const_id| { + InterpreterVal::from_constant( + &constants[const_id.idx()], + constants, + types, + dynamic_constants, + dynamic_constant_params, + ) + }); InterpreterVal::Product(*type_id, contents.collect_vec().into_boxed_slice()) } Constant::Summation(_, _, _) => todo!(), diff --git a/hercules_test/hercules_tests/tests/loop_tests.rs b/hercules_test/hercules_tests/tests/loop_tests.rs index 192c1366..795642b2 100644 --- a/hercules_test/hercules_tests/tests/loop_tests.rs +++ b/hercules_test/hercules_tests/tests/loop_tests.rs @@ -35,9 +35,7 @@ fn alternate_bounds_use_after_loop_no_tid() { println!("result: {:?}", result_1); - let schedule = default_schedule![ - Forkify, - ]; + let schedule = default_schedule![Forkify,]; let module = run_schedule_on_hercules(module, Some(schedule)).unwrap(); @@ -61,9 +59,7 @@ fn alternate_bounds_use_after_loop() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - Forkify, - ]); + let schedule = Some(default_schedule![Forkify,]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -108,10 +104,7 @@ fn do_while_separate_body() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - PhiElim, - Forkify, - ]); + let schedule = Some(default_schedule![PhiElim, Forkify,]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -131,10 +124,7 @@ fn alternate_bounds_internal_control() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - PhiElim, - Forkify, - ]); + let schedule = Some(default_schedule![PhiElim, Forkify,]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -155,10 +145,7 @@ fn alternate_bounds_internal_control2() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - PhiElim, - Forkify, - ]); + let schedule = Some(default_schedule![PhiElim, Forkify,]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -366,16 +353,13 @@ fn look_at_local() { "/home/xavierrouth/dev/hercules/hercules_test/hercules_tests/save_me.hbin", ); - let schedule = Some(default_schedule![ - ]); + let schedule = Some(default_schedule![]); let result_1 = interp_module!(module, 0, dyn_consts, a.clone(), b.clone()); let module = run_schedule_on_hercules(module.clone(), schedule).unwrap(); - let schedule = Some(default_schedule![ - Unforkify, Verify, - ]); + let schedule = Some(default_schedule![Unforkify, Verify,]); let module = run_schedule_on_hercules(module.clone(), schedule).unwrap(); @@ -425,7 +409,15 @@ fn matmul_pipeline() { }; assert_eq!(correct_c[0], value); - let schedule = Some(default_schedule![AutoOutline, InterproceduralSROA, SROA, InferSchedules, DCE, Xdot, GCM]); + let schedule = Some(default_schedule![ + AutoOutline, + InterproceduralSROA, + SROA, + InferSchedules, + DCE, + Xdot, + GCM + ]); module = run_schedule_on_hercules(module, schedule).unwrap(); diff --git a/juno_frontend/src/semant.rs b/juno_frontend/src/semant.rs index e133e3c2..8668d1b4 100644 --- a/juno_frontend/src/semant.rs +++ b/juno_frontend/src/semant.rs @@ -752,7 +752,16 @@ fn analyze_program( } arg_info.push((ty, inout.is_some(), var)); - match process_irrefutable_pattern(pattern, false, var, ty, lexer, &mut stringtab, &mut env, &mut types) { + match process_irrefutable_pattern( + pattern, + false, + var, + ty, + lexer, + &mut stringtab, + &mut env, + &mut types, + ) { Ok(prep) => { stmts.extend(prep); } diff --git a/juno_samples/cava/src/main.rs b/juno_samples/cava/src/main.rs index 482bbf8d..e8a7e4e9 100644 --- a/juno_samples/cava/src/main.rs +++ b/juno_samples/cava/src/main.rs @@ -8,9 +8,9 @@ use self::camera_model::*; use self::cava_rust::CHAN; use self::image_proc::*; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; use image::ImageError; @@ -31,7 +31,6 @@ fn run_cava( coefs: &[f32], tonemap: &[f32], ) -> Box<[u8]> { - assert_eq!(image.len(), CHAN * rows * cols); assert_eq!(tstw.len(), CHAN * CHAN); assert_eq!(ctrl_pts.len(), num_ctrl_pts * CHAN); @@ -47,21 +46,24 @@ fn run_cava( let weights = HerculesCPURef::from_slice(weights); let coefs = HerculesCPURef::from_slice(coefs); let tonemap = HerculesCPURef::from_slice(tonemap); - let mut r = runner!(cava); - async_std::task::block_on(async { - r.run( - rows as u64, - cols as u64, - num_ctrl_pts as u64, - image, - tstw, - ctrl_pts, - weights, - coefs, - tonemap, - ) - .await - }).as_slice::<u8>().to_vec().into_boxed_slice() + let mut r = runner!(cava); + async_std::task::block_on(async { + r.run( + rows as u64, + cols as u64, + num_ctrl_pts as u64, + image, + tstw, + ctrl_pts, + weights, + coefs, + tonemap, + ) + .await + }) + .as_slice::<u8>() + .to_vec() + .into_boxed_slice() } #[cfg(feature = "cuda")] @@ -72,8 +74,8 @@ fn run_cava( let weights = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(weights)); let coefs = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(coefs)); let tonemap = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(tonemap)); - let mut r = runner!(cava); - let res = async_std::task::block_on(async { + let mut r = runner!(cava); + let res = async_std::task::block_on(async { r.run( rows as u64, cols as u64, @@ -86,7 +88,7 @@ fn run_cava( tonemap.get_ref(), ) .await - }); + }); let num_out = unsafe { res.__size() / std::mem::size_of::<u8>() }; let mut res_cpu: Box<[u8]> = vec![0; num_out].into_boxed_slice(); res.to_cpu_ref(&mut res_cpu); @@ -204,7 +206,8 @@ fn cava_harness(args: CavaInputs) { .expect("Error saving verification image"); } - let max_diff = result.iter() + let max_diff = result + .iter() .zip(cpu_result.iter()) .map(|(a, b)| (*a as i16 - *b as i16).abs()) .max() diff --git a/juno_samples/concat/src/main.rs b/juno_samples/concat/src/main.rs index 9674c2c5..547dee08 100644 --- a/juno_samples/concat/src/main.rs +++ b/juno_samples/concat/src/main.rs @@ -1,9 +1,9 @@ #![feature(concat_idents)] use hercules_rt::runner; -use hercules_rt::HerculesCPURef; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::HerculesCPURef; juno_build::juno!("concat"); @@ -20,7 +20,7 @@ fn main() { assert_eq!(output, 42); const N: usize = 3; - let arr : Box<[i32]> = (2..=4).collect(); + let arr: Box<[i32]> = (2..=4).collect(); let arr = HerculesCPURef::from_slice(&arr); let mut r = runner!(concat_switch); diff --git a/juno_samples/edge_detection/src/main.rs b/juno_samples/edge_detection/src/main.rs index eda65016..3b067ebd 100644 --- a/juno_samples/edge_detection/src/main.rs +++ b/juno_samples/edge_detection/src/main.rs @@ -2,9 +2,9 @@ mod edge_detection_rust; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; use std::slice::from_raw_parts; @@ -228,9 +228,9 @@ fn edge_detection_harness(args: EdgeDetectionInputs) { }); #[cfg(not(feature = "cuda"))] - let result : Box<[f32]> = result.as_slice::<f32>().to_vec().into_boxed_slice(); + let result: Box<[f32]> = result.as_slice::<f32>().to_vec().into_boxed_slice(); #[cfg(feature = "cuda")] - let result : Box<[f32]> = { + let result: Box<[f32]> = { let num_out = unsafe { result.__size() / std::mem::size_of::<f32>() }; let mut res_cpu: Box<[f32]> = vec![0.0; num_out].into_boxed_slice(); result.to_cpu_ref(&mut res_cpu); @@ -261,7 +261,10 @@ fn edge_detection_harness(args: EdgeDetectionInputs) { theta, ); - assert_eq!(result.as_ref(), <Vec<f32> as AsRef<[f32]>>::as_ref(&rust_result)); + assert_eq!( + result.as_ref(), + <Vec<f32> as AsRef<[f32]>>::as_ref(&rust_result) + ); println!("Frames {} match", i); if display_verify { diff --git a/juno_samples/matmul/src/main.rs b/juno_samples/matmul/src/main.rs index 50fe1760..2892cd34 100644 --- a/juno_samples/matmul/src/main.rs +++ b/juno_samples/matmul/src/main.rs @@ -2,9 +2,9 @@ use rand::random; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("matmul"); @@ -28,10 +28,14 @@ fn main() { let a = HerculesCPURef::from_slice(&a); let b = HerculesCPURef::from_slice(&b); let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; + let c = r + .run(I as u64, J as u64, K as u64, a.clone(), b.clone()) + .await; assert_eq!(c.as_slice::<i32>(), &*correct_c); let mut r = runner!(tiled_64_matmul); - let tiled_c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; + let tiled_c = r + .run(I as u64, J as u64, K as u64, a.clone(), b.clone()) + .await; assert_eq!(tiled_c.as_slice::<i32>(), &*correct_c); } #[cfg(feature = "cuda")] @@ -39,12 +43,16 @@ fn main() { let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; + let c = r + .run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()) + .await; let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); c.to_cpu_ref(&mut c_cpu); assert_eq!(&*c_cpu, &*correct_c); let mut r = runner!(tiled_64_matmul); - let tiled_c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; + let tiled_c = r + .run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()) + .await; let mut tiled_c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); tiled_c.to_cpu_ref(&mut tiled_c_cpu); assert_eq!(&*tiled_c_cpu, &*correct_c); diff --git a/juno_samples/nested_ccp/src/main.rs b/juno_samples/nested_ccp/src/main.rs index bc99a4bd..b364c03c 100644 --- a/juno_samples/nested_ccp/src/main.rs +++ b/juno_samples/nested_ccp/src/main.rs @@ -1,8 +1,8 @@ #![feature(concat_idents)] -use hercules_rt::{runner, HerculesCPURef, HerculesCPURefMut}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef, HerculesCPURefMut}; juno_build::juno!("nested_ccp"); diff --git a/juno_samples/patterns/src/main.rs b/juno_samples/patterns/src/main.rs index 5cc2e7c8..a5586c8b 100644 --- a/juno_samples/patterns/src/main.rs +++ b/juno_samples/patterns/src/main.rs @@ -1,6 +1,6 @@ #![feature(concat_idents)] -use hercules_rt::{runner}; +use hercules_rt::runner; juno_build::juno!("patterns"); diff --git a/juno_samples/schedule_test/build.rs b/juno_samples/schedule_test/build.rs index 749a660c..0129c4de 100644 --- a/juno_samples/schedule_test/build.rs +++ b/juno_samples/schedule_test/build.rs @@ -4,7 +4,11 @@ fn main() { JunoCompiler::new() .file_in_src("code.jn") .unwrap() - .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src(if cfg!(feature = "cuda") { + "gpu.sch" + } else { + "cpu.sch" + }) .unwrap() .build() .unwrap(); diff --git a/juno_samples/schedule_test/src/main.rs b/juno_samples/schedule_test/src/main.rs index 1505d4e5..f769e750 100644 --- a/juno_samples/schedule_test/src/main.rs +++ b/juno_samples/schedule_test/src/main.rs @@ -2,9 +2,9 @@ use rand::random; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("code"); @@ -43,7 +43,16 @@ fn main() { let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); let c = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&c)); let mut r = runner!(test); - let res = r.run(N as u64, M as u64, K as u64, a.get_ref(), b.get_ref(), c.get_ref()).await; + let res = r + .run( + N as u64, + M as u64, + K as u64, + a.get_ref(), + b.get_ref(), + c.get_ref(), + ) + .await; let mut res_cpu: Box<[i32]> = vec![0; correct_res.len()].into_boxed_slice(); res.to_cpu_ref(&mut res_cpu); assert_eq!(&*res_cpu, &*correct_res); diff --git a/juno_samples/simple3/src/main.rs b/juno_samples/simple3/src/main.rs index 8eb78f7c..687ff414 100644 --- a/juno_samples/simple3/src/main.rs +++ b/juno_samples/simple3/src/main.rs @@ -1,8 +1,8 @@ #![feature(concat_idents)] -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("simple3"); diff --git a/juno_scheduler/src/compile.rs b/juno_scheduler/src/compile.rs index ea06a0f2..713c30d4 100644 --- a/juno_scheduler/src/compile.rs +++ b/juno_scheduler/src/compile.rs @@ -108,7 +108,7 @@ impl FromStr for Appliable { "inline" => Ok(Appliable::Pass(ir::Pass::Inline)), "ip-sroa" | "interprocedural-sroa" => { Ok(Appliable::Pass(ir::Pass::InterproceduralSROA)) - }, + } "fork-dim-merge" => Ok(Appliable::Pass(ir::Pass::ForkDimMerge)), "fork-chunk" | "fork-tile" => Ok(Appliable::Pass(ir::Pass::ForkChunk)), "lift-dc-math" => Ok(Appliable::Pass(ir::Pass::LiftDCMath)), diff --git a/juno_scheduler/src/ir.rs b/juno_scheduler/src/ir.rs index 796437a7..9e85509f 100644 --- a/juno_scheduler/src/ir.rs +++ b/juno_scheduler/src/ir.rs @@ -36,7 +36,7 @@ impl Pass { pub fn num_args(&self) -> usize { match self { Pass::Xdot => 1, - Pass::ForkChunk => 3, + Pass::ForkChunk => 3, _ => 0, } } diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index d176b636..2142d5c5 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -1566,7 +1566,7 @@ fn run_pass( // this eventually. let c = forkify(&mut func, control_subgraph, fork_join_map, loop_nest); changed |= c; - inner_changed |= c; + inner_changed |= c; } pm.delete_gravestones(); pm.clear_analyses(); @@ -1921,24 +1921,32 @@ fn run_pass( let dim_idx = args.get(1); let Some(Value::Boolean { val: guarded_flag }) = args.get(2) else { - panic!(); // How to error here? + return Err(SchedulerError::PassError { + pass: "forkChunk".to_string(), + error: "expected boolean argument".to_string(), + }); }; let Some(Value::Integer { val: dim_idx }) = args.get(1) else { - panic!(); // How to error here? + return Err(SchedulerError::PassError { + pass: "forkChunk".to_string(), + error: "expected integer argument".to_string(), + }); }; let Some(Value::Integer { val: tile_size }) = args.get(0) else { - panic!(); // How to error here? + return Err(SchedulerError::PassError { + pass: "forkChunk".to_string(), + error: "expected integer argument".to_string(), + }); }; assert_eq!(*guarded_flag, true); pm.make_fork_join_maps(); let fork_join_maps = pm.fork_join_maps.take().unwrap(); - for (func, fork_join_map) in - build_selection(pm, selection) - .into_iter() - .zip(fork_join_maps.iter()) + for (func, fork_join_map) in build_selection(pm, selection) + .into_iter() + .zip(fork_join_maps.iter()) { let Some(mut func) = func else { continue; @@ -1953,10 +1961,9 @@ fn run_pass( assert!(args.is_empty()); pm.make_fork_join_maps(); let fork_join_maps = pm.fork_join_maps.take().unwrap(); - for (func, fork_join_map) in - build_selection(pm, selection) - .into_iter() - .zip(fork_join_maps.iter()) + for (func, fork_join_map) in build_selection(pm, selection) + .into_iter() + .zip(fork_join_maps.iter()) { let Some(mut func) = func else { continue; -- GitLab