diff --git a/hercules_cg/src/fork_tree.rs b/hercules_cg/src/fork_tree.rs index 64a93160aabc5564c784bf0ff8b14e82045b81bb..c048f7e388a7eb4f73c0c866896fadfa5d2556a1 100644 --- a/hercules_cg/src/fork_tree.rs +++ b/hercules_cg/src/fork_tree.rs @@ -9,11 +9,16 @@ use crate::*; * c) no domination by any other fork that's also dominated by F, where we do count self-domination * Here too we include the non-fork start node, as key for all controls outside any fork. */ -pub fn fork_control_map(fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>) -> HashMap<NodeID, HashSet<NodeID>> { +pub fn fork_control_map( + fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>, +) -> HashMap<NodeID, HashSet<NodeID>> { let mut fork_control_map = HashMap::new(); for (control, forks) in fork_join_nesting { let fork = forks.first().copied().unwrap_or(NodeID::new(0)); - fork_control_map.entry(fork).or_insert_with(HashSet::new).insert(*control); + fork_control_map + .entry(fork) + .or_insert_with(HashSet::new) + .insert(*control); } fork_control_map } @@ -24,13 +29,19 @@ pub fn fork_control_map(fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>) -> Has * c) no domination by any other fork that's also dominated by F, where we don't count self-domination * Note that the fork_tree also includes the non-fork start node, as unique root node. */ -pub fn fork_tree(function: &Function, fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>) -> HashMap<NodeID, HashSet<NodeID>> { +pub fn fork_tree( + function: &Function, + fork_join_nesting: &HashMap<NodeID, Vec<NodeID>>, +) -> HashMap<NodeID, HashSet<NodeID>> { let mut fork_tree = HashMap::new(); for (control, forks) in fork_join_nesting { if function.nodes[control.idx()].is_fork() { fork_tree.entry(*control).or_insert_with(HashSet::new); let nesting_fork = forks.get(1).copied().unwrap_or(NodeID::new(0)); - fork_tree.entry(nesting_fork).or_insert_with(HashSet::new).insert(*control); + fork_tree + .entry(nesting_fork) + .or_insert_with(HashSet::new) + .insert(*control); } } fork_tree diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index 1086d1aa3f3c662e12da9f28102d4fc89f321bec..f20bfeb727dc6233dbb212a72c3a25b0c720cf36 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -1521,6 +1521,8 @@ extern \"C\" {} {}(", let collect_variable = self.get_value(*collect, false, false); write!(w, "{}{} = {};\n", tabs, define_variable, collect_variable)?; } + // Undef nodes never need to be assigned to. + Node::Undef { ty: _ } => {} _ => { panic!( "Unsupported data node type: {:?}", diff --git a/hercules_opt/src/fork_guard_elim.rs b/hercules_opt/src/fork_guard_elim.rs index 052fd0e493327fceb3bd1b1918d4d4aafc93bf79..eb755509e48eccb02a311fb6004abd37d263c7f8 100644 --- a/hercules_opt/src/fork_guard_elim.rs +++ b/hercules_opt/src/fork_guard_elim.rs @@ -77,13 +77,16 @@ fn guarded_fork( }; // Filter out any terms which are just 1s - let non_ones = xs.iter().filter(|i| { - if let DynamicConstant::Constant(1) = editor.get_dynamic_constant(**i).deref() { - false - } else { - true - } - }).collect::<Vec<_>>(); + let non_ones = xs + .iter() + .filter(|i| { + if let DynamicConstant::Constant(1) = editor.get_dynamic_constant(**i).deref() { + false + } else { + true + } + }) + .collect::<Vec<_>>(); // If we're left with just one term x, we had max { 1, x } if non_ones.len() == 1 { Factor::Max(idx, *non_ones[0]) diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index d9505fde5e5f7ec19be86ded7cbd4af6e467869a..b2f9767c9cd2eb56b698f05d7b93dbba59c8681e 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -181,8 +181,13 @@ fn preliminary_fixups( reduce_cycles: &HashMap<NodeID, HashSet<NodeID>>, ) -> bool { let nodes = &editor.func().nodes; + let schedules = &editor.func().schedules; + + // Sequentialize non-parallel forks that contain problematic reduce cycles. for (reduce, cycle) in reduce_cycles { - if cycle.into_iter().any(|id| nodes[id.idx()].is_reduce()) { + if !schedules[reduce.idx()].contains(&Schedule::ParallelReduce) + && cycle.into_iter().any(|id| nodes[id.idx()].is_reduce()) + { let join = nodes[reduce.idx()].try_reduce().unwrap().0; let fork = fork_join_map .into_iter() @@ -198,6 +203,31 @@ fn preliminary_fixups( return true; } } + + // Get rid of the backward edge on parallel reduces in fork-joins. + for (_, join) in fork_join_map { + let parallel_reduces: Vec<_> = editor + .get_users(*join) + .filter(|id| { + nodes[id.idx()].is_reduce() + && schedules[id.idx()].contains(&Schedule::ParallelReduce) + }) + .collect(); + for reduce in parallel_reduces { + if reduce_cycles[&reduce].is_empty() { + continue; + } + let (_, init, _) = nodes[reduce.idx()].try_reduce().unwrap(); + + // Replace uses of the reduce in its cycle with the init. + let success = editor.edit(|edit| { + edit.replace_all_uses_where(reduce, init, |id| reduce_cycles[&reduce].contains(id)) + }); + assert!(success); + return true; + } + } + false } @@ -511,7 +541,8 @@ fn basic_blocks( // outside of reduce loops. Nodes that do need to be in a reduce // loop use the reduce node forming the loop, so the dominator chain // will consist of one block, and this loop won't ever iterate. - let currently_at_join = function.nodes[location.idx()].is_join(); + let currently_at_join = function.nodes[location.idx()].is_join() + && !function.nodes[control_node.idx()].is_join(); if (!is_constant_or_undef || is_gpu_returned) && (shallower_nest || currently_at_join) @@ -811,7 +842,14 @@ fn spill_clones( .into_iter() .any(|u| *u == *b) && (editor.func().nodes[a.idx()].is_phi() - || editor.func().nodes[a.idx()].is_reduce())) + || editor.func().nodes[a.idx()].is_reduce()) + && !editor.func().nodes[a.idx()] + .try_reduce() + .map(|(_, init, _)| { + init == *b + && editor.func().schedules[a.idx()].contains(&Schedule::ParallelReduce) + }) + .unwrap_or(false)) }); // Step 3: if there is a spill edge, spill it and return true. Otherwise, @@ -989,15 +1027,16 @@ fn liveness_dataflow( } let mut num_phis_reduces = vec![0; function.nodes.len()]; let mut has_phi = vec![false; function.nodes.len()]; - let mut has_reduce = vec![false; function.nodes.len()]; + let mut has_seq_reduce = vec![false; function.nodes.len()]; for (node_idx, bb) in bbs.0.iter().enumerate() { let node = &function.nodes[node_idx]; if node.is_phi() || node.is_reduce() { num_phis_reduces[bb.idx()] += 1; } has_phi[bb.idx()] = node.is_phi(); - has_reduce[bb.idx()] = node.is_reduce(); - assert!(!has_phi[bb.idx()] || !has_reduce[bb.idx()]); + has_seq_reduce[bb.idx()] = + node.is_reduce() && !function.schedules[node_idx].contains(&Schedule::ParallelReduce); + assert!(!node.is_phi() || !node.is_reduce()); } let is_obj = |id: NodeID| !objects[&func_id].objects(id).is_empty(); @@ -1009,11 +1048,14 @@ fn liveness_dataflow( let last_pt = bbs.1[bb.idx()].len(); let old_value = &liveness[&bb][last_pt]; let mut new_value = BTreeSet::new(); - for succ in control_subgraph.succs(*bb).chain(if has_reduce[bb.idx()] { - Either::Left(once(*bb)) - } else { - Either::Right(empty()) - }) { + for succ in control_subgraph + .succs(*bb) + .chain(if has_seq_reduce[bb.idx()] { + Either::Left(once(*bb)) + } else { + Either::Right(empty()) + }) + { // The liveness at the bottom of a basic block is the union of: // 1. The liveness of each succecessor right after its phis and // reduces. @@ -1041,7 +1083,9 @@ fn liveness_dataflow( assert_eq!(control, succ); if succ == *bb { new_value.insert(reduct); - } else { + } else if !function.schedules[id.idx()] + .contains(&Schedule::ParallelReduce) + { new_value.insert(init); } } @@ -1058,6 +1102,7 @@ fn liveness_dataflow( let mut new_value = liveness[&bb][pt + 1].clone(); let id = bbs.1[bb.idx()][pt]; let uses = get_uses(&function.nodes[id.idx()]); + let is_obj = |id: &NodeID| is_obj(*id); new_value.remove(&id); new_value.extend( if let Node::Write { @@ -1070,14 +1115,19 @@ fn liveness_dataflow( // If this write is a cloning write, the `collect` input // isn't actually live, because its value doesn't // matter. - Either::Left(once(data).filter(|id| is_obj(*id))) + Either::Left(once(data).filter(is_obj)) + } else if let Node::Reduce { + control: _, + init: _, + reduct, + } = function.nodes[id.idx()] + && function.schedules[id.idx()].contains(&Schedule::ParallelReduce) + { + // If this reduce is a parallel reduce, the `init` input + // isn't actually live. + Either::Left(once(reduct).filter(is_obj)) } else { - Either::Right( - uses.as_ref() - .into_iter() - .map(|id| *id) - .filter(|id| is_obj(*id)), - ) + Either::Right(uses.as_ref().into_iter().map(|id| *id).filter(is_obj)) }, ); changed |= *old_value != new_value; diff --git a/hercules_samples/dot/build.rs b/hercules_samples/dot/build.rs index 8657fdc166fe68ad2565a8a0736984c7991be0a7..c8de7e9009a2a4a30831f8826d3f60b46d3e536a 100644 --- a/hercules_samples/dot/build.rs +++ b/hercules_samples/dot/build.rs @@ -4,7 +4,11 @@ fn main() { JunoCompiler::new() .ir_in_src("dot.hir") .unwrap() - .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src(if cfg!(feature = "cuda") { + "gpu.sch" + } else { + "cpu.sch" + }) .unwrap() .build() .unwrap(); diff --git a/hercules_samples/dot/src/main.rs b/hercules_samples/dot/src/main.rs index 8862c11a9273f9808f2148f1067dcf3f5953c11f..7f5b453ab426f1ce0ab220682ce6be89bf851305 100644 --- a/hercules_samples/dot/src/main.rs +++ b/hercules_samples/dot/src/main.rs @@ -1,8 +1,8 @@ #![feature(concat_idents)] -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("dot"); diff --git a/hercules_samples/matmul/build.rs b/hercules_samples/matmul/build.rs index 735458c0c8be76bdae6cd7b3b308e38ccae78edd..ed92e02249a754cb0cbc41b8e39e97bfcf93c9ed 100644 --- a/hercules_samples/matmul/build.rs +++ b/hercules_samples/matmul/build.rs @@ -4,7 +4,11 @@ fn main() { JunoCompiler::new() .ir_in_src("matmul.hir") .unwrap() - .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src(if cfg!(feature = "cuda") { + "gpu.sch" + } else { + "cpu.sch" + }) .unwrap() .build() .unwrap(); diff --git a/hercules_samples/matmul/src/main.rs b/hercules_samples/matmul/src/main.rs index abd25ec9cddbd4be508b3f484cffc1df1365dc4d..5c87991569ab59e8d978f17d74f1d18423679669 100644 --- a/hercules_samples/matmul/src/main.rs +++ b/hercules_samples/matmul/src/main.rs @@ -2,9 +2,9 @@ use rand::random; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("matmul"); @@ -36,7 +36,9 @@ fn main() { let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; + let c = r + .run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()) + .await; let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); c.to_cpu_ref(&mut c_cpu); assert_eq!(&*c_cpu, &*correct_c); diff --git a/hercules_test/hercules_interpreter/src/interpreter.rs b/hercules_test/hercules_interpreter/src/interpreter.rs index 871e304a2f8fb285cc9d8c64d4aa62ec5eef3a1d..709c64fb0f8b45d7903d6b1e7f1a5d5ee2f28185 100644 --- a/hercules_test/hercules_interpreter/src/interpreter.rs +++ b/hercules_test/hercules_interpreter/src/interpreter.rs @@ -69,18 +69,18 @@ pub fn dyn_const_value( match dc { DynamicConstant::Constant(v) => *v, DynamicConstant::Parameter(v) => dyn_const_params[*v], - DynamicConstant::Add(xs) => { - xs.iter().map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) - .fold(0, |s, v| s + v) - } + DynamicConstant::Add(xs) => xs + .iter() + .map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) + .fold(0, |s, v| s + v), DynamicConstant::Sub(a, b) => { dyn_const_value(a, dyn_const_values, dyn_const_params) - dyn_const_value(b, dyn_const_values, dyn_const_params) } - DynamicConstant::Mul(xs) => { - xs.iter().map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) - .fold(1, |p, v| p * v) - } + DynamicConstant::Mul(xs) => xs + .iter() + .map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) + .fold(1, |p, v| p * v), DynamicConstant::Div(a, b) => { dyn_const_value(a, dyn_const_values, dyn_const_params) / dyn_const_value(b, dyn_const_values, dyn_const_params) @@ -89,28 +89,28 @@ pub fn dyn_const_value( dyn_const_value(a, dyn_const_values, dyn_const_params) % dyn_const_value(b, dyn_const_values, dyn_const_params) } - DynamicConstant::Max(xs) => { - xs.iter().map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) - .fold(None, |m, v| { - if let Some(m) = m { - Some(max(m, v)) - } else { - Some(v) - } - }) - .unwrap() - } - DynamicConstant::Min(xs) => { - xs.iter().map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) - .fold(None, |m, v| { - if let Some(m) = m { - Some(min(m, v)) - } else { - Some(v) - } - }) - .unwrap() - } + DynamicConstant::Max(xs) => xs + .iter() + .map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) + .fold(None, |m, v| { + if let Some(m) = m { + Some(max(m, v)) + } else { + Some(v) + } + }) + .unwrap(), + DynamicConstant::Min(xs) => xs + .iter() + .map(|x| dyn_const_value(x, dyn_const_values, dyn_const_params)) + .fold(None, |m, v| { + if let Some(m) = m { + Some(min(m, v)) + } else { + Some(v) + } + }) + .unwrap(), } } diff --git a/hercules_test/hercules_tests/tests/loop_tests.rs b/hercules_test/hercules_tests/tests/loop_tests.rs index 5832a161a18f18ea43860c1c5d6364385d0f187f..7e40f602294243bd93e5ba3d075c10cfffa5d614 100644 --- a/hercules_test/hercules_tests/tests/loop_tests.rs +++ b/hercules_test/hercules_tests/tests/loop_tests.rs @@ -35,9 +35,7 @@ fn alternate_bounds_use_after_loop_no_tid() { println!("result: {:?}", result_1); - let schedule = default_schedule![ - Forkify, - ]; + let schedule = default_schedule![Forkify,]; let module = run_schedule_on_hercules(module, Some(schedule)).unwrap(); @@ -61,9 +59,7 @@ fn alternate_bounds_use_after_loop() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - Forkify, - ]); + let schedule = Some(default_schedule![Forkify,]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -108,10 +104,7 @@ fn do_while_separate_body() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - PhiElim, - Forkify, - ]); + let schedule = Some(default_schedule![PhiElim, Forkify,]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -131,10 +124,7 @@ fn alternate_bounds_internal_control() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - PhiElim, - Forkify, - ]); + let schedule = Some(default_schedule![PhiElim, Forkify,]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -155,10 +145,7 @@ fn alternate_bounds_internal_control2() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - PhiElim, - Forkify, - ]); + let schedule = Some(default_schedule![PhiElim, Forkify,]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -366,16 +353,13 @@ fn look_at_local() { "/home/xavierrouth/dev/hercules/hercules_test/hercules_tests/save_me.hbin", ); - let schedule = Some(default_schedule![ - ]); + let schedule = Some(default_schedule![]); let result_1 = interp_module!(module, 0, dyn_consts, a.clone(), b.clone()); let module = run_schedule_on_hercules(module.clone(), schedule).unwrap(); - let schedule = Some(default_schedule![ - Unforkify, Verify, - ]); + let schedule = Some(default_schedule![Unforkify, Verify,]); let module = run_schedule_on_hercules(module.clone(), schedule).unwrap(); diff --git a/juno_frontend/src/semant.rs b/juno_frontend/src/semant.rs index e133e3c20b7590eb372756cfdbce1f732d57d4f6..8668d1b45f7d68cff90ec90f7b2b55953b8d5a9c 100644 --- a/juno_frontend/src/semant.rs +++ b/juno_frontend/src/semant.rs @@ -752,7 +752,16 @@ fn analyze_program( } arg_info.push((ty, inout.is_some(), var)); - match process_irrefutable_pattern(pattern, false, var, ty, lexer, &mut stringtab, &mut env, &mut types) { + match process_irrefutable_pattern( + pattern, + false, + var, + ty, + lexer, + &mut stringtab, + &mut env, + &mut types, + ) { Ok(prep) => { stmts.extend(prep); } diff --git a/juno_samples/cava/src/main.rs b/juno_samples/cava/src/main.rs index 482bbf8deb0af323255b004a7e33e70202acb886..e8a7e4e94393c8684c2a10a1e040e3be3f2600cb 100644 --- a/juno_samples/cava/src/main.rs +++ b/juno_samples/cava/src/main.rs @@ -8,9 +8,9 @@ use self::camera_model::*; use self::cava_rust::CHAN; use self::image_proc::*; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; use image::ImageError; @@ -31,7 +31,6 @@ fn run_cava( coefs: &[f32], tonemap: &[f32], ) -> Box<[u8]> { - assert_eq!(image.len(), CHAN * rows * cols); assert_eq!(tstw.len(), CHAN * CHAN); assert_eq!(ctrl_pts.len(), num_ctrl_pts * CHAN); @@ -47,21 +46,24 @@ fn run_cava( let weights = HerculesCPURef::from_slice(weights); let coefs = HerculesCPURef::from_slice(coefs); let tonemap = HerculesCPURef::from_slice(tonemap); - let mut r = runner!(cava); - async_std::task::block_on(async { - r.run( - rows as u64, - cols as u64, - num_ctrl_pts as u64, - image, - tstw, - ctrl_pts, - weights, - coefs, - tonemap, - ) - .await - }).as_slice::<u8>().to_vec().into_boxed_slice() + let mut r = runner!(cava); + async_std::task::block_on(async { + r.run( + rows as u64, + cols as u64, + num_ctrl_pts as u64, + image, + tstw, + ctrl_pts, + weights, + coefs, + tonemap, + ) + .await + }) + .as_slice::<u8>() + .to_vec() + .into_boxed_slice() } #[cfg(feature = "cuda")] @@ -72,8 +74,8 @@ fn run_cava( let weights = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(weights)); let coefs = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(coefs)); let tonemap = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(tonemap)); - let mut r = runner!(cava); - let res = async_std::task::block_on(async { + let mut r = runner!(cava); + let res = async_std::task::block_on(async { r.run( rows as u64, cols as u64, @@ -86,7 +88,7 @@ fn run_cava( tonemap.get_ref(), ) .await - }); + }); let num_out = unsafe { res.__size() / std::mem::size_of::<u8>() }; let mut res_cpu: Box<[u8]> = vec![0; num_out].into_boxed_slice(); res.to_cpu_ref(&mut res_cpu); @@ -204,7 +206,8 @@ fn cava_harness(args: CavaInputs) { .expect("Error saving verification image"); } - let max_diff = result.iter() + let max_diff = result + .iter() .zip(cpu_result.iter()) .map(|(a, b)| (*a as i16 - *b as i16).abs()) .max() diff --git a/juno_samples/concat/src/main.rs b/juno_samples/concat/src/main.rs index 9674c2c54b328aefcb4e670dc7e9ec482f8b2508..547dee08b118c475e1905d0fe93e9aaebfdca535 100644 --- a/juno_samples/concat/src/main.rs +++ b/juno_samples/concat/src/main.rs @@ -1,9 +1,9 @@ #![feature(concat_idents)] use hercules_rt::runner; -use hercules_rt::HerculesCPURef; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::HerculesCPURef; juno_build::juno!("concat"); @@ -20,7 +20,7 @@ fn main() { assert_eq!(output, 42); const N: usize = 3; - let arr : Box<[i32]> = (2..=4).collect(); + let arr: Box<[i32]> = (2..=4).collect(); let arr = HerculesCPURef::from_slice(&arr); let mut r = runner!(concat_switch); diff --git a/juno_samples/edge_detection/src/main.rs b/juno_samples/edge_detection/src/main.rs index eda65016c60de26a5cd1fe21d8d95dcaff826cf9..3b067ebd0c74ba4fe4b1cd4a39cf4f0b0c8b46cd 100644 --- a/juno_samples/edge_detection/src/main.rs +++ b/juno_samples/edge_detection/src/main.rs @@ -2,9 +2,9 @@ mod edge_detection_rust; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; use std::slice::from_raw_parts; @@ -228,9 +228,9 @@ fn edge_detection_harness(args: EdgeDetectionInputs) { }); #[cfg(not(feature = "cuda"))] - let result : Box<[f32]> = result.as_slice::<f32>().to_vec().into_boxed_slice(); + let result: Box<[f32]> = result.as_slice::<f32>().to_vec().into_boxed_slice(); #[cfg(feature = "cuda")] - let result : Box<[f32]> = { + let result: Box<[f32]> = { let num_out = unsafe { result.__size() / std::mem::size_of::<f32>() }; let mut res_cpu: Box<[f32]> = vec![0.0; num_out].into_boxed_slice(); result.to_cpu_ref(&mut res_cpu); @@ -261,7 +261,10 @@ fn edge_detection_harness(args: EdgeDetectionInputs) { theta, ); - assert_eq!(result.as_ref(), <Vec<f32> as AsRef<[f32]>>::as_ref(&rust_result)); + assert_eq!( + result.as_ref(), + <Vec<f32> as AsRef<[f32]>>::as_ref(&rust_result) + ); println!("Frames {} match", i); if display_verify { diff --git a/juno_samples/fork_join_tests/src/fork_join_tests.jn b/juno_samples/fork_join_tests/src/fork_join_tests.jn index bcfa1d25d0281b5c842d62142976c7518fe194e6..fdfa51a87a36f0def2c3b24701469a56fe498440 100644 --- a/juno_samples/fork_join_tests/src/fork_join_tests.jn +++ b/juno_samples/fork_join_tests/src/fork_join_tests.jn @@ -62,7 +62,7 @@ fn test4(input : i32) -> i32[4, 4] { #[entry] fn test5(input : i32) -> i32[4] { - let arr1 : i32[4]; + @cons let arr1 : i32[4]; for i = 0 to 4 { let red = arr1[i]; for k = 0 to 3 { diff --git a/juno_samples/fork_join_tests/src/gpu.sch b/juno_samples/fork_join_tests/src/gpu.sch index 6e2d6845423d13e0c2208b0169e3437a4735ba8f..0987083ec8c40565d8c079fbc5535177d68353b7 100644 --- a/juno_samples/fork_join_tests/src/gpu.sch +++ b/juno_samples/fork_join_tests/src/gpu.sch @@ -1,3 +1,4 @@ +no-memset(test5@cons); parallel-reduce(test5@reduce); gvn(*); diff --git a/juno_samples/fork_join_tests/src/main.rs b/juno_samples/fork_join_tests/src/main.rs index 4b6aba655a45a2fe2f844efe75f4e8a738a17b96..5e848adeeb3ddde17a5794afd783559b8f0f8c16 100644 --- a/juno_samples/fork_join_tests/src/main.rs +++ b/juno_samples/fork_join_tests/src/main.rs @@ -46,6 +46,6 @@ fn main() { } #[test] -fn implicit_clone_test() { +fn fork_join_test() { main(); } diff --git a/juno_samples/matmul/src/main.rs b/juno_samples/matmul/src/main.rs index 50fe1760eeeedc946d510a6d5285d76e1346f3cc..2892cd3473251b60b0a2d3d381ae0848c8b7fadb 100644 --- a/juno_samples/matmul/src/main.rs +++ b/juno_samples/matmul/src/main.rs @@ -2,9 +2,9 @@ use rand::random; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("matmul"); @@ -28,10 +28,14 @@ fn main() { let a = HerculesCPURef::from_slice(&a); let b = HerculesCPURef::from_slice(&b); let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; + let c = r + .run(I as u64, J as u64, K as u64, a.clone(), b.clone()) + .await; assert_eq!(c.as_slice::<i32>(), &*correct_c); let mut r = runner!(tiled_64_matmul); - let tiled_c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; + let tiled_c = r + .run(I as u64, J as u64, K as u64, a.clone(), b.clone()) + .await; assert_eq!(tiled_c.as_slice::<i32>(), &*correct_c); } #[cfg(feature = "cuda")] @@ -39,12 +43,16 @@ fn main() { let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; + let c = r + .run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()) + .await; let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); c.to_cpu_ref(&mut c_cpu); assert_eq!(&*c_cpu, &*correct_c); let mut r = runner!(tiled_64_matmul); - let tiled_c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; + let tiled_c = r + .run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()) + .await; let mut tiled_c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); tiled_c.to_cpu_ref(&mut tiled_c_cpu); assert_eq!(&*tiled_c_cpu, &*correct_c); diff --git a/juno_samples/nested_ccp/src/main.rs b/juno_samples/nested_ccp/src/main.rs index bc99a4bdd071ff19c70977e29241b76e3e249014..b364c03c4cddcc1fc94171ff55eb3c1ff00e5b3e 100644 --- a/juno_samples/nested_ccp/src/main.rs +++ b/juno_samples/nested_ccp/src/main.rs @@ -1,8 +1,8 @@ #![feature(concat_idents)] -use hercules_rt::{runner, HerculesCPURef, HerculesCPURefMut}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef, HerculesCPURefMut}; juno_build::juno!("nested_ccp"); diff --git a/juno_samples/patterns/src/main.rs b/juno_samples/patterns/src/main.rs index 5cc2e7c874c590ab4f7ce313b697ea9ed3ae3a30..a5586c8b0ff3ce4e1ebddcf8d1a592701dfbbcee 100644 --- a/juno_samples/patterns/src/main.rs +++ b/juno_samples/patterns/src/main.rs @@ -1,6 +1,6 @@ #![feature(concat_idents)] -use hercules_rt::{runner}; +use hercules_rt::runner; juno_build::juno!("patterns"); diff --git a/juno_samples/schedule_test/build.rs b/juno_samples/schedule_test/build.rs index 749a660c551e8b231f63287898adb2863aef826e..0129c4de3d2ceb99b060b0c3d61523b5188639dd 100644 --- a/juno_samples/schedule_test/build.rs +++ b/juno_samples/schedule_test/build.rs @@ -4,7 +4,11 @@ fn main() { JunoCompiler::new() .file_in_src("code.jn") .unwrap() - .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src(if cfg!(feature = "cuda") { + "gpu.sch" + } else { + "cpu.sch" + }) .unwrap() .build() .unwrap(); diff --git a/juno_samples/schedule_test/src/main.rs b/juno_samples/schedule_test/src/main.rs index 1505d4e5ff620a53d1095cdc4185a5a6d665e71e..f769e750a5ac3fae2594b2d4285646efd544416e 100644 --- a/juno_samples/schedule_test/src/main.rs +++ b/juno_samples/schedule_test/src/main.rs @@ -2,9 +2,9 @@ use rand::random; -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("code"); @@ -43,7 +43,16 @@ fn main() { let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); let c = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&c)); let mut r = runner!(test); - let res = r.run(N as u64, M as u64, K as u64, a.get_ref(), b.get_ref(), c.get_ref()).await; + let res = r + .run( + N as u64, + M as u64, + K as u64, + a.get_ref(), + b.get_ref(), + c.get_ref(), + ) + .await; let mut res_cpu: Box<[i32]> = vec![0; correct_res.len()].into_boxed_slice(); res.to_cpu_ref(&mut res_cpu); assert_eq!(&*res_cpu, &*correct_res); diff --git a/juno_samples/simple3/src/main.rs b/juno_samples/simple3/src/main.rs index 8eb78f7c93b0f195ffee1be120376dbe3f9a2a62..687ff414a399679fd27aae5c7b81555c1e1d5e2c 100644 --- a/juno_samples/simple3/src/main.rs +++ b/juno_samples/simple3/src/main.rs @@ -1,8 +1,8 @@ #![feature(concat_idents)] -use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; +use hercules_rt::{runner, HerculesCPURef}; juno_build::juno!("simple3");