diff --git a/Cargo.lock b/Cargo.lock index 61cde7f161b7c4177cb781addeb2f484af3b7477..acc9783042434d0569ed2fe7f1f577a43fc597d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,12 +23,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - [[package]] name = "anes" version = "0.1.6" @@ -684,27 +678,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" -[[package]] -name = "egg" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb749745461743bb477fba3ef87c663d5965876155c676c9489cfe0963de5ab" -dependencies = [ - "env_logger", - "hashbrown", - "indexmap", - "log", - "num-bigint", - "num-traits", - "quanta", - "rustc-hash", - "saturating", - "smallvec", - "symbol_table", - "symbolic_expressions", - "thiserror", -] - [[package]] name = "either" version = "1.13.0" @@ -723,15 +696,6 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" -[[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "log", -] - [[package]] name = "equivalent" version = "1.0.2" @@ -845,12 +809,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foldhash" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" - [[package]] name = "funty" version = "2.0.0" @@ -975,11 +933,6 @@ name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", -] [[package]] name = "heapless" @@ -1047,7 +1000,6 @@ version = "0.1.0" dependencies = [ "bimap", "bitvec", - "egg", "either", "hercules_cg", "hercules_ir", @@ -2117,21 +2069,6 @@ dependencies = [ "bytemuck", ] -[[package]] -name = "quanta" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e" -dependencies = [ - "crossbeam-utils", - "libc", - "once_cell", - "raw-cpuid", - "wasi 0.11.0+wasi-snapshot-preview1", - "web-sys", - "winapi", -] - [[package]] name = "quick-error" version = "2.0.1" @@ -2266,15 +2203,6 @@ dependencies = [ "rgb", ] -[[package]] -name = "raw-cpuid" -version = "11.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "529468c1335c1c03919960dfefdb1b3648858c20d7ec2d0663e728e4a717efbc" -dependencies = [ - "bitflags 2.8.0", -] - [[package]] name = "rayon" version = "1.10.0" @@ -2339,12 +2267,6 @@ version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - [[package]] name = "rustc_version" version = "0.4.1" @@ -2388,12 +2310,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "saturating" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" - [[package]] name = "scopeguard" version = "1.2.0" @@ -2537,23 +2453,6 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "symbol_table" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f19bffd69fb182e684d14e3c71d04c0ef33d1641ac0b9e81c712c734e83703bc" -dependencies = [ - "crossbeam-utils", - "foldhash", - "hashbrown", -] - -[[package]] -name = "symbolic_expressions" -version = "5.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c68d531d83ec6c531150584c42a4290911964d5f0d79132b193b67252a23b71" - [[package]] name = "syn" version = "1.0.109" @@ -2938,22 +2837,6 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.9" @@ -2963,12 +2846,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows" version = "0.59.0" diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index 1e6067a360c6a57674efd43444abd300d74075b4..4069cb02bcf1ed0a93f5c40df432253e842ead9a 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -354,7 +354,6 @@ impl GPUContext<'_> { write!( w, " -#define _CG_ABI_EXPERIMENTAL #include <assert.h> #include <stdio.h> #include <stddef.h> @@ -362,8 +361,23 @@ impl GPUContext<'_> { #include <cuda_runtime.h> #include <math_constants.h> #include <mma.h> + +#if (CUDA_VERSION >= 12000) +#else +#define _CG_ABI_EXPERIMENTAL +#endif + #include <cooperative_groups.h> #include <cooperative_groups/reduce.h> + +#if (CUDA_VERSION >= 12000) +namespace cg = cooperative_groups; +namespace cge = cooperative_groups; +#else +namespace cg = cooperative_groups; +namespace cge = cooperative_groups::experimental; +#endif + #include <cuda_bf16.h> namespace cg = cooperative_groups; @@ -562,9 +576,15 @@ namespace cg = cooperative_groups; * and writes. */ fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> { - write!(w, "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n")?; + write!( + w, + "\t__shared__ cge::block_tile_memory<1024> block_sync_shared;\n" + )?; write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?; - write!(w, "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n")?; + write!( + w, + "\tcg::thread_block block = cge::this_thread_block(block_sync_shared);\n" + )?; Ok(()) } @@ -1709,24 +1729,25 @@ namespace cg = cooperative_groups; }; write!( thread_block_tiles, - "\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", + "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n", use_thread_per_id, cg_tile, use_thread_per_id )?; let cg_tile_use = self.get_cg_tile(id, CGType::Use); write!( thread_block_tiles, - "\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", + "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n", use_thread_quota, cg_tile_use, use_thread_quota )?; let available_thread_quota = available_thread_quota.unwrap(); let cg_tile_available = self.get_cg_tile(id, CGType::Available); write!( thread_block_tiles, - "\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", + "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n", available_thread_quota, cg_tile_available, available_thread_quota )?; if parallel_factor.is_none() { - write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, true))?; + write!(thread_block_tiles, "\t{};\n", self.get_fork_iter(id, true))?; + write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, false))?; write!(w_init, "\tgoto {};\n", self.get_block_name(id, true))?; } } diff --git a/hercules_opt/Cargo.toml b/hercules_opt/Cargo.toml index 92e0533938eab0cc76fe4b69a205f4dceae2f244..892e4c1539f34a91087472b052f015f542a351a6 100644 --- a/hercules_opt/Cargo.toml +++ b/hercules_opt/Cargo.toml @@ -22,4 +22,4 @@ hercules_cg = { path = "../hercules_cg" } hercules_ir = { path = "../hercules_ir" } nestify = "*" bimap = "*" -egg = "*" +#egg = "*" diff --git a/hercules_opt/src/rewrite_math_expressions.rs b/hercules_opt/src/rewrite_math_expressions.rs index 6f52dc58ee32751b96190b42a0add01d42ce3b1e..55e695ba75704395c5fde371449fad940310ad43 100644 --- a/hercules_opt/src/rewrite_math_expressions.rs +++ b/hercules_opt/src/rewrite_math_expressions.rs @@ -3,10 +3,22 @@ use std::fmt::{Error, Write}; use hercules_ir::*; -use egg::*; +//use egg::*; use crate::*; +pub fn rewrite_math_expressions( + editor: &mut FunctionEditor, + device: Device, + typing: &Vec<TypeID>, + fork_join_map: &HashMap<NodeID, NodeID>, + nodes_in_fork_joins: &HashMap<NodeID, HashSet<NodeID>>, + reduce_einsums: &(MathEnv, HashMap<NodeID, MathID>), +) { + panic!("PANIC: The rewrite math expressions pass is currently disabled, as including egg increases compile times and we're not using it currently."); +} + +/* define_language! { enum MathLanguage { "zero" = Zero, @@ -164,3 +176,4 @@ fn egg_print_math_expr<W: Write>(id: MathID, env: &MathEnv, w: &mut W) -> Result _ => Err(Error::default()), } } +*/ diff --git a/juno_samples/matmul/src/matmul.sch b/juno_samples/matmul/src/matmul.sch index 306997f58eb217f9ce301dc18e418c412e6df621..6867576e4a3a81f327d23eca6dccbcd6a6183eb4 100644 --- a/juno_samples/matmul/src/matmul.sch +++ b/juno_samples/matmul/src/matmul.sch @@ -51,7 +51,9 @@ if feature("cuda") { fork-coalesce(*); infer-schedules(*); dce(*); - rewrite(*); + //rewrite(*); + let out = outline(matmul@outer); + gpu(out); fixpoint { simplify-cfg(*); dce(*); diff --git a/juno_samples/rodinia/backprop/src/gpu.sch b/juno_samples/rodinia/backprop/src/gpu.sch index f8cc84a3206c97889e0151c86cc2549bd0b65c9b..1773f713ee3cdd223f5e2fa5c88ef2d4a837f2dc 100644 --- a/juno_samples/rodinia/backprop/src/gpu.sch +++ b/juno_samples/rodinia/backprop/src/gpu.sch @@ -33,10 +33,11 @@ fixpoint { reduce-slf(*); simpl!(*); -fork-extend[32](layer_forward@inner_loop); +fork-extend[32768](layer_forward@inner_loop); clean-monoid-reduces(layer_forward); simpl!(layer_forward); -fork-tile[32, 0, false, true](layer_forward@inner_loop); +fork-tile[32768, 0, false, true](layer_forward@inner_loop); +fork-tile[1024, 1, false, true](layer_forward@inner_loop); clean-monoid-reduces(layer_forward); let out = fork-split(layer_forward@inner_loop); clean-monoid-reduces(layer_forward); diff --git a/juno_samples/rodinia/bfs/benches/bfs_bench.rs b/juno_samples/rodinia/bfs/benches/bfs_bench.rs index f87b4e09bc2c3707b1436ad21b2f8f8407033e52..ea8ba0ed3d8ac9c80b1a2b34529d70ac745868e2 100644 --- a/juno_samples/rodinia/bfs/benches/bfs_bench.rs +++ b/juno_samples/rodinia/bfs/benches/bfs_bench.rs @@ -13,41 +13,22 @@ fn bfs_bench(c: &mut Criterion) { let mut r = runner!(bfs); - group.bench_function("bfs bench 4096", |b| { - let input = "data/graph4096.txt"; - let (nodes, source, edges) = parse_graph(input.into()).unwrap(); - let n = nodes.len() as u64; - let m = edges.len() as u64; - let nodes = HerculesImmBox::from(&nodes as &[Node]); - let edges = HerculesImmBox::from(&edges as &[u32]); - b.iter(|| { - async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); - }) - }); - - group.bench_function("bfs bench 65536", |b| { - let input = "data/graph65536.txt"; - let (nodes, source, edges) = parse_graph(input.into()).unwrap(); - let n = nodes.len() as u64; - let m = edges.len() as u64; - let nodes = HerculesImmBox::from(&nodes as &[Node]); - let edges = HerculesImmBox::from(&edges as &[u32]); - b.iter(|| { - async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); - }) - }); - - group.bench_function("bfs bench 64M", |b| { - let input = "/scratch/aaronjc4/rodinia_3.1/data/bfs/graph64M.txt"; + let mut bench = |name, input: &'_ str| { let (nodes, source, edges) = parse_graph(input.into()).expect("PANIC: Couldn't read input file for 64M benchmark. Currently, this benchmark uses a hard-coded path, so it can only be run on the lab machines."); let n = nodes.len() as u64; let m = edges.len() as u64; let nodes = HerculesImmBox::from(&nodes as &[Node]); let edges = HerculesImmBox::from(&edges as &[u32]); - b.iter(|| { - async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); - }) - }); + group.bench_function(name, |b| { + b.iter(|| { + async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); + }) + }); + }; + + bench("bfs bench 4096", "data/graph4096.txt"); + bench("bfs bench 65536", "data/graph65536.txt"); + bench("bfs bench 64M", "/scratch/aaronjc4/rodinia_3.1/data/bfs/graph64M.txt"); } criterion_group!(benches, bfs_bench); diff --git a/juno_samples/rodinia/bfs/src/bfs.jn b/juno_samples/rodinia/bfs/src/bfs.jn index d6ec25f26b997205d4dcdf116bce462fa228fc88..75190300e4d8be463176c4c346d89715cece0ced 100644 --- a/juno_samples/rodinia/bfs/src/bfs.jn +++ b/juno_samples/rodinia/bfs/src/bfs.jn @@ -24,7 +24,7 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] @cost_init for i in 0..n { cost[i] = -1; } - cost[source as u64] = 0; + @cost_init cost[source as u64] = 0; // Nodes that were updated in the current iteration let updated: bool[n]; diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch index e51005613f9d14a3e16938b1a77d55aec96714bd..339782d6a84b6a43f31d81f5c9b23240dd10a292 100644 --- a/juno_samples/rodinia/bfs/src/cpu.sch +++ b/juno_samples/rodinia/bfs/src/cpu.sch @@ -46,16 +46,14 @@ let traverse_body = outline(inner); let (outer, inner) = fork-reshape[[1], [0]](collect); let collect_body = outline(inner); -let init_body = init; -// Following code seems to generate breaking RT code -//fork-tile[32, 0, false, true](init); -//let (outer, inner) = fork-reshape[[1], [0]](init); -//let init_body = outline(inner); -//inline(bfs@cost_init); +fork-tile[32, 0, false, true](init); +let (outer, inner) = fork-reshape[[1], [0]](init); +let init_body = outline(inner); inline(bfs@loop1, bfs@loop2); delete-uncalled(*); const-inline(*); +simpl!(*); unforkify(init_body, traverse_body, collect_body); simpl!(*); diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch index 4e5c1f74e34cedacd48ddf6c33b631a08079b995..0253a0210f6cc2451d38399601ad39ff3ab9465a 100644 --- a/juno_samples/rodinia/bfs/src/gpu.sch +++ b/juno_samples/rodinia/bfs/src/gpu.sch @@ -38,8 +38,12 @@ fixpoint { } simpl!(collect); +fork-tile[32, 0, false, true](init); +let (outer, inner) = fork-reshape[[1], [0]](init); +let init_body = outline(inner); + fork-tile[1024, 0, false, true](traverse, collect); fork-split(traverse, collect); -unforkify(init); +unforkify(init_body); gcm(*); diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs index aa6d772724238d13b6464398b07b67e287716085..5b4db044c3f66fc630cc434794e2a6c3de684682 100644 --- a/juno_samples/rodinia/cfd/benches/cfd_bench.rs +++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs @@ -13,38 +13,38 @@ fn cfd_bench(c: &mut Criterion) { group.sample_size(10); let mut euler_bench = |name, data_file, iterations| { - group.bench_function(name, |b| { - let mut r = runner!(euler); - let block_size = 16; - let FarFieldConditions { - ff_variable, - ff_fc_momentum_x, - ff_fc_momentum_y, - ff_fc_momentum_z, - ff_fc_density_energy, - } = set_far_field_conditions(); - let GeometryData { - nelr, - areas, - elements_surrounding_elements, - normals, - } = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines."); - let mut variables = initialize_variables(nelr, &ff_variable); + let mut r = runner!(euler); + let block_size = 16; + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines."); + let mut variables = initialize_variables(nelr, &ff_variable); - let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); - let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); - let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); - let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); - let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); + let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); + let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); + let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); + let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); + let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); - let areas = HerculesImmBox::from(areas.as_slice()); - let elements_surrounding_elements = - HerculesImmBox::from(elements_surrounding_elements.as_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); + let elements_surrounding_elements = + HerculesImmBox::from(elements_surrounding_elements.as_slice()); - let normals_x = HerculesImmBox::from(normals.x.as_slice()); - let normals_y = HerculesImmBox::from(normals.y.as_slice()); - let normals_z = HerculesImmBox::from(normals.z.as_slice()); + let normals_x = HerculesImmBox::from(normals.x.as_slice()); + let normals_y = HerculesImmBox::from(normals.y.as_slice()); + let normals_z = HerculesImmBox::from(normals.z.as_slice()); + group.bench_function(name, |b| { b.iter(|| { async_std::task::block_on(async { r.run( @@ -91,38 +91,38 @@ fn cfd_bench(c: &mut Criterion) { ); let mut pre_euler_bench = |name, data_file, iterations| { - group.bench_function(name, |b| { - let mut r = runner!(pre_euler); - let block_size = 16; - let FarFieldConditions { - ff_variable, - ff_fc_momentum_x, - ff_fc_momentum_y, - ff_fc_momentum_z, - ff_fc_density_energy, - } = set_far_field_conditions(); - let GeometryData { - nelr, - areas, - elements_surrounding_elements, - normals, - } = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines."); - let mut variables = initialize_variables(nelr, &ff_variable); + let mut r = runner!(pre_euler); + let block_size = 16; + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines."); + let mut variables = initialize_variables(nelr, &ff_variable); - let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); - let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); - let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); - let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); - let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); + let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); + let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); + let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); + let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); + let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); - let areas = HerculesImmBox::from(areas.as_slice()); - let elements_surrounding_elements = - HerculesImmBox::from(elements_surrounding_elements.as_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); + let elements_surrounding_elements = + HerculesImmBox::from(elements_surrounding_elements.as_slice()); - let normals_x = HerculesImmBox::from(normals.x.as_slice()); - let normals_y = HerculesImmBox::from(normals.y.as_slice()); - let normals_z = HerculesImmBox::from(normals.z.as_slice()); + let normals_x = HerculesImmBox::from(normals.x.as_slice()); + let normals_y = HerculesImmBox::from(normals.y.as_slice()); + let normals_z = HerculesImmBox::from(normals.z.as_slice()); + group.bench_function(name, |b| { b.iter(|| { async_std::task::block_on(async { r.run(