Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • llvm/hercules
1 result
Show changes
Commits on Source (6)
......@@ -23,12 +23,6 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anes"
version = "0.1.6"
......@@ -684,27 +678,6 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
[[package]]
name = "egg"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb749745461743bb477fba3ef87c663d5965876155c676c9489cfe0963de5ab"
dependencies = [
"env_logger",
"hashbrown",
"indexmap",
"log",
"num-bigint",
"num-traits",
"quanta",
"rustc-hash",
"saturating",
"smallvec",
"symbol_table",
"symbolic_expressions",
"thiserror",
]
[[package]]
name = "either"
version = "1.13.0"
......@@ -723,15 +696,6 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
[[package]]
name = "env_logger"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
dependencies = [
"log",
]
[[package]]
name = "equivalent"
version = "1.0.2"
......@@ -845,12 +809,6 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foldhash"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f"
[[package]]
name = "funty"
version = "2.0.0"
......@@ -975,11 +933,6 @@ name = "hashbrown"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "heapless"
......@@ -1047,7 +1000,6 @@ version = "0.1.0"
dependencies = [
"bimap",
"bitvec",
"egg",
"either",
"hercules_cg",
"hercules_ir",
......@@ -2117,21 +2069,6 @@ dependencies = [
"bytemuck",
]
[[package]]
name = "quanta"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e"
dependencies = [
"crossbeam-utils",
"libc",
"once_cell",
"raw-cpuid",
"wasi 0.11.0+wasi-snapshot-preview1",
"web-sys",
"winapi",
]
[[package]]
name = "quick-error"
version = "2.0.1"
......@@ -2266,15 +2203,6 @@ dependencies = [
"rgb",
]
[[package]]
name = "raw-cpuid"
version = "11.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "529468c1335c1c03919960dfefdb1b3648858c20d7ec2d0663e728e4a717efbc"
dependencies = [
"bitflags 2.8.0",
]
[[package]]
name = "rayon"
version = "1.10.0"
......@@ -2339,12 +2267,6 @@ version = "0.8.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "rustc_version"
version = "0.4.1"
......@@ -2388,12 +2310,6 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "saturating"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71"
[[package]]
name = "scopeguard"
version = "1.2.0"
......@@ -2537,23 +2453,6 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "symbol_table"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f19bffd69fb182e684d14e3c71d04c0ef33d1641ac0b9e81c712c734e83703bc"
dependencies = [
"crossbeam-utils",
"foldhash",
"hashbrown",
]
[[package]]
name = "symbolic_expressions"
version = "5.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c68d531d83ec6c531150584c42a4290911964d5f0d79132b193b67252a23b71"
[[package]]
name = "syn"
version = "1.0.109"
......@@ -2938,22 +2837,6 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.9"
......@@ -2963,12 +2846,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.59.0"
......
......@@ -146,7 +146,7 @@ impl<'a> CPUContext<'a> {
self.function.name,
)?;
}
write!(w, ") {{\n")?;
write!(w, ") nounwind nosync willreturn norecurse {{\n")?;
let mut blocks: BTreeMap<_, _> = (0..self.function.nodes.len())
.filter(|idx| self.function.nodes[*idx].is_control())
......@@ -346,7 +346,7 @@ impl<'a> CPUContext<'a> {
let offset = offsets[&id].0;
write!(
body,
" {} = getelementptr i8, ptr %backing, i64 %dc{}\n",
" {} = getelementptr inbounds i8, ptr %backing, i64 %dc{}\n",
self.get_value(id, false),
offset.idx()
)?;
......@@ -473,11 +473,14 @@ impl<'a> CPUContext<'a> {
let opcode = match (op, op_ty) {
(BinaryOperator::Add, OpTy::Float) => "fadd",
(BinaryOperator::Add, _) => "add",
(BinaryOperator::Add, OpTy::Unsigned) => "add nuw",
(BinaryOperator::Add, OpTy::Signed) => "add nsw",
(BinaryOperator::Sub, OpTy::Float) => "fsub",
(BinaryOperator::Sub, _) => "sub",
(BinaryOperator::Sub, OpTy::Unsigned) => "sub nuw",
(BinaryOperator::Sub, OpTy::Signed) => "sub nsw",
(BinaryOperator::Mul, OpTy::Float) => "fmul",
(BinaryOperator::Mul, _) => "mul",
(BinaryOperator::Mul, OpTy::Unsigned) => "mul nuw",
(BinaryOperator::Mul, OpTy::Signed) => "mul nsw",
(BinaryOperator::Div, OpTy::Float) => "fdiv",
(BinaryOperator::Div, OpTy::Unsigned) => "udiv",
(BinaryOperator::Div, OpTy::Signed) => "sdiv",
......@@ -1005,7 +1008,7 @@ impl<'a> CPUContext<'a> {
let name = format!("%gep.{}", Self::gen_filler_id());
write!(
body,
" {} = getelementptr i8, ptr {}, i64 {}\n",
" {} = getelementptr inbounds i8, ptr {}, i64 {}\n",
name, ptr, size
)?;
Ok(name)
......
......@@ -354,7 +354,6 @@ impl GPUContext<'_> {
write!(
w,
"
#define _CG_ABI_EXPERIMENTAL
#include <assert.h>
#include <stdio.h>
#include <stddef.h>
......@@ -362,8 +361,23 @@ impl GPUContext<'_> {
#include <cuda_runtime.h>
#include <math_constants.h>
#include <mma.h>
#if (CUDA_VERSION >= 12000)
#else
#define _CG_ABI_EXPERIMENTAL
#endif
#include <cooperative_groups.h>
#include <cooperative_groups/reduce.h>
#if (CUDA_VERSION >= 12000)
namespace cg = cooperative_groups;
namespace cge = cooperative_groups;
#else
namespace cg = cooperative_groups;
namespace cge = cooperative_groups::experimental;
#endif
#include <cuda_bf16.h>
namespace cg = cooperative_groups;
......@@ -562,9 +576,15 @@ namespace cg = cooperative_groups;
* and writes.
*/
fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> {
write!(w, "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n")?;
write!(
w,
"\t__shared__ cge::block_tile_memory<1024> block_sync_shared;\n"
)?;
write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?;
write!(w, "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n")?;
write!(
w,
"\tcg::thread_block block = cge::this_thread_block(block_sync_shared);\n"
)?;
Ok(())
}
......@@ -1709,24 +1729,25 @@ namespace cg = cooperative_groups;
};
write!(
thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n",
"\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
use_thread_per_id, cg_tile, use_thread_per_id
)?;
let cg_tile_use = self.get_cg_tile(id, CGType::Use);
write!(
thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n",
"\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
use_thread_quota, cg_tile_use, use_thread_quota
)?;
let available_thread_quota = available_thread_quota.unwrap();
let cg_tile_available = self.get_cg_tile(id, CGType::Available);
write!(
thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n",
"\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
available_thread_quota, cg_tile_available, available_thread_quota
)?;
if parallel_factor.is_none() {
write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, true))?;
write!(thread_block_tiles, "\t{};\n", self.get_fork_iter(id, true))?;
write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, false))?;
write!(w_init, "\tgoto {};\n", self.get_block_name(id, true))?;
}
}
......
......@@ -22,4 +22,4 @@ hercules_cg = { path = "../hercules_cg" }
hercules_ir = { path = "../hercules_ir" }
nestify = "*"
bimap = "*"
egg = "*"
#egg = "*"
......@@ -221,6 +221,31 @@ fn preliminary_fixups(
}
}
// Add region nodes between join nodes and loop headers to aid in block
// placement.
for (_, join) in fork_join_map {
let control_user = editor
.get_users(*join)
.filter(|id| nodes[id.idx()].is_control())
.next()
.unwrap();
if nodes[control_user.idx()].is_fork()
|| nodes[control_user.idx()]
.try_region()
.map(|preds| preds.len() > 1)
.unwrap_or(false)
{
let success = editor.edit(|mut edit| {
let region = edit.add_node(Node::Region {
preds: Box::new([*join]),
});
edit.replace_all_uses_where(*join, region, |id| *id == control_user)
});
assert!(success);
return true;
}
}
false
}
......
......@@ -3,10 +3,22 @@ use std::fmt::{Error, Write};
use hercules_ir::*;
use egg::*;
//use egg::*;
use crate::*;
pub fn rewrite_math_expressions(
editor: &mut FunctionEditor,
device: Device,
typing: &Vec<TypeID>,
fork_join_map: &HashMap<NodeID, NodeID>,
nodes_in_fork_joins: &HashMap<NodeID, HashSet<NodeID>>,
reduce_einsums: &(MathEnv, HashMap<NodeID, MathID>),
) {
panic!("PANIC: The rewrite math expressions pass is currently disabled, as including egg increases compile times and we're not using it currently.");
}
/*
define_language! {
enum MathLanguage {
"zero" = Zero,
......@@ -164,3 +176,4 @@ fn egg_print_math_expr<W: Write>(id: MathID, env: &MathEnv, w: &mut W) -> Result
_ => Err(Error::default()),
}
}
*/
......@@ -5,6 +5,7 @@ use std::future::Future;
use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::OnceLock;
/*
......@@ -928,3 +929,30 @@ unsafe impl GlobalAlloc for AlignedAlloc {
#[global_allocator]
static A: AlignedAlloc = AlignedAlloc;
pub struct SpinBarrier {
num: usize,
waiting: AtomicUsize,
gen: AtomicUsize,
}
impl SpinBarrier {
pub const fn new(num: usize) -> Self {
SpinBarrier {
num,
waiting: AtomicUsize::new(0),
gen: AtomicUsize::new(0),
}
}
pub fn wait(&self) {
let old_gen = self.gen.load(Ordering::Acquire);
let old_waiting = self.waiting.fetch_add(1, Ordering::Relaxed);
if old_waiting + 1 == self.num {
self.waiting.store(0, Ordering::Relaxed);
self.gen.fetch_add(1, Ordering::Release);
} else {
while old_gen == self.gen.load(Ordering::Acquire) {}
}
}
}
......@@ -51,7 +51,9 @@ if feature("cuda") {
fork-coalesce(*);
infer-schedules(*);
dce(*);
rewrite(*);
//rewrite(*);
let out = outline(matmul@outer);
gpu(out);
fixpoint {
simplify-cfg(*);
dce(*);
......
......@@ -33,10 +33,11 @@ fixpoint {
reduce-slf(*);
simpl!(*);
fork-extend[32](layer_forward@inner_loop);
fork-extend[32768](layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
simpl!(layer_forward);
fork-tile[32, 0, false, true](layer_forward@inner_loop);
fork-tile[32768, 0, false, true](layer_forward@inner_loop);
fork-tile[1024, 1, false, true](layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
let out = fork-split(layer_forward@inner_loop);
clean-monoid-reduces(layer_forward);
......
......@@ -13,41 +13,22 @@ fn bfs_bench(c: &mut Criterion) {
let mut r = runner!(bfs);
group.bench_function("bfs bench 4096", |b| {
let input = "data/graph4096.txt";
let (nodes, source, edges) = parse_graph(input.into()).unwrap();
let n = nodes.len() as u64;
let m = edges.len() as u64;
let nodes = HerculesImmBox::from(&nodes as &[Node]);
let edges = HerculesImmBox::from(&edges as &[u32]);
b.iter(|| {
async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await });
})
});
group.bench_function("bfs bench 65536", |b| {
let input = "data/graph65536.txt";
let (nodes, source, edges) = parse_graph(input.into()).unwrap();
let n = nodes.len() as u64;
let m = edges.len() as u64;
let nodes = HerculesImmBox::from(&nodes as &[Node]);
let edges = HerculesImmBox::from(&edges as &[u32]);
b.iter(|| {
async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await });
})
});
group.bench_function("bfs bench 64M", |b| {
let input = "/scratch/aaronjc4/rodinia_3.1/data/bfs/graph64M.txt";
let mut bench = |name, input: &'_ str| {
let (nodes, source, edges) = parse_graph(input.into()).expect("PANIC: Couldn't read input file for 64M benchmark. Currently, this benchmark uses a hard-coded path, so it can only be run on the lab machines.");
let n = nodes.len() as u64;
let m = edges.len() as u64;
let nodes = HerculesImmBox::from(&nodes as &[Node]);
let edges = HerculesImmBox::from(&edges as &[u32]);
b.iter(|| {
async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await });
})
});
group.bench_function(name, |b| {
b.iter(|| {
async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await });
})
});
};
bench("bfs bench 4096", "data/graph4096.txt");
bench("bfs bench 65536", "data/graph65536.txt");
bench("bfs bench 64M", "/scratch/aaronjc4/rodinia_3.1/data/bfs/graph64M.txt");
}
criterion_group!(benches, bfs_bench);
......
......@@ -24,7 +24,7 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
@cost_init for i in 0..n {
cost[i] = -1;
}
cost[source as u64] = 0;
@cost_init cost[source as u64] = 0;
// Nodes that were updated in the current iteration
let updated: bool[n];
......
......@@ -46,16 +46,14 @@ let traverse_body = outline(inner);
let (outer, inner) = fork-reshape[[1], [0]](collect);
let collect_body = outline(inner);
let init_body = init;
// Following code seems to generate breaking RT code
//fork-tile[32, 0, false, true](init);
//let (outer, inner) = fork-reshape[[1], [0]](init);
//let init_body = outline(inner);
//inline(bfs@cost_init);
inline(bfs@loop1, bfs@loop2);
fork-tile[32, 0, false, true](init);
let (outer, inner) = fork-reshape[[1], [0]](init);
let init_body = outline(inner);
inline(bfs@cost_init, bfs@loop1, bfs@loop2);
delete-uncalled(*);
const-inline(*);
simpl!(*);
unforkify(init_body, traverse_body, collect_body);
simpl!(*);
......
......@@ -38,8 +38,12 @@ fixpoint {
}
simpl!(collect);
fork-tile[32, 0, false, true](init);
let (outer, inner) = fork-reshape[[1], [0]](init);
let init_body = outline(inner);
fork-tile[1024, 0, false, true](traverse, collect);
fork-split(traverse, collect);
unforkify(init);
unforkify(init_body);
gcm(*);
......@@ -13,38 +13,38 @@ fn cfd_bench(c: &mut Criterion) {
group.sample_size(10);
let mut euler_bench = |name, data_file, iterations| {
group.bench_function(name, |b| {
let mut r = runner!(euler);
let block_size = 16;
let FarFieldConditions {
ff_variable,
ff_fc_momentum_x,
ff_fc_momentum_y,
ff_fc_momentum_z,
ff_fc_density_energy,
} = set_far_field_conditions();
let GeometryData {
nelr,
areas,
elements_surrounding_elements,
normals,
} = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines.");
let mut variables = initialize_variables(nelr, &ff_variable);
let mut r = runner!(euler);
let block_size = 16;
let FarFieldConditions {
ff_variable,
ff_fc_momentum_x,
ff_fc_momentum_y,
ff_fc_momentum_z,
ff_fc_density_energy,
} = set_far_field_conditions();
let GeometryData {
nelr,
areas,
elements_surrounding_elements,
normals,
} = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines.");
let mut variables = initialize_variables(nelr, &ff_variable);
let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
let areas = HerculesImmBox::from(areas.as_slice());
let elements_surrounding_elements =
HerculesImmBox::from(elements_surrounding_elements.as_slice());
let areas = HerculesImmBox::from(areas.as_slice());
let elements_surrounding_elements =
HerculesImmBox::from(elements_surrounding_elements.as_slice());
let normals_x = HerculesImmBox::from(normals.x.as_slice());
let normals_y = HerculesImmBox::from(normals.y.as_slice());
let normals_z = HerculesImmBox::from(normals.z.as_slice());
let normals_x = HerculesImmBox::from(normals.x.as_slice());
let normals_y = HerculesImmBox::from(normals.y.as_slice());
let normals_z = HerculesImmBox::from(normals.z.as_slice());
group.bench_function(name, |b| {
b.iter(|| {
async_std::task::block_on(async {
r.run(
......@@ -91,38 +91,38 @@ fn cfd_bench(c: &mut Criterion) {
);
let mut pre_euler_bench = |name, data_file, iterations| {
group.bench_function(name, |b| {
let mut r = runner!(pre_euler);
let block_size = 16;
let FarFieldConditions {
ff_variable,
ff_fc_momentum_x,
ff_fc_momentum_y,
ff_fc_momentum_z,
ff_fc_density_energy,
} = set_far_field_conditions();
let GeometryData {
nelr,
areas,
elements_surrounding_elements,
normals,
} = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines.");
let mut variables = initialize_variables(nelr, &ff_variable);
let mut r = runner!(pre_euler);
let block_size = 16;
let FarFieldConditions {
ff_variable,
ff_fc_momentum_x,
ff_fc_momentum_y,
ff_fc_momentum_z,
ff_fc_density_energy,
} = set_far_field_conditions();
let GeometryData {
nelr,
areas,
elements_surrounding_elements,
normals,
} = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines.");
let mut variables = initialize_variables(nelr, &ff_variable);
let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
let areas = HerculesImmBox::from(areas.as_slice());
let elements_surrounding_elements =
HerculesImmBox::from(elements_surrounding_elements.as_slice());
let areas = HerculesImmBox::from(areas.as_slice());
let elements_surrounding_elements =
HerculesImmBox::from(elements_surrounding_elements.as_slice());
let normals_x = HerculesImmBox::from(normals.x.as_slice());
let normals_y = HerculesImmBox::from(normals.y.as_slice());
let normals_z = HerculesImmBox::from(normals.z.as_slice());
let normals_x = HerculesImmBox::from(normals.x.as_slice());
let normals_y = HerculesImmBox::from(normals.y.as_slice());
let normals_z = HerculesImmBox::from(normals.z.as_slice());
group.bench_function(name, |b| {
b.iter(|| {
async_std::task::block_on(async {
r.run(
......