Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • llvm/hercules
1 result
Show changes
Commits on Source (6)
...@@ -23,12 +23,6 @@ version = "0.5.0" ...@@ -23,12 +23,6 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]] [[package]]
name = "anes" name = "anes"
version = "0.1.6" version = "0.1.6"
...@@ -684,27 +678,6 @@ version = "1.0.5" ...@@ -684,27 +678,6 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
[[package]]
name = "egg"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb749745461743bb477fba3ef87c663d5965876155c676c9489cfe0963de5ab"
dependencies = [
"env_logger",
"hashbrown",
"indexmap",
"log",
"num-bigint",
"num-traits",
"quanta",
"rustc-hash",
"saturating",
"smallvec",
"symbol_table",
"symbolic_expressions",
"thiserror",
]
[[package]] [[package]]
name = "either" name = "either"
version = "1.13.0" version = "1.13.0"
...@@ -723,15 +696,6 @@ version = "0.6.1" ...@@ -723,15 +696,6 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
[[package]]
name = "env_logger"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
dependencies = [
"log",
]
[[package]] [[package]]
name = "equivalent" name = "equivalent"
version = "1.0.2" version = "1.0.2"
...@@ -845,12 +809,6 @@ version = "1.0.7" ...@@ -845,12 +809,6 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foldhash"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f"
[[package]] [[package]]
name = "funty" name = "funty"
version = "2.0.0" version = "2.0.0"
...@@ -975,11 +933,6 @@ name = "hashbrown" ...@@ -975,11 +933,6 @@ name = "hashbrown"
version = "0.15.2" version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]] [[package]]
name = "heapless" name = "heapless"
...@@ -1047,7 +1000,6 @@ version = "0.1.0" ...@@ -1047,7 +1000,6 @@ version = "0.1.0"
dependencies = [ dependencies = [
"bimap", "bimap",
"bitvec", "bitvec",
"egg",
"either", "either",
"hercules_cg", "hercules_cg",
"hercules_ir", "hercules_ir",
...@@ -2117,21 +2069,6 @@ dependencies = [ ...@@ -2117,21 +2069,6 @@ dependencies = [
"bytemuck", "bytemuck",
] ]
[[package]]
name = "quanta"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e"
dependencies = [
"crossbeam-utils",
"libc",
"once_cell",
"raw-cpuid",
"wasi 0.11.0+wasi-snapshot-preview1",
"web-sys",
"winapi",
]
[[package]] [[package]]
name = "quick-error" name = "quick-error"
version = "2.0.1" version = "2.0.1"
...@@ -2266,15 +2203,6 @@ dependencies = [ ...@@ -2266,15 +2203,6 @@ dependencies = [
"rgb", "rgb",
] ]
[[package]]
name = "raw-cpuid"
version = "11.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "529468c1335c1c03919960dfefdb1b3648858c20d7ec2d0663e728e4a717efbc"
dependencies = [
"bitflags 2.8.0",
]
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.10.0" version = "1.10.0"
...@@ -2339,12 +2267,6 @@ version = "0.8.50" ...@@ -2339,12 +2267,6 @@ version = "0.8.50"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
version = "0.4.1" version = "0.4.1"
...@@ -2388,12 +2310,6 @@ dependencies = [ ...@@ -2388,12 +2310,6 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "saturating"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71"
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.2.0" version = "1.2.0"
...@@ -2537,23 +2453,6 @@ version = "0.11.1" ...@@ -2537,23 +2453,6 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "symbol_table"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f19bffd69fb182e684d14e3c71d04c0ef33d1641ac0b9e81c712c734e83703bc"
dependencies = [
"crossbeam-utils",
"foldhash",
"hashbrown",
]
[[package]]
name = "symbolic_expressions"
version = "5.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c68d531d83ec6c531150584c42a4290911964d5f0d79132b193b67252a23b71"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.109" version = "1.0.109"
...@@ -2938,22 +2837,6 @@ version = "0.1.8" ...@@ -2938,22 +2837,6 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]] [[package]]
name = "winapi-util" name = "winapi-util"
version = "0.1.9" version = "0.1.9"
...@@ -2963,12 +2846,6 @@ dependencies = [ ...@@ -2963,12 +2846,6 @@ dependencies = [
"windows-sys", "windows-sys",
] ]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows" name = "windows"
version = "0.59.0" version = "0.59.0"
......
...@@ -146,7 +146,7 @@ impl<'a> CPUContext<'a> { ...@@ -146,7 +146,7 @@ impl<'a> CPUContext<'a> {
self.function.name, self.function.name,
)?; )?;
} }
write!(w, ") {{\n")?; write!(w, ") nounwind nosync willreturn norecurse {{\n")?;
let mut blocks: BTreeMap<_, _> = (0..self.function.nodes.len()) let mut blocks: BTreeMap<_, _> = (0..self.function.nodes.len())
.filter(|idx| self.function.nodes[*idx].is_control()) .filter(|idx| self.function.nodes[*idx].is_control())
...@@ -346,7 +346,7 @@ impl<'a> CPUContext<'a> { ...@@ -346,7 +346,7 @@ impl<'a> CPUContext<'a> {
let offset = offsets[&id].0; let offset = offsets[&id].0;
write!( write!(
body, body,
" {} = getelementptr i8, ptr %backing, i64 %dc{}\n", " {} = getelementptr inbounds i8, ptr %backing, i64 %dc{}\n",
self.get_value(id, false), self.get_value(id, false),
offset.idx() offset.idx()
)?; )?;
...@@ -473,11 +473,14 @@ impl<'a> CPUContext<'a> { ...@@ -473,11 +473,14 @@ impl<'a> CPUContext<'a> {
let opcode = match (op, op_ty) { let opcode = match (op, op_ty) {
(BinaryOperator::Add, OpTy::Float) => "fadd", (BinaryOperator::Add, OpTy::Float) => "fadd",
(BinaryOperator::Add, _) => "add", (BinaryOperator::Add, OpTy::Unsigned) => "add nuw",
(BinaryOperator::Add, OpTy::Signed) => "add nsw",
(BinaryOperator::Sub, OpTy::Float) => "fsub", (BinaryOperator::Sub, OpTy::Float) => "fsub",
(BinaryOperator::Sub, _) => "sub", (BinaryOperator::Sub, OpTy::Unsigned) => "sub nuw",
(BinaryOperator::Sub, OpTy::Signed) => "sub nsw",
(BinaryOperator::Mul, OpTy::Float) => "fmul", (BinaryOperator::Mul, OpTy::Float) => "fmul",
(BinaryOperator::Mul, _) => "mul", (BinaryOperator::Mul, OpTy::Unsigned) => "mul nuw",
(BinaryOperator::Mul, OpTy::Signed) => "mul nsw",
(BinaryOperator::Div, OpTy::Float) => "fdiv", (BinaryOperator::Div, OpTy::Float) => "fdiv",
(BinaryOperator::Div, OpTy::Unsigned) => "udiv", (BinaryOperator::Div, OpTy::Unsigned) => "udiv",
(BinaryOperator::Div, OpTy::Signed) => "sdiv", (BinaryOperator::Div, OpTy::Signed) => "sdiv",
...@@ -1005,7 +1008,7 @@ impl<'a> CPUContext<'a> { ...@@ -1005,7 +1008,7 @@ impl<'a> CPUContext<'a> {
let name = format!("%gep.{}", Self::gen_filler_id()); let name = format!("%gep.{}", Self::gen_filler_id());
write!( write!(
body, body,
" {} = getelementptr i8, ptr {}, i64 {}\n", " {} = getelementptr inbounds i8, ptr {}, i64 {}\n",
name, ptr, size name, ptr, size
)?; )?;
Ok(name) Ok(name)
......
...@@ -354,7 +354,6 @@ impl GPUContext<'_> { ...@@ -354,7 +354,6 @@ impl GPUContext<'_> {
write!( write!(
w, w,
" "
#define _CG_ABI_EXPERIMENTAL
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stddef.h> #include <stddef.h>
...@@ -362,8 +361,23 @@ impl GPUContext<'_> { ...@@ -362,8 +361,23 @@ impl GPUContext<'_> {
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <math_constants.h> #include <math_constants.h>
#include <mma.h> #include <mma.h>
#if (CUDA_VERSION >= 12000)
#else
#define _CG_ABI_EXPERIMENTAL
#endif
#include <cooperative_groups.h> #include <cooperative_groups.h>
#include <cooperative_groups/reduce.h> #include <cooperative_groups/reduce.h>
#if (CUDA_VERSION >= 12000)
namespace cg = cooperative_groups;
namespace cge = cooperative_groups;
#else
namespace cg = cooperative_groups;
namespace cge = cooperative_groups::experimental;
#endif
#include <cuda_bf16.h> #include <cuda_bf16.h>
namespace cg = cooperative_groups; namespace cg = cooperative_groups;
...@@ -562,9 +576,15 @@ namespace cg = cooperative_groups; ...@@ -562,9 +576,15 @@ namespace cg = cooperative_groups;
* and writes. * and writes.
*/ */
fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> { fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> {
write!(w, "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n")?; write!(
w,
"\t__shared__ cge::block_tile_memory<1024> block_sync_shared;\n"
)?;
write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?; write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?;
write!(w, "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n")?; write!(
w,
"\tcg::thread_block block = cge::this_thread_block(block_sync_shared);\n"
)?;
Ok(()) Ok(())
} }
...@@ -1709,24 +1729,25 @@ namespace cg = cooperative_groups; ...@@ -1709,24 +1729,25 @@ namespace cg = cooperative_groups;
}; };
write!( write!(
thread_block_tiles, thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
use_thread_per_id, cg_tile, use_thread_per_id use_thread_per_id, cg_tile, use_thread_per_id
)?; )?;
let cg_tile_use = self.get_cg_tile(id, CGType::Use); let cg_tile_use = self.get_cg_tile(id, CGType::Use);
write!( write!(
thread_block_tiles, thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
use_thread_quota, cg_tile_use, use_thread_quota use_thread_quota, cg_tile_use, use_thread_quota
)?; )?;
let available_thread_quota = available_thread_quota.unwrap(); let available_thread_quota = available_thread_quota.unwrap();
let cg_tile_available = self.get_cg_tile(id, CGType::Available); let cg_tile_available = self.get_cg_tile(id, CGType::Available);
write!( write!(
thread_block_tiles, thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
available_thread_quota, cg_tile_available, available_thread_quota available_thread_quota, cg_tile_available, available_thread_quota
)?; )?;
if parallel_factor.is_none() { if parallel_factor.is_none() {
write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, true))?; write!(thread_block_tiles, "\t{};\n", self.get_fork_iter(id, true))?;
write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, false))?;
write!(w_init, "\tgoto {};\n", self.get_block_name(id, true))?; write!(w_init, "\tgoto {};\n", self.get_block_name(id, true))?;
} }
} }
......
...@@ -22,4 +22,4 @@ hercules_cg = { path = "../hercules_cg" } ...@@ -22,4 +22,4 @@ hercules_cg = { path = "../hercules_cg" }
hercules_ir = { path = "../hercules_ir" } hercules_ir = { path = "../hercules_ir" }
nestify = "*" nestify = "*"
bimap = "*" bimap = "*"
egg = "*" #egg = "*"
...@@ -221,6 +221,31 @@ fn preliminary_fixups( ...@@ -221,6 +221,31 @@ fn preliminary_fixups(
} }
} }
// Add region nodes between join nodes and loop headers to aid in block
// placement.
for (_, join) in fork_join_map {
let control_user = editor
.get_users(*join)
.filter(|id| nodes[id.idx()].is_control())
.next()
.unwrap();
if nodes[control_user.idx()].is_fork()
|| nodes[control_user.idx()]
.try_region()
.map(|preds| preds.len() > 1)
.unwrap_or(false)
{
let success = editor.edit(|mut edit| {
let region = edit.add_node(Node::Region {
preds: Box::new([*join]),
});
edit.replace_all_uses_where(*join, region, |id| *id == control_user)
});
assert!(success);
return true;
}
}
false false
} }
......
...@@ -3,10 +3,22 @@ use std::fmt::{Error, Write}; ...@@ -3,10 +3,22 @@ use std::fmt::{Error, Write};
use hercules_ir::*; use hercules_ir::*;
use egg::*; //use egg::*;
use crate::*; use crate::*;
pub fn rewrite_math_expressions(
editor: &mut FunctionEditor,
device: Device,
typing: &Vec<TypeID>,
fork_join_map: &HashMap<NodeID, NodeID>,
nodes_in_fork_joins: &HashMap<NodeID, HashSet<NodeID>>,
reduce_einsums: &(MathEnv, HashMap<NodeID, MathID>),
) {
panic!("PANIC: The rewrite math expressions pass is currently disabled, as including egg increases compile times and we're not using it currently.");
}
/*
define_language! { define_language! {
enum MathLanguage { enum MathLanguage {
"zero" = Zero, "zero" = Zero,
...@@ -164,3 +176,4 @@ fn egg_print_math_expr<W: Write>(id: MathID, env: &MathEnv, w: &mut W) -> Result ...@@ -164,3 +176,4 @@ fn egg_print_math_expr<W: Write>(id: MathID, env: &MathEnv, w: &mut W) -> Result
_ => Err(Error::default()), _ => Err(Error::default()),
} }
} }
*/
...@@ -5,6 +5,7 @@ use std::future::Future; ...@@ -5,6 +5,7 @@ use std::future::Future;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, write_bytes, NonNull}; use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::OnceLock; use std::sync::OnceLock;
/* /*
...@@ -928,3 +929,30 @@ unsafe impl GlobalAlloc for AlignedAlloc { ...@@ -928,3 +929,30 @@ unsafe impl GlobalAlloc for AlignedAlloc {
#[global_allocator] #[global_allocator]
static A: AlignedAlloc = AlignedAlloc; static A: AlignedAlloc = AlignedAlloc;
pub struct SpinBarrier {
num: usize,
waiting: AtomicUsize,
gen: AtomicUsize,
}
impl SpinBarrier {
pub const fn new(num: usize) -> Self {
SpinBarrier {
num,
waiting: AtomicUsize::new(0),
gen: AtomicUsize::new(0),
}
}
pub fn wait(&self) {
let old_gen = self.gen.load(Ordering::Acquire);
let old_waiting = self.waiting.fetch_add(1, Ordering::Relaxed);
if old_waiting + 1 == self.num {
self.waiting.store(0, Ordering::Relaxed);
self.gen.fetch_add(1, Ordering::Release);
} else {
while old_gen == self.gen.load(Ordering::Acquire) {}
}
}
}
...@@ -51,7 +51,9 @@ if feature("cuda") { ...@@ -51,7 +51,9 @@ if feature("cuda") {
fork-coalesce(*); fork-coalesce(*);
infer-schedules(*); infer-schedules(*);
dce(*); dce(*);
rewrite(*); //rewrite(*);
let out = outline(matmul@outer);
gpu(out);
fixpoint { fixpoint {
simplify-cfg(*); simplify-cfg(*);
dce(*); dce(*);
......
...@@ -33,10 +33,11 @@ fixpoint { ...@@ -33,10 +33,11 @@ fixpoint {
reduce-slf(*); reduce-slf(*);
simpl!(*); simpl!(*);
fork-extend[32](layer_forward@inner_loop); fork-extend[32768](layer_forward@inner_loop);
clean-monoid-reduces(layer_forward); clean-monoid-reduces(layer_forward);
simpl!(layer_forward); simpl!(layer_forward);
fork-tile[32, 0, false, true](layer_forward@inner_loop); fork-tile[32768, 0, false, true](layer_forward@inner_loop);
fork-tile[1024, 1, false, true](layer_forward@inner_loop);
clean-monoid-reduces(layer_forward); clean-monoid-reduces(layer_forward);
let out = fork-split(layer_forward@inner_loop); let out = fork-split(layer_forward@inner_loop);
clean-monoid-reduces(layer_forward); clean-monoid-reduces(layer_forward);
......
...@@ -13,41 +13,22 @@ fn bfs_bench(c: &mut Criterion) { ...@@ -13,41 +13,22 @@ fn bfs_bench(c: &mut Criterion) {
let mut r = runner!(bfs); let mut r = runner!(bfs);
group.bench_function("bfs bench 4096", |b| { let mut bench = |name, input: &'_ str| {
let input = "data/graph4096.txt";
let (nodes, source, edges) = parse_graph(input.into()).unwrap();
let n = nodes.len() as u64;
let m = edges.len() as u64;
let nodes = HerculesImmBox::from(&nodes as &[Node]);
let edges = HerculesImmBox::from(&edges as &[u32]);
b.iter(|| {
async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await });
})
});
group.bench_function("bfs bench 65536", |b| {
let input = "data/graph65536.txt";
let (nodes, source, edges) = parse_graph(input.into()).unwrap();
let n = nodes.len() as u64;
let m = edges.len() as u64;
let nodes = HerculesImmBox::from(&nodes as &[Node]);
let edges = HerculesImmBox::from(&edges as &[u32]);
b.iter(|| {
async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await });
})
});
group.bench_function("bfs bench 64M", |b| {
let input = "/scratch/aaronjc4/rodinia_3.1/data/bfs/graph64M.txt";
let (nodes, source, edges) = parse_graph(input.into()).expect("PANIC: Couldn't read input file for 64M benchmark. Currently, this benchmark uses a hard-coded path, so it can only be run on the lab machines."); let (nodes, source, edges) = parse_graph(input.into()).expect("PANIC: Couldn't read input file for 64M benchmark. Currently, this benchmark uses a hard-coded path, so it can only be run on the lab machines.");
let n = nodes.len() as u64; let n = nodes.len() as u64;
let m = edges.len() as u64; let m = edges.len() as u64;
let nodes = HerculesImmBox::from(&nodes as &[Node]); let nodes = HerculesImmBox::from(&nodes as &[Node]);
let edges = HerculesImmBox::from(&edges as &[u32]); let edges = HerculesImmBox::from(&edges as &[u32]);
b.iter(|| { group.bench_function(name, |b| {
async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); b.iter(|| {
}) async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await });
}); })
});
};
bench("bfs bench 4096", "data/graph4096.txt");
bench("bfs bench 65536", "data/graph65536.txt");
bench("bfs bench 64M", "/scratch/aaronjc4/rodinia_3.1/data/bfs/graph64M.txt");
} }
criterion_group!(benches, bfs_bench); criterion_group!(benches, bfs_bench);
......
...@@ -24,7 +24,7 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] ...@@ -24,7 +24,7 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
@cost_init for i in 0..n { @cost_init for i in 0..n {
cost[i] = -1; cost[i] = -1;
} }
cost[source as u64] = 0; @cost_init cost[source as u64] = 0;
// Nodes that were updated in the current iteration // Nodes that were updated in the current iteration
let updated: bool[n]; let updated: bool[n];
......
...@@ -46,16 +46,14 @@ let traverse_body = outline(inner); ...@@ -46,16 +46,14 @@ let traverse_body = outline(inner);
let (outer, inner) = fork-reshape[[1], [0]](collect); let (outer, inner) = fork-reshape[[1], [0]](collect);
let collect_body = outline(inner); let collect_body = outline(inner);
let init_body = init; fork-tile[32, 0, false, true](init);
// Following code seems to generate breaking RT code let (outer, inner) = fork-reshape[[1], [0]](init);
//fork-tile[32, 0, false, true](init); let init_body = outline(inner);
//let (outer, inner) = fork-reshape[[1], [0]](init);
//let init_body = outline(inner); inline(bfs@cost_init, bfs@loop1, bfs@loop2);
//inline(bfs@cost_init);
inline(bfs@loop1, bfs@loop2);
delete-uncalled(*); delete-uncalled(*);
const-inline(*); const-inline(*);
simpl!(*);
unforkify(init_body, traverse_body, collect_body); unforkify(init_body, traverse_body, collect_body);
simpl!(*); simpl!(*);
......
...@@ -38,8 +38,12 @@ fixpoint { ...@@ -38,8 +38,12 @@ fixpoint {
} }
simpl!(collect); simpl!(collect);
fork-tile[32, 0, false, true](init);
let (outer, inner) = fork-reshape[[1], [0]](init);
let init_body = outline(inner);
fork-tile[1024, 0, false, true](traverse, collect); fork-tile[1024, 0, false, true](traverse, collect);
fork-split(traverse, collect); fork-split(traverse, collect);
unforkify(init); unforkify(init_body);
gcm(*); gcm(*);
...@@ -13,38 +13,38 @@ fn cfd_bench(c: &mut Criterion) { ...@@ -13,38 +13,38 @@ fn cfd_bench(c: &mut Criterion) {
group.sample_size(10); group.sample_size(10);
let mut euler_bench = |name, data_file, iterations| { let mut euler_bench = |name, data_file, iterations| {
group.bench_function(name, |b| { let mut r = runner!(euler);
let mut r = runner!(euler); let block_size = 16;
let block_size = 16; let FarFieldConditions {
let FarFieldConditions { ff_variable,
ff_variable, ff_fc_momentum_x,
ff_fc_momentum_x, ff_fc_momentum_y,
ff_fc_momentum_y, ff_fc_momentum_z,
ff_fc_momentum_z, ff_fc_density_energy,
ff_fc_density_energy, } = set_far_field_conditions();
} = set_far_field_conditions(); let GeometryData {
let GeometryData { nelr,
nelr, areas,
areas, elements_surrounding_elements,
elements_surrounding_elements, normals,
normals, } = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines.");
} = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines."); let mut variables = initialize_variables(nelr, &ff_variable);
let mut variables = initialize_variables(nelr, &ff_variable);
let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
let areas = HerculesImmBox::from(areas.as_slice()); let areas = HerculesImmBox::from(areas.as_slice());
let elements_surrounding_elements = let elements_surrounding_elements =
HerculesImmBox::from(elements_surrounding_elements.as_slice()); HerculesImmBox::from(elements_surrounding_elements.as_slice());
let normals_x = HerculesImmBox::from(normals.x.as_slice()); let normals_x = HerculesImmBox::from(normals.x.as_slice());
let normals_y = HerculesImmBox::from(normals.y.as_slice()); let normals_y = HerculesImmBox::from(normals.y.as_slice());
let normals_z = HerculesImmBox::from(normals.z.as_slice()); let normals_z = HerculesImmBox::from(normals.z.as_slice());
group.bench_function(name, |b| {
b.iter(|| { b.iter(|| {
async_std::task::block_on(async { async_std::task::block_on(async {
r.run( r.run(
...@@ -91,38 +91,38 @@ fn cfd_bench(c: &mut Criterion) { ...@@ -91,38 +91,38 @@ fn cfd_bench(c: &mut Criterion) {
); );
let mut pre_euler_bench = |name, data_file, iterations| { let mut pre_euler_bench = |name, data_file, iterations| {
group.bench_function(name, |b| { let mut r = runner!(pre_euler);
let mut r = runner!(pre_euler); let block_size = 16;
let block_size = 16; let FarFieldConditions {
let FarFieldConditions { ff_variable,
ff_variable, ff_fc_momentum_x,
ff_fc_momentum_x, ff_fc_momentum_y,
ff_fc_momentum_y, ff_fc_momentum_z,
ff_fc_momentum_z, ff_fc_density_energy,
ff_fc_density_energy, } = set_far_field_conditions();
} = set_far_field_conditions(); let GeometryData {
let GeometryData { nelr,
nelr, areas,
areas, elements_surrounding_elements,
elements_surrounding_elements, normals,
normals, } = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines.");
} = read_domain_geometry(data_file, block_size).expect("PANIC: Couldn't read input for CFD benchmark. Currently, the path for the largest CFD benchmark is hard-coded, so it can only be run on the lab machines."); let mut variables = initialize_variables(nelr, &ff_variable);
let mut variables = initialize_variables(nelr, &ff_variable);
let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
let areas = HerculesImmBox::from(areas.as_slice()); let areas = HerculesImmBox::from(areas.as_slice());
let elements_surrounding_elements = let elements_surrounding_elements =
HerculesImmBox::from(elements_surrounding_elements.as_slice()); HerculesImmBox::from(elements_surrounding_elements.as_slice());
let normals_x = HerculesImmBox::from(normals.x.as_slice()); let normals_x = HerculesImmBox::from(normals.x.as_slice());
let normals_y = HerculesImmBox::from(normals.y.as_slice()); let normals_y = HerculesImmBox::from(normals.y.as_slice());
let normals_z = HerculesImmBox::from(normals.z.as_slice()); let normals_z = HerculesImmBox::from(normals.z.as_slice());
group.bench_function(name, |b| {
b.iter(|| { b.iter(|| {
async_std::task::block_on(async { async_std::task::block_on(async {
r.run( r.run(
......