diff --git a/.gitignore b/.gitignore index 45f2e61bd195df944e7db69346bdd6b434308c6f..29eb3e0464828eef0883e286d0a2c7e5396fe2f3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,11 +4,10 @@ *.out *.ll *.c +*.cu *.o *.a *.hrt .*.swp .vscode *_env - -juno_samples/matmul/src/matmul_indented.jn diff --git a/Cargo.lock b/Cargo.lock index a1eb77de19e2b05c2d96c44fe6579b9b71fdd62f..3f1e2e7c9c082585011c0f2b27b0b89fe2ea38c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -374,9 +374,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.9" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8293772165d9345bdaaa39b45b2109591e63fe5e6fbc23c6ff930a048aa310b" +checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" dependencies = [ "jobserver", "libc", @@ -1278,9 +1278,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" dependencies = [ "adler2", "simd-adler32", diff --git a/Cargo.toml b/Cargo.toml index c57125f7f51c3a8d5c51e81d1fc482ea920fd62d..4e5826caf264974b1e50e13f0bf91083ad43e097 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,13 +5,13 @@ members = [ "hercules_ir", "hercules_opt", "hercules_rt", - + "hercules_tools/hercules_driver", "juno_frontend", "juno_scheduler", "juno_build", - + #"hercules_test/hercules_interpreter", #"hercules_test/hercules_tests", diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index 731e57e3cbc8c5bd3f829c6f2aa85602cc84a001..31c502121e4e02ebb6ff1d121d6cd2d2de90739d 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -638,7 +638,10 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str let ret_type_pnt = self.get_type(*self.return_type_id, true); write!(w, "\t{} ret;\n", ret_type_pnt)?; write!(w, "\tcudaMalloc((void**)&ret, sizeof({}));\n", ret_type)?; - write!(pass_args, ", ret")?; + if !first_param { + write!(pass_args, ", ")?; + } + write!(pass_args, "ret")?; } write!(w, "\t{}_gpu<<<{}, {}, {}>>>({});\n", self.function.name, num_blocks, num_threads, dynamic_shared_offset, pass_args)?; write!(w, "\tcudaDeviceSynchronize();\n")?; @@ -897,6 +900,9 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str for data in self.bbs.1[control.idx()].iter() { self.codegen_data_node(*data, KernelState::OutBlock, None, None, None, false, extra_dim_collects, dynamic_shared_offset, body, &mut tabs)?; } + for data in self.bbs.1[control.idx()].iter() { + self.codegen_data_phi(*data, tabs, body)?; + } Ok(()) }) } @@ -929,6 +935,9 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str for data in self.bbs.1[control.idx()].iter() { self.codegen_data_node(*data, state, None, None, None, false, extra_dim_collects, dynamic_shared_offset, body, &mut tabs)?; } + for data in self.bbs.1[control.idx()].iter() { + self.codegen_data_phi(*data, tabs, body)?; + } } // Then generate data and control for the single block fork if it exists if block_fork.is_some() { @@ -943,6 +952,9 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str for data in self.bbs.1[control.idx()].iter() { self.codegen_data_node(*data, state, Some(num_threads), None, Some(block_fork.unwrap()), false, extra_dim_collects, dynamic_shared_offset, body, &mut tabs)?; } + for data in self.bbs.1[control.idx()].iter() { + self.codegen_data_phi(*data, tabs, body)?; + } } } // Then generate for the thread fork tree through Fork node traversal. @@ -1022,6 +1034,9 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str &mut tabs, )?; } + for data in self.bbs.1[control.idx()].iter() { + self.codegen_data_phi(*data, tabs, body)?; + } } for child in fork_tree.get(&curr_fork).unwrap() { self.codegen_data_control_traverse( @@ -1398,15 +1413,8 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str panic!("Unsupported data node type") } } - // Since the data uses and reducts are responsible for updating Phi and - // Reduce nodes, respectively, we check and emit those for each data node. - if let Some(phis) = self.label_data_for_phi.get(&id) { - let val = self.get_value(id, false, false); - for phi in phis { - let phi_val = self.get_value(*phi, false, false); - write!(w, "{}{} = {};\n", tabs, phi_val, val,)?; - } - } + // Since reducts are responsible for updating Reduce nodes, + // we check and emit those for each data node. if let Some(reduces) = self.reduct_reduce_map.get(&id) { let val = self.get_value(id, false, false); for reduce in reduces { @@ -1417,6 +1425,22 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str Ok(()) } + /* + * Update Phi assignments for each data node. This is run after all data nodes + * for given control block have been emitted. + */ + fn codegen_data_phi(&self, id: NodeID, num_tabs: usize, w: &mut String) -> Result<(), Error> { + let tabs = "\t".repeat(num_tabs); + if let Some(phis) = self.label_data_for_phi.get(&id) { + let val = self.get_value(id, false, false); + for phi in phis { + let phi_val = self.get_value(*phi, false, false); + write!(w, "{}{} = {};\n", tabs, phi_val, val)?; + } + } + Ok(()) + } + fn codegen_control_node( &self, id: NodeID, diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 4d526366808e8b2aea39fecd81f6c00269ffb154..fe6fee0946b4eb05548ac79c31996095585073e1 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -1,5 +1,5 @@ use std::collections::HashMap; -use std::env::temp_dir; +use std::env::{temp_dir}; use std::fmt::Write; use std::fs::File; use std::io::Write as _; @@ -23,6 +23,7 @@ pub fn xdot_module( let mut rng = rand::thread_rng(); let num: u64 = rng.gen(); tmp_path.push(format!("hercules_dot_{}.dot", num)); + let tmp_path = std::path::PathBuf::from(format!("hercules_dot.dot")); let mut file = File::create(&tmp_path).expect("PANIC: Unable to open output file."); let mut contents = String::new(); write_dot( diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs index 149e4eeb84f157714a6529ba830541e54f599900..dbc24016d1888a5bdce229f48e4522f1fa41b03b 100644 --- a/hercules_opt/src/pass.rs +++ b/hercules_opt/src/pass.rs @@ -1079,6 +1079,12 @@ impl PassManager { file.write_all(cuda_ir.as_bytes()) .expect("PANIC: Unable to write output CUDA IR file contents."); + let cuda_text_path = format!("{}.cu", module_name); + let mut cuda_text_file = File::create(&cuda_text_path) + .expect("PANIC: Unable to open CUDA IR text file."); + cuda_text_file.write_all(cuda_ir.as_bytes()) + .expect("PANIC: Unable to write CUDA IR text file contents."); + let mut nvcc_process = Command::new("nvcc") .arg("-c") .arg("-O3") diff --git a/hercules_samples/call/Cargo.toml b/hercules_samples/call/Cargo.toml index 52c588e7032a79bf0043a6334e818e68b727f1b3..a5a44c2e061043101e377bff1492a4eaba995927 100644 --- a/hercules_samples/call/Cargo.toml +++ b/hercules_samples/call/Cargo.toml @@ -4,11 +4,14 @@ version = "0.1.0" authors = ["Russel Arbore <rarbore2@illinois.edu>"] edition = "2021" +[features] +cuda = ["juno_build/cuda"] + [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } rand = "*" async-std = "*" with_builtin_macros = "0.1.0" diff --git a/hercules_samples/ccp/build.rs b/hercules_samples/ccp/build.rs index 0b984a0f4f3581cd4cd1a3b2dcf067ef4ca4c83e..f04d48c7d0ea6df8b16d70b05cedabfc04c1f6f2 100644 --- a/hercules_samples/ccp/build.rs +++ b/hercules_samples/ccp/build.rs @@ -6,11 +6,4 @@ fn main() { .unwrap() .build() .unwrap(); - - #[cfg(feature = "cuda")] - println!("cargo::rustc-link-search=native=/usr/lib/x86_64-linux-gnu/"); - #[cfg(feature = "cuda")] - println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); - #[cfg(feature = "cuda")] - println!("cargo:rustc-link-lib=cudart"); } diff --git a/hercules_samples/dot/build.rs b/hercules_samples/dot/build.rs index 43cd34f9300acbd21598ef5685802be1a0ffd58d..2a239bc6c3ebd3780cb15358375c59bdfb2e25ae 100644 --- a/hercules_samples/dot/build.rs +++ b/hercules_samples/dot/build.rs @@ -6,11 +6,4 @@ fn main() { .unwrap() .build() .unwrap(); - - #[cfg(feature = "cuda")] - println!("cargo::rustc-link-search=native=/usr/lib/x86_64-linux-gnu/"); - #[cfg(feature = "cuda")] - println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); - #[cfg(feature = "cuda")] - println!("cargo:rustc-link-lib=cudart"); } diff --git a/hercules_samples/fac/build.rs b/hercules_samples/fac/build.rs index 6863b27bc4802abf8dded89759cef5f9e26fbe95..4d8226f11183d9500e6affec4c46110e8626ee69 100644 --- a/hercules_samples/fac/build.rs +++ b/hercules_samples/fac/build.rs @@ -6,11 +6,4 @@ fn main() { .unwrap() .build() .unwrap(); - - #[cfg(feature = "cuda")] - println!("cargo::rustc-link-search=native=/usr/lib/x86_64-linux-gnu/"); - #[cfg(feature = "cuda")] - println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); - #[cfg(feature = "cuda")] - println!("cargo:rustc-link-lib=cudart"); } diff --git a/hercules_samples/matmul/build.rs b/hercules_samples/matmul/build.rs index b170024bd280183bc559af88e0570d90c1e0fed2..08478deaac459d9a94f79fdabce37da9a1205f89 100644 --- a/hercules_samples/matmul/build.rs +++ b/hercules_samples/matmul/build.rs @@ -6,11 +6,4 @@ fn main() { .unwrap() .build() .unwrap(); - - #[cfg(feature = "cuda")] - println!("cargo::rustc-link-search=native=/usr/lib/x86_64-linux-gnu/"); - #[cfg(feature = "cuda")] - println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); - #[cfg(feature = "cuda")] - println!("cargo:rustc-link-lib=cudart"); } diff --git a/hercules_samples/matmul/src/matmul.hir b/hercules_samples/matmul/src/matmul.hir index 400ab5e14e91169ce9152a79c3401456baa18241..b0c31da4b32207bf3308c4b03583bc74c61f3737 100644 --- a/hercules_samples/matmul/src/matmul.hir +++ b/hercules_samples/matmul/src/matmul.hir @@ -1,9 +1,9 @@ -fn matmul(a: array(i32, 16, 64), b: array(i32, 64, 32)) -> array(i32, 16, 32) - c = constant(array(i32, 16, 32), []) - i_j_ctrl = fork(start, 16, 32) +fn matmul<3>(a: array(i32, #0, #1), b: array(i32, #1, #2)) -> array(i32, #0, #2) + c = constant(array(i32, #0, #2), []) + i_j_ctrl = fork(start, #0, #2) i_idx = thread_id(i_j_ctrl, 0) j_idx = thread_id(i_j_ctrl, 1) - k_ctrl = fork(i_j_ctrl, 64) + k_ctrl = fork(i_j_ctrl, #1) k_idx = thread_id(k_ctrl, 0) k_join_ctrl = join(k_ctrl) i_j_join_ctrl = join(k_join_ctrl) @@ -15,4 +15,4 @@ fn matmul(a: array(i32, 16, 64), b: array(i32, 64, 32)) -> array(i32, 16, 32) add = add(mul, dot) dot = reduce(k_join_ctrl, zero, add) update_c = write(update_i_j_c, dot, position(i_idx, j_idx)) - update_i_j_c = reduce(i_j_join_ctrl, c, update_c) \ No newline at end of file + update_i_j_c = reduce(i_j_join_ctrl, c, update_c) diff --git a/juno_build/build.rs b/juno_build/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..7ba34c8c9395926bd16a92dcf831eadda3f3af26 --- /dev/null +++ b/juno_build/build.rs @@ -0,0 +1,8 @@ +fn main() { + #[cfg(feature = "cuda")] + println!("cargo::rustc-link-search=native=/usr/lib/x86_64-linux-gnu/"); + #[cfg(feature = "cuda")] + println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); + #[cfg(feature = "cuda")] + println!("cargo:rustc-link-lib=cudart"); +} diff --git a/juno_build/src/lib.rs b/juno_build/src/lib.rs index 0c676e4c1b203c53e98a8430e0f2354104540e07..4066080653c75b42705ed129c0b43f41b30491df 100644 --- a/juno_build/src/lib.rs +++ b/juno_build/src/lib.rs @@ -27,7 +27,7 @@ impl JunoCompiler { src_path: None, out_path: None, verify: JunoVerify::None, - x_dot: false, + x_dot: true, schedule: JunoSchedule::None, } } diff --git a/juno_samples/antideps/Cargo.toml b/juno_samples/antideps/Cargo.toml index e492e2ae7f23c5afa4f9c1ddc50e98dd93f40e4d..e6f38e095f492b2d7934b6ff969fa468137d1d2c 100644 --- a/juno_samples/antideps/Cargo.toml +++ b/juno_samples/antideps/Cargo.toml @@ -8,11 +8,14 @@ edition = "2021" name = "juno_antideps" path = "src/main.rs" +[features] +cuda = ["juno_build/cuda", "hercules_rt/cuda"] + [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } -hercules_rt = { path = "../../hercules_rt", features = [] } +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } with_builtin_macros = "0.1.0" async-std = "*" diff --git a/juno_samples/antideps/antideps.mod b/juno_samples/antideps/antideps.mod new file mode 100644 index 0000000000000000000000000000000000000000..b4abaef48222ace6f66264d6746dd2d7924b9039 Binary files /dev/null and b/juno_samples/antideps/antideps.mod differ diff --git a/juno_samples/casts_and_intrinsics/Cargo.toml b/juno_samples/casts_and_intrinsics/Cargo.toml index 83d5be587125dc9c846b38c3c15089dfda5c3ff8..9fac18b77db9e01bb55ad491a93f1558a5339c54 100644 --- a/juno_samples/casts_and_intrinsics/Cargo.toml +++ b/juno_samples/casts_and_intrinsics/Cargo.toml @@ -8,10 +8,13 @@ edition = "2021" name = "juno_casts_and_intrinsics" path = "src/main.rs" +[features] +cuda = ["juno_build/cuda"] + [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } with_builtin_macros = "0.1.0" async-std = "*" diff --git a/juno_samples/cava/Cargo.toml b/juno_samples/cava/Cargo.toml index dfde5978e688cc9635308f6418573cd4e4a22c65..ff375d80d637e2ac9baa1377e168f912454b5fbc 100644 --- a/juno_samples/cava/Cargo.toml +++ b/juno_samples/cava/Cargo.toml @@ -9,11 +9,11 @@ name = "juno_cava" path = "src/main.rs" [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } -hercules_rt = { path = "../../hercules_rt", features = [] } +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } async-std = "*" clap = { version = "*", features = ["derive"] } image = "*" diff --git a/juno_samples/concat/Cargo.toml b/juno_samples/concat/Cargo.toml index 888a083fa41dd1f154d8b3e72a58d666b43860c2..24ba1acff56dc3997162c1f3afc2334fd3428155 100644 --- a/juno_samples/concat/Cargo.toml +++ b/juno_samples/concat/Cargo.toml @@ -9,10 +9,10 @@ name = "juno_concat" path = "src/main.rs" [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } -hercules_rt = { path = "../../hercules_rt", features = [] } +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } with_builtin_macros = "0.1.0" async-std = "*" diff --git a/juno_samples/implicit_clone/Cargo.toml b/juno_samples/implicit_clone/Cargo.toml index 4f5387e7a3068536657e50b9852b9006052318f7..9612937190366bea144ade4b9ae334b228233b49 100644 --- a/juno_samples/implicit_clone/Cargo.toml +++ b/juno_samples/implicit_clone/Cargo.toml @@ -8,11 +8,14 @@ edition = "2021" name = "juno_implicit_clone" path = "src/main.rs" +[features] +cuda = ["juno_build/cuda", "hercules_rt/cuda"] + [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } -hercules_rt = { path = "../../hercules_rt", features = [] } +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } with_builtin_macros = "0.1.0" async-std = "*" diff --git a/juno_samples/matmul/Cargo.toml b/juno_samples/matmul/Cargo.toml index 1e004dcd5cd9a56dacfe9f70cee64ae883b90695..eac83d15f430a7752d14a1ec3105c88209143477 100644 --- a/juno_samples/matmul/Cargo.toml +++ b/juno_samples/matmul/Cargo.toml @@ -8,12 +8,15 @@ edition = "2021" name = "juno_matmul" path = "src/main.rs" +[features] +cuda = ["juno_build/cuda", "hercules_rt/cuda"] + [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } -hercules_rt = { path = "../../hercules_rt", features = [] } +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } with_builtin_macros = "0.1.0" async-std = "*" rand = "*" diff --git a/juno_samples/nested_ccp/Cargo.toml b/juno_samples/nested_ccp/Cargo.toml index bcf1fff8fac48b20868b5c6d84f350d98723c637..5ee3f747477c901806642818553e94b046b50242 100644 --- a/juno_samples/nested_ccp/Cargo.toml +++ b/juno_samples/nested_ccp/Cargo.toml @@ -8,11 +8,14 @@ edition = "2021" name = "juno_nested_ccp" path = "src/main.rs" +[features] +cuda = ["juno_build/cuda", "hercules_rt/cuda"] + [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } -hercules_rt = { path = "../../hercules_rt", features = [] } +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } with_builtin_macros = "0.1.0" async-std = "*" diff --git a/juno_samples/simple3/Cargo.toml b/juno_samples/simple3/Cargo.toml index c66dc977c705ec15958a0511b507b0edabe8b663..36d50dbd75c410b88142bb5ca5d680fbac0be3ce 100644 --- a/juno_samples/simple3/Cargo.toml +++ b/juno_samples/simple3/Cargo.toml @@ -8,11 +8,14 @@ edition = "2021" name = "juno_simple3" path = "src/main.rs" +[features] +cuda = ["juno_build/cuda", "hercules_rt/cuda"] + [build-dependencies] -juno_build = { path = "../../juno_build", features = [] } +juno_build = { path = "../../juno_build" } [dependencies] -juno_build = { path = "../../juno_build", features = [] } -hercules_rt = { path = "../../hercules_rt", features = [] } +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } with_builtin_macros = "0.1.0" async-std = "*"