From f1f319bf28aed5a1b988813aa166fb43b267227f Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 13 Feb 2025 14:26:08 -0600 Subject: [PATCH] multi-core scale --- juno_samples/cava/build.rs | 2 ++ juno_samples/cava/src/cava.jn | 4 ++-- juno_samples/cava/src/cpu.sch | 44 +++++++++++++++++++++++++++++++++++ juno_samples/cava/src/main.rs | 6 ++--- juno_scheduler/src/compile.rs | 2 ++ juno_scheduler/src/ir.rs | 6 ++--- juno_scheduler/src/lang.l | 4 ++-- juno_scheduler/src/pm.rs | 3 +++ 8 files changed, 60 insertions(+), 11 deletions(-) create mode 100644 juno_samples/cava/src/cpu.sch diff --git a/juno_samples/cava/build.rs b/juno_samples/cava/build.rs index 1b6dddf4..ff7e2b6b 100644 --- a/juno_samples/cava/build.rs +++ b/juno_samples/cava/build.rs @@ -13,6 +13,8 @@ fn main() { JunoCompiler::new() .file_in_src("cava.jn") .unwrap() + .schedule_in_src("cpu.sch") + .unwrap() .build() .unwrap(); } diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn index 95a73f5b..bb8afded 100644 --- a/juno_samples/cava/src/cava.jn +++ b/juno_samples/cava/src/cava.jn @@ -22,12 +22,12 @@ fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a { const CHAN : u64 = 3; fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, col] { - let res : f32[CHAN, row, col]; + @const let res : f32[CHAN, row, col]; for chan = 0 to CHAN { for r = 0 to row { for c = 0 to col { - res[chan, r, c] = input[chan, r, c] as f32 * 1.0 / 255; + res[chan, r, c] = input[chan, r, c] as f32 / 255; } } } diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch new file mode 100644 index 00000000..0913d4b3 --- /dev/null +++ b/juno_samples/cava/src/cpu.sch @@ -0,0 +1,44 @@ +macro simpl!(X) { + ccp(X); + gvn(X); + phi-elim(X); + dce(X); + infer-schedules(X); +} + +simpl!(*); + +inline(denoise); +cpu(scale, demosaic, denoise, transform, gamut, tone_map, descale); + +ip-sroa(*); +sroa(*); +simpl!(*); + +no-memset(scale@const); +fixpoint { + forkify(scale); + fork-guard-elim(scale); + fork-coalesce(scale); +} +simpl!(*); +fork-dim-merge(scale); +simpl!(*); +fork-tile[2048, 0, false](scale); +simpl!(*); +let out = fork-split(scale); +simpl!(*); +let out = outline(out._0_scale.fj1); +ip-sroa(*); +sroa(*); +simpl!(*); +host(scale); +unforkify(out); +xdot[true](scale, out); + +gcm(*); +fixpoint { + float-collections(*); + dce(*); + gcm(*); +} diff --git a/juno_samples/cava/src/main.rs b/juno_samples/cava/src/main.rs index b4a0f6fd..a940d6eb 100644 --- a/juno_samples/cava/src/main.rs +++ b/juno_samples/cava/src/main.rs @@ -159,6 +159,7 @@ fn cava_harness(args: CavaInputs) { tonemap, } = load_cam_model(cam_model, CHAN).expect("Error loading camera model"); + println!("Running cava with {} rows, {} columns, and {} control points.", rows, cols, num_ctrl_pts); let result = run_cava( rows, cols, @@ -227,10 +228,8 @@ fn cava_test_small() { }); } -// Disabling the larger test because of how long it takes -/* #[test] -fn cava_test() { +fn cava_test_full() { cava_harness(CavaInputs { input: "examples/raw_tulips.bin".to_string(), output: None, @@ -239,4 +238,3 @@ fn cava_test() { cam_model: "cam_models/NikonD7000".to_string(), }); } -*/ diff --git a/juno_scheduler/src/compile.rs b/juno_scheduler/src/compile.rs index 7887b9b3..237ff3b9 100644 --- a/juno_scheduler/src/compile.rs +++ b/juno_scheduler/src/compile.rs @@ -130,6 +130,8 @@ impl FromStr for Appliable { "serialize" => Ok(Appliable::Pass(ir::Pass::Serialize)), "write-predication" => Ok(Appliable::Pass(ir::Pass::WritePredication)), + "print" => Ok(Appliable::Pass(ir::Pass::Print)), + "cpu" | "llvm" => Ok(Appliable::Device(Device::LLVM)), "gpu" | "cuda" | "nvidia" => Ok(Appliable::Device(Device::CUDA)), "host" | "rust" | "rust-async" => Ok(Appliable::Device(Device::AsyncRust)), diff --git a/juno_scheduler/src/ir.rs b/juno_scheduler/src/ir.rs index a9ee7956..4b88d6a2 100644 --- a/juno_scheduler/src/ir.rs +++ b/juno_scheduler/src/ir.rs @@ -27,6 +27,7 @@ pub enum Pass { Outline, PhiElim, Predication, + Print, ReduceSLF, ReuseProducts, SLF, @@ -42,10 +43,9 @@ pub enum Pass { impl Pass { pub fn num_args(&self) -> usize { match self { - Pass::Xdot => 1, + Pass::Xdot | Pass::Print => 1, + Pass::ForkFissionBufferize | Pass::ForkInterchange => 2, Pass::ForkChunk => 4, - Pass::ForkFissionBufferize => 2, - Pass::ForkInterchange => 2, _ => 0, } } diff --git a/juno_scheduler/src/lang.l b/juno_scheduler/src/lang.l index 9d4c34bf..2f34f01f 100644 --- a/juno_scheduler/src/lang.l +++ b/juno_scheduler/src/lang.l @@ -43,8 +43,8 @@ panic[\t \n\r]+after "panic_after" print[\t \n\r]+iter "print_iter" stop[\t \n\r]+after "stop_after" -[a-zA-Z][a-zA-Z0-9_\-]*! "MACRO" -[a-zA-Z][a-zA-Z0-9_\-]* "ID" +[a-zA-Z_][a-zA-Z0-9_\-]*! "MACRO" +[a-zA-Z_][a-zA-Z0-9_\-]* "ID" [0-9]+ "INT" . "UNMATCHED" diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index de725608..45b36fcc 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -2423,6 +2423,9 @@ fn run_pass( // Put BasicBlocks back, since it's needed for Codegen. pm.bbs = bbs; } + Pass::Print => { + println!("{:?}", args.get(0)); + } } println!("Ran Pass: {:?}", pass); -- GitLab