From f1f319bf28aed5a1b988813aa166fb43b267227f Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 13 Feb 2025 14:26:08 -0600
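Subject: [PATCH] multi-core scale

Add a CPU schedule (cpu.sch) for the cava sample and register it in
build.rs. The schedule forkifies, tiles and splits `scale`, outlines
the resulting `out._0_scale.fj1` fork-join into its own function, and
places `scale` itself on the host device.

Supporting changes:
- cava.jn: label the `res` allocation in `scale` with `@const` and drop
  the `* 1.0` factor from the conversion.
- juno_scheduler: add a one-argument `print` pass that prints its
  argument, and let identifiers and macro names start with `_`.
- main.rs: print the problem size in the harness and re-enable the
  full-size test as `cava_test_full`.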

---
 juno_samples/cava/build.rs    |  2 ++
 juno_samples/cava/src/cava.jn |  4 ++--
 juno_samples/cava/src/cpu.sch | 44 +++++++++++++++++++++++++++++++++++
 juno_samples/cava/src/main.rs |  6 ++---
 juno_scheduler/src/compile.rs |  2 ++
 juno_scheduler/src/ir.rs      |  6 ++---
 juno_scheduler/src/lang.l     |  4 ++--
 juno_scheduler/src/pm.rs      |  3 +++
 8 files changed, 60 insertions(+), 11 deletions(-)
 create mode 100644 juno_samples/cava/src/cpu.sch

diff --git a/juno_samples/cava/build.rs b/juno_samples/cava/build.rs
index 1b6dddf4..ff7e2b6b 100644
--- a/juno_samples/cava/build.rs
+++ b/juno_samples/cava/build.rs
@@ -13,6 +13,8 @@ fn main() {
     JunoCompiler::new()
         .file_in_src("cava.jn")
         .unwrap()
+        .schedule_in_src("cpu.sch")
+        .unwrap()
         .build()
         .unwrap();
 }
diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn
index 95a73f5b..bb8afded 100644
--- a/juno_samples/cava/src/cava.jn
+++ b/juno_samples/cava/src/cava.jn
@@ -22,12 +22,12 @@ fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a {
 const CHAN : u64 = 3;
 
 fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, col] {
-  let res : f32[CHAN, row, col];
+  @const let res : f32[CHAN, row, col];
 
   for chan = 0 to CHAN {
     for r = 0 to row {
       for c = 0 to col {
-        res[chan, r, c] = input[chan, r, c] as f32 * 1.0 / 255;
+        res[chan, r, c] = input[chan, r, c] as f32 / 255;
       }
     }
   }
diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch
new file mode 100644
index 00000000..0913d4b3
--- /dev/null
+++ b/juno_samples/cava/src/cpu.sch
@@ -0,0 +1,44 @@
+macro simpl!(X) {
+  ccp(X);
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+  infer-schedules(X);
+}
+
+simpl!(*);
+
+inline(denoise);
+cpu(scale, demosaic, denoise, transform, gamut, tone_map, descale);
+
+ip-sroa(*);
+sroa(*);
+simpl!(*);
+
+no-memset(scale@const);
+fixpoint {
+  forkify(scale);
+  fork-guard-elim(scale);
+  fork-coalesce(scale);
+}
+simpl!(*);
+fork-dim-merge(scale);
+simpl!(*);
+fork-tile[2048, 0, false](scale);
+simpl!(*);
+let out = fork-split(scale);
+simpl!(*);
+let out = outline(out._0_scale.fj1);
+ip-sroa(*);
+sroa(*);
+simpl!(*);
+host(scale);
+unforkify(out);
+xdot[true](scale, out);
+
+gcm(*);
+fixpoint {
+  float-collections(*);
+  dce(*);
+  gcm(*);
+}
diff --git a/juno_samples/cava/src/main.rs b/juno_samples/cava/src/main.rs
index b4a0f6fd..a940d6eb 100644
--- a/juno_samples/cava/src/main.rs
+++ b/juno_samples/cava/src/main.rs
@@ -159,6 +159,7 @@ fn cava_harness(args: CavaInputs) {
         tonemap,
     } = load_cam_model(cam_model, CHAN).expect("Error loading camera model");
 
+    println!("Running cava with {} rows, {} columns, and {} control points.", rows, cols, num_ctrl_pts);
     let result = run_cava(
         rows,
         cols,
@@ -227,10 +228,8 @@ fn cava_test_small() {
     });
 }
 
-// Disabling the larger test because of how long it takes
-/*
 #[test]
-fn cava_test() {
+fn cava_test_full() {
     cava_harness(CavaInputs {
         input: "examples/raw_tulips.bin".to_string(),
         output: None,
@@ -239,4 +238,3 @@ fn cava_test() {
         cam_model: "cam_models/NikonD7000".to_string(),
     });
 }
-*/
diff --git a/juno_scheduler/src/compile.rs b/juno_scheduler/src/compile.rs
index 7887b9b3..237ff3b9 100644
--- a/juno_scheduler/src/compile.rs
+++ b/juno_scheduler/src/compile.rs
@@ -130,6 +130,8 @@ impl FromStr for Appliable {
             "serialize" => Ok(Appliable::Pass(ir::Pass::Serialize)),
             "write-predication" => Ok(Appliable::Pass(ir::Pass::WritePredication)),
 
+            "print" => Ok(Appliable::Pass(ir::Pass::Print)),
+
             "cpu" | "llvm" => Ok(Appliable::Device(Device::LLVM)),
             "gpu" | "cuda" | "nvidia" => Ok(Appliable::Device(Device::CUDA)),
             "host" | "rust" | "rust-async" => Ok(Appliable::Device(Device::AsyncRust)),
diff --git a/juno_scheduler/src/ir.rs b/juno_scheduler/src/ir.rs
index a9ee7956..4b88d6a2 100644
--- a/juno_scheduler/src/ir.rs
+++ b/juno_scheduler/src/ir.rs
@@ -27,6 +27,7 @@ pub enum Pass {
     Outline,
     PhiElim,
     Predication,
+    Print,
     ReduceSLF,
     ReuseProducts,
     SLF,
@@ -42,10 +43,9 @@ pub enum Pass {
 impl Pass {
     pub fn num_args(&self) -> usize {
         match self {
-            Pass::Xdot => 1,
+            Pass::Xdot | Pass::Print => 1,
+            Pass::ForkFissionBufferize | Pass::ForkInterchange => 2,
             Pass::ForkChunk => 4,
-            Pass::ForkFissionBufferize => 2,
-            Pass::ForkInterchange => 2,
             _ => 0,
         }
     }
diff --git a/juno_scheduler/src/lang.l b/juno_scheduler/src/lang.l
index 9d4c34bf..2f34f01f 100644
--- a/juno_scheduler/src/lang.l
+++ b/juno_scheduler/src/lang.l
@@ -43,8 +43,8 @@ panic[\t \n\r]+after "panic_after"
 print[\t \n\r]+iter  "print_iter"
 stop[\t \n\r]+after  "stop_after"
 
-[a-zA-Z][a-zA-Z0-9_\-]*! "MACRO"
-[a-zA-Z][a-zA-Z0-9_\-]*  "ID"
+[a-zA-Z_][a-zA-Z0-9_\-]*! "MACRO"
+[a-zA-Z_][a-zA-Z0-9_\-]*  "ID"
 [0-9]+                   "INT"
 
 .                     "UNMATCHED"
diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs
index de725608..45b36fcc 100644
--- a/juno_scheduler/src/pm.rs
+++ b/juno_scheduler/src/pm.rs
@@ -2423,6 +2423,9 @@ fn run_pass(
             // Put BasicBlocks back, since it's needed for Codegen.
             pm.bbs = bbs;
         }
+        Pass::Print => {
+            println!("{:?}", args.get(0));
+        }
     }
     println!("Ran Pass: {:?}", pass);
 
-- 
GitLab