Skip to content
Snippets Groups Projects

Set up cava benchmark

Merged rarbore2 requested to merge cava_opt_3 into main
3 files
+ 11
13
Compare changes
  • Side-by-side
  • Inline
Files
3
#![feature(concat_idents)]
use std::iter::zip;
use rand::random;
#[cfg(feature = "cuda")]
use hercules_rt::CUDABox;
use hercules_rt::{runner, HerculesCPURef};
use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox};
juno_build::juno!("matmul");
// Test driver for the Juno-compiled `matmul`: builds two random i32 input
// buffers, fills a host-side reference result `correct_c`, runs the generated
// runner, and asserts the runner's output equals the reference.
//
// NOTE(review): this text is an inline diff rendering, not a compilable file.
// Old and new versions of changed lines appear back to back without +/-
// markers (two `const J`, two sets of `a`/`b` bindings, several run/compare
// sequences), and the hunk header below elides part of the reference loop.
fn main() {
async_std::task::block_on(async {
const I: usize = 256;
// NOTE(review): the next two lines look like the old and new value of `J`
// (8 -> 64); only one of them can exist in the real file — confirm which.
const J: usize = 8; // hardcoded constant in matmul.hir
const J: usize = 64;
const K: usize = 128;
// `a` holds I*J elements and `b` holds J*K elements. `%` on a signed integer
// keeps the sign of the dividend, so every element lies in -99..=99.
// NOTE(review): the `let mut` pair and the plain `let` pair appear to be the
// old and new side of the same two lines.
let mut a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect();
let mut b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect();
let a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect();
let b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect();
// Reference output buffer: I*K zeros, filled in by the loop nest below.
let mut correct_c: Box<[i32]> = (0..I * K).map(|_| 0).collect();
for i in 0..I {
for k in 0..K {
// Diff hunk boundary: the lines between here and the closing braces are not
// shown. Presumably an inner loop over `J` accumulating into `correct_c` —
// TODO confirm against the full file.
@@ -23,26 +22,11 @@ fn main() {
}
}
}
// NOTE(review): the hunk header counts (26 old lines, 11 new lines) are
// consistent with the two cfg-gated blocks below (20 lines) being the removed
// side and the five `HerculesImmBox`/`HerculesMutBox` lines after them being
// the added side — confirm against the merge request.
//
// Non-CUDA path: wrap the host slices in `HerculesCPURef`, run the kernel,
// and compare the returned buffer (viewed as `i32`) with the reference.
#[cfg(not(feature = "cuda"))]
{
let a = HerculesCPURef::from_slice(&mut a);
let b = HerculesCPURef::from_slice(&mut b);
let mut r = runner!(matmul);
let c = r.run(I as u64, J as u64, K as u64, a, b).await;
assert_eq!(c.as_slice::<i32>(), &*correct_c);
}
// CUDA path: build `CUDABox` values from the host slices, run the kernel on
// their references, then call `to_cpu_ref` with a zeroed host buffer
// (presumably copying the result back — verify) before comparing.
#[cfg(feature = "cuda")]
{
let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a));
let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b));
let mut r = runner!(matmul);
let c = r
.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref())
.await;
let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice();
c.to_cpu_ref(&mut c_cpu);
assert_eq!(&*c_cpu, &*correct_c);
}
// Single path with no cfg attribute: inputs are wrapped in `HerculesImmBox`
// and passed via `.to()`, the runner's result is wrapped in `HerculesMutBox`,
// and its slice view is compared with the reference.
let a = HerculesImmBox::from(a.as_ref());
let b = HerculesImmBox::from(b.as_ref());
let mut r = runner!(matmul);
let mut c: HerculesMutBox<i32> = HerculesMutBox::from(r.run(I as u64, J as u64, K as u64, a.to(), b.to()).await);
assert_eq!(c.as_slice(), correct_c.as_ref());
});
}
Loading