From e73e2a976eccadc249d7e9560dec63c41e123426 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 16 Feb 2025 18:16:37 -0600 Subject: [PATCH 1/4] emit aligns --- hercules_cg/src/cpu.rs | 13 ++++++++----- juno_scheduler/src/pm.rs | 3 +++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs index 7d87170b..a19218c7 100644 --- a/hercules_cg/src/cpu.rs +++ b/hercules_cg/src/cpu.rs @@ -103,8 +103,9 @@ impl<'a> CPUContext<'a> { } else { write!( w, - "{} noalias nofree nonnull noundef %p{}", + "{} noalias nofree nonnull noundef align({}) %p{}", self.get_type(*ty), + get_type_alignment(&self.types, *ty), idx )?; } @@ -506,10 +507,11 @@ impl<'a> CPUContext<'a> { // load. write!( body, - " {} = load {}, ptr {}\n", + " {} = load {}, ptr {}, align {}\n", self.get_value(id, false), self.get_type(self_ty), - index_ptr_name + index_ptr_name, + get_type_alignment(self.types, collect_ty), )?; } else { // If this read doesn't reach a primitive type, just return @@ -538,9 +540,10 @@ impl<'a> CPUContext<'a> { // perform a single store of the data value. 
write!( body, - " store {}, ptr {}\n", + " store {}, ptr {}, align {}\n", self.get_value(data, true), - index_ptr_name + index_ptr_name, + get_type_alignment(self.types, collect_ty), )?; } else { // If the data item being written is not a primitive type, diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index 410e614c..1c00e3d0 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -795,6 +795,7 @@ impl PassManager { .arg(&llvm_path) .arg("-c") .arg("-O3") + .arg("-ffast-math") .arg("-march=native") .arg("-o") .arg(&llvm_object) @@ -830,6 +831,8 @@ impl PassManager { let mut nvcc_process = Command::new("nvcc") .arg("-c") .arg("-O3") + .arg("-ffast-math") + .arg("-march=native") .arg("-diag-suppress") .arg("177") .arg("-o") -- GitLab From 7764ca63040186f02411516448f8e7f81236fc0c Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 16 Feb 2025 18:19:50 -0600 Subject: [PATCH 2/4] Use larger alignment for arrays --- hercules_cg/src/cpu.rs | 6 ++---- hercules_cg/src/lib.rs | 5 +++-- hercules_rt/src/lib.rs | 6 ++++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs index a19218c7..f90657a4 100644 --- a/hercules_cg/src/cpu.rs +++ b/hercules_cg/src/cpu.rs @@ -507,11 +507,10 @@ impl<'a> CPUContext<'a> { // load. write!( body, - " {} = load {}, ptr {}, align {}\n", + " {} = load {}, ptr {}\n", self.get_value(id, false), self.get_type(self_ty), index_ptr_name, - get_type_alignment(self.types, collect_ty), )?; } else { // If this read doesn't reach a primitive type, just return @@ -540,10 +539,9 @@ impl<'a> CPUContext<'a> { // perform a single store of the data value. 
write!( body, - " store {}, ptr {}, align {}\n", + " store {}, ptr {}\n", self.get_value(data, true), index_ptr_name, - get_type_alignment(self.types, collect_ty), )?; } else { // If the data item being written is not a primitive type, diff --git a/hercules_cg/src/lib.rs b/hercules_cg/src/lib.rs index 15946f72..af2420d8 100644 --- a/hercules_cg/src/lib.rs +++ b/hercules_cg/src/lib.rs @@ -16,7 +16,7 @@ use std::collections::BTreeMap; use hercules_ir::*; -pub const LARGEST_ALIGNMENT: usize = 8; +pub const LARGEST_ALIGNMENT: usize = 32; /* * The alignment of a type does not depend on dynamic constants. @@ -33,7 +33,8 @@ pub fn get_type_alignment(types: &Vec<Type>, ty: TypeID) -> usize { .map(|id| get_type_alignment(types, *id)) .max() .unwrap_or(1), - Type::Array(elem, _) => get_type_alignment(types, elem), + // Use a large alignment for arrays to generate better vector code. + Type::Array(_, _) => LARGEST_ALIGNMENT, } } diff --git a/hercules_rt/src/lib.rs b/hercules_rt/src/lib.rs index f8fdf2ef..0c3ffd80 100644 --- a/hercules_rt/src/lib.rs +++ b/hercules_rt/src/lib.rs @@ -13,8 +13,10 @@ use std::sync::OnceLock; * src/rt.rs (the RT backend). 
*/ +pub const LARGEST_ALIGNMENT: usize = 32; + pub unsafe fn __cpu_alloc(size: usize) -> *mut u8 { - let ptr = alloc(Layout::from_size_align(size, 16).unwrap()); + let ptr = alloc(Layout::from_size_align(size, LARGEST_ALIGNMENT).unwrap()); if cfg!(feature = "debug") { eprintln!("__cpu_alloc: {:?}, {}", ptr, size); assert!(!ptr.is_null() || size == 0); @@ -27,7 +29,7 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) { eprintln!("__cpu_dealloc: {:?}, {}", ptr, size); assert!(!ptr.is_null() || size == 0); } - dealloc(ptr, Layout::from_size_align(size, 16).unwrap()) + dealloc(ptr, Layout::from_size_align(size, LARGEST_ALIGNMENT).unwrap()) } pub unsafe fn __cpu_zero_mem(ptr: *mut u8, size: usize) { -- GitLab From 6ba77d30bb843202e19bdc746d80356fc8a652c2 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 16 Feb 2025 18:23:53 -0600 Subject: [PATCH 3/4] Remove -ffast-math from nvcc invocation --- juno_scheduler/src/pm.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index 1c00e3d0..e1ff113e 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -831,7 +831,6 @@ impl PassManager { let mut nvcc_process = Command::new("nvcc") .arg("-c") .arg("-O3") - .arg("-ffast-math") .arg("-march=native") .arg("-diag-suppress") .arg("177") .arg("-o") -- GitLab From c4f5220dca24514978ab8adfa41f096b12bce54e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 16 Feb 2025 18:27:54 -0600 Subject: [PATCH 4/4] Remove -march=native from nvcc invocation --- juno_scheduler/src/pm.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index e1ff113e..4555e024 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -831,7 +831,6 @@ impl PassManager { let mut nvcc_process = Command::new("nvcc") .arg("-c") .arg("-O3") - .arg("-march=native") .arg("-diag-suppress") .arg("177") .arg("-o") -- GitLab