From 429d2aaf9a45a1804359d12ce979d152a55e5c4b Mon Sep 17 00:00:00 2001
From: rarbore2 <rarbore2@illinois.edu>
Date: Sun, 16 Feb 2025 18:36:11 -0600
Subject: [PATCH] Emit align in LLVM, align arrays to 32 bytes, enable -ffast-math

---
 hercules_cg/src/cpu.rs   | 7 ++++---
 hercules_cg/src/lib.rs   | 5 +++--
 hercules_rt/src/lib.rs   | 6 ++++--
 juno_scheduler/src/pm.rs | 1 +
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs
index 7d87170b..f90657a4 100644
--- a/hercules_cg/src/cpu.rs
+++ b/hercules_cg/src/cpu.rs
@@ -103,8 +103,9 @@ impl<'a> CPUContext<'a> {
             } else {
                 write!(
                     w,
-                    "{} noalias nofree nonnull noundef %p{}",
+                    "{} noalias nofree nonnull noundef align({}) %p{}",
                     self.get_type(*ty),
+                    get_type_alignment(&self.types, *ty),
                     idx
                 )?;
             }
@@ -509,7 +510,7 @@ impl<'a> CPUContext<'a> {
                         "  {} = load {}, ptr {}\n",
                         self.get_value(id, false),
                         self.get_type(self_ty),
-                        index_ptr_name
+                        index_ptr_name,
                     )?;
                 } else {
                     // If this read doesn't reach a primitive type, just return
@@ -540,7 +541,7 @@ impl<'a> CPUContext<'a> {
                         body,
                         "  store {}, ptr {}\n",
                         self.get_value(data, true),
-                        index_ptr_name
+                        index_ptr_name,
                     )?;
                 } else {
                     // If the data item being written is not a primitive type,
diff --git a/hercules_cg/src/lib.rs b/hercules_cg/src/lib.rs
index 15946f72..af2420d8 100644
--- a/hercules_cg/src/lib.rs
+++ b/hercules_cg/src/lib.rs
@@ -16,7 +16,7 @@ use std::collections::BTreeMap;
 
 use hercules_ir::*;
 
-pub const LARGEST_ALIGNMENT: usize = 8;
+pub const LARGEST_ALIGNMENT: usize = 32;
 
 /*
  * The alignment of a type does not depend on dynamic constants.
@@ -33,7 +33,8 @@ pub fn get_type_alignment(types: &Vec<Type>, ty: TypeID) -> usize {
             .map(|id| get_type_alignment(types, *id))
             .max()
             .unwrap_or(1),
-        Type::Array(elem, _) => get_type_alignment(types, elem),
+        // Arrays get LARGEST_ALIGNMENT (keep in sync with hercules_rt) for better vector code.
+        Type::Array(_, _) => LARGEST_ALIGNMENT,
     }
 }
 
diff --git a/hercules_rt/src/lib.rs b/hercules_rt/src/lib.rs
index e9b8f11f..4cf9b51a 100644
--- a/hercules_rt/src/lib.rs
+++ b/hercules_rt/src/lib.rs
@@ -13,8 +13,10 @@ use std::sync::OnceLock;
  * src/rt.rs (the RT backend).
  */
 
+pub const LARGEST_ALIGNMENT: usize = 32;
+
 pub unsafe fn __cpu_alloc(size: usize) -> *mut u8 {
-    let ptr = alloc(Layout::from_size_align(size, 16).unwrap());
+    let ptr = alloc(Layout::from_size_align(size, LARGEST_ALIGNMENT).unwrap());
     if cfg!(feature = "debug") {
         eprintln!("__cpu_alloc: {:?}, {}", ptr, size);
         assert!(!ptr.is_null() || size == 0);
@@ -27,7 +29,7 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) {
         eprintln!("__cpu_dealloc: {:?}, {}", ptr, size);
         assert!(!ptr.is_null() || size == 0);
     }
-    dealloc(ptr, Layout::from_size_align(size, 16).unwrap())
+    dealloc(ptr, Layout::from_size_align(size, LARGEST_ALIGNMENT).unwrap())
 }
 
 pub unsafe fn __cpu_zero_mem(ptr: *mut u8, size: usize) {
diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs
index 9b77e51c..d83ff0bb 100644
--- a/juno_scheduler/src/pm.rs
+++ b/juno_scheduler/src/pm.rs
@@ -795,6 +795,7 @@ impl PassManager {
             .arg(&llvm_path)
             .arg("-c")
             .arg("-O3")
+            .arg("-ffast-math")
             .arg("-march=native")
             .arg("-o")
             .arg(&llvm_object)
-- 
GitLab