From 9a58de9e055004d05d0df66d062987c94819b790 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Tue, 25 Feb 2025 14:46:40 -0600
Subject: [PATCH] hercules_cg/gpu: fix align-up math in emitted size/offset exprs

---
 hercules_cg/src/gpu.rs | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index 25bbf1be..d857fdda 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -1879,8 +1879,10 @@ namespace cg = cooperative_groups;
                     let element_size = self.get_size(*element_type, None);
                     let element_align = self.get_alignment(*element_type);
                     index_ptr.push_str(&format!(
-                        " * (({} + {} - 1 / {} * {}))",
-                        element_size, element_align, element_align, element_align
+                        " * (({} + {}) & ~{})",
+                        element_size,
+                        element_align - 1,
+                        element_align - 1
                     ));
                     type_id = *element_type;
                 }
@@ -1932,8 +1934,10 @@ namespace cg = cooperative_groups;
                     let alignment = self.get_alignment(*type_id);
                     let size = self.get_size(*type_id, None);
                     *dynamic_shared_offset = format!(
-                        "(({} + {} - 1) / {}) * {}",
-                        dynamic_shared_offset, alignment, alignment, alignment
+                        "(({} + {}) & ~{})",
+                        dynamic_shared_offset,
+                        alignment - 1,
+                        alignment - 1,
                     );
                     write!(
                         w,
@@ -1981,8 +1985,10 @@ namespace cg = cooperative_groups;
                     let alignment = self.get_alignment(*type_id);
                     let size = self.get_size(*type_id, None);
                     *dynamic_shared_offset = format!(
-                        "(({} + {} - 1) / {}) * {}",
-                        dynamic_shared_offset, alignment, alignment, alignment
+                        "(({} + {}) & ~{})",
+                        dynamic_shared_offset,
+                        alignment - 1,
+                        alignment - 1
                     );
                     write!(
                         w,
@@ -2023,8 +2029,10 @@ namespace cg = cooperative_groups;
                 let alignment = self.get_alignment(*type_id);
                 let size = self.get_size(*type_id, None);
                 *dynamic_shared_offset = format!(
-                    "(({} + {} - 1) / {}) * {}",
-                    dynamic_shared_offset, alignment, alignment, alignment
+                    "(({} + {}) & ~{})",
+                    dynamic_shared_offset,
+                    alignment - 1,
+                    alignment - 1,
                 );
                 write!(
                     w,
@@ -2055,11 +2063,10 @@ namespace cg = cooperative_groups;
                 let array_size = multiply_dcs(extents);
                 let elem_align = self.get_alignment(type_id);
                 format!(
-                    "(({} + {} - 1) / {} * {}) * {}",
+                    "(({} + {}) & ~{}) * {}",
                     self.get_size(*element_type, None),
-                    elem_align,
-                    elem_align,
-                    elem_align,
+                    elem_align - 1,
+                    elem_align - 1,
                     array_size
                 )
             }
@@ -2073,10 +2080,7 @@ namespace cg = cooperative_groups;
                         if acc == "0" {
                             size
                         } else {
-                            format!(
-                                "({} + {} - 1) / {} * {} + {}",
-                                acc, align, align, align, size
-                            )
+                            format!("(({} + {}) & ~{}) + {}", acc, align - 1, align - 1, size)
                         }
                     })
             }
@@ -2101,8 +2105,10 @@ namespace cg = cooperative_groups;
                     .max()
                     .unwrap_or(0);
                 format!(
-                    "({} + {} - 1) / {} * {}",
-                    max_size, max_alignment, max_alignment, max_alignment
+                    "(({} + {}) & ~{})",
+                    max_size,
+                    max_alignment - 1,
+                    max_alignment - 1,
                 )
             }
             _ => {
-- 
GitLab