From 9a58de9e055004d05d0df66d062987c94819b790 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 25 Feb 2025 14:46:40 -0600 Subject: [PATCH] whoops --- hercules_cg/src/gpu.rs | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index 25bbf1be..d857fdda 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -1879,8 +1879,10 @@ namespace cg = cooperative_groups; let element_size = self.get_size(*element_type, None); let element_align = self.get_alignment(*element_type); index_ptr.push_str(&format!( - " * (({} + {} - 1 / {} * {}))", - element_size, element_align, element_align, element_align + " * (({} + {}) & ~{})", + element_size, + element_align - 1, + element_align - 1 )); type_id = *element_type; } @@ -1932,8 +1934,10 @@ namespace cg = cooperative_groups; let alignment = self.get_alignment(*type_id); let size = self.get_size(*type_id, None); *dynamic_shared_offset = format!( - "(({} + {} - 1) / {}) * {}", - dynamic_shared_offset, alignment, alignment, alignment + "(({} + {}) & ~{})", + dynamic_shared_offset, + alignment - 1, + alignment - 1, ); write!( w, @@ -1981,8 +1985,10 @@ namespace cg = cooperative_groups; let alignment = self.get_alignment(*type_id); let size = self.get_size(*type_id, None); *dynamic_shared_offset = format!( - "(({} + {} - 1) / {}) * {}", - dynamic_shared_offset, alignment, alignment, alignment + "(({} + {}) & ~{})", + dynamic_shared_offset, + alignment - 1, + alignment - 1 ); write!( w, @@ -2023,8 +2029,10 @@ namespace cg = cooperative_groups; let alignment = self.get_alignment(*type_id); let size = self.get_size(*type_id, None); *dynamic_shared_offset = format!( - "(({} + {} - 1) / {}) * {}", - dynamic_shared_offset, alignment, alignment, alignment + "(({} + {}) & ~{})", + dynamic_shared_offset, + alignment - 1, + alignment - 1, ); write!( w, @@ -2055,11 +2063,10 @@ namespace cg = cooperative_groups; let array_size = multiply_dcs(extents); let elem_align = self.get_alignment(type_id); format!( - "(({} + {} - 1) / {} * {}) * {}", + "(({} + {}) & ~{}) * {}", self.get_size(*element_type, None), - elem_align, - elem_align, - elem_align, + elem_align - 1, + elem_align - 1, array_size ) } @@ -2073,10 +2080,7 @@ namespace cg = cooperative_groups; if acc == "0" { size } else { - format!( - "({} + {} - 1) / {} * {} + {}", - acc, align, align, align, size - ) + format!("(({} + {}) & ~{}) + {}", acc, align - 1, align - 1, size) } }) } @@ -2101,8 +2105,10 @@ namespace cg = cooperative_groups; .max() .unwrap_or(0); format!( - "({} + {} - 1) / {} * {}", - max_size, max_alignment, max_alignment, max_alignment + "(({} + {}) & ~{})", + max_size, + max_alignment - 1, + max_alignment - 1, ) } _ => { -- GitLab