Skip to content
Snippets Groups Projects

More optimizations

Merged rarbore2 requested to merge more_opt4 into main
3 files
+ 14
6
Compare changes
  • Side-by-side
  • Inline
Files
3
+ 10
3
@@ -562,9 +562,15 @@ namespace cg = cooperative_groups;
* and writes.
*/
fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> {
    // Writes the cooperative-groups helper declarations into `w`, one line
    // each. Every declaration is emitted exactly once: writing any of them
    // twice would redeclare the same name in the generated code.
    write!(
        w,
        "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n"
    )?;
    write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?;
    // `block` is built from `block_sync_shared`, so this line must follow
    // the shared-memory declaration written above.
    write!(
        w,
        "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n"
    )?;
    Ok(())
}
@@ -1726,7 +1732,8 @@ namespace cg = cooperative_groups;
available_thread_quota, cg_tile_available, available_thread_quota
)?;
if parallel_factor.is_none() {
write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, true))?;
write!(thread_block_tiles, "\t{};\n", self.get_fork_iter(id, true))?;
write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, false))?;
write!(w_init, "\tgoto {};\n", self.get_block_name(id, true))?;
}
}
Loading