Skip to content
Snippets Groups Projects
Commit 54acf3e2 authored by Russel Arbore
Browse files

fix gpu backend to emit namespaces properly across cuda versions

parent 94950efe
No related branches found
No related tags found
1 merge request: !216 "More optimizations"
Pipeline #202054 passed
...@@ -354,7 +354,6 @@ impl GPUContext<'_> { ...@@ -354,7 +354,6 @@ impl GPUContext<'_> {
write!( write!(
w, w,
" "
#define _CG_ABI_EXPERIMENTAL
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stddef.h> #include <stddef.h>
...@@ -362,8 +361,23 @@ impl GPUContext<'_> { ...@@ -362,8 +361,23 @@ impl GPUContext<'_> {
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <math_constants.h> #include <math_constants.h>
#include <mma.h> #include <mma.h>
#if (CUDA_VERSION >= 12000)
#else
#define _CG_ABI_EXPERIMENTAL
#endif
#include <cooperative_groups.h> #include <cooperative_groups.h>
#include <cooperative_groups/reduce.h> #include <cooperative_groups/reduce.h>
#if (CUDA_VERSION >= 12000)
namespace cg = cooperative_groups;
namespace cge = cooperative_groups;
#else
namespace cg = cooperative_groups;
namespace cge = cooperative_groups::experimental;
#endif
#include <cuda_bf16.h> #include <cuda_bf16.h>
namespace cg = cooperative_groups; namespace cg = cooperative_groups;
...@@ -564,12 +578,12 @@ namespace cg = cooperative_groups; ...@@ -564,12 +578,12 @@ namespace cg = cooperative_groups;
fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> { fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> {
write!( write!(
w, w,
"\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n" "\t__shared__ cge::block_tile_memory<1024> block_sync_shared;\n"
)?; )?;
write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?; write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?;
write!( write!(
w, w,
"\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n" "\tcg::thread_block block = cge::this_thread_block(block_sync_shared);\n"
)?; )?;
Ok(()) Ok(())
} }
...@@ -1715,20 +1729,20 @@ namespace cg = cooperative_groups; ...@@ -1715,20 +1729,20 @@ namespace cg = cooperative_groups;
}; };
write!( write!(
thread_block_tiles, thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
use_thread_per_id, cg_tile, use_thread_per_id use_thread_per_id, cg_tile, use_thread_per_id
)?; )?;
let cg_tile_use = self.get_cg_tile(id, CGType::Use); let cg_tile_use = self.get_cg_tile(id, CGType::Use);
write!( write!(
thread_block_tiles, thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
use_thread_quota, cg_tile_use, use_thread_quota use_thread_quota, cg_tile_use, use_thread_quota
)?; )?;
let available_thread_quota = available_thread_quota.unwrap(); let available_thread_quota = available_thread_quota.unwrap();
let cg_tile_available = self.get_cg_tile(id, CGType::Available); let cg_tile_available = self.get_cg_tile(id, CGType::Available);
write!( write!(
thread_block_tiles, thread_block_tiles,
"\tcg::thread_block_tile<{}> {} = cg::experimental::tiled_partition<{}>(block);\n", "\tcg::thread_block_tile<{}> {} = cge::tiled_partition<{}>(block);\n",
available_thread_quota, cg_tile_available, available_thread_quota available_thread_quota, cg_tile_available, available_thread_quota
)?; )?;
if parallel_factor.is_none() { if parallel_factor.is_none() {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment