Round 1 of rodinia schedule optimization

Merged: rarbore2 requested to merge rodinia_opt1 into main
1 file changed: +13, -5
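
The change threads a new module_name parameter through gpu_codegen and stores it in GPUContext, then folds it into every symbol the backend emits. As a minimal sketch of the effect on the generated CUDA, assuming a module named rodinia, a function named bfs, and a launch bound of 1024 (none of these names or values come from the diff):

// Before this change: the kernel is named after the source function alone.
__global__ void __launch_bounds__(1024) bfs_gpu(int *p) { p[threadIdx.x] = 0; }

// After this change: the module name is prepended, so kernels generated from
// different modules can presumably coexist in one binary without name clashes.
__global__ void __launch_bounds__(1024) rodinia_bfs_gpu(int *p) { p[threadIdx.x] = 0; }
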
@@ -14,6 +14,7 @@ use crate::*;
  * of similarities with the CPU LLVM generation plus custom GPU parallelization.
  */
 pub fn gpu_codegen<W: Write>(
+    module_name: &str,
     function: &Function,
     types: &Vec<Type>,
     constants: &Vec<Constant>,
@@ -170,6 +171,7 @@ pub fn gpu_codegen<W: Write>(
     };
     let ctx = GPUContext {
+        module_name,
         function,
         types,
         constants,
@@ -199,6 +201,7 @@ struct GPUKernelParams {
 }
 struct GPUContext<'a> {
+    module_name: &'a str,
     function: &'a Function,
     types: &'a Vec<Type>,
     constants: &'a Vec<Constant>,
@@ -395,8 +398,8 @@ namespace cg = cooperative_groups;
     fn codegen_kernel_begin<W: Write>(&self, w: &mut W) -> Result<(), Error> {
         write!(
             w,
-            "__global__ void __launch_bounds__({}) {}_gpu(",
-            self.kernel_params.max_num_threads, self.function.name
+            "__global__ void __launch_bounds__({}) {}_{}_gpu(",
+            self.kernel_params.max_num_threads, self.module_name, self.function.name
         )?;
         let mut first_param = true;
         // The first parameter is a pointer to GPU backing memory, if it's
@@ -645,7 +648,7 @@ namespace cg = cooperative_groups;
         } else {
             write!(w, "{}", self.get_type(self.function.return_types[0], false))?;
         }
-        write!(w, " {}(", self.function.name)?;
+        write!(w, " {}_{}(", self.module_name, self.function.name)?;
         let mut first_param = true;
         // The first parameter is a pointer to GPU backing memory, if it's
@@ -721,8 +724,13 @@ namespace cg = cooperative_groups;
         write!(w, "\tcudaError_t err;\n")?;
         write!(
             w,
-            "\t{}_gpu<<<{}, {}, {}>>>({});\n",
-            self.function.name, num_blocks, num_threads, dynamic_shared_offset, pass_args
+            "\t{}_{}_gpu<<<{}, {}, {}>>>({});\n",
+            self.module_name,
+            self.function.name,
+            num_blocks,
+            num_threads,
+            dynamic_shared_offset,
+            pass_args
         )?;
         write!(w, "\terr = cudaGetLastError();\n")?;
         write!(
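
The last two hunks apply the same prefix to the emitted host-side wrapper and to the kernel launch inside it. A hedged sketch of what the updated format strings would produce, again with assumed names (rodinia, bfs), an assumed launch configuration, and simplified error handling:

#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical kernel emitted for function bfs in module rodinia.
__global__ void __launch_bounds__(1024) rodinia_bfs_gpu(int *data) {
    data[blockIdx.x * blockDim.x + threadIdx.x] = 0;
}

// Hypothetical host wrapper mirroring the " {}_{}(" and "{}_{}_gpu<<<...>>>"
// format strings in the diff; the dynamic shared memory size is hard-coded to 0
// here, where the real generator passes dynamic_shared_offset.
extern "C" void rodinia_bfs(int *data, int num_blocks, int num_threads) {
    cudaError_t err;
    rodinia_bfs_gpu<<<num_blocks, num_threads, 0>>>(data);
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "%s\n", cudaGetErrorString(err));
    }
}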