Skip to content
Snippets Groups Projects
Commit 5ab36921 authored by Russel Arbore
Browse files

Speed up max gradient with tiling + cooperative groups

parent 91f3b55c
No related branches found
No related tags found
1 merge request: !195 "Misc. GPU improvements"
Pipeline #201795 failed
......@@ -1383,15 +1383,15 @@ extern \"C\" {} {}(",
let cg_tile = self.get_cg_tile(nesting_fork.unwrap(), CGType::Use);
#[allow(unreachable_patterns)]
let cg_op = match intrinsic {
- Intrinsic::Max => "max",
- Intrinsic::Min => "min",
+ Intrinsic::Max => "greater",
+ Intrinsic::Min => "less",
_ => unreachable!(),
};
let id_type_name = self.get_type(id_type, false);
write!(
w,
"{}{} = cg::reduce({}, {}, cg::{}<{}>());\n",
- tabs, define_variable, non_reduce_arg, cg_tile, cg_op, id_type_name
+ tabs, define_variable, cg_tile, non_reduce_arg, cg_op, id_type_name
)?;
} else {
let ty = &self.types[id_type.idx()];
......
......@@ -8,6 +8,8 @@ macro simpl!(X) {
infer-schedules(X);
}
+ gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
simpl!(*);
ip-sroa(*);
......@@ -58,6 +60,12 @@ fixpoint {
fork-coalesce(max_gradient);
}
simpl!(max_gradient);
+ fork-dim-merge(max_gradient);
+ simpl!(max_gradient);
+ fork-tile[32, 0, false, true](max_gradient);
+ simpl!(max_gradient);
+ fork-split(max_gradient);
+ simpl!(max_gradient);
no-memset(reject_zero_crossings@res);
fixpoint {
......@@ -70,8 +78,6 @@ simpl!(reject_zero_crossings);
async-call(edge_detection@le, edge_detection@zc);
- gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
simpl!(*);
delete-uncalled(*);
......
......@@ -143,6 +143,11 @@ pub fn edge_detection_harness(args: EdgeDetectionInputs) {
num_frames
};
+ println!(
+ "Running edge with {} rows, {} columns, {} gs, {} sz, and {} sb.",
+ height, width, gs, sz, sb,
+ );
let mut r = runner!(edge_detection);
let mut output = output.map(|filename| {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment