Skip to content
Snippets Groups Projects
Commit 9fd852f1 authored by Yasmin Sarita
Browse files

changed depthwise convolution

parent f36bfdf8
No related branches found
No related tags found
No related merge requests found
......@@ -114,6 +114,7 @@ __global__ void depthwise_conv(float* const __restrict__ y,
#pragma unroll
for (int i = 0; i < num; i++) {
if(b + i < B)
y4d(b + i, m, 0, tx) = C[i];
}
......@@ -176,7 +177,7 @@ void* tensorConvCutlass(void* input_ptr, void* filter_ptr,
}*/
dim3 grid((n / 8), c);
dim3 grid((n / 8) + 1, c);
dim3 block(h * w);
depthwise_conv8 << <grid, block >> > ((float*)output->gpu_data,
(float*)input->gpu_data, (float*)filter->gpu_data,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment