diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_perf.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_perf.cc index 219ea430b53af9e34629f731f0a1d6bac20a061b..7c9583f291ea908c4c89a8b56045e06585a4f83a 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_perf.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/lenet_perf.cc @@ -97,7 +97,7 @@ void testLenetTanh(){ // conv_mode, conv_precision, 2, 2, 1); void* conv1out = tensorConvSampSim(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision, 2, 1); + conv_mode, conv_precision, 4, 0); // NOTE: For tensorAdd, the only dimension that MUST match is channels tensorAdd(conv1out, conv1_bias); // NOTE: In place operation @@ -107,8 +107,12 @@ void testLenetTanh(){ void* conv1_tanh = tensorTanh(pool1out); // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorConvPerfCuda(conv1_tanh, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision, 1, 2, 1); + //void* conv2out = tensorConvPerfCuda(conv1_tanh, conv2_filter, 2, 2, 1, 1, + // conv_mode, conv_precision, 1, 2, 1); + + void* conv2out = tensorConvSampSim(conv1_tanh, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision, 2, 0); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_simulation.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_simulation.h index 6cc48b0a570e8b2995b75fc5213d7d79431ef6ee..d5a1e903f644c4d27477bac4d8587fb177b58021 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_simulation.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_simulation.h @@ -238,7 +238,8 @@ void sampleFilterElems(int N, int ch = (i % (c * h * w)) / (h * w); int n = i / (c * h * w); - int local_index = row * w + col; + //int local_index = row * w + col; + int local_index = (ch * (h * w)) + (row * w) + col; //data[n * (c * h * w) + ch * (h * w) + row * (w) + col] = 1.0;