Skip to content
Snippets Groups Projects
Commit fd51ccbd authored by Hashim Sharif
Browse files

Porting Mini-era CNN to HPVM-9 -- compiles with ported NVDLA pass

parent e0cb645f
No related branches found
No related tags found
No related merge requests found
#include <stdio.h>
#include <stdlib.h>
#include <cstring>
#include <string.h>
#include <iostream>
#include <string>
#include <hpvm.h>
#include <tensorUtils.h>
//#include <tensorUtils.h>
// Forward declaration of the weight loader used by main().
// In this file it is always called with a path to a .bin file, data_type 0,
// and four dimension sizes; the returned pointer is stored in RootIn and
// passed into the graph.
// NOTE(review): the definition is not visible here -- presumably provided by
// the tensor runtime / tensorUtils.h; confirm this prototype matches it.
void* readTrainedWeights(const char* file_name, int data_type,
int dim1_size, int dim2_size,
int dim3_size, int dim4_size);
// Leaf node 0: applies __hpvm__tensor_convolution to (t1, t2).
// In root() t1 is the network input and t2 is conv2d_1_w.
// The trailing (0, 0, 1, 1) are presumably padding and stride -- confirm
// against hpvm.h.
// Returns two values to the graph: the result tensor and a size of 0.
void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* conv_out = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
  __hpvm__return(2, conv_out, static_cast<size_t>(0));
}
// Leaf node 1: applies __hpvm__tensor_add to (t1, t2).
// In root() t1 is the output of node 0 and t2 is conv2d_1_b.
// Returns two values to the graph: the result tensor and a size of 0.
void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* biased = __hpvm__tensor_add(t1, t2);
  __hpvm__return(2, biased, static_cast<size_t>(0));
}
// Leaf node 2: applies __hpvm__tensor_relu to t1 (the output of node 1 in
// root()).
// Returns two values to the graph: the result tensor and a size of 0.
void var_2_node(void* t1, size_t bytes_t1) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(1, t1, 0);

  void* activated = __hpvm__tensor_relu(t1);
  __hpvm__return(2, activated, static_cast<size_t>(0));
}
// Leaf node 3: applies __hpvm__tensor_convolution to (t1, t2).
// In root() t1 is the output of node 2 and t2 is conv2d_2_w.
// The trailing (0, 0, 1, 1) are presumably padding and stride -- confirm
// against hpvm.h.
// Returns two values to the graph: the result tensor and a size of 0.
void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* conv_out = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
  __hpvm__return(2, conv_out, static_cast<size_t>(0));
}
// Leaf node 4: applies __hpvm__tensor_add to (t1, t2).
// In root() t1 is the output of node 3 and t2 is conv2d_2_b.
// Returns two values to the graph: the result tensor and a size of 0.
void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* biased = __hpvm__tensor_add(t1, t2);
  __hpvm__return(2, biased, static_cast<size_t>(0));
}
// Leaf node 5: applies __hpvm__tensor_relu to t1 (the output of node 4 in
// root()).
// Returns two values to the graph: the result tensor and a size of 0.
void var_5_node(void* t1, size_t bytes_t1) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(1, t1, 0);

  void* activated = __hpvm__tensor_relu(t1);
  __hpvm__return(2, activated, static_cast<size_t>(0));
}
// Leaf node 6: applies __hpvm__tensor_pool_max to t1 (the output of node 5 in
// root()).
// The arguments (2, 2, 0, 0, 2, 2) are presumably window size, padding and
// stride -- confirm against hpvm.h.
// Returns two values to the graph: the result tensor and a size of 0.
void var_6_node(void* t1, size_t bytes_t1) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(1, t1, 0);

  void* pooled = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
  __hpvm__return(2, pooled, static_cast<size_t>(0));
}
// Leaf node 7: applies __hpvm__tensor_convolution to (t1, t2).
// In root() t1 is the output of node 6 and t2 is conv2d_3_w.
// The trailing (0, 0, 1, 1) are presumably padding and stride -- confirm
// against hpvm.h.
// Returns two values to the graph: the result tensor and a size of 0.
void var_7_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* conv_out = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
  __hpvm__return(2, conv_out, static_cast<size_t>(0));
}
// Leaf node 8: applies __hpvm__tensor_add to (t1, t2).
// In root() t1 is the output of node 7 and t2 is conv2d_3_b.
// Returns two values to the graph: the result tensor and a size of 0.
void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* biased = __hpvm__tensor_add(t1, t2);
  __hpvm__return(2, biased, static_cast<size_t>(0));
}
// Leaf node 9: applies __hpvm__tensor_relu to t1 (the output of node 8 in
// root()).
// Returns two values to the graph: the result tensor and a size of 0.
void var_9_node(void* t1, size_t bytes_t1) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(1, t1, 0);

  void* activated = __hpvm__tensor_relu(t1);
  __hpvm__return(2, activated, static_cast<size_t>(0));
}
// Leaf node 10: applies __hpvm__tensor_convolution to (t1, t2).
// In root() t1 is the output of node 9 and t2 is conv2d_4_w.
// The trailing (0, 0, 1, 1) are presumably padding and stride -- confirm
// against hpvm.h.
// Returns two values to the graph: the result tensor and a size of 0.
void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* conv_out = __hpvm__tensor_convolution(t1, t2, 0, 0, 1, 1);
  __hpvm__return(2, conv_out, static_cast<size_t>(0));
}
// Leaf node 11: applies __hpvm__tensor_add to (t1, t2).
// In root() t1 is the output of node 10 and t2 is conv2d_4_b.
// Returns two values to the graph: the result tensor and a size of 0.
void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* biased = __hpvm__tensor_add(t1, t2);
  __hpvm__return(2, biased, static_cast<size_t>(0));
}
// Leaf node 12: applies __hpvm__tensor_relu to t1 (the output of node 11 in
// root()).
// Returns two values to the graph: the result tensor and a size of 0.
void var_12_node(void* t1, size_t bytes_t1) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(1, t1, 0);

  void* activated = __hpvm__tensor_relu(t1);
  __hpvm__return(2, activated, static_cast<size_t>(0));
}
// Leaf node 13: applies __hpvm__tensor_pool_max to t1 (the output of node 12
// in root()).
// The arguments (2, 2, 0, 0, 2, 2) are presumably window size, padding and
// stride -- confirm against hpvm.h.
// Returns two values to the graph: the result tensor and a size of 0.
void var_13_node(void* t1, size_t bytes_t1) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(1, t1, 0);

  void* pooled = __hpvm__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
  __hpvm__return(2, pooled, static_cast<size_t>(0));
}
// Leaf node 14: applies __hpvm__tensor_mul to (t1, t2).
// In root() t1 is the output of node 13 and t2 is dense_1_w.
// NOTE(review): presumably a matrix product implementing the dense layer --
// confirm against hpvm.h.
// Returns two values to the graph: the result tensor and a size of 0.
void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* product = __hpvm__tensor_mul(t1, t2);
  __hpvm__return(2, product, static_cast<size_t>(0));
}
// Leaf node 15: applies __hpvm__tensor_add to (t1, t2).
// In root() t1 is the output of node 14 and t2 is dense_1_b.
// Returns two values to the graph: the result tensor and a size of 0.
void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* biased = __hpvm__tensor_add(t1, t2);
  __hpvm__return(2, biased, static_cast<size_t>(0));
}
// Leaf node 16: applies __hpvm__tensor_relu to t1 (the output of node 15 in
// root()).
// Returns two values to the graph: the result tensor and a size of 0.
void var_16_node(void* t1, size_t bytes_t1) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(1, t1, 0);

  void* activated = __hpvm__tensor_relu(t1);
  __hpvm__return(2, activated, static_cast<size_t>(0));
}
// Leaf node 17: applies __hpvm__tensor_mul to (t1, t2).
// In root() t1 is the output of node 16 and t2 is dense_2_w.
// NOTE(review): presumably a matrix product implementing the dense layer --
// confirm against hpvm.h.
// Returns two values to the graph: the result tensor and a size of 0.
void var_17_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* product = __hpvm__tensor_mul(t1, t2);
  __hpvm__return(2, product, static_cast<size_t>(0));
}
// Leaf node 18: applies __hpvm__tensor_add to (t1, t2).
// In root() t1 is the output of node 17 and t2 is dense_2_b.
// Returns two values to the graph: the result tensor and a size of 0.
void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(2, t1, t2, 0);

  void* biased = __hpvm__tensor_add(t1, t2);
  __hpvm__return(2, biased, static_cast<size_t>(0));
}
// Leaf node 19 (last node of the graph): applies __hpvm__tensor_softmax to t1
// (the output of node 18 in root()). Its two return values are bound to the
// outputs of root().
void var_19_node(void* t1, size_t bytes_t1) {
  __hpvm__hint(hpvm::CUDNN_TARGET);
  __hpvm__attributes(1, t1, 0);

  void* probabilities = __hpvm__tensor_softmax(t1);
  __hpvm__return(2, probabilities, static_cast<size_t>(0));
}
// Root node of the dataflow graph: creates the 20 leaf nodes var_0 .. var_19
// and chains them linearly (each node's two outputs feed the next node's first
// two inputs). Weight and bias tensors are bound from root's own parameters.
//
// Parameters come in (pointer, byte-size) pairs, so root's inputs are indexed
// 0..25 in declaration order: input = 0/1, conv2d_1_w = 2/3, conv2d_1_b = 4/5,
// ... dense_2_b = 24/25. The indices used in the __hpvm__bindIn calls below
// follow that numbering.
//
// NOTE(review): argument meaning of the intrinsics is assumed to be
//   __hpvm__bindIn(node, parent_input_idx, node_input_idx, is_stream)
//   __hpvm__edge(src, dst, replication, src_output_idx, dst_input_idx, is_stream)
// -- confirm against hpvm.h before editing any index.
void root(void* input, size_t input_bytes,
void* conv2d_1_w, size_t conv2d_1_w_bytes,
void* conv2d_1_b, size_t conv2d_1_b_bytes,
void* conv2d_2_w, size_t conv2d_2_w_bytes,
void* conv2d_2_b, size_t conv2d_2_b_bytes,
void* conv2d_3_w, size_t conv2d_3_w_bytes,
void* conv2d_3_b, size_t conv2d_3_b_bytes,
void* conv2d_4_w, size_t conv2d_4_w_bytes,
void* conv2d_4_b, size_t conv2d_4_b_bytes,
void* dense_1_w, size_t dense_1_w_bytes,
void* dense_1_b, size_t dense_1_b_bytes,
void* dense_2_w, size_t dense_2_w_bytes,
void* dense_2_b, size_t dense_2_b_bytes){
__hpvm__hint(hpvm::CPU_TARGET);
// Lists all 13 pointer parameters, followed by a trailing 0.
__hpvm__attributes(13, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, 0);
// --- conv2d_1: convolution (var_0) -> bias add (var_1) -> relu (var_2) ---
// var_0 takes the network input (0/1) and conv2d_1_w (2/3).
void* var_0 = __hpvm__createNodeND(0, var_0_node);
__hpvm__bindIn(var_0, 0, 0, 0);
__hpvm__bindIn(var_0, 1, 1, 0);
__hpvm__bindIn(var_0, 2, 2, 0);
__hpvm__bindIn(var_0, 3, 3, 0);
// var_1 takes var_0's output plus conv2d_1_b (4/5).
void* var_1 = __hpvm__createNodeND(0, var_1_node);
__hpvm__edge(var_0, var_1, 1, 0, 0, 0);
__hpvm__edge(var_0, var_1, 1, 1, 1, 0);
__hpvm__bindIn(var_1, 4, 2, 0);
__hpvm__bindIn(var_1, 5, 3, 0);
void* var_2 = __hpvm__createNodeND(0, var_2_node);
__hpvm__edge(var_1, var_2, 1, 0, 0, 0);
__hpvm__edge(var_1, var_2, 1, 1, 1, 0);
// --- conv2d_2: convolution (var_3) -> bias add (var_4) -> relu (var_5) -> max pool (var_6) ---
// var_3 takes var_2's output plus conv2d_2_w (6/7).
void* var_3 = __hpvm__createNodeND(0, var_3_node);
__hpvm__edge(var_2, var_3, 1, 0, 0, 0);
__hpvm__edge(var_2, var_3, 1, 1, 1, 0);
__hpvm__bindIn(var_3, 6, 2, 0);
__hpvm__bindIn(var_3, 7, 3, 0);
// var_4 takes var_3's output plus conv2d_2_b (8/9).
void* var_4 = __hpvm__createNodeND(0, var_4_node);
__hpvm__edge(var_3, var_4, 1, 0, 0, 0);
__hpvm__edge(var_3, var_4, 1, 1, 1, 0);
__hpvm__bindIn(var_4, 8, 2, 0);
__hpvm__bindIn(var_4, 9, 3, 0);
void* var_5 = __hpvm__createNodeND(0, var_5_node);
__hpvm__edge(var_4, var_5, 1, 0, 0, 0);
__hpvm__edge(var_4, var_5, 1, 1, 1, 0);
void* var_6 = __hpvm__createNodeND(0, var_6_node);
__hpvm__edge(var_5, var_6, 1, 0, 0, 0);
__hpvm__edge(var_5, var_6, 1, 1, 1, 0);
// --- conv2d_3: convolution (var_7) -> bias add (var_8) -> relu (var_9) ---
// var_7 takes var_6's output plus conv2d_3_w (10/11).
void* var_7 = __hpvm__createNodeND(0, var_7_node);
__hpvm__edge(var_6, var_7, 1, 0, 0, 0);
__hpvm__edge(var_6, var_7, 1, 1, 1, 0);
__hpvm__bindIn(var_7, 10, 2, 0);
__hpvm__bindIn(var_7, 11, 3, 0);
// var_8 takes var_7's output plus conv2d_3_b (12/13).
void* var_8 = __hpvm__createNodeND(0, var_8_node);
__hpvm__edge(var_7, var_8, 1, 0, 0, 0);
__hpvm__edge(var_7, var_8, 1, 1, 1, 0);
__hpvm__bindIn(var_8, 12, 2, 0);
__hpvm__bindIn(var_8, 13, 3, 0);
void* var_9 = __hpvm__createNodeND(0, var_9_node);
__hpvm__edge(var_8, var_9, 1, 0, 0, 0);
__hpvm__edge(var_8, var_9, 1, 1, 1, 0);
// --- conv2d_4: convolution (var_10) -> bias add (var_11) -> relu (var_12) -> max pool (var_13) ---
// var_10 takes var_9's output plus conv2d_4_w (14/15).
void* var_10 = __hpvm__createNodeND(0, var_10_node);
__hpvm__edge(var_9, var_10, 1, 0, 0, 0);
__hpvm__edge(var_9, var_10, 1, 1, 1, 0);
__hpvm__bindIn(var_10, 14, 2, 0);
__hpvm__bindIn(var_10, 15, 3, 0);
// var_11 takes var_10's output plus conv2d_4_b (16/17).
void* var_11 = __hpvm__createNodeND(0, var_11_node);
__hpvm__edge(var_10, var_11, 1, 0, 0, 0);
__hpvm__edge(var_10, var_11, 1, 1, 1, 0);
__hpvm__bindIn(var_11, 16, 2, 0);
__hpvm__bindIn(var_11, 17, 3, 0);
void* var_12 = __hpvm__createNodeND(0, var_12_node);
__hpvm__edge(var_11, var_12, 1, 0, 0, 0);
__hpvm__edge(var_11, var_12, 1, 1, 1, 0);
void* var_13 = __hpvm__createNodeND(0, var_13_node);
__hpvm__edge(var_12, var_13, 1, 0, 0, 0);
__hpvm__edge(var_12, var_13, 1, 1, 1, 0);
// --- dense_1: tensor_mul (var_14) -> bias add (var_15) -> relu (var_16) ---
// var_14 takes var_13's output plus dense_1_w (18/19).
void* var_14 = __hpvm__createNodeND(0, var_14_node);
__hpvm__edge(var_13, var_14, 1, 0, 0, 0);
__hpvm__edge(var_13, var_14, 1, 1, 1, 0);
__hpvm__bindIn(var_14, 18, 2, 0);
__hpvm__bindIn(var_14, 19, 3, 0);
// var_15 takes var_14's output plus dense_1_b (20/21).
void* var_15 = __hpvm__createNodeND(0, var_15_node);
__hpvm__edge(var_14, var_15, 1, 0, 0, 0);
__hpvm__edge(var_14, var_15, 1, 1, 1, 0);
__hpvm__bindIn(var_15, 20, 2, 0);
__hpvm__bindIn(var_15, 21, 3, 0);
void* var_16 = __hpvm__createNodeND(0, var_16_node);
__hpvm__edge(var_15, var_16, 1, 0, 0, 0);
__hpvm__edge(var_15, var_16, 1, 1, 1, 0);
// --- dense_2: tensor_mul (var_17) -> bias add (var_18) -> softmax (var_19) ---
// var_17 takes var_16's output plus dense_2_w (22/23).
void* var_17 = __hpvm__createNodeND(0, var_17_node);
__hpvm__edge(var_16, var_17, 1, 0, 0, 0);
__hpvm__edge(var_16, var_17, 1, 1, 1, 0);
__hpvm__bindIn(var_17, 22, 2, 0);
__hpvm__bindIn(var_17, 23, 3, 0);
// var_18 takes var_17's output plus dense_2_b (24/25).
void* var_18 = __hpvm__createNodeND(0, var_18_node);
__hpvm__edge(var_17, var_18, 1, 0, 0, 0);
__hpvm__edge(var_17, var_18, 1, 1, 1, 0);
__hpvm__bindIn(var_18, 24, 2, 0);
__hpvm__bindIn(var_18, 25, 3, 0);
void* var_19 = __hpvm__createNodeND(0, var_19_node);
__hpvm__edge(var_18, var_19, 1, 0, 0, 0);
__hpvm__edge(var_18, var_19, 1, 1, 1, 0);
// The two outputs of the final softmax node become the outputs of root().
__hpvm__bindOut(var_19, 0, 0, 0);
__hpvm__bindOut(var_19, 1, 1, 0);
}
// Result slot embedded at the end of RootIn. main() reads `tensor` from it
// after __hpvm__wait returns.
// NOTE(review): presumably filled in by the HPVM runtime from root()'s two
// bound outputs -- confirm; nothing in this file writes it.
struct ret_t {
// Output tensor handle of the graph.
void* tensor;
// Size companion of `tensor`; not read anywhere in this file.
size_t bytes;
};
// Argument block handed to __hpvm__launch for root().
// The fields repeat root()'s parameter list in the same order (pointer followed
// by its byte-size), followed by the result slot `r`. The struct is declared
// packed, so there is no padding between members.
// NOTE(review): field order presumably has to stay in sync with root()'s
// signature -- confirm against the HPVM launch convention before reordering.
typedef struct __attribute__((__packed__)) {
// Input batch; reassigned by main() on every loop iteration.
void* input;
size_t input_bytes;
// Weights and biases of the four convolution layers.
void* conv2d_1_w;
size_t conv2d_1_w_bytes;
void* conv2d_1_b;
size_t conv2d_1_b_bytes;
void* conv2d_2_w;
size_t conv2d_2_w_bytes;
void* conv2d_2_b;
size_t conv2d_2_b_bytes;
void* conv2d_3_w;
size_t conv2d_3_w_bytes;
void* conv2d_3_b;
size_t conv2d_3_b_bytes;
void* conv2d_4_w;
size_t conv2d_4_w_bytes;
void* conv2d_4_b;
size_t conv2d_4_b_bytes;
// Weights and biases of the two dense layers.
void* dense_1_w;
size_t dense_1_w_bytes;
void* dense_1_b;
size_t dense_1_b_bytes;
void* dense_2_w;
size_t dense_2_w_bytes;
void* dense_2_b;
size_t dense_2_b_bytes;
// Result slot; main() reads r.tensor after each launch completes.
struct ret_t r;
}
RootIn;
// Number of inputs fed through the graph per launch.
const int batch_size = 500;
// Total number of inputs covered by the batch loop in main().
const int input_size = 5000;
// Number of launches needed to cover input_size (integer division).
const int batch_count = input_size / batch_size;
// Entry point: loads the trained weights, then runs the `root` dataflow graph
// over the input file one batch at a time, handing each result tensor to the
// runtime-control hook together with the labels file.
//
// Returns 0 on success, or EXIT_FAILURE if the argument block cannot be
// allocated.
int main() {
  // NOTE(review): the labels path is relative while the weights and inputs use
  // an absolute path -- confirm this is intended. Earlier revisions (per the
  // removed commented-out code) used relative "*_fp16.bin" paths for
  // everything.
  std::string labels_path = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/labels_fp16.bin";

  // NOTE(review): every path is passed as a string literal directly in the
  // readTrainedWeights call. Presumably a compiler pass reads these arguments
  // statically, so do not hoist them into variables without confirming.
  void* conv2d_1_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_w.bin", 0, 32, 3, 3, 3);
  void* conv2d_1_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b.bin", 0, 1, 32, 1, 1);  // author's alternative trailing dims: 30,30
  void* conv2d_2_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w.bin", 0, 32, 32, 3, 3);
  void* conv2d_2_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b.bin", 0, 1, 32, 1, 1);  // author's alternative trailing dims: 28,28
  void* conv2d_3_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w.bin", 0, 64, 32, 3, 3);
  void* conv2d_3_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b.bin", 0, 1, 64, 1, 1);  // author's alternative trailing dims: 12,12
  void* conv2d_4_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w.bin", 0, 64, 64, 3, 3);
  void* conv2d_4_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b.bin", 0, 1, 64, 1, 1);  // author's alternative trailing dims: 10,10
  void* dense_1_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w.bin", 0, 1, 1, 1600, 256);
  void* dense_1_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b.bin", 0, 1, 256, 1, 1);
  void* dense_2_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w.bin", 0, 1, 1, 256, 5);
  void* dense_2_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b.bin", 0, 1, 5, 1, 1);

  RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
  if (args == nullptr) {
    // Without the argument block there is nothing to launch.
    fprintf(stderr, "Failed to allocate %zu bytes for RootIn\n", sizeof(RootIn));
    return EXIT_FAILURE;
  }

  // The input is assigned per batch inside the loop; start from a defined
  // state so no field of the malloc'ed block is ever read uninitialised.
  args->input = nullptr;
  args->input_bytes = 0;
  args->conv2d_1_w = conv2d_1_w;
  args->conv2d_1_w_bytes = 0;
  args->conv2d_1_b = conv2d_1_b;
  args->conv2d_1_b_bytes = 0;
  args->conv2d_2_w = conv2d_2_w;
  args->conv2d_2_w_bytes = 0;
  args->conv2d_2_b = conv2d_2_b;
  args->conv2d_2_b_bytes = 0;
  args->conv2d_3_w = conv2d_3_w;
  args->conv2d_3_w_bytes = 0;
  args->conv2d_3_b = conv2d_3_b;
  args->conv2d_3_b_bytes = 0;
  args->conv2d_4_w = conv2d_4_w;
  args->conv2d_4_w_bytes = 0;
  args->conv2d_4_b = conv2d_4_b;
  args->conv2d_4_b_bytes = 0;
  args->dense_1_w = dense_1_w;
  args->dense_1_w_bytes = 0;
  args->dense_1_b = dense_1_b;
  args->dense_1_b_bytes = 0;
  args->dense_2_w = dense_2_w;
  args->dense_2_w_bytes = 0;
  args->dense_2_b = dense_2_b;
  args->dense_2_b_bytes = 0;
  args->r.tensor = nullptr;
  args->r.bytes = 0;

  __hpvm__init();
  startMemTracking();

  // One graph launch per batch; unrolling is explicitly disabled for this loop.
#pragma clang loop unroll(disable)
  for (int i = 0; i < batch_count; i++) {
    const int start = i * batch_size;
    const int end = start + batch_size;

    // Load inputs [start, end) with 3 x 32 x 32 per-item dimensions.
    void* input = readInputBatch("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/input.bin", nchw, start, end, 3, 32, 32);
    args->input = input;
    args->input_bytes = 0;

    void* dfg = __hpvm__launch(0, root, static_cast<void*>(args));
    __hpvm__wait(dfg);

    // The graph's output tensor is read back from the result slot.
    void* result = args->r.tensor;
    hpvm_request_tensor(result, 0);
    llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end);

    freeBatchMemory();
  }

  __hpvm__cleanup();
  free(args);
  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment