# approxhpvm_translator.py
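
# Translates a trained Keras model into: (1) an HPVM tensor-runtime C++ program
# (src.cc) plus dumped binary weight/input/label files, (2) a tuner.json file
# with per-op costs and applicable approximation knobs, and (3) an ApproxHPVM
# source file (approxhpvm_src.cc) emitted via HPVMTranslator.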
import sys
import numpy as np
from frontend.promise_translator import PromiseRtTranslator
from frontend.hpvm_dfg_translator import HPVMTranslator
from frontend.weight_utils import dumpLabels, dumpData, dumpConvWeights, dumpFcWeights, dumpFcBias
from frontend.utils import *
from frontend.knobs import *
import keras
import os

class DFG:

    def __init__(self):
        self.node_map = {}
        self.root_node = None
        self.last_node = None
        self.root_set = False

    def hasSingleInput(self, layer):
        layer_name = layer.__class__.__name__
        single_in_layers = {"DepthwiseConv2D", "Conv2D", "Dense", "MaxPooling2D",
                            "Activation", "BatchNormalization", "Flatten"}
        return layer_name in single_in_layers

    def hasMultipleInputs(self, layer):
        layer_name = layer.__class__.__name__
        multiple_in_layers = {"Add"}
        return layer_name in multiple_in_layers

    def add_dfg_edge(self, inbound_node_name, dfg_node):
        inbound_node_name = inbound_node_name.split(":")[0]
        inbound_node_name = inbound_node_name.split("/")[0]
        if inbound_node_name in self.node_map:
            inbound_node = self.node_map[inbound_node_name]
            DEBUG(inbound_node_name, " found!")
            inbound_node.add_output(dfg_node)
            dfg_node.add_input(inbound_node)
        else:
            DEBUG("--inbound node NOT FOUND!")

    def add_to_graph(self, layer):
        dfg_node = DFGNode(layer)
        if not self.root_set:
            self.root_node = dfg_node
            self.root_set = True  # DFG root node is now set

        if self.hasMultipleInputs(layer):
            for j in range(len(layer.input)):
                DEBUG(type(layer.input[j]))
                DEBUG(layer.input[j].op.name)
                self.add_dfg_edge(layer.input[j].op.name, dfg_node)
        else:
            DEBUG(layer.input.name)
            self.add_dfg_edge(layer.input.name, dfg_node)

        # Add the DFG node to the name -> node mapping
        self.node_map[layer.name] = dfg_node

    # Check whether all predecessor nodes have been visited so far - reverse postorder traversal
    def predVisited(self, cur_node, visited_nodes):
        for input_node in cur_node.inputs:
            if input_node.layer_name not in visited_nodes:
                return False
        # All predecessors are visited
        return True

    def traverseNode(self, cur_node, visited_nodes):
        # Skip visited nodes
        if cur_node.layer_name in visited_nodes:
            return

        if self.predVisited(cur_node, visited_nodes):
            DEBUG(cur_node.layer_type)
            DEBUG(cur_node.layer_name)
            visited_nodes[cur_node.layer_name] = True

            # Invoke traversal on outbound nodes
            for output_node in cur_node.outputs:
                self.traverseNode(output_node, visited_nodes)

            # NOTE: Assuming that no outbound edges implies the last node in the graph
            if len(cur_node.outputs) == 0:
                self.last_node = cur_node

    # Build and print the DFG in reverse postorder
    def buildDFG(self):
        DEBUG("\n\n ****** Traversing and Printing DFG ******* \n\n")
        visited_nodes = {}
        # Start traversal at the DFG root node
        self.traverseNode(self.root_node, visited_nodes)
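
# The DFG is driven externally (see translate_to_approxhpvm below): each Keras
# layer is added in model order and the graph is then traversed in reverse
# postorder. Sketch:
#
#   dfg = DFG()
#   for layer in model.layers:
#       dfg.add_to_graph(layer)
#   dfg.buildDFG()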

class DFGNode:

    def __init__(self, layer):
        self.inputs = []
        self.outputs = []
        layer_type = layer.__class__.__name__
        self.layer_type = layer_type  # layer type, e.g., Conv2D, Add, Dense
        self.layer_name = layer.name  # unique layer identifier
        DEBUG(self.layer_name)

        if layer_type == "Conv2D" or layer_type == "DepthwiseConv2D" or layer_type == "Dense":
            self.weights = layer.get_weights()[0]
            DEBUG("\t", self.weights.shape)
            self.use_bias = layer.use_bias
            if layer.use_bias:
                self.bias_weights = layer.get_weights()[1]
                DEBUG("\t", self.bias_weights.shape)

        if layer_type == "Conv2D" or layer_type == "DepthwiseConv2D":
            self.padding = layer.padding
            self.strides = layer.strides
            DEBUG("\t", self.strides)
            DEBUG("\tPadding = ", self.padding)

        if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D":
            self.pool_size = layer.pool_size
            self.strides = layer.strides
            DEBUG("\t pool_size = ", self.pool_size)
            DEBUG("\t strides = ", self.strides)

        if layerHasActivationAttr(self):
            self.activation_type = layer.activation.__name__
            DEBUG("\t Activation = ", self.activation_type)

        if layer_type == "ZeroPadding2D":
            DEBUG("***ZeroPadding \n")
            self.padding = layer.padding
            DEBUG("padding = ", self.padding)

        if layer_type == "BatchNormalization":
            self.epsilon = layer.epsilon
            self.beta = layer.beta
            self.gamma = layer.gamma
            self.moving_mean = layer.moving_mean
            self.moving_variance = layer.moving_variance

    def add_output(self, output_node):
        self.outputs.append(output_node)

    def add_input(self, input_node):
        self.inputs.append(input_node)

class TensorRtTranslator:

    def __init__(self, dfg):
        self.dfg = dfg
        self.output_map = {}
        self.counter = 0
        self.w_counter = 0  # counter for generated weight variable names (see getWeightVarName)
        self.weight_str = ""
        self.program_str = ""
        self.input_str = ""
        self.filter_names = {}
        # Used for JSON generation
        self.json_str = ""
        self.knobs_str = ""
        self.cur_height = 32
        self.cur_width = 32
        self.op_count = 1

    def setInputHeightWidth(self, data):
        self.cur_height = data.shape[2]
        self.cur_width = data.shape[3]
        DEBUG("cur_height = ", self.cur_height, " cur_width = ", self.cur_width, ", \n")

    def addConvOverheads(self, weights, padding, strides):
        K_d = weights.shape[0] * weights.shape[1] * weights.shape[2] * weights.shape[3]
        # Integer division: output spatial dimensions shrink by the stride
        H_d = self.cur_height // strides[0]
        W_d = self.cur_width // strides[1]
        flops = H_d * W_d * K_d
        DEBUG("conv_flops = ", flops)
        self.json_str += "\"convolution_" + str(self.op_count) + "\" : " + str(flops) + ", \n"
        self.knobs_str += "\"convolution_" + str(self.op_count) + "\" : [" + conv_knobs + "], \n"
        self.op_count += 1
        self.cur_height = self.cur_height // strides[0]
        self.cur_width = self.cur_width // strides[1]
        DEBUG("cur_height = ", self.cur_height, " cur_width = ", self.cur_width, "\n")

    def addDenseOverheads(self, weights):
        flops = weights.shape[0] * weights.shape[1]
        DEBUG("dense_flops = ", flops)
        # Trailing ", \n" kept consistent with the other ops so that
        # dumpJsonFile() can uniformly strip it from the last entry
        self.json_str += "\"linear_" + str(self.op_count) + "\" : " + str(flops) + ", \n"
        self.knobs_str += "\"linear_" + str(self.op_count) + "\" : [" + baseline_knobs + "], \n"
        self.op_count += 1
        self.cur_height = 1
        self.cur_width = weights.shape[1]
        DEBUG("cur_height = ", self.cur_height, " cur_width = ", self.cur_width, "\n")

    def adjustPoolDims(self, strides):
        self.cur_height = self.cur_height // strides[0]
        self.cur_width = self.cur_width // strides[1]
        DEBUG("cur_height = ", self.cur_height, " cur_width = ", self.cur_width, "\n")

    def addBaselineKnob(self, op_name):
        self.json_str += "\"" + op_name + "_" + str(self.op_count) + "\" : 0, \n"
        self.knobs_str += "\"" + op_name + "_" + str(self.op_count) + "\" : [" + baseline_knobs + "], \n"
        self.op_count += 1
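
    # Cost/knob bookkeeping (summary): each op appends one '"<name>_<count>" : <cost>'
    # entry to json_str (the "op_cost" section of tuner.json) and one
    # '"<name>_<count>" : [<knob list>]' entry to knobs_str (the "op_knobs"
    # section); op_count keeps the generated names unique.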

    def getWeightStr(self):
        return self.weight_str

    def getInputStr(self):
        return self.input_str

    def getFilterNames(self):
        return self.filter_names

    def getWeightVarName(self, weights):
        output_var_name = "weights_" + str(self.w_counter)
        self.w_counter += 1
        self.filter_names[weights] = output_var_name
        return output_var_name

    def getVariableName(self, cur_node):
        output_var_name = "var_" + str(self.counter)
        self.counter += 1
        self.output_map[cur_node.layer_name] = output_var_name
        return output_var_name

    def isSkipLayer(self, layer_type):
        skip_layers = {"Flatten", "Dropout", "ZeroPadding2D"}
        return layer_type in skip_layers

    # NOTE: returns the previous DFG node, ignoring "Flatten" and "Dropout" layers
    def getPrevActiveLayer(self, cur_node):
        pred_layer_type = cur_node.inputs[0].layer_type
        # FIXME: Assuming the 'inference' phase - hence skipping Dropout
        if self.isSkipLayer(pred_layer_type):
            cur_node = self.getPrevActiveLayer(cur_node.inputs[0])
            return cur_node
        else:
            return cur_node

    def getSingleInputName(self, cur_node):
        DEBUG(cur_node.layer_name)
        # Assumption: if there are no inputs, the previous layer must be the input layer
        if len(cur_node.inputs) == 0:
            return "input"

        DEBUG("Input_type = ", cur_node.inputs[0].layer_type)
        # NOTE: Assuming the 'inference' phase - hence skipping Dropout
        pred_layer_type = cur_node.inputs[0].layer_type
        if self.isSkipLayer(pred_layer_type):
            cur_node = self.getPrevActiveLayer(cur_node)

        if cur_node.inputs[0].layer_type == "InputLayer":
            return "input"

        # Get the input to the layer
        input_node_name = cur_node.inputs[0].layer_name  # the input layer ID
        input_var_name = ""
        if input_node_name in self.output_map:
            input_var_name = self.output_map[input_node_name]
        else:
            print("Input Var not found - Aborting....")
            sys.exit(0)

        return input_var_name

    def getPrevLayerPadding(self, cur_node):
        DEBUG(cur_node.layer_name)
        # Assumption: if there are no inputs, the previous layer must be the input layer
        if len(cur_node.inputs) == 0:
            return None

        DEBUG("Input_type = ", cur_node.inputs[0].layer_type)
        if cur_node.inputs[0].layer_type == "ZeroPadding2D":
            pred_padding = cur_node.inputs[0].padding
            return pred_padding

        return None

    def getMultipleInputNames(self, cur_node):
        var_names = []
        for i in range(len(cur_node.inputs)):
            # Get the input to the layer
            input_node_name = cur_node.inputs[i].layer_name  # the input layer ID
            input_var_name = ""
            if input_node_name in self.output_map:
                input_var_name = self.output_map[input_node_name]
                var_names.append(input_var_name)
            else:
                print("Input Var not found - Aborting....")
                sys.exit(0)

        return var_names

    def hasBiasAdd(self, cur_node):
        if nodeHasBias(cur_node):
            return cur_node.use_bias
        return False

    def hasActivation(self, cur_node):
        if nodeHasActivation(cur_node):
            return cur_node.activation_type != "linear"
        return False

    def genNodeCalls(self, cur_node):
        out_var_name1 = self.getVariableName(cur_node)
        layer_type = cur_node.layer_type

        if layer_type == "Conv2D" or layer_type == "DepthwiseConv2D":
            input_var_name = self.getSingleInputName(cur_node)
            weights = cur_node.weights
            strides = cur_node.strides

            padding = 0
            if cur_node.padding.strip() != "valid":
                # "same" padding: symmetric padding derived from the filter size
                padding = int((weights.shape[0] - 1) / 2)
            prev_padding = self.getPrevLayerPadding(cur_node)
            if prev_padding is not None:
                # FIXME: currently only supporting symmetric padding
                padding = prev_padding[0][0]

            inst_str = "void* " + out_var_name1 + " = "
            inst_str += "tensorConvolution(" + input_var_name + ", "
            inst_str += cur_node.layer_name + "_w, "
            inst_str += str(padding) + ", "
            inst_str += str(padding) + ", "
            inst_str += str(strides[0]) + ", "
            inst_str += str(strides[1]) + ", "
            inst_str += "1, "
            if layer_type == "DepthwiseConv2D":
                C = weights.shape[2]
                inst_str += str(C) + "); \n"
            else:
                inst_str += "1); \n"
            self.program_str += inst_str

            if strides[0] > 1 and cur_node.padding.strip() == "same":
                print("!ERROR: Same Padding not supported for Conv with Stride > 1")
                print("Use: ZeroPadding2D(padding=(" + str(padding) + "," + str(padding) + "));\n")
                sys.exit(0)

            # NOTE: For JSON (tuning config) file generation
            if layer_type == "Conv2D":
                self.addConvOverheads(weights, padding, strides)
            elif layer_type == "DepthwiseConv2D":
                self.addBaselineKnob("depthwise_convolution")

        if layer_type == "Dense":
            input_var_name = self.getSingleInputName(cur_node)
            weights = cur_node.weights

            inst_str = "void* " + out_var_name1 + " = "
            inst_str += "tensorGemmGPU(" + input_var_name + ", "
            inst_str += cur_node.layer_name + "_w"
            inst_str += "); \n"
            self.program_str += inst_str

            # Add cost for the Dense layer (JSON file)
            self.addDenseOverheads(weights)

        if self.hasBiasAdd(cur_node):
            out_var_name2 = self.getVariableName(cur_node)

            inst_str = "void* " + out_var_name2 + " = "
            inst_str += "tensorAdd(" + out_var_name1 + ", "
            inst_str += cur_node.layer_name + "_b"
            inst_str += "); \n"
            self.program_str += inst_str

            # NOTE: Changing the output variable
            out_var_name1 = out_var_name2
            self.addBaselineKnob("add")

        if layer_type == "Activation":
            input_var_name = self.getSingleInputName(cur_node)

            inst_str = genActivationCallStr(input_var_name, out_var_name1, cur_node.activation_type)
            self.program_str += inst_str
            self.addBaselineKnob(cur_node.activation_type)

        if self.hasActivation(cur_node) and layer_type != "Activation":
            activation_type = cur_node.activation_type
            out_var_name3 = self.getVariableName(cur_node)

            inst_str = genActivationCallStr(out_var_name1, out_var_name3, activation_type)
            self.program_str += inst_str

            if activation_type == "softmax":
                print("Softmax canNOT be part of Dense/Conv Op. Insert: Activation('softmax');")
                sys.exit(0)
            self.addBaselineKnob(activation_type)

        if layer_type == "BatchNormalization":
            input_var_name = self.getSingleInputName(cur_node)

            inst_str = "void* " + out_var_name1 + " = "
            inst_str += "tensorBatchNorm(" + input_var_name + ", "
            inst_str += cur_node.layer_name + "_gamma, "
            inst_str += cur_node.layer_name + "_beta, "
            inst_str += cur_node.layer_name + "_mean, "
            inst_str += cur_node.layer_name + "_variance, "
            inst_str += str(cur_node.epsilon)
            inst_str += "); \n"
            self.program_str += inst_str
            self.addBaselineKnob("batchnorm")

        if layer_type == "Add":
            input_vars = self.getMultipleInputNames(cur_node)

            inst_str = "void* " + out_var_name1 + " = "
            inst_str += "tensorAdd(" + input_vars[0] + ", " + input_vars[1] + "); \n"
            self.program_str += inst_str
            self.addBaselineKnob("add")

        if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D":
            input_var_name = self.getSingleInputName(cur_node)
            pool_size = cur_node.pool_size
            strides = cur_node.strides
            # FIXME: Non-same padding is *NOT* currently supported
            padding = 0
            pool_type = 0
            if layer_type == "MaxPooling2D":
                pool_type = "0"
                self.addBaselineKnob("maxpool")
            if layer_type == "AveragePooling2D":
                pool_type = "1"
                self.addBaselineKnob("avgpool")

            # tensorPooling(input, pool_type, pool_h, pool_w, v_pad, h_pad, v_stride, h_stride)
            inst_str = "void* " + out_var_name1 + " = "
            inst_str += "tensorPooling(" + input_var_name + "," + pool_type + "," + str(pool_size[0]) + "," + str(pool_size[1])
            inst_str += "," + str(padding) + "," + str(padding) + "," + str(strides[0]) + "," + str(strides[1])
            inst_str += "); \n"
            self.program_str += inst_str
            self.adjustPoolDims(strides)
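
    # For reference, a Conv2D node with a bias and a fused activation expands
    # into three generated calls of the following shape (sketch; variable
    # numbering depends on the model, and the activation line is whatever
    # genActivationCallStr emits for the activation type):
    #
    #   void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1);
    #   void* var_1 = tensorAdd(var_0, conv2d_1_b);
    #   void* var_2 = <activation>(var_1);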

    def codegenNode(self, dfg, cur_node, visited_nodes):
        # Skip visited nodes
        if cur_node.layer_name in visited_nodes:
            return

        DEBUG("-visiting = ", cur_node.layer_name, "\n")
        if dfg.predVisited(cur_node, visited_nodes):
            visited_nodes[cur_node.layer_name] = True
            self.genNodeCalls(cur_node)

            # Invoke traversal on outbound nodes
            for output_node in cur_node.outputs:
                self.codegenNode(dfg, output_node, visited_nodes)

    # Print the DFG in reverse postorder
    def codegen(self, dfg):
        print("\n *** Starting Codegen for HPVM Tensor Rt *** \n")
        visited_nodes = {}
        # Start traversal at the DFG root node
        self.codegenNode(dfg, dfg.root_node, visited_nodes)
        print("\n\n --- Codegen Completed --- \n\n")

    def dump_weights(self, model, prefix, reload_weights):
        layer_count = 0
        for i in range(len(model.layers)):
            layer = model.layers[i]
            layer_type = layer.__class__.__name__
            layer_name = layer.name

            if layer_type == "Conv2D" or layer_type == "DepthwiseConv2D":
                weights = layer.get_weights()[0]
                w_name = layer_name + "_w"
                self.filter_names[w_name] = 1
                DEBUG(weights.shape, w_name)

                N = weights.shape[3]
                C = weights.shape[2]
                H = weights.shape[1]
                W = weights.shape[0]
                unique_file_name = w_name + ".bin"
                dumpConvWeights(prefix + unique_file_name, weights, N, C, H, W, reload_weights)

                file_path = w_name + "_path"
                file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
                file_path_str += unique_file_name + "\"); \n"
                self.weight_str += file_path_str

                # NOTE: Special handling for DepthwiseConv2D
                if layer_type == "DepthwiseConv2D":
                    N = C
                    C = 1

                # FIXME: Be flexible about datatypes (currently only FP32 weights)
                # NOTE: '0' specifies the floating-point type
                self.weight_str += "void* " + w_name + " = " + " readTrainedWeights("
                self.weight_str += file_path + ".c_str(), 0," + str(N) + "," + str(C) + "," + str(H) + "," + str(W)
                self.weight_str += "); \n"

                if layer.use_bias:
                    bias_weights = layer.get_weights()[1]
                    b_name = layer_name + "_b"
                    self.filter_names[b_name] = 1
                    DEBUG(bias_weights.shape, b_name)

                    unique_file_name = b_name + ".bin"
                    dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0], reload_weights)

                    file_path = b_name + "_path"
                    file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
                    file_path_str += unique_file_name + "\"); \n"
                    self.weight_str += file_path_str

                    C = bias_weights.shape[0]
                    self.weight_str += "void* " + b_name + " = " + " readTrainedWeights("
                    self.weight_str += file_path + ".c_str(), 0,1," + str(C) + ",1,1); \n"

            if layer_type == "Dense":
                weights = layer.get_weights()[0]
                w_name = layer_name + "_w"
                self.filter_names[w_name] = 1
                DEBUG(weights.shape, w_name)

                H = weights.shape[0]
                W = weights.shape[1]
                unique_file_name = w_name + ".bin"
                dumpFcWeights(prefix + unique_file_name, weights, H, W, reload_weights)

                file_path = w_name + "_path"
                file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
                file_path_str += unique_file_name + "\"); \n"
                self.weight_str += file_path_str

                self.weight_str += "void* " + w_name + " = " + " readTrainedWeights("
                self.weight_str += file_path + ".c_str(), 0,1,1," + str(H) + "," + str(W) + "); \n"

                if layer.use_bias:
                    bias_weights = layer.get_weights()[1]
                    b_name = layer_name + "_b"
                    self.filter_names[b_name] = 1
                    DEBUG(bias_weights.shape, b_name)

                    unique_file_name = b_name + ".bin"
                    dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0], reload_weights)

                    file_path = b_name + "_path"
                    file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
                    file_path_str += unique_file_name + "\"); \n"
                    self.weight_str += file_path_str

                    C = bias_weights.shape[0]
                    self.weight_str += "void* " + b_name + " = " + " readTrainedWeights("
                    self.weight_str += file_path + ".c_str(), 0,1," + str(C) + ",1,1); \n"

            if layer_type == "BatchNormalization":
                weights = layer.get_weights()

                gamma_w = weights[0]
                gamma_id = layer_name + "_gamma"
                gamma_file_name = gamma_id + ".bin"
                self.filter_names[gamma_id] = 1
                dumpFcBias(prefix + gamma_file_name, gamma_w, gamma_w.shape[0], reload_weights)

                file_path = gamma_id + "_path"
                file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
                file_path_str += gamma_file_name + "\"); \n"
                self.weight_str += file_path_str

                C = gamma_w.shape[0]
                self.weight_str += "void* " + gamma_id + " = " + " readTrainedWeights("
                self.weight_str += file_path + ".c_str(), 0,1," + str(C) + ",1,1); \n"
                # End of gamma handling

                beta_w = weights[1]
                beta_id = layer_name + "_beta"
                beta_file_name = beta_id + ".bin"
                self.filter_names[beta_id] = 1
                dumpFcBias(prefix + beta_file_name, beta_w, beta_w.shape[0], reload_weights)

                file_path = beta_id + "_path"
                file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
                file_path_str += beta_file_name + "\"); \n"
                self.weight_str += file_path_str

                self.weight_str += "void* " + beta_id + " = " + " readTrainedWeights("
                self.weight_str += file_path + ".c_str(), 0,1," + str(C) + ",1,1); \n"
                # End of beta handling

                mean_w = weights[2]
                mean_id = layer_name + "_mean"
                mean_file_name = mean_id + ".bin"
                self.filter_names[mean_id] = 1
                dumpFcBias(prefix + mean_file_name, mean_w, mean_w.shape[0], reload_weights)

                file_path = mean_id + "_path"
                file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
                file_path_str += mean_file_name + "\"); \n"
                self.weight_str += file_path_str

                self.weight_str += "void* " + mean_id + " = " + " readTrainedWeights("
                self.weight_str += file_path + ".c_str(), 0,1," + str(C) + ",1,1); \n"
                # End of mean handling

                variance_w = weights[3]
                variance_id = layer_name + "_variance"
                variance_file_name = variance_id + ".bin"
                self.filter_names[variance_id] = 1
                dumpFcBias(prefix + variance_file_name, variance_w, variance_w.shape[0], reload_weights)

                file_path = variance_id + "_path"
                file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
                file_path_str += variance_file_name + "\"); \n"
                self.weight_str += file_path_str

                self.weight_str += "void* " + variance_id + " = " + " readTrainedWeights("
                self.weight_str += file_path + ".c_str(), 0,1," + str(C) + ",1,1); \n"
                # End of variance handling

            layer_count += 1
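
    # Each dumped tensor contributes a pair of generated C++ lines, e.g. for a
    # first conv layer (sketch; N/C/H/W are the filter dimensions):
    #
    #   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
    #   void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,N,C,H,W);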

    def add_header(self):
        headers = "\n#include <stdio.h> \n"
        headers += "#include <stdlib.h> \n"
        headers += "#include <unistd.h> \n"
        headers += "#include <fcntl.h> \n"
        headers += "#include <sys/types.h> \n"
        headers += "#include <sys/stat.h> \n"
        headers += "#include <string.h> \n"
        headers += "#include \"tensor_runtime.h\" \n"
        headers += "#include \"utils.h\" \n\n"

        main_func = "int main(){ \n\n"
        initialization = "llvm_hpvm_initTensorRt(0); \n\n"

        self.program_str += headers
        self.program_str += main_func
        self.program_str += initialization

    def add_footer(self, test_data):
        if test_data is not None and self.dfg.last_node is not None:
            last_node = self.dfg.last_node
            output_var = self.output_map[last_node.layer_name]

        destructors = "\nllvm_hpvm_cleanupTensorRt(); \n"
        self.program_str += destructors

        end_main = "\nreturn 0; \n\n}\n"
        self.program_str += end_main

        return 0

    def genInputReadCall(self, input_data, input_name):
        file_path = input_name + "_path"
        file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
        file_path_str += input_name + ".bin\"); \n"
        self.weight_str += file_path_str

        N = input_data.shape[0]
        C = input_data.shape[1]
        H = input_data.shape[2]
        W = input_data.shape[3]
        self.input_str += "void* " + input_name + " = readTrainedWeights("
        self.input_str += file_path + ".c_str(), 0," + str(N) + "," + str(C) + ","
        self.input_str += str(H) + "," + str(W) + "); \n"

    def genLabelReadCall(self, labels, labels_name):
        file_path = labels_name + "_path"
        file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\""
        file_path_str += labels_name + ".bin\"); \n"
        self.weight_str += file_path_str

        self.input_str += "uint32_t* " + labels_name + " = readLabels3("
        self.input_str += file_path + ".c_str()," + str(labels.shape[0]) + "); \n"

    def genInputCalls(self, test_data, test_labels, tuner_data, tuner_labels, weights_dir, reload_weights):
        dumpData(weights_dir + "test_input.bin", test_data, reload_weights)
        self.genInputReadCall(test_data, "test_input")
        # Add the input to the filter map
        self.filter_names["input"] = 1

        dumpLabels(weights_dir + "test_labels.bin", test_labels, reload_weights)
        self.genLabelReadCall(test_labels, "test_labels")

        # The tune_* reads use the tuner data/labels
        dumpData(weights_dir + "tune_input.bin", tuner_data, reload_weights)
        self.genInputReadCall(tuner_data, "tune_input")

        dumpLabels(weights_dir + "tune_labels.bin", tuner_labels, reload_weights)
        self.genLabelReadCall(tuner_labels, "tune_labels")

    def genBatchLoop(self, x_test):
        N = x_test.shape[0]
        C = x_test.shape[1]
        H = x_test.shape[2]
        W = x_test.shape[3]

        loop_str = ""
        loop_str += "\nstartMemTracking(); \n\n"
        loop_str += "int test_input_size = " + str(N) + "; \n"
        loop_str += "int batch_size = " + str(N) + "; \n"
        loop_str += "int batch_count = test_input_size / batch_size; \n"
        loop_str += "float final_accuracy = 0.0; \n\n"

        loop_str += "for(int i = 0; i < batch_count; i++){ \n\n"
        loop_str += "int start = i * batch_size; \n"
        loop_str += "int end = (i + 1) * batch_size; \n"
        loop_str += "\nvoid* input = readInputBatch(input_path.c_str(),0,start,end,"
        loop_str += str(C) + "," + str(H) + "," + str(W) + "); \n\n"

        self.program_str += loop_str

    def endBatchLoop(self):
        end_loop_str = ""
        end_loop_str += "\nuint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); \n"

        last_node = self.dfg.last_node
        output_var = self.output_map[last_node.layer_name]
        accuracy_call = "\nfloat accuracy = computeAccuracy3(labels, " + output_var + "); \n"
        end_loop_str += accuracy_call

        end_loop_str += "final_accuracy += accuracy; \n"
        end_loop_str += "freeBatchMemory(); \n"
        end_loop_str += "\n}\n\n"

        end_loop_str += "final_accuracy = final_accuracy / batch_count; \n"
        end_loop_str += "dumpFinalAccuracy(final_accuracy); \n\n"

        self.program_str += end_loop_str

    def generateSourceProgram(self, dir_prefix):
        f = open(dir_prefix + "/src.cc", "w+")
        f.write(self.program_str)
        f.close()

    def dumpJsonFile(self, dir_prefix):
        f = open(dir_prefix + "/tuner.json", "w+")
        f.write("{ \n\n")

        # [:-3] strips the trailing ", \n" from the last entry of each section;
        # ':' and inter-section commas keep the output valid JSON
        op_cost_str = " \"op_cost\" : { \n"
        op_cost_str += self.json_str[:-3]
        op_cost_str += "\n }, \n\n"
        f.write(op_cost_str)

        knobs_speedup_str = "\n \"knob_speedup\" : { \n"
        for key in knobs_speedups:
            knobs_speedup_str += "\"" + str(key) + "\" : " + str(knobs_speedups[key]) + ", \n"
        f.write(knobs_speedup_str[:-3] + "\n}, \n\n")

        layer_knobs_str = " \"op_knobs\" : { \n"
        layer_knobs_str += self.knobs_str[:-3]
        layer_knobs_str += " \n\n } \n\n"
        f.write(layer_knobs_str)

        f.write("\n\n}")
        f.close()
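
    # The resulting tuner.json has the following overall shape (sketch):
    #
    #   { "op_cost"      : { "convolution_1" : <flops>, ... },
    #     "knob_speedup" : { "<knob-id>" : <speedup>, ... },
    #     "op_knobs"     : { "convolution_1" : [<knob-ids>], ... } }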

    def translate(self, model, weights_dir, src_dir, test_data, test_labels, tuner_data, tuner_labels, weights_reload):
        self.add_header()
        dir_path = "std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + std::string(\"" + weights_dir + "\"); \n"
        self.weight_str += dir_path

        if test_data is not None:
            self.genInputCalls(test_data, test_labels, tuner_data, tuner_labels, weights_dir, weights_reload)

        self.dump_weights(model, weights_dir, weights_reload)
        self.program_str += "\n" + self.weight_str + "\n\n"

        self.genBatchLoop(test_data)
        self.codegen(self.dfg)
        self.endBatchLoop()
        self.add_footer(test_data)

        self.generateSourceProgram(src_dir)
        self.dumpJsonFile(src_dir)

def reloadModelParams(model, reload_dir, x_test, y_test):
    print("\n\n*****NOTE: Reloading pre-trained weights \n")

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss2:', score[0])
    print('Test accuracy2:', score[1])

    for i in range(len(model.layers)):
        layer = model.layers[i]
        layer_name = layer.name
        DEBUG("*layer_name = ", layer_name)

        if "conv" not in layer_name and "dense" not in layer_name:
            continue

        w_path = reload_dir + layer_name + "_w.bin"
        b_path = reload_dir + layer_name + "_b.bin"
        w_arr = np.fromfile(w_path, dtype='float32')
        b_arr = np.fromfile(b_path, dtype='float32')

        w_shape = layer.get_weights()[0].shape
        b_shape = layer.get_weights()[1].shape

        if "conv" in layer_name:
            # Conv weights were dumped in NCHW order: reshape to NCHW, then
            # transpose back to Keras' HWCN filter layout
            w_nchw_shape = (w_shape[3], w_shape[2], w_shape[0], w_shape[1])
            w_arr = np.reshape(w_arr, w_nchw_shape)
            b_arr = np.reshape(b_arr, b_shape)
            w_arr = np.transpose(w_arr, (2, 3, 1, 0))
            DEBUG("old_shape = ", w_shape, " new_shape = ", w_arr.shape)

        if "dense" in layer_name:
            w_arr = np.reshape(w_arr, w_shape)
            b_arr = np.reshape(b_arr, b_shape)

        weights = []
        weights.append(w_arr)
        weights.append(b_arr)
        # NOTE: overriding weights
        layer.set_weights(weights)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss2:', score[0])
    print('Test accuracy2:', score[1])

def getUniquePath(weights_dir):
    # Do not overwrite existing directories - create a new one with a unique ID
    if os.path.exists(weights_dir):
        char_count = len(weights_dir)
        if weights_dir[char_count - 1] == "/":
            weights_dir = weights_dir[:char_count - 1]

        tokens = weights_dir.split("_")
        last_tok = tokens[len(tokens) - 1]
        if last_tok.isdigit():
            dir_id = int(last_tok) + 1
            weights_dir = "_".join(tokens[:-1]) + "_" + str(dir_id) + "/"
        else:
            weights_dir = "_".join(tokens) + "_1/"

        weights_dir = getUniquePath(weights_dir)

    return weights_dir
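
# For example (illustrative): getUniquePath("model_params/") returns
# "model_params_1/" if "model_params/" already exists, and "model_params_2/"
# if "model_params_1/" exists as well.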

#***** Top-level External Function *******
def translate_to_approxhpvm(model,
                            weights_dir, src_dir,
                            test_data, test_labels,
                            tuner_data, tuner_labels,
                            batch_size, num_classes=10,
                            enable_weights_reload=False):

    reload_weights = enable_weights_reload  # if True, does not dump any weight/input/label files
    if not reload_weights:
        weights_dir = getUniquePath(weights_dir)
        os.mkdir(weights_dir)

    src_dir = getUniquePath(src_dir)
    os.mkdir(src_dir)

    dfg = DFG()
    for i in range(len(model.layers)):
        layer = model.layers[i]
        # NOTE: add DNN layer to the graph
        dfg.add_to_graph(layer)

    # Build and print the DFG in reverse postorder
    dfg.buildDFG()

    DEBUG("test_data.shape = ", test_data.shape, "\n")
    DEBUG("test_labels.shape = ", test_labels.shape, "\n")

    tensorRtTranslator = TensorRtTranslator(dfg)
    tensorRtTranslator.setInputHeightWidth(test_data)
    tensorRtTranslator.translate(model, weights_dir, src_dir, test_data, test_labels, tuner_data, tuner_labels, reload_weights)
    weight_str = tensorRtTranslator.getWeightStr()
    input_str = tensorRtTranslator.getInputStr()
    filter_names = tensorRtTranslator.getFilterNames()

    hpvmTranslator = HPVMTranslator(dfg, weight_str, input_str, filter_names)
    hpvmTranslator.translate(model, src_dir, test_data, tuner_data, batch_size)

    if reload_weights:
        print("NOTE: Using existing pretrained weights \n")
    else:
        print("NOTE: Dumping a new set of weights \n")

    print("-- Weight Files Under : ", weights_dir)
    print("-- TensorRT src : ", src_dir + "/src.cc")
    print("-- ApproxHPVM src : ", src_dir + "/approxhpvm_src.cc")

    return weights_dir
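
# Example usage (illustrative sketch; assumes a trained Keras model and
# NCHW-shaped numpy arrays for the test/tuner sets -- the names below are
# hypothetical):
#
#   from frontend.approxhpvm_translator import translate_to_approxhpvm
#
#   translate_to_approxhpvm(model,
#                           "model_params/lenet/", "src/lenet_src/",
#                           x_test, y_test,
#                           x_tune, y_tune,
#                           batch_size=500)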