merging

9b51ef5c · Ubuntu · bb302f7c · de4a1143 · 9b51ef5c · 9b51ef5c
Commit 9b51ef5c authored 5 years ago by Ubuntu
--- a/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
+++ b/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
@@ -2,9 +2,21 @@ import glob
 import os 
 import subprocess
 import shutil 
+import sys

 from collections import defaultdict

+'''
+FORMAT
+
+** LayerName NumOpsInLayer <cols>
+OpName Col1Val Col2Val ...
+
+** Conv1 1 h2f_time h2f_energy fp32_time fp32_energy f2h_time f2h_energy fp16_perf_time fp16_perf_energy fp16_time fp16_energy
+Conv1 51.8808 97.2844 319.582 601.966 12.81 18.758 388.092 650.649 340.037 590.664
+
+'''
+
 class TableGenerator: 

    __ops_header_delimiter = "#"
@@ -53,7 +65,7 @@ class TableGenerator:
        3. Writes the internal table to <network_name>_tensors.txt file and uses the 
        <network_name>_ops.txt file as a guideline in terms of row order 
        '''
-        self.__run_inputted_binaries()
+        #self.__run_inputted_binaries()
        self.__build_internal_table()
        self.__output_table_to_file()

@@ -282,8 +294,11 @@ class TableGenerator:


 if __name__ == "__main__":
-    binary_dir_path = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_pldi/mobilenet"
-    num_iters = 1 
-    profiler_binary_name = "/home/nvidia/awesome_profiler/pp"
+    if len(sys.argv) != 4:  # expects: <binary dir path> <num itrs> <profiler bin path>
+        print("Usage: python table_generator.py <binary dir path> <num itrs> <profiler bin path>")
+        sys.exit(1)  # sys.exit rather than exit(): the latter is only installed by the site module
+    binary_dir_path = sys.argv[1]
+    num_iters = int(sys.argv[2])  # raises ValueError if the argument is not an integer
+    profiler_binary_name = sys.argv[3]
    table_gen = TableGenerator(binary_dir_path, num_iters, profiler_binary_name)
    table_gen.generate_table()
--- a/llvm/projects/soc_simulator/src/driver.py
+++ b/llvm/projects/soc_simulator/src/driver.py
-# Python driver -- ported from Perl driver (driver.pl)
-
 from collections import defaultdict
 import os
+import subprocess
 import sys

-def build_nested_default_dict():
-	return defaultdict(build_nested_default_dict)
+class Driver:
+    fp16_swing = 8

-tensor_layers = defaultdict(build_nested_default_dict)
+    class ApproxTypes:
+        FP16 = 0
+        FP32 = 1
+        PROMISE = 2

-def is_conv(operation_name):
-    return operation_name.startswith("Conv")
+    results_time_key = "Time"
+    results_energy_key = "Energy"

-def is_nml(operation_name):
-    return operation_name.startswith("NML")

-def is_fc(operation_name):
-    return operation_name.startswith("FC")
+    def driver(self):
+        """Run every stage in order: parse layers, parse table, simulate, write results."""
+        pipeline = (
+            self.__parse_tensor_layer_file,
+            self.__parse_tensor_table,
+            self.__run_simulations,
+            self.__display_results,
+        )
+        for stage in pipeline:
+            stage()

-def parse_tensor_layer_file(layer_filename): 
-    '''
-    Convs: Layer name, N, Cin, H, W, Cout, Kh, Kw, Sh, Sw
-    FCs: Layer name, Rows_A, Cols_A, Rows_B, Cols_B
-    NMLs (No Man Lands):  NML<number> (edited) 
-    '''
-    if not os.path.isfile(layer_filename):
-        print("ERROR: %s was not found." % layer_filename)
-        exit(1)

-    layer_file = open(layer_filename, "r")
-    for line in layer_file:
-        layer_data = line.strip().split(',')
-        layer_name = layer_data[0]
-        
-        if is_conv(layer_name):
-            tensor_layers[layer_name]["N"] = layer_data[1]
-            tensor_layers[layer_name]["Cin"] = layer_data[2]
-            tensor_layers[layer_name]["H"] = layer_data[3]
-            tensor_layers[layer_name]["W"] = layer_data[4]
-            tensor_layers[layer_name]["Cout"] = layer_data[5]
-            tensor_layers[layer_name]["Kh"] = layer_data[6] 
-            tensor_layers[layer_name]["Kw"] = layer_data[7]
-            tensor_layers[layer_name]["Sh"] = layer_data[8]
-            tensor_layers[layer_name]["Sw"] = layer_data[9]
-
-        elif is_fc(layer_name):
-            tensor_layers[layer_name]["RA"] = layer_data[1]
-            tensor_layers[layer_name]["CA"] = layer_data[2]
-            tensor_layers[layer_name]["RB"] = layer_data[3]
-            tensor_layers[layer_name]["CB"] = layer_data[4]
-
-		elif not is_nml(layer_name): # TODO should we store data for NMLs?
-			print("ERROR: Invalid layer name %s" % layer_name)
-			exit(1)
-
-    layer_file.close()
-
-# should this be a nested dict of dicts?
-# [layer_name][operation_name][cols]
-tensor_table = defaultdict(build_nested_default_dict)
-
-def parse_tensor_table(table_filename): 
-    if not os.path.isfile(table_filename):
-        print("ERROR: %s was not found." % table_filename)
-        exit(1)
+    def __init__(self, layer_filename, table_filename, config_filename, results_filename):
+        """Remember the input/output paths and create the empty result containers.
+
+        Args:
+            layer_filename: path read by __parse_tensor_layer_file.
+            table_filename: path read by __parse_tensor_table.
+            config_filename: path read by __run_simulations.
+            results_filename: path written by __display_results.
+        """
+        self.__layer_filename = layer_filename
+        self.__table_filename = table_filename
+        self.__config_filename = config_filename
+        self.__results_filename = results_filename
+
+        # NOTE: Use an OrderedDict if we want to search by operation name
+        # Using a list bc we care about the order the data is read in
+        # since it corresponds to the data in the configuration file
+        self.__tensor_layers = []
+
+        # [layer_name][operation index][col name]
+        # Each layer maps to a list of per-operation dicts, kept in insertion
+        # order. The previous factory, list(defaultdict(str)), also produced
+        # an empty list; defaultdict(list) states that directly.
+        self.__tensor_table = defaultdict(list)
+
+        # [Time/Energy][number corresponding to order the layer config was read in] = time/energy
+        self.__aggregate_results = defaultdict(lambda: defaultdict(float))
+        self.__config_count = 0
+
+
+    @staticmethod
+    def is_conv(operation_name):
+        """Return True when operation_name begins with "Conv"."""
+        prefix = "Conv"
+        return operation_name.startswith(prefix)
+
+
+    @staticmethod
+    def is_nml(operation_name):
+        """Return True when operation_name begins with "NML"."""
+        prefix = "NML"
+        return operation_name.startswith(prefix)
+
+
+    @staticmethod
+    def is_fc(operation_name):
+        """Return True when operation_name begins with "FC"."""
+        prefix = "FC"
+        return operation_name.startswith(prefix)
+
+
+    def __parse_tensor_layer_file(self):
+        """Read the layer description file into self.__tensor_layers.
+
+        Every non-blank line is a comma-separated record whose first field is
+        the layer name. One dict per record is appended, in file order, holding
+        "Name" plus the numeric fields of that layer kind converted to float.
+        Layers whose name starts with "NML" only get their "Name" stored.
+
+        Exits with status 1 when the file is missing or when a layer name is
+        not a Conv, FC or NML name. A missing column raises IndexError and a
+        non-numeric column raises ValueError.
+        """
+        if not os.path.isfile(self.__layer_filename):
+            print("ERROR: %s was not found." % self.__layer_filename)
+            sys.exit(1)
+
+        # (key, column index) of each numeric field, per layer kind.
+        # NOTE(review): the conv fields skip column 6 (Kh..Sw are read from
+        # columns 7-10) -- presumably the layer file carries an extra column
+        # there; confirm against the tool that produces the layer file.
+        conv_fields = (("N", 1), ("Cin", 2), ("H", 3), ("W", 4), ("Cout", 5),
+                       ("Kh", 7), ("Kw", 8), ("Sh", 9), ("Sw", 10))
+        fc_fields = (("RA", 1), ("CA", 2), ("RB", 3), ("CB", 4))
+
+        # The with-block closes the file even when we bail out through sys.exit.
+        with open(self.__layer_filename, "r") as layer_file:
+            for line in layer_file:
+                line = line.strip()
+                if not line:
+                    # A stray blank line is not a layer; skipping it keeps the
+                    # layer order aligned with the configuration file.
+                    continue
+
+                layer_data = line.split(',')
+                layer_name = layer_data[0]
+
+                if Driver.is_conv(layer_name):
+                    fields = conv_fields
+                elif Driver.is_fc(layer_name):
+                    fields = fc_fields
+                elif Driver.is_nml(layer_name): # TODO should we store data for NMLs?
+                    fields = ()
+                else:
+                    print("ERROR: Invalid layer name %s" % layer_name)
+                    sys.exit(1)
+
+                tensor_layer = defaultdict(str)
+                tensor_layer["Name"] = layer_name
+                for key, column in fields:
+                    tensor_layer[key] = float(layer_data[column])
+
+                self.__tensor_layers.append(tensor_layer)
+
+
+    def __parse_tensor_table(self):
+        """Read the tensor table file into self.__tensor_table.
+
+        The file is a sequence of records. Each record is a header line
+        "** <layer name> <num ops> <col names...>" followed by <num ops> lines
+        of "<op name> <value per column...>". For every layer the operations
+        are stored as a list of dicts (in file order) holding "Name" plus one
+        float per column. Reading stops at the first empty line or end of file.
+
+        Exits with status 1 when the file is missing. Raises ValueError when a
+        header or operation line is malformed (previously an assert, which is
+        skipped when Python runs with -O).
+        """
+        if not os.path.isfile(self.__table_filename):
+            print("ERROR: %s was not found." % self.__table_filename)
+            sys.exit(1)
+
+        with open(self.__table_filename, "r") as table_file:
+            line = table_file.readline().strip()
+
+            while line:
+                # Line here MUST be a header or there's a bug
+                if not line.startswith("**"):
+                    raise ValueError("Expected a '**' layer header in %s, got: %r"
+                                     % (self.__table_filename, line))
+
+                # split() instead of split(' ') so repeated blanks or tabs do
+                # not create empty fields.
+                header_contents = line.split()[1:]
+                if len(header_contents) < 2:
+                    raise ValueError("Layer header needs a name and an op count: %r" % line)
+                layer_name = header_contents[0]
+                num_ops = int(header_contents[1])
+                col_names = header_contents[2:]
+
+                layer_operations = []
+
+                # Go through all operations in the layer
+                for _ in range(num_ops):
+                    op_data = table_file.readline().split()
+
+                    # Number of data items (#s) needs to match up with the # of cols
+                    if len(op_data) - 1 != len(col_names):
+                        raise ValueError("Layer %s: expected an op name plus %d values, got: %r"
+                                         % (layer_name, len(col_names), op_data))
+
+                    operation_data = defaultdict(str)
+                    operation_data["Name"] = op_data[0]
+                    for col_name, value in zip(col_names, op_data[1:]):
+                        operation_data[col_name] = float(value)
+
+                    layer_operations.append(operation_data)
+
+                self.__tensor_table[layer_name] = layer_operations
+                line = table_file.readline().strip()
+
+
+    @staticmethod
+    def is_promise(config_layer):
+        """Return True when the first space-separated value of config_layer,
+        read as a float, is below Driver.fp16_swing."""
+        first_field = config_layer.split(' ')[0]
+        return float(first_field) < Driver.fp16_swing
+
+
+    def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
+        """Return the (time, energy) cost of converting between FP32 and FP16.
+
+        The cost is (0.0, 0.0) when the current and previous approximation
+        types are equal or when either one is PROMISE. Otherwise the h2f_*
+        columns are used for an FP32 current type and the f2h_* columns for an
+        FP16 one, taken from row h2f_f2h_operation_ind of the layer's entry in
+        the tensor table. The chosen pair is printed before it is returned.
+        """
+        promise = Driver.ApproxTypes.PROMISE
+        if curr_layer == prev_layer or promise in (curr_layer, prev_layer):
+            # No quantization needed
+            return 0.0, 0.0
+
+        # The row is selected by position, which is why the order of the
+        # operations in the tensor table matters.
+        row = self.__tensor_table[layer_data["Name"]][h2f_f2h_operation_ind]
+        if curr_layer == Driver.ApproxTypes.FP32:
+            time, energy = row["h2f_time"], row["h2f_energy"]
+        elif curr_layer == Driver.ApproxTypes.FP16:
+            time, energy = row["f2h_time"], row["f2h_energy"]
+
+        print("Quantization: (%f, %f)" % (time, energy))
+        return (time, energy)
+
+
+    def __run_promise_simulation(self, swing, layer_data):
+        """Run the external ./ptm simulator for one layer and return (time, energy).
+
+        The layer is described to ptm as a matrix product: rows_a, cols_a,
+        rows_b, cols_b, a patch factor and the swing value, all passed as
+        command-line strings. ptm's standard output is expected to be
+        "<time>,<energy>".
+
+        Args:
+            swing: value forwarded unchanged (via str()) as ptm's last argument.
+            layer_data: layer dict built by __parse_tensor_layer_file.
+
+        Exits with status 1 when the layer is neither a Conv nor an FC layer.
+        Raises ValueError when ptm's output is not two comma-separated numbers.
+        """
+        layer_name = layer_data["Name"]
+        patch_factor = 1
+
+        if Driver.is_conv(layer_name):
+            rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
+                    / (layer_data["Sh"] * layer_data["Sw"])
+            cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
+            rows_b = cols_a
+            cols_b = layer_data["Cout"]
+            patch_factor = layer_data["Kh"] * layer_data["Kw"]
+        elif Driver.is_fc(layer_name):
+            rows_a = layer_data["RA"]
+            cols_a = layer_data["CA"]
+            rows_b = cols_a  # the stored "RB" value is not consulted here
+            cols_b = layer_data["CB"]
+        else:
+            print("PROMISE can't run whatever this layer is.")
+            sys.exit(1)
+
+        # Run promise simulator
+        # TODO need to print time and energy in the ptm runner so we can pipe it
+        command = ["./ptm", str(rows_a), str(cols_a), str(rows_b),
+                   str(cols_b), str(patch_factor), str(swing)]
+        # universal_newlines=True makes communicate() return text instead of
+        # bytes on Python 3, so the ','-split below works on both 2 and 3.
+        output = subprocess.Popen(command, stdout = subprocess.PIPE,
+                    stderr = subprocess.PIPE,
+                    universal_newlines = True).communicate()[0]
+        total_time_energy = output.strip().split(',')
+
+        if len(total_time_energy) != 2:
+            raise ValueError("Unexpected ptm output (wanted '<time>,<energy>'): %r" % output)
+        print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
+        return float(total_time_energy[0]), float(total_time_energy[1])
+
+
+    def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind):
+        """Look up and return the (time, energy) pair of one tensor operation.
+
+        The fp32_* columns are used when curr_layer is FP32 and the fp16_*
+        columns for any other value. The row is entry tensor_ind of the layer's
+        list in the tensor table. The pair is printed before it is returned.
+        """
+        if curr_layer == Driver.ApproxTypes.FP32:
+            time_key, energy_key = "fp32_time", "fp32_energy"
+        else:
+            time_key, energy_key = "fp16_time", "fp16_energy"
+
+        row = self.__tensor_table[layer_name][tensor_ind]
+        measured = (row[time_key], row[energy_key])
+        print("GPU: (%f, %f)" % measured)
+        return measured
+
+
+    def __run_simulations(self):
+        """Simulate every configuration and accumulate its total time and energy.
+
+        Each non-blank line of the configuration file is one independent
+        configuration: layers are separated by commas and the tensor
+        operations inside a layer by spaces. The i-th layer entry is matched
+        with the i-th layer read from the layer file. A layer whose first
+        value is below Driver.fp16_swing runs as a whole on PROMISE; otherwise
+        each tensor operation is looked up in the tensor table as FP16 (value
+        equal to Driver.fp16_swing) or FP32 (any other value).
+
+        Totals are added to self.__aggregate_results under the index of the
+        configuration, and self.__config_count is incremented per configuration.
+        Exits with status 1 when the configuration file is missing.
+        """
+        print("run sim")
+        if not os.path.isfile(self.__config_filename):
+            print("ERROR: %s was not found" % self.__config_filename)
+            sys.exit(1)
+
+        time_totals = self.__aggregate_results[Driver.results_time_key]
+        energy_totals = self.__aggregate_results[Driver.results_energy_key]
+
+        with open(self.__config_filename, "r") as config_file:
+            # each line = independent configuration
+            # layers are separated by commas
+            # tensor ops are separated by spaces
+            for config in config_file:
+                config = config.strip()
+                if not config:
+                    # A blank line is not a configuration; without this check
+                    # it would fail in float('') inside is_promise.
+                    continue
+
+                prev_layer = Driver.ApproxTypes.FP32
+                curr_layer = None
+
+                for layer_ind, config_layer in enumerate(config.split(',')): # level
+                    layer_data = self.__tensor_layers[layer_ind]  # layer
+                    layer_name = layer_data["Name"]
+
+                    if Driver.is_promise(config_layer):
+                        print("Running layer %s on PROMISE" % layer_name)
+                        curr_layer = Driver.ApproxTypes.PROMISE
+                        # __quantize returns (0.0, 0.0) whenever PROMISE is
+                        # involved; the values are still added so the totals
+                        # stay right if that ever changes.
+                        quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, 0, layer_data)
+                        # Compute
+                        time, energy = self.__run_promise_simulation(config_layer, layer_data)
+                        print(time, energy)
+                        total_time = quant_time + time
+                        total_energy = quant_energy + energy
+                    else:
+                        print("Running layer %s on the GPU" % layer_name)
+
+                        total_time = 0
+                        total_energy = 0
+                        for tensor_ind, tensor_op in enumerate(config_layer.split(' ')): # sublevel
+                            if int(tensor_op) == Driver.fp16_swing:
+                                curr_layer = Driver.ApproxTypes.FP16
+                            else:
+                                curr_layer = Driver.ApproxTypes.FP32
+                            # NOTE(review): prev_layer only changes between
+                            # layers, so every op in this layer is compared
+                            # with the previous layer's last type -- confirm
+                            # this is the intended quantization model.
+                            quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data)
+                            conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind)
+                            total_time += quant_time + conv_time
+                            total_energy += quant_energy + conv_energy
+
+                    time_totals[self.__config_count] += total_time
+                    energy_totals[self.__config_count] += total_energy
+
+                    prev_layer = curr_layer
+                self.__config_count += 1
+                print("\n")
+
+
+    def __display_results(self):
+        """Write the per-configuration totals to the results file.
+
+        For "Time" and then "Energy" the file gets the attribute name, a
+        "Configuration,Total,Improvement" header, one "c<i>,<total>,<ratio>"
+        line per configuration (ratio = configuration 0's total divided by
+        this total plus 0.0001), and finally the configuration with the
+        smallest total as "c<i>,<total>". The baseline value is also printed.
+        """
+        with open(self.__results_filename, "w") as results_file:
+            for attribute in (Driver.results_time_key, Driver.results_energy_key):
+                results_file.write("%s\n" % attribute)
+                results_file.write("Configuration,Total,Improvement\n")
+
+                totals = self.__aggregate_results[attribute]
+                baseline_val = totals[0]
+                print(baseline_val)
+                best_config = None
+                best_result = None
+
+                for config_ind in range(self.__config_count):
+                    time_or_energy_val = totals[config_ind]
+                    # The small offset keeps the division defined for a zero total.
+                    improvement = baseline_val / (time_or_energy_val + 0.0001)
+
+                    # Using repr to keep all decimal digits when writing to file
+                    results_file.write("c%d,%s,%s\n" % (config_ind,
+                            repr(time_or_energy_val), repr(improvement)))
+
+                    # Compare against None explicitly: a best total of 0.0 is
+                    # falsy and would otherwise be replaced by a larger value.
+                    if best_result is None or time_or_energy_val < best_result:
+                        best_result = time_or_energy_val
+                        best_config = config_ind
+
+                # With no configurations there is no best entry to report.
+                if best_config is not None:
+                    results_file.write("\nc%d,%s\n\n" % (best_config, repr(totals[best_config])))

-    table_file = open(table_filename, "r")
-
-	line = table_file.readline().strip()
-
-	while line:
-		# Line here MUST be a header or there's a bug 
-		# Get the description of the layer 
-		assert(line.startswith("**"))
-		header_contents = line.split(' ')[1:] 
-		layer_name = header_contents[0]
-		num_ops = int(header_contents[1])
-		col_names = header_contents[2:]
-
-		# Go through all operations in the layer
-		for op_count in range(num_ops):
-			line = table_file.readline().strip()
-			op_data = line.split(' ')
-			op_name = op_data[0]
-
-			# Number of data items (#s) needs to match up with the # of cols 
-			assert(len(op_data) - 1 == len(col_names)) 
-
-			# Go through all data items (each col element) per operation 
-			for i in range(len(col_names)):
-				tensor_table[layer_name][op_name][col_names[i]] = op_data[i + 1]
-
-    	line = table_file.readline().strip()
-
-	table_file.close()
-
-
-def run_simulations():
-    # open configuration file
-    # open results file
-    # read through each line in the configuration file
-        # for each config file line --> parse the comma separated voltage swing levels
-        # recall: each line = a configuration that works
-        # for each level
-            # if promise --> promise runs an entire layer
-                # quantize, no patching and unpatching
-                # run on promise 
-                # output the total time and energy
-            # else
-                # for each sublevel (separated by spaces)
-                # quantize
-                # run
-                # keep track of total time and energy --> update as needed
-                # output the total time and energy
-
-# quantization: we always have smart dma
-# need to search stuff up
-# $layer = a map of elements
-# stores the layer name, then

 if __name__ == "__main__":
-    if len(sys.argv) != 4):
+    if len(sys.argv) != 5:
        print("Usage: python driver.py <layer info> <tensor info> <configurations> <results file>")
        exit(1)
-
+    Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver()