Removed build_pldi/ directory from repo

dc823cd1 · Elizabeth · c9ea00da · c9ea00da
Commit dc823cd1 authored 5 years ago by Elizabeth
--- a/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
+++ b/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
-import glob
-import os 
-import subprocess
-import shutil 
-import sys
-from collections import defaultdict
-'''
-FORMAT
-** LayerName NumOpsInLayer <cols>
-OpName Col1Val Col2Val ...
-** Conv1 1 h2f_time h2f_energy fp32_time fp32_energy f2h_time f2h_energy fp16_perf_time fp16_perf_energy fp16_time fp16_energy
-Conv1 51.8808 97.2844 319.582 601.966 12.81 18.758 388.092 650.649 340.037 590.664
-'''
class TableGenerator:
    '''
    Builds a per-operation time/energy table for one network.

    The offline profiler is run against every network binary in a directory,
    the results files it writes are parsed into a nested dictionary keyed as
    [operation name][approximation type OR conversion type][time/energy],
    and that dictionary is written out as <network_name>_tensors.txt using
    the network's ops file to decide row order.
    '''

    __ops_header_delimiter = "#"
    __table_header_delimter = "**"
    __time_col_name = "time"
    __energy_col_name = "energy"
    __results_file_ext = ".txt"

    # Stores all precision conversions used.
    precision_conversions = frozenset(["h2f", "f2h"])

    def __init__(self, dir_path, iters, profiler_binary_name, soc_ops_file_path=None):
        '''
        Args:
            dir_path:               Path of directory containing network binaries
            iters:                  Number of iterations to run each binary for
            profiler_binary_name:   Name of offline profiler binary to run
            soc_ops_file_path:      Optional path of the <network_name>_ops.txt
                                    file. When omitted, the historical location
                                    /home/nvidia/soc_simulator/<network_name>_cifar10/
                                    <network_name>_ops.txt is used.
        '''
        self.__dir_path = dir_path
        # Normalise first so a trailing separator ("nets/lenet/") still gives
        # the network name "lenet" and a sibling "nets/lenet_results" directory
        # instead of an empty name and "nets/lenet/_results".
        normalized_dir_path = os.path.normpath(dir_path)
        # Name of the actual directory
        self.__network_name = os.path.basename(normalized_dir_path)
        self.__iters = iters
        self.__profiler_binary_name = profiler_binary_name
        # Path to results directory
        self.__results_dir_path = "%s_results" % normalized_dir_path
        # Outputted table file
        self.__table_filename = "%s_tensors.txt" % self.__network_name
        if soc_ops_file_path is None:
            soc_ops_file_path = os.path.join(
                "/", "home", "nvidia", "soc_simulator",
                "%s_cifar10" % self.__network_name,
                "%s_ops.txt" % self.__network_name)
        # Ops file used as the row-order guideline when writing the table
        self.__soc_ops_file_path = soc_ops_file_path
        # Nested default dictionary of default dicts
        self.__table = self.__build_nested_default_dict()

    def generate_table(self):
        '''
        Generates a table file called <network_name>_tensors.txt in the following
        steps:
        1. Runs the offline profiler against the inputted binaries to generate
        results files
        2. Builds an internal table storing all data from the parsed results files
        the offline profiler generated
        3. Writes the internal table to <network_name>_tensors.txt file and uses the
        <network_name>_ops.txt file as a guideline in terms of row order
        '''
        self.__run_inputted_binaries()
        self.__build_internal_table()
        self.__output_table_to_file()

    def __run_inputted_binaries(self):
        '''
        Invokes the profiler to run all appropriate binaries (must start with the
        network name) in the inputted directory. Result files generated by the
        profiler are stored in the results file directory and are named
        <binary_name>.txt. These results files are then parsed in a later step to
        generate the table.

        Exits with status 1 if the binary directory does not exist or the
        results directory can neither be created nor found.
        '''
        if not os.path.isdir(self.__dir_path):
            print("ERROR: Directory %s not found" % self.__dir_path)
            sys.exit(1)

        try:
            os.mkdir(self.__results_dir_path)
        except OSError:
            if not os.path.isdir(self.__results_dir_path):
                # Nothing below can work without a results directory, so stop
                # here instead of failing later with a less helpful error.
                print("ERROR: Directory doesn't exist but failed to create dir")
                sys.exit(1)
            print("Directory already exists. Clearing directory.")
            for old_file in glob.glob(os.path.join(self.__results_dir_path, "*")):
                os.remove(old_file)

        for binary_name in os.listdir(self.__dir_path):
            binary_path = os.path.join(self.__dir_path, binary_name)
            if not self.__should_execute_file(binary_path):
                continue
            output_file = os.path.join(
                self.__results_dir_path,
                binary_name + TableGenerator.__results_file_ext)
            # No stdout/stderr piping needed for now. The profiler name comes
            # from the instance, not from a module-level global, so the class
            # also works when imported from another module.
            subprocess.call([self.__profiler_binary_name, binary_path,
                             str(self.__iters), output_file])

    def __build_internal_table(self):
        '''
        Iterates through each results file generated by the runs of the offline
        profiler and stores the data in a dictionary in the following format:
            [operation name][approximation type OR conversion type][time/energy]

        Exits with status 1 if a conversion row (f2h/h2f) is found before the
        row of the operation it belongs to.
        '''
        # str.endswith accepts a tuple, so one call covers every conversion.
        conversion_suffixes = tuple(TableGenerator.precision_conversions)

        for results_file_name in os.listdir(self.__results_dir_path):
            # Ignore if it's not a results file
            if results_file_name == self.__table_filename or \
                    not results_file_name.startswith(self.__network_name):
                continue

            approx_type = self.__get_approximation_type(results_file_name)
            results_file_path = os.path.join(self.__results_dir_path, results_file_name)
            with open(results_file_path, "r") as results_file:
                for line in results_file:
                    line = line.strip()
                    # Blank lines (e.g. a trailing newline) carry no data
                    if not line:
                        continue
                    op_name, total_time, total_energy = \
                        self.__parse_tensor_operation_line(line)

                    # If the current operation is f2h or h2f
                    if op_name.endswith(conversion_suffixes):
                        # Get the original operation name (without the f2h/h2f)
                        # and the conversion type
                        orig_op_name, conversion_type = \
                            self.__get_original_operation_name(op_name)
                        if orig_op_name not in self.__table:
                            print("ERROR: Conversion found but original %s is not in the table" % orig_op_name)
                            sys.exit(1)
                        # Store f2h and h2f as columns in the row belonging to
                        # the original operation
                        row_key, column_key = orig_op_name, conversion_type
                    else:
                        # Create a new row in the dictionary
                        row_key, column_key = op_name, approx_type

                    cell = self.__table[row_key][column_key]
                    cell[TableGenerator.__time_col_name] = total_time
                    cell[TableGenerator.__energy_col_name] = total_energy

    def __output_table_to_file(self):
        '''
        Outputs the internally stored table to a file using the
        <network_name>_ops.txt file as a guideline in the following steps:
        1. Opens the ops file and the file to output the table to
        2. Reads a line from the ops file (guaranteed to be the layers/NML header)
        3. For each operation in the layer (or 1 operation if the "layer" is a
        NML), we store the time and the energy
        '''
        table_file_path = os.path.join(self.__results_dir_path, self.__table_filename)

        # The ops file is opened first so a missing ops file fails before an
        # empty table file is created. Both handles are closed (and the table
        # flushed) even if parsing raises.
        with open(self.__soc_ops_file_path, "r") as soc_operations_file, \
                open(table_file_path, "w") as table_file:
            curr_line = soc_operations_file.readline().strip()
            while curr_line:
                # First line is always the layers line (#layer_name,num_ops)
                layer_name, num_ops = self.__parse_layer_info_line(curr_line)

                # List of strings, where each string is a row corresponding to
                # an operation in the layer
                ops_in_layer = []

                # Stores a list of elements in the header, which will be joined
                # into a string. The header is only generated for the first
                # operation in the layer.
                # CRITICAL ASSUMPTION: All operations within a layer have the
                # same # columns or everything breaks bc the header is per
                # layer, not per operation
                header = [TableGenerator.__table_header_delimter, layer_name, str(num_ops)]

                # Iterate through all operations within the layer
                for op_in_layer_count in range(num_ops):
                    # Contains the operation name
                    op_name = soc_operations_file.readline().strip()
                    # Stores a list of elements that will be joined to make up a row
                    curr_op = [op_name]
                    operation_data = self.__table[op_name]

                    # Iterate through time/energy data for each approximation
                    # type corresponding to the current operation
                    for approx_type in operation_data:
                        measurements = operation_data[approx_type]
                        curr_op.append(measurements[TableGenerator.__time_col_name])
                        curr_op.append(measurements[TableGenerator.__energy_col_name])
                        if op_in_layer_count == 0:
                            header.append("%s_time" % approx_type)
                            header.append("%s_energy" % approx_type)

                    ops_in_layer.append(' '.join(curr_op))

                # Getting all operation rows and then writing everything because
                # calls to write() are slow (memory vs time tradeoff)
                table_file.write("%s\n%s\n" % (' '.join(header), '\n'.join(ops_in_layer)))
                curr_line = soc_operations_file.readline().strip()

    def __should_execute_file(self, file_path):
        '''
        Checks if the file at the given file path is a binary that should be run
        by the profiler. Must exist, be executable, and its file name must start
        with the network name as per our naming standards.

        Args:
            file_path:          Path of the file to check
        Returns:
            True if the profiler should run the file, False otherwise
        '''
        # Only the file name is tested: the containing directory is itself
        # named after the network, so searching the whole path always matches.
        return os.path.isfile(file_path) and os.access(file_path, os.X_OK) and \
                os.path.basename(file_path).startswith(self.__network_name)

    def __get_approximation_type(self, results_filename):
        '''
        Parses a given results filename for the approximation type.
        Format assumption: <network_name>_<approx_type>.txt

        Args:
            results_filename:      Name of results file
        Returns:
            the approximation technique (ex: fp16)
        '''
        approx_type_start_ind = results_filename.find(self.__network_name) \
                + len(self.__network_name) + 1 # + 1 to account for _ delimiter
        stem, extension = os.path.splitext(results_filename)
        # Only cut the extension off when it really is there; a failed search
        # must not silently drop the last character of the name.
        if extension == TableGenerator.__results_file_ext:
            approx_type_end_ind = len(stem)
        else:
            approx_type_end_ind = len(results_filename)
        return results_filename[approx_type_start_ind : approx_type_end_ind]

    def __parse_tensor_operation_line(self, tensor_op_line):
        '''
        Parses a tensor operation line (within a output file from the offline
        profiler) for the operation name, the total time used, and the total
        energy used. The line is comma separated and the values are returned
        as strings.

        Args:
            tensor_op_line:        Tensor operation line from output file
        Returns:
            operation name
            total time used
            total energy used
        Raises:
            IndexError if the line has fewer than three comma-separated fields
        '''
        line_as_list = tensor_op_line.split(",")
        return line_as_list[0], line_as_list[1], line_as_list[2]

    def __build_nested_default_dict(self):
        '''
        Builds a nested default dictionary with an arbitrary number of levels
        '''
        return defaultdict(self.__build_nested_default_dict)

    def __get_original_operation_name(self, op_name):
        '''
        Parses an operation name containing _<conversion type> for the original
        operation name.
        Format assumption: <original_op_name>_<conversion type>

        Args:
            op_name:        Name of the operation
        Returns:
            the original operation name
            the conversion type
        '''
        # Split on the LAST underscore: the conversion type never contains
        # one, but the original operation name may.
        orig_op_name, _, conversion_type = op_name.rpartition("_")
        return orig_op_name, conversion_type

    def __parse_layer_info_line(self, layer_info_line): #layer_name,num_ops
        '''
        Parses a layer header (from the original ops.txt file) into the layer name
        and the number of operations
        Assumed format: #layer_name,num_ops

        Args:
            layer_info_line:    Line at the beginning of each layer in the ops file
        Returns:
            layer name
            number of ops in the layer
        Raises:
            ValueError if the text after the comma is not an integer
        '''
        name_part, _, num_ops_part = layer_info_line.partition(",")
        delimiter_ind = name_part.find(TableGenerator.__ops_header_delimiter)
        return name_part[delimiter_ind + 1 : ], int(num_ops_part)
if __name__ == "__main__":
    # Command-line entry point:
    #   python table_generator.py <binary dir path> <num itrs> <profiler bin path>
    if len(sys.argv) != 4:
        print("python table_generator.py <binary dir path> <num itrs> <profiler bin path>")
        # sys.exit rather than the bare exit() helper: the latter is injected
        # by the site module for interactive use and is not always available.
        sys.exit(1)
    binary_dir_path = sys.argv[1]
    try:
        num_iters = int(sys.argv[2])
    except ValueError:
        # Report a bad iteration count as a usage error, not a traceback.
        print("ERROR: <num itrs> must be an integer, got %s" % sys.argv[2])
        sys.exit(1)
    # NOTE: keep this exact module-level name; code elsewhere in the file may
    # read it as a global.
    profiler_binary_name = sys.argv[3]
    table_gen = TableGenerator(binary_dir_path, num_iters, profiler_binary_name)
    table_gen.generate_table()