From dbbe65ef7df183ae5f8ea9a4e534b84d37dfc161 Mon Sep 17 00:00:00 2001
From: Elizabeth <hashim.sharif91@gmail.com>
Date: Tue, 8 Oct 2019 18:57:47 -0500
Subject: [PATCH] Added more pydoc

---
 .../build_pldi/table_generator.py             | 112 +++++++++++-------
 1 file changed, 71 insertions(+), 41 deletions(-)

diff --git a/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py b/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
index c610aa658c..85895d7978 100644
--- a/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
+++ b/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
@@ -39,10 +39,18 @@ class TableGenerator:
 
 	def generate_table(self):
 		'''
-        Generates a table file called <network_name>_tensors.txt in the 
+        Generates a table file called <network_name>_tensors.txt by performing the
+        following steps:
+        1. Runs the offline profiler against the inputted binaries to generate
+        results files
+        2. Builds an internal table storing all data parsed from the results files
+        the offline profiler generated
+        3. Writes the internal table to the <network_name>_tensors.txt file, using the
+        <network_name>_ops.txt file as a guideline for row order
 		'''
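+        # Typical usage (paths are illustrative; see the __main__ block at the
+        # bottom of this file):
+        #   TableGenerator("<binary_dir>", 1, "<profiler_binary>").generate_table()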
+        self.__run_inputted_binaries()
         self.__build_internal_table()
-        self.__output_table()
+        self.__output_table_to_file()
 
 
     def __should_execute_file(self, file_path):
@@ -58,7 +66,7 @@ class TableGenerator:
                 file_path.find(self.__network_name) != -1
 
 
-    def run_inputted_binaries(self):
+    def __run_inputted_binaries(self):
         '''
         Invokes the profiler to run all appropriate binaries (must start with the network 
         name) in the inputted directory. Result files generated by the profiler are 
@@ -149,6 +157,11 @@ class TableGenerator:
 
 
     def __build_internal_table(self):
+        '''
+        Iterates through each results file generated by the runs of the offline
+        profiler and stores the data in a dictionary in the following format:
+            [operation name][approximation type OR conversion type][time/energy]
+        '''
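+        # Illustrative sketch of the resulting structure (operation and approximation
+        # names below are hypothetical, not taken from real profiler output):
+        #   self.__table["Conv1"]["fp16"]["time"]       = 1.23
+        #   self.__table["Conv1"]["fp16"]["energy"]     = 4.56
+        #   self.__table["Conv1"]["fp16_f2h"]["time"]   = 0.07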
         for results_file_name in os.listdir(self.__results_dir_path):
             # Ignore if it's not a results file
             if results_file_name == self.__table_filename or \
@@ -162,20 +175,18 @@ class TableGenerator:
                 line = line.strip()
                 op_name, total_time, total_energy = self.__parse_tensor_operation_line(line)
 
-                # Conv1 --> all approximations --> store f2h and h2f as independent approximations 
-                # Need to store their full names
-                # Handle _f2h and _h2f output for tensor operation
-                # Store as columns of original operation rather than independent rows 
+                # If the current operation is an f2h or h2f precision conversion
                 if any(op_name.endswith(prec_conv) for prec_conv in TableGenerator.precision_conversions):
+                    # Get the original operation name (without the _f2h/_h2f suffix) and the conversion type
                     orig_op_name, conversion_type = self.__get_original_operation_name(op_name)
 
                     if orig_op_name not in self.__table: 
                         print("ERROR: Conversion found but original %s is not in the table" % orig_op_name)
                         exit(1)
-                    # Need to store f2h and h2f for each type of approximation 
-                    conversion_key = "%s_%s" % (approx_type, conversion_type)
-                    self.__table[orig_op_name][conversion_key]["time"] = total_time
-                    self.__table[orig_op_name][conversion_key]["energy"] = total_energy 
+
+                    # Store f2h and h2f as columns in the row belonging to the original operation,
+                    # keyed by "<approx type>_<conversion type>" so they do not overwrite the
+                    # approximation's own time/energy entries
+                    conversion_key = "%s_%s" % (approx_type, conversion_type)
+                    self.__table[orig_op_name][conversion_key]["time"] = total_time
+                    self.__table[orig_op_name][conversion_key]["energy"] = total_energy
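+                    # e.g. (hypothetical names): op_name "Conv1_f2h" yields
+                    # orig_op_name "Conv1" and conversion_type "f2h", so with
+                    # approx_type "fp16" the data lands in row "Conv1" under "fp16_f2h"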
 
                 # Create a new row in the dictionary
                 else:
@@ -184,72 +195,91 @@ class TableGenerator:
 
             results_file.close()
 
-    def __output_table(self):
+
+    def __output_table_to_file(self):
+        '''
+        Outputs the internally stored table to a file, using the <network_name>_ops.txt file
+        as a guideline, in the following steps:
+        1. Opens the ops file and the file to output the table to
+        2. Reads a line from the ops file (guaranteed to be a layer/NML header)
+        3. For each operation in the layer (or a single operation if the "layer" is an NML),
+        writes the operation's time and energy for every approximation type
+        '''
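+        # Illustrative sketch of the emitted format (layer, operation, and
+        # approximation names are hypothetical):
+        #   ** Conv1 2 fp16_time fp16_energy fp16_f2h_time fp16_f2h_energy
+        #   Conv1 1.20 3.40 0.07 0.02
+        #   Conv1_add 0.80 2.10 0.05 0.01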
         table_file_path = os.path.join(self.__results_dir_path, self.__table_filename)
-        # TODO un hard code this 
-        soc_operations_file_name = os.path.join("/home/nvidia/soc_simulator", "%s_cifar10" % self.__network_name, "%s_ops.txt" % self.__network_name)
+        soc_operations_file_name = os.path.join("/home", "nvidia", "soc_simulator",
+                        "%s_cifar10" % self.__network_name, "%s_ops.txt" % self.__network_name)
 
-		# Don't need to copy the file over --> can use the original file as a reference
         soc_operations_file = open(soc_operations_file_name, "r")
         table_file = open(table_file_path, "w")
 
-        # TODO possible for operations in the same layer to not have the same # of cols? 
-
         curr_line = soc_operations_file.readline().strip()
 
         while curr_line:
             # First line is always the layers line (#layer_name,num_ops)
             layer_name, num_ops = self.__parse_layer_info_line(curr_line)
-            print("FIRST LINE", layer_name, num_ops)
-            
-            # Get each operation in the layer
+
+            # List of strings, where each string is a row corresponding to an operation
+            # in the layer
             ops_in_layer = []
+
+            # Stores a list of elements in the header, which will be joined into a string
+            # The header is only generated for the first operation in the layer
+            # CRITICAL ASSUMPTION: All operations within a layer have the same number of
+            # columns; otherwise everything breaks because the header is per layer, not per operation
             header = ["**", layer_name, str(num_ops)]
-            
+
+            # Iterate through all operations within the layer 
             for op_in_layer_count in range(num_ops): 
-                # Each line consists of operation name  
+                # The next line of the ops file contains the operation name
                 curr_line = soc_operations_file.readline().strip()
-                curr_op = [curr_line] # Join into a string later
+
+                # Stores a list of elements that will be joined to make up a row 
+                curr_op = [curr_line] 
                 operation_data = self.__table[curr_line]
 
-                # Iterate through time/energy data for each approx type
+                # Iterate through time/energy data for each approximation type corresponding
+                # to the current operation
                 for approx_type in operation_data:
                     op_time = operation_data[approx_type]["time"]
                     op_energy = operation_data[approx_type]["energy"]
 
-                    # can either just be the approx time or can be the approx type_h2f/f2h field
                     curr_op.append(op_time)
                     curr_op.append(op_energy)
 
-                    # CRITICAL ASSUMPTION: All ops within a layer have the same # cols
-                    # Only fill out the header once for the layer
                     if op_in_layer_count == 0:
                         header.append("%s_time" % approx_type)    
                         header.append("%s_energy" % approx_type)
+
                 ops_in_layer.append(' '.join(curr_op))
 
             # Getting all operation rows and then writing everything because
             # calls to write() are slow (memory vs time tradeoff)
-            print("%s" % ' '.join(header))
-            print("%s" % '\n'.join(ops_in_layer))
             table_file.write("%s\n%s\n" % (' '.join(header), '\n'.join(ops_in_layer)))
 
             curr_line = soc_operations_file.readline().strip()
 
     def __parse_layer_info_line(self, layer_info_line): #layer_name,num_ops
+        '''
+        Parses a layer header (from the original ops.txt file) into the layer name
+        and the number of operations
+        Assumed format: #layer_name,num_ops
+
+        Args:
+            layer_info_line:    Line at the beginning of each layer in the ops file
+
+        Returns:
+            layer name
+            number of ops in the layer
+        '''
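+        # Illustrative example (hypothetical layer name):
+        #   "#Conv1,3" -> ("Conv1", 3)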
         comma_ind = layer_info_line.find(",")
         return layer_info_line[layer_info_line.find("#") + 1 : comma_ind], \
                     int(layer_info_line[comma_ind + 1 : ])
 
-    def __generate_header(self, table):
-        # <approx type time/energy> <conversion type at very end> 
-        # should the header be per tensor op or per layer?
-        # Try doing this per layer first
-        pass            
-
-binary_dir_path = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_pldi/mobilenet"
-num_iters = 1 
-profiler_binary_name = "/home/nvidia/awesome_profiler/pp"
-table_gen = TableGenerator(binary_dir_path, num_iters, profiler_binary_name)
-#table_gen.run_inputted_binaries()
-#table_gen.generate_table()
+
+if __name__ == "__main__":
+    binary_dir_path = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_pldi/mobilenet"
+    num_iters = 1 
+    profiler_binary_name = "/home/nvidia/awesome_profiler/pp"
+    table_gen = TableGenerator(binary_dir_path, num_iters, profiler_binary_name)
+    table_gen.generate_table()
-- 
GitLab