Skip to content
Snippets Groups Projects
Commit dc823cd1 authored by Elizabeth
Browse files

Removed build_pldi/ directory from repo

parent c9ea00da
No related branches found
No related tags found
No related merge requests found
import glob
import os
import subprocess
import shutil
import sys
from collections import defaultdict
'''
FORMAT
** LayerName NumOpsInLayer <cols>
OpName Col1Val Col2Val ...
** Conv1 1 h2f_time h2f_energy fp32_time fp32_energy f2h_time f2h_energy fp16_perf_time fp16_perf_energy fp16_time fp16_energy
Conv1 51.8808 97.2844 319.582 601.966 12.81 18.758 388.092 650.649 340.037 590.664
'''
class TableGenerator:
    '''
    Generates a table of per-operation time and energy measurements for a
    network by running an offline profiler against the network's binaries.

    Format of the generated <network_name>_tensors.txt file:
        ** LayerName NumOpsInLayer <cols>
        OpName Col1Val Col2Val ...
    '''

    # Prefix of a layer header line in the ops file (#layer_name,num_ops)
    __ops_header_delimiter = "#"
    # Prefix of a layer header line in the outputted table file
    __table_header_delimiter = "**"
    __time_col_name = "time"
    __energy_col_name = "energy"
    # Extension appended to a binary's name to form its results file name
    __results_file_ext = ".txt"

    # Stores all precision conversions used.
    precision_conversions = frozenset(["h2f", "f2h"])

    def __init__(self, dir_path, iters, profiler_binary_name, ops_file_path=None):
        '''
        Args:
            dir_path: Path of directory containing network binaries
            iters: Number of iterations to run each binary for
            profiler_binary_name: Name of offline profiler binary to run
            ops_file_path: Optional path of the <network_name>_ops.txt file that
                fixes the order of layers and operations in the outputted table.
                Defaults to
                /home/nvidia/soc_simulator/<network_name>_cifar10/<network_name>_ops.txt
        '''
        # Normalize so that a trailing separator (e.g. "nets/lenet/") neither
        # yields an empty network name nor places the results directory inside
        # the binaries directory
        self.__dir_path = os.path.normpath(dir_path) if dir_path else dir_path
        # Name of the actual directory
        self.__network_name = os.path.basename(self.__dir_path)
        self.__iters = iters
        self.__profiler_binary_name = profiler_binary_name
        # Path to results directory
        self.__results_dir_path = "%s_results" % self.__dir_path
        # Outputted table file
        self.__table_filename = "%s_tensors.txt" % self.__network_name
        if ops_file_path is None:
            ops_file_path = os.path.join(
                "/", "home", "nvidia", "soc_simulator",
                "%s_cifar10" % self.__network_name,
                "%s_ops.txt" % self.__network_name)
        # Ops file used as the row-order guideline when writing the table
        self.__ops_file_path = ops_file_path
        # Nested default dictionary of default dicts
        self.__table = self.__build_nested_default_dict()

    def generate_table(self):
        '''
        Generates a table file called <network_name>_tensors.txt in the following
        steps:
        1. Runs the offline profiler against the inputted binaries to generate
        results files
        2. Builds an internal table storing all data from the parsed results files
        the offline profiler generated
        3. Writes the internal table to <network_name>_tensors.txt file and uses the
        <network_name>_ops.txt file as a guideline in terms of row order
        '''
        self.__run_inputted_binaries()
        self.__build_internal_table()
        self.__output_table_to_file()

    def __run_inputted_binaries(self):
        '''
        Invokes the profiler to run all appropriate binaries (must start with the
        network name) in the inputted directory. Result files generated by the
        profiler are stored in the results file directory and are named
        <binary_name>.txt. These results files are then parsed in a later step to
        generate the table.

        Exits with status 1 if the binaries directory does not exist or the
        results directory cannot be created.
        '''
        if not os.path.isdir(self.__dir_path):
            print("ERROR: Directory %s not found" % self.__dir_path)
            sys.exit(1)

        try:
            os.mkdir(self.__results_dir_path)
        except OSError:
            if not os.path.isdir(self.__results_dir_path):
                # Nothing can be written without a results directory, so stop
                # here instead of launching the profiler with unusable paths
                print("ERROR: Directory doesn't exist but failed to create dir")
                sys.exit(1)
            print("Directory already exists. Clearing directory.")
            for old_file in glob.glob(os.path.join(self.__results_dir_path, "*")):
                os.remove(old_file)

        for binary_name in os.listdir(self.__dir_path):
            binary_path = os.path.join(self.__dir_path, binary_name)
            if not self.__should_execute_file(binary_path):
                continue
            output_file = os.path.join(
                self.__results_dir_path,
                binary_name + TableGenerator.__results_file_ext)
            # No stdout/stderr piping needed for now. The profiler name comes
            # from the instance, not from a module-level global, so the class
            # also works when imported from another module.
            subprocess.call([self.__profiler_binary_name, binary_path,
                             str(self.__iters), output_file])

    def __build_internal_table(self):
        '''
        Iterates through each results file generated by the runs of the offline
        profiler and stores the data in a dictionary in the following format:
        [operation name][approximation type OR conversion type][time/energy]

        Exits with status 1 if a conversion row is found before the row of the
        operation it belongs to.
        '''
        conversion_suffixes = tuple(TableGenerator.precision_conversions)
        # Sorted so that the column order of the table does not depend on the
        # arbitrary order in which the OS lists the directory
        for results_file_name in sorted(os.listdir(self.__results_dir_path)):
            # Ignore if it's not a results file
            if results_file_name == self.__table_filename or \
                    not results_file_name.startswith(self.__network_name):
                continue
            approx_type = self.__get_approximation_type(results_file_name)
            results_file_path = os.path.join(self.__results_dir_path, results_file_name)
            with open(results_file_path, "r") as results_file:
                for line in results_file:
                    line = line.strip()
                    # Tolerate blank lines (e.g. a trailing newline)
                    if not line:
                        continue
                    op_name, total_time, total_energy = \
                        self.__parse_tensor_operation_line(line)
                    # If the current operation is f2h or h2f
                    if op_name.endswith(conversion_suffixes):
                        # Get the original operation name (without the f2h/h2f)
                        # and the conversion type
                        orig_op_name, conversion_type = \
                            self.__get_original_operation_name(op_name)
                        if orig_op_name not in self.__table:
                            print("ERROR: Conversion found but original %s is not in the table" % orig_op_name)
                            sys.exit(1)
                        # Store f2h and h2f as columns in the row belonging to
                        # the original operation
                        row, column = orig_op_name, conversion_type
                    else:
                        # Create a new row in the dictionary
                        row, column = op_name, approx_type
                    self.__table[row][column][TableGenerator.__time_col_name] = total_time
                    self.__table[row][column][TableGenerator.__energy_col_name] = total_energy

    def __output_table_to_file(self):
        '''
        Outputs the internally stored table to a file using the
        <network_name>_ops.txt file as a guideline in the following steps:
        1. Opens the ops file and the file to output the table to
        2. Reads a line from the ops file (guaranteed to be the layers/NML header)
        3. For each operation in the layer (or 1 operation if the "layer" is a
        NML), we store the time and the energy
        '''
        table_file_path = os.path.join(self.__results_dir_path, self.__table_filename)
        with open(self.__ops_file_path, "r") as soc_operations_file:
            with open(table_file_path, "w") as table_file:
                curr_line = soc_operations_file.readline().strip()
                while curr_line:
                    # First line is always the layers line (#layer_name,num_ops)
                    layer_name, num_ops = self.__parse_layer_info_line(curr_line)
                    # List of strings, where each string is a row corresponding
                    # to an operation in the layer
                    ops_in_layer = []
                    # Stores a list of elements in the header, which will be
                    # joined into a string. The header is only generated for the
                    # first operation in the layer.
                    # CRITICAL ASSUMPTION: All operations within a layer have the
                    # same # columns or everything breaks bc the header is per
                    # layer, not per operation
                    header = [TableGenerator.__table_header_delimiter,
                              layer_name, str(num_ops)]
                    # Iterate through all operations within the layer
                    for op_in_layer_count in range(num_ops):
                        # Contains the operation name
                        op_name = soc_operations_file.readline().strip()
                        # Stores a list of elements that will be joined to make
                        # up a row
                        curr_op = [op_name]
                        # .get() so that an operation without profiling data
                        # does not insert an empty row into the table
                        operation_data = self.__table.get(op_name, {})
                        # Iterate through time/energy data for each approximation
                        # type corresponding to the current operation
                        for approx_type in operation_data:
                            measurements = operation_data[approx_type]
                            curr_op.append(measurements[TableGenerator.__time_col_name])
                            curr_op.append(measurements[TableGenerator.__energy_col_name])
                            if op_in_layer_count == 0:
                                header.append("%s_time" % approx_type)
                                header.append("%s_energy" % approx_type)
                        ops_in_layer.append(' '.join(curr_op))
                    # Getting all operation rows and then writing everything
                    # because calls to write() are slow (memory vs time tradeoff)
                    table_file.write("%s\n%s\n" % (' '.join(header), '\n'.join(ops_in_layer)))
                    curr_line = soc_operations_file.readline().strip()

    def __should_execute_file(self, file_path):
        '''
        Checks if the file at the given file path is a binary that should be run
        by the profiler. Must exist, be executable, and its file name must start
        with the network name as per our naming standards.
        Args:
            file_path: Path of the file to check
        Returns:
            True if the profiler should run the file, False otherwise
        '''
        # Only the file name is tested: the containing directory is itself named
        # after the network, so searching the whole path would match every file
        file_name = os.path.basename(file_path)
        return os.path.isfile(file_path) and os.access(file_path, os.X_OK) and \
            file_name.startswith(self.__network_name)

    def __get_approximation_type(self, results_filename):
        '''
        Parses a given results filename for the approximation type.
        Format assumption: <network_name>_<approx_type>.txt
        Args:
            results_filename: Name of results file
        Returns:
            the approximation technique (ex: fp16)
        '''
        ext = TableGenerator.__results_file_ext
        stem = results_filename
        # Only strip the extension when it is actually there, so a name without
        # it does not lose its last character
        if stem.endswith(ext):
            stem = stem[:-len(ext)]
        approx_type_start_ind = stem.find(self.__network_name) \
            + len(self.__network_name) + 1  # + 1 to account for _ delimiter
        return stem[approx_type_start_ind:]

    def __parse_tensor_operation_line(self, tensor_op_line):
        '''
        Parses a tensor operation line (within an output file from the offline
        profiler) for the operation name, the total time used, and the total
        energy used.
        Format assumption: <op_name>,<total_time>,<total_energy>[,...]
        Args:
            tensor_op_line: Tensor operation line from output file
        Returns:
            operation name
            total time used (string, as found in the file)
            total energy used (string, as found in the file)
        Raises:
            IndexError: if the line has fewer than three comma-separated fields
        '''
        # Fields are stripped because the table is written space-delimited, so
        # stray whitespace inside a value would corrupt its columns
        fields = [field.strip() for field in tensor_op_line.split(",")]
        return fields[0], fields[1], fields[2]

    def __build_nested_default_dict(self):
        '''
        Builds a nested default dictionary with an arbitrary number of levels
        '''
        return defaultdict(self.__build_nested_default_dict)

    def __get_original_operation_name(self, op_name):
        '''
        Parses an operation name containing _<conversion type> for the original
        operation name.
        Format assumption: <original_op_name>_<conversion type>
        Args:
            op_name: Name of the operation
        Returns:
            the original operation name
            the conversion type (ex: f2h)
        '''
        # The conversion type is the last component, so split on the LAST
        # underscore; the original operation name may itself contain underscores
        orig_op_name, _, conversion_type = op_name.rpartition("_")
        return orig_op_name, conversion_type

    def __parse_layer_info_line(self, layer_info_line):  # layer_name,num_ops
        '''
        Parses a layer header (from the original ops.txt file) into the layer name
        and the number of operations
        Assumed format: #layer_name,num_ops
        Args:
            layer_info_line: Line at the beginning of each layer in the ops file
        Returns:
            layer name
            number of ops in the layer
        Raises:
            ValueError: if the text after the first comma is not an integer
        '''
        name_part, _, num_ops_part = layer_info_line.partition(",")
        name_start = name_part.find(TableGenerator.__ops_header_delimiter) + 1
        return name_part[name_start:], int(num_ops_part)
if __name__ == "__main__":
    # Script entry point: expects the binaries directory, the iteration count
    # and the profiler binary, in that order, after the script name.
    if not len(sys.argv) == 4:
        print("python table_generator.py <binary dir path> <num itrs> <profiler bin path>")
        exit(1)

    # The iteration count arrives as text and is converted before use.
    binary_dir_path = sys.argv[1]
    num_iters = int(sys.argv[2])
    profiler_binary_name = sys.argv[3]

    table_gen = TableGenerator(binary_dir_path, num_iters, profiler_binary_name)
    table_gen.generate_table()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment