diff --git a/llvm/projects/gpu_profiler/include/profiler.h b/llvm/projects/gpu_profiler/include/profiler.h index b776ed2b6642ee773783e48c9ba408d33d211f43..78c0f18071e916edfff435844fd990936855c410 100644 --- a/llvm/projects/gpu_profiler/include/profiler.h +++ b/llvm/projects/gpu_profiler/include/profiler.h @@ -65,6 +65,11 @@ private: const std::string ddr_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0041/iio_device/in_power2_input"; const std::string soc_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0040/iio_device/in_power1_input"; const std::string sys_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0041/iio_device/in_power0_input"; + // Critical assumption: If this file doesn't exist, then the board isn't a Jetson + const std::string jetson_chip_id = "/sys/module/tegra_fuse/parameters/tegra_chip_id"; + + // True if running on Jetson, else false + bool on_jetson_; // An individual power reading struct PowerReading { diff --git a/llvm/projects/gpu_profiler/src/profiler.cpp b/llvm/projects/gpu_profiler/src/profiler.cpp index 188223a9059eede6d2a32e853dee22b95ecb719e..822d708d58f6c8468cb4b84ed23e988251b95159 100644 --- a/llvm/projects/gpu_profiler/src/profiler.cpp +++ b/llvm/projects/gpu_profiler/src/profiler.cpp @@ -1,4 +1,4 @@ -#include "profiler.h" +#include "profiler.h" Profiler::Profiler() : should_run_profiler_(false), should_stop_profiler_(false) { // Open all streams. 
Not done in start_profiler() function bc the streams @@ -9,8 +9,14 @@ Profiler::Profiler() : should_run_profiler_(false), should_stop_profiler_(false) soc_stream_.open(soc_power_rail, std::ifstream::in); sys_stream_.open(sys_power_rail, std::ifstream::in); - if (!cpu_stream_.is_open() || !gpu_stream_.is_open() || !ddr_stream_.is_open() - || !soc_stream_.is_open() || !sys_stream_.is_open()) { + // Check if the jetson file id file exists to indirectly check architecture + std::ifstream jetson_file(jetson_chip_id); + on_jetson_ = jetson_file.good(); + + if (on_jetson_ && + (!cpu_stream_.is_open() || !gpu_stream_.is_open() + || !ddr_stream_.is_open() || !soc_stream_.is_open() + || !sys_stream_.is_open())) { std::cout << "Failed to open one of the power rails for reading\n"; exit(1); } @@ -68,16 +74,23 @@ void Profiler::pause_profiler() { // Returns this as a pair of <delta time in milliseconds, energy> std::pair<double, double> Profiler::get_time_energy() const { double total_energy = 0.0; - - std::chrono::time_point<std::chrono::high_resolution_clock> prev_time = start_time_; - for (auto reading : power_readings_) { - std::chrono::duration<double> duration = reading.time_ - prev_time; - total_energy += reading.gpu_ * duration.count(); - total_energy += reading.ddr_ * duration.count(); - prev_time = reading.time_; + double delta_time = 0.0; + + if (on_jetson_) { + std::chrono::time_point<std::chrono::high_resolution_clock> prev_time = start_time_; + for (auto reading : power_readings_) { + std::chrono::duration<double> duration = reading.time_ - prev_time; + total_energy += reading.gpu_ * duration.count(); + total_energy += reading.ddr_ * duration.count(); + prev_time = reading.time_; + } + delta_time = std::chrono::duration<double, std::milli>(prev_time + - start_time_).count(); + } else { + auto last_reading_time = power_readings_[power_readings_.size() - 1].time_; + delta_time = std::chrono::duration<double, std::milli>(last_reading_time + - start_time_).count(); } 
- double delta_time = std::chrono::duration<double, std::milli>(prev_time - - start_time_).count(); return std::make_pair(delta_time, total_energy); } diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc index e7784cb18e7e7c766f0ae27e6588d4851a2f2812..e8947881765637d68ca9d95d716c97d486e8380a 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc @@ -53,5 +53,6 @@ void dummyFunction(){ void* TanhWrapper = (void*) &wrapper_tensorTanh; void* BatchNormWrapper = (void*) &wrapper_tensorBatchNorm; void* PoolingWrapper = (void*) &wrapper_tensorPooling; + void* SoftmaxWrapper = (void*) &wrapper_tensorSoftmax; } diff --git a/llvm/projects/soc_simulator/src/driver.py b/llvm/projects/soc_simulator/src/driver.py index 1df46eec8fc34cee7c6a7683d1faaae4a94639ca..dbf2651bd3a9512c46d9e0a549c61290ad913ab0 100644 --- a/llvm/projects/soc_simulator/src/driver.py +++ b/llvm/projects/soc_simulator/src/driver.py @@ -14,6 +14,13 @@ class Driver: results_time_key = "Time" results_energy_key = "Energy" + def __get_str(self, appr): + if appr == Driver.ApproxTypes.FP16: + return "FP16" + elif appr == Driver.ApproxTypes.FP32: + return "FP32" + elif appr == Driver.ApproxTypes.PROMISE: + return "PROMISE" def driver(self): self.__parse_tensor_layer_file() @@ -143,6 +150,7 @@ class Driver: def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): + print(self.__get_str(curr_layer), self.__get_str(prev_layer), h2f_f2h_operation_ind) if curr_layer == prev_layer or curr_layer == Driver.ApproxTypes.PROMISE \ or prev_layer == Driver.ApproxTypes.PROMISE: # No quantization needed return 0.0, 0.0 @@ -187,7 +195,7 @@ class Driver: exit(1) # Run promise simulator # TODO need to print time and energy in the ptm runner so we can pipe it - output = subprocess.Popen(["./ptm", 
str(rows_a), str(cols_a), str(rows_b), \ + output = subprocess.Popen(["./ptm_new", str(rows_a), str(cols_a), str(rows_b), \ str(cols_b), str(patch_factor), str(swing)], \ stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] total_time_energy = output.strip().split(',') @@ -210,7 +218,6 @@ class Driver: def __run_simulations(self): - print("run sim") if not os.path.isfile(self.__config_filename): print("ERROR: %s was not found" % self.__config_filename) exit(1) @@ -228,14 +235,12 @@ class Driver: for layer_ind, config_layer in enumerate(config_layers): # level layer_data = self.__tensor_layers[layer_ind] # layer layer_name = layer_data["Name"] - if Driver.is_promise(config_layer): print("Running layer %s on PROMISE" % layer_name) curr_layer = Driver.ApproxTypes.PROMISE quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, 0, layer_data) # Compute time, energy = self.__run_promise_simulation(config_layer, layer_data) - print(time, energy) self.__aggregate_results[Driver.results_time_key][self.__config_count] += time self.__aggregate_results[Driver.results_energy_key][self.__config_count] += energy else: @@ -250,15 +255,17 @@ class Driver: curr_layer = Driver.ApproxTypes.FP16 else: curr_layer = Driver.ApproxTypes.FP32 + quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data) conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind) total_time += quant_time + conv_time total_energy += quant_energy + conv_energy + prev_layer = curr_layer self.__aggregate_results[Driver.results_time_key][self.__config_count] += total_time self.__aggregate_results[Driver.results_energy_key][self.__config_count] += total_energy - prev_layer = curr_layer + self.__config_count += 1 print("\n") config_file.close() diff --git a/llvm/projects/soc_simulator/src/driver_new_config.py b/llvm/projects/soc_simulator/src/driver_new_config.py index 
115237dac51c96b47d02c84a603d98bdcf0b84a4..46af48c737b149f0665683bae9ffa023cf8d339e 100644 --- a/llvm/projects/soc_simulator/src/driver_new_config.py +++ b/llvm/projects/soc_simulator/src/driver_new_config.py @@ -15,13 +15,97 @@ class Driver: results_time_key = "Time" results_energy_key = "Energy" + def __get_str(self, appr): + if appr == Driver.ApproxTypes.FP16: + return "FP16" + elif appr == Driver.ApproxTypes.FP32: + return "FP32" + elif appr == Driver.ApproxTypes.PROMISE: + return "PROMISE" def driver(self): self.__parse_tensor_layer_file() self.__parse_tensor_table() - self.__run_simulations() - self.__display_results() + #self.__run_simulations() + #self.__display_results() + config_file = open(self.__config_filename, "r") + line = config_file.readline().strip() + config_count = 0 + + prev_layer = Driver.ApproxTypes.FP32 + curr_layer = None + + while line: + assert(line == "+++++") + print("CONFIGURATION") + + line = config_file.readline().strip() + # configuration data + conf_name = line.split(' ')[0] + assert(conf_name.startswith("conf")) + print(conf_name) + line = config_file.readline().strip() + + # layers + layer_count = 0 + while line != "-----": + print("LAYER", line == "-----") + print(layer_count, len(self.__tensor_layers)) + if line.find("softmax") != -1: + line = config_file.readline().strip() + continue + layer_table_data = self.__tensor_layers[layer_count] + layer_name = layer_table_data["Name"] + + layer_line = line.split(' ') + + if layer_line[1] == "promise": + print("PROMISE") + curr_layer = Driver.ApproxTypes.PROMISE + time, energy = self.__run_promise_simulation(layer_line[3], layer_table_data) + print(time, energy) + self.__aggregate_results[Driver.results_time_key][config_count] += time + self.__aggregate_results[Driver.results_energy_key][config_count] += energy + + elif layer_line[1] == "gpu": + print("GPU") + + total_time = 0 + total_energy = 0 + tensor_count = 0 + + for i in range(2, len(layer_line), 3): + op_type = layer_line[i] + approx_type = 
layer_line[i + 1] + op_number = layer_line[i + 2] + #print(op_type, approx_type, op_number) + + if approx_type == "fp16": + curr_layer = Driver.ApproxTypes.FP16 + elif approx_type == "fp32": + curr_layer = Driver.ApproxTypes.FP32 + elif approx_type == "perf": + curr_layer = Driver.ApproxTypes.PERF + quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_count, layer_table_data) + if curr_layer == Driver.ApproxTypes.PERF: + conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_count, op_number) # Only need op number for perf + else: + conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_count) + total_time += quant_time + conv_time + total_energy += quant_energy + conv_energy + prev_layer = curr_layer + tensor_count += 1 + + self.__aggregate_results[Driver.results_time_key][config_count] += total_time + self.__aggregate_results[Driver.results_energy_key][config_count] += total_energy + + layer_count += 1 + line = config_file.readline().strip() + prev_layer = curr_layer + config_count += 1 + line = config_file.readline().strip() + config_file.close() def __init__(self, layer_filename, table_filename, config_filename, results_filename): self.__layer_filename = layer_filename @@ -144,8 +228,11 @@ class Driver: def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): + print(self.__get_str(curr_layer), self.__get_str(prev_layer), h2f_f2h_operation_ind) if curr_layer == prev_layer or curr_layer == Driver.ApproxTypes.PROMISE \ - or prev_layer == Driver.ApproxTypes.PROMISE: # No quantization needed + or prev_layer == Driver.ApproxTypes.PROMISE \ + or curr_layer == Driver.ApproxTypes.PERF \ + or prev_layer == Driver.ApproxTypes.PERF: # No quantization needed return 0.0, 0.0 layer_name = layer_data["Name"] @@ -155,7 +242,6 @@ class Driver: # Get h2f/f2h data using the first tensor operation in the layer # (which is why order matters in the tensor table) - print(layer_name, 
self.__tensor_table[layer_name]) tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind] if curr_layer == Driver.ApproxTypes.FP32: time = tensor_op_row["h2f_time"] @@ -189,7 +275,7 @@ class Driver: exit(1) # Run promise simulator # TODO need to print time and energy in the ptm runner so we can pipe it - output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \ + output = subprocess.Popen(["./ptm_new", str(rows_a), str(cols_a), str(rows_b), \ str(cols_b), str(patch_factor), str(swing)], \ stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] total_time_energy = output.strip().split(',') @@ -199,98 +285,23 @@ class Driver: return float(total_time_energy[0]), float(total_time_energy[1]) - def __run_simulations(self): - if not os.path.isfile(self.__config_filename): - print("ERROR: %s was not found" % self.__config_filename) - exit(1) - - config_file = open(self.__config_filename, "r") - - line = config_file.readline().strip() - - while line: - assert(line.startswith("+++++")) - config_name = config_file.readline().strip().split(' ')[0] # Next line = configuration name - print("CONFIGURATION") - - line = config_file.readline().strip() - layer_ind = 0 # NOTE can also use the leftmost number in the currl ine - - prev_layer = Driver.ApproxTypes.FP32 - curr_layer = None - - while not line.startswith("-----"): - layer_info = line.split(' ') - layer_data = self.__tensor_layers[layer_ind] - layer_name = layer_data["Name"] - - if layer_info[1] == "promise": - print("Running layer %s on PROMISE" % layer_name) - curr_layer = Driver.ApproxTypes.PROMISE - - swing = int(layer_info[3]) - time, energy = self.__run_promise_simulation(swing, layer_data) - print(time, energy) - self.__aggregate_results[Driver.results_time_key][self.__config_count] += time - self.__aggregate_results[Driver.results_energy_key][self.__config_count] += energy - - elif layer_info[1] == "gpu": - # Parse each individual tensor operation - # TODO not portable bc 
there can be multiple numbers after each approx later on - total_time = 0 - total_energy = 0 - - tensor_ind = 0 - for i in range(2, len(layer_info), 3): - tensor_op = layer_info[i] - approx_type = layer_info[i + 1] - approx_num = layer_info[i + 2] # only matters if perf - - if approx_type == "fp16": - curr_layer = Driver.ApproxTypes.FP16 - elif approx_type == "fp32": - curr_layer = Driver.ApproxTypes.FP32 - elif approx_type == "perf": - curr_layer = DriverApproxTypes.PERF - else: - assert(False) - - quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data) - time, energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind, approx_num) - total_time += time - total_energy += energy - - tensor_ind += 1 - - self.__aggregate_results[Driver.results_time_key][self.__config_count] += total_time - self.__aggregate_results[Driver.results_energy_key][self.__config_count] += total_energy - - layer_ind += 1 - line = config_file.readline().strip() - - self.__config_count += 1 - line = config_file.readline().strip() - - config_file.close() - - - def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, approx_num): + def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, perf_number = None): tensor_info = self.__tensor_table[layer_name][tensor_ind] - if curr_layer == Driver.ApproxTypes.FP32: - time = tensor_info["fp32_time"] - energy = tensor_info["fp32_energy"] + conversion_time = tensor_info["fp32_time"] + conversion_energy = tensor_info["fp32_energy"] elif curr_layer == Driver.ApproxTypes.FP16: - time = tensor_info["fp16_time"] - energy = tensor_info["fp16_energy"] + conversion_time = tensor_info["fp16_time"] + conversion_energy = tensor_info["fp16_energy"] elif curr_layer == Driver.ApproxTypes.PERF: - time = tensor_info["perf%s_energy" % approx_num] - energy = tensor_info["perf%s_energy" % approx_num] - - print("GPU: (%f, %f)" % (time, energy)) - return time, energy + # Then we care abut the following number + 
conversion_time = tensor_info["perf%s_time" % perf_number] + conversion_energy = tensor_info["perf%s_energy" % perf_number] + + print("GPU: (%f, %f)" % (conversion_time, conversion_energy)) + return (conversion_time, conversion_energy) def __display_results(self):