Skip to content
Snippets Groups Projects
Commit 730b473a authored by kotsifa2's avatar kotsifa2
Browse files

Merge branch 'approx_hpvm' of gitlab.engr.illinois.edu:llvm/hpvm into approx_hpvm

parents 8ab6c870 5bb2d3f3
No related branches found
No related tags found
No related merge requests found
#include <sched.h>

#include <atomic>
#include <chrono>
#include <cmath>
#include <condition_variable>
#include <fstream>
#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include <boost/algorithm/string.hpp>

// Reads power rails at runtime and computes the GPU and DDR energy within a window
// of time, which is delimitered by the calls to resume_profiler() and stop_profiler()
//
// IMPORTANT: Must call exit_profiler() to kill the profiler thread
//
// Public interface methods:
// void initialize();
// void run_profiler();
// void resume_profiler();
// void stop_profiler();
// std::pair<double, double> get_time_energy() const;
// void reset()
// void exit_profiler();
class Profiler {
public:
Profiler() : should_run_profiler_(false), should_exit_profiler_(false) {
// Open all streams. Not done in initialize() function bc the streams
// should be strictly opened once
cpu_stream_.open(cpu_power_rail, std::ifstream::in);
gpu_stream_.open(gpu_power_rail, std::ifstream::in);
ddr_stream_.open(ddr_power_rail, std::ifstream::in);
soc_stream_.open(soc_power_rail, std::ifstream::in);
sys_stream_.open(sys_power_rail, std::ifstream::in);
#define NUM_ARGS 4 if (!cpu_stream_.is_open() or !gpu_stream_.is_open() or !ddr_stream_.is_open()
or !soc_stream_.is_open() or !sys_stream_.is_open()) {
std::cout << "Failed to open one of the power rails for reading\n";
exit(1);
}
}
// This is a simple power profiler that can sample the power of the various ~Profiler() {
// components in a Jetson TX2. The usage is simple: profile() measures power cpu_stream_.close();
// for the specified program, and then dumpOutput() prints the readings to a gpu_stream_.close();
// file. profile() can be called as many times as desired - the internal state ddr_stream_.close();
// is reset each time and thus the measurements are not cumulative. soc_stream_.close();
class Profiler { sys_stream_.close();
private: }
// Jetson's ARM cores' physical IDs. The two Denver cores are 1 and 2, and
// we can't use them. // Reinitializes boolean vars used for control flow and launches the profiler
const unsigned core0 = 0; // thread. DOES NOT reset other internal data structures.
const unsigned core1 = 3; void initialize(){
const unsigned core2 = 4; // Reinitialize in case the profiler object has been used before
const unsigned core3 = 5; should_run_profiler_ = false;
should_exit_profiler_ = false;
// Launch profiler thread
profiler_thread_ = std::thread(&Profiler::run_profiler, this);
}
// Runs the profiler thread, keeping it alive by wrapping the functionality
// in an infinite loop
void run_profiler(){
while (true){
if (should_exit_profiler_) {
break;
}
// TODO overhead between calls to obtain_power_reading
// Need to lock the mutex and check the condition var
{
std::unique_lock<std::mutex> mutex_lock(mutex_);
if (should_exit_profiler_) {
break;
}
// Wake the thread up when it's time to run the profiler or exit
// the profiler
cond_var_.wait(mutex_lock, [this]{return should_run_profiler_
|| should_exit_profiler_; });
}
if (should_exit_profiler_) {
break;
}
obtain_power_reading();
}
}
// Resumes the profiling of whatever executable's currently running
// DOES NOT reset any data
void resume_profiler() {
{
std::unique_lock<std::mutex> mutex_lock(mutex_);
if (should_run_profiler_){
std::cout << "WARNING: resume_profiler was already called\n";
}
should_run_profiler_ = true;
start_time_ = std::chrono::high_resolution_clock::now();
}
cond_var_.notify_one();
}
// sysfs paths for i2c buses of various components // Stops profiler by putting profiler thread to sleep
const char * const cpu_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0041/iio_device/in_power1_input"; void stop_profiler() {
const char * const gpu_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0040/iio_device/in_power0_input"; {
const char * const ddr_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0041/iio_device/in_power2_input"; std::unique_lock<std::mutex> mutex_lock(mutex_);
const char * const soc_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0040/iio_device/in_power1_input"; if (!should_run_profiler_){
const char * const sys_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0041/iio_device/in_power0_input"; std::cout << "WARNING: stop_profiler was already called\n";
}
should_run_profiler_ = false;
}
cond_var_.notify_one();
}
// It takes some time for the GPU's power to return to idle (ms) // Gets the delta time and total GPU and DDR energy between the last two
const unsigned gpu_idle_time = 0; // calls to resume_profiler and stop_profiler
//
// Returns this as a pair of <delta time in milliseconds, energy>
std::pair<double, double> get_time_energy() const {
double total_energy = 0.0;
std::chrono::time_point<std::chrono::high_resolution_clock> prev_time = start_time_;
for (auto reading : power_readings_) {
std::chrono::duration<double> duration = reading.time_ - prev_time;
total_energy += reading.gpu_ * duration.count();
total_energy += reading.ddr_ * duration.count();
prev_time = reading.time_;
}
double delta_time = std::chrono::duration<double, std::milli>(prev_time
- start_time_).count();
return std::make_pair(delta_time, total_energy);
}
// Resets all internal data structures, including the vector storing all power_readings.
void reset() {
should_exit_profiler_ = false; // Can call reset after calling exit_profiler()
should_run_profiler_ = false; // Can call reset after calling resume
power_readings_.clear();
}
// Exit the profiler and kill the thread
// Must call initialize() to reuse this object after calling exit_profiler()
void exit_profiler() {
std::cout << "Exiting profiler\n";
should_exit_profiler_ = true;
cond_var_.notify_one();
profiler_thread_.join();
}
private:
// Power rails are mounted as files. Keeping the old power rail file names for possible future
// integrations
const std::string cpu_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0041/iio_device/in_power1_input";
const std::string gpu_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0040/iio_device/in_power0_input";
const std::string ddr_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0041/iio_device/in_power2_input";
const std::string soc_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0040/iio_device/in_power1_input";
const std::string sys_power_rail = "/sys/devices/3160000.i2c/i2c-0/0-0041/iio_device/in_power0_input";
// An individual power reading // An individual power reading
struct PowerReading { struct PowerReading {
...@@ -49,81 +164,10 @@ private: ...@@ -49,81 +164,10 @@ private:
double sys_; double sys_;
}; };
// Individual tensor op // Stores all power readings and is cleared only when reset() is called
struct TensorOp { std::vector<PowerReading> power_readings_;
std::string name_;
double start_;
double finish_;
double time_;
double energy_;
double gpu_energy_;
double ddr_energy_;
double power_;
double gpu_power_;
double ddr_power_;
TensorOp(std::string name, double start, double finish)
: name_(name), start_(start), finish_(finish), time_(finish - start),
energy_(0.0), gpu_energy_(0.0), ddr_energy_(0.0),
power_(0.0), gpu_power_(0.0), ddr_power_(0.0) {
}
};
// Aggregate tensor info
struct AggTensorInfo {
// Op name
std::string name_;
// Averages
double average_time_;
double average_energy_;
double average_gpu_energy_;
double average_ddr_energy_;
double average_power_;
double average_gpu_power_;
double average_ddr_power_;
// Standard deviations
double time_std_;
double energy_std_;
double gpu_energy_std_;
double ddr_energy_std_;
double power_std_;
double gpu_power_std_;
double ddr_power_std_;
};
// Total time, energy, and power
struct TotalInfo {
double time_;
double energy_;
double gpu_energy_;
double ddr_energy_;
double power_;
double gpu_power_;
double ddr_power_;
void clear() {
time_ = 0.0;
energy_ = 0.0;
gpu_energy_ = 0.0;
ddr_energy_ = 0.0;
power_ = 0.0; std::chrono::time_point<std::chrono::high_resolution_clock> start_time_;
gpu_power_ = 0.0;
ddr_power_ = 0.0;
}
};
// For reading the i2c buses via sysfs // For reading the i2c buses via sysfs
std::ifstream cpu_stream_; std::ifstream cpu_stream_;
...@@ -132,58 +176,18 @@ private: ...@@ -132,58 +176,18 @@ private:
std::ifstream soc_stream_; std::ifstream soc_stream_;
std::ifstream sys_stream_; std::ifstream sys_stream_;
// Start time (so graph begins from t=0) std::mutex mutex_;
std::chrono::time_point<std::chrono::high_resolution_clock> start_time_;
std::condition_variable cond_var_;
// Per-run info
std::vector<PowerReading> power_readings_;
// Aggregate (across all runs) info bool should_run_profiler_; // True if we want to resume the profiling thread
std::map<std::string, std::vector<TensorOp>> tensor_info_;
std::vector<AggTensorInfo> agg_tensor_info_;
TotalInfo total_info_;
unsigned iterations_;
// Start and stop flags to synchronize the program and profiling threads std::atomic_bool should_exit_profiler_; // Quit profiling
std::atomic_bool start_;
std::atomic_bool stop_;
private: std::thread profiler_thread_;
// Resets tensor info and total time and energy
void resetGlobal() {
tensor_info_.clear();
agg_tensor_info_.clear();
total_info_.clear();
}
// Resets power readings and flags
void resetLocal() {
power_readings_.clear();
start_ = false;
stop_ = false;
}
// Pins the given thread to the specified core
void pinThread(std::thread &t, const unsigned core) const {
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(core, &cpuset);
if (pthread_setaffinity_np(t.native_handle(), sizeof(cpu_set_t), &cpuset) != 0)
std::cout << "Couldn't set thread affinity\n";
}
// Adds a tensor op to the map
void addTensorOp(std::string &op_name, TensorOp &top) {
// Create a vector if this is the first entry
auto it = tensor_info_.find(op_name);
if (it == tensor_info_.end()) {
tensor_info_.insert(std::pair<std::string, std::vector<TensorOp>>(op_name, std::vector<TensorOp>()));
}
tensor_info_[op_name].push_back(top);
}
// Obtain's a single power reading from the GPU and DDR rails // Obtain's a single power reading from the GPU and DDR rails
void getPowerReading() { void obtain_power_reading() {
PowerReading reading; PowerReading reading;
// The order matters here. All the reads have to happen together first // The order matters here. All the reads have to happen together first
...@@ -199,385 +203,34 @@ private: ...@@ -199,385 +203,34 @@ private:
gpu_stream_.seekg(0); gpu_stream_.seekg(0);
ddr_stream_.seekg(0); ddr_stream_.seekg(0);
} }
};
// Executes the program to be profiled /*
void runProgram(const char * const program) { // TESTS
// Tell the profiling thread to start, execute the program that needs void resume_pause_profiler(Profiler& profile_wrapper, unsigned long sleep_millis){
// to be profiled, and then tell the profiling thread to stop. profile_wrapper.resume_profiler();
start_ = true; std::this_thread::sleep_for(std::chrono::milliseconds(sleep_millis));
const auto result = std::system(program); profile_wrapper.stop_profiler();
stop_ = true;
}
// Records power while the program is running
void recordPower() {
// Obtain the new start time, wait for the start signal, and keep
// profiling until the stop flag is set.
start_time_ = std::chrono::high_resolution_clock::now();
while (!start_);
while (!stop_)
getPowerReading();
}
// Calculates stats for the entire execution (CPU+GPU phase)
void updateTotalStats() {
double energy = 0.0;
double gpu_energy = 0.0;
double ddr_energy = 0.0;
std::chrono::time_point<std::chrono::high_resolution_clock> prev_time = start_time_;
for (auto reading : power_readings_) {
std::chrono::duration<double> duration = reading.time_ - prev_time;
gpu_energy += reading.gpu_ * duration.count();
ddr_energy += reading.ddr_ * duration.count();
prev_time = reading.time_;
}
energy = gpu_energy + ddr_energy;
auto time = std::chrono::duration<double>(prev_time - start_time_).count();
total_info_.time_ += time;
total_info_.energy_ += (gpu_energy + ddr_energy);
total_info_.gpu_energy_ += gpu_energy;
total_info_.ddr_energy_ += ddr_energy;
total_info_.power_ += (energy / time);
total_info_.gpu_power_ += (gpu_energy / time);
total_info_.ddr_power_ += (ddr_energy / time);
}
// Calculates energy and power usage of the given tensor operation
void calculateTensorEP(TensorOp &top) const {
auto prev_time = top.start_;
unsigned i = 0;
// Skip until we hit the start time of the operation
for (; std::chrono::duration<double>(power_readings_[i].time_.time_since_epoch()).count() < top.start_; i++);
// Keep going until we hit the finish time of the operation or we run out of readings
for (double curr_time; ((curr_time = std::chrono::duration<double>(power_readings_[i].time_.time_since_epoch()).count()) <= top.finish_)
&& (i < power_readings_.size()); i++) {
auto duration = curr_time - prev_time;
prev_time = curr_time;
top.gpu_energy_ += power_readings_[i].gpu_ * duration;
top.ddr_energy_ += power_readings_[i].ddr_ * duration;
}
top.energy_ = top.gpu_energy_ + top.ddr_energy_;
top.power_ = top.energy_ / top.time_;
top.gpu_power_ = top.gpu_energy_ / top.time_;
top.ddr_power_ = top.ddr_energy_ / top.time_;
}
// Calculates stats for all the tensors in the timestamp file
void updatePerOpStats() {
const char * const op_file = "profile_data.txt";
std::string line;
std::ifstream ifs(op_file, std::ios::in);
// Calculate time and energy for each tensor operation. There are two
// possibilities for the file format:
// If the line doesn't begin with #, we are looking at FP32 code
// without any conversions to/from FP16, and each operation occupies
// two consecutive lines in the timestamp file.
// If the line does begin with #, we are looking at FP16 code with
// conversion routines in the middle. In this case, *after* the current
// line, there will be two lines for F2H, two lines for H2F, and then
// one line for the end of the operation.
while (std::getline(ifs, line)) {
std::vector<std::string> tokens;
boost::split(tokens, line, boost::is_any_of("\t"));
std::string op_name = tokens[0];
// FP32
if (tokens[0][0] != '#') {
// First line with tensor op name and start time
std::string op_name = tokens[0];
const auto start = std::stod(tokens[1]);
// Second line with tensor op end time
std::getline(ifs, line);
tokens.clear();
boost::split(tokens, line, boost::is_any_of("\t"));
const auto finish = std::stod(tokens[1]);
TensorOp top(op_name, start, finish);
calculateTensorEP(top);
addTensorOp(op_name, top);
} else {
// First line with tensor op name and start time
std::string op_name = tokens[0].substr(1);
const auto start = std::stod(tokens[1]);
// Second line with f2h
std::getline(ifs, line);
tokens.clear();
boost::split(tokens, line, boost::is_any_of("\t"));
std::string f2h_name = op_name + "_f2h";
const auto f2h_start = std::stod(tokens[1]);
// Third line with f2h
std::getline(ifs, line);
tokens.clear();
boost::split(tokens, line, boost::is_any_of("\t"));
const auto f2h_finish = std::stod(tokens[1]);
// Add f2h
TensorOp f2h(f2h_name, f2h_start, f2h_finish);
calculateTensorEP(f2h);
addTensorOp(f2h_name, f2h);
// Fourth line with h2f
std::getline(ifs, line);
tokens.clear();
boost::split(tokens, line, boost::is_any_of("\t"));
std::string h2f_name = op_name + "_h2f";
const auto h2f_start = std::stod(tokens[1]);
// Fifth line with h2f
std::getline(ifs, line);
tokens.clear();
boost::split(tokens, line, boost::is_any_of("\t"));
const auto h2f_finish = std::stod(tokens[1]);
// Add h2f
TensorOp h2f(h2f_name, h2f_start, h2f_finish);
calculateTensorEP(h2f);
addTensorOp(h2f_name, h2f);
// Sixth and final line with tensor op end time
std::getline(ifs, line);
tokens.clear();
boost::split(tokens, line, boost::is_any_of("\t"));
const auto finish = std::stod(tokens[1]);
// Subtract f2h's and h2f's time and energy to get just the computation's info
TensorOp top(op_name, start, finish);
calculateTensorEP(top);
top.time_ -= (f2h.time_ + h2f.time_);
top.energy_ -= (f2h.energy_ + h2f.energy_);
top.gpu_energy_ -= (f2h.gpu_energy_ + h2f.gpu_energy_);
top.ddr_energy_ -= (f2h.ddr_energy_ + h2f.ddr_energy_);
top.power_ = top.energy_ / top.time_;
top.gpu_power_ = top.gpu_energy_ / top.time_;
top.ddr_power_ = top.ddr_energy_ / top.time_;
addTensorOp(op_name, top);
}
}
ifs.close();
}
void updateStats() {
updatePerOpStats();
updateTotalStats();
}
// Calculates the average and standard deviation of each metric of each tensor op
void calculateAggregateStats() {
for (auto it = tensor_info_.begin(); it != tensor_info_.end(); it++) {
AggTensorInfo ati;
ati.name_ = it->first;
auto topv = it->second;
double total_time = 0.0;
double total_energy = 0.0;
double total_gpu_energy = 0.0;
double total_ddr_energy = 0.0;
double total_power = 0.0;
double total_gpu_power = 0.0;
double total_ddr_power = 0.0;
double time_sum = 0.0;
double energy_sum = 0.0;
double gpu_energy_sum = 0.0;
double ddr_energy_sum = 0.0;
double power_sum = 0.0;
double gpu_power_sum = 0.0;
double ddr_power_sum = 0.0;
// Calculate average
for (const auto &top : topv) {
total_time += top.time_;
total_energy += top.energy_;
total_gpu_energy += top.gpu_energy_;
total_ddr_energy += top.ddr_energy_;
total_power += top.power_;
total_gpu_power += top.gpu_power_;
total_ddr_power += top.ddr_power_;
}
ati.average_time_ = total_time / iterations_;
ati.average_energy_ = total_energy / iterations_;
ati.average_gpu_energy_ = total_gpu_energy / iterations_;
ati.average_ddr_energy_ = total_ddr_energy / iterations_;
ati.average_power_ = total_power / iterations_;
ati.average_gpu_power_ = total_gpu_power / iterations_;
ati.average_ddr_power_ = total_ddr_power / iterations_;
// Calculate standard deviation
for (const auto &top : topv) {
auto time_diff = top.time_ - ati.average_time_;
time_sum += time_diff * time_diff;
auto energy_diff = top.energy_ - ati.average_energy_;
energy_sum += energy_diff * energy_diff;
auto gpu_energy_diff = top.gpu_energy_ - ati.average_gpu_energy_;
gpu_energy_sum += gpu_energy_diff * gpu_energy_diff;
auto ddr_energy_diff = top.ddr_energy_ - ati.average_ddr_energy_;
ddr_energy_sum += ddr_energy_diff * ddr_energy_diff;
auto power_diff = top.power_ - ati.average_power_;
power_sum += power_diff * power_diff;
auto gpu_power_diff = top.gpu_power_ - ati.average_gpu_power_;
gpu_power_sum += gpu_power_diff * gpu_power_diff;
auto ddr_power_diff = top.ddr_power_ - ati.average_ddr_power_;
ddr_power_sum += ddr_power_diff * ddr_power_diff;
}
ati.time_std_ = std::sqrt(time_sum / iterations_);
ati.energy_std_ = std::sqrt(energy_sum / iterations_);
ati.gpu_energy_std_ = std::sqrt(gpu_energy_sum / iterations_);
ati.ddr_energy_std_ = std::sqrt(ddr_energy_sum / iterations_);
ati.power_std_ = std::sqrt(power_sum / iterations_);
ati.gpu_power_std_ = std::sqrt(gpu_power_sum / iterations_);
ati.ddr_power_std_ = std::sqrt(ddr_power_sum / iterations_);
agg_tensor_info_.push_back(ati);
}
}
public:
Profiler() {
cpu_stream_.open(cpu_power_rail, std::ifstream::in);
gpu_stream_.open(gpu_power_rail, std::ifstream::in);
ddr_stream_.open(ddr_power_rail, std::ifstream::in);
soc_stream_.open(soc_power_rail, std::ifstream::in);
sys_stream_.open(sys_power_rail, std::ifstream::in);
if (!cpu_stream_.is_open() or !gpu_stream_.is_open() or !ddr_stream_.is_open()
or !soc_stream_.is_open() or !sys_stream_.is_open()) {
std::cout << "Failed to open one of the power rails for reading\n";
exit(1);
}
}
~Profiler() {
cpu_stream_.close();
gpu_stream_.close();
ddr_stream_.close();
soc_stream_.close();
sys_stream_.close();
}
void profile(const char * const program, const int iterations) {
iterations_ = iterations;
resetGlobal();
for (unsigned i = 0; i < iterations_; i++) {
resetLocal();
// Launch two threads: one for running the program and one for
// profiling it. Pin the threads to specific cores to remove migration
// overhead. Profiling showed that the sampling rate increases slightly
// with pinning.
std::thread prog(&Profiler::runProgram, this, program);
std::thread power(&Profiler::recordPower, this);
pinThread(prog, core1);
pinThread(power, core2);
prog.join();
power.join();
updateStats();
// Sleep for some time to bring the GPU back to idle
std::this_thread::sleep_for(std::chrono::milliseconds(gpu_idle_time));
}
calculateAggregateStats();
}
void dumpTensorInfo(const char * const filename) const {
const std::string header = "Op,Time (ms),Energy (mJ),GPU Energy (mJ),DDR Energy (mJ),Power (mW),GPU Power (mW),DDR Power (mW),Time std,Energy std,GPU Energy std,DDR Energy std,Power std,GPU Power std,DDR Power std\n";
std::ofstream ofs;
ofs.open(filename);
//ofs << header;
for (const auto &ati : agg_tensor_info_) {
ofs << ati.name_
<< "," << ati.average_time_ * 1e3
<< "," << ati.average_energy_
<< "," << ati.average_gpu_energy_
<< "," << ati.average_ddr_energy_
<< "," << ati.average_power_
<< "," << ati.average_gpu_power_
<< "," << ati.average_ddr_power_
<< "," << ati.time_std_ * 1e3
<< "," << ati.energy_std_
<< "," << ati.gpu_energy_std_
<< "," << ati.ddr_energy_std_
<< "," << ati.power_std_
<< "," << ati.gpu_power_std_
<< "," << ati.ddr_power_std_
<< "\n";
std::cout << ati.average_time_ * 1e3 << "," << ati.average_energy_ << "\n";
}
ofs.close();
}
void dumpPowerReadings(const char * const filename) const {
std::ofstream ofs;
ofs.open(filename);
for (const auto &reading : power_readings_) {
std::chrono::duration<double> duration = reading.time_ - start_time_;
//std::chrono::duration<double> duration = reading.time_.time_since_epoch();
ofs << std::to_string(duration.count())
<< " " << reading.gpu_
<< " " << reading.ddr_
<< "\n";
}
ofs.close();
}
void dumpTotalInfo() const {
auto total_time = total_info_.time_ / iterations_;
auto total_energy = total_info_.energy_ / iterations_;
auto gpu_energy = total_info_.gpu_energy_ / iterations_;
auto ddr_energy = total_info_.ddr_energy_ / iterations_;
auto power = total_info_.power_ / iterations_; auto time_energy_pair = profile_wrapper.get_time_energy();
auto gpu_power = total_info_.gpu_power_ / iterations_; profile_wrapper.reset();
auto ddr_power = total_info_.ddr_power_ / iterations_;
std::cout << "-----------------------------------------------------\n"; printf("time: %f, energy: %f\n", time_energy_pair.first, time_energy_pair.second);
std::cout << "Program info (average)\n"; std::this_thread::sleep_for(std::chrono::milliseconds(sleep_millis));
std::cout << "-----------------------------------------------------\n"; }
std::cout << "\tExecution time: " << total_time << " seconds\n";
std::cout << "\tTotal energy: " << total_energy << " mJ\n";
std::cout << "\t GPU: " << gpu_energy << " mJ\n";
std::cout << "\t DDR: " << ddr_energy << " mJ\n";
std::cout << "\tPower: " << power << " mW\n";
std::cout << "\t GPU: " << gpu_power << " mW\n";
std::cout << "\t DDR: " << ddr_power << " mW\n";
std::cout << "-----------------------------------------------------\n";
}
};
int main(int argc, char *argv[]) {
if (argc < NUM_ARGS) {
std::cout << "Usage: " << argv[0] << " <program> <iterations> <tensor output file> [power output file]\n";
exit(1);
}
Profiler pp; int main(){
pp.profile(argv[1], std::stoi(argv[2])); Profiler profile_wrapper;
pp.dumpTensorInfo(argv[3]); profile_wrapper.initialize();
if (argc > NUM_ARGS) unsigned long sleep_millis = 5000;
pp.dumpPowerReadings(argv[4]); resume_pause_profiler(profile_wrapper, sleep_millis);
resume_pause_profiler(profile_wrapper, sleep_millis);
resume_pause_profiler(profile_wrapper, sleep_millis);
resume_pause_profiler(profile_wrapper, sleep_millis);
// IMPORTANT
profile_wrapper.exit_profiler();
return 0; return 0;
} }
*/
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment