Skip to content
Snippets Groups Projects
Commit 7e5f21fe authored by hashimsharif's avatar hashimsharif
Browse files

Adding CPU support to the profiler and making it easier to add devices

parent 8d8857d7
No related branches found
No related tags found
No related merge requests found
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#include <thread> #include <thread>
#include <vector> #include <vector>
// Reads power rails at runtime and computes the GPU and DDR energy within a window // Reads power rails at runtime and computes the specified device and DDR energy within a window
// of time, which is delimited by the calls to resume_profiler() and pause_profiler() // of time, which is delimited by the calls to resume_profiler() and pause_profiler()
// //
// IMPORTANT: Must call pause_profiler() to kill the profiler thread // IMPORTANT: Must call pause_profiler() to kill the profiler thread
...@@ -28,6 +28,16 @@ public: ...@@ -28,6 +28,16 @@ public:
~Profiler(); ~Profiler();
// FIXME: Probably change the enum name to something else.
// Expose the devices available
enum Device {
CPU = 0,
GPU = 1 << 0,
SOC = 1 << 1,
SYS = 1 << 2,
NONE = 1 << 3,
};
// Reinitializes boolean vars used for control flow and launches the profiler // Reinitializes boolean vars used for control flow and launches the profiler
// thread. DOES NOT reset other internal data structures. // thread. DOES NOT reset other internal data structures.
void start_profiler(); void start_profiler();
...@@ -39,11 +49,11 @@ public: ...@@ -39,11 +49,11 @@ public:
// Stops profiler by putting profiler thread to sleep // Stops profiler by putting profiler thread to sleep
void pause_profiler(); void pause_profiler();
// Gets the delta time and total GPU and DDR energy between the last two // Gets the delta time and total CPU/GPU and DDR energy between the last two
// calls to resume_profiler and pause_profiler // calls to resume_profiler and pause_profiler
// //
// Returns this as a pair of <delta time in milliseconds, energy> // Returns this as a pair of <delta time in milliseconds, energy>
std::pair<double, double> get_time_energy() const; std::pair<double, double> get_time_energy(Device dev = GPU) const;
// Resets all internal data structures, including the vector storing all power_readings. // Resets all internal data structures, including the vector storing all power_readings.
void reset(); void reset();
...@@ -76,11 +86,12 @@ private: ...@@ -76,11 +86,12 @@ private:
// An individual power reading // An individual power reading
struct PowerReading { struct PowerReading {
std::chrono::time_point<clock_type> time_; std::chrono::time_point<clock_type> time_;
double cpu_; double dev_;
double gpu_;
double ddr_; double ddr_;
double soc_; Device dev_type_;
double sys_;
// Initialize to avoid any undefined behavior
//PowerReading() : dev_type_(NONE), dev_(0), ddr_(0) {}
}; };
// Stores all power readings and is cleared only when reset() is called // Stores all power readings and is cleared only when reset() is called
...@@ -111,13 +122,14 @@ private: ...@@ -111,13 +122,14 @@ private:
std::thread profiler_thread_; std::thread profiler_thread_;
// Obtains a single power reading from the GPU and DDR rails // Obtains a single power reading from the device and DDR rails
void obtain_power_reading(); void obtain_power_reading(Device dev);
// Pins the given thread to the specified core // Pins the given thread to the specified core
void pin_thread(std::thread &t, const unsigned core) const; void pin_thread(std::thread &t, const unsigned core) const;
// Runs the profiler thread, keeping it alive by wrapping the functionality // Runs the profiler thread, keeping it alive by wrapping the functionality
// in an infinite loop // in an infinite loop
void run_profiler(); void run_profiler();
}; };
...@@ -43,39 +43,38 @@ void Profiler::start_profiler(){ ...@@ -43,39 +43,38 @@ void Profiler::start_profiler(){
// Resumes the profiling of whatever executable's currently running // Resumes the profiling of whatever executable's currently running
// DOES NOT reset any data // DOES NOT reset any data
void Profiler::resume_profiler() { void Profiler::resume_profiler() {
{ std::unique_lock<std::mutex> mutex_lock(mutex_);
std::unique_lock<std::mutex> mutex_lock(mutex_); if (should_run_profiler_){
if (should_run_profiler_){ std::cout << "WARNING: resume_profiler was already called\n";
std::cout << "WARNING: resume_profiler was already called\n"; }
} //std::cout<<"RESUME RESUME RESUME RESUME\n";
//std::cout<<"RESUME RESUME RESUME RESUME\n"; should_run_profiler_ = true;
should_run_profiler_ = true; start_time_ = clock_type::now();
start_time_ = clock_type::now();
}
cond_var_.notify_one(); cond_var_.notify_one();
} }
// Stops profiler by putting profiler thread to sleep // Stops profiler by putting profiler thread to sleep
void Profiler::pause_profiler() { void Profiler::pause_profiler() {
{ std::unique_lock<std::mutex> mutex_lock(mutex_);
std::unique_lock<std::mutex> mutex_lock(mutex_); if (!should_run_profiler_){
if (!should_run_profiler_){ std::cout << "WARNING: pause_profiler was already called\n";
std::cout << "WARNING: pause_profiler was already called\n";
}
//std::cout<<"PAUSE PAUSE PAUSE PAUSE\n";
should_run_profiler_ = false;
stop_time_ = clock_type::now();
} }
//std::cout<<"PAUSE PAUSE PAUSE PAUSE\n";
should_run_profiler_ = false;
stop_time_ = clock_type::now();
cond_var_.notify_one(); cond_var_.notify_one();
} }
// Gets the delta time and total GPU and DDR energy between the last two // Gets the delta time and total CPU/GPU and DDR energy between the last two
// calls to resume_profiler and pause_profiler // calls to resume_profiler and pause_profiler
// //
// Returns this as a pair of <delta time in milliseconds, energy> // Returns this as a pair of <delta time in milliseconds, energy>
std::pair<double, double> Profiler::get_time_energy() const { std::pair<double, double> Profiler::get_time_energy(Device dev) const {
std::unique_lock<std::mutex> mutex_lock(vector_mutex_); // MUST use a mutex // We support taking CPU/GPU readings only
if(dev != CPU && dev != GPU)
return std::make_pair(-1, -1);
std::unique_lock<std::mutex> mutex_lock(vector_mutex_); // MUST use a mutex
double total_energy = 0.0; double total_energy = 0.0;
if (on_jetson_) { if (on_jetson_) {
//std::cout<<"power readings size"<<power_readings_.size()<<'\n'; //std::cout<<"power readings size"<<power_readings_.size()<<'\n';
...@@ -83,7 +82,8 @@ std::pair<double, double> Profiler::get_time_energy() const { ...@@ -83,7 +82,8 @@ std::pair<double, double> Profiler::get_time_energy() const {
for (size_t i = 0; i < power_readings_.size(); i++){ for (size_t i = 0; i < power_readings_.size(); i++){
const auto& reading = power_readings_[i]; const auto& reading = power_readings_[i];
std::chrono::duration<double> duration_secs = reading.time_ - prev_time; std::chrono::duration<double> duration_secs = reading.time_ - prev_time;
total_energy += (reading.gpu_ + reading.ddr_);// * duration_secs.count(); if(reading.dev_type_ == dev)
total_energy += (reading.dev_ + reading.ddr_);// * duration_secs.count();
prev_time = reading.time_; prev_time = reading.time_;
} }
} }
...@@ -110,10 +110,13 @@ void Profiler::stop_profiler() { ...@@ -110,10 +110,13 @@ void Profiler::stop_profiler() {
profiler_thread_.join(); profiler_thread_.join();
} }
// Obtains a single power reading from the GPU and DDR rails // Obtains a single power reading from the CPU/GPU and DDR rails
void Profiler::obtain_power_reading() { void Profiler::obtain_power_reading(Device dev) {
std::unique_lock<std::mutex> mutex_lock(vector_mutex_); // MUST use a mutex // We support taking CPU/GPU readings only
if(dev != CPU && dev != GPU)
return;
std::unique_lock<std::mutex> mutex_lock(vector_mutex_); // MUST use a mutex
PowerReading reading; PowerReading reading;
// The order matters here. All the reads have to happen together first // The order matters here. All the reads have to happen together first
...@@ -122,16 +125,22 @@ void Profiler::obtain_power_reading() { ...@@ -122,16 +125,22 @@ void Profiler::obtain_power_reading() {
// the different rails. // the different rails.
reading.time_ = clock_type::now(); reading.time_ = clock_type::now();
if (on_jetson_){ if (on_jetson_){
gpu_stream_ >> reading.gpu_; // FIXME: Use switch-case in the future.
ddr_stream_ >> reading.ddr_; if(dev == CPU) {
cpu_stream_ >> reading.dev_;
gpu_stream_.seekg(0); ddr_stream_ >> reading.ddr_;
ddr_stream_.seekg(0); cpu_stream_.seekg(0);
} else {
gpu_stream_ >> reading.dev_;
ddr_stream_ >> reading.ddr_;
gpu_stream_.seekg(0);
}
ddr_stream_.seekg(0);
} else { } else {
reading.gpu_ = 0.0; reading.dev_ = 0.0;
reading.ddr_ = 0.0; reading.ddr_ = 0.0;
} }
reading.dev_type_ = dev;
power_readings_.push_back(reading); power_readings_.push_back(reading);
} }
...@@ -165,7 +174,8 @@ void Profiler::run_profiler(){ ...@@ -165,7 +174,8 @@ void Profiler::run_profiler(){
if (should_stop_profiler_) { if (should_stop_profiler_) {
break; break;
} }
obtain_power_reading(); obtain_power_reading(GPU);
obtain_power_reading(CPU);
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment