diff --git a/llvm/projects/gpu_profiler/include/profiler.h b/llvm/projects/gpu_profiler/include/profiler.h index 9468554833fa82fd3b3c8c2ab85a393e21e92c80..33f36dba7d7281de84dd4a155762559f24e5c52b 100644 --- a/llvm/projects/gpu_profiler/include/profiler.h +++ b/llvm/projects/gpu_profiler/include/profiler.h @@ -8,7 +8,7 @@ #include <thread> #include <vector> -// Reads power rails at runtime and computes the GPU and DDR energy within a window +// Reads power rails at runtime and computes the specified device and DDR energy within a window // of time, which is delimitered by the calls to resume_profiler() and pause_profiler() // // IMPORTANT: Must call pause_profiler() to kill the profiler thread @@ -28,6 +28,16 @@ public: ~Profiler(); + // FIXME: Probably change the enum name to something else. + // Expose the devices available + enum Device { + CPU = 0, + GPU = 1 << 0, + SOC = 1 << 1, + SYS = 1 << 2, + NONE = 1 << 3, + }; + // Reinitializes boolean vars used for control flow and launches the profiler // thread. DOES NOT reset other internal data structures. void start_profiler(); @@ -39,11 +49,11 @@ public: // Stops profiler by putting profiler thread to sleep void pause_profiler(); - // Gets the delta time and total GPU and DDR energy between the last two + // Gets the delta time and total CPU/GPU and DDR energy between the last two // calls to resume_profiler and pause_profiler // // Returns this as a pair of <delta time in milliseconds, energy> - std::pair<double, double> get_time_energy() const; + std::pair<double, double> get_time_energy(Device dev = GPU) const; // Resets all internal data structures, including the vector storing all power_readings. 
void reset(); @@ -76,11 +86,12 @@ private: // An individual power reading struct PowerReading { std::chrono::time_point<clock_type> time_; - double cpu_; - double gpu_; + double dev_; double ddr_; - double soc_; - double sys_; + Device dev_type_; + + // Initialize to avoid any undefined behavior + //PowerReading() : dev_type_(NONE), dev_(0), ddr_(0) {} }; // Stores all power readings and is cleared only when reset() is called @@ -111,13 +122,14 @@ private: std::thread profiler_thread_; - // Obtain's a single power reading from the GPU and DDR rails - void obtain_power_reading(); - + // Obtains a single power reading from the device and DDR rails + void obtain_power_reading(Device dev); + // Pins the given thread to the specified core void pin_thread(std::thread &t, const unsigned core) const; // Runs the profiler thread, keeping it alive by wrapping the functionality // in an infinite loop void run_profiler(); + }; diff --git a/llvm/projects/gpu_profiler/src/profiler.cpp b/llvm/projects/gpu_profiler/src/profiler.cpp index cae8823fa23056d44973f3b23d16c4c2f28e89f7..de84067d946451acce032ae3cdaaef667c588a4a 100644 --- a/llvm/projects/gpu_profiler/src/profiler.cpp +++ b/llvm/projects/gpu_profiler/src/profiler.cpp @@ -43,39 +43,38 @@ void Profiler::start_profiler(){ // Resumes the profiling of whatever executable's currently running // DOES NOT reset any data void Profiler::resume_profiler() { - { - std::unique_lock<std::mutex> mutex_lock(mutex_); - if (should_run_profiler_){ - std::cout << "WARNING: resume_profiler was already called\n"; - } - //std::cout<<"RESUME RESUME RESUME RESUME\n"; - should_run_profiler_ = true; - start_time_ = clock_type::now(); - } + std::unique_lock<std::mutex> mutex_lock(mutex_); + if (should_run_profiler_){ + std::cout << "WARNING: resume_profiler was already called\n"; + } + //std::cout<<"RESUME RESUME RESUME RESUME\n"; + should_run_profiler_ = true; + start_time_ = clock_type::now(); cond_var_.notify_one(); } // Stops profiler by putting 
profiler thread to sleep void Profiler::pause_profiler() { - { - std::unique_lock<std::mutex> mutex_lock(mutex_); - if (!should_run_profiler_){ - std::cout << "WARNING: pause_profiler was already called\n"; - } - //std::cout<<"PAUSE PAUSE PAUSE PAUSE\n"; - should_run_profiler_ = false; - stop_time_ = clock_type::now(); + std::unique_lock<std::mutex> mutex_lock(mutex_); + if (!should_run_profiler_){ + std::cout << "WARNING: pause_profiler was already called\n"; } + //std::cout<<"PAUSE PAUSE PAUSE PAUSE\n"; + should_run_profiler_ = false; + stop_time_ = clock_type::now(); cond_var_.notify_one(); } -// Gets the delta time and total GPU and DDR energy between the last two +// Gets the delta time and total CPU/GPU and DDR energy between the last two // calls to resume_profiler and pause_profiler // // Returns this as a pair of <delta time in milliseconds, energy> -std::pair<double, double> Profiler::get_time_energy() const { - std::unique_lock<std::mutex> mutex_lock(vector_mutex_); // MUST use a mutex +std::pair<double, double> Profiler::get_time_energy(Device dev) const { + // We support taking CPU/GPU readings only + if(dev != CPU && dev != GPU) + return std::make_pair(-1, -1); + std::unique_lock<std::mutex> mutex_lock(vector_mutex_); // MUST use a mutex double total_energy = 0.0; if (on_jetson_) { //std::cout<<"power readings size"<<power_readings_.size()<<'\n'; @@ -83,7 +82,8 @@ std::pair<double, double> Profiler::get_time_energy() const { for (size_t i = 0; i < power_readings_.size(); i++){ const auto& reading = power_readings_[i]; std::chrono::duration<double> duration_secs = reading.time_ - prev_time; - total_energy += (reading.gpu_ + reading.ddr_);// * duration_secs.count(); + if(reading.dev_type_ == dev) + total_energy += (reading.dev_ + reading.ddr_);// * duration_secs.count(); prev_time = reading.time_; } } @@ -110,10 +110,13 @@ void Profiler::stop_profiler() { profiler_thread_.join(); } -// Obtain's a single power reading from the GPU and DDR rails -void 
Profiler::obtain_power_reading() { - std::unique_lock<std::mutex> mutex_lock(vector_mutex_); // MUST use a mutex +// Obtains a single power reading from the CPU/GPU and DDR rails +void Profiler::obtain_power_reading(Device dev) { + // We support taking CPU/GPU readings only + if(dev != CPU && dev != GPU) + return; + std::unique_lock<std::mutex> mutex_lock(vector_mutex_); // MUST use a mutex PowerReading reading; // The order matters here. All the reads have to happen together first @@ -122,16 +125,22 @@ void Profiler::obtain_power_reading() { // the different rails. reading.time_ = clock_type::now(); if (on_jetson_){ - gpu_stream_ >> reading.gpu_; - ddr_stream_ >> reading.ddr_; - - gpu_stream_.seekg(0); - ddr_stream_.seekg(0); - + // FIXME: Use switch-case in the future. + if(dev == CPU) { + cpu_stream_ >> reading.dev_; + ddr_stream_ >> reading.ddr_; + cpu_stream_.seekg(0); + } else { + gpu_stream_ >> reading.dev_; + ddr_stream_ >> reading.ddr_; + gpu_stream_.seekg(0); + } + ddr_stream_.seekg(0); } else { - reading.gpu_ = 0.0; + reading.dev_ = 0.0; reading.ddr_ = 0.0; } + reading.dev_type_ = dev; power_readings_.push_back(reading); } @@ -165,7 +174,8 @@ void Profiler::run_profiler(){ if (should_stop_profiler_) { break; } - obtain_power_reading(); + obtain_power_reading(GPU); + obtain_power_reading(CPU); } }