//===--------------------------- hpvm-rt-controller.cpp ---------------------===//
//
//===----------------------------------------------------------------------===//
//   
//  This file contains code that allows the tensor runtime to adapt
// in response to external changes in conditions (such as frequency changes)
// by helping to choose correct approximation configurations. It also provides
// routines for the rest of the runtime to get performance and energy profiling.
//
//===----------------------------------------------------------------------===//


#include "hpvm-rt-controller.h"
#include "img_tensor_utils.h"
#include "global_data.h"
#include <cstdlib>
#include <fstream>
#include <iostream>

//-------- Functionality to read and update frequency on Jetson board -------//
/*const char* available_freqs[] = {"140250000", "229500000", "318750000",
                                 "408000000", "497250000", "586500000", 
                                 "675750000", "765000000", "854250000",
                                 "943500000", "1032750000", "1122000000",
                                 "1211250000", "1300500000"};

*/


// Table of selectable GPU frequency settings, in ascending order. The trailing
// comment on each entry is its index (level 0 through 13); setFreq() looks up
// an entry by that index and writes it to the devfreq min_freq/max_freq files.
// NOTE(review): values are presumably in Hz -- confirm against the sysfs ABI.
const int available_freqs[] = {
140250000, // 0
229500000, // 1
318750000, // 2
408000000, // 3
497250000, // 4
586500000, // 5
675750000, // 6
765000000, // 7
854250000, // 8
943500000, // 9
1032750000,// 10
1122000000,// 11
1211250000,// 12
1300500000 // 13
};


/*void updateJetsonGPUFreq(int freq_level) {

  if (freq_level < 0 || freq_level > 13) {
    printf("ERROR: Provide freq level between {0, 13}  \n\n\n");
    abort();
  }

  const char* freq_val = available_freqs[freq_level]; 
  printf("freq-val[0] = %s \n", freq_val);

  FILE* max_file =
    fopen("/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/max_freq", "w+");
  if (max_file == NULL) {
    printf("Could not min_freq file \n");
  }
  fwrite(freq_val, strlen(freq_val), 1, max_file);
  fclose(max_file);
  
  FILE* min_file =
    fopen("/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/min_freq", "w+");
  if (min_file == NULL){
    printf("Could not min_freq file \n");
    abort();
  }
  fwrite(freq_val, strlen(freq_val), 1, min_file);
  fclose(min_file);
}

unsigned long int readJetsonGPUFreq() {
  FILE* cur_freq_file =
    fopen("/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/cur_freq", "r");
//    fopen("/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/min_freq", "r");
  if (cur_freq_file == NULL) {
    printf("Could not open cur_freq file \n");
  }

  char buf[50];
  char* ptr;
  
  fread(buf, 50, 1, cur_freq_file);
  unsigned long cur_freq = strtoul(buf, &ptr, 10);
  fclose(cur_freq_file);
  return cur_freq;
}

*/


// Pins the GPU frequency to the entry of available_freqs selected by
// freq_index, by writing that value to both the devfreq min_freq and max_freq
// files (min first, then max).
//
// freq_index must be a valid index into available_freqs. An out-of-range index
// is a programming error: it is reported on stderr and the process aborts
// rather than reading past the end of the table.
// A file that cannot be opened or written is reported on stderr and skipped,
// so the request itself stays best-effort.
void setFreq(unsigned freq_index) {

  const unsigned num_freqs =
      sizeof(available_freqs) / sizeof(available_freqs[0]);
  if (freq_index >= num_freqs) {
    std::cerr << "ERROR: frequency index " << freq_index
              << " is out of range, expected a value below " << num_freqs
              << "\n";
    std::abort();
  }

  const unsigned target_freq = available_freqs[freq_index];

  const char * const freq_files[] = {
    "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/min_freq",
    "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/max_freq"
  };

  for (const char *path : freq_files) {
    std::ofstream stream(path, std::ofstream::out);
    if (!stream) {
      std::cerr << "WARNING: could not open " << path << " for writing\n";
      continue;
    }

    // Flush explicitly so that a rejected write shows up in the stream state
    stream << target_freq << std::flush;
    if (!stream) {
      std::cerr << "WARNING: could not write frequency " << target_freq
                << " to " << path << "\n";
    }
  }
}

// Reads the current GPU frequency from the devfreq cur_freq file, prints it
// to stdout and returns it.
//
// Returns 0 when the file cannot be opened or does not start with a number,
// so callers never see an indeterminate value.
unsigned recordFreq() {

  // Current frequency file
  const char * const cur_freq_file = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/cur_freq";
  std::ifstream cur_stream(cur_freq_file, std::ifstream::in);

  // Start from a defined value: extraction leaves the variable untouched when
  // the stream failed to open.
  unsigned cur_freq = 0;
  if (!(cur_stream >> cur_freq)) {
    cur_freq = 0;
  }
  std::cout << "Starting frequency = " << cur_freq << "\n";

  return cur_freq;
}





//---------------------------------------------------------------------------//

/*
 * Tell whether a path can be stat()ed.
 * Returns true when stat() succeeds on the given path, false otherwise.
 */
bool fileExists(const std::string &file) {
  struct stat info;
  const int status = stat(file.c_str(), &info);
  if (status != 0) {
    return false;
  }
  return true;
}

// Builds a list that hands out the indices in `il`, each one repeated `rf`
// times. No frequency request is issued for the first batch, so the
// repetition counter starts at 1 rather than 0 to account for it.
FrequencyIndexList::FrequencyIndexList(std::vector<int> il, unsigned rf)
    : idx_list(il),
      rep_factor(rf),
      count(1),
      idx(0) {
}

// Returns the frequency index to use next. Once the current entry has been
// handed out rep_factor times, moves on to the following entry of idx_list,
// wrapping around to the start after the last one.
unsigned FrequencyIndexList::getNextIndex() {
  if (count == rep_factor) {
    // Current entry is used up: advance and count this call as its first use
    idx = (idx + 1) % idx_list.size();
    count = 1;
  } else {
    count++;
  }
  return idx_list[idx];
}

// Functions
// Zeroes the compute, control and config time counters of the current
// iteration.
void ProfileInfo::resetCurrentIterationTime() {
  time_config_current_iteration = 0.0;
  time_control_current_iteration = 0.0;
  time_compute_current_iteration = 0.0;
}

// Zeroes the compute, control and config energy counters of the current
// iteration.
void ProfileInfo::resetCurrentIterationEnergy() {
  energy_config_current_iteration = 0.0;
  energy_control_current_iteration = 0.0;
  energy_compute_current_iteration = 0.0;
}

// Opens a new iteration unless one is already open: clears the per-iteration
// counters and appends an empty per-operation record list for both time and
// energy.
void ProfileInfo::start_iteration() {
  if (in_iteration)
    return;

  resetCurrentIterationTime();
  resetCurrentIterationEnergy();

  typedef std::vector<std::pair<std::string, double>> OperationRecords;
  tensor_time_info.push_back(OperationRecords());
  tensor_energy_info.push_back(OperationRecords());

  in_iteration = true;
}
void ProfileInfo::end_iteration() {
  // Update time counters
  time_compute += time_compute_current_iteration;
  time_control += time_control_current_iteration;
  time_config += time_config_current_iteration;

  time_total +=
      (time_compute_current_iteration + time_control_current_iteration +
       time_config_current_iteration);

  // Update energy counters
  energy_compute += energy_compute_current_iteration;
  energy_control += energy_control_current_iteration;
  energy_config += energy_config_current_iteration;

  energy_total +=
      (energy_compute_current_iteration + energy_control_current_iteration +
       energy_config_current_iteration);

  // Save current iteration counters
  compute_time_info.push_back(time_compute_current_iteration);
  compute_energy_info.push_back(energy_compute_current_iteration);
  control_time_info.push_back(time_control_current_iteration);
  control_energy_info.push_back(energy_control_current_iteration);
  config_time_info.push_back(time_config_current_iteration);
  config_energy_info.push_back(energy_config_current_iteration);

  frequency_info.push_back(frequency_current_iteration);

  // Note end of iteration
  in_iteration = false;
}

// Samples the frequency for the current iteration: read through recordFreq()
// when built with JETSON_EXECUTION, otherwise recorded as 0.
void ProfileInfo::readIterationFrequency() {
#ifdef JETSON_EXECUTION
  const unsigned sampled_freq = recordFreq();
#else
  const unsigned sampled_freq = 0;
#endif //JETSON_EXECUTION
  frequency_current_iteration = sampled_freq;
}

// Returns the frequency last stored by readIterationFrequency().
unsigned long ProfileInfo::getIterationFrequency() {
  const unsigned long freq = frequency_current_iteration;
  return freq;
}

// Adds `t` to the compute time of the current iteration (opening one if
// needed) and logs it as a per-operation record labelled `s`.
void ProfileInfo::addToCurrentIterationComputeTime(const char *s, double t) {
  start_iteration();

  const std::string op_name(s);
  tensor_time_info.back().push_back(
      std::pair<std::string, double>(op_name, t));
  time_compute_current_iteration += t;
}

// Adds `t` to the control time of the current iteration, opening one if
// needed.
void ProfileInfo::addToCurrentIterationControlTime(double t) {
  start_iteration();
  time_control_current_iteration = time_control_current_iteration + t;
}

// Adds `t` to the config time of the current iteration, opening one if
// needed.
void ProfileInfo::addToCurrentIterationConfigTime(double t) {
  start_iteration();
  time_config_current_iteration = time_config_current_iteration + t;
}

// Adds `e` to the compute energy of the current iteration (opening one if
// needed) and logs it as a per-operation record labelled `s`.
void ProfileInfo::addToCurrentIterationComputeEnergy(const char *s, double e) {
  start_iteration();

  const std::string op_name(s);
  tensor_energy_info.back().push_back(
      std::pair<std::string, double>(op_name, e));
  energy_compute_current_iteration += e;
}

// Adds `e` to the control energy of the current iteration, opening one if
// needed.
void ProfileInfo::addToCurrentIterationControlEnergy(double e) {
  start_iteration();
  energy_control_current_iteration = energy_control_current_iteration + e;
}

// Adds `e` to the config energy of the current iteration, opening one if
// needed.
void ProfileInfo::addToCurrentIterationConfigEnergy(double e) {
  start_iteration();
  energy_config_current_iteration = energy_config_current_iteration + e;
}

// Returns the time accumulated over all iterations ended so far.
double ProfileInfo::getTotalTime() {
  return time_total;
}

// Returns the energy accumulated over all iterations ended so far.
double ProfileInfo::getTotalEnergy() {
  return energy_total;
}

// Returns the compute time recorded so far in the current iteration.
double ProfileInfo::getCurrentIterationComputeTime() {
  const double compute_time = time_compute_current_iteration;
  return compute_time;
}

// Returns the compute energy recorded so far in the current iteration.
double ProfileInfo::getCurrentIterationComputeEnergy() {
  const double compute_energy = energy_compute_current_iteration;
  return compute_energy;
}

// Stores a copy of `str` as the path printToFile() writes the profile to.
void ProfileInfo::set_out_file_name(std::string &str) {
  out_file_name = str;
}

// Writes the collected profile to out_file_name.
//
// Nothing is written when no iteration has been recorded. Otherwise the file
// holds, per iteration, one "<operation> <time> <energy>" line per recorded
// tensor operation followed by the iteration's compute/control/config time
// and energy and its frequency, and finally the totals over all iterations.
// Aborts if the output file cannot be opened.
void ProfileInfo::printToFile() {
  INFO("Writing Runtime Profile Info File...\n");

  if (control_time_info.size() == 0)
    return;

  std::ofstream s_out(out_file_name.c_str());
  if (!s_out) {
    ERROR("Failed to open output file.");
    abort();
  }

  // By construction, tensor_time_info and tensor_energy_info are expected
  // to have equal sizes, in outer and inner vectors both,
  // and all time_info and energy_info vectors must have the same size.
  // Every vector indexed in the loop below is checked, including the
  // compute_* ones.
  const size_t iterations = tensor_time_info.size();
  CUSTOM_ASSERT(
      (tensor_energy_info.size() == iterations) &&
      (compute_time_info.size() == iterations) &&
      (compute_energy_info.size() == iterations) &&
      (control_time_info.size() == iterations) &&
      (control_energy_info.size() == iterations) &&
      (config_time_info.size() == iterations) &&
      (config_energy_info.size() == iterations) &&
      (frequency_info.size() == iterations) &&
      "time_info, energy_info, frequency_info size: \
                   iteration number does not match.");

  for (size_t i = 0; i < iterations; i++) {
    // time_info.size() == energy_info.size(), since we passed the assertion
    s_out << "Iteration " << i << "\n";

    CUSTOM_ASSERT(
        (tensor_time_info[i].size() == tensor_energy_info[i].size()) &&
        "time_info and energy_info size: operation number does not match.");
    for (size_t j = 0; j < tensor_time_info[i].size(); j++) {
      // time_info[i].size() == energy_info[i].size(), we passed the assertion
      CUSTOM_ASSERT(
          (tensor_time_info[i][j].first == tensor_energy_info[i][j].first) &&
          "time_info and energy_info: operation does not match.");
      s_out << tensor_time_info[i][j].first << " "
            << tensor_time_info[i][j].second << " "
            << tensor_energy_info[i][j].second << "\n";
    }

    s_out << "\nIteration Compute Time   : " << compute_time_info[i] << "\n";
    s_out << "Iteration Compute Energy : " << compute_energy_info[i] << "\n";
    s_out << "Iteration Control Time   : " << control_time_info[i] << "\n";
    s_out << "Iteration Control Energy : " << control_energy_info[i] << "\n";
    s_out << "Iteration Config Time   : " << config_time_info[i] << "\n";
    s_out << "Iteration Config Energy : " << config_energy_info[i] << "\n";
    s_out << "Iteration End Frequency : " << frequency_info[i] << "\n\n\n";
  }
  s_out << "\n\nTotal Compute Time  : " << time_compute << "\n";
  s_out << "Total Compute Energy: " << energy_compute << "\n";

  s_out << "\nTotal Control Time  : " << time_control << "\n";
  s_out << "Total Control Energy: " << energy_control << "\n";

  s_out << "\nTotal Config Time  : " << time_config << "\n";
  s_out << "Total Config Energy: " << energy_config << "\n";

  s_out << "\nTotal Time  : " << time_total << "\n";
  s_out << "Total Energy: " << energy_total << "\n";

  s_out.close();

  INFO("Done writing profile.\n");
}

// Creates an empty profile: all totals and per-iteration counters start at
// zero and no iteration is open.
ProfileInfo::ProfileInfo()
    : time_total(0.0), energy_total(0.0), time_compute_current_iteration(0.0),
      time_control_current_iteration(0.0), time_config_current_iteration(0.0),
      energy_compute_current_iteration(0.0),
      energy_control_current_iteration(0.0),
      energy_config_current_iteration(0.0),
      frequency_current_iteration(0), in_iteration(false) {
  // The per-category totals are accumulated with += in end_iteration() and
  // printed by printToFile(), so they need a defined starting value too.
  // They are assigned here rather than in the initializer list so this does
  // not depend on their declaration order.
  time_compute = 0.0;
  time_control = 0.0;
  time_config = 0.0;
  energy_compute = 0.0;
  energy_control = 0.0;
  energy_config = 0.0;
}

// Loads the slowdown factors, one per line, from "slowdowns.txt" in the
// current directory. Blank lines are skipped.
//
// When the file is missing or holds no values, ten random factors in
// [1.0, 5.0] are generated instead, so the list is never left empty
// (getNextSlowdown() indexes it modulo its size).
Slowdowns::Slowdowns() {
  idx = 0;

  std::ifstream s_in("slowdowns.txt");
  if (!s_in) {
    DEBUG("slowdowns file not found. Initializing slowdowns randomly.\n");
  } else {
    DEBUG("Found slowdowns file.\n");
    for (std::string line; std::getline(s_in, line);) {
      // std::stof throws on a line without any digits; skip blank lines
      if (line.find_first_not_of(" \t\r\n") == std::string::npos)
        continue;
      float s = std::stof(line);
      slowdowns.push_back(s);
    }
    if (slowdowns.empty()) {
      DEBUG("slowdowns file is empty. Initializing slowdowns randomly.\n");
    }
  }

  if (slowdowns.empty()) {
    for (unsigned i = 0; i < 10; i++) {
      slowdowns.push_back(1.0 + (rand() / (RAND_MAX / (5.0 - 1.0))));
    }
  }
}

// Returns how many slowdown factors are stored.
unsigned Slowdowns::getSlowdownsNumber() {
  return slowdowns.size();
}

// Returns the slowdown factor at the current position and advances the
// position, wrapping around to the first factor after the last one.
float Slowdowns::getNextSlowdown() {
  const float current = slowdowns[idx];
  idx = idx + 1;
  idx %= slowdowns.size();
  return current;
}

// Global pointer to the runtime controller. It is not assigned in this part
// of the file; as a namespace-scope pointer it is zero-initialized at startup.
RuntimeController *RC;

// Functions

// Private functions of profiler
// Starts the profiler, if there is one.
void RuntimeController::start_profiler() {
  if (!profiler)
    return;
  profiler->start_profiler();
}
// Stops the profiler, if there is one.
void RuntimeController::stop_profiler() {
  if (!profiler)
    return;
  profiler->stop_profiler();
}
// For testing purposes only - do not use widely
std::vector<struct Configuration *> &RuntimeController::
getSpeedupConfigurations() {
  return SpeedupConfigurations;
}
// For testing purposes only - do not use widely
std::vector<struct Configuration *> &RuntimeController::
getEnergyConfigurations() {
  return EnergyConfigurations;
}
// For testing purposes only - do not use widely
std::vector<struct Configuration *> &RuntimeController::
getThreeDCurveConfigurations() {
  return ThreeDCurveConfigurations;
}
// For testing purposes only - do not use widely
unsigned RuntimeController::getConfigurationIdx() { return configurationIdx; }

// Returns the speedup field of the currently selected configuration.
double RuntimeController::getCurrentConfigurationSpeedup() {
  const auto *current = (*Configurations)[configurationIdx];
  return static_cast<double>(current->speedup);
}

// Returns the energy field of the currently selected configuration.
double RuntimeController::getCurrentConfigurationEnergy() {
  const auto *current = (*Configurations)[configurationIdx];
  return static_cast<double>(current->energy);
}

// Returns the accuracy field of the currently selected configuration.
double RuntimeController::getCurrentConfigurationAccuracy() {
  const auto *current = (*Configurations)[configurationIdx];
  return static_cast<double>(current->accuracy);
}

// Returns the accuracyLoss field of the currently selected configuration.
double RuntimeController::getCurrentConfigurationAccuracyLoss() {
  const auto *current = (*Configurations)[configurationIdx];
  return static_cast<double>(current->accuracyLoss);
}

// Looks up the node configuration to use within the currently selected
// configuration.
//
// When currentTensorID is -1 (no visc.node.id specified for this HPVM node)
// the lookup is by node name `data`; otherwise it is by currentTensorID.
// Both lookups use map::at(), so a missing entry throws std::out_of_range.
NodeConfiguration *RuntimeController::getNodeConfiguration(const char *data) {

  // if visc.node.id Not specified for this HPVM Node
  if (currentTensorID == -1){
    std::string s(data);
    // All nodes are expected to have a configuration
    return (*Configurations)[configurationIdx]->setup.at(s);
  }
  else{
    // "\%" is not a valid escape sequence; a plain '%' starts the conversion
    DEBUG("-- currentTensorID = %u \n", currentTensorID);
    return (*Configurations)[configurationIdx]->idConfigMap.at(currentTensorID);
  }

}

// Initializes the controller from the configuration file at path `Cstr`.
//
// In order: derives the profile output file name from the configuration
// path, reads the configuration file, computes the pareto configuration
// points, selects the speedup curve with its first entry as the current
// configuration, sets up the state used by the runtime control strategies
// (slowdowns, random number generation, initial frequency and speedup, knob
// readers), and finally starts the profiler, pauses it and resets it.
void RuntimeController::init(const char *Cstr) {
  // We initialize the path to the profile info output file,
  // based on the path given for the configuration file
  setProfileInfoFilename(Cstr);
  readConfigurationFile(Cstr);

  // NOTE: Configurations is pareto-configs. InitialConfigurations is the full list (config file)
  // Configurations stays NULL while the curves are computed and printed;
  // it is pointed at the speedup curve further down.
  Configurations = NULL;
  computeParetoConfigurationPoints();
  //    compute3DParetoConfigurationPoints(); Not using 3D curve
  INFO("Speedup Configurations\n");
  printConfigurations(SpeedupConfigurations);
  //    INFO("Energy Configurations\n");
  //    printConfigurations(EnergyConfigurations);
  //    INFO("3D Configurations\n");
  //    printConfigurations(ThreeDCurveConfigurations);
  configurationIdx =
      0; // TODO: initialize using pareto curve - findTargetConfiguration ?
  Configurations = &SpeedupConfigurations;

  // Initializations for different runtime control strategies
  // Seed rand() from the clock; the Slowdowns constructor calls rand() when
  // no slowdowns file is found.
  srand(static_cast<unsigned>(time(0)));
  slowdowns = new Slowdowns();

  // Pseudo random variable (when we did few experiments)
  // or true random numbers for probabilistic control
  pseudo_rd = 0.0;
  std::random_device rd;  //Will be used to obtain a seed for the random number engine
  generator = std::mt19937 (rd()); //Standard mersenne_twister_engine seeded with rd()
  distr = std::uniform_real_distribution<>(0.0, 1.0);

  // Start from the last (index 13) entry of the frequency table, with no
  // speedup applied
  g_freq = available_freqs[13];
  g_speedup = 1.0;

  // Initialize utility objects for knob reading
  perfParamSet = new PerfParamSet();
  sampParamSet = new SampParamSet();

  // Start profiling thread in the background, ready to time
  start_profiler();
  pause_profiler();
  reset_profiler();
}

// Exposing functionality of ProfileInfo
// Forwards to ProfileInfo::end_iteration(); no-op without profile info.
void RuntimeController::end_iteration() {
  if (!PI)
    return;
  PI->end_iteration();
}

// Forwards to ProfileInfo::addToCurrentIterationComputeTime(); no-op without
// profile info.
void RuntimeController::addToCurrentIterationComputeTime(
    const char *s, double t) {
  if (!PI)
    return;
  PI->addToCurrentIterationComputeTime(s, t);
}

// Forwards to ProfileInfo::addToCurrentIterationControlTime(); no-op without
// profile info.
void RuntimeController::addToCurrentIterationControlTime(double t) {
  if (!PI)
    return;
  PI->addToCurrentIterationControlTime(t);
}

// Forwards to ProfileInfo::addToCurrentIterationConfigTime(); no-op without
// profile info.
void RuntimeController::addToCurrentIterationConfigTime(double t) {
  if (!PI)
    return;
  PI->addToCurrentIterationConfigTime(t);
}

// Forwards to ProfileInfo::addToCurrentIterationComputeEnergy(); no-op
// without profile info.
void RuntimeController::addToCurrentIterationComputeEnergy(
    const char *s, double e) {
  if (!PI)
    return;
  PI->addToCurrentIterationComputeEnergy(s, e);
}

// Forwards to ProfileInfo::addToCurrentIterationControlEnergy(); no-op
// without profile info.
void RuntimeController::addToCurrentIterationControlEnergy(double e) {
  if (!PI)
    return;
  PI->addToCurrentIterationControlEnergy(e);
}

// Forwards to ProfileInfo::addToCurrentIterationConfigEnergy(); no-op
// without profile info.
void RuntimeController::addToCurrentIterationConfigEnergy(double e) {
  if (!PI)
    return;
  PI->addToCurrentIterationConfigEnergy(e);
}

// Returns the current iteration's compute time from the profile info, or 0.0
// when there is no profile info.
double RuntimeController::getCurrentIterationComputeTime() {
  if (PI) {
    return PI->getCurrentIterationComputeTime();
  }
  return 0.0;
}

// Returns the current iteration's compute energy from the profile info, or
// 0.0 when there is no profile info.
double RuntimeController::getCurrentIterationComputeEnergy() {
  if (PI) {
    return PI->getCurrentIterationComputeEnergy();
  }
  return 0.0;
}

// Forwards to ProfileInfo::readIterationFrequency(); no-op without profile
// info.
void RuntimeController::readIterationFrequency() {
  if (!PI)
    return;
  PI->readIterationFrequency();
}

// Returns the iteration frequency stored in the profile info, or 0 when
// there is no profile info.
unsigned long RuntimeController::getIterationFrequency() {
  if (PI) {
    return PI->getIterationFrequency();
  }
  return 0;
}

// When built with JETSON_EXECUTION, takes the next index from the frequency
// index list and applies it with setFreq(). Does nothing otherwise.
void RuntimeController::updateFrequency() {
#ifdef JETSON_EXECUTION
  const unsigned next_level = FIL->getNextIndex();
  setFreq(next_level);
#endif //JETSON_EXECUTION
}

// Writes the profile info to its output file; no-op without profile info.
void RuntimeController::writeProfileInfo() {
  if (!PI)
    return;
  PI->printToFile();
}

// Exposing functionality of (gpu) profiler

// Resumes the profiler, if there is one.
void RuntimeController::resume_profiler() {
  if (!profiler)
    return;
  profiler->resume_profiler();
}

// Pauses the profiler, if there is one.
void RuntimeController::pause_profiler() {
  if (!profiler)
    return;
  profiler->pause_profiler();
}

// Resets the profiler, if there is one.
void RuntimeController::reset_profiler() {
  if (!profiler)
    return;
  profiler->reset();
}

// Returns the profiler's (time, energy) pair, or (0.0, 0.0) when there is no
// profiler.
std::pair<double, double> RuntimeController::get_time_energy() const {
  if (!profiler) {
    return std::make_pair(0.0, 0.0);
  }
  return profiler->get_time_energy();
}

// Exposing functionality of promise simulator

// Forwards all arguments unchanged to the promise simulator's fc_profile()
// and returns its result, or (0.0, 0.0) when there is no simulator.
std::pair<double, double> RuntimeController::fc_profile(
    const unsigned num_rows_a, const unsigned num_cols_a,
    const unsigned num_rows_b, const unsigned num_cols_b,
    const unsigned voltage_swing, const unsigned patch_factor) {
  if (!promise) {
    return std::make_pair(0.0, 0.0);
  }
  return promise->fc_profile(
      num_rows_a, num_cols_a, num_rows_b, num_cols_b, voltage_swing,
      patch_factor);
}

// Forwards all arguments unchanged to the promise simulator's conv_profile()
// and returns its result, or (0.0, 0.0) when there is no simulator.
std::pair<double, double> RuntimeController::conv_profile(
    const unsigned n, const unsigned c, const unsigned h, const unsigned w,
    const unsigned c_out, const unsigned c_in, const unsigned k_h,
    const unsigned k_w, const unsigned s_h, const unsigned s_w,
    const unsigned voltage_swing, const unsigned patch_factor) {
  if (!promise) {
    return std::make_pair(0.0, 0.0);
  }
  return promise->conv_profile(
      n, c, h, w, c_out, c_in, k_h, k_w, s_h, s_w, voltage_swing,
      patch_factor);
}

// Constructor and destructor

// Selects configuration 0 and creates the frequency index list, which walks
// the levels from 13 down to 0 and repeats each one 10 times. The profile
// info, profiler and promise simulator are only created when built with
// ACTIVE_PROFILING; otherwise they stay null.
RuntimeController::RuntimeController() {
  configurationIdx = 0;

  const std::vector<int> descending_levels = {13, 12, 11, 10, 9, 8, 7,
                                              6,  5,  4,  3,  2, 1, 0};
  FIL = new FrequencyIndexList(descending_levels, 10);

#ifdef ACTIVE_PROFILING
  PI = new ProfileInfo();
  profiler = new Profiler();
  promise = new Promise();
#else
  PI = nullptr;
  profiler = nullptr;
  promise = nullptr;
#endif
}

// Stops the profiler, writes the profile info out, then releases everything
// the constructor allocated along with the node configurations owned by
// InitialConfigurations.
//
// Objects allocated in init() are not released here: the constructor does
// not null those pointers, so they are only valid once init() has run.
RuntimeController::~RuntimeController() {

  stop_profiler();
  writeProfileInfo();

  // delete on a null pointer is a no-op, so no guards are needed
  delete PI;
  delete profiler;
  delete promise;

  // Always allocated by the constructor
  delete FIL;

  // Handle freeing memory, for all configurations
  // A way to do that is to not free the initial configurations in the pareto
  // curve, and free all at once in the end This is done because configurations
  // are stored in different containers, but share the node setup
  for (struct Configuration &Conf : InitialConfigurations) {
    // Iterate the map in place; copying it first is unnecessary
    for (const std::pair<const std::string, NodeConfiguration *> &Entry :
         Conf.setup) {
      delete Entry.second;
    }
  }
}

// Chooses the profile output file: "profile_info_<i>.txt" in the directory
// part of `str` (everything up to and including the last '/'), using the
// smallest i >= 0 for which no such file exists yet. Does nothing when
// there is no profile info.
void RuntimeController::setProfileInfoFilename(const char *str) {

  if (!PI)
    return;

  const std::string config_path(str);
  // When there is no '/', npos + 1 wraps to 0 and the directory part is empty
  const size_t last_slash = config_path.rfind('/');
  std::string prefix = config_path.substr(0, last_slash + 1);
  prefix += "profile_info_";

  std::string candidate;
  unsigned suffix = 0;
  do {
    candidate = prefix + std::to_string(suffix) + ".txt";
    ++suffix;
  } while (fileExists(candidate));

  PI->set_out_file_name(candidate);
}

void RuntimeController::readConfigurationFile(const char *str) {

  INFO("Reading Configuration File...\n");

  std::ifstream qin(str);

  if (!qin) {
    ERROR("Failed to open configuration file.");
    abort();
  }

  bool readingConfiguration = false;
  bool readingFirstLine = false;

  // Read baseline_time from first line of configuration file
  std::string first_line;
  std::getline(qin, first_line);
  DEBUG("first_line: %s\n", first_line.c_str());

  try{
    baseline_time = std::stod(first_line);
    DEBUG("Baseline time: %lf\n\n", baseline_time);
  }
  catch(...){
    ERROR("Please Add/Fix Baseline Time at Top of Config File.. ");
  }

  
  unsigned int firstTensorID = 1;  
  for (std::string line; std::getline(qin, line);) {
    DEBUG("line: %s\n", line.c_str());

    // Tokenize using ' ' as delimiter
    // Vector to store tokens
    std::vector<std::string> tokens;

    for (auto i = strtok(&line[0], " "); i != NULL; i = strtok(NULL, " "))
      tokens.push_back(i);

    for (unsigned i = 0; i < tokens.size(); i++)
      DEBUG("t: %s\n", tokens[i].c_str());

    DEBUG("\n");

    if (tokens[0] == "+++++") { // Found new configuration start token
      // Mark the start of a new configuration
      readingConfiguration = true;
      readingFirstLine = true;
      continue;
    }

    if (tokens[0] == "-----") { // Found configuration end token
      readingConfiguration = false;
      // Mark the end of current configuration
      continue;
    }

    if (readingFirstLine) {
      // Read first line, to create the new configuration struct
      readingFirstLine = false;
      firstTensorID = 1; // reset first tensor ID for new config
      
      InitialConfigurations.push_back(Configuration(
          tokens[0], std::stof(tokens[1]), std::stof(tokens[2]),
          std::stof(tokens[3]), std::stof(tokens[4])));
      continue;
    }

    if (tokens[1] == "gpu") {
      DEBUG("Found gpu configuration\n");

      // There must be at least one operation, with an approximation option
      CUSTOM_ASSERT(
          (tokens.size() >= 5) &&
          "Not enough operations - approximation options.");

      GPUNodeConfiguration *NodeConf = new GPUNodeConfiguration();
      InitialConfigurations.back().setup.insert(
          std::make_pair(tokens[0], NodeConf));

      // Updating map of visc.node.id ID values to NodeConfigurations
      // FIXME: Do same for CPU and PROMISE configs
      InitialConfigurations.back().idConfigMap.insert(
          std::make_pair(firstTensorID, NodeConf));
      DEBUG("*** firstTensorID = %d \n\n", firstTensorID);
      
      unsigned idx = 2;
      while (idx < tokens.size()) {
        if (tokens[idx] == "add") {
          DEBUG("Found add operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::ADD);
          idx++;
        } else if (tokens[idx] == "batchnorm") {
          DEBUG("Found batchnorm operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::BATCHNORM);
          idx++;
        } else if (tokens[idx] == "conv") {
          DEBUG("Found conv operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::CONV);
          idx++;
        } else if (tokens[idx] == "group_conv") {
          DEBUG("Found group_conv operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::GROUP_CONV);
          idx++;
        } else if (tokens[idx] == "mul") {
          DEBUG("Found mul operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::MUL);
          idx++;
        } else if (tokens[idx] == "relu") {
          DEBUG("Found relu operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::RELU);
          idx++;
        } else if (tokens[idx] == "clipped_relu") {
          DEBUG("Found clipped_relu operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU);
          idx++;
        } else if (tokens[idx] == "tanh") {
          DEBUG("Found tanh operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::TANH);
          idx++;
        } else if (tokens[idx] == "pool_max") {
          DEBUG("Found pool_max operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::POOL_MAX);
          idx++;
        } else if (tokens[idx] == "pool_mean") {
          DEBUG("Found pool_mean operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::POOL_MEAN);
          idx++;
        } else if (tokens[idx] == "pool_min") {
          DEBUG("Found pool_min operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::POOL_MIN);
          idx++;
        } else if (tokens[idx] == "softmax") {
          DEBUG("Found softmax operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::SOFTMAX);
          idx++;
        } else if (tokens[idx] == "fft") {
          DEBUG("Found fft operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::FFT);
          idx++;
        } else if (tokens[idx] == "reduce") {
          DEBUG("Found reduce operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::REDUCE);
          idx++;
        } else if (tokens[idx] == "projectiveT") {
          DEBUG("Found projectiveT operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::PROJECTIVE_T);
          idx++;
        } else if (tokens[idx] == "map1") {
          DEBUG("Found map1 operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::MAP1);
          idx++;
        } else if (tokens[idx] == "map2") {
          DEBUG("Found map2 operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::MAP2);
          idx++;
        } else if (tokens[idx] == "map3") {
          DEBUG("Found map3 operation\n");
          NodeConf->pushNewTensorOperation(
              GPUNodeConfiguration::TENSOR_OP::MAP3);
          idx++;
        } else /*Not a new operation. This means an approximation option*/
            if (tokens[idx] == "fp32") {
          DEBUG("Found fp32 option\n");
          int fp32 = std::stoi(tokens[idx + 1]);
          DEBUG("fp32 parameter: %d, ignoring\n", fp32);
          NodeConf->pushNewApproximationChoiceForOperation(
              GPUNodeConfiguration::APPROX::FP32, fp32);
          idx += 2;
        } else if (tokens[idx] == "fp16") {
          DEBUG("Found fp16 option\n");
          int fp16 = std::stoi(tokens[idx + 1]);
          DEBUG("fp16 parameter: %d, ignoring\n", fp16);
          NodeConf->pushNewApproximationChoiceForOperation(
              GPUNodeConfiguration::APPROX::FP16, fp16);
          idx += 2;
        } else if (tokens[idx] == "perf") {
          DEBUG("Found perf option\n");
          int perf = std::stoi(tokens[idx + 1]);
          DEBUG("perf parameter: %d\n", perf);
          NodeConf->pushNewApproximationChoiceForOperation(
              GPUNodeConfiguration::APPROX::PERFORATION, perf);
          idx += 2;
        } else if (tokens[idx] == "perf_fp16") {
          DEBUG("Found perf_fp16 option\n");
          int perf_fp16 = std::stoi(tokens[idx + 1]);
          DEBUG("perf_fp16 parameter: %d\n", perf_fp16);
          NodeConf->pushNewApproximationChoiceForOperation(
              GPUNodeConfiguration::APPROX::PERFORATION_HP, perf_fp16);
          idx += 2;
        } else if (tokens[idx] == "samp") {
          DEBUG("Found samp option\n");
          int samp = std::stoi(tokens[idx + 1]);
          DEBUG("samp parameter: %d\n", samp);
          NodeConf->pushNewApproximationChoiceForOperation(
              GPUNodeConfiguration::APPROX::INPUT_SAMPLING, samp);
          idx += 2;
        } else if (tokens[idx] == "samp_fp16") {
          DEBUG("Found samp_fp16 option\n");
          int samp_fp16 = std::stoi(tokens[idx + 1]);
          DEBUG("samp_fp16 parameter: %d\n", samp_fp16);
          NodeConf->pushNewApproximationChoiceForOperation(
              GPUNodeConfiguration::APPROX::INPUT_SAMPLING_HP, samp_fp16);
          idx += 2;
        } else if (tokens[idx] == "red_samp") {
          DEBUG("Found red_samp option\n");
          int red_samp = std::stoi(tokens[idx + 1]);
          DEBUG("red_samp parameter: %d\n", red_samp);
          NodeConf->pushNewApproximationChoiceForOperation(
              GPUNodeConfiguration::APPROX::REDUCTION_SAMPLING, red_samp);
          idx += 2;
        }
        // TODO: other approximation options handled here
      }

      // Update first TensorID using number of tensor ops in current node
      firstTensorID += NodeConf->getApproxChoices().size();
      
    } else if (tokens[1] == "cpu") {
      DEBUG("Found gpu configuration\n");

      // There must be at least one operation, with an approximation option
      CUSTOM_ASSERT(
          (tokens.size() >= 5) &&
          "Not enough operations - approximation options.");

      CPUNodeConfiguration *NodeConf = new CPUNodeConfiguration();
      InitialConfigurations.back().setup.insert(
          std::make_pair(tokens[0], NodeConf));

      unsigned idx = 2;
      while (idx < tokens.size()) {
        if (tokens[idx] == "add") {
          DEBUG("Found add operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::ADD);
          idx++;
        } else if (tokens[idx] == "batchnorm") {
          DEBUG("Found batchnorm operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::BATCHNORM);
          idx++;
        } else if (tokens[idx] == "conv") {
          DEBUG("Found conv operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::CONV);
          idx++;
        } else if (tokens[idx] == "group_conv") {
          DEBUG("Found group_conv operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::GROUP_CONV);
          idx++;
        } else if (tokens[idx] == "mul") {
          DEBUG("Found mul operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::MUL);
          idx++;
        } else if (tokens[idx] == "relu") {
          DEBUG("Found relu operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::RELU);
          idx++;
        } else if (tokens[idx] == "clipped_relu") {
          DEBUG("Found clipped_relu operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU);
          idx++;
        } else if (tokens[idx] == "tanh") {
          DEBUG("Found tanh operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::TANH);
          idx++;
        } else if (tokens[idx] == "pool_max") {
          DEBUG("Found pool_max operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::POOL_MAX);
          idx++;
        } else if (tokens[idx] == "pool_mean") {
          DEBUG("Found pool_mean operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::POOL_MEAN);
          idx++;
        } else if (tokens[idx] == "pool_min") {
          DEBUG("Found pool_min operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::POOL_MIN);
          idx++;
        } else if (tokens[idx] == "softmax") {
          DEBUG("Found softmax operation\n");
          NodeConf->pushNewTensorOperation(
              CPUNodeConfiguration::TENSOR_OP::SOFTMAX);
          idx++;
        } else /*Not a new operation. This means an approximation option*/
            if (tokens[idx] == "fp32") {
          DEBUG("Found fp32 option\n");
          int fp32 = std::stoi(tokens[idx + 1]);
          DEBUG("fp32 parameter: %d, ignoring\n", fp32);
          NodeConf->pushNewApproximationChoiceForOperation(
              CPUNodeConfiguration::APPROX::FP32, fp32);
          idx += 2;
        } else if (tokens[idx] == "perf") {
          DEBUG("Found perf option\n");
          int perf = std::stoi(tokens[idx + 1]);
          DEBUG("perf parameter: %d\n", perf);
          NodeConf->pushNewApproximationChoiceForOperation(
              CPUNodeConfiguration::APPROX::PERFORATION, perf);
          idx += 2;
        } else if (tokens[idx] == "samp") {
          DEBUG("Found samp option\n");
          int samp = std::stoi(tokens[idx + 1]);
          DEBUG("samp parameter: %d\n", samp);
          NodeConf->pushNewApproximationChoiceForOperation(
              CPUNodeConfiguration::APPROX::INPUT_SAMPLING, samp);
          idx += 2;
        }
        // TODO: other approximation options handled here
      }

    } else {
      DEBUG("Invalid Configuration File\n");
      exit(1);
    }
  }

  qin.close();
  DEBUG("DONE.\n");
}
void RuntimeController::computeParetoConfigurationPoints() {

  // Keep indices of pareto optimal points (configurations from
  // InitialConfigurations vector that were copied to Configurations vector.)
  // The others' setup pointer needs to be deleted
  std::vector<unsigned> Indices;

  // Baseline configuration (first one we read) always belongs to the curve
  SpeedupConfigurations.push_back(&InitialConfigurations[0]);
  EnergyConfigurations.push_back(&InitialConfigurations[0]);

  // Sort the configurations according to accuracy loss
  INFO("Sorting autotuner configurations...\n");
  std::sort(
      InitialConfigurations.begin() + 1, InitialConfigurations.end(),
      ConfigurationLessThan());
  INFO("Done sorting.\n");

  for (unsigned start_idx = 1; start_idx < InitialConfigurations.size();) {
    // Points to first Configuration with different (higher) accuracy loss
    // compared to the one pointed by start_idx
    unsigned end_idx = start_idx + 1;
    while ((end_idx < InitialConfigurations.size()) &&
           (InitialConfigurations[end_idx].accuracyLoss -
                InitialConfigurations[start_idx].accuracyLoss <
            AL_THRESHOLD)) {
      end_idx++;
    }
    DEBUG("start_idx = %d, end_idx = %d\n", start_idx, end_idx);
    // Now, all elements in [start_idx, end_idx) have equal accuracy loss,
    // that is lower from later ones.

    // Find the best speedup and energy between them as well
    float sp = -1.0; // FLT_MIN
    unsigned sp_idx = 0;

    float en = -1.0; // FLT_MIN
    unsigned en_idx = 0;

    for (unsigned i = start_idx; i < end_idx; i++) {
      if (InitialConfigurations[i].speedup > sp) {
        sp = InitialConfigurations[i].speedup;
        sp_idx = i;
      }
      if (InitialConfigurations[i].energy > en) {
        en = InitialConfigurations[i].energy;
        en_idx = i;
      }
    }
    DEBUG(
        "accuracy loss = %f, speedup = %f, at sp_idx = %d\n",
        InitialConfigurations[sp_idx].accuracyLoss, sp, sp_idx);
    // Found best speedup for this accuracy point (not dominated by any of
    // these).
    DEBUG(
        "accuracy loss = %f, energy = %f, at en_idx = %d\n",
        InitialConfigurations[en_idx].accuracyLoss, en, en_idx);
    // Found best energy for this accuracy point (not dominated by any of
    // these).

    // Now, we need to check that it is not dominated.
    // - better accuracy loss of all in initial configurations out of
    // start_idx, end_idx range
    // - better or equal speedup to the ones within this range
    // We only need to check the points already in Configurations, that have
    // already been inserted in pareto frontier. These have better accuracy
    // loss, so this one will only be added if it shows better speedup
    // The one in curve with best speedup so far is the last one (with worst
    // = highest accuracy loss), so compare only with that one.

    // Similar handling of energy vector

    bool sp_notDominated = true;
    if (!SpeedupConfigurations.empty()) {
      if (SpeedupConfigurations.back()->speedup >= sp)
        sp_notDominated = false;
    }

    bool en_notDominated = true;
    if (!EnergyConfigurations.empty()) {
      if (EnergyConfigurations.back()->energy >= en)
        en_notDominated = false;
    }

    DEBUG("sp_notDominated = %d\n", sp_notDominated);
    DEBUG("en_notDominated = %d\n", en_notDominated);

    // If not dominated, insert in pareto frontier set
    if (sp_notDominated) {
      SpeedupConfigurations.push_back(&InitialConfigurations[sp_idx]);
    }
    if (en_notDominated) {
      EnergyConfigurations.push_back(&InitialConfigurations[en_idx]);
    }

    // Keep track of unnecessary configurations
    for (unsigned i = start_idx; i < end_idx; i++) {
      if (((i != sp_idx) || (!sp_notDominated)) &&
          ((i != en_idx) || (!en_notDominated)))
        Indices.push_back(i);
    }

    // Continue from next accuracy loss level
    start_idx = end_idx;
  }

  // All elements in InitialConfigurations whose index is in Indices are no
  // longer needed.
  //  for (std::vector<unsigned>::iterator idx_it = Indices.begin(), idx_e =
  //  Indices.end();
  //       idx_it != idx_e; ++idx_it) {
  //    std::map<std::string, NodeConfiguration * > ConfSetup =
  //      InitialConfigurations[*idx_it].setup;
  //    for (std::map<std::string, NodeConfiguration* >::const_iterator it =
  //    ConfSetup.begin();
  //     it != ConfSetup.end(); ++it) {
  //      delete it->second;
  //    }
  //  }
  //  InitialConfigurations.clear();
}

void RuntimeController::compute3DParetoConfigurationPoints() {

  // Sort the configurations according to accuracy loss
  INFO("Sorting autotuner configurations...\n");
  std::sort(
      InitialConfigurations.begin(), InitialConfigurations.end(),
      ConfigurationLessThan());
  INFO("Done sorting.\n");

  for (unsigned start_idx = 0; start_idx < InitialConfigurations.size();) {
    // Points to first Configuration with different (higher) accuracy loss
    // compared to the one pointed by start_idx
    unsigned end_idx = start_idx + 1;
    while ((end_idx < InitialConfigurations.size()) &&
           (InitialConfigurations[end_idx].accuracyLoss -
                InitialConfigurations[start_idx].accuracyLoss <
            AL_THRESHOLD)) {
      end_idx++;
    }
    DEBUG("start_idx = %d, end_idx = %d\n", start_idx, end_idx);
    // Now, all elements in [start_idx, end_idx) have equal accuracy loss,
    // that is lower from later ones and worse than those already in curve
    // (so they cannot displace them).

    // Find candidates from [start_idx, end_idx) to be inserted
    // Keep their indices. If a point is dominated (strictly worse),
    // its index will not be inserted
    std::vector<unsigned> Indices;

    for (unsigned i = start_idx; i < end_idx; i++) {
      bool dominated = false;
      for (unsigned j = i + 1; (j < end_idx) && !dominated; j++) {
        if ((InitialConfigurations[i].speedup <
             InitialConfigurations[j].speedup) &&
            (InitialConfigurations[i].energy <
             InitialConfigurations[j].energy)) {
          dominated = true;
        }
      }
      if (!dominated) {
        DEBUG(
            "accuracy loss = %f, speedup = %f, energy = %f, at idx = %d\n",
            InitialConfigurations[i].accuracyLoss,
            InitialConfigurations[i].speedup, InitialConfigurations[i].energy,
            i);
        Indices.push_back(i);
      }
    }

    for (std::vector<unsigned>::iterator idx_it = Indices.begin(),
                                         idx_e = Indices.end();
         idx_it != idx_e; ++idx_it) {
      Configuration &CandidateConfiguration = InitialConfigurations[*idx_it];

      if (!ThreeDCurveConfigurations.empty()) {
        bool notDominated = true;
        for (unsigned i = 0;
             (i < ThreeDCurveConfigurations.size()) && notDominated; i++) {
          if ((CandidateConfiguration.speedup <=
               ThreeDCurveConfigurations[i]->speedup) &&
              (CandidateConfiguration.energy <=
               ThreeDCurveConfigurations[i]->energy)) {
            // This configuration is not better, in at least one characteristic,
            // compared to the existing ones in the curve.
            notDominated = false;
          }
        }
        if (notDominated) {
          ThreeDCurveConfigurations.push_back(&CandidateConfiguration);
        }
      } else {
        // If the curve is empty, we know that this is a point that must be
        // inserted. It has the best accuracy loss, and belongs here because
        // it is not dominated by any point in this accuracy range.
        ThreeDCurveConfigurations.push_back(&CandidateConfiguration);
      }
    }

    // Continue from next accuracy loss level
    start_idx = end_idx;
  }
}

/// Calls print() on every configuration in \p Confs, in order.
void RuntimeController::printConfigurations(
    std::vector<struct Configuration> &Confs) {

  for (struct Configuration &Conf : Confs)
    Conf.print();
}

/// Calls print() on every configuration pointed to by \p Confs, in order.
void RuntimeController::printConfigurations(
    std::vector<struct Configuration *> &Confs) {

  for (struct Configuration *Conf : Confs)
    Conf->print();
}

/// Returns the frequency value currently stored in g_freq.
unsigned long RuntimeController::getLastFrequency() { return g_freq; }

/// Stores \p f in g_freq, to be returned by getLastFrequency().
void RuntimeController::setLastFrequency(unsigned long f) { g_freq = f; }

/// Returns the speedup value currently stored in g_speedup.
double RuntimeController::getLastSpeedup() { return g_speedup; }

/// Stores \p s in g_speedup, to be returned by getLastSpeedup().
void RuntimeController::setLastSpeedup(double s) { g_speedup = s; }

/// Advances configurationIdx to the following entry of the active
/// Configurations vector, wrapping around to 0 after the last one.
void RuntimeController::findNextConfiguration() {
  const auto curveSize = Configurations->size();
  configurationIdx = (configurationIdx + 1) % curveSize;

  DEBUG(
      "findNextConfiguration: Updated configurationIdx to %u.\n",
      configurationIdx);
}

/// Selects the pareto curve matching \p sk, makes it the active
/// `Configurations` vector and sets configurationIdx to the entry chosen for
/// \p goal.
///
/// \param goal Target value: a speedup, an energy value or an accuracy loss,
///             depending on \p sk.
/// \param sk   Which metric to search on. SPEEDUP and ACCURACY_LOSS search
///             SpeedupConfigurations; ENERGY searches EnergyConfigurations.
///             Any other value aborts.
///
/// The search range is [begin(), end() - 1), so when no earlier entry
/// qualifies the result is the last entry of the curve. Decrementing end() is
/// only valid because the pareto curves are expected to hold at least one
/// configuration (the baseline).
void RuntimeController::findTargetConfiguration(
    float goal, enum SEARCH_KIND sk) {

  DEBUG("findTargetConfiguration: goalVal: %f, search kind: %d.\n", goal, sk);
  std::vector<struct Configuration *>::iterator low_it;
  switch (sk) {
  case SPEEDUP: {
    // Assigning one of Pareto configs to 'Configurations' class attribute
    Configurations = &SpeedupConfigurations;
    low_it = std::lower_bound(
        Configurations->begin(), Configurations->end() - 1, goal,
        ConfigurationLessThan_SP());
    configurationIdx = low_it - Configurations->begin();
    break;
  }
  case ENERGY: {
    Configurations = &EnergyConfigurations;
    low_it = std::lower_bound(
        Configurations->begin(), Configurations->end() - 1, goal,
        ConfigurationLessThan_E());
    configurationIdx = low_it - Configurations->begin();
    break;
  }
  case ACCURACY_LOSS: {
    Configurations = &SpeedupConfigurations;
    low_it = std::lower_bound(
        Configurations->begin(), Configurations->end() - 1, goal,
        ConfigurationLessThan_AL());
    // Prefer the entry that does not exceed the accuracy loss goal. When even
    // the first entry exceeds it there is nothing to step back to: stay on
    // the first entry instead of moving the iterator before begin(), which
    // would be undefined behavior and produce a wrapped-around index.
    if (((*low_it)->accuracyLoss > goal) &&
        (low_it != Configurations->begin()))
      --low_it;
    configurationIdx = low_it - Configurations->begin();
    break;
  }
  default: {
    CUSTOM_ASSERT(false && "Unknown search option for optimization target");
    ERROR("Unknown search option for optimization target.");
    abort();
  }
  }
  // After the search, for SPEEDUP and ENERGY low_it is the std::lower_bound
  // result, clamped to the last entry of the curve. For ACCURACY_LOSS it is
  // the entry with the goal value or, when possible, the one just before it.

  DEBUG(
      "findTargetConfiguration: Updated configurationIdx to %u.\n",
      configurationIdx);
}

/// Probabilistically steps configurationIdx back by one entry of the active
/// Configurations vector, based on where \p goal falls between the speedups of
/// the currently selected configuration and the one before it.
///
/// \param goal Target speedup. It is compared against the `speedup` fields of
///             the two neighbouring configurations.
///
/// The closer \p goal is to the previous configuration's speedup, the larger
/// the probability (low_pb) of switching to it; "strategy 4" below then shifts
/// part of that probability towards the currently selected configuration.
/// When configurationIdx is 0 there is no previous configuration and the
/// index is left unchanged.
void RuntimeController::adjustTargetConfiguration(float goal) {

  DEBUG("adjustTargetConfiguration: goalVal: %f.\n\n", goal);

  // Only consumed by the disabled `rd = pseudo_rd` variant further down, but
  // still advanced on every call.
  pseudo_rd += 0.1f;
  // Find configuration before the selected one.
  // There is always one, unless goal is 1. Then, we would pick baseline, and
  //  both upper and lower should be the same configuration, at index 0.
  unsigned prev_conf_idx = configurationIdx > 0 ? configurationIdx - 1
                                                : configurationIdx;
  // Get the two configurations' speedup, and compute the appropriate ranges
  float curr_conf_speedup = (*Configurations)[configurationIdx]->speedup;
  float prev_conf_speedup = (*Configurations)[prev_conf_idx]->speedup;
  float sp_diff = curr_conf_speedup - prev_conf_speedup;

  // Distance of the goal from the upper (current) and lower (previous) speedup
  float high_range = curr_conf_speedup - goal;
  float low_range = goal - prev_conf_speedup;

  // These represent how likely we are to pick the upper or lower configuration
  float high_pb = 0.0, low_pb = 0.0;
  if (configurationIdx == prev_conf_idx) {
    // Single candidate (index 0): either outcome selects the same entry
    high_pb = low_pb = 1.0;
  } else {
    // Linear interpolation: the nearer the goal is to one configuration, the
    // more likely that configuration is picked.
    // NOTE(review): divides by sp_diff, which is zero if the two neighbouring
    // configurations have equal speedup - confirm the curve excludes that.
    high_pb = low_range / sp_diff;
    low_pb = high_range / sp_diff;

    //***--- Probability adjustment strategy 1 ---***//
    // No big adjustments at edges of probability range
//    float adjust_val = 0.0;
//    if (low_pb < high_pb) {
//      adjust_val = low_pb * 0.2;
//    } else {
//      adjust_val = high_pb * 0.2;
//    }
//    low_pb -= adjust_val;
//    high_pb += adjust_val;
    //***---                                   ---***//

    //***--- Probability adjustment strategy 2 ---***//
    // No big adjustment at high edge of probability range
//    float adjust_val = high_pb * 0.2 > 0.1 ? 0.1 : high_pb * 0.2;
//    low_pb -= adjust_val;
//    high_pb += adjust_val;
    //***---                                   ---***//

    //***--- Probability adjustment strategy 3 ---***//
    //Similar to 2, but higher always increases, more significantly
//    float adjust_val = low_pb * 0.5 > 0.1 ? 0.1 : low_pb * 0.5;
//    low_pb -= adjust_val;
//    high_pb += adjust_val;
    //***---                                   ---***//

    //***--- Probability adjustment strategy 4 (active) ---***//
    //Similar to 2, but higher always increases, more significantly
    // Low end, high end a bit less aggressive than total range
    // Move 60% of low_pb (capped at 0.2, and at high_pb) from the lower to the
    // upper configuration.
    float adjust_val = low_pb * 0.6 > 0.2 ? 0.2 : low_pb * 0.6;
    adjust_val = adjust_val > high_pb ? high_pb : adjust_val;
    low_pb -= adjust_val;
    high_pb += adjust_val;
    //***---                                   ---***//
  }

  DEBUG(
      "**---- adjustTargetConfiguration: upper conf = %s with probability: "
      "%f.\n",
      ((*Configurations)[configurationIdx]->name).c_str(), high_pb);
  DEBUG(
      "**---- adjustTargetConfiguration: lower conf = %s with probability: "
      "%f.\n\n",
      ((*Configurations)[prev_conf_idx]->name).c_str(), low_pb);

  // Select a random number from 0 to 1
  // We assign the (0..low_pb) to the lower configuration, and the (low_pb..1)
  // to the upper
  // (presumably `distr` is a uniform distribution over [0, 1) - confirm
  // against its declaration)
  // float rd = static_cast <float> (rand()) / static_cast <float> (RAND_MAX) ;
  //float rd = pseudo_rd;
  float rd = distr(generator);
  if (rd < low_pb) {
    // If the probability is in the low range
    configurationIdx = prev_conf_idx;
  }

  DEBUG(
      "adjustTargetConfiguration: rand: %f : Updated configurationIdx to %u.\n",
      rd, configurationIdx);
}
/// Returns a pseudo-random goal speedup drawn with rand(), ranging from 1.0
/// (rand() == 0) up to MAX_GOAL_SPEEDUP (rand() == RAND_MAX).
float RuntimeController::getGoalSpeedup() {
  // Number of rand() steps that correspond to one unit of speedup
  const auto stepsPerUnit = RAND_MAX / (MAX_GOAL_SPEEDUP - 1.0);
  return 1.0 + (rand() / stepsPerUnit);
}

/// Returns the stored baseline_time value.
double RuntimeController::getBaselineTime() {
  return baseline_time;
}

/// Returns the stored `slowdowns` pointer; the pointee is not copied.
Slowdowns *RuntimeController::getSlowdowns() {
  return slowdowns;
}

// Functions to be inserted with initializeTensorRT and clearTensorRT
/// Allocates the global runtime controller `RC` and initializes it from
/// \p ConfigFile. The global is assigned before init() runs.
extern "C" void llvm_hpvm_initializeRuntimeController(const char *ConfigFile) {
  RC = new RuntimeController();
  RC->init(ConfigFile);
}

/// Destroys the global runtime controller `RC`.
///
/// The pointer is reset afterwards so that calling this function again is a
/// harmless `delete nullptr` instead of a double delete, and so that a stale
/// `RC` is never left pointing at freed memory.
extern "C" void llvm_hpvm_clearRuntimeController() {
  delete RC;
  RC = nullptr;
}

//*** Methods to compute accuracy of a tensor by the runtime controller   ***//
uint32_t *labels_from_file = NULL;

uint32_t *
hpvm_rt_readLabelsBatch_cached(const char *labels_file, int start, int end) {

  // Initialize buffer
  if (!labels_from_file) {
    FILE *file = fopen(labels_file, "rb");
    if (file == NULL) {
      ERROR("Data file %s is not found. Aborting...\n", labels_file);
      abort();
    }
    
    // Get number of labels
    fseek(file, 0, SEEK_END);
    long size = ftell(file);
    fseek(file, 0, SEEK_SET); // return file pointer to beginning

    // Allocate memory for labels
    labels_from_file = (uint32_t *) malloc(size);
    if (labels_from_file == NULL) {
      ERROR("Memory allocation for labels unsucessfull. Aborting...\n");
      abort();
    }

    // Copy the labels file into the allocated buffer
    size_t result = fread(labels_from_file, 1, size, file);
    if (result != size) {
      // We did not read as many elemets as there are in the file
      ERROR("Reading labels file unsucessfull. Aborting...\n");
      abort();
    }

    fclose(file);
  }

  //  int num_labels = end - start;
  //  uint32_t* labels = (uint32_t*) malloc(sizeof(uint32_t) * num_labels);
  //  for (unsigned i = start; i < end; i++) {
  //    labels[i-start] = labels_from_file[i];
  //  }
  //  return labels;

  // Return pointer to labels
  return &labels_from_file[start];
}

// Running average of the accuracies returned by hpvm_rt_computeAccuracy3 and
// the number of calls it was averaged over.
static float average_accuracy = 0.0;
static int num_executations = 0;

//*** Copied from dnn_sources/include/utils.h                             ***//
/// Computes the top-1 accuracy (in percent) of a classification result.
///
/// \param labels     Expected class index for each batch element; at least
///                   dim_sizes[0] entries are read.
/// \param result_ptr Pointer to a `struct Tensor` whose dims.dim_sizes[0] is
///                   the batch size and dims.dim_sizes[1] the number of
///                   classes, with host_data read as row-major floats.
/// \return Accuracy of this batch in percent.
///
/// Side effects: prints the batch shape and accuracy to stdout, updates the
/// running average over all calls, and rewrites the file "final_accuracy"
/// with that average. Writing the file is best effort: if it cannot be opened
/// the update is skipped.
float hpvm_rt_computeAccuracy3(uint32_t *labels, void *result_ptr) {

  struct Tensor *result = (struct Tensor *)result_ptr;

  size_t batch_dim = result->dims.dim_sizes[0];
  size_t num_classes = result->dims.dim_sizes[1];
  float *data = (float *)result->host_data;
  size_t num_errors = 0;

  printf("batch_dim = %zu, num_classes = %zu \n", batch_dim, num_classes);

  for (size_t i = 0; i < batch_dim; i++) {

    // Index of the largest score in row i; ties keep the lowest index
    size_t chosen = 0;
    for (size_t id = 1; id < num_classes; ++id) {
      if (data[i * num_classes + chosen] < data[i * num_classes + id])
        chosen = id;
    }

    if (chosen != labels[i])
      num_errors++;
  }

  float accuracy =
      static_cast<double>(batch_dim - num_errors) / batch_dim * 100.0;
  printf("****** Accuracy = %f \n\n", accuracy);

  // Fold this batch into the running average
  average_accuracy = accuracy + (average_accuracy * num_executations);
  num_executations++;
  average_accuracy = average_accuracy / num_executations;

  FILE *fp = fopen("final_accuracy", "w+");
  if (fp != NULL) {

    std::ostringstream ss;
    ss << std::fixed << average_accuracy;
    std::string print_str = ss.str();

    fwrite(print_str.c_str(), 1, print_str.length(), fp);
    // Close only a stream that was actually opened: fclose(NULL) is undefined
    fclose(fp);
  }

  return accuracy;
}


#define llvm_hpvm_invokeRtControl_BASE llvm_hpvm_invokeRtControl
//#define llvm_hpvm_invokeRtControl_ADJUST_PR llvm_hpvm_invokeRtControl
//#define llvm_hpvm_invokeRtControl_ITERATE llvm_hpvm_invokeRtControl

/// Baseline end-of-iteration hook: records accuracy and profiling data but
/// never changes the selected configuration.
///
/// \param result Output tensor of the iteration, passed to the accuracy
///               computation.
/// \param str    Path of the labels file.
/// \param start  Index of the first label of this batch.
/// \param end    Forwarded to the label reader.
extern "C" void llvm_hpvm_invokeRtControl_BASE(
    void *result, const char *str, int start, int end) {

  uint32_t *batchLabels = hpvm_rt_readLabelsBatch_cached(str, start, end);
  hpvm_rt_computeAccuracy3(batchLabels, result);

  // Compute statistics of the iteration that just finished
  const double iterTime = RC->getCurrentIterationComputeTime();
  const double iterEnergy = RC->getCurrentIterationComputeEnergy();

  // No control work is done here; the profiler is only cycled
  RC->resume_profiler();
  RC->pause_profiler();

  const std::pair<double, double> controlCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationControlTime(controlCost.first);
  RC->addToCurrentIterationControlEnergy(controlCost.second);

  INFO(
      "current iteration time = %f, current iteration energy = %f\n\n",
      iterTime, iterEnergy);

  // Close the iteration
  RC->end_iteration();
}

/// End-of-iteration hook that steps to the next configuration on every call.
///
/// \param result Output tensor of the iteration, passed to the accuracy
///               computation.
/// \param str    Path of the labels file.
/// \param start  Index of the first label of this batch.
/// \param end    Forwarded to the label reader.
extern "C" void llvm_hpvm_invokeRtControl_ITERATE(
    void *result, const char *str, int start, int end) {

  uint32_t *batchLabels = hpvm_rt_readLabelsBatch_cached(str, start, end);
  hpvm_rt_computeAccuracy3(batchLabels, result);

  // Compute statistics of the iteration that just finished
  const double iterTime = RC->getCurrentIterationComputeTime();
  const double iterEnergy = RC->getCurrentIterationComputeEnergy();

  RC->resume_profiler();
  // Advance the index first, then run the regular accuracy-loss search with
  // the accuracy loss of the entry the index now points to.
  RC->findNextConfiguration();
  const float nextAccuracyLoss =
      RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->accuracyLoss;
  RC->findTargetConfiguration(nextAccuracyLoss, ACCURACY_LOSS);
  RC->pause_profiler();

  const std::pair<double, double> controlCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationControlTime(controlCost.first);
  RC->addToCurrentIterationControlEnergy(controlCost.second);

  INFO(
      "current iteration time = %f, current iteration energy = %f\n\n",
      iterTime, iterEnergy);

  // Close the iteration
  RC->end_iteration();
}

/// End-of-iteration hook that re-targets the configuration whenever the
/// iteration frequency changed.
///
/// When the frequency differs from the last recorded one, the target speedup
/// is recomputed as (iteration time / baseline time) scaled by the current
/// configuration's speedup, and stored; otherwise the stored speedup is
/// reused. The configuration is then searched on the speedup curve.
///
/// \param result Output tensor of the iteration, passed to the accuracy
///               computation.
/// \param str    Path of the labels file.
/// \param start  Index of the first label of this batch.
/// \param end    Forwarded to the label reader.
extern "C" void llvm_hpvm_invokeRtControl_ADJUST(
    void *result, const char *str, int start, int end) {

  uint32_t *batchLabels = hpvm_rt_readLabelsBatch_cached(str, start, end);
  hpvm_rt_computeAccuracy3(batchLabels, result);

  // Statistics of the iteration that just finished
  const double iterEnergy = RC->getCurrentIterationComputeEnergy();
  RC->readIterationFrequency();

  RC->resume_profiler();
  const double iterTime = RC->getCurrentIterationComputeTime();
  double goalSpeedup;
  if (RC->getLastFrequency() != RC->getIterationFrequency()) {
    const double baseTime = RC->getBaselineTime();
    // Slowdown relative to the current configuration ...
    goalSpeedup = iterTime / baseTime;
    // ... rescaled so that it is relative to the baseline
    goalSpeedup *= RC->getCurrentConfigurationSpeedup();
    RC->setLastFrequency(RC->getIterationFrequency());
    RC->setLastSpeedup(goalSpeedup);
  } else {
    // Same frequency as before: keep the previous goal
    goalSpeedup = RC->getLastSpeedup();
  }
  RC->findTargetConfiguration(goalSpeedup, SPEEDUP);
  RC->pause_profiler();

  const std::pair<double, double> controlCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationControlTime(controlCost.first);
  RC->addToCurrentIterationControlEnergy(controlCost.second);

  // Needed for the frequency variation experiment - not part of the control.
  // Accounted separately, as configuration time/energy.
  RC->resume_profiler();
  RC->updateFrequency();
  RC->pause_profiler();

  const std::pair<double, double> freqUpdateCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationConfigTime(freqUpdateCost.first);
  RC->addToCurrentIterationConfigEnergy(freqUpdateCost.second);

  INFO(
      "current iteration time = %f, current iteration energy = %f\n",
      iterTime, iterEnergy);
  INFO("target speedup = %lf\n\n", goalSpeedup);

  // Close the iteration
  RC->end_iteration();
}

/// End-of-iteration hook that re-targets the configuration whenever the
/// iteration frequency changed, followed by the probabilistic adjustment of
/// adjustTargetConfiguration().
///
/// When the frequency differs from the last recorded one, the target speedup
/// is recomputed as (iteration time / baseline time) scaled by the current
/// configuration's speedup, and stored; otherwise the stored speedup is
/// reused.
///
/// \param result Output tensor of the iteration, passed to the accuracy
///               computation.
/// \param str    Path of the labels file.
/// \param start  Index of the first label of this batch.
/// \param end    Forwarded to the label reader.
extern "C" void llvm_hpvm_invokeRtControl_ADJUST_PR(
    void *result, const char *str, int start, int end) {

  uint32_t *batchLabels = hpvm_rt_readLabelsBatch_cached(str, start, end);
  hpvm_rt_computeAccuracy3(batchLabels, result);

  // Statistics of the iteration that just finished
  const double iterEnergy = RC->getCurrentIterationComputeEnergy();
  RC->readIterationFrequency();

  RC->resume_profiler();
  const double iterTime = RC->getCurrentIterationComputeTime();
  double goalSpeedup;
  if (RC->getLastFrequency() != RC->getIterationFrequency()) {
    const double baseTime = RC->getBaselineTime();
    // Slowdown relative to the current configuration ...
    goalSpeedup = iterTime / baseTime;
    // ... rescaled so that it is relative to the baseline
    goalSpeedup *= RC->getCurrentConfigurationSpeedup();
    RC->setLastFrequency(RC->getIterationFrequency());
    RC->setLastSpeedup(goalSpeedup);
  } else {
    // Same frequency as before: keep the previous goal
    goalSpeedup = RC->getLastSpeedup();
  }
  RC->findTargetConfiguration(goalSpeedup, SPEEDUP);
  RC->adjustTargetConfiguration(goalSpeedup);
  RC->pause_profiler();

  const std::pair<double, double> controlCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationControlTime(controlCost.first);
  RC->addToCurrentIterationControlEnergy(controlCost.second);

  // Needed for the frequency variation experiment - not part of the control.
  // Accounted separately, as configuration time/energy.
  RC->resume_profiler();
  RC->updateFrequency();
  RC->pause_profiler();

  const std::pair<double, double> freqUpdateCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationConfigTime(freqUpdateCost.first);
  RC->addToCurrentIterationConfigEnergy(freqUpdateCost.second);

  INFO(
      "current iteration time = %f, current iteration energy = %f\n",
      iterTime, iterEnergy);
  INFO("target speedup = %lf\n\n", goalSpeedup);

  // Close the iteration
  RC->end_iteration();
}

/// End-of-iteration hook driven by the Slowdowns object: the next slowdown
/// value is used as the target speedup for the following iteration.
///
/// \param result Output tensor of the iteration, passed to the accuracy
///               computation.
/// \param str    Path of the labels file.
/// \param start  Index of the first label of this batch.
/// \param end    Forwarded to the label reader.
extern "C" void llvm_hpvm_invokeRtControl_SLOWDOWN(
    void *result, const char *str, int start, int end) {

  uint32_t *batchLabels = hpvm_rt_readLabelsBatch_cached(str, start, end);
  hpvm_rt_computeAccuracy3(batchLabels, result);

  // Compute statistics of the iteration that just finished
  const double iterTime = RC->getCurrentIterationComputeTime();
  const double iterEnergy = RC->getCurrentIterationComputeEnergy();

  // Remember which configuration was active, for the log below
  std::string oldConfName =
      RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->name;

  RC->resume_profiler();
  float slowdown = RC->getSlowdowns()->getNextSlowdown();
  RC->findTargetConfiguration(slowdown, SPEEDUP);
  RC->pause_profiler();

  const std::pair<double, double> controlCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationControlTime(controlCost.first);
  RC->addToCurrentIterationControlEnergy(controlCost.second);

  // Configuration selected by the search
  std::string newConfName =
      RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->name;
  float newConfSpeedup =
      RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->speedup;

  INFO(
      "current iteration time = %f, current iteration energy = %f\n",
      iterTime, iterEnergy);
  INFO("slowdown (target speedup) = %f\n", slowdown);
  INFO("Previous configuration: %s\n", oldConfName.c_str());
  INFO(
      "Swapping to next configuration: %s with speedup %f\n\n",
      newConfName.c_str(), newConfSpeedup);

  // Close the iteration
  RC->end_iteration();
}

/// End-of-iteration hook driven by the Slowdowns object, with the
/// probabilistic adjustment of adjustTargetConfiguration() applied after the
/// speedup search.
///
/// \param result Output tensor of the iteration, passed to the accuracy
///               computation.
/// \param str    Path of the labels file.
/// \param start  Index of the first label of this batch.
/// \param end    Forwarded to the label reader.
extern "C" void llvm_hpvm_invokeRtControl_SLOWDOWN_PR(
    void *result, const char *str, int start, int end) {

  uint32_t *batchLabels = hpvm_rt_readLabelsBatch_cached(str, start, end);
  hpvm_rt_computeAccuracy3(batchLabels, result);

  // Compute statistics of the iteration that just finished
  const double iterTime = RC->getCurrentIterationComputeTime();
  const double iterEnergy = RC->getCurrentIterationComputeEnergy();

  // Remember which configuration was active, for the log below
  std::string oldConfName =
      RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->name;

  RC->resume_profiler();
  float slowdown = RC->getSlowdowns()->getNextSlowdown();
  RC->findTargetConfiguration(slowdown, SPEEDUP);
  RC->adjustTargetConfiguration(slowdown);
  RC->pause_profiler();

  const std::pair<double, double> controlCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationControlTime(controlCost.first);
  RC->addToCurrentIterationControlEnergy(controlCost.second);

  // Configuration selected by the search and adjustment
  std::string newConfName =
      RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->name;
  float newConfSpeedup =
      RC->getSpeedupConfigurations()[RC->getConfigurationIdx()]->speedup;

  INFO(
      "current iteration time = %f, current iteration energy = %f\n",
      iterTime, iterEnergy);
  INFO("slowdown (target speedup) = %f\n", slowdown);
  INFO("Previous configuration: %s\n", oldConfName.c_str());
  INFO(
      "Swapping to next configuration: %s with speedup %f\n\n",
      newConfName.c_str(), newConfSpeedup);

  // Close the iteration
  RC->end_iteration();
}

/// End-of-iteration hook that targets a random goal speedup, obtained from
/// getGoalSpeedup(), on the speedup curve.
///
/// \param result Output tensor of the iteration, passed to the accuracy
///               computation.
/// \param str    Path of the labels file.
/// \param start  Index of the first label of this batch.
/// \param end    Forwarded to the label reader.
extern "C" void llvm_hpvm_invokeRtControl_RAND(
    void *result, const char *str, int start, int end) {

  uint32_t *batchLabels = hpvm_rt_readLabelsBatch_cached(str, start, end);
  hpvm_rt_computeAccuracy3(batchLabels, result);

  // Compute statistics of the iteration that just finished
  const double iterTime = RC->getCurrentIterationComputeTime();
  const double iterEnergy = RC->getCurrentIterationComputeEnergy();

  RC->resume_profiler();
  RC->findTargetConfiguration(RC->getGoalSpeedup(), SPEEDUP);
  RC->pause_profiler();

  const std::pair<double, double> controlCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationControlTime(controlCost.first);
  RC->addToCurrentIterationControlEnergy(controlCost.second);

  INFO(
      "current iteration time = %f, current iteration energy = %f\n\n",
      iterTime, iterEnergy);

  // Close the iteration
  RC->end_iteration();
}

/// Appends the elements of \p vec to the text file at \p path as one line,
/// each element followed by a single space, terminated by a newline.
///
/// \tparam T   Element type; must be streamable with operator<<.
/// \param path File to append to (created if it does not exist).
/// \param vec  Values to write.
///
/// If the file cannot be opened the error is reported and nothing is written.
template <typename T>
static void writeVectorToFile(const char *path, const std::vector<T> &vec) {
  std::ofstream of(path, std::ofstream::out | std::ofstream::app);
  if (!of.good()) {
    ERROR("Cannot write to %s file", path);
    return;
  }
  // Stream each element as its own type; iterating as `float` would silently
  // narrow any other T.
  for (const T &elem : vec)
    of << elem << ' ';
  of << '\n';
}

/// End-of-iteration hook for image benchmarks.
///
/// When a reference output is supplied, the PSNR and SSIM of \p result against
/// it are appended to "psnr.txt" and "ssim.txt". The selected configuration is
/// never changed.
///
/// \param result Output of the iteration.
/// \param gold   Reference output, or nullptr to skip the quality metrics.
/// \param start  Unused.
/// \param end    Unused.
extern "C" void llvm_hpvm_imgInvokeRtControl(void* result, void *gold, int start, int end) {
  // The metric computation is accounted as control time/energy
  RC->resume_profiler();

  const bool haveReference = (gold != nullptr);
  if (haveReference) {
    writeVectorToFile("psnr.txt", PSNR(gold, result));
    writeVectorToFile("ssim.txt", SSIM(gold, result));
  }

  // Compute statistics of the iteration that just finished
  const double iterTime = RC->getCurrentIterationComputeTime();
  const double iterEnergy = RC->getCurrentIterationComputeEnergy();

  RC->pause_profiler();
  const std::pair<double, double> controlCost = RC->get_time_energy();
  RC->reset_profiler();
  RC->addToCurrentIterationControlTime(controlCost.first);
  RC->addToCurrentIterationControlEnergy(controlCost.second);

  INFO("current iteration time = %f, current iteration energy = %f\n\n",
       iterTime, iterEnergy);

  // Close the iteration
  RC->end_iteration();
}