Power-aware Deep Learning Model Serving

This repository contains the code for the paper "Power-aware Deep Learning Model Serving with μ-Serve" (USENIX ATC 2024).

µ-Serve is a model-serving framework that jointly optimizes power consumption and serving latency/throughput when serving multiple ML models in a homogeneous GPU cluster. Model multiplexing approaches such as model parallelism, model placement, replication, and batching aim to optimize model-serving performance, but they fall short of leveraging GPU frequency scaling for power saving. We demonstrate (1) the benefits of GPU frequency scaling for power saving in model serving, and (2) the necessity of co-designing and optimizing scheduling, fine-grained model multiplexing, and GPU frequency scaling.

Overview

Below is the overall architecture of the proposed model serving framework:

The framework consists of three main components:

  • Power-aware model partitioning and placement (based on AlpaServe)
  • Proxy model-based request scheduling that addresses the autoregressive nature of LLM serving
  • Dynamic GPU frequency scaling based on the (latency) SLO attainment of the serving workload

In the offline phase, sensitivity scores are profiled and used to partition the models across devices, creating opportunities for GPU frequency tuning. In addition, proxy models are trained on historical model input/output data to predict the output sequence length of each request (in LLM serving). In the online phase, requests are scheduled based on the proxy-model predictions (in a shortest-job-first manner), and GPU frequencies are adjusted dynamically, driven by the SLO attainment of the serving workload.
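
As a concrete illustration of this online loop, the minimal sketch below orders requests by the proxy model's predicted output length (shortest-job-first) and steps the GPU SM clock up or down whenever recent SLO attainment crosses a target. All names and constants here (SJFQueue, FrequencyController, the clock range, step size, window length) are illustrative assumptions rather than the repository's actual API; locking clocks with nvidia-smi -lgc requires root privileges.

# Minimal sketch of the online phase: SJF ordering by predicted output
# length plus SLO-attainment-driven GPU frequency stepping. Names and
# constants are illustrative assumptions, not the repository's API.
import heapq
import subprocess
from collections import deque

class SJFQueue:
    """Order pending requests by the proxy model's predicted output length."""
    def __init__(self):
        self._heap = []

    def push(self, predicted_len, request):
        heapq.heappush(self._heap, (predicted_len, id(request), request))

    def pop(self):
        return heapq.heappop(self._heap)[-1]   # shortest predicted job first

class FrequencyController:
    """Step the GPU SM clock up/down based on recent SLO attainment."""
    def __init__(self, slo_target=0.99, step_mhz=75, min_mhz=900, max_mhz=1380):
        self.slo_target, self.step_mhz = slo_target, step_mhz
        self.min_mhz, self.max_mhz = min_mhz, max_mhz
        self.current_mhz = max_mhz
        self.window = deque(maxlen=200)        # 1 if a request met its latency SLO

    def record(self, met_slo):
        self.window.append(1 if met_slo else 0)

    def adjust(self):
        if not self.window:
            return
        attainment = sum(self.window) / len(self.window)
        if attainment >= self.slo_target and self.current_mhz > self.min_mhz:
            self.current_mhz -= self.step_mhz  # SLO headroom: lower clock, save power
        elif attainment < self.slo_target and self.current_mhz < self.max_mhz:
            self.current_mhz += self.step_mhz  # SLO at risk: raise clock
        # Lock the SM clock (requires root; supported on Volta+ GPUs such as V100).
        subprocess.run(["nvidia-smi", "-lgc", f"{self.current_mhz},{self.current_mhz}"],
                       check=False)

In the actual framework, the length prediction comes from the fine-tuned proxy model and the controller acts on the SLO attainment reported by the serving backend.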

Code Structure

The code is organized as follows:

  • output-token-len-predictions/: code for training and evaluating the output token length predictor
    • Note: This step is optional for artifact evaluation (AE), since proxy-model fine-tuning takes >24 hours per task type. An evaluation of the predictor accuracy can be found in model-serving/.
  • model-serving/: code for the proxy model-based request scheduling
  • characterization/: code for characterizing the output token length distribution of the LLM model
  • power/: code for profiling the sensitivity scores, machine/device profiles, and frequency scaling
  • requirements.txt: Python dependencies

Usage

Requirements

The repo is tested on:

  • Ubuntu 22.04.4 LTS
  • Python 3.11.4
  • Conda 23.9.0
  • NVIDIA Tesla V100 (32GB)

Create a Conda environment and install the Python dependencies:

conda create -n atc24-env python=3.11.4 -y
conda activate atc24-env
pip install -r requirements.txt

[Functionality & Reproducing] Proxy-model Prediction

Note that this subsection is optional for AE, as training takes >24 hours; the description and usage commands below are provided to demonstrate functionality.

[Optional] Training dataset generation:

cd output-token-len-predictions
python preprocess_dataset.py [--FLAGS]

[Optional] Training and evaluation of the output token length predictor:

python latency_prediction.py [--FLAGS]

The predictor supports five task modes:

  • Regression --task_type 0
  • Binary Classification --task_type 1
  • Multi-class Classification --task_type 2
  • Multi-class Ordinal Classification --task_type 3
  • Bi-class Ordinal Classification --task_type 4

For regression and ordinal classification, you can choose to use L1 loss or MSE loss during training:

  • L1 loss --l1_loss
  • MSE loss (default; omit the flag)

To enable multi-round support, add the --multi_round flag.
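
For example, the following invocation (an illustrative combination of the flags listed above; any dataset-related flags are omitted) trains the regression predictor with L1 loss:

python latency_prediction.py --task_type 0 --l1_loss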

Example commands can be found in output-token-len-predictions/script.sh.

Prediction Accuracy Eval

Accuracy of various training methods:

cd model-serving/prediction/final/
python eval_prediction.py

Results should be consistent with Table 2 in the paper, where multi-class classification achieves an accuracy of around 0.57 and regression with L1 loss achieves the highest accuracy of 0.61 (this figure is after data cleaning, which we will update in the camera-ready version).

Prediction overhead analysis:

cd model-serving/prediction/
python predictor_overhead_vs_model_serving_latency.py

[Functionality] Characterization

To characterize the output token length distribution of your own LLM, use the code in characterization/:

cd characterization/
python characterization.py
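
For a quick look at an output-length distribution outside the provided script, the minimal sketch below computes token-length percentiles for a list of generated outputs; the gpt2 tokenizer and the in-line example strings are assumptions for illustration, not part of the repository.

# Sketch: token-length percentiles for a set of generated outputs.
import numpy as np
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")   # assumption: any HF tokenizer works here
outputs = ["example generated answer one", "another, somewhat longer generated answer"]
lengths = [len(tokenizer.encode(text)) for text in outputs]
print("p50/p90/p99 output tokens:", np.percentile(lengths, [50, 90, 99]))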

[Functionality & Reproducing] SSJF Scheduler

cd model-serving/
python auto_eval.py
python auto_eval_lineplot.py

Results are located in model-serving/results/.

See README.md in model-serving/ for details (about the functionality and the support for batching).

[Functionality & Reproducing] Power Saving

cd power/
./eval.sh

Results will be located in the same directory power/.
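
Independently of the provided scripts, the frequency scaling and power readings can be sanity-checked with standard NVIDIA tooling (plain nvidia-smi queries, not part of this repository):

nvidia-smi -q -d SUPPORTED_CLOCKS
nvidia-smi --query-gpu=clocks.sm,power.draw --format=csv -l 1

The first command lists the SM/memory clock pairs available for locking on the device; the second samples the current SM clock and power draw once per second.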

[Optional] To profile a new model apart from existing ones, run profile_models.py and characterization.py.

[Optional] To profile a new GPU device, run operator-clustering/benchmark.py.

See README.md in power/ for remaining details.

Citation

If you find this repository useful, please consider citing the following paper:

@inproceedings{qiu2024atc,
  title={Power-aware Deep Learning Model Serving with $\mu$-Serve},
  author={Qiu, Haoran and Mao, Weichao and Patke, Archit and Cui, Shengkun and Jha, Saurabh and Wang, Chen and Franke, Hubertus and Kalbarczyk, Zbigniew T and Ba{\c{s}}ar, Tamer and Iyer, Ravishankar K},
  booktitle={Proceedings of the 2024 USENIX Annual Technical Conference (USENIX ATC 2024)},
  year={2024}
}

Getting Support