def invert_knob_name_to_range(knob_name_to_range: Dict[str, range]) -> Dict[str, str]:
    """Map every knob id (as a string) to the name of the range that contains it.

    Args:
        knob_name_to_range: Mapping from a knob family name to the range of
            integer knob ids belonging to that family.

    Returns:
        A dict whose keys are ``str(knob_id)`` and whose values are family
        names. If two ranges overlap, the family listed later wins.
    """
    return {
        str(knob_id): family
        for family, id_range in knob_name_to_range.items()
        for knob_id in id_range
    }


class HPVMConfigBuilder:
    """Render a knob configuration as multi-line configuration text.

    Operators are named ``<type>_<index>``. Consecutive operators are grouped
    onto one output line when their types follow ``max_merge_chain``; each
    group is printed as ``<line number> gpu <op> <knob family> <knob id> ...``.
    """

    # Position k inside a merged line may only hold a type from entry k.
    max_merge_chain = [
        ["convolution", "linear"],
        ["add"],
        ["tanh", "relu"],
        ["maxpool"],
    ]

    # Operator type names that are spelled differently in the output text;
    # types not listed here are printed unchanged.
    op_to_op = {"convolution": "conv", "maxpool": "pool_max", "linear": "mul"}

    # Knob family name -> integer knob ids belonging to that family.
    knob_name_to_range = {
        "fp32": range(11, 12),
        "fp16": range(12, 13),
        "perf_fp16": range(151, 168 + 1),
        "samp_fp16": range(261, 269 + 1),
    }

    # Reverse lookup: knob id (as a string) -> knob family name.
    knob_to_knob = invert_knob_name_to_range(knob_name_to_range)

    def __init__(self, ops: List[str]) -> None:
        """Parse operator names and precompute how they merge into lines.

        Args:
            ops: Operator names of the form ``<type>_<index>``; the indices
                must be exactly ``0 .. len(ops) - 1`` in any order.

        Raises:
            ValueError: If a name is malformed or the indices are not a
                permutation of ``0 .. len(ops) - 1``.
        """
        self.ops = ops
        self.types = self._parse_ops(ops)
        # self.types is indexed by the numeric suffix, so keep a name list in
        # the same order; `ops` itself may arrive in any order.
        self._ops_by_index = sorted(
            ops, key=lambda name: self._split_op_name(name)[1]
        )
        self.merged_to_original = self._find_merge_chains(self.types)

    def to_str(self, config: "KnobsT") -> str:
        """Return the configuration text for `config`.

        Args:
            config: Mapping from operator name to knob id; each knob id must
                be a key of ``knob_to_knob``.

        Returns:
            Lines joined with newlines and no trailing newline: a three-line
            prefix, one line per merged operator group (numbered from 1), and
            a closing ``-----`` line.

        Raises:
            ValueError: If `config` does not have one entry per operator.
            KeyError: If an operator is missing from `config` or its knob id
                is not in ``knob_to_knob``.
        """

        def print_op(op_index: int) -> str:
            ty = self.types[op_index]
            knob_value = config[self._ops_by_index[op_index]]
            out_knob_ty = self.knob_to_knob[knob_value]
            out_op_ty = self.op_to_op.get(ty, ty)
            return f"{out_op_ty} {out_knob_ty} {knob_value}"

        def print_line(line_index: int, op_indices) -> str:
            return f"{line_index} gpu " + " ".join(
                print_op(idx) for idx in op_indices
            )

        if len(config) != len(self.ops):
            raise ValueError(f"Incorrect config length, expected {len(self.ops)}")
        prefix = ["0.0", "+++++", "conf1 0.0 0.0 0.0 0.0"]
        suffix = ["-----"]
        body_lines = [
            print_line(line_idx, orig_indices)
            for line_idx, orig_indices in enumerate(self.merged_to_original, start=1)
        ]
        return "\n".join(prefix + body_lines + suffix)

    @staticmethod
    def _split_op_name(op_name: str) -> Tuple[str, int]:
        """Split ``<type>_<index>`` on its last underscore into (type, index).

        Raises:
            ValueError: If there is no underscore or the suffix is not an int.
        """
        ty, sep, idx = op_name.rpartition("_")
        if not sep:
            raise ValueError(f"Operator name {op_name!r} has no '_<index>' suffix")
        return ty, int(idx)

    @staticmethod
    def _parse_ops(ops: List[str]) -> List[str]:
        """Return operator types ordered by the index embedded in each name.

        Raises:
            ValueError: If a name is malformed, or the indices do not cover
                ``0 .. len(ops) - 1`` exactly once each.
        """
        types: List[Optional[str]] = [None] * len(ops)
        for name in ops:
            ty, idx = HPVMConfigBuilder._split_op_name(name)
            # Reject out-of-range indices explicitly: a negative one would
            # otherwise wrap around silently and a large one would escape as
            # IndexError instead of the intended ValueError.
            if not 0 <= idx < len(types):
                raise ValueError("Operator indice not consecutive")
            types[idx] = ty
        if any(x is None for x in types):
            raise ValueError("Operator indice not consecutive")
        return types

    @classmethod
    def _find_merge_chains(cls, types: List[str]) -> List[range]:
        """Greedily group consecutive operators that fit ``max_merge_chain``.

        Starting at each ungrouped operator, the k-th operator of a group
        must have a type listed in ``max_merge_chain[k]``; the group stops at
        the first mismatch, at the end of the chain, or at the end of `types`.
        An operator that cannot start a chain forms a group of its own.

        Returns:
            One ``range`` of operator indices per group, in order, together
            covering every index of `types` exactly once.
        """
        chain = cls.max_merge_chain
        n_ops = len(types)
        groups = []
        start = 0
        while start < n_ops:
            end = start
            # `end < n_ops` keeps a chain that reaches the last operator from
            # indexing past the end of `types`.
            while (
                end < n_ops
                and end - start < len(chain)
                and types[end] in chain[end - start]
            ):
                end += 1
            end = max(end, start + 1)  # At least take 1 operator
            groups.append(range(start, end))
            start = end
        return groups