diff --git a/hercules_opt/src/editor.rs b/hercules_opt/src/editor.rs
index 16e5c3264d33a7c9bef85fc0fa3cec02963dbf48..b33dc956ecc65d5bef67b804a4ca5a543793b242 100644
--- a/hercules_opt/src/editor.rs
+++ b/hercules_opt/src/editor.rs
@@ -795,6 +795,36 @@ impl<'a, 'b> FunctionEdit<'a, 'b> {
         self.add_constant(constant_to_construct)
     }
 
+    /// Adds a positive-infinity constant of the floating-point type `id`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `id` is not `Type::Float32` or `Type::Float64`.
+    pub fn add_pos_inf_constant(&mut self, id: TypeID) -> ConstantID {
+        let ty = self.get_type(id).clone();
+        let constant_to_construct = match ty {
+            Type::Float32 => Constant::Float32(ordered_float::OrderedFloat(f32::INFINITY)),
+            Type::Float64 => Constant::Float64(ordered_float::OrderedFloat(f64::INFINITY)),
+            _ => panic!("add_pos_inf_constant: type must be Float32 or Float64"),
+        };
+        self.add_constant(constant_to_construct)
+    }
+
+    /// Adds a negative-infinity constant of the floating-point type `id`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `id` is not `Type::Float32` or `Type::Float64`.
+    pub fn add_neg_inf_constant(&mut self, id: TypeID) -> ConstantID {
+        let ty = self.get_type(id).clone();
+        let constant_to_construct = match ty {
+            Type::Float32 => Constant::Float32(ordered_float::OrderedFloat(f32::NEG_INFINITY)),
+            Type::Float64 => Constant::Float64(ordered_float::OrderedFloat(f64::NEG_INFINITY)),
+            _ => panic!("add_neg_inf_constant: type must be Float32 or Float64"),
+        };
+        self.add_constant(constant_to_construct)
+    }
+
     pub fn get_constant(&self, id: ConstantID) -> impl Deref<Target = Constant> + '_ {
         if id.idx() < self.editor.constants.borrow().len() {
             Either::Left(Ref::map(self.editor.constants.borrow(), |constants| {
diff --git a/juno_samples/edge_detection/src/gpu.sch b/juno_samples/edge_detection/src/gpu.sch
index ad3ec65c902aefd90384f1e40098bc43ad5edeb7..2a8960eec731fec4a3187c6df2e47eb72e3a2411 100644
--- a/juno_samples/edge_detection/src/gpu.sch
+++ b/juno_samples/edge_detection/src/gpu.sch
@@ -62,10 +62,17 @@ fixpoint {
 simpl!(max_gradient);
 fork-dim-merge(max_gradient);
 simpl!(max_gradient);
-fork-tile[32, 0, false, true](max_gradient);
+fork-tile[1024, 0, false, true](max_gradient);
+let out = fork-split(max_gradient);
+fork-tile[32, 0, false, true](out._4_max_gradient.fj1);
+let out = fork-split(max_gradient);
 simpl!(max_gradient);
-fork-split(max_gradient);
+xdot[true](max_gradient);
+clean-monoid-reduces(max_gradient);
+xdot[true](max_gradient);
+fork-fission-bufferize[out._4_max_gradient.fj0, out._4_max_gradient.fj1](max_gradient);
 simpl!(max_gradient);
+xdot[true](max_gradient);
 no-memset(reject_zero_crossings@res);
 
 fixpoint {