diff --git a/hercules_ir/src/schedule.rs b/hercules_ir/src/schedule.rs index 8cc91f922b58d6db4bd0b189e8b3d506bfdb6e01..2151b1320a8cf85a934f8edec1bd5315962e5999 100644 --- a/hercules_ir/src/schedule.rs +++ b/hercules_ir/src/schedule.rs @@ -76,11 +76,10 @@ pub fn default_plan( // Infer schedules. infer_parallel_reduce(function, fork_join_map, &mut plan); + infer_vectorize(function, fork_join_map, &mut plan); // Infer a partitioning. partition_out_forks(function, reverse_postorder, fork_join_map, bbs, &mut plan); - - // Place fork partitions on the GPU. place_fork_partitions_on_gpu(function, &mut plan); plan @@ -166,6 +165,28 @@ pub fn infer_parallel_reduce( } } +/* + * Infer vectorizable fork-joins. Just check that there are no control nodes + * between a fork and its join. + */ +pub fn infer_vectorize( + function: &Function, + fork_join_map: &HashMap<NodeID, NodeID>, + plan: &mut Plan, +) { + for id in (0..function.nodes.len()) + .map(NodeID::new) + .filter(|id| function.nodes[id.idx()].is_join()) + { + let u = get_uses(&function.nodes[id.idx()]).as_ref()[0]; + if let Some(join) = fork_join_map.get(&u) + && *join == id + { + plan.schedules[u.idx()].push(Schedule::Vectorize); + } + } +} + /* * Create partitions corresponding to fork-join nests. Also, split the "top- * level" partition into sub-partitions that are connected graphs. Place data