diff --git a/Cargo.lock b/Cargo.lock
index 4a9b889119f59ef0097453a9d22a3caef39ac7bc..ffb61f4d8beb387ba3c756617f5bb3c8ce18ef3b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1209,6 +1209,16 @@ dependencies = [
  "with_builtin_macros",
 ]
 
+[[package]]
+name = "juno_product_read"
+version = "0.1.0"
+dependencies = [
+ "async-std",
+ "hercules_rt",
+ "juno_build",
+ "with_builtin_macros",
+]
+
 [[package]]
 name = "juno_schedule_test"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index eeb5e69d92410b2174c1db19886b0ff76f773257..3e86bad053f8847c9f10d18117635de1cfa76250 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -33,4 +33,5 @@ members = [
 	"juno_samples/edge_detection",
 	"juno_samples/fork_join_tests",
 	"juno_samples/multi_device",
+	"juno_samples/product_read",
 ]
diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index e6b540aed6218f56e0a1fa33b0e4b1e1db09e9b0..d6461a1ee2d1208071090358bf05d7d34c2638d0 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -284,11 +284,7 @@ impl GPUContext<'_> {
 
         // If there are no forks, fast forward to single-block, single-thread codegen
         let (num_blocks, num_threads) = if self.fork_join_map.is_empty() {
-            self.codegen_data_control_no_forks(
-                &HashSet::new(),
-                &mut dynamic_shared_offset,
-                &mut gotos,
-            )?;
+            self.codegen_data_control_no_forks(&mut dynamic_shared_offset, &mut gotos)?;
             ("1".to_string(), "1".to_string())
         } else {
             // Create structures and determine block and thread parallelization strategy
@@ -298,10 +294,6 @@ impl GPUContext<'_> {
                 self.get_thread_root_forks(&root_forks, self.fork_tree, is_block_parallel);
             let (fork_thread_quota_map, num_threads) =
                 self.get_thread_quotas(self.fork_tree, thread_root_root_fork);
-            // TODO: Uncomment and adjust once we know logic of extra dim. This will affect constant
-            // collections, reads, and writes.
-            // let extra_dim_collects = self.get_extra_dim_collects(&fork_control_map, &fork_thread_quota_map);
-            let extra_dim_collects = HashSet::new();
 
             // Core function for the CUDA code of all data and control nodes.
             self.codegen_data_control(
@@ -312,7 +304,6 @@ impl GPUContext<'_> {
                 },
                 &thread_root_forks,
                 &fork_thread_quota_map,
-                &extra_dim_collects,
                 &mut dynamic_shared_offset,
                 is_block_parallel,
                 num_threads,
@@ -859,25 +850,8 @@ extern \"C\" {} {}(",
         }
     }
 
-    /*
-     * All non reduced-over collections used in fork joins have an extra dimension.
-     * However, this is only useful if ThreadIDs run in parallel not serially,
-     * otherwise it's unnecessarily consuming shared memory. This function returns
-     * the set of collections that have an unnecessary extra dimension.
-     */
-    fn get_extra_dim_collects(
-        &self,
-        fork_control_map: &HashMap<NodeID, HashSet<NodeID>>,
-        fork_thread_quota_map: &HashMap<NodeID, (usize, usize, usize)>,
-    ) -> HashSet<TypeID> {
-        // Determine which fork each collection is used in, and check if it's
-        // parallelized via the fork_thread_quota_map.
-        todo!()
-    }
-
     fn codegen_data_control_no_forks(
         &self,
-        extra_dim_collects: &HashSet<TypeID>,
         dynamic_shared_offset: &mut String,
         gotos: &mut BTreeMap<NodeID, CudaGoto>,
     ) -> Result<(), Error> {
@@ -901,7 +875,6 @@ extern \"C\" {} {}(",
                         None,
                         None,
                         false,
-                        extra_dim_collects,
                         dynamic_shared_offset,
                         body,
                         &mut tabs,
@@ -919,7 +892,6 @@ extern \"C\" {} {}(",
         block_fork: Option<NodeID>,
         thread_root_forks: &HashSet<NodeID>,
         fork_thread_quota_map: &HashMap<NodeID, (usize, usize, usize)>,
-        extra_dim_collects: &HashSet<TypeID>,
         dynamic_shared_offset: &mut String,
         is_block_parallel: bool,
         num_threads: usize,
@@ -945,7 +917,6 @@ extern \"C\" {} {}(",
                     None,
                     None,
                     false,
-                    extra_dim_collects,
                     dynamic_shared_offset,
                     body,
                     &mut tabs,
@@ -979,7 +950,6 @@ extern \"C\" {} {}(",
                         None,
                         Some(block_fork.unwrap()),
                         false,
-                        extra_dim_collects,
                         dynamic_shared_offset,
                         body,
                         &mut tabs,
@@ -996,7 +966,6 @@ extern \"C\" {} {}(",
                 fork_thread_quota_map,
                 1,
                 num_threads,
-                extra_dim_collects,
                 dynamic_shared_offset,
                 gotos,
             )?;
@@ -1017,7 +986,6 @@ extern \"C\" {} {}(",
         fork_thread_quota_map: &HashMap<NodeID, (usize, usize, usize)>,
         parent_quota: usize,
         num_threads: usize,
-        extra_dim_collections: &HashSet<TypeID>,
         dynamic_shared_offset: &mut String,
         gotos: &mut BTreeMap<NodeID, CudaGoto>,
     ) -> Result<(), Error> {
@@ -1068,7 +1036,6 @@ extern \"C\" {} {}(",
                     parallel_factor,
                     Some(curr_fork),
                     reducts.contains(data),
-                    extra_dim_collections,
                     dynamic_shared_offset,
                     body,
                     &mut tabs,
@@ -1082,7 +1049,6 @@ extern \"C\" {} {}(",
                 fork_thread_quota_map,
                 use_thread_quota,
                 num_threads,
-                extra_dim_collections,
                 dynamic_shared_offset,
                 gotos,
             )?;
@@ -1099,7 +1065,6 @@ extern \"C\" {} {}(",
         parallel_factor: Option<usize>,
         nesting_fork: Option<NodeID>,
         is_special_reduct: bool,
-        extra_dim_collects: &HashSet<TypeID>,
         dynamic_shared_offset: &mut String,
         w: &mut String,
         num_tabs: &mut usize,
@@ -1206,7 +1171,6 @@ extern \"C\" {} {}(",
                         define_variable.clone(),
                         *cons_id,
                         true,
-                        Some(extra_dim_collects),
                         dynamic_shared_offset,
                         w,
                         *num_tabs,
@@ -1232,8 +1196,7 @@ extern \"C\" {} {}(",
                 if !is_primitive
                     && (state != KernelState::OutBlock || !is_block_parallel.unwrap_or(false))
                 {
-                    let data_size =
-                        self.get_size(self.typing[id.idx()], None, Some(extra_dim_collects));
+                    let data_size = self.get_size(self.typing[id.idx()], None);
                     write!(
                         w,
                         "{}for (int i = {}.thread_rank(); i < {}; i += {}.size()) {{\n",
@@ -1453,8 +1416,7 @@ extern \"C\" {} {}(",
             }
             // Read of primitive requires load after pointer math.
             Node::Read { collect, indices } => {
-                let collect_with_indices =
-                    self.codegen_collect(*collect, indices, extra_dim_collects);
+                let collect_with_indices = self.codegen_collect(*collect, indices);
                 let data_type_id = self.typing[id.idx()];
                 if self.types[data_type_id.idx()].is_primitive() {
                     let type_name = self.get_type(data_type_id, true);
@@ -1478,8 +1440,7 @@ extern \"C\" {} {}(",
                 data,
                 indices,
             } => {
-                let collect_with_indices =
-                    self.codegen_collect(*collect, indices, extra_dim_collects);
+                let collect_with_indices = self.codegen_collect(*collect, indices);
                 let data_variable = self.get_value(*data, false, false);
                 let data_type_id = self.typing[data.idx()];
                 let cg_tile = match state {
@@ -1498,7 +1459,7 @@ extern \"C\" {} {}(",
                     )?;
                     write!(w, "{}}}\n", tabs)?;
                 } else {
-                    let data_size = self.get_size(data_type_id, None, Some(extra_dim_collects));
+                    let data_size = self.get_size(data_type_id, None);
                     write!(
                         w,
                         "{}for (int i = {}.thread_rank(); i < {}; i += {}.size()) {{\n",
@@ -1754,31 +1715,31 @@ extern \"C\" {} {}(",
      * This function emits collection name + pointer math for the provided indices.
      * All collection types use char pointers.
      */
-    fn codegen_collect(
-        &self,
-        collect: NodeID,
-        indices: &[Index],
-        extra_dim_collects: &HashSet<TypeID>,
-    ) -> String {
+    fn codegen_collect(&self, collect: NodeID, indices: &[Index]) -> String {
         let mut index_ptr = "0".to_string();
-        let type_id = self.typing[collect.idx()];
+        let mut type_id = self.typing[collect.idx()];
         for index in indices {
             match index {
                 Index::Field(field) => {
-                    self.get_size(type_id, Some(*field), Some(extra_dim_collects));
+                    index_ptr.push_str(&format!(" + ({})", self.get_size(type_id, Some(*field))));
+                    type_id = if let Type::Product(fields) = &self.types[type_id.idx()] {
+                        fields[*field]
+                    } else {
+                        panic!("Expected product type")
+                    };
                 }
                 // Variants of summations have zero offset
-                Index::Variant(_) => {}
+                Index::Variant(index) => {
+                    type_id = if let Type::Summation(variants) = &self.types[type_id.idx()] {
+                        variants[*index]
+                    } else {
+                        panic!("Expected summation type")
+                    };
+                }
                 // Convert multi-d array index to 1-d index, and optionally
                 // convert to single-byte index by multiplying by element size
                 Index::Position(array_indices) => {
-                    let has_extra_dim = extra_dim_collects.contains(&self.typing[collect.idx()]);
-                    if has_extra_dim {
-                        continue;
-                    }
-                    let Type::Array(element_type, extents) =
-                        &self.types[self.typing[collect.idx()].idx()]
-                    else {
+                    let Type::Array(element_type, extents) = &self.types[type_id.idx()] else {
                         panic!("Expected array type")
                     };
                     let mut cumulative_offset = multiply_dcs(&extents[array_indices.len()..]);
@@ -1800,8 +1761,9 @@ extern \"C\" {} {}(",
                         cumulative_offset,
                         ")".repeat(array_indices.len())
                     ));
-                    let element_size = self.get_size(*element_type, None, Some(extra_dim_collects));
-                    index_ptr.push_str(&format!(" * {}", element_size));
+                    let element_size = self.get_size(*element_type, None);
+                    index_ptr.push_str(&format!(" * ({})", element_size));
+                    type_id = *element_type;
                 }
             }
         }
@@ -1825,7 +1787,6 @@ extern \"C\" {} {}(",
         name: String,
         cons_id: ConstantID,
         allow_allocate: bool,
-        extra_dim_collects: Option<&HashSet<TypeID>>,
         dynamic_shared_offset: &mut String,
         w: &mut String,
         num_tabs: usize,
@@ -1850,7 +1811,7 @@ extern \"C\" {} {}(",
             Constant::Product(type_id, constant_fields) => {
                 if allow_allocate {
                     let alignment = self.get_alignment(*type_id);
-                    let size = self.get_size(*type_id, None, extra_dim_collects);
+                    let size = self.get_size(*type_id, None);
                     *dynamic_shared_offset = format!(
                         "(({} + {} - 1) / {}) * {}",
                         dynamic_shared_offset, alignment, alignment, alignment
@@ -1872,7 +1833,7 @@ extern \"C\" {} {}(",
                 };
                 for i in 0..constant_fields.len() {
                     // For each field update offset and issue recursive call
-                    let offset = self.get_size(type_fields[i], Some(i), extra_dim_collects);
+                    let offset = self.get_size(type_fields[i], Some(i));
                     let field_constant = &self.constants[constant_fields[i].idx()];
                     if field_constant.is_scalar() {
                         let field_type = self.get_type(type_fields[i], true);
@@ -1880,7 +1841,6 @@ extern \"C\" {} {}(",
                             format!("*reinterpret_cast<{}>({}+{})", field_type, name, offset),
                             constant_fields[i],
                             false,
-                            None,
                             dynamic_shared_offset,
                             w,
                             num_tabs,
@@ -1890,7 +1850,6 @@ extern \"C\" {} {}(",
                             format!("{}+{}", name, offset),
                             constant_fields[i],
                             false,
-                            extra_dim_collects,
                             dynamic_shared_offset,
                             w,
                             num_tabs,
@@ -1901,7 +1860,7 @@ extern \"C\" {} {}(",
             Constant::Summation(type_id, variant, field) => {
                 if allow_allocate {
                     let alignment = self.get_alignment(*type_id);
-                    let size = self.get_size(*type_id, None, extra_dim_collects);
+                    let size = self.get_size(*type_id, None);
                     *dynamic_shared_offset = format!(
                         "(({} + {} - 1) / {}) * {}",
                         dynamic_shared_offset, alignment, alignment, alignment
@@ -1930,21 +1889,12 @@ extern \"C\" {} {}(",
                         format!("*reinterpret_cast<{}>({})", variant_type, name),
                         *field,
                         false,
-                        extra_dim_collects,
                         dynamic_shared_offset,
                         w,
                         num_tabs,
                     )?;
                 } else if !variant_constant.is_array() {
-                    self.codegen_constant(
-                        name,
-                        *field,
-                        false,
-                        extra_dim_collects,
-                        dynamic_shared_offset,
-                        w,
-                        num_tabs,
-                    )?;
+                    self.codegen_constant(name, *field, false, dynamic_shared_offset, w, num_tabs)?;
                 };
             }
             Constant::Array(type_id) => {
@@ -1952,7 +1902,7 @@ extern \"C\" {} {}(",
                     panic!("Nested array constant should not be re-allocated");
                 }
                 let alignment = self.get_alignment(*type_id);
-                let size = self.get_size(*type_id, None, extra_dim_collects);
+                let size = self.get_size(*type_id, None);
                 *dynamic_shared_offset = format!(
                     "(({} + {} - 1) / {}) * {}",
                     dynamic_shared_offset, alignment, alignment, alignment
@@ -1979,35 +1929,19 @@ extern \"C\" {} {}(",
      * and offset to 2nd field. This is useful for constant initialization and read/write
      * index math.
      */
-    fn get_size(
-        &self,
-        type_id: TypeID,
-        num_fields: Option<usize>,
-        extra_dim_collects: Option<&HashSet<TypeID>>,
-    ) -> String {
+    fn get_size(&self, type_id: TypeID, num_fields: Option<usize>) -> String {
         match &self.types[type_id.idx()] {
             Type::Array(element_type, extents) => {
-                let array_size = if extra_dim_collects.is_some()
-                    && extra_dim_collects.unwrap().contains(&type_id)
-                {
-                    "1".to_string()
-                } else {
-                    multiply_dcs(extents)
-                };
-                format!("{} * {}", self.get_alignment(*element_type), array_size)
+                assert!(num_fields.is_none());
+                let array_size = multiply_dcs(extents);
+                format!("{} * {}", self.get_size(*element_type, None), array_size)
             }
             Type::Product(fields) => {
-                let num_fields = &num_fields.unwrap_or(fields.len());
-                let with_field = fields
+                let num_fields = num_fields.unwrap_or(fields.len());
+                fields
                     .iter()
-                    .enumerate()
-                    .filter(|(i, _)| i < num_fields)
-                    .map(|(_, id)| {
-                        (
-                            self.get_size(*id, None, extra_dim_collects),
-                            self.get_alignment(*id),
-                        )
-                    })
+                    .take(num_fields)
+                    .map(|id| (self.get_size(*id, None), self.get_alignment(*id)))
                     .fold(String::from("0"), |acc, (size, align)| {
                         if acc == "0" {
                             size
@@ -2017,31 +1951,23 @@ extern \"C\" {} {}(",
                                 acc, align, align, align, size
                             )
                         }
-                    });
-                if num_fields < &fields.len() {
-                    format!(
-                        "{} - {}",
-                        with_field,
-                        self.get_size(fields[*num_fields], None, extra_dim_collects)
-                    )
-                } else {
-                    with_field
-                }
+                    })
             }
             Type::Summation(variants) => {
+                assert!(num_fields.is_none());
                 // The argmax variant by size is not guaranteed to be same as
                 // argmax variant by alignment, eg product of 3 4-byte primitives
                 // vs 1 8-byte primitive, so we need to calculate both.
-                let max_size = variants
-                    .iter()
-                    .map(|id| self.get_size(*id, None, extra_dim_collects))
-                    .fold(String::from("0"), |acc, x| {
+                let max_size = variants.iter().map(|id| self.get_size(*id, None)).fold(
+                    String::from("0"),
+                    |acc, x| {
                         if acc == "0" {
                             x
                         } else {
                             format!("umax({}, {})", acc, x)
                         }
-                    });
+                    },
+                );
                 let max_alignment = variants
                     .iter()
                     .map(|id| self.get_alignment(*id))
@@ -2052,7 +1978,10 @@ extern \"C\" {} {}(",
                     max_size, max_alignment, max_alignment, max_alignment
                 )
             }
-            _ => format!("{}", self.get_alignment(type_id)),
+            _ => {
+                assert!(num_fields.is_none());
+                format!("{}", self.get_alignment(type_id))
+            }
         }
     }
 
diff --git a/juno_samples/product_read/Cargo.toml b/juno_samples/product_read/Cargo.toml
new file mode 100644
index 0000000000000000000000000000000000000000..d466f5550b77e426040842339684a1e8906b22fa
--- /dev/null
+++ b/juno_samples/product_read/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "juno_product_read"
+version = "0.1.0"
+authors = ["Aaron Councilman <aaronjc4@illinois.edu>"]
+edition = "2021"
+
+[[bin]]
+name = "juno_product_read"
+path = "src/main.rs"
+
+[features]
+cuda = ["juno_build/cuda", "hercules_rt/cuda"]
+
+[build-dependencies]
+juno_build = { path = "../../juno_build" }
+
+[dependencies]
+juno_build = { path = "../../juno_build" }
+hercules_rt = { path = "../../hercules_rt" }
+with_builtin_macros = "0.1.0"
+async-std = "*"
diff --git a/juno_samples/product_read/build.rs b/juno_samples/product_read/build.rs
new file mode 100644
index 0000000000000000000000000000000000000000..2bd5172e661e65e2284a986e2d710cd890d71b90
--- /dev/null
+++ b/juno_samples/product_read/build.rs
@@ -0,0 +1,22 @@
+use juno_build::JunoCompiler;
+
+fn main() {
+    #[cfg(not(feature = "cuda"))]
+    {
+        JunoCompiler::new()
+            .file_in_src("product_read.jn")
+            .unwrap()
+            .build()
+            .unwrap();
+    }
+    #[cfg(feature = "cuda")]
+    {
+        JunoCompiler::new()
+            .file_in_src("product_read.jn")
+            .unwrap()
+            .schedule_in_src("gpu.sch")
+            .unwrap()
+            .build()
+            .unwrap();
+    }
+}
diff --git a/juno_samples/product_read/src/gpu.sch b/juno_samples/product_read/src/gpu.sch
new file mode 100644
index 0000000000000000000000000000000000000000..549b421561da50023719fd432fe8d943a4ab37f6
--- /dev/null
+++ b/juno_samples/product_read/src/gpu.sch
@@ -0,0 +1,19 @@
+gvn(*);
+phi-elim(*);
+dce(*);
+
+let out = auto-outline(*);
+gpu(out.product_read);
+
+ip-sroa(*);
+sroa(*);
+crc(*);
+dce(*);
+gvn(*);
+phi-elim(*);
+dce(*);
+
+infer-schedules(*);
+
+float-collections(*);
+gcm(*);
diff --git a/juno_samples/product_read/src/main.rs b/juno_samples/product_read/src/main.rs
new file mode 100644
index 0000000000000000000000000000000000000000..5211098ceebd6d7b15871ba0dd73cdbefb993313
--- /dev/null
+++ b/juno_samples/product_read/src/main.rs
@@ -0,0 +1,20 @@
+#![feature(concat_idents)]
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox};
+
+juno_build::juno!("product_read");
+
+fn main() {
+    async_std::task::block_on(async {
+        let input = vec![(0, 1), (2, 3)];
+        let input : HerculesImmBox<(i32, i32)> = HerculesImmBox::from(input.as_slice());
+        let mut r = runner!(product_read);
+        let res : Vec<i32> = HerculesMutBox::from(r.run(input.to()).await).as_slice().to_vec();
+        assert_eq!(res, vec![0, 1, 2, 3]);
+    });
+}
+
+#[test]
+fn products_test() {
+    main();
+}
diff --git a/juno_samples/product_read/src/product_read.jn b/juno_samples/product_read/src/product_read.jn
new file mode 100644
index 0000000000000000000000000000000000000000..7bf74a105b32099341f299c38898f6f6c08eb467
--- /dev/null
+++ b/juno_samples/product_read/src/product_read.jn
@@ -0,0 +1,9 @@
+#[entry]
+fn product_read(input: (i32, i32)[2]) -> i32[4] {
+  let result : i32[4];
+  result[0] = input[0].0;
+  result[1] = input[0].1;
+  result[2] = input[1].0;
+  result[3] = input[1].1;
+  return result;
+}