diff --git a/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb b/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb
index 7c1361fff89dc05923f847c060dc84c7fe513c80..0f66bcf8262fa92c2122929ccf96f30c5a92c406 100644
--- a/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb
+++ b/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb
@@ -14,7 +14,9 @@
     "from PIL import Image\n",
     "import torchvision.transforms as transforms\n",
     "import matplotlib.pyplot as plt\n",
-    "import pillow_heif"
+    "import pillow_heif\n",
+    "import os\n",
+    "import pickle"
    ]
   },
   {
@@ -253,7 +255,7 @@
     }
    ],
    "source": [
-    "labels_path = \"/raid/projects/akhot2/group-01-phys371-sp2023/data/trap_labels.xlsx\"\n",
+    "labels_path = \"../data/trap_labels.xlsx\"\n",
     "df = pd.read_excel(labels_path)\n",
     "df.head()"
    ]
@@ -374,35 +376,35 @@
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M24</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>604</th>\n",
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M26</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>605</th>\n",
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M28</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>606</th>\n",
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M30</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>607</th>\n",
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M32</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -417,11 +419,11 @@
        "3         Angled               2022-07-19               U8              0\n",
        "4         Angled               2022-07-19              U10              0\n",
        "..            ...                     ...              ...            ...\n",
-       "603       Angled                      NaT              M24            NaN\n",
-       "604       Angled                      NaT              M26            NaN\n",
-       "605       Angled                      NaT              M28            NaN\n",
-       "606       Angled                      NaT              M30            NaN\n",
-       "607       Angled                      NaT              M32            NaN\n",
+       "603       Angled                      NaT              M24            nan\n",
+       "604       Angled                      NaT              M26            nan\n",
+       "605       Angled                      NaT              M28            nan\n",
+       "606       Angled                      NaT              M30            nan\n",
+       "607       Angled                      NaT              M32            nan\n",
        "\n",
        "[320 rows x 4 columns]"
       ]
@@ -432,14 +434,13 @@
     }
    ],
    "source": [
-    "df_angled = df[df[\"trap orient'n\"] == \"Angled \"]\n",
-    "df_angled[\"2022 Data Collect. Date\"].astype(str)\n",
+    "df_angled = df[df[\"trap orient'n\"] == \"Angled \"].astype(str)\n",
     "df_angled[[\"trap orient'n\", \"2022 Data Collect. Date\", \"Sticky Trap Name\", \"T-top side WCR\"]]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 71,
    "id": "0ccf1183",
    "metadata": {},
    "outputs": [
@@ -471,8 +472,8 @@
       "B20_25August2022.jpg\n",
       "B8_25August2022.HEIC\n",
       "B18_25August2022.jpg\n",
+      "B18c_25August2022.jpg\n",
       "B32_25August2022.jpg\n",
-      "Buchholz trap 18 8_25_2022 WCR and NCR present_3468.jpg\n",
       "B4_25August2022.HEIC\n",
       "B22_25August2022.jpg\n",
       "B6_25August2022.HEIC\n",
@@ -484,11 +485,11 @@
       "B14_20July2022.jpg\n",
       "B8_20July2022.jpg\n",
       "B32_20July2022.jpg\n",
+      "B28b_20July2022.jpg\n",
       "B26b_20July2022.jpg\n",
+      "B32b_20July2022.jpg\n",
       "B16_20JULY2022.jpg\n",
       "B26_20July2022.jpg\n",
-      "B28 angled sticky-20JUL2022.jpg\n",
-      "B32 angled sticky-20JUL2022.jpg\n",
       "B30_20July2022.jpg\n",
       "B2_20July2022.jpg\n",
       "B4_20July2022.jpg\n",
@@ -536,9 +537,9 @@
       "F24_3AUGUST2022.HEIC\n",
       "F28_25August2022.HEIC\n",
       "F2b_25August2022.heic\n",
+      "F16b_25August2022.jpg\n",
       "F26_25August2022.HEIC\n",
       "F32_25August2022.HEIC\n",
-      "F16 wide view_25August2022.jpg\n",
       "F16_25August2022.jpg\n",
       "F12_25August2022.jpg\n",
       "F30_25August2022.HEIC\n",
@@ -549,7 +550,6 @@
       "F22_25August2022.HEIC\n",
       "F18_25August2022.HEIC\n",
       "F14b_25August2022.jpg\n",
-      "Faivre field soybean phenoloyg_25August2022.jpg\n",
       "F14_20July2022.jpg\n",
       "F32_20July2022.jpg\n",
       "F8_20July2022.jpg\n",
@@ -600,7 +600,7 @@
       "M32_4AUGUST2022..jpg\n",
       "M24_4AUGUST2022..jpg\n",
       "M14_4AUGUST2022.HEIC\n",
-      "M14._17AUGUST2022.HEIC\n",
+      "M32_17AUGUST2022.jpg\n",
       "M24_17AUGUST2022.jpg\n",
       "M30_17AUGUST2022.jpg\n",
       "M10_17AUGUST2022.HEIC\n",
@@ -608,37 +608,127 @@
       "M2_17AUGUST2022.HEIC\n",
       "M16_17AUGUST2022.HEIC\n",
       "M26_17AUGUST2022.jpg\n",
+      "M14_17AUGUST2022.HEIC\n",
       "M12_17AUGUST2022.HEIC\n",
       "M18_17AUGUST2022.jpg\n",
       "M28_17AUGUST2022.jpg\n",
       "M6_17AUGUST2022.HEIC\n",
-      "M32-17AUGUST2022.jpg\n",
       "M4_17AUGUST2022.HEIC\n",
       "M8_17AUGUST2022.HEIC\n",
       "M20_17AUGUST2022.jpg\n",
       "M12_27JULY2022.jpg\n",
-      "Moore_M_Phenology of soybean_27JULY2022.jpg\n",
       "M8_27JULY2022.jpg\n",
       "M26_27JULY2022.jpeg\n",
       "M16_27JULY2022.jpg\n",
       "M32_27JULY2022.jpeg\n",
-      "M30_27JULY2022.jpeg\n"
+      "M30_27JULY2022.jpeg\n",
+      "M10_27JULY2022.jpg\n",
+      "M4_27JULY2022.jpg\n",
+      "M14_27JULY2022.jpg\n",
+      "M20_27JULY2022.jpeg\n",
+      "M28_27JULY2022.jpeg\n",
+      "M6_27JULY2022.jpg\n",
+      "M22_27JULY2022.jpeg\n",
+      "M24_27JULY2022.jpeg\n",
+      "M2_27JULY2022.jpg\n",
+      "U30_19JULY2022.jpg\n",
+      "U2_19JULY2022.HEIC\n",
+      "U10_19JULY2022.HEIC\n",
+      "U20_19JULY2022jpg.jpg\n",
+      "U22_19JULY2022.jpg\n",
+      "U16_19JULY2022.HEIC\n",
+      "U24_19JULY2022.jpg\n",
+      "U8_19JULY2022.HEIC\n",
+      "U14_19JULY2022.HEIC\n",
+      "U18_19JULY2022.jpg\n",
+      "U4_19JULY2022.HEIC\n",
+      "U12_19JULY2022.HEIC\n",
+      "U26_19JULY2022.jpg\n",
+      "U6_19JULY2022.HEIC\n",
+      "U32_19JULY2022.jpg\n",
+      "U28_19JULY2022.jpg\n",
+      "U26_5AUGUST2022.jpg\n",
+      "U32_5AUGUST2022jpg.jpg\n",
+      "U10_5August2022(4979).HEIC\n",
+      "U8_5August2022(4980).HEIC\n",
+      "U6_5August2022(4981).HEIC\n",
+      "U16_5August2022(4977).HEIC\n",
+      "U2_5August2022(4983).HEIC\n",
+      "U24_5AUGUST2022.jpg\n",
+      "U20_5AUGUST2022.jpg\n",
+      "U4_5August2022(4982).HEIC\n",
+      "U12_5August2022(4978).HEIC\n",
+      "U18_5AUGUST2022.jpg\n",
+      "U28_5AUGUST2022.jpg\n",
+      "U22_5AUGUST2022.jpg\n",
+      "U30_5AUGUST2022.jpg\n",
+      "U26_27JULY2022.jpg\n",
+      "U20_27JULY2022.jpg\n",
+      "U2_27JULY2022.HEIC\n",
+      "U16_27JULY2022.HEIC\n",
+      "U10_27JULY2022.HEIC\n",
+      "U24_27JULY2022.jpg\n",
+      "U6_27JULY2022.HEIC\n",
+      "U12_27JULY2022.HEIC\n",
+      "U8_27JULY2022.HEIC\n",
+      "U4_27JULY2022.HEIC\n",
+      "U28_27JULY2022.jpg\n",
+      "U32_27JULY2022.jpg\n",
+      "U30_27JULY2022.jpg\n",
+      "U22_27JULY2022jpg.jpg\n",
+      "U18_27JULY2022.jpg\n",
+      "U14_27JULY2022.HEIC\n",
+      "U4_10AUGUST2022.HEIC\n",
+      "U22_10AUGUST2022.jpg\n",
+      "U26_10AUGUST2022.jpg\n",
+      "U24_10AUGUST2022.jpg\n",
+      "U28_10AUGUST2022.jpg\n",
+      "U30_10AUGUST2022.jpg\n",
+      "U10_10AUGUST2022.HEIC\n",
+      "U32_10AUGUST2022.jpg\n",
+      "U18_10AUGUST2022.jpg\n",
+      "U20_10AUGUST2022.jpg\n",
+      "U16_10AUGUST2022.HEIC\n",
+      "U14_10AUGUST2022.HEIC\n",
+      "U2_10AUGUST2022.HEIC\n",
+      "U8_10AUGUST2022.HEIC\n",
+      "U12_10AUGUST2022.HEIC\n",
+      "U6_10AUGUST2022.HEIC\n",
+      "U16_23August2022.HEIC\n",
+      "U12_23August2022.HEIC\n",
+      "U26_23August2022.jpg\n",
+      "U20_23August2022.jpg\n",
+      "U10_23August2022.HEIC\n",
+      "U6_23August2022.HEIC\n",
+      "U4_23August2022.HEIC\n",
+      "U2_23August2022.HEIC\n",
+      "U28_23August2022.jpg\n",
+      "U22_23August2022.jpg\n",
+      "U14_23August2022.HEIC\n",
+      "U18_23August2022.HEIC\n",
+      "U18_23August2022.jpg\n",
+      "U24_23August2022.jpg\n",
+      "U28b_23August2022.jpg\n",
+      "U30_23August2022.jpg\n",
+      "U32_23August2022.jpg\n"
      ]
     }
    ],
    "source": [
     "folders = [\"buchhloz\", \"faivre\", \"moore\", \"underwood\"]\n",
     "\n",
-    "transform = transforms.Compose([\n",
-    "    transforms.PILToTensor()\n",
-    "])\n",
     "\n",
+    "images = []\n",
+    "total_beetles = 0\n",
     "for folder_name in folders:\n",
     "    dates = []\n",
     "    for date in glob.glob(r\"/raid/projects/akhot2/group-01-phys371-sp2023/data/\" + folder_name + \"/*\"):\n",
     "        dates.append(date)\n",
     "    for date in dates:\n",
     "        for image_file in glob.glob(date +  \"/*\"):\n",
+    "            print(image_file.split(\"/\")[-1])\n",
+    "            f_name = image_file.split(\"/\")[-1]\n",
+    "            #continue\n",
     "            if image_file.split(\".\")[-1].lower() == \"heic\":\n",
     "                heif_file = pillow_heif.read_heif(image_file)\n",
     "                img = Image.frombytes(\n",
@@ -647,31 +737,148 @@
     "                    heif_file.data,\n",
     "                    \"raw\",\n",
     "                )\n",
+    "                img.save(r\"/raid/projects/akhot2/group-01-phys371-sp2023/beetles/\"+f_name[:-4]+\".jpg\")\n",
     "                #print(\"HEIC FILE:\")\n",
     "            else:\n",
     "                img = Image.open(image_file)\n",
+    "                img.save(r\"/raid/projects/akhot2/group-01-phys371-sp2023/beetles/\"+f_name)\n",
     "                #print(\"JPG FILE:\")\n",
-    "            img_tensor = transform(img)\n",
+    "            name = f_name.split(\"_\")[0]\n",
+    "            if name[-1].isalpha():\n",
+    "                name = name[0:-1]\n",
+    "            day = f_name.split(\"_\")[1][0:2]\n",
+    "            if day[1].isalpha():\n",
+    "                day = \"0\" + day[0]\n",
+    "            if \"august\" in f_name.lower():\n",
+    "                date = \"2022-08-\" + day \n",
+    "            else:\n",
+    "                date = \"2022-07-\" + day\n",
+    "            beetle_count = df_angled[(df_angled['2022 Data Collect. Date'] == date) & (df_angled['Sticky Trap Name'] == name)]['T-top side WCR']\n",
+    "            row_number = beetle_count.index[0]\n",
+    "            #total_beetles += int(beetle_count)\n",
+    "            #images.append((img_tensor, int(beetle_count), row_number))\n",
+    "            \n",
+    "            \n",
     "            #plt.imshow(img)\n",
     "            #plt.show()\n",
     "            #print(img_tensor.shape)\n",
-    "            print(image_file.split(\"/\")[-1])\n",
+    "            \n",
     "        \n",
     "    "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 26,
    "id": "eaa3da3e",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Grand Acc #</th>\n",
+       "      <th>Weekly Acc#</th>\n",
+       "      <th>Sample collection period</th>\n",
+       "      <th>Sticky trap pair (rep)</th>\n",
+       "      <th>2022 Julian Date</th>\n",
+       "      <th>Month</th>\n",
+       "      <th>2022 Set up Date</th>\n",
+       "      <th>2022 Data Collect. Date</th>\n",
+       "      <th>Coll. intrvl (d)</th>\n",
+       "      <th>Coll. hour</th>\n",
+       "      <th>...</th>\n",
+       "      <th>NCR/trap /day</th>\n",
+       "      <th>WCR/trap /day</th>\n",
+       "      <th>Total CRW /trap /day</th>\n",
+       "      <th>NCR/trap top /day</th>\n",
+       "      <th>WCR/trap top/day</th>\n",
+       "      <th>Total CRW /trap top/day</th>\n",
+       "      <th>Proportion NCR on top</th>\n",
+       "      <th>Proportion WCR on top</th>\n",
+       "      <th>Proportion All CRW on top</th>\n",
+       "      <th>Notes</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>0 rows × 43 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [Grand Acc #, Weekly Acc#, Sample collection period, Sticky trap pair (rep), 2022 Julian Date, Month, 2022 Set up Date, 2022 Data Collect. Date, Coll. intrvl (d), Coll. hour, Trap visitor, Drone pilot, On farm location, Site Abrv., trap angle, trap orient'n, Trap #, Sticky Trap Name, T-top side NCR, bottom side NCR, T-top side WCR, bottom side WCR, WCR mal, WCR fem, T-top side \"other\", Bottom side \"other\", T-top side total CRW, bottom side total CRW, Trap Total NCR, Trap Total WCR, Trap Total CRW, Proportion NCR in total CRW, Proportion WCR in total CRW, NCR/trap /day, WCR/trap /day, Total CRW /trap /day, NCR/trap top /day, WCR/trap top/day, Total CRW /trap top/day, Proportion NCR on top, Proportion WCR on top, Proportion All CRW on top, Notes]\n",
+       "Index: []\n",
+       "\n",
+       "[0 rows x 43 columns]"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_angled[df_angled['2022 Data Collect. Date'] == \"2022-08-01\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "80679db4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "41"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total_beetles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "a397557a",
+   "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "folder = '../data'\n",
+    "if not os.path.exists(folder):\n",
+    "    os.mkdir(folder)\n",
+    "with open(folder+\"/training-data\"+\".pkl\", \"wb\") as f:\n",
+    "     pickle.dump(images, f)"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "80679db4",
+   "id": "997ccb7f",
    "metadata": {},
    "outputs": [],
    "source": []
diff --git a/training/PreProcessor.ipynb b/training/PreProcessor.ipynb
index a2863fb0c82b4e99c7576f68e1ebfb0d1c6170e0..bb4dbc980bbb0ed342fa7dc730c6915dfff3c2f9 100644
--- a/training/PreProcessor.ipynb
+++ b/training/PreProcessor.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 1,
    "id": "ff555f6b",
    "metadata": {},
    "outputs": [],
@@ -14,12 +14,14 @@
     "from PIL import Image\n",
     "import torchvision.transforms as transforms\n",
     "import matplotlib.pyplot as plt\n",
-    "import pillow_heif"
+    "import pillow_heif\n",
+    "import os\n",
+    "import pickle"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 2,
    "id": "fd6db132",
    "metadata": {},
    "outputs": [
@@ -247,20 +249,20 @@
        "[5 rows x 43 columns]"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "labels_path = \"/raid/projects/akhot2/group-01-phys371-sp2023/data/trap_labels.xlsx\"\n",
+    "labels_path = \"../data/trap_labels.xlsx\"\n",
     "df = pd.read_excel(labels_path)\n",
     "df.head()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "id": "7097a875",
    "metadata": {},
    "outputs": [
@@ -284,7 +286,7 @@
        "      dtype='object')"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -295,7 +297,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 4,
    "id": "24d3874e",
    "metadata": {},
    "outputs": [
@@ -374,35 +376,35 @@
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M24</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>604</th>\n",
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M26</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>605</th>\n",
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M28</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>606</th>\n",
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M30</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>607</th>\n",
        "      <td>Angled</td>\n",
        "      <td>NaT</td>\n",
        "      <td>M32</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>nan</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -417,318 +419,142 @@
        "3         Angled               2022-07-19               U8              0\n",
        "4         Angled               2022-07-19              U10              0\n",
        "..            ...                     ...              ...            ...\n",
-       "603       Angled                      NaT              M24            NaN\n",
-       "604       Angled                      NaT              M26            NaN\n",
-       "605       Angled                      NaT              M28            NaN\n",
-       "606       Angled                      NaT              M30            NaN\n",
-       "607       Angled                      NaT              M32            NaN\n",
+       "603       Angled                      NaT              M24            nan\n",
+       "604       Angled                      NaT              M26            nan\n",
+       "605       Angled                      NaT              M28            nan\n",
+       "606       Angled                      NaT              M30            nan\n",
+       "607       Angled                      NaT              M32            nan\n",
        "\n",
        "[320 rows x 4 columns]"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df_angled = df[df[\"trap orient'n\"] == \"Angled \"]\n",
-    "df_angled[\"2022 Data Collect. Date\"].astype(str)\n",
+    "df_angled = df[df[\"trap orient'n\"] == \"Angled \"].astype(str)\n",
     "df_angled[[\"trap orient'n\", \"2022 Data Collect. Date\", \"Sticky Trap Name\", \"T-top side WCR\"]]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
-   "id": "0ccf1183",
+   "execution_count": 17,
+   "id": "b605c71f",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "B8_15August2022.jpg\n",
-      "B12_15August2022.jpg\n",
-      "B26_15August2022.jpg\n",
-      "B22_15August2022.jpg\n",
-      "B4_15August2022.jpg\n",
-      "B20_15August2022.jpg\n",
-      "B32_15August2022.jpg\n",
-      "B16_15August2022.jpg\n",
-      "B18_15August2022.jpg\n",
-      "B28_15August2022.jpg\n",
-      "B14_15August2022.jpg\n",
-      "B24_15August2022.jpg\n",
-      "B10_15August2022.jpg\n",
-      "B2_15August2022.jpg\n",
-      "B6_15August2022.jpg\n",
-      "B28_25August2022.jpg\n",
-      "B26_25August2022.jpg\n",
-      "B24_25August2022.jpg\n",
-      "B16_25August2022.HEIC\n",
-      "B12_25August2022.HEIC\n",
-      "B2_25August2022.HEIC\n",
-      "B20_25August2022.jpg\n",
-      "B8_25August2022.HEIC\n",
-      "B18_25August2022.jpg\n",
-      "B32_25August2022.jpg\n",
-      "Buchholz trap 18 8_25_2022 WCR and NCR present_3468.jpg\n",
-      "B4_25August2022.HEIC\n",
-      "B22_25August2022.jpg\n",
-      "B6_25August2022.HEIC\n",
-      "B10_25August2022.HEIC\n",
-      "B30_25August2022.jpg\n",
-      "B14_25August2022.HEIC\n",
-      "B18b_25August2022.jpg\n",
-      "B30b_25August2022.jpg\n",
-      "B14_20July2022.jpg\n",
-      "B8_20July2022.jpg\n",
-      "B32_20July2022.jpg\n",
-      "B26b_20July2022.jpg\n",
-      "B16_20JULY2022.jpg\n",
-      "B26_20July2022.jpg\n",
-      "B28 angled sticky-20JUL2022.jpg\n",
-      "B32 angled sticky-20JUL2022.jpg\n",
-      "B30_20July2022.jpg\n",
-      "B2_20July2022.jpg\n",
-      "B4_20July2022.jpg\n",
-      "B10_20July2022.jpg\n",
-      "B28_20July2022.jpg\n",
-      "B12_20July2022.jpg\n",
-      "B6_20July2022.jpg\n",
-      "B2_3AUGUST2022.HEIC\n",
-      "B18_3AUGUST2022.jpeg\n",
-      "B20_3AUGUST2022.jpeg\n",
-      "B4_3AUGUST2022.HEIC\n",
-      "B10_3AUGUST2022.HEIC\n",
-      "B26_3AUGUST2022.jpeg\n",
-      "B8_3AUGUST2022.HEIC\n",
-      "B28_3AUGUST2022.jpeg\n",
-      "B22_3AUGUST2022.jpeg\n",
-      "B14_3AUGUST2022.HEIC\n",
-      "B16_3AUGUST2022.HEIC\n",
-      "B24_3AUGUST2022.jpeg\n",
-      "B12_3AUGUST2022.HEIC\n",
-      "B6_3AUGUST2022.HEIC\n",
-      "B8_26JUL2022.jpg\n",
-      "B14_26JUL2022.jpg\n",
-      "B6_26JUL2022.jpg\n",
-      "B2_26JUL2022.jpg\n",
-      "B4_26JUL2022.jpg\n",
-      "B10_26JUL2022.jpg\n",
-      "B16_26JUL2022.jpg\n",
-      "B12_26JUL2022jpg.jpg\n",
-      "F2_3AUGUST2022.jpg\n",
-      "F28_3AUGUST2022.HEIC\n",
-      "F18_3AUGUST2022.HEIC\n",
-      "F10_3AUGUST2022.jpg\n",
-      "F16_3AUGUST2022.jpg\n",
-      "F6_3AUGUST2022.jpg\n",
-      "F12_3AUGUST2022jpg.jpg\n",
-      "F26_3AUGUST2022.HEIC\n",
-      "F4_3AUGUST2022.jpg\n",
-      "F8_3AUGUST2022.jpg\n",
-      "F20_3AUGUST2022.HEIC\n",
-      "F30_3AUGUST2022.HEIC\n",
-      "F22_3AUGUST2022.HEIC\n",
-      "F32_3AUGUST2022.HEIC\n",
-      "F14_3AUGUST2022.jpg\n",
-      "F24_3AUGUST2022.HEIC\n",
-      "F28_25August2022.HEIC\n",
-      "F2b_25August2022.heic\n",
-      "F26_25August2022.HEIC\n",
-      "F32_25August2022.HEIC\n",
-      "F16 wide view_25August2022.jpg\n",
-      "F16_25August2022.jpg\n",
-      "F12_25August2022.jpg\n",
-      "F30_25August2022.HEIC\n",
-      "F2_25August2022.jpg\n",
-      "F24_25August2022.HEIC\n",
-      "F20_25August2022.HEIC\n",
-      "F14_25August2022.jpg\n",
-      "F22_25August2022.HEIC\n",
-      "F18_25August2022.HEIC\n",
-      "F14b_25August2022.jpg\n",
-      "Faivre field soybean phenoloyg_25August2022.jpg\n",
-      "F14_20July2022.jpg\n",
-      "F32_20July2022.jpg\n",
-      "F8_20July2022.jpg\n",
-      "F12_20July2022.jpg\n",
-      "F28_20July2022.jpg\n",
-      "F30_20July2022.jpg\n",
-      "F18_20July2022.jpg\n",
-      "F20_20July2022.jpg\n",
-      "F2_20July2022.jpg\n",
-      "F26_20July2022.jpg\n",
-      "F22_20July2022.jpg\n",
-      "F6b_20July2022.jpg\n",
-      "F6_20July2022.jpg\n",
-      "F16_20July2022.jpg\n",
-      "F24_20July2022.jpg\n",
-      "F10_20July2022.jpg\n",
-      "F20_26JUL2022.HEIC\n",
-      "F26_26JUL2022.HEIC\n",
-      "F8_26JUL2022.jpg\n",
-      "F28_26JUL2022.HEIC\n",
-      "F30_26JUL2022.HEIC\n",
-      "F2_26JUL2022.jpg\n",
-      "F24_26JUL2022.HEIC\n",
-      "F18_26JUL2022.HEIC\n",
-      "F10_26JUL2022.jpg\n",
-      "F32_26JUL2022.HEIC\n",
-      "F22_26JUL2022.HEIC\n",
-      "M16_19July2022.jpg\n",
-      "M10_19July2022.jpg\n",
-      "M2_19July2022.jpg\n",
-      "M14_19July2022.jpg\n",
-      "M12_19July2022.jpg\n",
-      "M6_19July2022.jpg\n",
-      "M4_19July2022.jpg\n",
-      "M8_19July2022.jpg\n",
-      "M20_4AUGUST2022..jpg\n",
-      "M18_4AUGUST2022.jpg\n",
-      "M2_4AUGUST2022.HEIC\n",
-      "M22_4AUGUST2022.jpg\n",
-      "M30_4AUGUST2022..jpg\n",
-      "M8_4AUGUST2022.HEIC\n",
-      "M16_4AUGUST2022.HEIC\n",
-      "M10_4AUGUST2022.HEIC\n",
-      "M6_4AUGUST2022.HEIC\n",
-      "M12_4AUGUST2022.HEIC\n",
-      "M26_4AUGUST2022.jpg\n",
-      "M28_4AUGUST2022..jpg\n",
-      "M32_4AUGUST2022..jpg\n",
-      "M24_4AUGUST2022..jpg\n",
-      "M14_4AUGUST2022.HEIC\n",
-      "M14._17AUGUST2022.HEIC\n",
-      "M24_17AUGUST2022.jpg\n",
-      "M30_17AUGUST2022.jpg\n",
-      "M10_17AUGUST2022.HEIC\n",
-      "M22_17AUGUST2022.jpg\n",
-      "M2_17AUGUST2022.HEIC\n",
-      "M16_17AUGUST2022.HEIC\n",
-      "M26_17AUGUST2022.jpg\n",
-      "M12_17AUGUST2022.HEIC\n",
-      "M18_17AUGUST2022.jpg\n",
-      "M28_17AUGUST2022.jpg\n",
-      "M6_17AUGUST2022.HEIC\n",
-      "M32-17AUGUST2022.jpg\n",
-      "M4_17AUGUST2022.HEIC\n",
-      "M8_17AUGUST2022.HEIC\n",
-      "M20_17AUGUST2022.jpg\n",
-      "M12_27JULY2022.jpg\n",
-      "Moore_M_Phenology of soybean_27JULY2022.jpg\n",
-      "M8_27JULY2022.jpg\n",
-      "M26_27JULY2022.jpeg\n",
-      "M16_27JULY2022.jpg\n",
-      "M32_27JULY2022.jpeg\n",
-      "M30_27JULY2022.jpeg\n",
-      "M10_27JULY2022.jpg\n",
-      "M4_27JULY2022.jpg\n",
-      "M14_27JULY2022.jpg\n",
-      "M20_27JULY2022.jpeg\n",
-      "M28_27JULY2022.jpeg\n",
-      "M6_27JULY2022.jpg\n",
-      "M22_27JULY2022.jpeg\n",
-      "M24_27JULY2022.jpeg\n",
-      "M2_27JULY2022.jpg\n",
-      "U30_19JULY2022.jpg\n",
-      "U2_19JULY2022.HEIC\n",
-      "U10_19JULY2022.HEIC\n",
-      "U20_19JULY2022jpg.jpg\n",
-      "U22_19JULY2022.jpg\n",
-      "U16_19JULY2022.HEIC\n",
-      "U24_19JULY2022.jpg\n",
-      "U8_19JULY2022.HEIC\n",
-      "U14_19JULY2022.HEIC\n",
-      "U18_19JULY2022.jpg\n",
-      "U4_19JULY2022.HEIC\n",
-      "U12_19JULY2022.HEIC\n",
-      "U26_19JULY2022.jpg\n",
-      "U6_19JULY2022.HEIC\n",
-      "U32_19JULY2022.jpg\n",
-      "U28_19JULY2022.jpg\n",
-      "U26_5AUGUST2022.jpg\n",
-      "U32_5AUGUST2022jpg.jpg\n",
-      "IMG_4978.HEIC\n",
-      "IMG_4980.HEIC\n",
-      "IMG_4982.HEIC\n",
-      "IMG_4981.HEIC\n",
-      "U24_5AUGUST2022.jpg\n",
-      "U20_5AUGUST2022.jpg\n",
-      "IMG_4979.HEIC\n",
-      "IMG_4983.HEIC\n",
-      "IMG_4977.HEIC\n",
-      "U18_5AUGUST2022.jpg\n",
-      "U28_5AUGUST2022.jpg\n",
-      "U22_5AUGUST2022.jpg\n",
-      "U30_5AUGUST2022.jpg\n",
-      "U26_27JULY2022.jpg\n",
-      "U20_27JULY2022.jpg\n",
-      "U2_27JULY2022.HEIC\n",
-      "U16_27JULY2022.HEIC\n",
-      "U10_27JULY2022.HEIC\n",
-      "U24_27JULY2022.jpg\n",
-      "U6_27JULY2022.HEIC\n",
-      "U12_27JULY2022.HEIC\n",
-      "U8_27JULY2022.HEIC\n",
-      "U4_27JULY2022.HEIC\n",
-      "U28_27JULY2022.jpg\n",
-      "U32_27JULY2022.jpg\n",
-      "U30_27JULY2022.jpg\n",
-      "U22_27JULY2022jpg.jpg\n",
-      "U18_27JULY2022.jpg\n",
-      "U14_27JULY2022.HEIC\n",
-      "U4_10AUGUST2022.HEIC\n",
-      "U22_10AUGUST2022.jpg\n",
-      "U26_10AUGUST2022.jpg\n",
-      "U24_10AUGUST2022.jpg\n",
-      "U28_10AUGUST2022.jpg\n",
-      "U30_10AUGUST2022.jpg\n",
-      "U10_10AUGUST2022.HEIC\n",
-      "U32_10AUGUST2022.jpg\n",
-      "U18_10AUGUST2022.jpg\n",
-      "U20_10AUGUST2022.jpg\n",
-      "U16_10AUGUST2022.HEIC\n",
-      "U14_10AUGUST2022.HEIC\n",
-      "U2_10AUGUST2022.HEIC\n",
-      "U8_10AUGUST2022.HEIC\n",
-      "U12_10AUGUST2022.HEIC\n",
-      "U6_10AUGUST2022.HEIC\n",
-      "U16_23August2022.HEIC\n",
-      "U12_23August2022.HEIC\n",
-      "U26_23August2022.jpg\n",
-      "U20_23August2022.jpg\n",
-      "U10_23August2022.HEIC\n",
-      "U6_23August2022.HEIC\n",
-      "U4_23August2022.HEIC\n",
-      "U2_23August2022.HEIC\n",
-      "U28_23August2022.jpg\n",
-      "U22_23August2022.jpg\n",
-      "U14_23August2022.HEIC\n",
-      "U18_23August2022.HEIC\n",
-      "U18_23August2022.jpg\n",
-      "U24_23August2022.jpg\n",
-      "U28b_23August2022.jpg\n",
-      "U30_23August2022.jpg\n",
-      "U32_23August2022.jpg\n"
+      "B12_15August2022.jpg 421    2\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B4_15August2022.jpg 417    2\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B24_15August2022.jpg 427    2\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B10_15August2022.jpg 420    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B2_15August2022.jpg 416    2\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B6_15August2022.jpg 418    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B28_25August2022.jpg 525    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B26_25August2022.jpg 524    3\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "1\n",
+      "B24_25August2022.jpg 523    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B2_25August2022.HEIC 512    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "1\n",
+      "B18_25August2022.jpg 520    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "6\n",
+      "B18c_25August2022.jpg 520    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "6\n",
+      "B30_25August2022.jpg 526    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B18b_25August2022.jpg 520    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "6\n",
+      "B30b_25August2022.jpg 526    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B2_3AUGUST2022.HEIC 256    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "B12_3AUGUST2022.HEIC 261    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F2_3AUGUST2022.jpg 272    3\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F28_3AUGUST2022.HEIC 285    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F18_3AUGUST2022.HEIC 280    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F10_3AUGUST2022.jpg 276    2\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F16_3AUGUST2022.jpg 279    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "1\n",
+      "F6_3AUGUST2022.jpg 274    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F4_3AUGUST2022.jpg 273    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F20_3AUGUST2022.HEIC 281    3\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F32_3AUGUST2022.HEIC 287    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "1\n",
+      "F14_3AUGUST2022.jpg 278    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "1\n",
+      "F26_26JUL2022.HEIC 156    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n",
+      "F2_26JUL2022.jpg 144    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "1\n",
+      "M10_19July2022.jpg 20    1\n",
+      "Name: T-top side WCR, dtype: object\n",
+      "0\n"
      ]
     }
    ],
    "source": [
-    "folders = [\"buchhloz\", \"faivre\", \"moore\", \"underwood\"]\n",
+    "## folders = [\"buchhloz\", \"faivre\", \"moore\", \"underwood\"]\n",
     "\n",
-    "transform = transforms.Compose([\n",
-    "    transforms.PILToTensor()\n",
-    "])\n",
     "\n",
+    "images = []\n",
+    "total_beetles = 0\n",
     "for folder_name in folders:\n",
     "    dates = []\n",
     "    for date in glob.glob(r\"/raid/projects/akhot2/group-01-phys371-sp2023/data/\" + folder_name + \"/*\"):\n",
     "        dates.append(date)\n",
     "    for date in dates:\n",
     "        for image_file in glob.glob(date +  \"/*\"):\n",
-    "            print(image_file.split(\"/\")[-1])\n",
+    "            #print(image_file.split(\"/\")[-1])\n",
+    "            f_name = image_file.split(\"/\")[-1]\n",
     "            #continue\n",
     "            if image_file.split(\".\")[-1].lower() == \"heic\":\n",
     "                heif_file = pillow_heif.read_heif(image_file)\n",
@@ -738,30 +564,90 @@
     "                    heif_file.data,\n",
     "                    \"raw\",\n",
     "                )\n",
+    "                #print(r\"/raid/projects/akhot2/group-01-phys371-sp2023/beetles/\"+f_name[:-5]+\".jpg\")\n",
+    "                #img.save(r\"/raid/projects/akhot2/group-01-phys371-sp2023/beetles/\"+f_name[:-5]+\".jpg\")\n",
     "                #print(\"HEIC FILE:\")\n",
     "            else:\n",
     "                img = Image.open(image_file)\n",
+    "                #print(r\"/raid/projects/akhot2/group-01-phys371-sp2023/beetles/\"+f_name)\n",
+    "                #img.save(r\"/raid/projects/akhot2/group-01-phys371-sp2023/beetles/\"+f_name)\n",
     "                #print(\"JPG FILE:\")\n",
-    "            img_tensor = transform(img)\n",
+    "            name = f_name.split(\"_\")[0]\n",
+    "            if name[-1].isalpha():\n",
+    "                name = name[0:-1]\n",
+    "            day = f_name.split(\"_\")[1][0:2]\n",
+    "            if day[1].isalpha():\n",
+    "                day = \"0\" + day[0]\n",
+    "            if \"august\" in f_name.lower():\n",
+    "                date = \"2022-08-\" + day \n",
+    "            else:\n",
+    "                date = \"2022-07-\" + day\n",
+    "            beetle_count = df_angled[(df_angled['2022 Data Collect. Date'] == date) & (df_angled['Sticky Trap Name'] == name)]['T-top side WCR']\n",
+    "            row_number = beetle_count.index[0]\n",
+    "            if (int(beetle_count) != 0):\n",
+    "                print(f_name, beetle_count)\n",
+    "                print(int(df_angled[(df_angled['2022 Data Collect. Date'] == date) & (df_angled['Sticky Trap Name'] == name)]['T-top side NCR']))\n",
+    "            #total_beetles += int(beetle_count)\n",
+    "            #images.append((img_tensor, int(beetle_count), row_number))\n",
+    "            \n",
+    "            \n",
     "            #plt.imshow(img)\n",
     "            #plt.show()\n",
     "            #print(img_tensor.shape)\n",
+    "            \n",
     "        \n",
     "    "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
-   "id": "eaa3da3e",
+   "execution_count": 33,
+   "id": "80679db4",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "41"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total_beetles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a397557a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['/raid/projects/akhot2/group-01-phys371-sp2023/data/moore/M_7.19.22',\n",
+       " '/raid/projects/akhot2/group-01-phys371-sp2023/data/moore/M_8.4.22',\n",
+       " '/raid/projects/akhot2/group-01-phys371-sp2023/data/moore/M_8.17.22',\n",
+       " '/raid/projects/akhot2/group-01-phys371-sp2023/data/moore/M_7.27.22']"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dates"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "80679db4",
+   "id": "997ccb7f",
    "metadata": {},
    "outputs": [],
    "source": []
diff --git a/training/dataloader.py b/training/dataloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..b12d444139013915e4b1dfd7233176ce133dc875
--- /dev/null
+++ b/training/dataloader.py
@@ -0,0 +1,103 @@
+import os
+import json
+import torch
+from torchvision.io import read_image
+from torch.utils.data import Dataset
+from torchvision.transforms import Compose
+import torchvideo.transforms as tvt
+from tqdm import tqdm
+from torchvision.transforms import InterpolationMode
+import pickle
+import os
+
+
+class ASLDataset(Dataset):
+    """Clips of frame-extracted video paired with integer class labels.
+
+    The pickle at ``file_path`` maps each word to an iterable of
+    ``(frame_start, frame_end, video_url)`` records; every record is one clip.
+    NOTE(review): pickle can run arbitrary code - only load trusted files.
+    """
+
+    FRAME_SHAPE = (3, 480, 640)  # (channels, height, width) of a stored frame
+
+    def __init__(self, file_path, size=256, transforms=None, frames=16,
+                 video_root="/raid/projects/weustis/data/asl/videos/"):
+        """Index every clip found in the pickle at ``file_path``.
+
+        Args:
+            file_path: pickle file mapping word -> clip records.
+            size: edge length used by the default resize transform.
+            transforms: callable applied to each clip; built from ``size`` if None.
+            frames: frames per sample. The original read an undefined name
+                ``frame`` here (NameError); 16 is a placeholder default.
+            video_root: directory holding one folder of JPEG frames per video.
+        """
+        with open(file_path, "rb") as f:
+            data = pickle.load(f)
+
+        self.frames = frames
+        self.size = size
+        self.video_root = video_root
+        if transforms is None:
+            transforms = Compose([
+                tvt.NormalizeVideo([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], channel_dim=1),
+                tvt.ResizeVideo((size, size), interpolation=InterpolationMode.BICUBIC),
+            ])
+        self.transforms = transforms
+
+        # word -> class index, following the iteration order of the pickle
+        self.labels = {word: index for index, word in enumerate(data)}
+        # flat list of (word, clip record) pairs, one entry per sample
+        self.clips = [(word, clip) for word in data for clip in data[word]]
+
+    def __len__(self):
+        """Return the number of clips."""
+        return len(self.clips)
+
+    def _window_start(self, frame_start, frame_end):
+        """Pick the first frame number of a ``self.frames``-long window."""
+        t = frame_end - frame_start  # clip length in frames
+        if t < self.frames:
+            # Clip shorter than the window: start early so the window still
+            # covers it. The original's following ``if``/``else`` reset this to 0.
+            offset = int(torch.randint(t - self.frames, 0, (1,))) + 1
+        elif t > self.frames:
+            offset = int(torch.randint(t - self.frames, (1,)))
+        else:
+            offset = 0
+        return max(frame_start + offset, 0)  # never ask for a negative frame
+
+    def __getitem__(self, idx):
+        """Return ``(clip, label)``; the clip is the first item yielded by
+        ``self.transforms`` for a (1, C, T, H, W) float tensor."""
+        word, (frame_start, frame_end, video_url) = self.clips[idx]
+        label = self.labels[word]
+
+        # folder name: last two URL components joined by "_", cut at the first "."
+        video_path = self.video_root + "_".join(video_url.split('/')[-2:]).split('.')[0] + "/"
+        if not os.path.isdir(video_path):
+            raise FileNotFoundError("no frame folder for clip: " + video_path)
+
+        start = self._window_start(int(frame_start), int(frame_end))
+        r = torch.empty(self.frames, *self.FRAME_SHAPE)
+        for slot, number in enumerate(range(start, start + self.frames)):
+            r[slot] = read_image(video_path + "%06d" % (number,) + ".jpg")
+        # NOTE(review): pixels stay in 0-255 while the default normalisation
+        # uses 0-1 ImageNet statistics - confirm whether scaling is intended.
+        r = r.permute([1, 0, 2, 3]).unsqueeze(0)  # (T, C, H, W) -> (1, C, T, H, W)
+        r = next(self.transforms(r))
+        return r, label
+    
+if __name__ == "__main__":
+    # Smoke test: build the dataset and pull every batch through a DataLoader
+    # once. Run with: python dataloader.py
+    from torch.utils.data import DataLoader
+
+    asl_clips = ASLDataset("/raid/projects/weustis/data/asl/dataset.json")
+    loader = DataLoader(
+        asl_clips, batch_size=8, shuffle=True, num_workers=40,
+        pin_memory=True, prefetch_factor=1, persistent_workers=True,
+    )
+    for _clip, _label in tqdm(loader):
+        pass
\ No newline at end of file
diff --git a/training/preprocess.py b/training/preprocess.py
index 7371f47a6f8bd23a8fa1a8b2a9479cdd76380e54..3bcb59843994110fd680d73a76c24f4b81a75cde 100644
--- a/training/preprocess.py
+++ b/training/preprocess.py
@@ -1 +1,75 @@
-B
\ No newline at end of file
+import glob
+from tqdm import tqdm
+import pandas as pd   
+import torch
+from PIL import Image
+import torchvision.transforms as transforms
+import matplotlib.pyplot as plt
+import pillow_heif
+import os
+import pickle
+
+# Build ../data/training-data.pkl: one (image tensor, beetle count, label row)
+# tuple per trap photo, with counts looked up in the trap-label spreadsheet.
+labels_path = "../data/trap_labels.xlsx"
+data_root = r"/raid/projects/akhot2/group-01-phys371-sp2023/data/"
+folders = ["buchhloz", "faivre", "moore", "underwood"]
+
+df = pd.read_excel(labels_path)
+# Only angled traps are used; every cell becomes str so dates compare as text.
+df_angled = df[df["trap orient'n"] == "Angled "].astype(str)
+
+transform = transforms.Compose([transforms.PILToTensor()])
+
+
+def parse_trap_and_date(f_name):
+    """Split a photo name such as ``F16_3AUGUST2022.jpg`` into (trap, date).
+
+    A trailing letter on the trap name is dropped, the day is zero-padded and
+    the month is August when the name contains "aug", otherwise July.
+    """
+    parts = f_name.split("_")
+    name = parts[0]
+    if name[-1].isalpha():
+        name = name[:-1]
+    day = parts[1][0:2]
+    if day[1].isalpha():
+        day = "0" + day[0]
+    # "aug" also catches abbreviated names, which "august" dated as July
+    month = "08" if "aug" in f_name.lower() else "07"
+    return name, "2022-" + month + "-" + day
+
+
+def load_image(image_file):
+    """Return the photo at ``image_file`` as a tensor, decoding HEIC if needed."""
+    if image_file.split(".")[-1].lower() == "heic":
+        heif_file = pillow_heif.read_heif(image_file)
+        img = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw")
+        return transform(img)
+    with Image.open(image_file) as img:  # release the file handle once decoded
+        return transform(img)
+
+
+print("Loading images...")
+images = []
+total_beetles = 0
+for folder_name in folders:
+    for date_dir in glob.glob(data_root + folder_name + "/*"):
+        for image_file in glob.glob(date_dir + "/*"):
+            f_name = os.path.basename(image_file)
+            name, date = parse_trap_and_date(f_name)
+            match = df_angled[(df_angled['2022 Data Collect. Date'] == date) & (df_angled['Sticky Trap Name'] == name)]['T-top side WCR']
+            if len(match) != 1:
+                raise LookupError("expected one label row for " + f_name + ", found " + str(len(match)))
+            # .iloc[0] replaces int(Series), which pandas has deprecated
+            beetle_count = int(match.iloc[0])
+            total_beetles += beetle_count
+            images.append((load_image(image_file), beetle_count, match.index[0]))
+
+print("Loading images complete!")
+
+folder = '../data'
+print("Uploading data to " + folder +"/training-data"+".pkl")
+os.makedirs(folder, exist_ok=True)
+with open(folder+"/training-data"+".pkl", "wb") as f:
+    pickle.dump(images, f)
\ No newline at end of file
diff --git a/yolov5 b/yolov5
new file mode 160000
index 0000000000000000000000000000000000000000..226a5e43cbceff5de43a71c4fb3f3f7478a9bb03
--- /dev/null
+++ b/yolov5
@@ -0,0 +1 @@
+Subproject commit 226a5e43cbceff5de43a71c4fb3f3f7478a9bb03