From 19606b81a708a6d701d96a189a4bfbef03064da7 Mon Sep 17 00:00:00 2001
From: fresleven <khotayush@gmail.com>
Date: Tue, 7 Feb 2023 20:07:04 -0600
Subject: [PATCH] Starting preprocessing

---
 .../PreProcessor-checkpoint.ipynb             | 701 ++++++++++++++++
 training/PreProcessor.ipynb                   | 791 ++++++++++++++++++
 training/preprocess.py                        |   1 +
 3 files changed, 1493 insertions(+)
 create mode 100644 training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb
 create mode 100644 training/PreProcessor.ipynb
 create mode 100644 training/preprocess.py

diff --git a/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb b/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb
new file mode 100644
index 0000000..7c1361f
--- /dev/null
+++ b/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb
@@ -0,0 +1,701 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ff555f6b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import glob\n",
+    "from tqdm import tqdm\n",
+    "import pandas as pd   \n",
+    "import torch\n",
+    "from PIL import Image\n",
+    "import torchvision.transforms as transforms\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pillow_heif"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "fd6db132",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/raid/projects/akhot2/conda/envs/akhot2/lib/python3.9/site-packages/openpyxl/worksheet/header_footer.py:48: UserWarning: Cannot parse header or footer so it will be ignored\n",
+      "  warn(\"\"\"Cannot parse header or footer so it will be ignored\"\"\")\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Grand Acc #</th>\n",
+       "      <th>Weekly Acc#</th>\n",
+       "      <th>Sample collection period</th>\n",
+       "      <th>Sticky trap pair (rep)</th>\n",
+       "      <th>2022 Julian Date</th>\n",
+       "      <th>Month</th>\n",
+       "      <th>2022 Set up Date</th>\n",
+       "      <th>2022 Data Collect. Date</th>\n",
+       "      <th>Coll. intrvl (d)</th>\n",
+       "      <th>Coll. hour</th>\n",
+       "      <th>...</th>\n",
+       "      <th>NCR/trap /day</th>\n",
+       "      <th>WCR/trap /day</th>\n",
+       "      <th>Total CRW /trap /day</th>\n",
+       "      <th>NCR/trap top /day</th>\n",
+       "      <th>WCR/trap top/day</th>\n",
+       "      <th>Total CRW /trap top/day</th>\n",
+       "      <th>Proportion NCR on top</th>\n",
+       "      <th>Proportion WCR on top</th>\n",
+       "      <th>Proportion All CRW on top</th>\n",
+       "      <th>Notes</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>10</td>\n",
+       "      <td>10</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 43 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Grand Acc #  Weekly Acc#  Sample collection period  Sticky trap pair (rep)  \\\n",
+       "0            2            2                         1                       1   \n",
+       "1            4            4                         1                       2   \n",
+       "2            6            6                         1                       3   \n",
+       "3            8            8                         1                       4   \n",
+       "4           10           10                         1                       5   \n",
+       "\n",
+       "   2022 Julian Date  Month 2022 Set up Date 2022 Data Collect. Date  \\\n",
+       "0               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "1               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "2               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "3               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "4               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "\n",
+       "   Coll. intrvl (d)  Coll. hour  ...  NCR/trap /day  WCR/trap /day  \\\n",
+       "0              11.0       11.35  ...            0.0            0.0   \n",
+       "1              11.0       11.35  ...            0.0            0.0   \n",
+       "2              11.0       11.35  ...            0.0            0.0   \n",
+       "3              11.0       11.35  ...            0.0            0.0   \n",
+       "4              11.0       11.35  ...            0.0            0.0   \n",
+       "\n",
+       "  Total CRW /trap /day NCR/trap top /day  WCR/trap top/day  \\\n",
+       "0                  0.0               0.0               0.0   \n",
+       "1                  0.0               0.0               0.0   \n",
+       "2                  0.0               0.0               0.0   \n",
+       "3                  0.0               0.0               0.0   \n",
+       "4                  0.0               0.0               0.0   \n",
+       "\n",
+       "  Total CRW /trap top/day  Proportion NCR on top Proportion WCR on top  \\\n",
+       "0                     0.0                    NaN                   NaN   \n",
+       "1                     0.0                    NaN                   NaN   \n",
+       "2                     0.0                    NaN                   NaN   \n",
+       "3                     0.0                    NaN                   NaN   \n",
+       "4                     0.0                    NaN                   NaN   \n",
+       "\n",
+       "  Proportion All CRW on top Notes  \n",
+       "0                       NaN   NaN  \n",
+       "1                       NaN   NaN  \n",
+       "2                       NaN   NaN  \n",
+       "3                       NaN   NaN  \n",
+       "4                       NaN   NaN  \n",
+       "\n",
+       "[5 rows x 43 columns]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "labels_path = \"/raid/projects/akhot2/group-01-phys371-sp2023/data/trap_labels.xlsx\"  # Excel workbook with the trap labels\n",
+    "df = pd.read_excel(io=labels_path)  # load the workbook into a DataFrame\n",
+    "df.head(n=5)  # preview the first five rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "7097a875",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Grand Acc #', 'Weekly Acc#', 'Sample collection period',\n",
+       "       'Sticky trap pair (rep)', '2022 Julian Date', 'Month',\n",
+       "       '2022 Set up Date', '2022 Data Collect. Date', 'Coll. intrvl (d)',\n",
+       "       'Coll. hour', 'Trap visitor', 'Drone pilot', 'On farm location',\n",
+       "       'Site Abrv.', 'trap angle', 'trap orient'n', 'Trap #',\n",
+       "       'Sticky Trap Name', 'T-top side NCR', 'bottom side NCR',\n",
+       "       'T-top side WCR', 'bottom side WCR', 'WCR mal', 'WCR fem',\n",
+       "       'T-top side \"other\"', 'Bottom side \"other\"', 'T-top side total CRW',\n",
+       "       'bottom side total CRW', 'Trap Total NCR', 'Trap Total WCR',\n",
+       "       'Trap Total CRW', 'Proportion NCR in total CRW',\n",
+       "       'Proportion WCR in total CRW', 'NCR/trap /day', 'WCR/trap /day',\n",
+       "       'Total CRW /trap /day', 'NCR/trap top /day', 'WCR/trap top/day',\n",
+       "       'Total CRW /trap top/day', 'Proportion NCR on top',\n",
+       "       'Proportion WCR on top', 'Proportion All CRW on top', 'Notes'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "24d3874e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>trap orient'n</th>\n",
+       "      <th>2022 Data Collect. Date</th>\n",
+       "      <th>Sticky Trap Name</th>\n",
+       "      <th>T-top side WCR</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U2</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U4</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U6</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U8</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U10</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>603</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M24</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>604</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M26</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>605</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M28</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>606</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M30</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>607</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M32</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>320 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    trap orient'n 2022 Data Collect. Date Sticky Trap Name T-top side WCR\n",
+       "0         Angled               2022-07-19               U2              0\n",
+       "1         Angled               2022-07-19               U4              0\n",
+       "2         Angled               2022-07-19               U6              0\n",
+       "3         Angled               2022-07-19               U8              0\n",
+       "4         Angled               2022-07-19              U10              0\n",
+       "..            ...                     ...              ...            ...\n",
+       "603       Angled                      NaT              M24            NaN\n",
+       "604       Angled                      NaT              M26            NaN\n",
+       "605       Angled                      NaT              M28            NaN\n",
+       "606       Angled                      NaT              M30            NaN\n",
+       "607       Angled                      NaT              M32            NaN\n",
+       "\n",
+       "[320 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_angled = df[df[\"trap orient'n\"] == \"Angled \"].copy()  # matching value carries a trailing space; copy so the column can be reassigned\n",
+    "df_angled[\"2022 Data Collect. Date\"] = df_angled[\"2022 Data Collect. Date\"].astype(str)  # astype returns a new Series, so store it\n",
+    "df_angled[[\"trap orient'n\", \"2022 Data Collect. Date\", \"Sticky Trap Name\", \"T-top side WCR\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ccf1183",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "B8_15August2022.jpg\n",
+      "B12_15August2022.jpg\n",
+      "B26_15August2022.jpg\n",
+      "B22_15August2022.jpg\n",
+      "B4_15August2022.jpg\n",
+      "B20_15August2022.jpg\n",
+      "B32_15August2022.jpg\n",
+      "B16_15August2022.jpg\n",
+      "B18_15August2022.jpg\n",
+      "B28_15August2022.jpg\n",
+      "B14_15August2022.jpg\n",
+      "B24_15August2022.jpg\n",
+      "B10_15August2022.jpg\n",
+      "B2_15August2022.jpg\n",
+      "B6_15August2022.jpg\n",
+      "B28_25August2022.jpg\n",
+      "B26_25August2022.jpg\n",
+      "B24_25August2022.jpg\n",
+      "B16_25August2022.HEIC\n",
+      "B12_25August2022.HEIC\n",
+      "B2_25August2022.HEIC\n",
+      "B20_25August2022.jpg\n",
+      "B8_25August2022.HEIC\n",
+      "B18_25August2022.jpg\n",
+      "B32_25August2022.jpg\n",
+      "Buchholz trap 18 8_25_2022 WCR and NCR present_3468.jpg\n",
+      "B4_25August2022.HEIC\n",
+      "B22_25August2022.jpg\n",
+      "B6_25August2022.HEIC\n",
+      "B10_25August2022.HEIC\n",
+      "B30_25August2022.jpg\n",
+      "B14_25August2022.HEIC\n",
+      "B18b_25August2022.jpg\n",
+      "B30b_25August2022.jpg\n",
+      "B14_20July2022.jpg\n",
+      "B8_20July2022.jpg\n",
+      "B32_20July2022.jpg\n",
+      "B26b_20July2022.jpg\n",
+      "B16_20JULY2022.jpg\n",
+      "B26_20July2022.jpg\n",
+      "B28 angled sticky-20JUL2022.jpg\n",
+      "B32 angled sticky-20JUL2022.jpg\n",
+      "B30_20July2022.jpg\n",
+      "B2_20July2022.jpg\n",
+      "B4_20July2022.jpg\n",
+      "B10_20July2022.jpg\n",
+      "B28_20July2022.jpg\n",
+      "B12_20July2022.jpg\n",
+      "B6_20July2022.jpg\n",
+      "B2_3AUGUST2022.HEIC\n",
+      "B18_3AUGUST2022.jpeg\n",
+      "B20_3AUGUST2022.jpeg\n",
+      "B4_3AUGUST2022.HEIC\n",
+      "B10_3AUGUST2022.HEIC\n",
+      "B26_3AUGUST2022.jpeg\n",
+      "B8_3AUGUST2022.HEIC\n",
+      "B28_3AUGUST2022.jpeg\n",
+      "B22_3AUGUST2022.jpeg\n",
+      "B14_3AUGUST2022.HEIC\n",
+      "B16_3AUGUST2022.HEIC\n",
+      "B24_3AUGUST2022.jpeg\n",
+      "B12_3AUGUST2022.HEIC\n",
+      "B6_3AUGUST2022.HEIC\n",
+      "B8_26JUL2022.jpg\n",
+      "B14_26JUL2022.jpg\n",
+      "B6_26JUL2022.jpg\n",
+      "B2_26JUL2022.jpg\n",
+      "B4_26JUL2022.jpg\n",
+      "B10_26JUL2022.jpg\n",
+      "B16_26JUL2022.jpg\n",
+      "B12_26JUL2022jpg.jpg\n",
+      "F2_3AUGUST2022.jpg\n",
+      "F28_3AUGUST2022.HEIC\n",
+      "F18_3AUGUST2022.HEIC\n",
+      "F10_3AUGUST2022.jpg\n",
+      "F16_3AUGUST2022.jpg\n",
+      "F6_3AUGUST2022.jpg\n",
+      "F12_3AUGUST2022jpg.jpg\n",
+      "F26_3AUGUST2022.HEIC\n",
+      "F4_3AUGUST2022.jpg\n",
+      "F8_3AUGUST2022.jpg\n",
+      "F20_3AUGUST2022.HEIC\n",
+      "F30_3AUGUST2022.HEIC\n",
+      "F22_3AUGUST2022.HEIC\n",
+      "F32_3AUGUST2022.HEIC\n",
+      "F14_3AUGUST2022.jpg\n",
+      "F24_3AUGUST2022.HEIC\n",
+      "F28_25August2022.HEIC\n",
+      "F2b_25August2022.heic\n",
+      "F26_25August2022.HEIC\n",
+      "F32_25August2022.HEIC\n",
+      "F16 wide view_25August2022.jpg\n",
+      "F16_25August2022.jpg\n",
+      "F12_25August2022.jpg\n",
+      "F30_25August2022.HEIC\n",
+      "F2_25August2022.jpg\n",
+      "F24_25August2022.HEIC\n",
+      "F20_25August2022.HEIC\n",
+      "F14_25August2022.jpg\n",
+      "F22_25August2022.HEIC\n",
+      "F18_25August2022.HEIC\n",
+      "F14b_25August2022.jpg\n",
+      "Faivre field soybean phenoloyg_25August2022.jpg\n",
+      "F14_20July2022.jpg\n",
+      "F32_20July2022.jpg\n",
+      "F8_20July2022.jpg\n",
+      "F12_20July2022.jpg\n",
+      "F28_20July2022.jpg\n",
+      "F30_20July2022.jpg\n",
+      "F18_20July2022.jpg\n",
+      "F20_20July2022.jpg\n",
+      "F2_20July2022.jpg\n",
+      "F26_20July2022.jpg\n",
+      "F22_20July2022.jpg\n",
+      "F6b_20July2022.jpg\n",
+      "F6_20July2022.jpg\n",
+      "F16_20July2022.jpg\n",
+      "F24_20July2022.jpg\n",
+      "F10_20July2022.jpg\n",
+      "F20_26JUL2022.HEIC\n",
+      "F26_26JUL2022.HEIC\n",
+      "F8_26JUL2022.jpg\n",
+      "F28_26JUL2022.HEIC\n",
+      "F30_26JUL2022.HEIC\n",
+      "F2_26JUL2022.jpg\n",
+      "F24_26JUL2022.HEIC\n",
+      "F18_26JUL2022.HEIC\n",
+      "F10_26JUL2022.jpg\n",
+      "F32_26JUL2022.HEIC\n",
+      "F22_26JUL2022.HEIC\n",
+      "M16_19July2022.jpg\n",
+      "M10_19July2022.jpg\n",
+      "M2_19July2022.jpg\n",
+      "M14_19July2022.jpg\n",
+      "M12_19July2022.jpg\n",
+      "M6_19July2022.jpg\n",
+      "M4_19July2022.jpg\n",
+      "M8_19July2022.jpg\n",
+      "M20_4AUGUST2022..jpg\n",
+      "M18_4AUGUST2022.jpg\n",
+      "M2_4AUGUST2022.HEIC\n",
+      "M22_4AUGUST2022.jpg\n",
+      "M30_4AUGUST2022..jpg\n",
+      "M8_4AUGUST2022.HEIC\n",
+      "M16_4AUGUST2022.HEIC\n",
+      "M10_4AUGUST2022.HEIC\n",
+      "M6_4AUGUST2022.HEIC\n",
+      "M12_4AUGUST2022.HEIC\n",
+      "M26_4AUGUST2022.jpg\n",
+      "M28_4AUGUST2022..jpg\n",
+      "M32_4AUGUST2022..jpg\n",
+      "M24_4AUGUST2022..jpg\n",
+      "M14_4AUGUST2022.HEIC\n",
+      "M14._17AUGUST2022.HEIC\n",
+      "M24_17AUGUST2022.jpg\n",
+      "M30_17AUGUST2022.jpg\n",
+      "M10_17AUGUST2022.HEIC\n",
+      "M22_17AUGUST2022.jpg\n",
+      "M2_17AUGUST2022.HEIC\n",
+      "M16_17AUGUST2022.HEIC\n",
+      "M26_17AUGUST2022.jpg\n",
+      "M12_17AUGUST2022.HEIC\n",
+      "M18_17AUGUST2022.jpg\n",
+      "M28_17AUGUST2022.jpg\n",
+      "M6_17AUGUST2022.HEIC\n",
+      "M32-17AUGUST2022.jpg\n",
+      "M4_17AUGUST2022.HEIC\n",
+      "M8_17AUGUST2022.HEIC\n",
+      "M20_17AUGUST2022.jpg\n",
+      "M12_27JULY2022.jpg\n",
+      "Moore_M_Phenology of soybean_27JULY2022.jpg\n",
+      "M8_27JULY2022.jpg\n",
+      "M26_27JULY2022.jpeg\n",
+      "M16_27JULY2022.jpg\n",
+      "M32_27JULY2022.jpeg\n",
+      "M30_27JULY2022.jpeg\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Walk every folder/date directory, load each image (HEIC or other) and convert it to a tensor.\n",
+    "# Root directory that contains one sub-folder per entry in `folders`.\n",
+    "data_root = \"/raid/projects/akhot2/group-01-phys371-sp2023/data/\"\n",
+    "# Names are concatenated verbatim into the glob pattern below.\n",
+    "folders = [\"buchhloz\", \"faivre\", \"moore\", \"underwood\"]\n",
+    "\n",
+    "# PILToTensor converts a PIL image to a tensor without rescaling pixel values.\n",
+    "transform = transforms.Compose([\n",
+    "    transforms.PILToTensor()\n",
+    "])\n",
+    "\n",
+    "for folder_name in folders:\n",
+    "    # glob already returns a list, so no element-by-element copy is needed.\n",
+    "    dates = glob.glob(data_root + folder_name + \"/*\")\n",
+    "    for date in dates:\n",
+    "        for image_file in glob.glob(date + \"/*\"):\n",
+    "            # HEIC files are decoded through pillow_heif; everything else goes to PIL directly.\n",
+    "            if image_file.split(\".\")[-1].lower() == \"heic\":\n",
+    "                heif_file = pillow_heif.read_heif(image_file)\n",
+    "                img = Image.frombytes(\n",
+    "                    heif_file.mode,\n",
+    "                    heif_file.size,\n",
+    "                    heif_file.data,\n",
+    "                    \"raw\",\n",
+    "                )\n",
+    "            else:\n",
+    "                img = Image.open(image_file)\n",
+    "            # img_tensor is not used further in this cell.\n",
+    "            img_tensor = transform(img)\n",
+    "            # Print the file name without its directory.\n",
+    "            print(image_file.split(\"/\")[-1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "id": "eaa3da3e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "80679db4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/training/PreProcessor.ipynb b/training/PreProcessor.ipynb
new file mode 100644
index 0000000..a2863fb
--- /dev/null
+++ b/training/PreProcessor.ipynb
@@ -0,0 +1,791 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "ff555f6b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import glob\n",
+    "from tqdm import tqdm\n",
+    "import pandas as pd   \n",
+    "import torch\n",
+    "from PIL import Image\n",
+    "import torchvision.transforms as transforms\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pillow_heif"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "fd6db132",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/raid/projects/akhot2/conda/envs/akhot2/lib/python3.9/site-packages/openpyxl/worksheet/header_footer.py:48: UserWarning: Cannot parse header or footer so it will be ignored\n",
+      "  warn(\"\"\"Cannot parse header or footer so it will be ignored\"\"\")\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Grand Acc #</th>\n",
+       "      <th>Weekly Acc#</th>\n",
+       "      <th>Sample collection period</th>\n",
+       "      <th>Sticky trap pair (rep)</th>\n",
+       "      <th>2022 Julian Date</th>\n",
+       "      <th>Month</th>\n",
+       "      <th>2022 Set up Date</th>\n",
+       "      <th>2022 Data Collect. Date</th>\n",
+       "      <th>Coll. intrvl (d)</th>\n",
+       "      <th>Coll. hour</th>\n",
+       "      <th>...</th>\n",
+       "      <th>NCR/trap /day</th>\n",
+       "      <th>WCR/trap /day</th>\n",
+       "      <th>Total CRW /trap /day</th>\n",
+       "      <th>NCR/trap top /day</th>\n",
+       "      <th>WCR/trap top/day</th>\n",
+       "      <th>Total CRW /trap top/day</th>\n",
+       "      <th>Proportion NCR on top</th>\n",
+       "      <th>Proportion WCR on top</th>\n",
+       "      <th>Proportion All CRW on top</th>\n",
+       "      <th>Notes</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>10</td>\n",
+       "      <td>10</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2022-07-08</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>11.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 43 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Grand Acc #  Weekly Acc#  Sample collection period  Sticky trap pair (rep)  \\\n",
+       "0            2            2                         1                       1   \n",
+       "1            4            4                         1                       2   \n",
+       "2            6            6                         1                       3   \n",
+       "3            8            8                         1                       4   \n",
+       "4           10           10                         1                       5   \n",
+       "\n",
+       "   2022 Julian Date  Month 2022 Set up Date 2022 Data Collect. Date  \\\n",
+       "0               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "1               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "2               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "3               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "4               NaN    NaN       2022-07-08              2022-07-19   \n",
+       "\n",
+       "   Coll. intrvl (d)  Coll. hour  ...  NCR/trap /day  WCR/trap /day  \\\n",
+       "0              11.0       11.35  ...            0.0            0.0   \n",
+       "1              11.0       11.35  ...            0.0            0.0   \n",
+       "2              11.0       11.35  ...            0.0            0.0   \n",
+       "3              11.0       11.35  ...            0.0            0.0   \n",
+       "4              11.0       11.35  ...            0.0            0.0   \n",
+       "\n",
+       "  Total CRW /trap /day NCR/trap top /day  WCR/trap top/day  \\\n",
+       "0                  0.0               0.0               0.0   \n",
+       "1                  0.0               0.0               0.0   \n",
+       "2                  0.0               0.0               0.0   \n",
+       "3                  0.0               0.0               0.0   \n",
+       "4                  0.0               0.0               0.0   \n",
+       "\n",
+       "  Total CRW /trap top/day  Proportion NCR on top Proportion WCR on top  \\\n",
+       "0                     0.0                    NaN                   NaN   \n",
+       "1                     0.0                    NaN                   NaN   \n",
+       "2                     0.0                    NaN                   NaN   \n",
+       "3                     0.0                    NaN                   NaN   \n",
+       "4                     0.0                    NaN                   NaN   \n",
+       "\n",
+       "  Proportion All CRW on top Notes  \n",
+       "0                       NaN   NaN  \n",
+       "1                       NaN   NaN  \n",
+       "2                       NaN   NaN  \n",
+       "3                       NaN   NaN  \n",
+       "4                       NaN   NaN  \n",
+       "\n",
+       "[5 rows x 43 columns]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "labels_path = \"/raid/projects/akhot2/group-01-phys371-sp2023/data/trap_labels.xlsx\"\n",
+    "df = pd.read_excel(labels_path)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "7097a875",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Grand Acc #', 'Weekly Acc#', 'Sample collection period',\n",
+       "       'Sticky trap pair (rep)', '2022 Julian Date', 'Month',\n",
+       "       '2022 Set up Date', '2022 Data Collect. Date', 'Coll. intrvl (d)',\n",
+       "       'Coll. hour', 'Trap visitor', 'Drone pilot', 'On farm location',\n",
+       "       'Site Abrv.', 'trap angle', 'trap orient'n', 'Trap #',\n",
+       "       'Sticky Trap Name', 'T-top side NCR', 'bottom side NCR',\n",
+       "       'T-top side WCR', 'bottom side WCR', 'WCR mal', 'WCR fem',\n",
+       "       'T-top side \"other\"', 'Bottom side \"other\"', 'T-top side total CRW',\n",
+       "       'bottom side total CRW', 'Trap Total NCR', 'Trap Total WCR',\n",
+       "       'Trap Total CRW', 'Proportion NCR in total CRW',\n",
+       "       'Proportion WCR in total CRW', 'NCR/trap /day', 'WCR/trap /day',\n",
+       "       'Total CRW /trap /day', 'NCR/trap top /day', 'WCR/trap top/day',\n",
+       "       'Total CRW /trap top/day', 'Proportion NCR on top',\n",
+       "       'Proportion WCR on top', 'Proportion All CRW on top', 'Notes'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "24d3874e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>trap orient'n</th>\n",
+       "      <th>2022 Data Collect. Date</th>\n",
+       "      <th>Sticky Trap Name</th>\n",
+       "      <th>T-top side WCR</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U2</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U4</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U6</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U8</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>2022-07-19</td>\n",
+       "      <td>U10</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>603</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M24</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>604</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M26</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>605</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M28</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>606</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M30</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>607</th>\n",
+       "      <td>Angled</td>\n",
+       "      <td>NaT</td>\n",
+       "      <td>M32</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>320 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    trap orient'n 2022 Data Collect. Date Sticky Trap Name T-top side WCR\n",
+       "0         Angled               2022-07-19               U2              0\n",
+       "1         Angled               2022-07-19               U4              0\n",
+       "2         Angled               2022-07-19               U6              0\n",
+       "3         Angled               2022-07-19               U8              0\n",
+       "4         Angled               2022-07-19              U10              0\n",
+       "..            ...                     ...              ...            ...\n",
+       "603       Angled                      NaT              M24            NaN\n",
+       "604       Angled                      NaT              M26            NaN\n",
+       "605       Angled                      NaT              M28            NaN\n",
+       "606       Angled                      NaT              M30            NaN\n",
+       "607       Angled                      NaT              M32            NaN\n",
+       "\n",
+       "[320 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_angled = df[df[\"trap orient'n\"] == \"Angled \"]\n",
+    "df_angled[\"2022 Data Collect. Date\"].astype(str)\n",
+    "df_angled[[\"trap orient'n\", \"2022 Data Collect. Date\", \"Sticky Trap Name\", \"T-top side WCR\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "0ccf1183",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "B8_15August2022.jpg\n",
+      "B12_15August2022.jpg\n",
+      "B26_15August2022.jpg\n",
+      "B22_15August2022.jpg\n",
+      "B4_15August2022.jpg\n",
+      "B20_15August2022.jpg\n",
+      "B32_15August2022.jpg\n",
+      "B16_15August2022.jpg\n",
+      "B18_15August2022.jpg\n",
+      "B28_15August2022.jpg\n",
+      "B14_15August2022.jpg\n",
+      "B24_15August2022.jpg\n",
+      "B10_15August2022.jpg\n",
+      "B2_15August2022.jpg\n",
+      "B6_15August2022.jpg\n",
+      "B28_25August2022.jpg\n",
+      "B26_25August2022.jpg\n",
+      "B24_25August2022.jpg\n",
+      "B16_25August2022.HEIC\n",
+      "B12_25August2022.HEIC\n",
+      "B2_25August2022.HEIC\n",
+      "B20_25August2022.jpg\n",
+      "B8_25August2022.HEIC\n",
+      "B18_25August2022.jpg\n",
+      "B32_25August2022.jpg\n",
+      "Buchholz trap 18 8_25_2022 WCR and NCR present_3468.jpg\n",
+      "B4_25August2022.HEIC\n",
+      "B22_25August2022.jpg\n",
+      "B6_25August2022.HEIC\n",
+      "B10_25August2022.HEIC\n",
+      "B30_25August2022.jpg\n",
+      "B14_25August2022.HEIC\n",
+      "B18b_25August2022.jpg\n",
+      "B30b_25August2022.jpg\n",
+      "B14_20July2022.jpg\n",
+      "B8_20July2022.jpg\n",
+      "B32_20July2022.jpg\n",
+      "B26b_20July2022.jpg\n",
+      "B16_20JULY2022.jpg\n",
+      "B26_20July2022.jpg\n",
+      "B28 angled sticky-20JUL2022.jpg\n",
+      "B32 angled sticky-20JUL2022.jpg\n",
+      "B30_20July2022.jpg\n",
+      "B2_20July2022.jpg\n",
+      "B4_20July2022.jpg\n",
+      "B10_20July2022.jpg\n",
+      "B28_20July2022.jpg\n",
+      "B12_20July2022.jpg\n",
+      "B6_20July2022.jpg\n",
+      "B2_3AUGUST2022.HEIC\n",
+      "B18_3AUGUST2022.jpeg\n",
+      "B20_3AUGUST2022.jpeg\n",
+      "B4_3AUGUST2022.HEIC\n",
+      "B10_3AUGUST2022.HEIC\n",
+      "B26_3AUGUST2022.jpeg\n",
+      "B8_3AUGUST2022.HEIC\n",
+      "B28_3AUGUST2022.jpeg\n",
+      "B22_3AUGUST2022.jpeg\n",
+      "B14_3AUGUST2022.HEIC\n",
+      "B16_3AUGUST2022.HEIC\n",
+      "B24_3AUGUST2022.jpeg\n",
+      "B12_3AUGUST2022.HEIC\n",
+      "B6_3AUGUST2022.HEIC\n",
+      "B8_26JUL2022.jpg\n",
+      "B14_26JUL2022.jpg\n",
+      "B6_26JUL2022.jpg\n",
+      "B2_26JUL2022.jpg\n",
+      "B4_26JUL2022.jpg\n",
+      "B10_26JUL2022.jpg\n",
+      "B16_26JUL2022.jpg\n",
+      "B12_26JUL2022jpg.jpg\n",
+      "F2_3AUGUST2022.jpg\n",
+      "F28_3AUGUST2022.HEIC\n",
+      "F18_3AUGUST2022.HEIC\n",
+      "F10_3AUGUST2022.jpg\n",
+      "F16_3AUGUST2022.jpg\n",
+      "F6_3AUGUST2022.jpg\n",
+      "F12_3AUGUST2022jpg.jpg\n",
+      "F26_3AUGUST2022.HEIC\n",
+      "F4_3AUGUST2022.jpg\n",
+      "F8_3AUGUST2022.jpg\n",
+      "F20_3AUGUST2022.HEIC\n",
+      "F30_3AUGUST2022.HEIC\n",
+      "F22_3AUGUST2022.HEIC\n",
+      "F32_3AUGUST2022.HEIC\n",
+      "F14_3AUGUST2022.jpg\n",
+      "F24_3AUGUST2022.HEIC\n",
+      "F28_25August2022.HEIC\n",
+      "F2b_25August2022.heic\n",
+      "F26_25August2022.HEIC\n",
+      "F32_25August2022.HEIC\n",
+      "F16 wide view_25August2022.jpg\n",
+      "F16_25August2022.jpg\n",
+      "F12_25August2022.jpg\n",
+      "F30_25August2022.HEIC\n",
+      "F2_25August2022.jpg\n",
+      "F24_25August2022.HEIC\n",
+      "F20_25August2022.HEIC\n",
+      "F14_25August2022.jpg\n",
+      "F22_25August2022.HEIC\n",
+      "F18_25August2022.HEIC\n",
+      "F14b_25August2022.jpg\n",
+      "Faivre field soybean phenoloyg_25August2022.jpg\n",
+      "F14_20July2022.jpg\n",
+      "F32_20July2022.jpg\n",
+      "F8_20July2022.jpg\n",
+      "F12_20July2022.jpg\n",
+      "F28_20July2022.jpg\n",
+      "F30_20July2022.jpg\n",
+      "F18_20July2022.jpg\n",
+      "F20_20July2022.jpg\n",
+      "F2_20July2022.jpg\n",
+      "F26_20July2022.jpg\n",
+      "F22_20July2022.jpg\n",
+      "F6b_20July2022.jpg\n",
+      "F6_20July2022.jpg\n",
+      "F16_20July2022.jpg\n",
+      "F24_20July2022.jpg\n",
+      "F10_20July2022.jpg\n",
+      "F20_26JUL2022.HEIC\n",
+      "F26_26JUL2022.HEIC\n",
+      "F8_26JUL2022.jpg\n",
+      "F28_26JUL2022.HEIC\n",
+      "F30_26JUL2022.HEIC\n",
+      "F2_26JUL2022.jpg\n",
+      "F24_26JUL2022.HEIC\n",
+      "F18_26JUL2022.HEIC\n",
+      "F10_26JUL2022.jpg\n",
+      "F32_26JUL2022.HEIC\n",
+      "F22_26JUL2022.HEIC\n",
+      "M16_19July2022.jpg\n",
+      "M10_19July2022.jpg\n",
+      "M2_19July2022.jpg\n",
+      "M14_19July2022.jpg\n",
+      "M12_19July2022.jpg\n",
+      "M6_19July2022.jpg\n",
+      "M4_19July2022.jpg\n",
+      "M8_19July2022.jpg\n",
+      "M20_4AUGUST2022..jpg\n",
+      "M18_4AUGUST2022.jpg\n",
+      "M2_4AUGUST2022.HEIC\n",
+      "M22_4AUGUST2022.jpg\n",
+      "M30_4AUGUST2022..jpg\n",
+      "M8_4AUGUST2022.HEIC\n",
+      "M16_4AUGUST2022.HEIC\n",
+      "M10_4AUGUST2022.HEIC\n",
+      "M6_4AUGUST2022.HEIC\n",
+      "M12_4AUGUST2022.HEIC\n",
+      "M26_4AUGUST2022.jpg\n",
+      "M28_4AUGUST2022..jpg\n",
+      "M32_4AUGUST2022..jpg\n",
+      "M24_4AUGUST2022..jpg\n",
+      "M14_4AUGUST2022.HEIC\n",
+      "M14._17AUGUST2022.HEIC\n",
+      "M24_17AUGUST2022.jpg\n",
+      "M30_17AUGUST2022.jpg\n",
+      "M10_17AUGUST2022.HEIC\n",
+      "M22_17AUGUST2022.jpg\n",
+      "M2_17AUGUST2022.HEIC\n",
+      "M16_17AUGUST2022.HEIC\n",
+      "M26_17AUGUST2022.jpg\n",
+      "M12_17AUGUST2022.HEIC\n",
+      "M18_17AUGUST2022.jpg\n",
+      "M28_17AUGUST2022.jpg\n",
+      "M6_17AUGUST2022.HEIC\n",
+      "M32-17AUGUST2022.jpg\n",
+      "M4_17AUGUST2022.HEIC\n",
+      "M8_17AUGUST2022.HEIC\n",
+      "M20_17AUGUST2022.jpg\n",
+      "M12_27JULY2022.jpg\n",
+      "Moore_M_Phenology of soybean_27JULY2022.jpg\n",
+      "M8_27JULY2022.jpg\n",
+      "M26_27JULY2022.jpeg\n",
+      "M16_27JULY2022.jpg\n",
+      "M32_27JULY2022.jpeg\n",
+      "M30_27JULY2022.jpeg\n",
+      "M10_27JULY2022.jpg\n",
+      "M4_27JULY2022.jpg\n",
+      "M14_27JULY2022.jpg\n",
+      "M20_27JULY2022.jpeg\n",
+      "M28_27JULY2022.jpeg\n",
+      "M6_27JULY2022.jpg\n",
+      "M22_27JULY2022.jpeg\n",
+      "M24_27JULY2022.jpeg\n",
+      "M2_27JULY2022.jpg\n",
+      "U30_19JULY2022.jpg\n",
+      "U2_19JULY2022.HEIC\n",
+      "U10_19JULY2022.HEIC\n",
+      "U20_19JULY2022jpg.jpg\n",
+      "U22_19JULY2022.jpg\n",
+      "U16_19JULY2022.HEIC\n",
+      "U24_19JULY2022.jpg\n",
+      "U8_19JULY2022.HEIC\n",
+      "U14_19JULY2022.HEIC\n",
+      "U18_19JULY2022.jpg\n",
+      "U4_19JULY2022.HEIC\n",
+      "U12_19JULY2022.HEIC\n",
+      "U26_19JULY2022.jpg\n",
+      "U6_19JULY2022.HEIC\n",
+      "U32_19JULY2022.jpg\n",
+      "U28_19JULY2022.jpg\n",
+      "U26_5AUGUST2022.jpg\n",
+      "U32_5AUGUST2022jpg.jpg\n",
+      "IMG_4978.HEIC\n",
+      "IMG_4980.HEIC\n",
+      "IMG_4982.HEIC\n",
+      "IMG_4981.HEIC\n",
+      "U24_5AUGUST2022.jpg\n",
+      "U20_5AUGUST2022.jpg\n",
+      "IMG_4979.HEIC\n",
+      "IMG_4983.HEIC\n",
+      "IMG_4977.HEIC\n",
+      "U18_5AUGUST2022.jpg\n",
+      "U28_5AUGUST2022.jpg\n",
+      "U22_5AUGUST2022.jpg\n",
+      "U30_5AUGUST2022.jpg\n",
+      "U26_27JULY2022.jpg\n",
+      "U20_27JULY2022.jpg\n",
+      "U2_27JULY2022.HEIC\n",
+      "U16_27JULY2022.HEIC\n",
+      "U10_27JULY2022.HEIC\n",
+      "U24_27JULY2022.jpg\n",
+      "U6_27JULY2022.HEIC\n",
+      "U12_27JULY2022.HEIC\n",
+      "U8_27JULY2022.HEIC\n",
+      "U4_27JULY2022.HEIC\n",
+      "U28_27JULY2022.jpg\n",
+      "U32_27JULY2022.jpg\n",
+      "U30_27JULY2022.jpg\n",
+      "U22_27JULY2022jpg.jpg\n",
+      "U18_27JULY2022.jpg\n",
+      "U14_27JULY2022.HEIC\n",
+      "U4_10AUGUST2022.HEIC\n",
+      "U22_10AUGUST2022.jpg\n",
+      "U26_10AUGUST2022.jpg\n",
+      "U24_10AUGUST2022.jpg\n",
+      "U28_10AUGUST2022.jpg\n",
+      "U30_10AUGUST2022.jpg\n",
+      "U10_10AUGUST2022.HEIC\n",
+      "U32_10AUGUST2022.jpg\n",
+      "U18_10AUGUST2022.jpg\n",
+      "U20_10AUGUST2022.jpg\n",
+      "U16_10AUGUST2022.HEIC\n",
+      "U14_10AUGUST2022.HEIC\n",
+      "U2_10AUGUST2022.HEIC\n",
+      "U8_10AUGUST2022.HEIC\n",
+      "U12_10AUGUST2022.HEIC\n",
+      "U6_10AUGUST2022.HEIC\n",
+      "U16_23August2022.HEIC\n",
+      "U12_23August2022.HEIC\n",
+      "U26_23August2022.jpg\n",
+      "U20_23August2022.jpg\n",
+      "U10_23August2022.HEIC\n",
+      "U6_23August2022.HEIC\n",
+      "U4_23August2022.HEIC\n",
+      "U2_23August2022.HEIC\n",
+      "U28_23August2022.jpg\n",
+      "U22_23August2022.jpg\n",
+      "U14_23August2022.HEIC\n",
+      "U18_23August2022.HEIC\n",
+      "U18_23August2022.jpg\n",
+      "U24_23August2022.jpg\n",
+      "U28b_23August2022.jpg\n",
+      "U30_23August2022.jpg\n",
+      "U32_23August2022.jpg\n"
+     ]
+    }
+   ],
+   "source": [
+    "folders = [\"buchhloz\", \"faivre\", \"moore\", \"underwood\"]\n",
+    "\n",
+    "transform = transforms.Compose([\n",
+    "    transforms.PILToTensor()\n",
+    "])\n",
+    "\n",
+    "for folder_name in folders:\n",
+    "    dates = []\n",
+    "    for date in glob.glob(r\"/raid/projects/akhot2/group-01-phys371-sp2023/data/\" + folder_name + \"/*\"):\n",
+    "        dates.append(date)\n",
+    "    for date in dates:\n",
+    "        for image_file in glob.glob(date +  \"/*\"):\n",
+    "            print(image_file.split(\"/\")[-1])\n",
+    "            #continue\n",
+    "            if image_file.split(\".\")[-1].lower() == \"heic\":\n",
+    "                heif_file = pillow_heif.read_heif(image_file)\n",
+    "                img = Image.frombytes(\n",
+    "                    heif_file.mode,\n",
+    "                    heif_file.size,\n",
+    "                    heif_file.data,\n",
+    "                    \"raw\",\n",
+    "                )\n",
+    "                #print(\"HEIC FILE:\")\n",
+    "            else:\n",
+    "                img = Image.open(image_file)\n",
+    "                #print(\"JPG FILE:\")\n",
+    "            img_tensor = transform(img)\n",
+    "            #plt.imshow(img)\n",
+    "            #plt.show()\n",
+    "            #print(img_tensor.shape)\n",
+    "        \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "id": "eaa3da3e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "80679db4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/training/preprocess.py b/training/preprocess.py
new file mode 100644
index 0000000..7371f47
--- /dev/null
+++ b/training/preprocess.py
@@ -0,0 +1 @@
+B
\ No newline at end of file
-- 
GitLab