From 19606b81a708a6d701d96a189a4bfbef03064da7 Mon Sep 17 00:00:00 2001 From: fresleven <khotayush@gmail.com> Date: Tue, 7 Feb 2023 20:07:04 -0600 Subject: [PATCH] Starting preprocessing --- .../PreProcessor-checkpoint.ipynb | 701 ++++++++++++++++ training/PreProcessor.ipynb | 791 ++++++++++++++++++ training/preprocess.py | 1 + 3 files changed, 1493 insertions(+) create mode 100644 training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb create mode 100644 training/PreProcessor.ipynb create mode 100644 training/preprocess.py diff --git a/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb b/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb new file mode 100644 index 0000000..7c1361f --- /dev/null +++ b/training/.ipynb_checkpoints/PreProcessor-checkpoint.ipynb @@ -0,0 +1,701 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ff555f6b", + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "from tqdm import tqdm\n", + "import pandas as pd \n", + "import torch\n", + "from PIL import Image\n", + "import torchvision.transforms as transforms\n", + "import matplotlib.pyplot as plt\n", + "import pillow_heif" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fd6db132", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/raid/projects/akhot2/conda/envs/akhot2/lib/python3.9/site-packages/openpyxl/worksheet/header_footer.py:48: UserWarning: Cannot parse header or footer so it will be ignored\n", + " warn(\"\"\"Cannot parse header or footer so it will be ignored\"\"\")\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Grand Acc #</th>\n", + " <th>Weekly Acc#</th>\n", + " <th>Sample collection period</th>\n", + " <th>Sticky trap pair (rep)</th>\n", + " <th>2022 Julian Date</th>\n", + " <th>Month</th>\n", + " <th>2022 Set up Date</th>\n", + " <th>2022 Data Collect. Date</th>\n", + " <th>Coll. intrvl (d)</th>\n", + " <th>Coll. hour</th>\n", + " <th>...</th>\n", + " <th>NCR/trap /day</th>\n", + " <th>WCR/trap /day</th>\n", + " <th>Total CRW /trap /day</th>\n", + " <th>NCR/trap top /day</th>\n", + " <th>WCR/trap top/day</th>\n", + " <th>Total CRW /trap top/day</th>\n", + " <th>Proportion NCR on top</th>\n", + " <th>Proportion WCR on top</th>\n", + " <th>Proportion All CRW on top</th>\n", + " <th>Notes</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>6</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>8</td>\n", + " <td>8</td>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>10</td>\n", + " <td>10</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 43 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Grand Acc # Weekly Acc# Sample collection period Sticky trap pair (rep) \\\n", + "0 2 2 1 1 \n", + "1 4 4 1 2 \n", + "2 6 6 1 3 \n", + "3 8 8 1 4 \n", + "4 10 10 1 5 \n", + "\n", + " 2022 Julian Date Month 2022 Set up Date 2022 Data Collect. Date \\\n", + "0 NaN NaN 2022-07-08 2022-07-19 \n", + "1 NaN NaN 2022-07-08 2022-07-19 \n", + "2 NaN NaN 2022-07-08 2022-07-19 \n", + "3 NaN NaN 2022-07-08 2022-07-19 \n", + "4 NaN NaN 2022-07-08 2022-07-19 \n", + "\n", + " Coll. intrvl (d) Coll. hour ... NCR/trap /day WCR/trap /day \\\n", + "0 11.0 11.35 ... 0.0 0.0 \n", + "1 11.0 11.35 ... 0.0 0.0 \n", + "2 11.0 11.35 ... 0.0 0.0 \n", + "3 11.0 11.35 ... 0.0 0.0 \n", + "4 11.0 11.35 ... 0.0 0.0 \n", + "\n", + " Total CRW /trap /day NCR/trap top /day WCR/trap top/day \\\n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "\n", + " Total CRW /trap top/day Proportion NCR on top Proportion WCR on top \\\n", + "0 0.0 NaN NaN \n", + "1 0.0 NaN NaN \n", + "2 0.0 NaN NaN \n", + "3 0.0 NaN NaN \n", + "4 0.0 NaN NaN \n", + "\n", + " Proportion All CRW on top Notes \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + "[5 rows x 43 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels_path = \"/raid/projects/akhot2/group-01-phys371-sp2023/data/trap_labels.xlsx\"\n", + "df = pd.read_excel(labels_path)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7097a875", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Grand Acc #', 'Weekly Acc#', 'Sample collection period',\n", + " 'Sticky trap pair (rep)', '2022 Julian Date', 'Month',\n", + " '2022 Set up Date', '2022 Data Collect. Date', 'Coll. intrvl (d)',\n", + " 'Coll. hour', 'Trap visitor', 'Drone pilot', 'On farm location',\n", + " 'Site Abrv.', 'trap angle', 'trap orient'n', 'Trap #',\n", + " 'Sticky Trap Name', 'T-top side NCR', 'bottom side NCR',\n", + " 'T-top side WCR', 'bottom side WCR', 'WCR mal', 'WCR fem',\n", + " 'T-top side \"other\"', 'Bottom side \"other\"', 'T-top side total CRW',\n", + " 'bottom side total CRW', 'Trap Total NCR', 'Trap Total WCR',\n", + " 'Trap Total CRW', 'Proportion NCR in total CRW',\n", + " 'Proportion WCR in total CRW', 'NCR/trap /day', 'WCR/trap /day',\n", + " 'Total CRW /trap /day', 'NCR/trap top /day', 'WCR/trap top/day',\n", + " 'Total CRW /trap top/day', 'Proportion NCR on top',\n", + " 'Proportion WCR on top', 'Proportion All CRW on top', 'Notes'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "24d3874e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>trap orient'n</th>\n", + " <th>2022 Data Collect. Date</th>\n", + " <th>Sticky Trap Name</th>\n", + " <th>T-top side WCR</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U2</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U4</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U6</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U8</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U10</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>603</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M24</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>604</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M26</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>605</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M28</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>606</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M30</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>607</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M32</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>320 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " trap orient'n 2022 Data Collect. Date Sticky Trap Name T-top side WCR\n", + "0 Angled 2022-07-19 U2 0\n", + "1 Angled 2022-07-19 U4 0\n", + "2 Angled 2022-07-19 U6 0\n", + "3 Angled 2022-07-19 U8 0\n", + "4 Angled 2022-07-19 U10 0\n", + ".. ... ... ... ...\n", + "603 Angled NaT M24 NaN\n", + "604 Angled NaT M26 NaN\n", + "605 Angled NaT M28 NaN\n", + "606 Angled NaT M30 NaN\n", + "607 Angled NaT M32 NaN\n", + "\n", + "[320 rows x 4 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_angled = df[df[\"trap orient'n\"] == \"Angled \"]\n", + "df_angled[\"2022 Data Collect. Date\"].astype(str)\n", + "df_angled[[\"trap orient'n\", \"2022 Data Collect. Date\", \"Sticky Trap Name\", \"T-top side WCR\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ccf1183", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "B8_15August2022.jpg\n", + "B12_15August2022.jpg\n", + "B26_15August2022.jpg\n", + "B22_15August2022.jpg\n", + "B4_15August2022.jpg\n", + "B20_15August2022.jpg\n", + "B32_15August2022.jpg\n", + "B16_15August2022.jpg\n", + "B18_15August2022.jpg\n", + "B28_15August2022.jpg\n", + "B14_15August2022.jpg\n", + "B24_15August2022.jpg\n", + "B10_15August2022.jpg\n", + "B2_15August2022.jpg\n", + "B6_15August2022.jpg\n", + "B28_25August2022.jpg\n", + "B26_25August2022.jpg\n", + "B24_25August2022.jpg\n", + "B16_25August2022.HEIC\n", + "B12_25August2022.HEIC\n", + "B2_25August2022.HEIC\n", + "B20_25August2022.jpg\n", + "B8_25August2022.HEIC\n", + "B18_25August2022.jpg\n", + "B32_25August2022.jpg\n", + "Buchholz trap 18 8_25_2022 WCR and NCR present_3468.jpg\n", + "B4_25August2022.HEIC\n", + "B22_25August2022.jpg\n", + "B6_25August2022.HEIC\n", + "B10_25August2022.HEIC\n", + "B30_25August2022.jpg\n", + "B14_25August2022.HEIC\n", + "B18b_25August2022.jpg\n", + "B30b_25August2022.jpg\n", + "B14_20July2022.jpg\n", + "B8_20July2022.jpg\n", + "B32_20July2022.jpg\n", + "B26b_20July2022.jpg\n", + "B16_20JULY2022.jpg\n", + "B26_20July2022.jpg\n", + "B28 angled sticky-20JUL2022.jpg\n", + "B32 angled sticky-20JUL2022.jpg\n", + "B30_20July2022.jpg\n", + "B2_20July2022.jpg\n", + "B4_20July2022.jpg\n", + "B10_20July2022.jpg\n", + "B28_20July2022.jpg\n", + "B12_20July2022.jpg\n", + "B6_20July2022.jpg\n", + "B2_3AUGUST2022.HEIC\n", + "B18_3AUGUST2022.jpeg\n", + "B20_3AUGUST2022.jpeg\n", + "B4_3AUGUST2022.HEIC\n", + "B10_3AUGUST2022.HEIC\n", + "B26_3AUGUST2022.jpeg\n", + "B8_3AUGUST2022.HEIC\n", + "B28_3AUGUST2022.jpeg\n", + "B22_3AUGUST2022.jpeg\n", + "B14_3AUGUST2022.HEIC\n", + "B16_3AUGUST2022.HEIC\n", + "B24_3AUGUST2022.jpeg\n", + "B12_3AUGUST2022.HEIC\n", + "B6_3AUGUST2022.HEIC\n", + "B8_26JUL2022.jpg\n", + "B14_26JUL2022.jpg\n", + "B6_26JUL2022.jpg\n", + "B2_26JUL2022.jpg\n", + "B4_26JUL2022.jpg\n", + "B10_26JUL2022.jpg\n", + "B16_26JUL2022.jpg\n", + "B12_26JUL2022jpg.jpg\n", + "F2_3AUGUST2022.jpg\n", + "F28_3AUGUST2022.HEIC\n", + "F18_3AUGUST2022.HEIC\n", + "F10_3AUGUST2022.jpg\n", + "F16_3AUGUST2022.jpg\n", + "F6_3AUGUST2022.jpg\n", + "F12_3AUGUST2022jpg.jpg\n", + "F26_3AUGUST2022.HEIC\n", + "F4_3AUGUST2022.jpg\n", + "F8_3AUGUST2022.jpg\n", + "F20_3AUGUST2022.HEIC\n", + "F30_3AUGUST2022.HEIC\n", + "F22_3AUGUST2022.HEIC\n", + "F32_3AUGUST2022.HEIC\n", + "F14_3AUGUST2022.jpg\n", + "F24_3AUGUST2022.HEIC\n", + "F28_25August2022.HEIC\n", + "F2b_25August2022.heic\n", + "F26_25August2022.HEIC\n", + "F32_25August2022.HEIC\n", + "F16 wide view_25August2022.jpg\n", + "F16_25August2022.jpg\n", + "F12_25August2022.jpg\n", + "F30_25August2022.HEIC\n", + "F2_25August2022.jpg\n", + "F24_25August2022.HEIC\n", + "F20_25August2022.HEIC\n", + "F14_25August2022.jpg\n", + "F22_25August2022.HEIC\n", + "F18_25August2022.HEIC\n", + "F14b_25August2022.jpg\n", + "Faivre field soybean phenoloyg_25August2022.jpg\n", + "F14_20July2022.jpg\n", + "F32_20July2022.jpg\n", + "F8_20July2022.jpg\n", + "F12_20July2022.jpg\n", + "F28_20July2022.jpg\n", + "F30_20July2022.jpg\n", + "F18_20July2022.jpg\n", + "F20_20July2022.jpg\n", + "F2_20July2022.jpg\n", + "F26_20July2022.jpg\n", + "F22_20July2022.jpg\n", + "F6b_20July2022.jpg\n", + "F6_20July2022.jpg\n", + "F16_20July2022.jpg\n", + "F24_20July2022.jpg\n", + "F10_20July2022.jpg\n", + "F20_26JUL2022.HEIC\n", + "F26_26JUL2022.HEIC\n", + "F8_26JUL2022.jpg\n", + "F28_26JUL2022.HEIC\n", + "F30_26JUL2022.HEIC\n", + "F2_26JUL2022.jpg\n", + "F24_26JUL2022.HEIC\n", + "F18_26JUL2022.HEIC\n", + "F10_26JUL2022.jpg\n", + "F32_26JUL2022.HEIC\n", + "F22_26JUL2022.HEIC\n", + "M16_19July2022.jpg\n", + "M10_19July2022.jpg\n", + "M2_19July2022.jpg\n", + "M14_19July2022.jpg\n", + "M12_19July2022.jpg\n", + "M6_19July2022.jpg\n", + "M4_19July2022.jpg\n", + "M8_19July2022.jpg\n", + "M20_4AUGUST2022..jpg\n", + "M18_4AUGUST2022.jpg\n", + "M2_4AUGUST2022.HEIC\n", + "M22_4AUGUST2022.jpg\n", + "M30_4AUGUST2022..jpg\n", + "M8_4AUGUST2022.HEIC\n", + "M16_4AUGUST2022.HEIC\n", + "M10_4AUGUST2022.HEIC\n", + "M6_4AUGUST2022.HEIC\n", + "M12_4AUGUST2022.HEIC\n", + "M26_4AUGUST2022.jpg\n", + "M28_4AUGUST2022..jpg\n", + "M32_4AUGUST2022..jpg\n", + "M24_4AUGUST2022..jpg\n", + "M14_4AUGUST2022.HEIC\n", + "M14._17AUGUST2022.HEIC\n", + "M24_17AUGUST2022.jpg\n", + "M30_17AUGUST2022.jpg\n", + "M10_17AUGUST2022.HEIC\n", + "M22_17AUGUST2022.jpg\n", + "M2_17AUGUST2022.HEIC\n", + "M16_17AUGUST2022.HEIC\n", + "M26_17AUGUST2022.jpg\n", + "M12_17AUGUST2022.HEIC\n", + "M18_17AUGUST2022.jpg\n", + "M28_17AUGUST2022.jpg\n", + "M6_17AUGUST2022.HEIC\n", + "M32-17AUGUST2022.jpg\n", + "M4_17AUGUST2022.HEIC\n", + "M8_17AUGUST2022.HEIC\n", + "M20_17AUGUST2022.jpg\n", + "M12_27JULY2022.jpg\n", + "Moore_M_Phenology of soybean_27JULY2022.jpg\n", + "M8_27JULY2022.jpg\n", + "M26_27JULY2022.jpeg\n", + "M16_27JULY2022.jpg\n", + "M32_27JULY2022.jpeg\n", + "M30_27JULY2022.jpeg\n" + ] + } + ], + "source": [ + "folders = [\"buchhloz\", \"faivre\", \"moore\", \"underwood\"]\n", + "\n", + "transform = transforms.Compose([\n", + " transforms.PILToTensor()\n", + "])\n", + "\n", + "for folder_name in folders:\n", + " dates = []\n", + " for date in glob.glob(r\"/raid/projects/akhot2/group-01-phys371-sp2023/data/\" + folder_name + \"/*\"):\n", + " dates.append(date)\n", + " for date in dates:\n", + " for image_file in glob.glob(date + \"/*\"):\n", + " if image_file.split(\".\")[-1].lower() == \"heic\":\n", + " heif_file = pillow_heif.read_heif(image_file)\n", + " img = Image.frombytes(\n", + " heif_file.mode,\n", + " heif_file.size,\n", + " heif_file.data,\n", + " \"raw\",\n", + " )\n", + " #print(\"HEIC FILE:\")\n", + " else:\n", + " img = Image.open(image_file)\n", + " #print(\"JPG FILE:\")\n", + " img_tensor = transform(img)\n", + " #plt.imshow(img)\n", + " #plt.show()\n", + " #print(img_tensor.shape)\n", + " print(image_file.split(\"/\")[-1])\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "eaa3da3e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80679db4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/training/PreProcessor.ipynb b/training/PreProcessor.ipynb new file mode 100644 index 0000000..a2863fb --- /dev/null +++ b/training/PreProcessor.ipynb @@ -0,0 +1,791 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "id": "ff555f6b", + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "from tqdm import tqdm\n", + "import pandas as pd \n", + "import torch\n", + "from PIL import Image\n", + "import torchvision.transforms as transforms\n", + "import matplotlib.pyplot as plt\n", + "import pillow_heif" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "fd6db132", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/raid/projects/akhot2/conda/envs/akhot2/lib/python3.9/site-packages/openpyxl/worksheet/header_footer.py:48: UserWarning: Cannot parse header or footer so it will be ignored\n", + " warn(\"\"\"Cannot parse header or footer so it will be ignored\"\"\")\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Grand Acc #</th>\n", + " <th>Weekly Acc#</th>\n", + " <th>Sample collection period</th>\n", + " <th>Sticky trap pair (rep)</th>\n", + " <th>2022 Julian Date</th>\n", + " <th>Month</th>\n", + " <th>2022 Set up Date</th>\n", + " <th>2022 Data Collect. Date</th>\n", + " <th>Coll. intrvl (d)</th>\n", + " <th>Coll. hour</th>\n", + " <th>...</th>\n", + " <th>NCR/trap /day</th>\n", + " <th>WCR/trap /day</th>\n", + " <th>Total CRW /trap /day</th>\n", + " <th>NCR/trap top /day</th>\n", + " <th>WCR/trap top/day</th>\n", + " <th>Total CRW /trap top/day</th>\n", + " <th>Proportion NCR on top</th>\n", + " <th>Proportion WCR on top</th>\n", + " <th>Proportion All CRW on top</th>\n", + " <th>Notes</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>6</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>8</td>\n", + " <td>8</td>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>10</td>\n", + " <td>10</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2022-07-08</td>\n", + " <td>2022-07-19</td>\n", + " <td>11.0</td>\n", + " <td>11.35</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 43 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Grand Acc # Weekly Acc# Sample collection period Sticky trap pair (rep) \\\n", + "0 2 2 1 1 \n", + "1 4 4 1 2 \n", + "2 6 6 1 3 \n", + "3 8 8 1 4 \n", + "4 10 10 1 5 \n", + "\n", + " 2022 Julian Date Month 2022 Set up Date 2022 Data Collect. Date \\\n", + "0 NaN NaN 2022-07-08 2022-07-19 \n", + "1 NaN NaN 2022-07-08 2022-07-19 \n", + "2 NaN NaN 2022-07-08 2022-07-19 \n", + "3 NaN NaN 2022-07-08 2022-07-19 \n", + "4 NaN NaN 2022-07-08 2022-07-19 \n", + "\n", + " Coll. intrvl (d) Coll. hour ... NCR/trap /day WCR/trap /day \\\n", + "0 11.0 11.35 ... 0.0 0.0 \n", + "1 11.0 11.35 ... 0.0 0.0 \n", + "2 11.0 11.35 ... 0.0 0.0 \n", + "3 11.0 11.35 ... 0.0 0.0 \n", + "4 11.0 11.35 ... 0.0 0.0 \n", + "\n", + " Total CRW /trap /day NCR/trap top /day WCR/trap top/day \\\n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "\n", + " Total CRW /trap top/day Proportion NCR on top Proportion WCR on top \\\n", + "0 0.0 NaN NaN \n", + "1 0.0 NaN NaN \n", + "2 0.0 NaN NaN \n", + "3 0.0 NaN NaN \n", + "4 0.0 NaN NaN \n", + "\n", + " Proportion All CRW on top Notes \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + "[5 rows x 43 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels_path = \"/raid/projects/akhot2/group-01-phys371-sp2023/data/trap_labels.xlsx\"\n", + "df = pd.read_excel(labels_path)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7097a875", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Grand Acc #', 'Weekly Acc#', 'Sample collection period',\n", + " 'Sticky trap pair (rep)', '2022 Julian Date', 'Month',\n", + " '2022 Set up Date', '2022 Data Collect. Date', 'Coll. intrvl (d)',\n", + " 'Coll. hour', 'Trap visitor', 'Drone pilot', 'On farm location',\n", + " 'Site Abrv.', 'trap angle', 'trap orient'n', 'Trap #',\n", + " 'Sticky Trap Name', 'T-top side NCR', 'bottom side NCR',\n", + " 'T-top side WCR', 'bottom side WCR', 'WCR mal', 'WCR fem',\n", + " 'T-top side \"other\"', 'Bottom side \"other\"', 'T-top side total CRW',\n", + " 'bottom side total CRW', 'Trap Total NCR', 'Trap Total WCR',\n", + " 'Trap Total CRW', 'Proportion NCR in total CRW',\n", + " 'Proportion WCR in total CRW', 'NCR/trap /day', 'WCR/trap /day',\n", + " 'Total CRW /trap /day', 'NCR/trap top /day', 'WCR/trap top/day',\n", + " 'Total CRW /trap top/day', 'Proportion NCR on top',\n", + " 'Proportion WCR on top', 'Proportion All CRW on top', 'Notes'],\n", + " dtype='object')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "24d3874e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>trap orient'n</th>\n", + " <th>2022 Data Collect. Date</th>\n", + " <th>Sticky Trap Name</th>\n", + " <th>T-top side WCR</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U2</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U4</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U6</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U8</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Angled</td>\n", + " <td>2022-07-19</td>\n", + " <td>U10</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>603</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M24</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>604</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M26</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>605</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M28</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>606</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M30</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>607</th>\n", + " <td>Angled</td>\n", + " <td>NaT</td>\n", + " <td>M32</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>320 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " trap orient'n 2022 Data Collect. Date Sticky Trap Name T-top side WCR\n", + "0 Angled 2022-07-19 U2 0\n", + "1 Angled 2022-07-19 U4 0\n", + "2 Angled 2022-07-19 U6 0\n", + "3 Angled 2022-07-19 U8 0\n", + "4 Angled 2022-07-19 U10 0\n", + ".. ... ... ... ...\n", + "603 Angled NaT M24 NaN\n", + "604 Angled NaT M26 NaN\n", + "605 Angled NaT M28 NaN\n", + "606 Angled NaT M30 NaN\n", + "607 Angled NaT M32 NaN\n", + "\n", + "[320 rows x 4 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_angled = df[df[\"trap orient'n\"] == \"Angled \"]\n", + "df_angled[\"2022 Data Collect. Date\"].astype(str)\n", + "df_angled[[\"trap orient'n\", \"2022 Data Collect. Date\", \"Sticky Trap Name\", \"T-top side WCR\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0ccf1183", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "B8_15August2022.jpg\n", + "B12_15August2022.jpg\n", + "B26_15August2022.jpg\n", + "B22_15August2022.jpg\n", + "B4_15August2022.jpg\n", + "B20_15August2022.jpg\n", + "B32_15August2022.jpg\n", + "B16_15August2022.jpg\n", + "B18_15August2022.jpg\n", + "B28_15August2022.jpg\n", + "B14_15August2022.jpg\n", + "B24_15August2022.jpg\n", + "B10_15August2022.jpg\n", + "B2_15August2022.jpg\n", + "B6_15August2022.jpg\n", + "B28_25August2022.jpg\n", + "B26_25August2022.jpg\n", + "B24_25August2022.jpg\n", + "B16_25August2022.HEIC\n", + "B12_25August2022.HEIC\n", + "B2_25August2022.HEIC\n", + "B20_25August2022.jpg\n", + "B8_25August2022.HEIC\n", + "B18_25August2022.jpg\n", + "B32_25August2022.jpg\n", + "Buchholz trap 18 8_25_2022 WCR and NCR present_3468.jpg\n", + "B4_25August2022.HEIC\n", + "B22_25August2022.jpg\n", + "B6_25August2022.HEIC\n", + "B10_25August2022.HEIC\n", + "B30_25August2022.jpg\n", + "B14_25August2022.HEIC\n", + "B18b_25August2022.jpg\n", + "B30b_25August2022.jpg\n", + "B14_20July2022.jpg\n", + "B8_20July2022.jpg\n", + "B32_20July2022.jpg\n", + "B26b_20July2022.jpg\n", + "B16_20JULY2022.jpg\n", + "B26_20July2022.jpg\n", + "B28 angled sticky-20JUL2022.jpg\n", + "B32 angled sticky-20JUL2022.jpg\n", + "B30_20July2022.jpg\n", + "B2_20July2022.jpg\n", + "B4_20July2022.jpg\n", + "B10_20July2022.jpg\n", + "B28_20July2022.jpg\n", + "B12_20July2022.jpg\n", + "B6_20July2022.jpg\n", + "B2_3AUGUST2022.HEIC\n", + "B18_3AUGUST2022.jpeg\n", + "B20_3AUGUST2022.jpeg\n", + "B4_3AUGUST2022.HEIC\n", + "B10_3AUGUST2022.HEIC\n", + "B26_3AUGUST2022.jpeg\n", + "B8_3AUGUST2022.HEIC\n", + "B28_3AUGUST2022.jpeg\n", + "B22_3AUGUST2022.jpeg\n", + "B14_3AUGUST2022.HEIC\n", + "B16_3AUGUST2022.HEIC\n", + "B24_3AUGUST2022.jpeg\n", + "B12_3AUGUST2022.HEIC\n", + "B6_3AUGUST2022.HEIC\n", + "B8_26JUL2022.jpg\n", + "B14_26JUL2022.jpg\n", + "B6_26JUL2022.jpg\n", + "B2_26JUL2022.jpg\n", + "B4_26JUL2022.jpg\n", + "B10_26JUL2022.jpg\n", + "B16_26JUL2022.jpg\n", + "B12_26JUL2022jpg.jpg\n", + "F2_3AUGUST2022.jpg\n", + "F28_3AUGUST2022.HEIC\n", + "F18_3AUGUST2022.HEIC\n", + "F10_3AUGUST2022.jpg\n", + "F16_3AUGUST2022.jpg\n", + "F6_3AUGUST2022.jpg\n", + "F12_3AUGUST2022jpg.jpg\n", + "F26_3AUGUST2022.HEIC\n", + "F4_3AUGUST2022.jpg\n", + "F8_3AUGUST2022.jpg\n", + "F20_3AUGUST2022.HEIC\n", + "F30_3AUGUST2022.HEIC\n", + "F22_3AUGUST2022.HEIC\n", + "F32_3AUGUST2022.HEIC\n", + "F14_3AUGUST2022.jpg\n", + "F24_3AUGUST2022.HEIC\n", + "F28_25August2022.HEIC\n", + "F2b_25August2022.heic\n", + "F26_25August2022.HEIC\n", + "F32_25August2022.HEIC\n", + "F16 wide view_25August2022.jpg\n", + "F16_25August2022.jpg\n", + "F12_25August2022.jpg\n", + "F30_25August2022.HEIC\n", + "F2_25August2022.jpg\n", + "F24_25August2022.HEIC\n", + "F20_25August2022.HEIC\n", + "F14_25August2022.jpg\n", + "F22_25August2022.HEIC\n", + "F18_25August2022.HEIC\n", + "F14b_25August2022.jpg\n", + "Faivre field soybean phenoloyg_25August2022.jpg\n", + "F14_20July2022.jpg\n", + "F32_20July2022.jpg\n", + "F8_20July2022.jpg\n", + "F12_20July2022.jpg\n", + "F28_20July2022.jpg\n", + "F30_20July2022.jpg\n", + "F18_20July2022.jpg\n", + "F20_20July2022.jpg\n", + "F2_20July2022.jpg\n", + "F26_20July2022.jpg\n", + "F22_20July2022.jpg\n", + "F6b_20July2022.jpg\n", + "F6_20July2022.jpg\n", + "F16_20July2022.jpg\n", + "F24_20July2022.jpg\n", + "F10_20July2022.jpg\n", + "F20_26JUL2022.HEIC\n", + "F26_26JUL2022.HEIC\n", + "F8_26JUL2022.jpg\n", + "F28_26JUL2022.HEIC\n", + "F30_26JUL2022.HEIC\n", + "F2_26JUL2022.jpg\n", + "F24_26JUL2022.HEIC\n", + "F18_26JUL2022.HEIC\n", + "F10_26JUL2022.jpg\n", + "F32_26JUL2022.HEIC\n", + "F22_26JUL2022.HEIC\n", + "M16_19July2022.jpg\n", + "M10_19July2022.jpg\n", + "M2_19July2022.jpg\n", + "M14_19July2022.jpg\n", + "M12_19July2022.jpg\n", + "M6_19July2022.jpg\n", + "M4_19July2022.jpg\n", + "M8_19July2022.jpg\n", + "M20_4AUGUST2022..jpg\n", + "M18_4AUGUST2022.jpg\n", + "M2_4AUGUST2022.HEIC\n", + "M22_4AUGUST2022.jpg\n", + "M30_4AUGUST2022..jpg\n", + "M8_4AUGUST2022.HEIC\n", + "M16_4AUGUST2022.HEIC\n", + "M10_4AUGUST2022.HEIC\n", + "M6_4AUGUST2022.HEIC\n", + "M12_4AUGUST2022.HEIC\n", + "M26_4AUGUST2022.jpg\n", + "M28_4AUGUST2022..jpg\n", + "M32_4AUGUST2022..jpg\n", + "M24_4AUGUST2022..jpg\n", + "M14_4AUGUST2022.HEIC\n", + "M14._17AUGUST2022.HEIC\n", + "M24_17AUGUST2022.jpg\n", + "M30_17AUGUST2022.jpg\n", + "M10_17AUGUST2022.HEIC\n", + "M22_17AUGUST2022.jpg\n", + "M2_17AUGUST2022.HEIC\n", + "M16_17AUGUST2022.HEIC\n", + "M26_17AUGUST2022.jpg\n", + "M12_17AUGUST2022.HEIC\n", + "M18_17AUGUST2022.jpg\n", + "M28_17AUGUST2022.jpg\n", + "M6_17AUGUST2022.HEIC\n", + "M32-17AUGUST2022.jpg\n", + "M4_17AUGUST2022.HEIC\n", + "M8_17AUGUST2022.HEIC\n", + "M20_17AUGUST2022.jpg\n", + "M12_27JULY2022.jpg\n", + "Moore_M_Phenology of soybean_27JULY2022.jpg\n", + "M8_27JULY2022.jpg\n", + "M26_27JULY2022.jpeg\n", + "M16_27JULY2022.jpg\n", + "M32_27JULY2022.jpeg\n", + "M30_27JULY2022.jpeg\n", + "M10_27JULY2022.jpg\n", + "M4_27JULY2022.jpg\n", + "M14_27JULY2022.jpg\n", + "M20_27JULY2022.jpeg\n", + "M28_27JULY2022.jpeg\n", + "M6_27JULY2022.jpg\n", + "M22_27JULY2022.jpeg\n", + "M24_27JULY2022.jpeg\n", + "M2_27JULY2022.jpg\n", + "U30_19JULY2022.jpg\n", + "U2_19JULY2022.HEIC\n", + "U10_19JULY2022.HEIC\n", + "U20_19JULY2022jpg.jpg\n", + "U22_19JULY2022.jpg\n", + "U16_19JULY2022.HEIC\n", + "U24_19JULY2022.jpg\n", + "U8_19JULY2022.HEIC\n", + "U14_19JULY2022.HEIC\n", + "U18_19JULY2022.jpg\n", + "U4_19JULY2022.HEIC\n", + "U12_19JULY2022.HEIC\n", + "U26_19JULY2022.jpg\n", + "U6_19JULY2022.HEIC\n", + "U32_19JULY2022.jpg\n", + "U28_19JULY2022.jpg\n", + "U26_5AUGUST2022.jpg\n", + "U32_5AUGUST2022jpg.jpg\n", + "IMG_4978.HEIC\n", + "IMG_4980.HEIC\n", + "IMG_4982.HEIC\n", + "IMG_4981.HEIC\n", + "U24_5AUGUST2022.jpg\n", + "U20_5AUGUST2022.jpg\n", + "IMG_4979.HEIC\n", + "IMG_4983.HEIC\n", + "IMG_4977.HEIC\n", + "U18_5AUGUST2022.jpg\n", + "U28_5AUGUST2022.jpg\n", + "U22_5AUGUST2022.jpg\n", + "U30_5AUGUST2022.jpg\n", + "U26_27JULY2022.jpg\n", + "U20_27JULY2022.jpg\n", + "U2_27JULY2022.HEIC\n", + "U16_27JULY2022.HEIC\n", + "U10_27JULY2022.HEIC\n", + "U24_27JULY2022.jpg\n", + "U6_27JULY2022.HEIC\n", + "U12_27JULY2022.HEIC\n", + "U8_27JULY2022.HEIC\n", + "U4_27JULY2022.HEIC\n", + "U28_27JULY2022.jpg\n", + "U32_27JULY2022.jpg\n", + "U30_27JULY2022.jpg\n", + "U22_27JULY2022jpg.jpg\n", + "U18_27JULY2022.jpg\n", + "U14_27JULY2022.HEIC\n", + "U4_10AUGUST2022.HEIC\n", + "U22_10AUGUST2022.jpg\n", + "U26_10AUGUST2022.jpg\n", + "U24_10AUGUST2022.jpg\n", + "U28_10AUGUST2022.jpg\n", + "U30_10AUGUST2022.jpg\n", + "U10_10AUGUST2022.HEIC\n", + "U32_10AUGUST2022.jpg\n", + "U18_10AUGUST2022.jpg\n", + "U20_10AUGUST2022.jpg\n", + "U16_10AUGUST2022.HEIC\n", + "U14_10AUGUST2022.HEIC\n", + "U2_10AUGUST2022.HEIC\n", + "U8_10AUGUST2022.HEIC\n", + "U12_10AUGUST2022.HEIC\n", + "U6_10AUGUST2022.HEIC\n", + "U16_23August2022.HEIC\n", + "U12_23August2022.HEIC\n", + "U26_23August2022.jpg\n", + "U20_23August2022.jpg\n", + "U10_23August2022.HEIC\n", + "U6_23August2022.HEIC\n", + "U4_23August2022.HEIC\n", + "U2_23August2022.HEIC\n", + "U28_23August2022.jpg\n", + "U22_23August2022.jpg\n", + "U14_23August2022.HEIC\n", + "U18_23August2022.HEIC\n", + "U18_23August2022.jpg\n", + "U24_23August2022.jpg\n", + "U28b_23August2022.jpg\n", + "U30_23August2022.jpg\n", + "U32_23August2022.jpg\n" + ] + } + ], + "source": [ + "folders = [\"buchhloz\", \"faivre\", \"moore\", \"underwood\"]\n", + "\n", + "transform = transforms.Compose([\n", + " transforms.PILToTensor()\n", + "])\n", + "\n", + "for folder_name in folders:\n", + " dates = []\n", + " for date in glob.glob(r\"/raid/projects/akhot2/group-01-phys371-sp2023/data/\" + folder_name + \"/*\"):\n", + " dates.append(date)\n", + " for date in dates:\n", + " for image_file in glob.glob(date + \"/*\"):\n", + " print(image_file.split(\"/\")[-1])\n", + " #continue\n", + " if image_file.split(\".\")[-1].lower() == \"heic\":\n", + " heif_file = pillow_heif.read_heif(image_file)\n", + " img = Image.frombytes(\n", + " heif_file.mode,\n", + " heif_file.size,\n", + " heif_file.data,\n", + " \"raw\",\n", + " )\n", + " #print(\"HEIC FILE:\")\n", + " else:\n", + " img = Image.open(image_file)\n", + " #print(\"JPG FILE:\")\n", + " img_tensor = transform(img)\n", + " #plt.imshow(img)\n", + " #plt.show()\n", + " #print(img_tensor.shape)\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "eaa3da3e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80679db4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/training/preprocess.py b/training/preprocess.py new file mode 100644 index 0000000..7371f47 --- /dev/null +++ b/training/preprocess.py @@ -0,0 +1 @@ +B \ No newline at end of file -- GitLab