Skip to content
Snippets Groups Projects
Commit 19606b81 authored by fresleven's avatar fresleven
Browse files

Starting preprocessing

parent 7574c5cc
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:ff555f6b tags:
``` python
import glob
import os

import matplotlib.pyplot as plt
import pandas as pd
import pillow_heif
import torch
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm
```
%% Cell type:code id:fd6db132 tags:
``` python
# Load the sticky-trap label spreadsheet into a DataFrame and preview it.
# NOTE: the recorded output shows an openpyxl UserWarning ("Cannot parse
# header or footer so it will be ignored") raised while reading this file.
labels_path = "/raid/projects/akhot2/group-01-phys371-sp2023/data/trap_labels.xlsx"
df = pd.read_excel(labels_path)
# Show the first rows as a quick sanity check of the loaded columns.
df.head()
```
%% Output
/raid/projects/akhot2/conda/envs/akhot2/lib/python3.9/site-packages/openpyxl/worksheet/header_footer.py:48: UserWarning: Cannot parse header or footer so it will be ignored
warn("""Cannot parse header or footer so it will be ignored""")
Grand Acc # Weekly Acc# Sample collection period Sticky trap pair (rep) \
0 2 2 1 1
1 4 4 1 2
2 6 6 1 3
3 8 8 1 4
4 10 10 1 5
2022 Julian Date Month 2022 Set up Date 2022 Data Collect. Date \
0 NaN NaN 2022-07-08 2022-07-19
1 NaN NaN 2022-07-08 2022-07-19
2 NaN NaN 2022-07-08 2022-07-19
3 NaN NaN 2022-07-08 2022-07-19
4 NaN NaN 2022-07-08 2022-07-19
Coll. intrvl (d) Coll. hour ... NCR/trap /day WCR/trap /day \
0 11.0 11.35 ... 0.0 0.0
1 11.0 11.35 ... 0.0 0.0
2 11.0 11.35 ... 0.0 0.0
3 11.0 11.35 ... 0.0 0.0
4 11.0 11.35 ... 0.0 0.0
Total CRW /trap /day NCR/trap top /day WCR/trap top/day \
0 0.0 0.0 0.0
1 0.0 0.0 0.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 0.0 0.0 0.0
Total CRW /trap top/day Proportion NCR on top Proportion WCR on top \
0 0.0 NaN NaN
1 0.0 NaN NaN
2 0.0 NaN NaN
3 0.0 NaN NaN
4 0.0 NaN NaN
Proportion All CRW on top Notes
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
[5 rows x 43 columns]
%% Cell type:code id:7097a875 tags:
``` python
# Display the exact column labels; later indexing uses them verbatim
# (e.g. "trap orient'n"), including their punctuation and spacing.
df.columns
```
%% Output
Index(['Grand Acc #', 'Weekly Acc#', 'Sample collection period',
'Sticky trap pair (rep)', '2022 Julian Date', 'Month',
'2022 Set up Date', '2022 Data Collect. Date', 'Coll. intrvl (d)',
'Coll. hour', 'Trap visitor', 'Drone pilot', 'On farm location',
'Site Abrv.', 'trap angle', 'trap orient'n', 'Trap #',
'Sticky Trap Name', 'T-top side NCR', 'bottom side NCR',
'T-top side WCR', 'bottom side WCR', 'WCR mal', 'WCR fem',
'T-top side "other"', 'Bottom side "other"', 'T-top side total CRW',
'bottom side total CRW', 'Trap Total NCR', 'Trap Total WCR',
'Trap Total CRW', 'Proportion NCR in total CRW',
'Proportion WCR in total CRW', 'NCR/trap /day', 'WCR/trap /day',
'Total CRW /trap /day', 'NCR/trap top /day', 'WCR/trap top/day',
'Total CRW /trap top/day', 'Proportion NCR on top',
'Proportion WCR on top', 'Proportion All CRW on top', 'Notes'],
dtype='object')
%% Cell type:code id:24d3874e tags:
``` python
# Keep only the rows for angled traps. The comparison literal "Angled " ends
# with a trailing space on purpose: that is the value this filter matches in
# the spreadsheet column. `.copy()` gives an independent frame so the column
# assignment below does not write into a slice of `df`.
df_angled = df[df["trap orient'n"] == "Angled "].copy()

# `astype` returns a new Series instead of converting in place, so the result
# has to be assigned back for the conversion to take effect. Missing dates
# (NaT) become the string "NaT".
df_angled["2022 Data Collect. Date"] = df_angled["2022 Data Collect. Date"].astype(str)

# Preview the columns relevant for matching labels to trap images.
df_angled[["trap orient'n", "2022 Data Collect. Date", "Sticky Trap Name", "T-top side WCR"]]
```
%% Output
trap orient'n 2022 Data Collect. Date Sticky Trap Name T-top side WCR
0 Angled 2022-07-19 U2 0
1 Angled 2022-07-19 U4 0
2 Angled 2022-07-19 U6 0
3 Angled 2022-07-19 U8 0
4 Angled 2022-07-19 U10 0
.. ... ... ... ...
603 Angled NaT M24 NaN
604 Angled NaT M26 NaN
605 Angled NaT M28 NaN
606 Angled NaT M30 NaN
607 Angled NaT M32 NaN
[320 rows x 4 columns]
%% Cell type:code id:0ccf1183 tags:
``` python
# Sub-directories of the data root that are scanned for trap images.
# Presumably one per farm, each holding one sub-folder per collection date
# (the inner loop variable is named `date`) -- TODO confirm against the disk layout.
folders = ["buchhloz", "faivre", "moore", "underwood"]
data_root = "/raid/projects/akhot2/group-01-phys371-sp2023/data/"

# Converts a PIL image into a torch tensor.
transform = transforms.Compose([
    transforms.PILToTensor()
])

for folder_name in folders:
    # glob makes no ordering guarantee, so sort for a reproducible traversal.
    for date in sorted(glob.glob(data_root + folder_name + "/*")):
        for image_file in sorted(glob.glob(date + "/*")):
            # Nested directories cannot be opened as images; skip them.
            if not os.path.isfile(image_file):
                continue

            if image_file.lower().endswith(".heic"):
                # HEIC files are decoded through pillow_heif and wrapped in a
                # PIL image built from the decoded pixel buffer.
                heif_file = pillow_heif.read_heif(image_file)
                img = Image.frombytes(
                    heif_file.mode,
                    heif_file.size,
                    heif_file.data,
                    "raw",
                )
                img_tensor = transform(img)
            else:
                # Image.open is lazy and keeps the file open; convert inside
                # the context manager so the handle is always released.
                with Image.open(image_file) as img:
                    img_tensor = transform(img)

            # Log the bare file name of every image that was processed.
            print(image_file.split("/")[-1])
```
%% Output
B8_15August2022.jpg
B12_15August2022.jpg
B26_15August2022.jpg
B22_15August2022.jpg
B4_15August2022.jpg
B20_15August2022.jpg
B32_15August2022.jpg
B16_15August2022.jpg
B18_15August2022.jpg
B28_15August2022.jpg
B14_15August2022.jpg
B24_15August2022.jpg
B10_15August2022.jpg
B2_15August2022.jpg
B6_15August2022.jpg
B28_25August2022.jpg
B26_25August2022.jpg
B24_25August2022.jpg
B16_25August2022.HEIC
B12_25August2022.HEIC
B2_25August2022.HEIC
B20_25August2022.jpg
B8_25August2022.HEIC
B18_25August2022.jpg
B32_25August2022.jpg
Buchholz trap 18 8_25_2022 WCR and NCR present_3468.jpg
B4_25August2022.HEIC
B22_25August2022.jpg
B6_25August2022.HEIC
B10_25August2022.HEIC
B30_25August2022.jpg
B14_25August2022.HEIC
B18b_25August2022.jpg
B30b_25August2022.jpg
B14_20July2022.jpg
B8_20July2022.jpg
B32_20July2022.jpg
B26b_20July2022.jpg
B16_20JULY2022.jpg
B26_20July2022.jpg
B28 angled sticky-20JUL2022.jpg
B32 angled sticky-20JUL2022.jpg
B30_20July2022.jpg
B2_20July2022.jpg
B4_20July2022.jpg
B10_20July2022.jpg
B28_20July2022.jpg
B12_20July2022.jpg
B6_20July2022.jpg
B2_3AUGUST2022.HEIC
B18_3AUGUST2022.jpeg
B20_3AUGUST2022.jpeg
B4_3AUGUST2022.HEIC
B10_3AUGUST2022.HEIC
B26_3AUGUST2022.jpeg
B8_3AUGUST2022.HEIC
B28_3AUGUST2022.jpeg
B22_3AUGUST2022.jpeg
B14_3AUGUST2022.HEIC
B16_3AUGUST2022.HEIC
B24_3AUGUST2022.jpeg
B12_3AUGUST2022.HEIC
B6_3AUGUST2022.HEIC
B8_26JUL2022.jpg
B14_26JUL2022.jpg
B6_26JUL2022.jpg
B2_26JUL2022.jpg
B4_26JUL2022.jpg
B10_26JUL2022.jpg
B16_26JUL2022.jpg
B12_26JUL2022jpg.jpg
F2_3AUGUST2022.jpg
F28_3AUGUST2022.HEIC
F18_3AUGUST2022.HEIC
F10_3AUGUST2022.jpg
F16_3AUGUST2022.jpg
F6_3AUGUST2022.jpg
F12_3AUGUST2022jpg.jpg
F26_3AUGUST2022.HEIC
F4_3AUGUST2022.jpg
F8_3AUGUST2022.jpg
F20_3AUGUST2022.HEIC
F30_3AUGUST2022.HEIC
F22_3AUGUST2022.HEIC
F32_3AUGUST2022.HEIC
F14_3AUGUST2022.jpg
F24_3AUGUST2022.HEIC
F28_25August2022.HEIC
F2b_25August2022.heic
F26_25August2022.HEIC
F32_25August2022.HEIC
F16 wide view_25August2022.jpg
F16_25August2022.jpg
F12_25August2022.jpg
F30_25August2022.HEIC
F2_25August2022.jpg
F24_25August2022.HEIC
F20_25August2022.HEIC
F14_25August2022.jpg
F22_25August2022.HEIC
F18_25August2022.HEIC
F14b_25August2022.jpg
Faivre field soybean phenoloyg_25August2022.jpg
F14_20July2022.jpg
F32_20July2022.jpg
F8_20July2022.jpg
F12_20July2022.jpg
F28_20July2022.jpg
F30_20July2022.jpg
F18_20July2022.jpg
F20_20July2022.jpg
F2_20July2022.jpg
F26_20July2022.jpg
F22_20July2022.jpg
F6b_20July2022.jpg
F6_20July2022.jpg
F16_20July2022.jpg
F24_20July2022.jpg
F10_20July2022.jpg
F20_26JUL2022.HEIC
F26_26JUL2022.HEIC
F8_26JUL2022.jpg
F28_26JUL2022.HEIC
F30_26JUL2022.HEIC
F2_26JUL2022.jpg
F24_26JUL2022.HEIC
F18_26JUL2022.HEIC
F10_26JUL2022.jpg
F32_26JUL2022.HEIC
F22_26JUL2022.HEIC
M16_19July2022.jpg
M10_19July2022.jpg
M2_19July2022.jpg
M14_19July2022.jpg
M12_19July2022.jpg
M6_19July2022.jpg
M4_19July2022.jpg
M8_19July2022.jpg
M20_4AUGUST2022..jpg
M18_4AUGUST2022.jpg
M2_4AUGUST2022.HEIC
M22_4AUGUST2022.jpg
M30_4AUGUST2022..jpg
M8_4AUGUST2022.HEIC
M16_4AUGUST2022.HEIC
M10_4AUGUST2022.HEIC
M6_4AUGUST2022.HEIC
M12_4AUGUST2022.HEIC
M26_4AUGUST2022.jpg
M28_4AUGUST2022..jpg
M32_4AUGUST2022..jpg
M24_4AUGUST2022..jpg
M14_4AUGUST2022.HEIC
M14._17AUGUST2022.HEIC
M24_17AUGUST2022.jpg
M30_17AUGUST2022.jpg
M10_17AUGUST2022.HEIC
M22_17AUGUST2022.jpg
M2_17AUGUST2022.HEIC
M16_17AUGUST2022.HEIC
M26_17AUGUST2022.jpg
M12_17AUGUST2022.HEIC
M18_17AUGUST2022.jpg
M28_17AUGUST2022.jpg
M6_17AUGUST2022.HEIC
M32-17AUGUST2022.jpg
M4_17AUGUST2022.HEIC
M8_17AUGUST2022.HEIC
M20_17AUGUST2022.jpg
M12_27JULY2022.jpg
Moore_M_Phenology of soybean_27JULY2022.jpg
M8_27JULY2022.jpg
M26_27JULY2022.jpeg
M16_27JULY2022.jpg
M32_27JULY2022.jpeg
M30_27JULY2022.jpeg
%% Cell type:code id:eaa3da3e tags:
``` python
```
%% Cell type:code id:80679db4 tags:
``` python
```
This diff is collapsed.
B
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment