Skip to content
Snippets Groups Projects
Commit acb17271 authored by SurajSSingh's avatar SurajSSingh
Browse files

Added Export Simple Data

parent 1a950002
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# Notebook for training and testing AI models
%% Cell type:markdown id: tags:
## Setup
%% Cell type:code id: tags:
``` python
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
from attention import Attention
```
%% Cell type:code id: tags:
``` python
# Print the TensorFlow version in use (the saved output of this cell shows 2.8.0).
print(tf.__version__)
```
%% Output
2.8.0
%% Cell type:code id: tags:
``` python
# CONSTANTS
# Raw sleep-state export; read row by row in the "Cleaning Raw Data" section.
RAW_SLEEP_DATA_PATH = ".data/raw_bed_sleep-state.csv"
# One-row-per-stage CSV written by the cleaning step and loaded back with pandas.
CLEANED_SLEEP_DATA_PATH = ".data/clean_bed_sleep-state.csv"
```
%% Cell type:code id: tags:
``` python
## Parameters and Hyper-parameters
# Number of sleep-stage classes (awake, light, deep, rem); used as the width of
# the final Dense layer of each model defined below.
SLEEP_STAGES = 4
```
%% Cell type:markdown id: tags:
## Import Data
%% Cell type:markdown id: tags:
### Cleaning Raw Data
%% Cell type:code id: tags:
``` python
import csv
import datetime
import itertools
```
%% Cell type:code id: tags:
``` python
# Scratch check: parse an ISO-8601 timestamp carrying a UTC offset with the
# format string used by the cleaning loop, then add one minute to it.
datetime.datetime.strptime("2022-04-21T10:18:00+02:00","%Y-%m-%dT%H:%M:%S%z") + datetime.timedelta(minutes=1)
```
%% Output
datetime.datetime(2022, 4, 21, 10, 19, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)))
%% Cell type:code id: tags:
``` python
def stage_probability(stage, to_test):
    """Return 1.0 when ``stage`` equals ``to_test`` and 0.0 otherwise.

    Used to one-hot encode a sleep-stage value into per-stage probability
    columns.
    """
    if stage == to_test:
        return 1.0
    return 0.0
```
%% Cell type:code id: tags:
``` python
# Scratch check: hours between `in_bed_time` and `start_time`.
# Both names are assigned by the cleaning loop, so this cell only works after
# that loop has run. Note that `.seconds` ignores whole days in the difference.
((start_time - in_bed_time).seconds)/3600
```
%% Output
6.833333333333333
%% Cell type:code id: tags:
``` python
# Expand every raw recording (a start time plus two parallel lists: stage
# durations in seconds and stage values) into one cleaned row per stage.
cleaned_info = []
date_seen = set()
in_bed_time = None
current_sleep_id = -1
# newline="" lets the csv module handle line endings itself, as its
# documentation requires for files passed to csv.reader.
with open(RAW_SLEEP_DATA_PATH, mode="r", newline="") as raw_file:
    raw_reader = csv.reader(raw_file)
    # The first row of the raw file is its header; emit the cleaned schema
    # in its place (nothing is emitted for a completely empty file).
    if next(raw_reader, None) is not None:
        cleaned_info.append([
            "sleep_id",
            "sleep_begin",
            "stage_start",
            "time_since_begin_sec",
            "stage_duration_sec",
            "stage_end",
            "stage_value",
            "awake_probability",
            "light_probability",
            "deep_probability",
            "rem_probability",
        ])
    for raw_row in raw_reader:
        start_time = datetime.datetime.strptime(raw_row[0], "%Y-%m-%dT%H:%M:%S%z")
        # Skip recordings whose start time has already been processed.
        if start_time in date_seen:
            continue
        date_seen.add(start_time)
        # A recording that starts before the current session began opens a
        # new session (and so does the very first recording).
        if in_bed_time is None or in_bed_time > start_time:
            current_sleep_id += 1
            in_bed_time = start_time
        # The list columns look like "[60,60,...]"; blank entries are ignored
        # so that an empty "[]" yields no stages instead of raising.
        durations = [int(v) for v in raw_row[1].strip("[]").split(",") if v.strip()]
        stages = [int(v) for v in raw_row[2].strip("[]").split(",") if v.strip()]
        # Seconds from the recording start to the current stage. Accumulating
        # the real durations keeps stage_start correct even when the stages of
        # one recording do not all have the same length.
        elapsed = 0
        for duration, stage in zip(durations, stages):
            current_time = start_time + datetime.timedelta(seconds=elapsed)
            cleaned_info.append([
                current_sleep_id,
                in_bed_time,
                current_time,
                # total_seconds() (unlike .seconds) also counts whole days.
                int((current_time - in_bed_time).total_seconds()),
                duration,
                current_time + datetime.timedelta(seconds=duration),
                stage,
                stage_probability(0, stage),
                stage_probability(1, stage),
                stage_probability(2, stage),
                stage_probability(3, stage),
            ])
            elapsed += duration
```
%% Cell type:code id: tags:
``` python
# Write the cleaned rows (header first) to disk.
# newline="" is what the csv module expects for files handed to csv.writer;
# without it, platforms that translate "\n" would emit a blank line after
# every record.
with open(CLEANED_SLEEP_DATA_PATH, "w", newline="") as clean_file:
    write = csv.writer(clean_file)
    write.writerows(cleaned_info)
# Report only after the file has been closed, i.e. fully flushed.
print("Finished Writing Cleaned Data")
```
%% Output
Finished Writing Cleaned Data
%% Cell type:markdown id: tags:
### Creating DataFrame from clean raw data
%% Cell type:code id: tags:
``` python
# Get the cleaned data
# Get the cleaned data
# No date parsing is requested here; the timestamp columns are converted
# explicitly with pd.to_datetime afterwards.
sleep_df_raw = pd.read_csv(CLEANED_SLEEP_DATA_PATH)#, parse_dates=["start", "end"], infer_datetime_format=True)
```
%% Cell type:code id: tags:
``` python
# Preprocess data:
# 1. convert to datetime
# 1. Convert the three timestamp columns to datetimes.
#    utc=True converts every value to UTC, so each column ends up with a
#    single timezone-aware dtype (datetime64[ns, UTC]).
sleep_df_raw["sleep_begin"] = pd.to_datetime(sleep_df_raw["sleep_begin"], utc=True)
sleep_df_raw["stage_start"] = pd.to_datetime(sleep_df_raw["stage_start"], utc=True)
sleep_df_raw["stage_end"] = pd.to_datetime(sleep_df_raw["stage_end"], utc=True)
# 2. Separate time, hour and minute
# MAYBE 3. smaller units: int16 or int8
```
%% Cell type:code id: tags:
``` python
def get_minute(row, index):
    """Return the minute component of the timestamp stored at ``row[index]``."""
    clock_time = row[index].time()
    return clock_time.minute
def get_hour(row, index):
    """Return the hour component of the timestamp stored at ``row[index]``."""
    clock_time = row[index].time()
    return clock_time.hour
```
%% Cell type:code id: tags:
``` python
# Derive hour-of-day and minute-of-hour features from the stage start time.
# The vectorized .dt accessors replace a per-row apply over the whole frame;
# the explicit cast keeps the int64 dtype regardless of the pandas version.
sleep_df_raw["stage_start_hour"] = sleep_df_raw["stage_start"].dt.hour.astype("int64")
sleep_df_raw["stage_start_minute"] = sleep_df_raw["stage_start"].dt.minute.astype("int64")
```
%% Cell type:code id: tags:
``` python
# Summarise column dtypes, non-null counts and memory usage of the cleaned frame.
sleep_df_raw.info()
```
%% Output
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 551042 entries, 0 to 551041
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 sleep_id 551042 non-null int64
1 sleep_begin 551042 non-null datetime64[ns, UTC]
2 stage_start 551042 non-null datetime64[ns, UTC]
3 time_since_begin_sec 551042 non-null int64
4 stage_duration_sec 551042 non-null int64
5 stage_end 551042 non-null datetime64[ns, UTC]
6 stage_value 551042 non-null int64
7 awake_probability 551042 non-null float64
8 light_probability 551042 non-null float64
9 deep_probability 551042 non-null float64
10 rem_probability 551042 non-null float64
11 stage_start_hour 551042 non-null int64
12 stage_start_minute 551042 non-null int64
dtypes: datetime64[ns, UTC](3), float64(4), int64(6)
memory usage: 54.7 MB
%% Cell type:code id: tags:
``` python
# Display the cleaned frame (a notebook renders the value of a cell's last expression).
sleep_df_raw
```
%% Output
sleep_id sleep_begin stage_start \
0 0 2022-04-21 08:18:00+00:00 2022-04-21 08:18:00+00:00
1 0 2022-04-21 08:18:00+00:00 2022-04-21 08:19:00+00:00
2 0 2022-04-21 08:18:00+00:00 2022-04-21 08:20:00+00:00
3 0 2022-04-21 08:18:00+00:00 2022-04-21 08:21:00+00:00
4 0 2022-04-21 08:18:00+00:00 2022-04-21 08:22:00+00:00
... ... ... ...
551037 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:17:00+00:00
551038 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:18:00+00:00
551039 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:19:00+00:00
551040 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:20:00+00:00
551041 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:21:00+00:00
time_since_begin_sec stage_duration_sec stage_end \
0 0 60 2022-04-21 08:19:00+00:00
1 60 60 2022-04-21 08:20:00+00:00
2 120 60 2022-04-21 08:21:00+00:00
3 180 60 2022-04-21 08:22:00+00:00
4 240 60 2022-04-21 08:23:00+00:00
... ... ... ...
551037 25560 60 2019-02-11 13:18:00+00:00
551038 25620 60 2019-02-11 13:19:00+00:00
551039 25680 60 2019-02-11 13:20:00+00:00
551040 25740 60 2019-02-11 13:21:00+00:00
551041 25800 60 2019-02-11 13:22:00+00:00
stage_value awake_probability light_probability deep_probability \
0 0 1.0 0.0 0.0
1 0 1.0 0.0 0.0
2 0 1.0 0.0 0.0
3 0 1.0 0.0 0.0
4 0 1.0 0.0 0.0
... ... ... ... ...
551037 1 0.0 1.0 0.0
551038 1 0.0 1.0 0.0
551039 1 0.0 1.0 0.0
551040 1 0.0 1.0 0.0
551041 1 0.0 1.0 0.0
rem_probability stage_start_hour stage_start_minute
0 0.0 8 18
1 0.0 8 19
2 0.0 8 20
3 0.0 8 21
4 0.0 8 22
... ... ... ...
551037 0.0 13 17
551038 0.0 13 18
551039 0.0 13 19
551040 0.0 13 20
551041 0.0 13 21
[551042 rows x 13 columns]
%% Cell type:code id: tags:
``` python
# Build the simplified model table: session id, minutes since the session
# began, clock hour/minute, and the four one-hot stage columns.
# Column order matters: WindowGenerator's default slices treat column 0 as the
# id and the last four columns as labels. Note the label order here is
# awake, rem, light, deep, which is not the numeric order of stage_value.
sleep_data = sleep_df_raw.assign(
    minutes_since_begin=sleep_df_raw["time_since_begin_sec"] // 60
)[[
    "sleep_id",
    "minutes_since_begin",
    "stage_start_hour",
    "stage_start_minute",
    "awake_probability",
    "rem_probability",
    "light_probability",
    "deep_probability",
]]
```
%% Cell type:code id: tags:
``` python
# Preview the first rows, then summarise dtypes and memory usage.
print(sleep_data.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only adds a stray "None" line to the output.
sleep_data.info()
```
%% Output
sleep_id minutes_since_begin stage_start_hour stage_start_minute \
0 0 0 8 18
1 0 1 8 19
2 0 2 8 20
3 0 3 8 21
4 0 4 8 22
awake_probability rem_probability light_probability deep_probability
0 1.0 0.0 0.0 0.0
1 1.0 0.0 0.0 0.0
2 1.0 0.0 0.0 0.0
3 1.0 0.0 0.0 0.0
4 1.0 0.0 0.0 0.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 551042 entries, 0 to 551041
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 sleep_id 551042 non-null int64
1 minutes_since_begin 551042 non-null int64
2 stage_start_hour 551042 non-null int64
3 stage_start_minute 551042 non-null int64
4 awake_probability 551042 non-null float64
5 rem_probability 551042 non-null float64
6 light_probability 551042 non-null float64
7 deep_probability 551042 non-null float64
dtypes: float64(4), int64(4)
memory usage: 33.6 MB
None
%% Cell type:code id: tags:
``` python
sleep_data
# Export the simplified table without the index column; the "Model
# Development" section loads this file back with pd.read_csv.
sleep_data.to_csv(".data/sleep_data_simple.csv", index=False, index_label=False)
```
%% Output
sleep_id minutes_since_begin stage_start_hour stage_start_minute \
0 0 0 8 18
1 0 1 8 19
2 0 2 8 20
3 0 3 8 21
4 0 4 8 22
... ... ... ... ...
551037 1132 426 13 17
551038 1132 427 13 18
551039 1132 428 13 19
551040 1132 429 13 20
551041 1132 430 13 21
awake_probability rem_probability light_probability \
0 1.0 0.0 0.0
1 1.0 0.0 0.0
2 1.0 0.0 0.0
3 1.0 0.0 0.0
4 1.0 0.0 0.0
... ... ... ...
551037 0.0 0.0 1.0
551038 0.0 0.0 1.0
551039 0.0 0.0 1.0
551040 0.0 0.0 1.0
551041 0.0 0.0 1.0
deep_probability
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
... ...
551037 0.0
551038 0.0
551039 0.0
551040 0.0
551041 0.0
[551042 rows x 8 columns]
%% Cell type:markdown id: tags:
## Model Development
%% Cell type:markdown id: tags:
### Helper functions and class
### Setup
%% Cell type:code id: tags:
``` python
# Number of sleep sessions (unique sleep_id values) held out for testing and
# for validation, respectively.
TEST_SIZE = 122
VALIDATION_SIZE = 183
# NOTE(review): TIME_STEP_INPUT and INPUT_TIME_STEP hold the same value; one
# looks like a leftover name. Confirm and keep only one.
TIME_STEP_INPUT = 10 # in minutes
# Batch size passed to timeseries_dataset_from_array.
BATCH_SIZE = 32
INPUT_TIME_STEP = 10 # in minutes
# Features per time step fed to the models: every exported column except sleep_id.
INPUT_FEATURES_SIZE = 7
# Default upper bound on training epochs used by compile_and_fit.
MAX_EPOCHS = 20
```
%% Cell type:code id: tags:
``` python
# Load the simplified feature table exported at the end of the data-import section.
sleep_data = pd.read_csv(".data/sleep_data_simple.csv")
```
%% Cell type:code id: tags:
``` python
# Display the loaded table to check its columns.
sleep_data
```
%% Output
Unnamed: 0 sleep_id minutes_since_begin stage_start_hour \
0 0 0 0 8
1 1 0 1 8
2 2 0 2 8
3 3 0 3 8
4 4 0 4 8
... ... ... ... ...
551037 551037 1132 426 13
551038 551038 1132 427 13
551039 551039 1132 428 13
551040 551040 1132 429 13
551041 551041 1132 430 13
stage_start_minute awake_probability rem_probability \
0 18 1.0 0.0
1 19 1.0 0.0
2 20 1.0 0.0
3 21 1.0 0.0
4 22 1.0 0.0
... ... ... ...
551037 17 0.0 0.0
551038 18 0.0 0.0
551039 19 0.0 0.0
551040 20 0.0 0.0
551041 21 0.0 0.0
light_probability deep_probability
0 0.0 0.0
1 0.0 0.0
2 0.0 0.0
3 0.0 0.0
4 0.0 0.0
... ... ...
551037 1.0 0.0
551038 1.0 0.0
551039 1.0 0.0
551040 1.0 0.0
551041 1.0 0.0
[551042 rows x 9 columns]
%% Cell type:markdown id: tags:
### Helper functions and class
%% Cell type:code id: tags:
``` python
def training_test_split_by_unique_index(data, index: str, test_size: int = 10, rng=None):
    """Split ``data`` so that whole groups end up on one side of the split.

    ``test_size`` distinct values of column ``index`` are drawn without
    replacement; every row carrying one of those values goes to the test
    partition and all remaining rows go to the training partition.

    Args:
        data: DataFrame containing the grouping column ``index``.
        index: Name of the column whose unique values identify the groups.
        test_size: Number of groups (not rows) to place in the test partition.
        rng: Optional ``numpy.random.Generator`` used for the draw, which makes
            the split reproducible. When omitted, NumPy's global random state
            is used.

    Returns:
        A ``(training, testing)`` tuple of DataFrames.

    Raises:
        ValueError: If ``test_size`` exceeds the number of unique groups
            (raised by the underlying ``choice`` call).
    """
    chooser = np.random if rng is None else rng
    test_ids = chooser.choice(data[index].unique(), size=test_size, replace=False)
    # Compute the membership mask once and reuse it for both partitions.
    in_test = data[index].isin(test_ids)
    return data[~in_test], data[in_test]
```
%% Cell type:code id: tags:
``` python
# Adapted from https://www.tensorflow.org/tutorials/structured_data/time_series
class WindowGenerator():
    """Partition session data and turn it into windowed tf.data datasets.

    The sessions in ``data`` (grouped by the ``index`` column) are split into
    training, validation and testing partitions. Each partition can be turned
    into a dataset of ``(inputs, labels)`` pairs, where ``inputs`` holds
    ``input_width`` consecutive rows and ``labels`` holds the single row that
    follows them. Windows never span two sessions.

    Args:
        data: DataFrame with one row per time step.
        index: Column identifying the session each row belongs to.
        input_width: Number of consecutive rows in every input window.
        validation_size: Number of sessions placed in the validation partition.
        test_size: Number of sessions placed in the testing partition.
        input_feature_slice: Column slice selecting the input features.
        label_feature_slice: Column slice selecting the label features.
        generate_data_now: When true, build the three partition datasets in
            the constructor; otherwise they are left as ``None``.
    """

    def __init__(self, data, index: str = "sleep_id", input_width: int = INPUT_TIME_STEP, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1,100), label_feature_slice: slice = slice(-4,100), generate_data_now: bool = True):
        # Partition the data that was passed in (not a notebook global) by session.
        self.training, self.testing = training_test_split_by_unique_index(data, index, test_size)
        self.training, self.validation = training_test_split_by_unique_index(self.training, index, validation_size)

        # Window parameters: predict the single step right after the inputs.
        self.input_width = input_width
        self.label_width = 1
        self.shift = 1
        self.total_window_size = self.input_width + self.shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

        self.input_feature_slice = input_feature_slice
        self.label_feature_slice = label_feature_slice

        # Small dataset for inspection, built from the session of the first row.
        first_id = data[index].iloc[0]
        self.sample_ds = self.make_dataset(data[data[index] == first_id], index)

        self.training_ds = None
        self.validation_ds = None
        self.testing_ds = None
        if generate_data_now:
            self.training_ds = self.make_dataset(self.training, index)
            self.validation_ds = self.make_dataset(self.validation, index)
            self.testing_ds = self.make_dataset(self.testing, index)

    def __repr__(self):
        return "WindowGenerator:\n\t" +'\n\t'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
        ])

    def split_window(self, features):
        """Split a batch of windows into ``(inputs, labels)``.

        ``features`` is indexed as ``[batch, time, feature]``. The labels'
        time axis has length ``label_width`` (always 1), so exactly that axis
        is squeezed away.
        """
        inputs = features[:, self.input_slice, self.input_feature_slice]
        # axis=1 keeps the batch axis intact even when a batch holds one window;
        # an axis-less squeeze would drop it as well.
        labels = tf.squeeze(features[:, self.labels_slice, self.label_feature_slice], axis=1)
        inputs.set_shape([None, self.input_width, None])
        return inputs, labels

    def make_dataset(self, data, index_group: str = "sleep_id", sort_by: str = "minutes_since_begin"):
        """Build one dataset of ``(inputs, labels)`` batches from ``data``.

        Every group of ``index_group`` is sorted by ``sort_by`` and windowed
        separately, and the per-group datasets are concatenated in order of
        first appearance.

        Raises:
            ValueError: If ``data`` contains no groups.
        """
        ds_all = None
        # groupby(sort=False) visits the groups in order of first appearance
        # without re-filtering the whole frame for every group.
        for _, group in data.groupby(index_group, sort=False):
            subset_data = np.array(group.sort_values(by=[sort_by]), dtype=np.float32)
            ds = tf.keras.utils.timeseries_dataset_from_array(
                data=subset_data,
                targets=None,
                sequence_length=self.total_window_size,
                sequence_stride=1,
                shuffle=False,
                batch_size=BATCH_SIZE,)
            # NOTE(review): this chains one concatenate per session; with many
            # sessions the chain gets long. Revisit if the input pipeline
            # turns out to be slow or unstable.
            ds_all = ds if ds_all is None else ds_all.concatenate(ds)
        if ds_all is None:
            raise ValueError(f"no '{index_group}' groups found; cannot build a dataset")
        return ds_all.map(self.split_window)
```
%% Cell type:markdown id: tags:
### Data Prep
All model inputs have the shape `(batch_size, timesteps, input_dim)`.
%% Cell type:code id: tags:
``` python
# Split the loaded sessions and build the windowed datasets, then display the
# window layout via WindowGenerator.__repr__.
wg = WindowGenerator(sleep_data)
wg
```
%% Output
WindowGenerator:
Total window size: 11
Input indices: [0 1 2 3 4 5 6 7 8 9]
Label indices: [10]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3621, in Index.get_loc(self, key, method, tolerance)
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3619'>3620</a> try:
-> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3620'>3621</a> return self._engine.get_loc(casted_key)
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3621'>3622</a> except KeyError as err:
File pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'sleep_id'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
/Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 33' in <cell line: 1>()
----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000072?line=0'>1</a> wg = WindowGenerator(sleep_data)
<a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000072?line=1'>2</a> wg
/Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 31' in WindowGenerator.__init__(self, data, index, input_width, validation_size, test_size, input_feature_slice, label_feature_slice, generate_data_now)
<a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=2'>3</a> def __init__(self, data, index: str = "sleep_id", input_width: int = INPUT_TIME_STEP, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1,100), label_feature_slice: slice = slice(-4,100), generate_data_now: bool = True):
<a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=3'>4</a> # Partition data
----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=4'>5</a> self.training, self.testing = training_test_split_by_unique_index(sleep_data, index, test_size)
<a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=5'>6</a> self.training, self.validation = training_test_split_by_unique_index(self.training, index, validation_size)
<a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=7'>8</a> # Window paramters
/Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 30' in training_test_split_by_unique_index(data, index, test_size)
<a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=0'>1</a> def training_test_split_by_unique_index(data, index: str, test_size: int = 10):
----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=1'>2</a> test_ids = np.random.choice(data[index].unique(), size = test_size, replace=False)
<a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=2'>3</a> return data[~data[index].isin(test_ids)], data[data[index].isin(test_ids)]
File ~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/frame.py:3505, in DataFrame.__getitem__(self, key)
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3502'>3503</a> if self.columns.nlevels > 1:
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3503'>3504</a> return self._getitem_multilevel(key)
-> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3504'>3505</a> indexer = self.columns.get_loc(key)
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3505'>3506</a> if is_integer(indexer):
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3506'>3507</a> indexer = [indexer]
File ~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3623, in Index.get_loc(self, key, method, tolerance)
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3620'>3621</a> return self._engine.get_loc(casted_key)
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3621'>3622</a> except KeyError as err:
-> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3622'>3623</a> raise KeyError(key) from err
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3623'>3624</a> except TypeError:
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3624'>3625</a> # If we have a listlike key, _check_indexing_error will raise
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3625'>3626</a> # InvalidIndexError. Otherwise we fall through and re-raise
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3626'>3627</a> # the TypeError.
<a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3627'>3628</a> self._check_indexing_error(key)
KeyError: 'sleep_id'
%% Cell type:code id: tags:
``` python
# Take the first batch of the inspection dataset.
sample = wg.sample_ds.take(1)
```
%% Cell type:code id: tags:
``` python
# Materialise that batch as a list holding one (inputs, labels) pair of NumPy arrays.
sample_array = list(sample.as_numpy_iterator())
```
%% Cell type:code id: tags:
``` python
# Position, within the first batch, of the window to inspect.
INDEX_TIMESTEP = 18
# Show that window's inputs next to its label.
sample_array[0][0][INDEX_TIMESTEP], sample_array[0][1][INDEX_TIMESTEP]
```
%% Output
(array([[18., 8., 36., 1., 0., 0., 0.],
[19., 8., 37., 1., 0., 0., 0.],
[20., 8., 38., 1., 0., 0., 0.],
[21., 8., 39., 1., 0., 0., 0.],
[22., 8., 40., 1., 0., 0., 0.],
[23., 8., 41., 1., 0., 0., 0.],
[24., 8., 42., 1., 0., 0., 0.],
[25., 8., 43., 1., 0., 0., 0.],
[26., 8., 44., 1., 0., 0., 0.],
[27., 8., 45., 1., 0., 0., 0.]], dtype=float32),
array([[0., 0., 1., 0.]], dtype=float32))
((10, 7), (4,))
%% Cell type:markdown id: tags:
### General Model Helper
%% Cell type:code id: tags:
``` python
# Adapted from https://www.tensorflow.org/tutorials/structured_data/time_series#linear_model
def compile_and_fit(model, window: WindowGenerator, loss = None, optimizer = None, metrics = None, patience:int = 2, epochs: int = MAX_EPOCHS):
    """Compile ``model`` and train it on the datasets held by ``window``.

    Args:
        model: Keras model to compile and fit.
        window: Object providing ``training_ds`` and ``validation_ds``.
        loss: Loss to optimise; a new ``MeanSquaredError`` when omitted.
        optimizer: Optimizer to use; a new ``Adam`` when omitted.
        metrics: Metric(s) to track; a new ``MeanAbsoluteError`` when omitted.
        patience: Epochs without ``val_loss`` improvement before stopping early.
        epochs: Maximum number of training epochs.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Losses, optimizers and metrics carry state, so the defaults are created
    # per call instead of once at definition time, where a single instance
    # would be shared by every model trained through this helper.
    if loss is None:
        loss = tf.losses.MeanSquaredError()
    if optimizer is None:
        optimizer = tf.optimizers.Adam()
    if metrics is None:
        metrics = tf.metrics.MeanAbsoluteError()

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=patience,
        mode='min'
    )
    model.compile(
        loss=loss,
        optimizer=optimizer,
        metrics=metrics,
    )
    return model.fit(window.training_ds, epochs=epochs, validation_data=window.validation_ds, callbacks=[early_stopping])
```
%% Cell type:markdown id: tags:
### Model 1: LSTM
%% Cell type:code id: tags:
``` python
# Hyper-parameters
# Number of units in the LSTM layer.
LSTM_UNITS = 16
# Learning rate passed to the Adam optimizer used for the LSTM model.
LSTM_LEARNING_RATE = 0.0001
```
%% Cell type:code id: tags:
``` python
# Model Definition
# One LSTM layer that reads a window of INPUT_TIME_STEP steps with
# INPUT_FEATURES_SIZE features each and returns only its final state,
# followed by a Dense layer producing one logit per sleep stage.
lstm_model = keras.Sequential()
lstm_model.add(layers.Input(shape=(INPUT_TIME_STEP, INPUT_FEATURES_SIZE)))
lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=False))
lstm_model.add(layers.Dense(SLEEP_STAGES))
lstm_model.build()
# summary() prints the table itself and returns None, so it is not wrapped in print().
lstm_model.summary()
```
%% Output
Model: "sequential_12"
Model: "sequential_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_14 (LSTM) (None, 10, 16) 1600
lstm_8 (LSTM) (None, 16) 1536
dense_9 (Dense) (None, 10, 4) 68
dense_7 (Dense) (None, 4) 68
=================================================================
Total params: 1,668
Trainable params: 1,668
Total params: 1,604
Trainable params: 1,604
Non-trainable params: 0
_________________________________________________________________
None
%% Cell type:code id: tags:
``` python
# Model Training
# The labels are one-hot rows over the four stage-probability columns, so the
# categorical (not the sparse, integer-label) cross-entropy applies.
# The model's Dense head has no activation, hence from_logits=True.
lstm_loss = keras.losses.CategoricalCrossentropy(from_logits=True)
lstm_optm = keras.optimizers.Adam(learning_rate=LSTM_LEARNING_RATE)
# CategoricalAccuracy compares the argmax of prediction and label; the plain
# Accuracy metric would test raw logits for exact equality with the labels.
lstm_metrics = [tf.keras.metrics.CategoricalCrossentropy(from_logits=True), tf.keras.metrics.CategoricalAccuracy()]
```
%% Cell type:code id: tags:
``` python
# Train the LSTM model on the window datasets and keep the returned training history.
lstm_history = compile_and_fit(model=lstm_model, window=wg, loss= lstm_loss, optimizer= lstm_optm, metrics=lstm_metrics)
```
%% Output
Epoch 1/20
Canceled future for execute_request message before replies were done
The Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.
%% Cell type:markdown id: tags:
### Model 2: GRU
%% Cell type:code id: tags:
``` python
# Hyper-parameters
# Number of units in the GRU layer.
GRU_UNITS = 16
```
%% Cell type:code id: tags:
``` python
# GRU counterpart of the LSTM model: same input window, one recurrent layer,
# and a Dense head producing one logit per sleep stage.
# The explicit Input layer lets Keras build the model so summary() can run,
# and no Embedding layer follows the Dense head: Embedding expects integer
# indices, not the float logits produced here.
gru_model = keras.Sequential([
    layers.Input(shape=(INPUT_TIME_STEP, INPUT_FEATURES_SIZE)),
    layers.GRU(GRU_UNITS),
    layers.Dense(SLEEP_STAGES),
])
# summary() prints the table itself and returns None.
gru_model.summary()
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
### Model 3: Attention Mechanism
%% Cell type:code id: tags:
``` python
# Size argument passed to the Attention layer of the third model.
ATTENTION_UNITS = 16
```
%% Cell type:code id: tags:
``` python
# Attention-based model: attention layer followed by a Dense head producing
# one logit per sleep stage.
# NOTE(review): assumes the imported Attention layer accepts a
# (batch, timesteps, features) input directly; confirm against the
# `attention` package before training.
am_model = keras.Sequential([
    layers.Input(shape=(INPUT_TIME_STEP, INPUT_FEATURES_SIZE)),
    Attention(ATTENTION_UNITS),
    layers.Dense(SLEEP_STAGES),
])
# summary() prints the table itself and returns None.
am_model.summary()
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
### Model Head-to-Head testing
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# Scratch
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment