Skip to content
Snippets Groups Projects
Commit 1a950002 authored by SurajSSingh's avatar SurajSSingh
Browse files

Fixed minor issues with datasets

parent 0c167832
No related branches found
No related tags found
No related merge requests found
*.pdf *.pdf
.data/ .data/
\ No newline at end of file *.pyc
\ No newline at end of file
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Notebook for training and testing AI models # Notebook for training and testing AI models
%% Cell type:markdown id: tags:
## Setup
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
from tensorflow import keras from tensorflow import keras
from tensorflow.keras import layers from tensorflow.keras import layers
import pandas as pd import pandas as pd
from attention import Attention from attention import Attention
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
print(tf.__version__) print(tf.__version__)
``` ```
%% Output %% Output
2.8.0 2.8.0
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# CONSTANTS — file locations for the raw export and the cleaned output.
RAW_SLEEP_DATA_PATH = ".data/raw_bed_sleep-state.csv"
CLEANED_SLEEP_DATA_PATH = ".data/clean_bed_sleep-state.csv"
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
## Parameters and Hyper-parameters ## Parameters and Hyper-parameters
SLEEP_STAGES = 4 SLEEP_STAGES = 4
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Import Data ## Import Data
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Cleaning Raw Data ### Cleaning Raw Data
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import csv import csv
import datetime import datetime
import itertools import itertools
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
datetime.datetime.strptime("2022-04-21T10:18:00+02:00","%Y-%m-%dT%H:%M:%S%z") + datetime.timedelta(minutes=1) datetime.datetime.strptime("2022-04-21T10:18:00+02:00","%Y-%m-%dT%H:%M:%S%z") + datetime.timedelta(minutes=1)
``` ```
%% Output %% Output
datetime.datetime(2022, 4, 21, 10, 19, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))) datetime.datetime(2022, 4, 21, 10, 19, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)))
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def stage_probability(stage, to_test):
    """Return 1.0 when `to_test` is exactly the stage `stage`, else 0.0.

    Used to one-hot encode a stage value into per-stage probability columns.
    """
    if stage == to_test:
        return 1.0
    return 0.0
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
((start_time - in_bed_time).seconds)/3600 ((start_time - in_bed_time).seconds)/3600
``` ```
%% Output %% Output
6.833333333333333 6.833333333333333
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Flatten the raw export (one row per recording, with bracketed lists of
# per-stage durations and stage codes) into one cleaned row per stage interval.
cleaned_info = []
date_seen = set()
previous_duration = 60  # seconds; seeded with the typical 60 s stage length
with open(RAW_SLEEP_DATA_PATH, mode='r') as raw_file:
    csvFile = csv.reader(raw_file)
    in_bed_time = None
    current_sleep_id = -1
    for index, lines in enumerate(csvFile):
        if index == 0:
            # Header row for the cleaned output.
            cleaned_info.append([
                "sleep_id",
                "sleep_begin",
                "stage_start",
                "time_since_begin_sec",
                "stage_duration_sec",
                "stage_end",
                "stage_value",
                "awake_probability",
                "light_probability",
                "deep_probability",
                "rem_probability",
            ])
            continue
        start_time = datetime.datetime.strptime(lines[0], "%Y-%m-%dT%H:%M:%S%z")
        # Skip duplicate export rows for the same timestamp.
        if start_time in date_seen:
            continue
        date_seen.add(start_time)
        # A start time earlier than the current session's begin marks a new night.
        if not in_bed_time or in_bed_time > start_time:
            current_sleep_id += 1
            in_bed_time = start_time
        durations = map(int, lines[1].strip("[]").split(","))
        stages = map(int, lines[2].strip("[]").split(","))
        for offset, (duration, stage) in enumerate(zip(durations, stages)):
            # NOTE(review): offset * previous_duration equals true elapsed time
            # only when every stage in the row has the same duration (60 s in
            # this export) — an accumulated sum of durations would be exact.
            current_time = start_time + datetime.timedelta(seconds=offset * previous_duration)
            cleaned_info.append([
                current_sleep_id,
                in_bed_time,
                current_time,
                (current_time - in_bed_time).seconds,
                duration,
                current_time + datetime.timedelta(seconds=duration),
                stage,
                stage_probability(0, stage),
                stage_probability(1, stage),
                stage_probability(2, stage),
                stage_probability(3, stage),
            ])
            previous_duration = duration
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Write the cleaned rows out. newline="" is required by the csv module so
# it controls line endings itself (otherwise blank rows appear on Windows).
with open(CLEANED_SLEEP_DATA_PATH, 'w', newline='') as clean_file:
    write = csv.writer(clean_file)
    write.writerows(cleaned_info)
print("Finished Writing Cleaned Data")
``` ```
%% Output %% Output
Finished Writing Cleaned Data Finished Writing Cleaned Data
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Creating DataFrame from clean raw data ### Creating DataFrame from clean raw data
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Get the cleaned data # Get the cleaned data
sleep_df_raw = pd.read_csv(CLEANED_SLEEP_DATA_PATH)#, parse_dates=["start", "end"], infer_datetime_format=True) sleep_df_raw = pd.read_csv(CLEANED_SLEEP_DATA_PATH)#, parse_dates=["start", "end"], infer_datetime_format=True)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Preprocess data: # Preprocess data:
# 1. convert to datetime # 1. convert to datetime
sleep_df_raw["sleep_begin"] = pd.to_datetime(sleep_df_raw["sleep_begin"], utc=True) sleep_df_raw["sleep_begin"] = pd.to_datetime(sleep_df_raw["sleep_begin"], utc=True)
sleep_df_raw["stage_start"] = pd.to_datetime(sleep_df_raw["stage_start"], utc=True) sleep_df_raw["stage_start"] = pd.to_datetime(sleep_df_raw["stage_start"], utc=True)
sleep_df_raw["stage_end"] = pd.to_datetime(sleep_df_raw["stage_end"], utc=True) sleep_df_raw["stage_end"] = pd.to_datetime(sleep_df_raw["stage_end"], utc=True)
# 2. Separate time, hour and minute # 2. Separate time, hour and minute
# MAYBE 3. smaller units: int16 or int8 # MAYBE 3. smaller units: int16 or int8
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def get_minute(row, index):
    """Return the minute component of the timestamp stored at row[index]."""
    timestamp = row[index]
    return timestamp.time().minute


def get_hour(row, index):
    """Return the hour component of the timestamp stored at row[index]."""
    timestamp = row[index]
    return timestamp.time().hour
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Derive hour/minute feature columns from the stage start timestamp.
# The .dt accessor is vectorized (C speed) — far faster than the previous
# row-wise .apply(lambda ...) over 551k rows, with identical results.
sleep_df_raw["stage_start_hour"] = sleep_df_raw["stage_start"].dt.hour
sleep_df_raw["stage_start_minute"] = sleep_df_raw["stage_start"].dt.minute
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
sleep_df_raw.info() sleep_df_raw.info()
``` ```
%% Output %% Output
<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'>
RangeIndex: 551042 entries, 0 to 551041 RangeIndex: 551042 entries, 0 to 551041
Data columns (total 13 columns): Data columns (total 13 columns):
# Column Non-Null Count Dtype # Column Non-Null Count Dtype
--- ------ -------------- ----- --- ------ -------------- -----
0 sleep_id 551042 non-null int64 0 sleep_id 551042 non-null int64
1 sleep_begin 551042 non-null datetime64[ns, UTC] 1 sleep_begin 551042 non-null datetime64[ns, UTC]
2 stage_start 551042 non-null datetime64[ns, UTC] 2 stage_start 551042 non-null datetime64[ns, UTC]
3 time_since_begin_sec 551042 non-null int64 3 time_since_begin_sec 551042 non-null int64
4 stage_duration_sec 551042 non-null int64 4 stage_duration_sec 551042 non-null int64
5 stage_end 551042 non-null datetime64[ns, UTC] 5 stage_end 551042 non-null datetime64[ns, UTC]
6 stage_value 551042 non-null int64 6 stage_value 551042 non-null int64
7 awake_probability 551042 non-null float64 7 awake_probability 551042 non-null float64
8 light_probability 551042 non-null float64 8 light_probability 551042 non-null float64
9 deep_probability 551042 non-null float64 9 deep_probability 551042 non-null float64
10 rem_probability 551042 non-null float64 10 rem_probability 551042 non-null float64
11 stage_start_hour 551042 non-null int64 11 stage_start_hour 551042 non-null int64
12 stage_start_minute 551042 non-null int64 12 stage_start_minute 551042 non-null int64
dtypes: datetime64[ns, UTC](3), float64(4), int64(6) dtypes: datetime64[ns, UTC](3), float64(4), int64(6)
memory usage: 54.7 MB memory usage: 54.7 MB
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
sleep_df_raw sleep_df_raw
``` ```
%% Output %% Output
sleep_id sleep_begin stage_start \ sleep_id sleep_begin stage_start \
0 0 2022-04-21 08:18:00+00:00 2022-04-21 08:18:00+00:00 0 0 2022-04-21 08:18:00+00:00 2022-04-21 08:18:00+00:00
1 0 2022-04-21 08:18:00+00:00 2022-04-21 08:19:00+00:00 1 0 2022-04-21 08:18:00+00:00 2022-04-21 08:19:00+00:00
2 0 2022-04-21 08:18:00+00:00 2022-04-21 08:20:00+00:00 2 0 2022-04-21 08:18:00+00:00 2022-04-21 08:20:00+00:00
3 0 2022-04-21 08:18:00+00:00 2022-04-21 08:21:00+00:00 3 0 2022-04-21 08:18:00+00:00 2022-04-21 08:21:00+00:00
4 0 2022-04-21 08:18:00+00:00 2022-04-21 08:22:00+00:00 4 0 2022-04-21 08:18:00+00:00 2022-04-21 08:22:00+00:00
... ... ... ... ... ... ... ...
551037 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:17:00+00:00 551037 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:17:00+00:00
551038 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:18:00+00:00 551038 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:18:00+00:00
551039 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:19:00+00:00 551039 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:19:00+00:00
551040 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:20:00+00:00 551040 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:20:00+00:00
551041 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:21:00+00:00 551041 1132 2019-02-11 06:11:00+00:00 2019-02-11 13:21:00+00:00
time_since_begin_sec stage_duration_sec stage_end \ time_since_begin_sec stage_duration_sec stage_end \
0 0 60 2022-04-21 08:19:00+00:00 0 0 60 2022-04-21 08:19:00+00:00
1 60 60 2022-04-21 08:20:00+00:00 1 60 60 2022-04-21 08:20:00+00:00
2 120 60 2022-04-21 08:21:00+00:00 2 120 60 2022-04-21 08:21:00+00:00
3 180 60 2022-04-21 08:22:00+00:00 3 180 60 2022-04-21 08:22:00+00:00
4 240 60 2022-04-21 08:23:00+00:00 4 240 60 2022-04-21 08:23:00+00:00
... ... ... ... ... ... ... ...
551037 25560 60 2019-02-11 13:18:00+00:00 551037 25560 60 2019-02-11 13:18:00+00:00
551038 25620 60 2019-02-11 13:19:00+00:00 551038 25620 60 2019-02-11 13:19:00+00:00
551039 25680 60 2019-02-11 13:20:00+00:00 551039 25680 60 2019-02-11 13:20:00+00:00
551040 25740 60 2019-02-11 13:21:00+00:00 551040 25740 60 2019-02-11 13:21:00+00:00
551041 25800 60 2019-02-11 13:22:00+00:00 551041 25800 60 2019-02-11 13:22:00+00:00
stage_value awake_probability light_probability deep_probability \ stage_value awake_probability light_probability deep_probability \
0 0 1.0 0.0 0.0 0 0 1.0 0.0 0.0
1 0 1.0 0.0 0.0 1 0 1.0 0.0 0.0
2 0 1.0 0.0 0.0 2 0 1.0 0.0 0.0
3 0 1.0 0.0 0.0 3 0 1.0 0.0 0.0
4 0 1.0 0.0 0.0 4 0 1.0 0.0 0.0
... ... ... ... ... ... ... ... ... ...
551037 1 0.0 1.0 0.0 551037 1 0.0 1.0 0.0
551038 1 0.0 1.0 0.0 551038 1 0.0 1.0 0.0
551039 1 0.0 1.0 0.0 551039 1 0.0 1.0 0.0
551040 1 0.0 1.0 0.0 551040 1 0.0 1.0 0.0
551041 1 0.0 1.0 0.0 551041 1 0.0 1.0 0.0
rem_probability stage_start_hour stage_start_minute rem_probability stage_start_hour stage_start_minute
0 0.0 8 18 0 0.0 8 18
1 0.0 8 19 1 0.0 8 19
2 0.0 8 20 2 0.0 8 20
3 0.0 8 21 3 0.0 8 21
4 0.0 8 22 4 0.0 8 22
... ... ... ... ... ... ... ...
551037 0.0 13 17 551037 0.0 13 17
551038 0.0 13 18 551038 0.0 13 18
551039 0.0 13 19 551039 0.0 13 19
551040 0.0 13 20 551040 0.0 13 20
551041 0.0 13 21 551041 0.0 13 21
[551042 rows x 13 columns] [551042 rows x 13 columns]
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
sleep_data = sleep_df_raw[["sleep_id", "stage_start_hour", "stage_start_minute", "awake_probability", "rem_probability","light_probability", "deep_probability"]] sleep_data = sleep_df_raw[["sleep_id", "stage_start_hour", "stage_start_minute", "awake_probability", "rem_probability","light_probability", "deep_probability"]]
sleep_data.insert(loc=1, column="minutes_since_begin" , value= sleep_df_raw["time_since_begin_sec"]//60) sleep_data.insert(loc=1, column="minutes_since_begin" , value= sleep_df_raw["time_since_begin_sec"]//60)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
print(sleep_data.head()) print(sleep_data.head())
print(sleep_data.info()) print(sleep_data.info())
``` ```
%% Output %% Output
sleep_id minutes_since_begin stage_start_hour stage_start_minute \ sleep_id minutes_since_begin stage_start_hour stage_start_minute \
0 0 0 8 18 0 0 0 8 18
1 0 1 8 19 1 0 1 8 19
2 0 2 8 20 2 0 2 8 20
3 0 3 8 21 3 0 3 8 21
4 0 4 8 22 4 0 4 8 22
awake_probability rem_probability light_probability deep_probability awake_probability rem_probability light_probability deep_probability
0 1.0 0.0 0.0 0.0 0 1.0 0.0 0.0 0.0
1 1.0 0.0 0.0 0.0 1 1.0 0.0 0.0 0.0
2 1.0 0.0 0.0 0.0 2 1.0 0.0 0.0 0.0
3 1.0 0.0 0.0 0.0 3 1.0 0.0 0.0 0.0
4 1.0 0.0 0.0 0.0 4 1.0 0.0 0.0 0.0
<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'>
RangeIndex: 551042 entries, 0 to 551041 RangeIndex: 551042 entries, 0 to 551041
Data columns (total 8 columns): Data columns (total 8 columns):
# Column Non-Null Count Dtype # Column Non-Null Count Dtype
--- ------ -------------- ----- --- ------ -------------- -----
0 sleep_id 551042 non-null int64 0 sleep_id 551042 non-null int64
1 minutes_since_begin 551042 non-null int64 1 minutes_since_begin 551042 non-null int64
2 stage_start_hour 551042 non-null int64 2 stage_start_hour 551042 non-null int64
3 stage_start_minute 551042 non-null int64 3 stage_start_minute 551042 non-null int64
4 awake_probability 551042 non-null float64 4 awake_probability 551042 non-null float64
5 rem_probability 551042 non-null float64 5 rem_probability 551042 non-null float64
6 light_probability 551042 non-null float64 6 light_probability 551042 non-null float64
7 deep_probability 551042 non-null float64 7 deep_probability 551042 non-null float64
dtypes: float64(4), int64(4) dtypes: float64(4), int64(4)
memory usage: 33.6 MB memory usage: 33.6 MB
None None
%% Cell type:code id: tags:
``` python
sleep_data
```
%% Output
sleep_id minutes_since_begin stage_start_hour stage_start_minute \
0 0 0 8 18
1 0 1 8 19
2 0 2 8 20
3 0 3 8 21
4 0 4 8 22
... ... ... ... ...
551037 1132 426 13 17
551038 1132 427 13 18
551039 1132 428 13 19
551040 1132 429 13 20
551041 1132 430 13 21
awake_probability rem_probability light_probability \
0 1.0 0.0 0.0
1 1.0 0.0 0.0
2 1.0 0.0 0.0
3 1.0 0.0 0.0
4 1.0 0.0 0.0
... ... ... ...
551037 0.0 0.0 1.0
551038 0.0 0.0 1.0
551039 0.0 0.0 1.0
551040 0.0 0.0 1.0
551041 0.0 0.0 1.0
deep_probability
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
... ...
551037 0.0
551038 0.0
551039 0.0
551040 0.0
551041 0.0
[551042 rows x 8 columns]
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Model Development ## Model Development
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Helper functions and class ### Helper functions and class
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Dataset partition sizes are counts of whole sleep sessions, not rows.
TEST_SIZE = 122
VALIDATION_SIZE = 183
TIME_STEP_INPUT = 10  # window length fed to the model, in minutes
BATCH_SIZE = 32
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def training_test_split_by_unique_index(data, index: str, test_size: int = 10):
    """Split `data` into (train, test) by sampling whole groups of `index`.

    `test_size` unique values of column `index` are drawn without replacement;
    every row belonging to a drawn value goes to the test frame, the rest to
    the training frame. Grouped sampling keeps all rows of one sleep session
    on the same side of the split.

    Returns (training_df, testing_df). Raises ValueError (from numpy) when
    `test_size` exceeds the number of unique index values.
    """
    test_ids = np.random.choice(data[index].unique(), size=test_size, replace=False)
    # Compute the membership mask once instead of running isin() twice.
    is_test = data[index].isin(test_ids)
    return data[~is_test], data[is_test]
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Adapted from https://www.tensorflow.org/tutorials/structured_data/time_series
class WindowGenerator():
    """Partitions sleep data by session and builds windowed tf.data datasets.

    Each window is `input_width` consecutive timesteps of input features plus
    one following timestep whose four stage-probability columns are the label.
    """

    def __init__(self, data, index: str = "sleep_id", input_width: int = TIME_STEP_INPUT, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1, 100), label_feature_slice: slice = slice(-4, 100), generate_data_now: bool = True):
        # Partition data.
        # Fix: split the `data` argument — the original referenced the global
        # `sleep_data` here, silently ignoring whatever the caller passed in.
        self.training, self.testing = training_test_split_by_unique_index(data, index, test_size)
        self.training, self.validation = training_test_split_by_unique_index(self.training, index, validation_size)
        # Window parameters.
        self.input_width = input_width
        self.label_width = 1
        self.shift = 1
        self.total_window_size = self.input_width + self.shift
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
        # Feature slices: defaults drop sleep_id (column 0) from the inputs and
        # keep the last four columns (the stage probabilities) as labels.
        self.input_feature_slice = input_feature_slice
        self.label_feature_slice = label_feature_slice
        # Small dataset from the first session (id 0), handy for inspection.
        self.sample_ds = self.make_dataset(data[data[index] == 0])
        if generate_data_now:
            self.training_ds = self.make_dataset(self.training)
            self.validation_ds = self.make_dataset(self.validation)
            self.testing_ds = self.make_dataset(self.testing)

    def __repr__(self):
        return "WindowGenerator:\n\t" + '\n\t'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
        ])

    def split_window(self, features):
        """Split a batch of windows into (inputs, labels) tensors."""
        inputs = features[:, self.input_slice, self.input_feature_slice]
        labels = features[:, self.labels_slice, self.label_feature_slice]
        # Restore static time dimensions lost by the slicing above.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])
        return inputs, labels

    def make_dataset(self, data, index_group: str = "sleep_id", sort_by: str = "minutes_since_begin"):
        """Window each sleep session separately and concatenate the results.

        Windowing per session prevents windows that straddle two nights.
        """
        ds_all = None
        for i_group in data[index_group].unique():
            subset_data = np.array(data[data[index_group] == i_group].sort_values(by=[sort_by]), dtype=np.float32)
            ds = tf.keras.utils.timeseries_dataset_from_array(
                data=subset_data,
                targets=None,
                sequence_length=self.total_window_size,
                sequence_stride=1,
                shuffle=False,
                batch_size=BATCH_SIZE,)
            ds_all = ds if ds_all is None else ds_all.concatenate(ds)
        ds_all = ds_all.map(self.split_window)
        return ds_all
``` ```
%% Cell type:code id: tags:
``` python
wg = WindowGenerator(sleep_data)
wg
```
%% Output
2022-05-11 00:38:01.423873: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
WindowGenerator:
Total window size: 11
Input indices: [0 1 2 3 4 5 6 7 8 9]
Label indices: [10]
%% Cell type:code id: tags:
``` python
len(wg.testing_ds)
```
%% Output
1836
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Data Prep ### Data Prep
All inputs follow: (batch_size, timesteps, input_dim) All inputs follow: (batch_size, timesteps, input_dim)
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
training_sleep_data, test_sleep_data = training_test_split_by_unique_index(sleep_data, "sleep_id", 5) wg = WindowGenerator(sleep_data)
``` wg
%% Output
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
/Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 32' in <cell line: 1>()
----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000068?line=0'>1</a> training_sleep_data, test_sleep_data = training_test_split_by_unique_index(sleep_data, "sleep_id", 5)
NameError: name 'training_test_split_by_unique_index' is not defined
%% Cell type:code id: tags:
``` python
set(training.sleep_id.unique()).intersection(set(test.sleep_id.unique()))
``` ```
%% Output %% Output
set() WindowGenerator:
Total window size: 11
Input indices: [0 1 2 3 4 5 6 7 8 9]
Label indices: [10]
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
sleep_data_tensor = tf.convert_to_tensor(sleep_data) sample = wg.sample_ds.take(1)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
sleep_data_tensor.shape sample_array = list(sample.as_numpy_iterator())
``` ```
%% Output
TensorShape([551042, 7])
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
sleep_data_tensor[1] INDEX_TIMESTEP = 18
sample_array[0][0][INDEX_TIMESTEP], sample_array[0][1][INDEX_TIMESTEP]
``` ```
%% Output %% Output
<tf.Tensor: shape=(7,), dtype=float64, numpy=array([ 1., 8., 19., 1., 0., 0., 0.])> (array([[18., 8., 36., 1., 0., 0., 0.],
[19., 8., 37., 1., 0., 0., 0.],
[20., 8., 38., 1., 0., 0., 0.],
[21., 8., 39., 1., 0., 0., 0.],
[22., 8., 40., 1., 0., 0., 0.],
[23., 8., 41., 1., 0., 0., 0.],
[24., 8., 42., 1., 0., 0., 0.],
[25., 8., 43., 1., 0., 0., 0.],
[26., 8., 44., 1., 0., 0., 0.],
[27., 8., 45., 1., 0., 0., 0.]], dtype=float32),
array([[0., 0., 1., 0.]], dtype=float32))
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Model 1: LSTM ### Model 1: LSTM
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Hyper-parameters for the LSTM baseline.
LSTM_UNITS = 16            # hidden units in the LSTM layer
LSTM_LEARNING_RATE = 0.0001  # Adam learning rate
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Model Definition
lstm_model = keras.Sequential()
# 7 features per timestep: the 8 sleep_data columns minus sleep_id, which the
# WindowGenerator's input_feature_slice drops — the sample window printed
# above has shape (10, 7), so the previous shape=(TIME_STEP_INPUT, 8) would
# never match the pipeline's output.
lstm_model.add(layers.Input(shape=(TIME_STEP_INPUT, 7)))
# NOTE(review): return_sequences=True yields a prediction per timestep, while
# the dataset labels are one per window — confirm before training.
lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=True))
lstm_model.add(layers.Dense(SLEEP_STAGES))
lstm_model.build()
print(lstm_model.summary())
``` ```
%% Output %% Output
Model: "sequential_12" Model: "sequential_12"
_________________________________________________________________ _________________________________________________________________
Layer (type) Output Shape Param # Layer (type) Output Shape Param #
================================================================= =================================================================
lstm_14 (LSTM) (None, 10, 16) 1600 lstm_14 (LSTM) (None, 10, 16) 1600
dense_9 (Dense) (None, 10, 4) 68 dense_9 (Dense) (None, 10, 4) 68
================================================================= =================================================================
Total params: 1,668 Total params: 1,668
Trainable params: 1,668 Trainable params: 1,668
Non-trainable params: 0 Non-trainable params: 0
_________________________________________________________________ _________________________________________________________________
None None
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Model Training
# The labels are one-hot probability vectors (awake/light/deep/rem columns),
# so use CategoricalCrossentropy; SparseCategoricalCrossentropy expects
# integer class ids and would misinterpret these targets.
lstm_loss = keras.losses.CategoricalCrossentropy(from_logits=True)
lstm_optm = keras.optimizers.Adam(learning_rate=LSTM_LEARNING_RATE)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Model 2: GRU ### Model 2: GRU
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Hyper-parameters for the GRU model.
GRU_UNITS = 16  # hidden units in the GRU layer
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Model Definition — mirror the LSTM model's input contract so both consume
# the same (TIME_STEP_INPUT, 7) windows from the WindowGenerator.
gru_model = keras.Sequential([
    layers.Input(shape=(TIME_STEP_INPUT, 7)),
    layers.GRU(GRU_UNITS),
    layers.Dense(SLEEP_STAGES),
])
# NOTE(review): the Embedding layer previously .add()-ed here was dropped —
# it was appended AFTER the Dense output layer (unreachable as an input
# stage), and these inputs are continuous features, not token ids. Without
# an Input layer the original summary() call would also fail on an unbuilt
# model.
print(gru_model.summary())
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Model 3: Attention Mechanism ### Model 3: Attention Mechanism
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
ATTENTION_UNITS = 16 ATTENTION_UNITS = 16
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Attention-based model.
am_model = keras.Sequential([
    Attention(ATTENTION_UNITS),  # fix: missing comma here was a SyntaxError
    layers.Dense(SLEEP_STAGES),
])
print(am_model.summary())
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Model Head-to-Head testing ### Model Head-to-Head testing
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Scratch # Scratch
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment