# Notebook for training and testing AI models

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd

In [3]:
# Print the TensorFlow version in use (2.8.0 in the recorded output).
print(tf.__version__)

2.8.0


In [14]:
# CONSTANTS
# Input CSV read by the cleaning step and output CSV it produces; both are
# relative to the notebook's working directory.
# NOTE(review): the paths start with ".data/" (a hidden directory), not
# "./data/" — confirm this is intentional.
RAW_SLEEP_DATA_PATH = ".data/raw_bed_sleep-state.csv"
CLEANED_SLEEP_DATA_PATH = ".data/clean_bed_sleep-state.csv"

## Parameters and Hyper-parameters

In [4]:
# Number of distinct sleep stages; presumably matches the four probability
# columns (awake, light, deep, rem) emitted by the cleaning step.
# NOTE(review): not referenced anywhere else in the visible notebook — confirm
# it is meant to size the model's output layer.
SLEEP_STAGES = 4


## Import Data

### Cleaning Raw Data

In [59]:
import csv
import datetime
import itertools

In [104]:
# Scratch check: parse an ISO-8601 timestamp with a UTC offset (%z) and add one
# minute, to validate the format string used by the cleaning step.
datetime.datetime.strptime("2022-04-21T10:18:00+02:00","%Y-%m-%dT%H:%M:%S%z") + datetime.timedelta(minutes=1)

datetime.datetime(2022, 4, 21, 10, 19, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)))

In [115]:
def stage_probability(stage, to_test):
    """Return 1.0 when the two stage values compare equal, otherwise 0.0.

    Args:
        stage: First stage value.
        to_test: Second stage value, compared against ``stage`` with ``==``.

    Returns:
        float: ``1.0`` on a match, ``0.0`` otherwise.
    """
    if stage == to_test:
        return 1.0
    return 0.0

In [150]:
# Scratch check: hours between the last processed record's start and the
# tracked in-bed time.
# NOTE(review): `start_time` and `in_bed_time` are only assigned by the cleaning
# cell further down, so this cell raises NameError on a fresh kernel run
# top-to-bottom. `.seconds` also ignores whole days of the difference.
((start_time - in_bed_time).seconds)/3600

6.833333333333333

In [156]:
# Expand every raw record into one cleaned row per stage segment.
#
# Raw columns used:
#   [0] record start, ISO-8601 timestamp with UTC offset
#   [1] bracketed, comma-separated segment durations in seconds
#   [2] bracketed, comma-separated stage values, paired positionally with [1]
#       (zip stops at the shorter of the two lists)
#
# Result: `cleaned_info`, a list of rows whose first entry is the header row.
# The header is always emitted, even when the raw file is empty.


def _parse_int_list(cell):
    """Parse a cell such as "[60,60,120]" into a list of ints.

    Blank tokens are skipped, so an empty list ("[]") yields [] instead of
    raising ValueError on int("").
    """
    return [int(token) for token in cell.strip("[]").split(",") if token.strip()]


cleaned_info = [[
    "sleep_begin",
    "stage_start",
    "time_since_begin_sec",
    "stage_duration_sec",
    "stage_end",
    "stage_value",
    "awake_probability",
    "light_probability",
    "deep_probability",
    "rem_probability",
]]
date_seen = set()
in_bed_time = None

# newline="" is the csv module's documented way to open files for csv.reader.
with open(RAW_SLEEP_DATA_PATH, mode="r", newline="") as raw_file:
    raw_reader = csv.reader(raw_file)
    next(raw_reader, None)  # skip the raw file's own header row
    for raw_row in raw_reader:
        if not raw_row:
            # csv.reader yields [] for blank lines; nothing to expand.
            continue
        start_time = datetime.datetime.strptime(raw_row[0], "%Y-%m-%dT%H:%M:%S%z")
        # Only the first record for a given start timestamp is kept.
        if start_time in date_seen:
            continue
        date_seen.add(start_time)
        # `in_bed_time` is the earliest record start seen so far; it is not
        # reset per record.
        # NOTE(review): if the raw file is not ordered newest-first,
        # `sleep_begin` stays pinned to an older record — confirm intended.
        if in_bed_time is None or in_bed_time > start_time:
            in_bed_time = start_time

        # Each segment starts where the previous one ended. Accumulating the
        # durations keeps stage_start correct when segment lengths differ
        # (multiplying the index by the previous duration is only right when
        # every segment has the same length).
        current_time = start_time
        for duration, stage in zip(_parse_int_list(raw_row[1]), _parse_int_list(raw_row[2])):
            stage_end = current_time + datetime.timedelta(seconds=duration)
            cleaned_info.append([
                in_bed_time,
                current_time,
                # total_seconds() includes whole days; `.seconds` drops them.
                int((current_time - in_bed_time).total_seconds()),
                duration,
                stage_end,
                stage,
                stage_probability(0, stage),
                stage_probability(1, stage),
                stage_probability(2, stage),
                stage_probability(3, stage),
            ])
            current_time = stage_end


In [157]:
# Persist the cleaned rows to CSV.
# newline="" prevents the csv writer's own "\r\n" line terminator from being
# translated a second time, which otherwise produces a blank line after every
# row on Windows.
with open(CLEANED_SLEEP_DATA_PATH, 'w', newline="") as clean_file:
    write = csv.writer(clean_file)
    write.writerows(cleaned_info)

### Creating a DataFrame from the cleaned data

In [158]:
# Load the cleaned CSV. No parse_dates is passed here, so the timestamp columns
# are converted explicitly with pd.to_datetime in the following cell.
sleep_df_raw = pd.read_csv(CLEANED_SLEEP_DATA_PATH)

In [160]:
# Preprocess data:
#   1. Parse the three timestamp columns into timezone-aware datetimes in UTC.
for timestamp_column in ("sleep_begin", "stage_start", "stage_end"):
    sleep_df_raw[timestamp_column] = pd.to_datetime(sleep_df_raw[timestamp_column], utc=True)
#   MAYBE 2. Downcast the integer columns to smaller units (int16 or int8).

In [161]:
# Summarise dtypes and non-null counts to confirm the datetime conversion applied.
sleep_df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 551042 entries, 0 to 551041
Data columns (total 10 columns):
 #   Column                Non-Null Count   Dtype              
---  ------                --------------   -----              
 0   sleep_begin           551042 non-null  datetime64[ns, UTC]
 1   stage_start           551042 non-null  datetime64[ns, UTC]
 2   time_since_begin_sec  551042 non-null  int64              
 3   stage_duration_sec    551042 non-null  int64              
 4   stage_end             551042 non-null  datetime64[ns, UTC]
 5   stage_value           551042 non-null  int64              
 6   awake_probability     551042 non-null  float64            
 7   light_probability     551042 non-null  float64            
 8   deep_probability      551042 non-null  float64            
 9   rem_probability       551042 non-null  float64            
dtypes: datetime64[ns, UTC](3), float64(4), int64(3)
memory usage: 42.0 MB


In [162]:
# Display the frame; the recorded output shows only the first and last rows
# with the middle elided.
sleep_df_raw

Unnamed: 0,sleep_begin,stage_start,time_since_begin_sec,stage_duration_sec,stage_end,stage_value,awake_probability,light_probability,deep_probability,rem_probability
0,2022-04-21 08:18:00+00:00,2022-04-21 08:18:00+00:00,0,60,2022-04-21 08:19:00+00:00,0,1.0,0.0,0.0,0.0
1,2022-04-21 08:18:00+00:00,2022-04-21 08:19:00+00:00,60,60,2022-04-21 08:20:00+00:00,0,1.0,0.0,0.0,0.0
2,2022-04-21 08:18:00+00:00,2022-04-21 08:20:00+00:00,120,60,2022-04-21 08:21:00+00:00,0,1.0,0.0,0.0,0.0
3,2022-04-21 08:18:00+00:00,2022-04-21 08:21:00+00:00,180,60,2022-04-21 08:22:00+00:00,0,1.0,0.0,0.0,0.0
4,2022-04-21 08:18:00+00:00,2022-04-21 08:22:00+00:00,240,60,2022-04-21 08:23:00+00:00,0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
551037,2019-02-11 06:11:00+00:00,2019-02-11 13:17:00+00:00,25560,60,2019-02-11 13:18:00+00:00,1,0.0,1.0,0.0,0.0
551038,2019-02-11 06:11:00+00:00,2019-02-11 13:18:00+00:00,25620,60,2019-02-11 13:19:00+00:00,1,0.0,1.0,0.0,0.0
551039,2019-02-11 06:11:00+00:00,2019-02-11 13:19:00+00:00,25680,60,2019-02-11 13:20:00+00:00,1,0.0,1.0,0.0,0.0
551040,2019-02-11 06:11:00+00:00,2019-02-11 13:20:00+00:00,25740,60,2019-02-11 13:21:00+00:00,1,0.0,1.0,0.0,0.0


start                2019-02-11 06:11:00+00:00
duration                                    60
end                  2019-02-11 06:12:00+00:00
stage                                        0
awake_probability                          0.0
light_probability                          0.0
deep_probability                           0.0
rem_probability                            0.0
dtype: object

## Model 1: GRU

In [None]:
# Sequential stack: Embedding -> GRU -> Dense, built from a layer list.
# NOTE(review): the Dense head has 10 units while SLEEP_STAGES is 4, and the
# Embedding layer presumably expects integer ids below 1000 — confirm these
# sizes match the sleep data before training.
model = keras.Sequential(
    [
        layers.Embedding(input_dim=1000, output_dim=64),
        layers.GRU(128),
        layers.Dense(10),
    ]
)

# Scratch