Added Export Simple Data

acb17271 · SurajSSingh · 1a950002 · acb17271
Commit acb17271 authored 2 years ago by SurajSSingh
--- a/tf_model.ipynb
+++ b/tf_model.ipynb
@@ -16,7 +16,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -30,7 +30,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
@@ -47,7 +47,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -58,7 +58,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -241,7 +241,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -251,7 +251,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -266,7 +266,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -279,7 +279,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -289,7 +289,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@@ -325,7 +325,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@@ -602,7 +602,7 @@
       "[551042 rows x 13 columns]"
      ]
     },
-     "execution_count": 12,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -613,7 +613,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -623,7 +623,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
@@ -669,7 +669,54 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sleep_data.to_csv(\".data/sleep_data_simple.csv\", index=False, index_label=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Model Development"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "TEST_SIZE = 122\n",
+    "VALIDATION_SIZE = 183\n",
+    "\n",
+    "BATCH_SIZE = 32\n",
+    "INPUT_TIME_STEP = 10 # in minutes\n",
+    "INPUT_FEATURES_SIZE = 7\n",
+    "MAX_EPOCHS = 20"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sleep_data = pd.read_csv(\".data/sleep_data_simple.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
@@ -693,6 +740,7 @@
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
       "      <th>sleep_id</th>\n",
       "      <th>minutes_since_begin</th>\n",
       "      <th>stage_start_hour</th>\n",
@@ -708,6 +756,7 @@
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
+       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>18</td>\n",
       "      <td>1.0</td>\n",
@@ -717,6 +766,7 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
+       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
@@ -728,6 +778,7 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
+       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
@@ -739,6 +790,7 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
+       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
@@ -750,6 +802,7 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
+       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
@@ -769,9 +822,11 @@
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
+       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>551037</th>\n",
+       "      <td>551037</td>\n",
       "      <td>1132</td>\n",
       "      <td>426</td>\n",
       "      <td>13</td>\n",
@@ -783,6 +838,7 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>551038</th>\n",
+       "      <td>551038</td>\n",
       "      <td>1132</td>\n",
       "      <td>427</td>\n",
       "      <td>13</td>\n",
@@ -794,6 +850,7 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>551039</th>\n",
+       "      <td>551039</td>\n",
       "      <td>1132</td>\n",
       "      <td>428</td>\n",
       "      <td>13</td>\n",
@@ -805,6 +862,7 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>551040</th>\n",
+       "      <td>551040</td>\n",
       "      <td>1132</td>\n",
       "      <td>429</td>\n",
       "      <td>13</td>\n",
@@ -816,6 +874,7 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>551041</th>\n",
+       "      <td>551041</td>\n",
       "      <td>1132</td>\n",
       "      <td>430</td>\n",
       "      <td>13</td>\n",
@@ -827,53 +886,53 @@
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
-       "<p>551042 rows × 8 columns</p>\n",
+       "<p>551042 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
-       "        sleep_id  minutes_since_begin  stage_start_hour  stage_start_minute  \\\n",
-       "0              0                    0                 8                  18   \n",
-       "1              0                    1                 8                  19   \n",
-       "2              0                    2                 8                  20   \n",
-       "3              0                    3                 8                  21   \n",
-       "4              0                    4                 8                  22   \n",
-       "...          ...                  ...               ...                 ...   \n",
-       "551037      1132                  426                13                  17   \n",
-       "551038      1132                  427                13                  18   \n",
-       "551039      1132                  428                13                  19   \n",
-       "551040      1132                  429                13                  20   \n",
-       "551041      1132                  430                13                  21   \n",
+       "        Unnamed: 0  sleep_id  minutes_since_begin  stage_start_hour  \\\n",
+       "0                0         0                    0                 8   \n",
+       "1                1         0                    1                 8   \n",
+       "2                2         0                    2                 8   \n",
+       "3                3         0                    3                 8   \n",
+       "4                4         0                    4                 8   \n",
+       "...            ...       ...                  ...               ...   \n",
+       "551037      551037      1132                  426                13   \n",
+       "551038      551038      1132                  427                13   \n",
+       "551039      551039      1132                  428                13   \n",
+       "551040      551040      1132                  429                13   \n",
+       "551041      551041      1132                  430                13   \n",
       "\n",
-       "        awake_probability  rem_probability  light_probability  \\\n",
-       "0                     1.0              0.0                0.0   \n",
-       "1                     1.0              0.0                0.0   \n",
-       "2                     1.0              0.0                0.0   \n",
-       "3                     1.0              0.0                0.0   \n",
-       "4                     1.0              0.0                0.0   \n",
-       "...                   ...              ...                ...   \n",
-       "551037                0.0              0.0                1.0   \n",
-       "551038                0.0              0.0                1.0   \n",
-       "551039                0.0              0.0                1.0   \n",
-       "551040                0.0              0.0                1.0   \n",
-       "551041                0.0              0.0                1.0   \n",
+       "        stage_start_minute  awake_probability  rem_probability  \\\n",
+       "0                       18                1.0              0.0   \n",
+       "1                       19                1.0              0.0   \n",
+       "2                       20                1.0              0.0   \n",
+       "3                       21                1.0              0.0   \n",
+       "4                       22                1.0              0.0   \n",
+       "...                    ...                ...              ...   \n",
+       "551037                  17                0.0              0.0   \n",
+       "551038                  18                0.0              0.0   \n",
+       "551039                  19                0.0              0.0   \n",
+       "551040                  20                0.0              0.0   \n",
+       "551041                  21                0.0              0.0   \n",
       "\n",
-       "        deep_probability  \n",
-       "0                    0.0  \n",
-       "1                    0.0  \n",
-       "2                    0.0  \n",
-       "3                    0.0  \n",
-       "4                    0.0  \n",
-       "...                  ...  \n",
-       "551037               0.0  \n",
-       "551038               0.0  \n",
-       "551039               0.0  \n",
-       "551040               0.0  \n",
-       "551041               0.0  \n",
+       "        light_probability  deep_probability  \n",
+       "0                     0.0               0.0  \n",
+       "1                     0.0               0.0  \n",
+       "2                     0.0               0.0  \n",
+       "3                     0.0               0.0  \n",
+       "4                     0.0               0.0  \n",
+       "...                   ...               ...  \n",
+       "551037                1.0               0.0  \n",
+       "551038                1.0               0.0  \n",
+       "551039                1.0               0.0  \n",
+       "551040                1.0               0.0  \n",
+       "551041                1.0               0.0  \n",
       "\n",
-       "[551042 rows x 8 columns]"
+       "[551042 rows x 9 columns]"
      ]
     },
-     "execution_count": 15,
+     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -882,13 +941,6 @@
    "sleep_data"
   ]
  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Model Development"
-   ]
-  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -896,18 +948,6 @@
    "### Helper functions and class"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "TEST_SIZE = 122\n",
-    "VALIDATION_SIZE = 183\n",
-    "TIME_STEP_INPUT = 10 # in minutes\n",
-    "BATCH_SIZE = 32"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": 17,
@@ -921,13 +961,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Adapted from https://www.tensorflow.org/tutorials/structured_data/time_series\n",
    "class WindowGenerator():\n",
-    "    def __init__(self, data, index: str = \"sleep_id\", input_width: int = TIME_STEP_INPUT, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1,100), label_feature_slice: slice = slice(-4,100), generate_data_now: bool = True):\n",
+    "    def __init__(self, data, index: str = \"sleep_id\", input_width: int = INPUT_TIME_STEP, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1,100), label_feature_slice: slice = slice(-4,100), generate_data_now: bool = True):\n",
    "        # Partition data\n",
    "        self.training, self.testing = training_test_split_by_unique_index(sleep_data, index, test_size)\n",
    "        self.training, self.validation = training_test_split_by_unique_index(self.training, index, validation_size)\n",
@@ -952,9 +992,9 @@
    "        self.sample_ds = self.make_dataset(sleep_data[sleep_data[index] == 0])\n",
    "\n",
    "        if generate_data_now:\n",
-    "            self.training_ds = self.make_dataset(self.training)\n",
-    "            self.validation_ds = self.make_dataset(self.validation)\n",
-    "            self.testing_ds = self.make_dataset(self.testing)\n",
+    "            self.training_ds = self.make_dataset(self.training, index)\n",
+    "            self.validation_ds = self.make_dataset(self.validation, index)\n",
+    "            self.testing_ds = self.make_dataset(self.testing, index)\n",
    "\n",
    "\n",
    "    def __repr__(self):\n",
@@ -966,9 +1006,9 @@
    "\n",
    "    def split_window(self, features):\n",
    "        inputs = features[:, self.input_slice, self.input_feature_slice]\n",
-    "        labels = features[:, self.labels_slice, self.label_feature_slice]\n",
+    "        labels = tf.squeeze(features[:, self.labels_slice, self.label_feature_slice])\n",
    "        inputs.set_shape([None, self.input_width, None])\n",
-    "        labels.set_shape([None, self.label_width, None])\n",
+    "        # labels.set_shape([None, self.label_width, None])\n",
    "        return inputs, labels\n",
    "\n",
    "    def make_dataset(self, data, index_group: str = \"sleep_id\", sort_by: str = \"minutes_since_begin\"):\n",
@@ -1019,21 +1059,31 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
-     "data": {
-      "text/plain": [
-       "WindowGenerator:\n",
-       "\tTotal window size: 11\n",
-       "\tInput indices: [0 1 2 3 4 5 6 7 8 9]\n",
-       "\tLabel indices: [10]"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
+     "ename": "KeyError",
+     "evalue": "'sleep_id'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "File \u001b[0;32m~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3621\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3619'>3620</a>\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3620'>3621</a>\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_engine\u001b[39m.\u001b[39;49mget_loc(casted_key)\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3621'>3622</a>\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n",
+      "File \u001b[0;32mpandas/_libs/index.pyx:136\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mpandas/_libs/index.pyx:163\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5198\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5206\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'sleep_id'",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[1;32m/Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 33'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000072?line=0'>1</a>\u001b[0m wg \u001b[39m=\u001b[39m WindowGenerator(sleep_data)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000072?line=1'>2</a>\u001b[0m wg\n",
+      "\u001b[1;32m/Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 31'\u001b[0m in \u001b[0;36mWindowGenerator.__init__\u001b[0;34m(self, data, index, input_width, validation_size, test_size, input_feature_slice, label_feature_slice, generate_data_now)\u001b[0m\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=2'>3</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, data, index: \u001b[39mstr\u001b[39m \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39msleep_id\u001b[39m\u001b[39m\"\u001b[39m, input_width: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m INPUT_TIME_STEP, validation_size: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m VALIDATION_SIZE, test_size: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m TEST_SIZE, input_feature_slice: \u001b[39mslice\u001b[39m \u001b[39m=\u001b[39m \u001b[39mslice\u001b[39m(\u001b[39m1\u001b[39m,\u001b[39m100\u001b[39m), label_feature_slice: \u001b[39mslice\u001b[39m \u001b[39m=\u001b[39m \u001b[39mslice\u001b[39m(\u001b[39m-\u001b[39m\u001b[39m4\u001b[39m,\u001b[39m100\u001b[39m), generate_data_now: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m):\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=3'>4</a>\u001b[0m     \u001b[39m# Partition data\u001b[39;00m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=4'>5</a>\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtraining, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtesting \u001b[39m=\u001b[39m training_test_split_by_unique_index(sleep_data, index, test_size)\n\u001b[1;32m      <a 
href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=5'>6</a>\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtraining, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvalidation \u001b[39m=\u001b[39m training_test_split_by_unique_index(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtraining, index, validation_size)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=7'>8</a>\u001b[0m     \u001b[39m# Window paramters\u001b[39;00m\n",
+      "\u001b[1;32m/Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 30'\u001b[0m in \u001b[0;36mtraining_test_split_by_unique_index\u001b[0;34m(data, index, test_size)\u001b[0m\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=0'>1</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mtraining_test_split_by_unique_index\u001b[39m(data, index: \u001b[39mstr\u001b[39m, test_size: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m \u001b[39m10\u001b[39m):\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=1'>2</a>\u001b[0m     test_ids \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mrandom\u001b[39m.\u001b[39mchoice(data[index]\u001b[39m.\u001b[39munique(), size \u001b[39m=\u001b[39m test_size, replace\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=2'>3</a>\u001b[0m     \u001b[39mreturn\u001b[39;00m data[\u001b[39m~\u001b[39mdata[index]\u001b[39m.\u001b[39misin(test_ids)], data[data[index]\u001b[39m.\u001b[39misin(test_ids)]\n",
+      "File \u001b[0;32m~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/frame.py:3505\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3502'>3503</a>\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcolumns\u001b[39m.\u001b[39mnlevels \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3503'>3504</a>\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3504'>3505</a>\u001b[0m indexer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcolumns\u001b[39m.\u001b[39;49mget_loc(key)\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3505'>3506</a>\u001b[0m \u001b[39mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3506'>3507</a>\u001b[0m     indexer \u001b[39m=\u001b[39m [indexer]\n",
+      "File \u001b[0;32m~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3623\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3620'>3621</a>\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine\u001b[39m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3621'>3622</a>\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m-> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3622'>3623</a>\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(key) \u001b[39mfrom\u001b[39;00m \u001b[39merr\u001b[39;00m\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3623'>3624</a>\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m:\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3624'>3625</a>\u001b[0m     \u001b[39m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3625'>3626</a>\u001b[0m     \u001b[39m#  InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3626'>3627</a>\u001b[0m     \u001b[39m#  the TypeError.\u001b[39;00m\n\u001b[1;32m   <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3627'>3628</a>\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_check_indexing_error(key)\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'sleep_id'"
+     ]
    }
   ],
   "source": [
@@ -1043,7 +1093,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1052,7 +1102,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1061,26 +1111,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "(array([[18.,  8., 36.,  1.,  0.,  0.,  0.],\n",
-       "        [19.,  8., 37.,  1.,  0.,  0.,  0.],\n",
-       "        [20.,  8., 38.,  1.,  0.,  0.,  0.],\n",
-       "        [21.,  8., 39.,  1.,  0.,  0.,  0.],\n",
-       "        [22.,  8., 40.,  1.,  0.,  0.,  0.],\n",
-       "        [23.,  8., 41.,  1.,  0.,  0.,  0.],\n",
-       "        [24.,  8., 42.,  1.,  0.,  0.,  0.],\n",
-       "        [25.,  8., 43.,  1.,  0.,  0.,  0.],\n",
-       "        [26.,  8., 44.,  1.,  0.,  0.,  0.],\n",
-       "        [27.,  8., 45.,  1.,  0.,  0.,  0.]], dtype=float32),\n",
-       " array([[0., 0., 1., 0.]], dtype=float32))"
+       "((10, 7), (4,))"
      ]
     },
-     "execution_count": 37,
+     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -1090,6 +1130,36 @@
    "sample_array[0][0][INDEX_TIMESTEP], sample_array[0][1][INDEX_TIMESTEP]"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### General Model Helper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Adapted from https://www.tensorflow.org/tutorials/structured_data/time_series#linear_model\n",
+    "def compile_and_fit(model, window: WindowGenerator, loss = tf.losses.MeanSquaredError(), optimizer = tf.optimizers.Adam(), metrics = tf.metrics.MeanAbsoluteError(), patience:int = 2, epochs: int = MAX_EPOCHS):\n",
+    "    early_stopping = tf.keras.callbacks.EarlyStopping(\n",
+    "        monitor='val_loss',\n",
+    "        patience=patience,\n",
+    "        mode='min'\n",
+    "    )\n",
+    "\n",
+    "    model.compile(\n",
+    "        loss=loss,\n",
+    "        optimizer=optimizer,\n",
+    "        metrics=metrics,\n",
+    "    )\n",
+    "\n",
+    "    return model.fit(window.training_ds, epochs=epochs, validation_data=window.validation_ds, callbacks=[early_stopping])"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -1099,7 +1169,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 215,
+   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1110,24 +1180,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 278,
+   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Model: \"sequential_12\"\n",
+      "Model: \"sequential_7\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
-      " lstm_14 (LSTM)              (None, 10, 16)            1600      \n",
+      " lstm_8 (LSTM)               (None, 16)                1536      \n",
      "                                                                 \n",
-      " dense_9 (Dense)             (None, 10, 4)             68        \n",
+      " dense_7 (Dense)             (None, 4)                 68        \n",
      "                                                                 \n",
      "=================================================================\n",
-      "Total params: 1,668\n",
-      "Trainable params: 1,668\n",
+      "Total params: 1,604\n",
+      "Trainable params: 1,604\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "None\n"
@@ -1137,9 +1207,10 @@
   "source": [
    "# Model Definition\n",
    "lstm_model = keras.Sequential()\n",
-    "lstm_model.add(layers.Input(shape=(TIME_STEP_INPUT, 8)))\n",
-    "lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=True))\n",
+    "lstm_model.add(layers.Input(shape=(INPUT_TIME_STEP, INPUT_FEATURES_SIZE)))\n",
    "# lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=True))\n",
+    "# lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=True))\n",
+    "lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=False))\n",
    "lstm_model.add(layers.Dense(SLEEP_STAGES))\n",
    "lstm_model.build()\n",
    "print(lstm_model.summary())"
@@ -1147,13 +1218,47 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model Training\n",
    "lstm_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",
-    "lstm_optm = keras.optimizers.Adam(learning_rate=LSTM_LEARNING_RATE)"
+    "lstm_optm = keras.optimizers.Adam(learning_rate=LSTM_LEARNING_RATE)\n",
+    "lstm_metrics = [tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True), tf.keras.metrics.Accuracy()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/20\n"
+     ]
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mCanceled future for execute_request message before replies were done"
+     ]
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+     ]
+    }
+   ],
+   "source": [
+    "lstm_history = compile_and_fit(model=lstm_model, window=wg, loss= lstm_loss, optimizer= lstm_optm, metrics=lstm_metrics)"
   ]
  },
  {

 %% Cell type:markdown id: tags:

 # Notebook for training and testing AI models

 %% Cell type:markdown id: tags:

 ## Setup

 %% Cell type:code id: tags:

 ``` python
 import tensorflow as tf
 import numpy as np
 from tensorflow import keras
 from tensorflow.keras import layers
 import pandas as pd
 from attention import Attention
 ```

 %% Cell type:code id: tags:

 ``` python
 print(tf.__version__)
 ```

 %% Output

    2.8.0

 %% Cell type:code id: tags:

 ``` python
 # CONSTANTS
 RAW_SLEEP_DATA_PATH = ".data/raw_bed_sleep-state.csv"
 CLEANED_SLEEP_DATA_PATH = ".data/clean_bed_sleep-state.csv"
 ```

 %% Cell type:code id: tags:

 ``` python
 ## Parameters and Hyper-parameters
 SLEEP_STAGES = 4
 ```

 %% Cell type:markdown id: tags:

 ## Import Data

 %% Cell type:markdown id: tags:

 ### Cleaning Raw Data

 %% Cell type:code id: tags:

 ``` python
 import csv
 import datetime
 import itertools
 ```

 %% Cell type:code id: tags:

 ``` python
 datetime.datetime.strptime("2022-04-21T10:18:00+02:00","%Y-%m-%dT%H:%M:%S%z") + datetime.timedelta(minutes=1)
 ```

 %% Output

    datetime.datetime(2022, 4, 21, 10, 19, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)))

 %% Cell type:code id: tags:

 ``` python
 def stage_probability(stage, to_test):
    """Return a one-hot style indicator for a sleep stage.

    Gives 1.0 when ``stage`` equals ``to_test`` and 0.0 otherwise.
    """
    if stage == to_test:
        return 1.0
    return 0.0
 ```

 %% Cell type:code id: tags:

 ``` python
 ((start_time - in_bed_time).seconds)/3600
 ```

 %% Output

    6.833333333333333

 %% Cell type:code id: tags:

 ``` python
 # Expand every raw record into one cleaned row per stage sample. Each raw
 # row is read as: column 0 = start timestamp, column 1 = bracketed list of
 # durations in seconds, column 2 = bracketed list of stage codes.
 cleaned_info = []
 date_seen = set()  # start timestamps already handled, used to drop duplicate records
 # newline="" is what the csv module expects for files handed to csv.reader.
 with open(RAW_SLEEP_DATA_PATH, mode='r', newline="") as raw_file:
    csvFile = csv.reader(raw_file)
    in_bed_time = None
    current_sleep_id = -1
    for index, lines in enumerate(csvFile):
        if index == 0:
            # Replace the raw header row with the cleaned column names.
            cleaned_info.append([
                "sleep_id",
                "sleep_begin",
                "stage_start",
                "time_since_begin_sec",
                "stage_duration_sec",
                "stage_end",
                "stage_value",
                "awake_probability",
                "light_probability",
                "deep_probability",
                "rem_probability",
            ])
            continue
        start_time = datetime.datetime.strptime(lines[0],"%Y-%m-%dT%H:%M:%S%z")
        if start_time in date_seen:
            continue
        date_seen.add(start_time)
        # A record that starts before the current session's first timestamp
        # opens a new sleep session.
        if in_bed_time is None or in_bed_time > start_time:
            current_sleep_id += 1
            in_bed_time = start_time
        durations = map(int, lines[1].strip("[]").split(","))
        stages = map(int, lines[2].strip("[]").split(","))
        # Offset of the current sample from start_time. Summing the actual
        # durations keeps timestamps right even when samples within one
        # record do not all share the same length.
        elapsed = datetime.timedelta()
        for duration, stage in zip(durations, stages):
            current_time = start_time + elapsed
            cleaned_info.append([
                current_sleep_id,
                in_bed_time,
                current_time,
                # total_seconds() rather than .seconds: the latter drops whole days.
                int((current_time - in_bed_time).total_seconds()),
                duration,
                current_time + datetime.timedelta(seconds=duration),
                stage,
                stage_probability(0, stage),
                stage_probability(1, stage),
                stage_probability(2, stage),
                stage_probability(3, stage),
            ])
            elapsed += datetime.timedelta(seconds=duration)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Persist the cleaned rows. newline="" lets the csv module control line
 # endings itself; without it, platforms that translate "\n" on write end
 # up with an extra blank line after every row.
 with open(CLEANED_SLEEP_DATA_PATH, 'w', newline="") as clean_file:
    write = csv.writer(clean_file)
    write.writerows(cleaned_info)
 print("Finished Writing Cleaned Data")
 ```

 %% Output

    Finished Writing Cleaned Data

 %% Cell type:markdown id: tags:

 ### Creating DataFrame from clean raw data

 %% Cell type:code id: tags:

 ``` python
 # Get the cleaned data
 sleep_df_raw = pd.read_csv(CLEANED_SLEEP_DATA_PATH)#, parse_dates=["start", "end"], infer_datetime_format=True)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Preprocess data:
 #   1. convert to datetime
 sleep_df_raw["sleep_begin"] = pd.to_datetime(sleep_df_raw["sleep_begin"], utc=True)
 sleep_df_raw["stage_start"] = pd.to_datetime(sleep_df_raw["stage_start"], utc=True)
 sleep_df_raw["stage_end"] = pd.to_datetime(sleep_df_raw["stage_end"], utc=True)
 #   2. Separate time, hour and minute
 #   MAYBE 3. smaller units: int16 or int8
 ```

 %% Cell type:code id: tags:

 ``` python
 def get_minute(row, index):
    """Return the minute component of the timestamp stored at ``row[index]``."""
    stamp = row[index]
    clock = stamp.time()
    return clock.minute

 def get_hour(row, index):
    """Return the hour component of the timestamp stored at ``row[index]``."""
    stamp = row[index]
    clock = stamp.time()
    return clock.hour
 ```

 %% Cell type:code id: tags:

 ``` python
 sleep_df_raw["stage_start_hour"] = sleep_df_raw.apply (lambda row: get_hour(row, "stage_start"), axis=1)
 sleep_df_raw["stage_start_minute"] = sleep_df_raw.apply (lambda row: get_minute(row, "stage_start"), axis=1)
 ```

 %% Cell type:code id: tags:

 ``` python
 sleep_df_raw.info()
 ```

 %% Output

    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 551042 entries, 0 to 551041
    Data columns (total 13 columns):
     #   Column                Non-Null Count   Dtype
    ---  ------                --------------   -----
     0   sleep_id              551042 non-null  int64
     1   sleep_begin           551042 non-null  datetime64[ns, UTC]
     2   stage_start           551042 non-null  datetime64[ns, UTC]
     3   time_since_begin_sec  551042 non-null  int64
     4   stage_duration_sec    551042 non-null  int64
     5   stage_end             551042 non-null  datetime64[ns, UTC]
     6   stage_value           551042 non-null  int64
     7   awake_probability     551042 non-null  float64
     8   light_probability     551042 non-null  float64
     9   deep_probability      551042 non-null  float64
     10  rem_probability       551042 non-null  float64
     11  stage_start_hour      551042 non-null  int64
     12  stage_start_minute    551042 non-null  int64
    dtypes: datetime64[ns, UTC](3), float64(4), int64(6)
    memory usage: 54.7 MB

 %% Cell type:code id: tags:

 ``` python
 sleep_df_raw
 ```

 %% Output

            sleep_id               sleep_begin               stage_start  \
    0              0 2022-04-21 08:18:00+00:00 2022-04-21 08:18:00+00:00
    1              0 2022-04-21 08:18:00+00:00 2022-04-21 08:19:00+00:00
    2              0 2022-04-21 08:18:00+00:00 2022-04-21 08:20:00+00:00
    3              0 2022-04-21 08:18:00+00:00 2022-04-21 08:21:00+00:00
    4              0 2022-04-21 08:18:00+00:00 2022-04-21 08:22:00+00:00
    ...          ...                       ...                       ...
    551037      1132 2019-02-11 06:11:00+00:00 2019-02-11 13:17:00+00:00
    551038      1132 2019-02-11 06:11:00+00:00 2019-02-11 13:18:00+00:00
    551039      1132 2019-02-11 06:11:00+00:00 2019-02-11 13:19:00+00:00
    551040      1132 2019-02-11 06:11:00+00:00 2019-02-11 13:20:00+00:00
    551041      1132 2019-02-11 06:11:00+00:00 2019-02-11 13:21:00+00:00
    
            time_since_begin_sec  stage_duration_sec                 stage_end  \
    0                          0                  60 2022-04-21 08:19:00+00:00
    1                         60                  60 2022-04-21 08:20:00+00:00
    2                        120                  60 2022-04-21 08:21:00+00:00
    3                        180                  60 2022-04-21 08:22:00+00:00
    4                        240                  60 2022-04-21 08:23:00+00:00
    ...                      ...                 ...                       ...
    551037                 25560                  60 2019-02-11 13:18:00+00:00
    551038                 25620                  60 2019-02-11 13:19:00+00:00
    551039                 25680                  60 2019-02-11 13:20:00+00:00
    551040                 25740                  60 2019-02-11 13:21:00+00:00
    551041                 25800                  60 2019-02-11 13:22:00+00:00
    
            stage_value  awake_probability  light_probability  deep_probability  \
    0                 0                1.0                0.0               0.0
    1                 0                1.0                0.0               0.0
    2                 0                1.0                0.0               0.0
    3                 0                1.0                0.0               0.0
    4                 0                1.0                0.0               0.0
    ...             ...                ...                ...               ...
    551037            1                0.0                1.0               0.0
    551038            1                0.0                1.0               0.0
    551039            1                0.0                1.0               0.0
    551040            1                0.0                1.0               0.0
    551041            1                0.0                1.0               0.0
    
            rem_probability  stage_start_hour  stage_start_minute
    0                   0.0                 8                  18
    1                   0.0                 8                  19
    2                   0.0                 8                  20
    3                   0.0                 8                  21
    4                   0.0                 8                  22
    ...                 ...               ...                 ...
    551037              0.0                13                  17
    551038              0.0                13                  18
    551039              0.0                13                  19
    551040              0.0                13                  20
    551041              0.0                13                  21
    
    [551042 rows x 13 columns]

 %% Cell type:code id: tags:

 ``` python
 sleep_data = sleep_df_raw[["sleep_id", "stage_start_hour", "stage_start_minute", "awake_probability", "rem_probability","light_probability", "deep_probability"]]
 sleep_data.insert(loc=1, column="minutes_since_begin" , value= sleep_df_raw["time_since_begin_sec"]//60)
 ```

 %% Cell type:code id: tags:

 ``` python
 print(sleep_data.head())
 print(sleep_data.info())
 ```

 %% Output

       sleep_id  minutes_since_begin  stage_start_hour  stage_start_minute  \
    0         0                    0                 8                  18
    1         0                    1                 8                  19
    2         0                    2                 8                  20
    3         0                    3                 8                  21
    4         0                    4                 8                  22
    
       awake_probability  rem_probability  light_probability  deep_probability
    0                1.0              0.0                0.0               0.0
    1                1.0              0.0                0.0               0.0
    2                1.0              0.0                0.0               0.0
    3                1.0              0.0                0.0               0.0
    4                1.0              0.0                0.0               0.0
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 551042 entries, 0 to 551041
    Data columns (total 8 columns):
     #   Column               Non-Null Count   Dtype
    ---  ------               --------------   -----
     0   sleep_id             551042 non-null  int64
     1   minutes_since_begin  551042 non-null  int64
     2   stage_start_hour     551042 non-null  int64
     3   stage_start_minute   551042 non-null  int64
     4   awake_probability    551042 non-null  float64
     5   rem_probability      551042 non-null  float64
     6   light_probability    551042 non-null  float64
     7   deep_probability     551042 non-null  float64
    dtypes: float64(4), int64(4)
    memory usage: 33.6 MB
    None

 %% Cell type:code id: tags:

 ``` python
-sleep_data
+sleep_data.to_csv(".data/sleep_data_simple.csv", index=False, index_label=False)
 ```

-%% Output
-
-            sleep_id  minutes_since_begin  stage_start_hour  stage_start_minute  \
-    0              0                    0                 8                  18
-    1              0                    1                 8                  19
-    2              0                    2                 8                  20
-    3              0                    3                 8                  21
-    4              0                    4                 8                  22
-    ...          ...                  ...               ...                 ...
-    551037      1132                  426                13                  17
-    551038      1132                  427                13                  18
-    551039      1132                  428                13                  19
-    551040      1132                  429                13                  20
-    551041      1132                  430                13                  21
-    
-            awake_probability  rem_probability  light_probability  \
-    0                     1.0              0.0                0.0
-    1                     1.0              0.0                0.0
-    2                     1.0              0.0                0.0
-    3                     1.0              0.0                0.0
-    4                     1.0              0.0                0.0
-    ...                   ...              ...                ...
-    551037                0.0              0.0                1.0
-    551038                0.0              0.0                1.0
-    551039                0.0              0.0                1.0
-    551040                0.0              0.0                1.0
-    551041                0.0              0.0                1.0
-    
-            deep_probability
-    0                    0.0
-    1                    0.0
-    2                    0.0
-    3                    0.0
-    4                    0.0
-    ...                  ...
-    551037               0.0
-    551038               0.0
-    551039               0.0
-    551040               0.0
-    551041               0.0
-    
-    [551042 rows x 8 columns]
-
 %% Cell type:markdown id: tags:

 ## Model Development

 %% Cell type:markdown id: tags:

-### Helper functions and class
+### Setup

 %% Cell type:code id: tags:

 ``` python
 TEST_SIZE = 122
 VALIDATION_SIZE = 183
-TIME_STEP_INPUT = 10 # in minutes
+
 BATCH_SIZE = 32
+INPUT_TIME_STEP = 10 # in minutes
+INPUT_FEATURES_SIZE = 7
+MAX_EPOCHS = 20
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sleep_data = pd.read_csv(".data/sleep_data_simple.csv")
 ```

 %% Cell type:code id: tags:

 ``` python
+sleep_data
+```
+
+%% Output
+
+            Unnamed: 0  sleep_id  minutes_since_begin  stage_start_hour  \
+    0                0         0                    0                 8
+    1                1         0                    1                 8
+    2                2         0                    2                 8
+    3                3         0                    3                 8
+    4                4         0                    4                 8
+    ...            ...       ...                  ...               ...
+    551037      551037      1132                  426                13
+    551038      551038      1132                  427                13
+    551039      551039      1132                  428                13
+    551040      551040      1132                  429                13
+    551041      551041      1132                  430                13
+    
+            stage_start_minute  awake_probability  rem_probability  \
+    0                       18                1.0              0.0
+    1                       19                1.0              0.0
+    2                       20                1.0              0.0
+    3                       21                1.0              0.0
+    4                       22                1.0              0.0
+    ...                    ...                ...              ...
+    551037                  17                0.0              0.0
+    551038                  18                0.0              0.0
+    551039                  19                0.0              0.0
+    551040                  20                0.0              0.0
+    551041                  21                0.0              0.0
+    
+            light_probability  deep_probability
+    0                     0.0               0.0
+    1                     0.0               0.0
+    2                     0.0               0.0
+    3                     0.0               0.0
+    4                     0.0               0.0
+    ...                   ...               ...
+    551037                1.0               0.0
+    551038                1.0               0.0
+    551039                1.0               0.0
+    551040                1.0               0.0
+    551041                1.0               0.0
+    
+    [551042 rows x 9 columns]
+
+%% Cell type:markdown id: tags:
+
+### Helper functions and class
+
+%% Cell type:code id: tags:
+
+``` python
 def training_test_split_by_unique_index(data, index: str, test_size: int = 10):
    """Split ``data`` into two frames by randomly holding out whole groups.

    ``test_size`` distinct values of ``data[index]`` are drawn without
    replacement via ``np.random.choice``; rows carrying one of those values
    form the second frame and all remaining rows form the first.

    Returns:
        Tuple ``(remaining_rows, held_out_rows)``.
    """
    unique_ids = data[index].unique()
    held_out_ids = np.random.choice(unique_ids, size=test_size, replace=False)
    in_held_out = data[index].isin(held_out_ids)
    return data[~in_held_out], data[in_held_out]
 ```

 %% Cell type:code id: tags:

 ``` python
 # Adapted from https://www.tensorflow.org/tutorials/structured_data/time_series
 class WindowGenerator():
-    def __init__(self, data, index: str = "sleep_id", input_width: int = TIME_STEP_INPUT, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1,100), label_feature_slice: slice = slice(-4,100), generate_data_now: bool = True):
+    def __init__(self, data, index: str = "sleep_id", input_width: int = INPUT_TIME_STEP, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1,100), label_feature_slice: slice = slice(-4,100), generate_data_now: bool = True):
        # Partition data
        self.training, self.testing = training_test_split_by_unique_index(sleep_data, index, test_size)
        self.training, self.validation = training_test_split_by_unique_index(self.training, index, validation_size)

        # Window paramters
        self.input_width = input_width
        self.label_width = 1
        self.shift = 1

        self.total_window_size = self.input_width + self.shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

        self.input_feature_slice = input_feature_slice
        self.label_feature_slice = label_feature_slice

        self.sample_ds = self.make_dataset(sleep_data[sleep_data[index] == 0])

        if generate_data_now:
-            self.training_ds = self.make_dataset(self.training)
-            self.validation_ds = self.make_dataset(self.validation)
-            self.testing_ds = self.make_dataset(self.testing)
+            self.training_ds = self.make_dataset(self.training, index)
+            self.validation_ds = self.make_dataset(self.validation, index)
+            self.testing_ds = self.make_dataset(self.testing, index)


    def __repr__(self):
        return "WindowGenerator:\n\t" +'\n\t'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
        ])

    def split_window(self, features):
        inputs = features[:, self.input_slice, self.input_feature_slice]
-        labels = features[:, self.labels_slice, self.label_feature_slice]
+        labels = tf.squeeze(features[:, self.labels_slice, self.label_feature_slice])
        inputs.set_shape([None, self.input_width, None])
-        labels.set_shape([None, self.label_width, None])
+        # labels.set_shape([None, self.label_width, None])
        return inputs, labels

    def make_dataset(self, data, index_group: str = "sleep_id", sort_by: str = "minutes_since_begin"):
        """Build a batched dataset of (inputs, labels) windows from ``data``.

        Rows are handled one ``index_group`` value at a time, so no window
        spans two groups. Each group is ordered by ``sort_by``, converted to
        a float32 array, and cut into sliding windows of
        ``self.total_window_size`` rows (stride 1, batches of ``BATCH_SIZE``).
        The per-group datasets are concatenated in order of first appearance
        and ``self.split_window`` is mapped over the result.

        Raises:
            ValueError: if ``data`` contains no groups, since there is
                nothing to build a dataset from.
        """
        ds_all = None
        # groupby(sort=False) walks the frame once and yields groups in
        # first-appearance order (the order ``unique()`` would give) instead
        # of re-scanning every row for each group id.
        for _, group_rows in data.groupby(index_group, sort=False):
            subset_data = np.array(group_rows.sort_values(by=[sort_by]), dtype=np.float32)
            ds = tf.keras.utils.timeseries_dataset_from_array(
                data=subset_data,
                targets=None,
                sequence_length=self.total_window_size,
                sequence_stride=1,
                shuffle=False,
                batch_size=BATCH_SIZE,)
            ds_all = ds if ds_all is None else ds_all.concatenate(ds)

        if ds_all is None:
            raise ValueError(f"cannot build a dataset: no {index_group!r} groups in data")

        return ds_all.map(self.split_window)

    # def generate_all_datasets(self):
    #     self._training_ds = self.make_dataset(self.training)
    #     self._validation_ds = self.make_dataset(self.validation)
    #     self._testing_ds = self.make_dataset(self.testing)

    # def training_dataset(self):
    #     if self._training_ds is None:
    #         self._training_ds = self.make_dataset(self.training)
    #     return self._training_ds

    # def validation_dataset(self):
    #     if self._validation_ds is None:
    #         self._validation_ds = self.make_dataset(self.validation)
    #     return self._validation_ds

    # def test_dataset(self):
    #     if self._testing_ds is None:
    #         self._testing_ds = self.make_dataset(self.testing)
    #     return self._testing_ds
 ```

 %% Cell type:markdown id: tags:

 ### Data Prep

 All inputs follow: (batch_size, timesteps, input_dim)

 %% Cell type:code id: tags:

 ``` python
 wg = WindowGenerator(sleep_data)
 wg
 ```

 %% Output

-    WindowGenerator:
-    	Total window size: 11
-    	Input indices: [0 1 2 3 4 5 6 7 8 9]
-    	Label indices: [10]
+    ---------------------------------------------------------------------------
+    KeyError                                  Traceback (most recent call last)
+File     ~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3621, in Index.get_loc(self, key, method, tolerance)
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3619'>3620</a> try:
+    -> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3620'>3621</a>     return self._engine.get_loc(casted_key)
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3621'>3622</a> except KeyError as err:
+File     pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
+File     pandas/_libs/index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
+File     pandas/_libs/hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
+File     pandas/_libs/hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
+    KeyError: 'sleep_id'
+
+The above exception was the direct cause of the following exception:
+    KeyError                                  Traceback (most recent call last)
+    /Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 33' in <cell line: 1>()
+    ----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000072?line=0'>1</a> wg = WindowGenerator(sleep_data)
+          <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000072?line=1'>2</a> wg
+    /Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 31' in WindowGenerator.__init__(self, data, index, input_width, validation_size, test_size, input_feature_slice, label_feature_slice, generate_data_now)
+          <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=2'>3</a> def __init__(self, data, index: str = "sleep_id", input_width: int = INPUT_TIME_STEP, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1,100), label_feature_slice: slice = slice(-4,100), generate_data_now: bool = True):
+          <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=3'>4</a>     # Partition data
+    ----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=4'>5</a>     self.training, self.testing = training_test_split_by_unique_index(sleep_data, index, test_size)
+          <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=5'>6</a>     self.training, self.validation = training_test_split_by_unique_index(self.training, index, validation_size)
+          <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000065?line=7'>8</a>     # Window paramters
+    /Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 30' in training_test_split_by_unique_index(data, index, test_size)
+          <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=0'>1</a> def training_test_split_by_unique_index(data, index: str, test_size: int = 10):
+    ----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=1'>2</a>     test_ids = np.random.choice(data[index].unique(), size = test_size, replace=False)
+          <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000066?line=2'>3</a>     return data[~data[index].isin(test_ids)], data[data[index].isin(test_ids)]
+File     ~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/frame.py:3505, in DataFrame.__getitem__(self, key)
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3502'>3503</a> if self.columns.nlevels > 1:
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3503'>3504</a>     return self._getitem_multilevel(key)
+    -> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3504'>3505</a> indexer = self.columns.get_loc(key)
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3505'>3506</a> if is_integer(indexer):
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/frame.py?line=3506'>3507</a>     indexer = [indexer]
+File     ~/Documents/School Folder/CS 437/Lab/Final Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3623, in Index.get_loc(self, key, method, tolerance)
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3620'>3621</a>     return self._engine.get_loc(casted_key)
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3621'>3622</a> except KeyError as err:
+    -> <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3622'>3623</a>     raise KeyError(key) from err
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3623'>3624</a> except TypeError:
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3624'>3625</a>     # If we have a listlike key, _check_indexing_error will raise
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3625'>3626</a>     #  InvalidIndexError. Otherwise we fall through and re-raise
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3626'>3627</a>     #  the TypeError.
+       <a href='file:///Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/venv/lib/python3.10/site-packages/pandas/core/indexes/base.py?line=3627'>3628</a>     self._check_indexing_error(key)
+    KeyError: 'sleep_id'

 %% Cell type:code id: tags:

 ``` python
 sample = wg.sample_ds.take(1)
 ```

 %% Cell type:code id: tags:

 ``` python
 sample_array = list(sample.as_numpy_iterator())
 ```

 %% Cell type:code id: tags:

 ``` python
 INDEX_TIMESTEP = 18
 sample_array[0][0][INDEX_TIMESTEP], sample_array[0][1][INDEX_TIMESTEP]
 ```

 %% Output

-    (array([[18.,  8., 36.,  1.,  0.,  0.,  0.],
-            [19.,  8., 37.,  1.,  0.,  0.,  0.],
-            [20.,  8., 38.,  1.,  0.,  0.,  0.],
-            [21.,  8., 39.,  1.,  0.,  0.,  0.],
-            [22.,  8., 40.,  1.,  0.,  0.,  0.],
-            [23.,  8., 41.,  1.,  0.,  0.,  0.],
-            [24.,  8., 42.,  1.,  0.,  0.,  0.],
-            [25.,  8., 43.,  1.,  0.,  0.,  0.],
-            [26.,  8., 44.,  1.,  0.,  0.,  0.],
-            [27.,  8., 45.,  1.,  0.,  0.,  0.]], dtype=float32),
-     array([[0., 0., 1., 0.]], dtype=float32))
+    ((10, 7), (4,))
+
+%% Cell type:markdown id: tags:
+
+### General Model Helper
+
+%% Cell type:code id: tags:
+
+``` python
+# Adapted from https://www.tensorflow.org/tutorials/structured_data/time_series#linear_model
+def compile_and_fit(model, window: WindowGenerator, loss = tf.losses.MeanSquaredError(), optimizer = tf.optimizers.Adam(), metrics = tf.metrics.MeanAbsoluteError(), patience:int = 2, epochs: int = MAX_EPOCHS):
+    early_stopping = tf.keras.callbacks.EarlyStopping(
+        monitor='val_loss',
+        patience=patience,
+        mode='min'
+    )
+
+    model.compile(
+        loss=loss,
+        optimizer=optimizer,
+        metrics=metrics,
+    )
+
+    return model.fit(window.training_ds, epochs=epochs, validation_data=window.validation_ds, callbacks=[early_stopping])
+```

 %% Cell type:markdown id: tags:

 ### Model 1: LSTM

 %% Cell type:code id: tags:

 ``` python
 # Hyper-parameters
 LSTM_UNITS = 16
 LSTM_LEARNING_RATE = 0.0001
 ```

 %% Cell type:code id: tags:

 ``` python
 # Model Definition
 # Sequence classifier: a single LSTM layer that returns only its last output
 # (return_sequences=False), followed by a Dense layer that emits SLEEP_STAGES
 # un-activated scores per input window.
 lstm_model = keras.Sequential()
-lstm_model.add(layers.Input(shape=(TIME_STEP_INPUT, 8)))
-lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=True))
+lstm_model.add(layers.Input(shape=(INPUT_TIME_STEP, INPUT_FEATURES_SIZE)))
+# lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=True))
 # lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=True))
+lstm_model.add(layers.LSTM(LSTM_UNITS, stateful=False, return_sequences=False))
 lstm_model.add(layers.Dense(SLEEP_STAGES))
 lstm_model.build()
 # NOTE: summary() prints the table itself and returns None, which is why the
 # captured output ends with a stray "None" line.
 print(lstm_model.summary())
 ```

 %% Output

-    Model: "sequential_12"
+    Model: "sequential_7"
    _________________________________________________________________
     Layer (type)                Output Shape              Param #
    =================================================================
-     lstm_14 (LSTM)              (None, 10, 16)            1600
+     lstm_8 (LSTM)               (None, 16)                1536
    
-     dense_9 (Dense)             (None, 10, 4)             68
+     dense_7 (Dense)             (None, 4)                 68
    
    =================================================================
-    Total params: 1,668
-    Trainable params: 1,668
+    Total params: 1,604
+    Trainable params: 1,604
    Non-trainable params: 0
    _________________________________________________________________
    None

 %% Cell type:code id: tags:

 ``` python
 # Model Training
 lstm_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
 lstm_optm = keras.optimizers.Adam(learning_rate=LSTM_LEARNING_RATE)
+lstm_metrics = [tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True), tf.keras.metrics.Accuracy()]
+```
+
+%% Cell type:code id: tags:
+
+``` python
+lstm_history = compile_and_fit(model=lstm_model, window=wg, loss= lstm_loss, optimizer= lstm_optm, metrics=lstm_metrics)
 ```

+%% Output
+
+    Epoch 1/20
+
+    Canceled future for execute_request message before replies were done
+
+    The Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.
+
 %% Cell type:markdown id: tags:

 ### Model 2: GRU

 %% Cell type:code id: tags:

 ``` python
 # Hyper-parameters
 GRU_UNITS = 16
 ```

 %% Cell type:code id: tags:

 ``` python
 # GRU counterpart of the LSTM model: same input window, one recurrent layer,
 # and a Dense layer emitting SLEEP_STAGES un-activated scores.
 # The Input layer gives the model a known shape so summary() can run; the
 # Embedding layer that used to be appended after the Dense output was dropped
 # because it would have treated the float scores as integer token ids.
 gru_model = keras.Sequential([
    layers.Input(shape=(INPUT_TIME_STEP, INPUT_FEATURES_SIZE)),
    layers.GRU(GRU_UNITS),
    layers.Dense(SLEEP_STAGES)
 ])
 print(gru_model.summary())
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:markdown id: tags:

 ### Model 3: Attention Mechanism

 %% Cell type:code id: tags:

 ``` python
 ATTENTION_UNITS = 16
 ```

 %% Cell type:code id: tags:

 ``` python
 # Attention-based counterpart of the LSTM model.
 # The Input layer gives the model a known shape so summary() can run.
 # NOTE(review): assumes Attention accepts (batch, time, features) input like
 # the recurrent layers do - confirm against the class that is imported.
 am_model = keras.Sequential([
    layers.Input(shape=(INPUT_TIME_STEP, INPUT_FEATURES_SIZE)),
    Attention(ATTENTION_UNITS),
    layers.Dense(SLEEP_STAGES)
 ])
 print(am_model.summary())
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:markdown id: tags:

 ### Model Head-to-Head testing

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:markdown id: tags:

 # Scratch