From 1a950002ed1c2b1318d3074cf85229aa9294b812 Mon Sep 17 00:00:00 2001
From: SurajSSingh <surajss@uci.edu>
Date: Wed, 11 May 2022 10:56:23 -0700
Subject: [PATCH] Fixed minor issues with datasets

---
 .gitignore     |   3 +-
 tf_model.ipynb | 393 ++++++++++++++++++++++++++++++++++---------------
 2 files changed, 277 insertions(+), 119 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8fa6d85..2269427 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 *.pdf
-.data/
\ No newline at end of file
+.data/
+*.pyc
\ No newline at end of file
diff --git a/tf_model.ipynb b/tf_model.ipynb
index efcdfa7..4404a8c 100644
--- a/tf_model.ipynb
+++ b/tf_model.ipynb
@@ -7,9 +7,16 @@
     "# Notebook for training and testing AI models"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -23,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -40,7 +47,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,7 +58,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -234,7 +241,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -244,7 +251,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -259,7 +266,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -272,7 +279,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -282,7 +289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -318,7 +325,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -595,7 +602,7 @@
        "[551042 rows x 13 columns]"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -606,7 +613,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -616,7 +623,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -660,6 +667,221 @@
     "print(sleep_data.info())"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sleep_id</th>\n",
+       "      <th>minutes_since_begin</th>\n",
+       "      <th>stage_start_hour</th>\n",
+       "      <th>stage_start_minute</th>\n",
+       "      <th>awake_probability</th>\n",
+       "      <th>rem_probability</th>\n",
+       "      <th>light_probability</th>\n",
+       "      <th>deep_probability</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>18</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>20</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>8</td>\n",
+       "      <td>21</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>8</td>\n",
+       "      <td>22</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>551037</th>\n",
+       "      <td>1132</td>\n",
+       "      <td>426</td>\n",
+       "      <td>13</td>\n",
+       "      <td>17</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>551038</th>\n",
+       "      <td>1132</td>\n",
+       "      <td>427</td>\n",
+       "      <td>13</td>\n",
+       "      <td>18</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>551039</th>\n",
+       "      <td>1132</td>\n",
+       "      <td>428</td>\n",
+       "      <td>13</td>\n",
+       "      <td>19</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>551040</th>\n",
+       "      <td>1132</td>\n",
+       "      <td>429</td>\n",
+       "      <td>13</td>\n",
+       "      <td>20</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>551041</th>\n",
+       "      <td>1132</td>\n",
+       "      <td>430</td>\n",
+       "      <td>13</td>\n",
+       "      <td>21</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>551042 rows × 8 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        sleep_id  minutes_since_begin  stage_start_hour  stage_start_minute  \\\n",
+       "0              0                    0                 8                  18   \n",
+       "1              0                    1                 8                  19   \n",
+       "2              0                    2                 8                  20   \n",
+       "3              0                    3                 8                  21   \n",
+       "4              0                    4                 8                  22   \n",
+       "...          ...                  ...               ...                 ...   \n",
+       "551037      1132                  426                13                  17   \n",
+       "551038      1132                  427                13                  18   \n",
+       "551039      1132                  428                13                  19   \n",
+       "551040      1132                  429                13                  20   \n",
+       "551041      1132                  430                13                  21   \n",
+       "\n",
+       "        awake_probability  rem_probability  light_probability  \\\n",
+       "0                     1.0              0.0                0.0   \n",
+       "1                     1.0              0.0                0.0   \n",
+       "2                     1.0              0.0                0.0   \n",
+       "3                     1.0              0.0                0.0   \n",
+       "4                     1.0              0.0                0.0   \n",
+       "...                   ...              ...                ...   \n",
+       "551037                0.0              0.0                1.0   \n",
+       "551038                0.0              0.0                1.0   \n",
+       "551039                0.0              0.0                1.0   \n",
+       "551040                0.0              0.0                1.0   \n",
+       "551041                0.0              0.0                1.0   \n",
+       "\n",
+       "        deep_probability  \n",
+       "0                    0.0  \n",
+       "1                    0.0  \n",
+       "2                    0.0  \n",
+       "3                    0.0  \n",
+       "4                    0.0  \n",
+       "...                  ...  \n",
+       "551037               0.0  \n",
+       "551038               0.0  \n",
+       "551039               0.0  \n",
+       "551040               0.0  \n",
+       "551041               0.0  \n",
+       "\n",
+       "[551042 rows x 8 columns]"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sleep_data"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -676,7 +898,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -688,7 +910,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -699,13 +921,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Adapted from https://www.tensorflow.org/tutorials/structured_data/time_series\n",
     "class WindowGenerator():\n",
-    "    def __init__(self, data, index: str = \"sleep_id\", input_width: int = TIME_STEP_INPUT, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, generate_data_now: bool = True):\n",
+    "    def __init__(self, data, index: str = \"sleep_id\", input_width: int = TIME_STEP_INPUT, validation_size: int = VALIDATION_SIZE, test_size: int = TEST_SIZE, input_feature_slice: slice = slice(1,100), label_feature_slice: slice = slice(-4,100), generate_data_now: bool = True):\n",
     "        # Partition data\n",
     "        self.training, self.testing = training_test_split_by_unique_index(sleep_data, index, test_size)\n",
     "        self.training, self.validation = training_test_split_by_unique_index(self.training, index, validation_size)\n",
@@ -723,7 +945,12 @@
     "        self.label_start = self.total_window_size - self.label_width\n",
     "        self.labels_slice = slice(self.label_start, None)\n",
     "        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]\n",
-    "        \n",
+    "\n",
+    "        self.input_feature_slice = input_feature_slice\n",
+    "        self.label_feature_slice = label_feature_slice\n",
+    "\n",
+    "        self.sample_ds = self.make_dataset(sleep_data[sleep_data[index] == 0])\n",
+    "\n",
     "        if generate_data_now:\n",
     "            self.training_ds = self.make_dataset(self.training)\n",
     "            self.validation_ds = self.make_dataset(self.validation)\n",
@@ -738,11 +965,10 @@
     "        ])\n",
     "\n",
     "    def split_window(self, features):\n",
-    "        inputs = features[:, self.input_slice, :]\n",
-    "        labels = features[:, self.labels_slice, :]\n",
+    "        inputs = features[:, self.input_slice, self.input_feature_slice]\n",
+    "        labels = features[:, self.labels_slice, self.label_feature_slice]\n",
     "        inputs.set_shape([None, self.input_width, None])\n",
     "        labels.set_shape([None, self.label_width, None])\n",
-    "\n",
     "        return inputs, labels\n",
     "\n",
     "    def make_dataset(self, data, index_group: str = \"sleep_id\", sort_by: str = \"minutes_since_begin\"):\n",
@@ -782,58 +1008,6 @@
     "    #     return self._testing_ds"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2022-05-11 00:38:01.423873: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "WindowGenerator:\n",
-       "\tTotal window size: 11\n",
-       "\tInput indices: [0 1 2 3 4 5 6 7 8 9]\n",
-       "\tLabel indices: [10]"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "wg = WindowGenerator(sleep_data)\n",
-    "wg"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1836"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(wg.testing_ds)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -845,92 +1019,75 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'training_test_split_by_unique_index' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[1;32m/Users/nowadmin/Documents/School Folder/CS 437/Lab/Final Project/tf_model.ipynb Cell 32'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/nowadmin/Documents/School%20Folder/CS%20437/Lab/Final%20Project/tf_model.ipynb#ch0000068?line=0'>1</a>\u001b[0m training_sleep_data, test_sleep_data \u001b[39m=\u001b[39m training_test_split_by_unique_index(sleep_data, \u001b[39m\"\u001b[39m\u001b[39msleep_id\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m5\u001b[39m)\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'training_test_split_by_unique_index' is not defined"
-     ]
-    }
-   ],
-   "source": [
-    "training_sleep_data, test_sleep_data = training_test_split_by_unique_index(sleep_data, \"sleep_id\", 5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 288,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "set()"
+       "WindowGenerator:\n",
+       "\tTotal window size: 11\n",
+       "\tInput indices: [0 1 2 3 4 5 6 7 8 9]\n",
+       "\tLabel indices: [10]"
       ]
      },
-     "execution_count": 288,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "set(training.sleep_id.unique()).intersection(set(test.sleep_id.unique()))"
+    "wg = WindowGenerator(sleep_data)\n",
+    "wg"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 239,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
-    "sleep_data_tensor = tf.convert_to_tensor(sleep_data)"
+    "sample = wg.sample_ds.take(1)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 240,
+   "execution_count": 34,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "TensorShape([551042, 7])"
-      ]
-     },
-     "execution_count": 240,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "sleep_data_tensor.shape"
+    "sample_array = list(sample.as_numpy_iterator())"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 241,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<tf.Tensor: shape=(7,), dtype=float64, numpy=array([ 1.,  8., 19.,  1.,  0.,  0.,  0.])>"
+       "(array([[18.,  8., 36.,  1.,  0.,  0.,  0.],\n",
+       "        [19.,  8., 37.,  1.,  0.,  0.,  0.],\n",
+       "        [20.,  8., 38.,  1.,  0.,  0.,  0.],\n",
+       "        [21.,  8., 39.,  1.,  0.,  0.,  0.],\n",
+       "        [22.,  8., 40.,  1.,  0.,  0.,  0.],\n",
+       "        [23.,  8., 41.,  1.,  0.,  0.,  0.],\n",
+       "        [24.,  8., 42.,  1.,  0.,  0.,  0.],\n",
+       "        [25.,  8., 43.,  1.,  0.,  0.,  0.],\n",
+       "        [26.,  8., 44.,  1.,  0.,  0.,  0.],\n",
+       "        [27.,  8., 45.,  1.,  0.,  0.,  0.]], dtype=float32),\n",
+       " array([[0., 0., 1., 0.]], dtype=float32))"
       ]
      },
-     "execution_count": 241,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "sleep_data_tensor[1]"
+    "INDEX_TIMESTEP = 18\n",
+    "sample_array[0][0][INDEX_TIMESTEP], sample_array[0][1][INDEX_TIMESTEP]"
    ]
   },
   {
-- 
GitLab