diff --git a/examples/ncf/README.md b/examples/ncf/README.md
index eac9b0c5ed7963c996019a7899c9eb0d7c57dfbc..e8286a223714567fe2d01a38c151a69af2de927e 100644
--- a/examples/ncf/README.md
+++ b/examples/ncf/README.md
@@ -25,8 +25,6 @@ The model trains on binary information about whether or not a user interacted wi
 
 ## Setup
 
-### Steps to configure machine
-
 * Install `unzip` and `curl`
 
   ```bash
@@ -41,14 +39,21 @@ The model trains on binary information about whether or not a user interacted wi
   pip install -e .
   ```
 
-* Download and verify data
+* Obtain the ml-20m dataset
 
   ```bash
   cd <distiller-repo-root>/examples/ncf
+  
   # Creates ml-20.zip
-  source ../download_dataset.sh
+  source download_dataset.sh
+  
   # Confirms the MD5 checksum of ml-20.zip
-  source ../verify_dataset.sh
+  source verify_dataset.sh
+  
+  # Extracts the dataset into a sub-directory named 'ml-20m'
+  # During the last step the script might appear to hang.
+  # This is normal; it finishes after a few minutes.
+  source extract_dataset.sh
   ```
 
 ## Running the Sample
diff --git a/examples/ncf/extract_dataset.sh b/examples/ncf/extract_dataset.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7ab3a5658660205c872337d78ab85e685deba2c0
--- /dev/null
+++ b/examples/ncf/extract_dataset.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Unzips ml-20m.zip, then runs convert.py on ml-20m/ratings.csv.
+# The README invokes this via `source` from the examples/ncf directory.
+echo "unzip ml-20m.zip"
+if unzip -u ml-20m.zip; then
+    echo "Start processing ml-20m/ratings.csv"
+    python convert.py ml-20m/ratings.csv ml-20m --negatives 999
+else
+    echo "Problem unzipping ml-20m.zip" >&2
+    echo "Please run 'download_dataset.sh && verify_dataset.sh' first" >&2
+fi