diff --git a/hpvm/projects/keras/scripts/test_benchmarks.py b/hpvm/projects/keras/scripts/test_benchmarks.py
new file mode 100644
index 0000000000000000000000000000000000000000..d68a61264a3297b12bdf88812def72859543b3a9
--- /dev/null
+++ b/hpvm/projects/keras/scripts/test_benchmarks.py
@@ -0,0 +1,115 @@
+
+
+
+import subprocess
+
class Benchmark:
    """A single Keras benchmark script plus the accuracy it is expected to hit.

    The script is launched with the system ``python`` and the ``hpvm_reload``
    argument (run with pretrained weights); the script is expected to write its
    final accuracy (percent, as a float) to a file named ``final_accuracy`` in
    the current directory.
    """

    def __init__(self, binary_path, test_accuracy):
        # Path of the benchmark script to invoke.
        self.binary_path = binary_path
        # Expected accuracy (percent) with pretrained weights.
        self.test_accuracy = test_accuracy
        self.epsilon = 0.05  # Adding some slack for accuracy difference


    def getPath(self):
        """Return the benchmark script path."""
        return self.binary_path


    def readAccuracy(self):
        """Read and return the accuracy written to the `final_accuracy` file.

        Raises IOError/OSError if the file is missing and ValueError if its
        contents are not a float.
        """
        # Context manager guarantees the file handle is closed
        # (the original opened it and never closed it).
        with open("final_accuracy", "r") as f:  # File with final benchmark accuracy
            return float(f.read())


    def run(self):
        """Run the benchmark and compare measured vs. expected accuracy.

        Returns True when the measured accuracy is within `epsilon` of
        `test_accuracy`, False on a launch failure, a missing/unparseable
        `final_accuracy` file, or an accuracy mismatch.
        """
        # Test benchmark accuracy with pretrained weights (hpvm_reload)
        run_cmd = "python " + self.binary_path + " hpvm_reload "
        try:
            subprocess.call(run_cmd, shell=True)
        except OSError:
            # Narrowed from a bare `except:` — only launch failures are
            # expected here; don't swallow KeyboardInterrupt/SystemExit.
            return False

        try:
            accuracy = self.readAccuracy()
        except (IOError, OSError, ValueError):
            # Benchmark crashed before writing `final_accuracy` (previously
            # this raised out of run() and aborted the whole test sweep).
            print ("Test Failed for " + self.binary_path)
            return False

        print ("accuracy = ", accuracy, " test_accuracy = ", self.test_accuracy)

        if abs(self.test_accuracy - accuracy) < self.epsilon:
            print ("Test for " + self.binary_path + " Passed ")
            return True

        print ("Test Failed for " + self.binary_path)
        return False
+    
+        
+
class BenchmarkTests:
    """Collects Benchmark objects, runs them all, and reports a summary."""

    def __init__(self):
        # Registered benchmarks and the script paths that passed/failed.
        self.benchmarks = []
        self.passed_tests = []
        self.failed_tests = []


    def addBenchmark(self, benchmark):
        """Register a benchmark for a later runTests() sweep."""
        self.benchmarks.append(benchmark)


    def runTests(self):
        """Run every registered benchmark, filing each path by its result."""
        for bench in self.benchmarks:
            bucket = self.passed_tests if bench.run() else self.failed_tests
            bucket.append(bench.getPath())


    def printSummary(self):
        """Print pass/fail counts and list each passed/failed script path."""
        total = str(len(self.benchmarks))

        print (" Tests Passed  = " + str(len(self.passed_tests)) + " / " + total)
        print ("******* Passed Tests ** \n")
        for path in self.passed_tests:
            print ("Passed: " + path)

        print (" Tests Failed  = " + str(len(self.failed_tests)) + " / " + total)
        print ("****** Failed Tests *** \n")
        for path in self.failed_tests:
            print ("Failed: " + path)
+            
+
+        
+            
if __name__ == "__main__":

    # (script path, expected accuracy percent) for every supported benchmark.
    benchmark_specs = [
        ("src/alexnet.py", 79.28),
        ("src/alexnet2.py", 84.98),
        ("src/lenet.py", 98.70),
        ("src/mobilenet_cifar10.py", 84.42),
        ("src/resnet18_cifar10.py", 89.56),
        ("src/vgg16_cifar10.py", 89.96),
        ("src/vgg16_cifar100.py", 66.50),
    ]

    testMgr = BenchmarkTests()
    for script_path, expected_accuracy in benchmark_specs:
        testMgr.addBenchmark(Benchmark(script_path, expected_accuracy))

    testMgr.runTests()
    testMgr.printSummary()
+    
+    
diff --git a/hpvm/projects/keras/src/Benchmark.py b/hpvm/projects/keras/src/Benchmark.py
index 3610b2e9a5ad10c2b3d90795eb20b3d6839b730f..0871b74959ce6e19c7fedb89bbc101429a42c41f 100644
--- a/hpvm/projects/keras/src/Benchmark.py
+++ b/hpvm/projects/keras/src/Benchmark.py
@@ -90,6 +90,10 @@ class Benchmark:
       score = model.evaluate(X_test, to_categorical(y_test, self.num_classes), verbose=0)
       print('Test accuracy2:', score[1])
 
+      f = open("final_accuracy", "w+")
+      f.write(str(score[1] * 100))
+      f.close()
+
 
       if len(argv) > 2:
         if argv[2] == "frontend":