extract.py 2.81 KB
Newer Older
Haoyu Z's avatar
Haoyu Z committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
import os.path
from os import path
import psycopg2
import traceback
import sys

query = {} # a dictionary (key:questionId, value: User-dictionary(key:UserId, value: Num-dictionary(key:10-digit num, value: submission)))

def commitAndExecute(conn, cur, command):
    cur.execute(command)
    conn.commit()


def extractData(conn, cur):
    mydir = "HW1"
    filesname = os.listdir(mydir)

    for f in filesname:
        # First find questionID
        q_idx = f.find("q")
        t_idx = f.find("_")
        query_idx = f.find("query")

        questionId = 0
        num = 0 # the number of submission for the user for a single question.
        if f[q_idx+1] == "1" and f[q_idx+2] == "0":
            questionId = int(f[q_idx+1 : q_idx+3])
            num = int(f[q_idx+3 : query_idx])
        else:
            questionId = int(f[q_idx+1 : q_idx+2])
            num = int(f[q_idx+2 : query_idx])
        userId = int(f[:t_idx])
          
        # question id not in dictionary.
        if questionId not in query:
            query[questionId] = {}
     
        filepath = mydir + "/" + f
        fl = open(filepath)
        sql_str = fl.read()
        sql_str = sql_str.strip().replace("\n", " ")
Haoyu Z's avatar
Haoyu Z committed
43
44

        flag = 0
Haoyu Z's avatar
Haoyu Z committed
45
46
47
48
49
50
        try:
            commitAndExecute(conn, cur, sql_str)
            if userId not in query[questionId]:
                query[questionId][userId] = {} 
            query[questionId][userId][num] = sql_str
        except (Exception, psycopg2.DatabaseError) as error:
Haoyu Z's avatar
Haoyu Z committed
51
            flag = 1
Haoyu Z's avatar
Haoyu Z committed
52
53
54
55
56
57
58


def main():
    try:
        os.makedirs("hw1-provenance")
    except OSError as e:
        print(e)
Haoyu Z's avatar
Haoyu Z committed
59
    conn = psycopg2.connect("dbname=provdb user=joker password=m")
Haoyu Z's avatar
Haoyu Z committed
60
61
62
63
    cur = conn.cursor()
    extractData(conn, cur)
    conn.close()
    for question_id in [1,2,3,4,5,6,7,8]:
Haoyu Z's avatar
Haoyu Z committed
64
65
        if question_id != 2:
            continue
Haoyu Z's avatar
Haoyu Z committed
66
        f = open("hw1-provenance/q"+str(question_id), "w+")
Haoyu Z's avatar
Haoyu Z committed
67
68
69
70
        question_submissions = query[question_id]
        for student_id in list(question_submissions.keys()):
            submissions = question_submissions[student_id]
            for sub_id in list(submissions.keys()):
Haoyu Z's avatar
Haoyu Z committed
71
                '''try:
Haoyu Z's avatar
Haoyu Z committed
72
                    conn = psycopg2.connect("dbname=provdb user=joker password=m")
Haoyu Z's avatar
Haoyu Z committed
73
74
75
76
77
78
79
80
                    cur = conn.cursor()
                    commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
                    provenance.exe_query(submissions[sub_id], f, cur, conn)
                except (Exception, psycopg2.DatabaseError) as error:
                    print(traceback.format_exc())
                    exit(1)
                finally:
                    if conn is not None:
Haoyu Z's avatar
Haoyu Z committed
81
82
                        conn.close()'''
                f.write(submissions[sub_id]+"\n")
Haoyu Z's avatar
Haoyu Z committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
    f.close()







    print(query.keys())




if __name__ == "__main__":
    # execute only if run as a script
    main()