provenance.py 5.71 KB
Newer Older
1
import psycopg2
Haoyu Z's avatar
debug    
Haoyu Z committed
2
import sqlparse as sp
3
4
5
import argparse
import sys
import copy
Haoyu Z's avatar
debug    
Haoyu Z committed
6
import time
Haoyu Z's avatar
debug    
Haoyu Z committed
7
import operator
Haoyu Z's avatar
debug    
Haoyu Z committed
8
import provenanceUtils as ut
Haoyu Z's avatar
debug    
Haoyu Z committed
9
import traceback
Haoyu Z's avatar
debug    
Haoyu Z committed
10
11
12
13

#When craeting the auxillary provenance tables, need to add a column called provtuple, which is a sorted array
#[t1, t2, t3...]

Haoyu Z's avatar
debug    
Haoyu Z committed
14
tablesWithProvenance = {}
Haoyu Z's avatar
debug    
Haoyu Z committed
15
16
17
18
19
20
21
22
23
24
25
26

def tablesInQuery(query):
    parsed = sp.parse(query)[0]
    output = []
    state = ""
    for token in parsed.tokens:
        cur = token.value
        if cur == " ":
            continue
        if cur.lower() == "from":
            state = "first table"
            continue
27

Haoyu Z's avatar
debug    
Haoyu Z committed
28
29
30
31
32
33
34
35
36
37
38
39
40
41
        if state == "first table":
            if "(" not in cur and "join" in cur:
                state = "second table"
                continue
            if "(" in cur and ")" in cur:
                #subquery
                output.extend(tablesInQuery(cur[cur.index("(")+1 : cur.index(")")]))
            else:
                output.append(cur.split("as")[0])
            continue
        
        if state == "second table":
            if "(" in cur and ")" in cur:
                #subquery
Haoyu Z's avatar
debug    
Haoyu Z committed
42
                output.extend(tablesInQuery(cur[cur.index("(")+1 : cur.index(")")]))
Haoyu Z's avatar
debug    
Haoyu Z committed
43
44
45
46
47
48
49
50
51
52
53
54
            else:
                output.append(cur.split("as")[0])
            break

    return output


#Preprocess query
def preprocess_query(query, cur, conn):
    tables = tablesInQuery(query)
    output = []
    for table in tables:
Haoyu Z's avatar
debug    
Haoyu Z committed
55
56
57
58
59
60
        if table not in tablesWithProvenance:
            command1 = "select add_provenance('{}');".format(table)
            command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table, table)
            ut.commitAndExecute(conn, cur, command1)
            ut.commitAndExecute(conn, cur, command2)
            tablesWithProvenance[table] = 1
Haoyu Z's avatar
debug    
Haoyu Z committed
61
62
63
64
        formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table)
        index = query.find('select') + 6
        tmp = query[:index] + formula + query[index:]
        output.append(tmp)
Haoyu Z's avatar
debug    
Haoyu Z committed
65
        
Haoyu Z's avatar
debug    
Haoyu Z committed
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
    return (tables,output)

#formulas:[[formula1, formula2, ...],
#           [formula1, ...]
#           ...
#         ]
#fs: [formula1, formula2, ...]

def process_formula(fs, tables):
    elements = []
    for i, formula in enumerate(fs):
        nf, idx = ut.remove1sAndAddTablename(formula, tables[i])
        elements.append(((nf, idx)))
    elements.sort(key=operator.itemgetter(1))
    formula_tmp = [x[0] for x in elements]
    output = " ".join(formula_tmp)
    return output

def get_header(query):
    parsed = sp.parse(query)[0]
    headers = (parsed.tokens)[2].value.split(",")
    output = ""
    for h in headers:
        h = h.strip()
        header = ""
        if "." in h:
            header = h.split(".")[1]
        else:
            header = h
        output += header + "   |   "
    return output
Haoyu Z's avatar
debug    
Haoyu Z committed
97
98


Haoyu Z's avatar
debug    
Haoyu Z committed
99
100
def exe_query(query, out_file, cur, conn):
    out_file.write(query + "\n")
Haoyu Z's avatar
debug    
Haoyu Z committed
101
    print("executing query: " + query)
Haoyu Z's avatar
debug    
Haoyu Z committed
102
103
    tables, queries = preprocess_query(query, cur, conn)
    result_t = None
104
105
    formulas = []
    for processed_query in queries:
Haoyu Z's avatar
debug    
Haoyu Z committed
106
        ut.commitAndExecute(conn, cur, processed_query)
Haoyu Z's avatar
debug    
Haoyu Z committed
107
        output = cur.fetchall()
Haoyu Z's avatar
debug    
Haoyu Z committed
108
109
110
111
112
        if result_t == None:
            result_t = output
        for i, tu in enumerate(output):
            if len(formulas) == i:
                formulas.append([tu[0]])
113
            else:
Haoyu Z's avatar
debug    
Haoyu Z committed
114
115
                formulas[i].append(tu[0])
    
Haoyu Z's avatar
debug    
Haoyu Z committed
116
    print("formulas", formulas)
Haoyu Z's avatar
debug    
Haoyu Z committed
117
118
119
    processed_formulas = []
    for fs in formulas:
        rowformula = process_formula(fs, tables)
Haoyu Z's avatar
debug    
Haoyu Z committed
120
        processed_formulas.append(rowformula)
Haoyu Z's avatar
debug    
Haoyu Z committed
121
    print("processed formula", processed_formulas)
Haoyu Z's avatar
debug    
Haoyu Z committed
122
    result__ = []
Haoyu Z's avatar
debug    
Haoyu Z committed
123
124
125
    for i, row in enumerate(result_t):
        rowl = list(row)
        rowl[0] = processed_formulas[i]
Haoyu Z's avatar
debug    
Haoyu Z committed
126
127
        result__.append(rowl)
    print("result", result__)
Haoyu Z's avatar
Haoyu Z committed
128

Haoyu Z's avatar
debug    
Haoyu Z committed
129
130
131
    # Combine rows with same output
    result = ut.combineSameRow(result__)
    
Haoyu Z's avatar
debug    
Haoyu Z committed
132
133
    # Now print the processed query
    header = get_header(query)
Haoyu Z's avatar
debug    
Haoyu Z committed
134
    out_file.write("provenance formula   |   " + header+ "provsql-token"+"\n\n")
Haoyu Z's avatar
debug    
Haoyu Z committed
135
136
137
138
    for row in result:
        line = " | ".join(row)
        out_file.write(line+"\n")
    out_file.write("\n---------------------------\n\n")
139
140
141
142
143
    

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("env", help="Environment configuration file")
Haoyu Z's avatar
debug    
Haoyu Z committed
144
    parser.add_argument("-q", "--query", help="Relative path for query file.")
145
146
147
148
149
150
151
152
153
154
155
156
    parser.add_argument("-o", "--output", help="Relative path for output file. Default as stdout")
    args = parser.parse_args() 

    env_config = ''
    queries = []
    out_file = sys.stdout
    with open(args.env) as fp:
        line = fp.readline()
        while line:
            tmp = line.strip()
            env_config  = env_config + tmp + ' '
            line = fp.readline()
Haoyu Z's avatar
debug    
Haoyu Z committed
157
    print("database configuration " + env_config)
Haoyu Z's avatar
debug    
Haoyu Z committed
158

159
    if args.query:
Haoyu Z's avatar
debug    
Haoyu Z committed
160
        with open(args.query) as fp:
161
162
163
            line = fp.readline()
            while line:
                queries.append(line.strip())
Haoyu Z's avatar
debug    
Haoyu Z committed
164
                line = fp.readline()
165
    else:
Haoyu Z's avatar
debug    
Haoyu Z committed
166
        print("Please specify at least one query file.")
167
        exit(1)
Haoyu Z's avatar
debug    
Haoyu Z committed
168

169
170
171
    if args.output:
        out_file = open(args.output, 'w')
    
Haoyu Z's avatar
debug    
Haoyu Z committed
172
    conn = None
173
    try:
Haoyu Z's avatar
debug    
Haoyu Z committed
174
        print('connecting to databse...')
Haoyu Z's avatar
debug    
Haoyu Z committed
175
        conn = psycopg2.connect(env_config)
176
        cur = conn.cursor()
Haoyu Z's avatar
debug    
Haoyu Z committed
177
        ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
Haoyu Z's avatar
debug    
Haoyu Z committed
178

179
        for query in queries:
Haoyu Z's avatar
debug    
Haoyu Z committed
180
            exe_query(query, out_file, cur, conn)
Haoyu Z's avatar
debug    
Haoyu Z committed
181
            print("FINISHED: "+query)
Haoyu Z's avatar
debug    
Haoyu Z committed
182

Haoyu Z's avatar
debug    
Haoyu Z committed
183
        print("Done!")
184
    except (Exception, psycopg2.DatabaseError) as error:
Haoyu Z's avatar
debug    
Haoyu Z committed
185
        print(traceback.format_exc())
186
187
        exit(1)
    finally:
Haoyu Z's avatar
debug    
Haoyu Z committed
188
        out_file.close()
189
190
191
192
193
194
        if conn is not None:
            conn.close()
            print('Database connection closed.')

if __name__ == '__main__':
    main()