Commit ba3220bf authored by haoyuz3's avatar haoyuz3
Browse files

debug

parent 00030495
{
"python.linting.pylintEnabled": true,
"python.linting.enabled": true
}
\ No newline at end of file
SELECT FirstName, LastName, COUNT(PurchaseId) AS cnt
FROM Customers LEFT JOIN Purchases USING (CustomerId)
GROUP BY FirstName, LastName
ORDER BY FirstName, LastName DESC;
\ No newline at end of file
SELECT FirstName, LastName, COUNT(PurchaseId) AS cnt FROM Customers LEFT JOIN Purchases USING (CustomerId) GROUP BY FirstName, LastName ORDER BY FirstName, LastName DESC;
\ No newline at end of file
dbname=provdb
user=joker
password=m
\ No newline at end of file
......@@ -12,7 +12,7 @@ def commitAndExecute(conn, cur, command):
conn.commit()
def extractData(conn, cur):
def extractData():
mydir = "HW1"
filesname = os.listdir(mydir)
......@@ -40,15 +40,9 @@ def extractData(conn, cur):
fl = open(filepath)
sql_str = fl.read()
sql_str = sql_str.strip().replace("\n", " ")
flag = 0
try:
commitAndExecute(conn, cur, sql_str)
if userId not in query[questionId]:
query[questionId][userId] = {}
query[questionId][userId][num] = sql_str
except (Exception, psycopg2.DatabaseError) as error:
flag = 1
if userId not in query[questionId]:
query[questionId][userId] = {}
query[questionId][userId][num] = sql_str
def main():
......@@ -58,41 +52,31 @@ def main():
print(e)
conn = psycopg2.connect("dbname=provdb user=joker password=m")
cur = conn.cursor()
extractData(conn, cur)
conn.close()
extractData()
#print(query)
for question_id in [1,2,3,4,5,6,7,8]:
if question_id != 2:
if question_id != 1:
continue
f = open("hw1-provenance/q"+str(question_id), "w+")
f = open("hw1-provenance/q"+str(question_id)+".txt", "w+")
question_submissions = query[question_id]
for student_id in list(question_submissions.keys()):
submissions = question_submissions[student_id]
for sub_id in list(submissions.keys()):
'''try:
conn = psycopg2.connect("dbname=provdb user=joker password=m")
cur = conn.cursor()
commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
provenance.exe_query(submissions[sub_id], f, cur, conn)
try:
commitAndExecute(conn, cur, submissions[sub_id])
f.write(submissions[sub_id]+"\n")
except (Exception, psycopg2.DatabaseError) as error:
print(traceback.format_exc())
exit(1)
finally:
if conn is not None:
conn.close()'''
f.write(submissions[sub_id]+"\n")
print(submissions[sub_id], error)
conn.commit()
cur.close()
conn.close()
conn = psycopg2.connect("dbname=provdb user=joker password=m")
cur = conn.cursor()
f.close()
print(query.keys())
if __name__ == "__main__":
# execute only if run as a script
main()
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
SELECT FirstName, LastName, Instructor FROM Students st LEFT JOIN Courses c ON st.Department = c.Department WHERE (FirstName LIKE 'A%' OR LastName LIKE 'A%') AND Instructor LIKE 'A%';
......@@ -200,6 +200,8 @@ INSERT INTO public.enrollments VALUES ('tjones9', 125, 3, 60);
INSERT INTO public.enrollments VALUES ('tjones9', 127, 4, 77);
INSERT INTO public.enrollments VALUES ('ssmith6', 124, 4, 87);
INSERT INTO public.enrollments VALUES ('bmars4', 124, 4, 99);
INSERT INTO public.enrollments VALUES ('aeros45', 125, 4, 99);
INSERT INTO public.enrollments VALUES ('amya2', 125, 4, 99);
--
......@@ -290,6 +292,8 @@ INSERT INTO public.students VALUES ('esheeran5', 'Ed', 'Sheeran', 'ECE');
INSERT INTO public.students VALUES ('ssmith6', 'Sam', 'Smith', 'ECE');
INSERT INTO public.students VALUES ('tjones9', 'Tom', 'Jones', 'ECE');
INSERT INTO public.students VALUES ('bmars4', 'Bruno', 'Mars', 'Physics');
INSERT INTO public.students VALUES ('aeros45', 'Aero', 'AeroSmith', 'CS');
INSERT INTO public.students VALUES ('amya2', 'Amy', 'Alan', 'Physics');
--
......
import psycopg2
import sqlparse as sp
import argparse
import sys
import copy
import time
import operator
import provenanceUtils as ut
import traceback
#When creating the auxiliary provenance tables, we need to add a column called provtuple, which is a sorted array
#[t1, t2, t3...]
tablesWithProvenance = {}
twojoins = ["INNERJOIN", "LEFTJOIN", "RIGHTJOIN", "FULLJOIN", "NATURALJOIN"]
threejoins = ["LEFTOUTERJOIN", "RIGHTOUTERJOIN", "FULLOUTERJOIN"]


def _peek(tokens, position):
    """Return the upper-cased token at *position*, or "" past the end."""
    if position < len(tokens):
        return tokens[position].upper()
    return ""


def _joined(tokens, start, width):
    """Return *width* tokens starting at *start*, concatenated and upper-cased.

    Returns "" when fewer than *width* tokens remain, so a membership test
    against the join-keyword lists is simply False near the end of the query
    instead of raising IndexError.
    """
    if start + width > len(tokens):
        return ""
    return "".join(tokens[start:start + width]).upper()


def _clause_transition(tokens, idx):
    """Classify the keyword (if any) that starts at ``tokens[idx]``.

    Returns ``(new_state, tokens_to_skip)`` when the token begins a clause
    keyword that ends the current table, or ``None`` when the token should be
    treated as a table name.
    """
    word = tokens[idx].upper()
    if word in ("WHERE", "ON"):
        return "not in", 1
    if word == "JOIN":
        return "join in", 1
    if _joined(tokens, idx, 2) in twojoins:
        return "join in", 2
    if _joined(tokens, idx, 3) in threejoins:
        return "join in", 3
    if "SELECT" in word:
        # Start of a sub-select: leave the table list until its own FROM.
        return "not in", 1
    return None


def tablesInQuery(query):
    """Return the distinct table names referenced by a SQL query string.

    The query is split on whitespace and scanned with a three-state machine:
    "not in" (outside a table list), "from in" (after FROM) and "join in"
    (after a JOIN keyword).  Aliases, with or without AS, are skipped.

    All lookahead is bounds-checked, so a query that ends right after a table
    name (``select * from t``) no longer raises IndexError.  A joined table
    followed directly by USING is handled the same way as one followed by an
    alias and USING.

    Args:
        query: SQL text.

    Returns:
        A list of unique table-name tokens, in no particular order.
    """
    output = []
    tokens = query.split()
    state = "not in"
    idx = 0
    while idx < len(tokens):
        if state == "not in":
            word = tokens[idx].upper()
            if word == "FROM":
                state = "from in"
            elif word == "JOIN":
                state = "join in"
            idx += 1
            continue

        transition = _clause_transition(tokens, idx)
        if transition is not None:
            state, step = transition
            idx += step
            continue

        table = tokens[idx]
        following = _peek(tokens, idx + 1)
        if state == "from in":
            if "," in table:
                # "t1," or "t1,t2": keep the part before the first comma.
                output.append(table.split(",")[0])
            else:
                output.append(table)
                if following == "AS":
                    idx += 2  # skip "AS alias"
                elif "," in following:
                    idx += 1  # skip "alias,"
                elif following == "WHERE":
                    state = "not in"
                    idx += 1
                elif _peek(tokens, idx + 2) == "WHERE":
                    state = "not in"
                    idx += 2  # skip "alias WHERE"
                elif (_joined(tokens, idx + 2, 2) in twojoins
                      or _joined(tokens, idx + 2, 3) in threejoins):
                    idx += 1  # skip the alias preceding a join keyword
        elif state == "join in":
            output.append(table)
            if _peek(tokens, idx + 2) == "USING":
                # "table alias USING (...)"
                state = "not in"
                idx += 2
                continue
            if following == "USING":
                # "table USING (...)" without an alias
                state = "not in"
                idx += 1
                continue
            if following == "AS":
                idx += 2  # skip "AS alias"
            elif not (following in ("WHERE", "ON", "JOIN")
                      or _joined(tokens, idx + 1, 2) in twojoins
                      or _joined(tokens, idx + 1, 3) in threejoins):
                idx += 1  # skip a bare alias
        idx += 1
    return list(set(output))
'''parsed = sp.parse(query)[0]
output = []
state = ""
for token in parsed.tokens:
cur = token.value
if cur == " ":
continue
if cur.lower() == "from":
state = "first table"
continue
if state == "first table":
if "(" not in cur and "join" in cur:
state = "second table"
continue
if "(" in cur and ")" in cur:
#subquery
output.extend(tablesInQuery(cur[cur.index("(")+1 : len(cur) - 1 - cur[::-1].index(")")]))
else:
output.append(cur.split("as")[0].strip())
continue
if state == "second table":
if "(" in cur and ")" in cur:
#subquery
output.extend(tablesInQuery(cur[cur.index("(")+1 : len(cur) - 1 - cur[::-1].index(")")]))
else:
output.append(cur.split("as")[0].strip())
break
return list(set(output))'''
#Preprocess query
def preprocess_query(query, cur, conn):
    """Build one provenance-annotated copy of *query* per referenced table.

    For every table found by ``tablesInQuery`` that is not yet recorded in
    ``tablesWithProvenance``, the ``add_provtuple``, ``add_provenance`` and
    ``create_provenance_mapping`` SQL functions are invoked once and the table
    is recorded.  Then a ``formula(provenance(), '<table>_provtuple_mapping')``
    column is spliced in right after the first SELECT keyword.

    The SELECT keyword is located case-insensitively.  The previous
    case-sensitive ``find('select')`` returned -1 for ``SELECT ...`` and the
    formula was inserted inside the keyword, producing invalid SQL.

    Args:
        query: SQL text of the original query.
        cur: database cursor used to run the setup statements.
        conn: database connection owning *cur*.

    Returns:
        ``(tables, rewritten_queries)`` where the i-th rewritten query uses
        the provenance mapping of the i-th table.

    Raises:
        ValueError: if tables were found but the query has no SELECT keyword.
    """
    import re  # function-scope import: this module does not import re at file level

    tables = tablesInQuery(query)
    select_kw = re.search(r"\bselect\b", query, re.IGNORECASE)
    if tables and select_kw is None:
        raise ValueError("query contains no SELECT keyword: {!r}".format(query))

    output = []
    for table in tables:
        if table not in tablesWithProvenance:
            cmdAddProvtuple = "select \"add_provtuple\"('{}')".format(table)
            command1 = "select add_provenance('{}');".format(table)
            command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table, table)
            ut.commitAndExecute(conn, cur, cmdAddProvtuple)
            ut.commitAndExecute(conn, cur, command1)
            ut.commitAndExecute(conn, cur, command2)
            tablesWithProvenance[table] = 1
        formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table)
        index = select_kw.end()
        output.append(query[:index] + formula + query[index:])
    return (tables, output)
#formulas:[[formula1, formula2, ...],
# [formula1, ...]
# ...
# ]
#fs: [formula1, formula2, ...]
def process_formula(fs, tables):
    """Merge the per-table formulas of one result row into a single formula.

    Starts from ``ut.baseFormula(fs[0])`` and threads it through
    ``ut.remove1sAndAddTablename`` once per formula, pairing ``fs[k]`` with
    ``tables[k]``.

    Args:
        fs: list of formula strings for one row, one per table.
        tables: table names, index-aligned with *fs*.

    Returns:
        The combined formula string.
    """
    combined = ut.baseFormula(fs[0])
    for position, table_formula in enumerate(fs):
        table_name = tables[position]
        combined = ut.remove1sAndAddTablename(table_formula, table_name, combined)
    return combined
def get_header(query):
    """Return the output header built from the third token of the query.

    The query is parsed with sqlparse; the value of token index 2 of the
    first statement (presumably the SELECT column list -- confirm for queries
    with unusual leading whitespace or comments) is split on commas.  Each
    entry is stripped, reduced to the part after the first "." when it
    contains one, and followed by " | ".

    Args:
        query: SQL text.

    Returns:
        The concatenated header string, e.g. ``"a | b | "``.
    """
    statement = sp.parse(query)[0]
    column_list = statement.tokens[2].value
    names = []
    for raw_column in column_list.split(","):
        column = raw_column.strip()
        names.append(column.split(".")[1] if "." in column else column)
    return "".join(name + " | " for name in names)
def exe_query(query, out_file, cur, conn):
    """Run *query* with provenance tracking and write the result to *out_file*.

    The query is rewritten once per referenced table by ``preprocess_query``
    and each rewritten query is executed.  Column 0 of every returned row is
    collected as that table's formula for the row.  The formulas of each row
    are merged with ``process_formula``, rows are merged by
    ``ut.combineSameRow``, and the result is written as " | "-separated lines
    under a header.

    When no rewritten query is produced (no table was detected), only the
    query text, the header and the separator are written.  Previously this
    case raised TypeError from ``enumerate(None)``.

    Args:
        query: SQL text to execute.
        out_file: writable text file object.
        cur: database cursor.
        conn: database connection owning *cur*.
    """
    out_file.write(query + "\n")
    print("executing query: " + query)
    tables, queries = preprocess_query(query, cur, conn)

    result_t = None
    formulas = []
    for processed_query in queries:
        ut.commitAndExecute(conn, cur, processed_query)
        rows = cur.fetchall()
        if result_t is None:
            # Rows of the first rewritten query provide the data columns.
            result_t = rows
        for i, tu in enumerate(rows):
            if len(formulas) == i:
                formulas.append([tu[0]])
            else:
                formulas[i].append(tu[0])
    if result_t is None:
        result_t = []

    processed_formulas = [process_formula(fs, tables) for fs in formulas]

    result__ = []
    for i, row in enumerate(result_t):
        rowl = list(row)
        rowl[0] = processed_formulas[i]
        result__.append(rowl)

    # Combine rows with same output
    result = ut.combineSameRow(result__)

    # Now print the processed query
    header = get_header(query)
    out_file.write("provenance formula | " + header + "provsql-token" + "\n\n")
    for row in result:
        line = " | ".join(str(elem) for elem in row)
        out_file.write(line + "\n")
    out_file.write("\n---------------------------\n\n")
def main():
    """Command-line entry point: run every query of a file with provenance.

    Positional argument ``env`` is a file whose stripped lines are joined
    with spaces into the psycopg2 connection string.  ``-q/--query`` names a
    file with one query per line (blank lines are ignored).  ``-o/--output``
    names the output file; without it results go to stdout.

    Exits with status 1 when no query file is given or when any error occurs
    while connecting or executing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("env", help="Environment configuration file")
    parser.add_argument("-q", "--query", help="Relative path for query file.")
    parser.add_argument("-o", "--output", help="Relative path for output file. Default as stdout")
    args = parser.parse_args()

    with open(args.env) as fp:
        env_config = "".join(line.strip() + ' ' for line in fp)
    # NOTE(review): this echoes the whole connection string, including any
    # password stored in the env file -- consider removing or masking it.
    print("database configuration " + env_config)

    if not args.query:
        print("Please specify at least one query file.")
        sys.exit(1)
    with open(args.query) as fp:
        # Skip blank lines: an empty query cannot be parsed or executed.
        queries = [line.strip() for line in fp if line.strip()]

    out_file = open(args.output, 'w') if args.output else sys.stdout
    conn = None
    try:
        print('connecting to databse...')
        conn = psycopg2.connect(env_config)
        cur = conn.cursor()
        ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
        for query in queries:
            exe_query(query, out_file, cur, conn)
            print("FINISHED: " + query)
        print("Done!")
    except Exception:
        # psycopg2.DatabaseError is a subclass of Exception, so this covers it.
        print(traceback.format_exc())
        sys.exit(1)
    finally:
        # Never close sys.stdout: the print below (and anything after main)
        # would fail with "I/O operation on closed file".
        if out_file is not sys.stdout:
            out_file.close()
        if conn is not None:
            conn.close()
            print('Database connection closed.')
if __name__ == '__main__':
main()
\ No newline at end of file
def commitAndExecute(conn, cur, command):
    """Execute *command* on *cur* and commit the transaction on *conn*.

    If execution fails, the transaction is rolled back before the exception
    is re-raised, so the connection is not left in an aborted-transaction
    state for the next statement.

    Args:
        conn: DB-API connection providing ``commit()`` and ``rollback()``.
        cur: cursor created from *conn*.
        command: SQL text to execute.

    Raises:
        Whatever ``cur.execute`` raises; the exception is propagated unchanged.
    """
    try:
        cur.execute(command)
    except Exception:
        conn.rollback()
        raise
    conn.commit()
# Return formula composed with 𝟙.
def baseFormula(formula):
    """Return *formula* with every operand token replaced by "𝟙".

    Operators ("⊕", "⊗", "⊖"), spaces, parentheses and existing "𝟙" symbols
    are copied unchanged.  Every maximal run of other characters is one
    operand token and is replaced by a single "𝟙".

    A token at the very end of the formula is replaced as well.  Previously
    it was dropped, so ``"a ⊕ b"`` became ``"𝟙 ⊕ "`` and ``"t1"`` became ``""``.

    Args:
        formula: provenance formula string.

    Returns:
        The formula skeleton made only of operators, separators and "𝟙".
    """
    structural = ("⊕", "⊗", "⊖", "𝟙", " ", "(", ")")
    pieces = []
    in_token = False
    for symbol in formula:
        if symbol in structural:
            if in_token:
                # The operand token just ended: emit its placeholder.
                pieces.append("𝟙")
                in_token = False
            pieces.append(symbol)
        else:
            in_token = True
    if in_token:
        # The formula ended while still inside an operand token.
        pieces.append("𝟙")
    return "".join(pieces)
def updateBaseFormula(tname, count, base):
    """Replace the operand at position *count* of *base* with *tname*.

    Operands are numbered from 0, left to right.  Both "𝟙" symbols and
    ordinary tokens (runs of non-separator characters) count as operands.
    The replacement only happens when the operand at position *count* is a
    "𝟙"; otherwise *base* is returned unchanged.

    Args:
        tname: text to insert.
        count: zero-based operand position to replace.
        base: formula string to update.

    Returns:
        The updated formula string.
    """
    separators = ("⊕", "⊗", "⊖", " ", "(", ")")
    pieces = []
    operand_index = 0
    inside_token = False
    for position, symbol in enumerate(base):
        if symbol in separators:
            if inside_token:
                # A non-"𝟙" operand just ended; it occupies one position.
                operand_index += 1
                inside_token = False
            pieces.append(symbol)
        elif symbol == "𝟙":
            if operand_index == count:
                pieces.append(tname)
                pieces.append(base[position + 1:])
                break
            pieces.append(symbol)
            operand_index += 1
        else:
            inside_token = True
            pieces.append(symbol)
    return "".join(pieces)
def remove1sAndAddTablename(formula, table, base):
    """Copy the operand tokens of *formula* into *base*, prefixed by *table*.

    *formula* is scanned left to right.  Each "𝟙" and each operand token
    occupies one operand position.  For every operand token, the operand at
    the same position in *base* is replaced (via ``updateBaseFormula``) by
    ``"<table>.<token>"``.

    A token at the very end of *formula* is flushed as well.  Previously it
    was ignored because no separator followed it.

    Args:
        formula: formula string holding this table's tokens and "𝟙" symbols.
        table: table name used as prefix.
        base: formula string to fill in.

    Returns:
        The updated *base* string.
    """
    separators = ("⊕", "⊗", "⊖", " ", "(", ")")
    operand_index = 0
    token_chars = []
    for symbol in formula:
        if symbol in separators:
            if token_chars:
                # The token just ended: write it into its operand position.
                base = updateBaseFormula(
                    table + "." + "".join(token_chars), operand_index, base
                )
                token_chars = []
                operand_index += 1
        elif symbol == "𝟙":
            operand_index += 1
        else:
            token_chars.append(symbol)
    if token_chars:
        # The formula ended while still inside a token.
        base = updateBaseFormula(
            table + "." + "".join(token_chars), operand_index, base
        )
    return base
def compareTwoRows(row1, row2):
    """Return True when two rows agree on all but their first and last items.

    Only indexes ``1 .. len(row1) - 2`` are compared; the first and last
    elements are ignored (presumably the formula and the provsql token --
    confirm against the caller).

    Args:
        row1: first row; its length defines the compared index range.
        row2: second row, indexed at the same positions.

    Returns:
        True if no compared position differs, otherwise False.
    """
    inner_positions = range(1, len(row1) - 1)
    return not any(row1[pos] != row2[pos] for pos in inner_positions)
def combineSameRow(input):
    """Merge consecutive rows that ``compareTwoRows`` considers equal.

    The first row of each run is kept.  For every following row of the same
    run, its element 0 is appended to the kept row's element 0, joined with
    " ⊕ ".  The kept row is modified in place.

    Args:
        input: iterable of mutable rows (lists).

    Returns:
        A list holding one row per run of equal consecutive rows.
    """
    merged = []
    group_head = None
    for current in input:
        if group_head is not None and compareTwoRows(current, group_head):
            tail = merged[-1]
            tail[0] = tail[0] + " ⊕ " + current[0]
            continue
        # First row overall, or a row that starts a new run.
        merged.append(current)
        group_head = current
    return merged
......@@ -4,5 +4,5 @@ createdb provdb
psql provdb < ../provsql/config/setup.sql
psql provdb < provdb.sql
psql provdb < ../provsql/config/func.sql
python3 provenance.py -q hw1-provenance/q1.txt -o hw1-provenance/q1_output.txt config.txt
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment