Skip to content
Snippets Groups Projects
Commit 2728356d authored by haoyuz3's avatar haoyuz3
Browse files

update q1

parents ba3220bf 59226a0f
No related branches found
No related tags found
No related merge requests found
SELECT Students.FirstName, Students.LastName, Courses.Instructor FROM Students INNER JOIN Enrollments ON Students.NetId=Enrollments.NetId INNER JOIN Courses ON Enrollments.CRN=Courses.CRN WHERE (Students.FirstName LIKE 'A%' OR Students.LastName LIKE 'A%') AND (Courses.Instructor LIKE 'A%'); SELECT DISTINCT S.NetId, S.FirstName, S.LastName FROM Students S, Enrollments E, Courses C WHERE S.NetId = E.NetId AND E.CRN = C.CRN AND S.Department = 'CS' AND C.Department = 'ECE' AND E.Score <= All (SELECT E2.Score FROM Enrollments E2 WHERE E2.CRN = E.CRN) ORDER BY S.LastName, S.NetId DESC;
SELECT Students.FirstName, Students.LastName, Courses.Instructor FROM Students INNER JOIN Enrollments ON Students.NetId=Enrollments.NetId INNER JOIN Courses ON Enrollments.CRN=Courses.CRN WHERE (Students.FirstName LIKE 'A%' OR Students.LastName LIKE 'A%') AND (Courses.Instructor LIKE 'A%'); SELECT DISTINCT S.NetId, S.FirstName, S.LastName FROM Students S, Enrollments E, Courses C WHERE S.NetId = E.NetId AND E.CRN = C.CRN AND S.Department = 'CS' AND C.Department = 'ECE' AND E.Score <= All (SELECT E2.Score FROM Enrollments E2 WHERE E2.CRN = E.CRN) ORDER BY S.LastName, S.NetId DESC;
...@@ -158,16 +158,17 @@ def preprocess_query(query, cur, conn): ...@@ -158,16 +158,17 @@ def preprocess_query(query, cur, conn):
tables = tablesInQuery(query) tables = tablesInQuery(query)
output = [] output = []
for table in tables: for table in tables:
print(table)
if table not in tablesWithProvenance: if table not in tablesWithProvenance:
cmdAddProvtuple = "select \"add_provtuple\"('{}')".format(table) cmdAddProvtuple = "select \"add_provtuple\"('{}')".format(table)
command1 = "select add_provenance('{}');".format(table) command1 = "select add_provenance('{}');".format(table)
command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table, table) command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table.lower(), table)
ut.commitAndExecute(conn, cur, cmdAddProvtuple) ut.commitAndExecute(conn, cur, cmdAddProvtuple)
ut.commitAndExecute(conn, cur, command1) ut.commitAndExecute(conn, cur, command1)
ut.commitAndExecute(conn, cur, command2) ut.commitAndExecute(conn, cur, command2)
tablesWithProvenance[table] = 1 tablesWithProvenance[table] = 1
formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table) formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table.lower())
index = query.find('select') + 6 index = query.lower().find('select') + 6
tmp = query[:index] + formula + query[index:] tmp = query[:index] + formula + query[index:]
output.append(tmp) output.append(tmp)
...@@ -292,4 +293,4 @@ def main(): ...@@ -292,4 +293,4 @@ def main():
print('Database connection closed.') print('Database connection closed.')
if __name__ == '__main__': if __name__ == '__main__':
main() main()
\ No newline at end of file
...@@ -64,7 +64,6 @@ def main(): ...@@ -64,7 +64,6 @@ def main():
for sub_id in list(submissions.keys()): for sub_id in list(submissions.keys()):
try: try:
commitAndExecute(conn, cur, submissions[sub_id]) commitAndExecute(conn, cur, submissions[sub_id])
f.write(submissions[sub_id]+"\n") f.write(submissions[sub_id]+"\n")
except (Exception, psycopg2.DatabaseError) as error: except (Exception, psycopg2.DatabaseError) as error:
print(submissions[sub_id], error) print(submissions[sub_id], error)
...@@ -73,10 +72,11 @@ def main(): ...@@ -73,10 +72,11 @@ def main():
conn.close() conn.close()
conn = psycopg2.connect("dbname=provdb user=joker password=m") conn = psycopg2.connect("dbname=provdb user=joker password=m")
cur = conn.cursor() cur = conn.cursor()
f.close() f.close()
if __name__ == "__main__": if __name__ == "__main__":
# execute only if run as a script # execute only if run as a script
main() main()
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
SELECT Students.FirstName, Students.LastName, Courses.Instructor FROM Students INNER JOIN Enrollments ON Students.NetId=Enrollments.NetId INNER JOIN Courses ON Enrollments.CRN=Courses.CRN WHERE (Students.FirstName LIKE 'A%' OR Students.LastName LIKE 'A%') AND (Courses.Instructor LIKE 'A%');
provenance formula | FirstName | LastName | Instructor | provsql-token
(students.t5 ⊗ enrollments.t10 ⊗ courses.t1) | Aero | AeroSmith | Abdu Alawini | f075f66a-1215-518c-a081-127e2af7978f
(students.t6 ⊗ enrollments.t11 ⊗ courses.t1) | Amy | Alan | Abdu Alawini | 314e60dc-db00-5560-9989-c4f776e59707
---------------------------
...@@ -7,168 +7,49 @@ import time ...@@ -7,168 +7,49 @@ import time
import operator import operator
import provenanceUtils as ut import provenanceUtils as ut
import traceback import traceback
import re
#When creating the auxiliary provenance tables, need to add a column called provtuple, which is a sorted array #When creating the auxiliary provenance tables, need to add a column called provtuple, which is a sorted array
#[t1, t2, t3...] #[t1, t2, t3...]
tablesWithProvenance = {} tablesWithProvenance = {}
twojoins = ["INNERJOIN", "LEFTJOIN", "RIGHTJOIN", "FULLJOIN", "NATURALJOIN"] def tablesInQuery(conn, cur, query):
threejoins = ["LEFTOUTERJOIN", "RIGHTOUTERJOIN", "FULLOUTERJOIN"] cur.execute("explain " + query)
output = cur.fetchall()
def tablesInQuery(query): tables = []
output = [] for tmp in output:
tokens = query.split(" ") row = tmp[0]
tokens = list(filter(lambda a: a != "", tokens)) if "Seq Scan on " in row:
state = "not in" idx = row.index("Seq Scan on ")
idx = 0 idx += 12
while idx < len(tokens): table = ""
if tokens[idx] == "": for i in range(idx, len(row)):
idx += 1 if row[i] == " ":
continue break
if state == "not in": table += row[i]
if tokens[idx].upper() == "FROM": tables.append(table)
state = "from in" return tables
if tokens[idx].upper() == "JOIN":
state = "join in"
elif state == "from in":
if tokens[idx].upper() in ["WHERE", "ON"]:
state = "not in"
idx += 1
continue
if tokens[idx].upper() == "JOIN":
state = "join in"
idx += 1
continue
if (tokens[idx]+tokens[idx+1]).upper() in twojoins:
state = "join in"
idx += 2
continue
if (tokens[idx]+tokens[idx+1]+tokens[idx+2]).upper() in threejoins:
state = "join in"
idx += 3
continue
if "SELECT" in tokens[idx].upper():
idx += 1
state = "not in"
continue
if "," in tokens[idx]:
output.append(tokens[idx].split(",")[0])
else:
if tokens[idx + 1].upper() == "AS":
output.append(tokens[idx])
idx += 2
elif "," in tokens[idx + 1]:
output.append(tokens[idx])
idx += 1
elif tokens[idx+1].upper() == "WHERE" :
state = "not in"
output.append(tokens[idx])
idx += 1
elif tokens[idx+2].upper() == "WHERE":
state = "not in"
output.append(tokens[idx])
idx += 2
elif (tokens[idx+2]+tokens[idx+3]).upper() in twojoins:
output.append(tokens[idx])
idx += 1
elif (tokens[idx+2]+tokens[idx+3]+tokens[idx+4]).upper() in threejoins:
output.append(tokens[idx])
idx += 1
else:
output.append(tokens[idx])
elif state == "join in":
if tokens[idx].upper() in ["WHERE", "ON"]:
state = "not in"
idx += 1
continue
if tokens[idx].upper() == "JOIN":
state = "join in"
idx += 1
continue
if (tokens[idx]+tokens[idx+1]).upper() in twojoins:
state = "join in"
idx += 2
continue
if (tokens[idx]+tokens[idx+1]+tokens[idx+2]).upper() in threejoins:
state = "join in"
idx += 3
continue
if "SELECT" in tokens[idx].upper():
idx += 1
state = "not in"
continue
if tokens[idx+2].upper() == "USING":
output.append(tokens[idx])
idx += 2
state = "not in"
continue
if tokens[idx + 1].upper() == "AS":
output.append(tokens[idx])
idx += 2
elif tokens[idx+1].upper() in ["WHERE", "ON", "JOIN"] or (tokens[idx+1]+tokens[idx+2]).upper() in twojoins or (tokens[idx+1]+tokens[idx+2]+tokens[idx+3]).upper() in threejoins:
output.append(tokens[idx])
else:
output.append(tokens[idx])
idx+=1
idx += 1
return list(set(output))
'''parsed = sp.parse(query)[0]
output = []
state = ""
for token in parsed.tokens:
cur = token.value
if cur == " ":
continue
if cur.lower() == "from":
state = "first table"
continue
if state == "first table":
if "(" not in cur and "join" in cur:
state = "second table"
continue
if "(" in cur and ")" in cur:
#subquery
output.extend(tablesInQuery(cur[cur.index("(")+1 : len(cur) - 1 - cur[::-1].index(")")]))
else:
output.append(cur.split("as")[0].strip())
continue
if state == "second table":
if "(" in cur and ")" in cur:
#subquery
output.extend(tablesInQuery(cur[cur.index("(")+1 : len(cur) - 1 - cur[::-1].index(")")]))
else:
output.append(cur.split("as")[0].strip())
break
return list(set(output))'''
#Preprocess query #Preprocess query
def preprocess_query(query, cur, conn): def preprocess_query(query, cur, conn):
tables = tablesInQuery(query) tables = tablesInQuery(conn, cur, query)
output = [] output = []
for table in tables: for table in tables:
print(table)
if table not in tablesWithProvenance: if table not in tablesWithProvenance:
cmdAddProvtuple = "select \"add_provtuple\"('{}')".format(table) cmdAddProvtuple = "select \"add_provtuple\"('{}')".format(table)
command1 = "select add_provenance('{}');".format(table) command1 = "select add_provenance('{}');".format(table)
command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table, table) command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table.lower(), table)
ut.commitAndExecute(conn, cur, cmdAddProvtuple) ut.commitAndExecute(conn, cur, cmdAddProvtuple)
ut.commitAndExecute(conn, cur, command1) ut.commitAndExecute(conn, cur, command1)
ut.commitAndExecute(conn, cur, command2) ut.commitAndExecute(conn, cur, command2)
tablesWithProvenance[table] = 1 tablesWithProvenance[table] = 1
formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table) formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table.lower())
index = query.find('select') + 6 index = query.lower().find('select') + 6
tmp = query[:index] + formula + query[index:] tmp = query[:index] + formula + query[index:]
print(tmp)
output.append(tmp) output.append(tmp)
return (tables,output) return (tables,output)
...@@ -201,7 +82,7 @@ def get_header(query): ...@@ -201,7 +82,7 @@ def get_header(query):
def exe_query(query, out_file, cur, conn): def exe_query(query, out_file, cur, conn):
out_file.write(query + "\n")
print("executing query: " + query) print("executing query: " + query)
tables, queries = preprocess_query(query, cur, conn) tables, queries = preprocess_query(query, cur, conn)
result_t = None result_t = None
...@@ -232,6 +113,8 @@ def exe_query(query, out_file, cur, conn): ...@@ -232,6 +113,8 @@ def exe_query(query, out_file, cur, conn):
# Now print the processed query # Now print the processed query
header = get_header(query) header = get_header(query)
out_file.write(query + "\n")
out_file.write("provenance formula | " + header+ "provsql-token"+"\n\n") out_file.write("provenance formula | " + header+ "provsql-token"+"\n\n")
for row in result: for row in result:
line = " | ".join(str(elem) for elem in row) line = " | ".join(str(elem) for elem in row)
...@@ -271,25 +154,30 @@ def main(): ...@@ -271,25 +154,30 @@ def main():
out_file = open(args.output, 'w') out_file = open(args.output, 'w')
conn = None conn = None
try: print('connecting to databse...')
print('connecting to databse...') conn = psycopg2.connect(env_config)
conn = psycopg2.connect(env_config) cur = conn.cursor()
cur = conn.cursor() ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
for query in queries: for query in queries:
try:
exe_query(query, out_file, cur, conn) exe_query(query, out_file, cur, conn)
print("FINISHED: "+query) print("FINISHED: "+query)
except (Exception, psycopg2.DatabaseError) as error:
print("Done!") print(traceback.format_exc())
except (Exception, psycopg2.DatabaseError) as error: #out_file.write(traceback.format_exc()+"\n")
print(traceback.format_exc()) conn.commit()
exit(1) cur.close()
finally:
out_file.close()
if conn is not None:
conn.close() conn.close()
print('Database connection closed.') conn = psycopg2.connect(env_config)
cur = conn.cursor()
ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
print("Done!")
out_file.close()
if conn is not None:
conn.close()
print('Database connection closed.')
if __name__ == '__main__': if __name__ == '__main__':
main() main()
\ No newline at end of file
...@@ -4,5 +4,5 @@ createdb provdb ...@@ -4,5 +4,5 @@ createdb provdb
psql provdb < ../provsql/config/setup.sql psql provdb < ../provsql/config/setup.sql
psql provdb < provdb.sql psql provdb < provdb.sql
psql provdb < ../provsql/config/func.sql psql provdb < ../provsql/config/func.sql
python3 provenance.py -q hw1-provenance/q1.txt -o hw1-provenance/q1_output.txt config.txt python3 provenance.py -q Solutions/Q1-solution.sql -o hw1-provenance/q1_solution.txt config.txt
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment