Commit 2728356d authored by haoyuz3's avatar haoyuz3
Browse files

update q1

parents ba3220bf 59226a0f
SELECT Students.FirstName, Students.LastName, Courses.Instructor FROM Students INNER JOIN Enrollments ON Students.NetId=Enrollments.NetId INNER JOIN Courses ON Enrollments.CRN=Courses.CRN WHERE (Students.FirstName LIKE 'A%' OR Students.LastName LIKE 'A%') AND (Courses.Instructor LIKE 'A%');
SELECT DISTINCT S.NetId, S.FirstName, S.LastName FROM Students S, Enrollments E, Courses C WHERE S.NetId = E.NetId AND E.CRN = C.CRN AND S.Department = 'CS' AND C.Department = 'ECE' AND E.Score <= All (SELECT E2.Score FROM Enrollments E2 WHERE E2.CRN = E.CRN) ORDER BY S.LastName, S.NetId DESC;
SELECT Students.FirstName, Students.LastName, Courses.Instructor FROM Students INNER JOIN Enrollments ON Students.NetId=Enrollments.NetId INNER JOIN Courses ON Enrollments.CRN=Courses.CRN WHERE (Students.FirstName LIKE 'A%' OR Students.LastName LIKE 'A%') AND (Courses.Instructor LIKE 'A%');
SELECT DISTINCT S.NetId, S.FirstName, S.LastName FROM Students S, Enrollments E, Courses C WHERE S.NetId = E.NetId AND E.CRN = C.CRN AND S.Department = 'CS' AND C.Department = 'ECE' AND E.Score <= All (SELECT E2.Score FROM Enrollments E2 WHERE E2.CRN = E.CRN) ORDER BY S.LastName, S.NetId DESC;
......@@ -158,16 +158,17 @@ def preprocess_query(query, cur, conn):
tables = tablesInQuery(query)
output = []
for table in tables:
print(table)
if table not in tablesWithProvenance:
cmdAddProvtuple = "select \"add_provtuple\"('{}')".format(table)
command1 = "select add_provenance('{}');".format(table)
command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table, table)
command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table.lower(), table)
ut.commitAndExecute(conn, cur, cmdAddProvtuple)
ut.commitAndExecute(conn, cur, command1)
ut.commitAndExecute(conn, cur, command2)
tablesWithProvenance[table] = 1
formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table)
index = query.find('select') + 6
formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table.lower())
index = query.lower().find('select') + 6
tmp = query[:index] + formula + query[index:]
output.append(tmp)
......@@ -292,4 +293,4 @@ def main():
print('Database connection closed.')
if __name__ == '__main__':
main()
\ No newline at end of file
main()
......@@ -64,7 +64,6 @@ def main():
for sub_id in list(submissions.keys()):
try:
commitAndExecute(conn, cur, submissions[sub_id])
f.write(submissions[sub_id]+"\n")
except (Exception, psycopg2.DatabaseError) as error:
print(submissions[sub_id], error)
......@@ -73,10 +72,11 @@ def main():
conn.close()
conn = psycopg2.connect("dbname=provdb user=joker password=m")
cur = conn.cursor()
f.close()
if __name__ == "__main__":
# execute only if run as a script
main()
\ No newline at end of file
main()
This diff is collapsed.
This diff is collapsed.
SELECT Students.FirstName, Students.LastName, Courses.Instructor FROM Students INNER JOIN Enrollments ON Students.NetId=Enrollments.NetId INNER JOIN Courses ON Enrollments.CRN=Courses.CRN WHERE (Students.FirstName LIKE 'A%' OR Students.LastName LIKE 'A%') AND (Courses.Instructor LIKE 'A%');
provenance formula | FirstName | LastName | Instructor | provsql-token
(students.t5 ⊗ enrollments.t10 ⊗ courses.t1) | Aero | AeroSmith | Abdu Alawini | f075f66a-1215-518c-a081-127e2af7978f
(students.t6 ⊗ enrollments.t11 ⊗ courses.t1) | Amy | Alan | Abdu Alawini | 314e60dc-db00-5560-9989-c4f776e59707
---------------------------
......@@ -7,168 +7,49 @@ import time
import operator
import provenanceUtils as ut
import traceback
import re
#When creating the auxiliary provenance tables, need to add a column called provtuple, which is a sorted array
#[t1, t2, t3...]
# Tables that preprocess_query has already set up for provenance; it stores
# an entry (value 1) per table name so the setup statements run only once.
tablesWithProvenance = {}
# JOIN keywords made of two / three words, upper-cased and with the spaces
# removed. The token-based tablesInQuery concatenates adjacent tokens and
# checks the result against these lists.
twojoins = ["INNERJOIN", "LEFTJOIN", "RIGHTJOIN", "FULLJOIN", "NATURALJOIN"]
threejoins = ["LEFTOUTERJOIN", "RIGHTOUTERJOIN", "FULLOUTERJOIN"]
# Heuristic table-name extractor. Splits `query` on single spaces and walks
# the tokens with a three-state machine:
#   "not in"  - outside any FROM/JOIN clause, waiting for FROM or JOIN
#   "from in" - inside a FROM clause
#   "join in" - right after a JOIN keyword
# Tokens judged to be table names are collected and returned as a list with
# duplicates removed (built from a set, so the order is not guaranteed).
# The lookaheads (tokens[idx+1] ... tokens[idx+4]) are not bounds-checked.
# NOTE(review): the indentation of this listing is lost, so the exact nesting
# of some statements (see the note near the end) cannot be confirmed here.
def tablesInQuery(query):
output = []
tokens = query.split(" ")
# Consecutive spaces produce empty strings; drop them.
tokens = list(filter(lambda a: a != "", tokens))
state = "not in"
idx = 0
while idx < len(tokens):
# Empty tokens were already filtered out above, so this guard never fires.
if tokens[idx] == "":
idx += 1
continue
# Outside a clause: only FROM and a bare JOIN change the state.
if state == "not in":
if tokens[idx].upper() == "FROM":
state = "from in"
if tokens[idx].upper() == "JOIN":
state = "join in"
# Inside a FROM clause.
elif state == "from in":
# WHERE / ON ends the clause.
if tokens[idx].upper() in ["WHERE", "ON"]:
state = "not in"
idx += 1
continue
if tokens[idx].upper() == "JOIN":
state = "join in"
idx += 1
continue
# Two-word join keyword (e.g. INNER JOIN): skip both words.
if (tokens[idx]+tokens[idx+1]).upper() in twojoins:
state = "join in"
idx += 2
continue
# Three-word join keyword (e.g. LEFT OUTER JOIN): skip all three words.
if (tokens[idx]+tokens[idx+1]+tokens[idx+2]).upper() in threejoins:
state = "join in"
idx += 3
continue
# A token containing SELECT (such as "(SELECT") leaves the FROM clause.
if "SELECT" in tokens[idx].upper():
idx += 1
state = "not in"
continue
# "name," - keep the text before the first comma.
if "," in tokens[idx]:
output.append(tokens[idx].split(",")[0])
else:
# "name AS alias": record the name and advance past AS.
if tokens[idx + 1].upper() == "AS":
output.append(tokens[idx])
idx += 2
# "name alias,": record the name and advance past the alias token.
elif "," in tokens[idx + 1]:
output.append(tokens[idx])
idx += 1
# "name WHERE": the FROM clause ends right after this table.
elif tokens[idx+1].upper() == "WHERE" :
state = "not in"
output.append(tokens[idx])
idx += 1
# "name alias WHERE": the FROM clause ends after the alias.
elif tokens[idx+2].upper() == "WHERE":
state = "not in"
output.append(tokens[idx])
idx += 2
# "name alias <two-word join>": record the name, step over the alias.
elif (tokens[idx+2]+tokens[idx+3]).upper() in twojoins:
output.append(tokens[idx])
idx += 1
# "name alias <three-word join>": record the name, step over the alias.
elif (tokens[idx+2]+tokens[idx+3]+tokens[idx+4]).upper() in threejoins:
output.append(tokens[idx])
idx += 1
else:
output.append(tokens[idx])
# Right after a JOIN keyword; the first checks mirror the "from in" state.
elif state == "join in":
if tokens[idx].upper() in ["WHERE", "ON"]:
state = "not in"
idx += 1
continue
if tokens[idx].upper() == "JOIN":
state = "join in"
idx += 1
continue
if (tokens[idx]+tokens[idx+1]).upper() in twojoins:
state = "join in"
idx += 2
continue
if (tokens[idx]+tokens[idx+1]+tokens[idx+2]).upper() in threejoins:
state = "join in"
idx += 3
continue
if "SELECT" in tokens[idx].upper():
idx += 1
state = "not in"
continue
# "name alias USING ...": record the name and leave the join clause.
if tokens[idx+2].upper() == "USING":
output.append(tokens[idx])
idx += 2
state = "not in"
continue
# "name AS alias": record the name and advance past AS.
if tokens[idx + 1].upper() == "AS":
output.append(tokens[idx])
idx += 2
# The name is directly followed by WHERE / ON / a join keyword (no alias).
elif tokens[idx+1].upper() in ["WHERE", "ON", "JOIN"] or (tokens[idx+1]+tokens[idx+2]).upper() in twojoins or (tokens[idx+1]+tokens[idx+2]+tokens[idx+3]).upper() in threejoins:
output.append(tokens[idx])
else:
output.append(tokens[idx])
# NOTE(review): with the indentation lost it is unclear whether the next
# statement belongs to the else branch above or to the whole "join in"
# branch - confirm against the original file.
idx+=1
# Advance to the next token.
idx += 1
# De-duplicate; set ordering is not guaranteed.
return list(set(output))
'''parsed = sp.parse(query)[0]
output = []
state = ""
for token in parsed.tokens:
cur = token.value
if cur == " ":
continue
if cur.lower() == "from":
state = "first table"
continue
if state == "first table":
if "(" not in cur and "join" in cur:
state = "second table"
continue
if "(" in cur and ")" in cur:
#subquery
output.extend(tablesInQuery(cur[cur.index("(")+1 : len(cur) - 1 - cur[::-1].index(")")]))
else:
output.append(cur.split("as")[0].strip())
continue
if state == "second table":
if "(" in cur and ")" in cur:
#subquery
output.extend(tablesInQuery(cur[cur.index("(")+1 : len(cur) - 1 - cur[::-1].index(")")]))
else:
output.append(cur.split("as")[0].strip())
break
return list(set(output))'''
# EXPLAIN plan nodes that read a relation directly, e.g.
#   "Seq Scan on students s", "Index Scan using students_pkey on students",
#   "Index Only Scan Backward using idx on courses", "Bitmap Heap Scan on t".
# "Bitmap Index Scan on <index>" is excluded on purpose (negative lookbehind):
# it names an index, not a table.
_RELATION_SCAN_RE = re.compile(
    r"(?<!Bitmap )\b(?:Seq|Index(?: Only)?|Bitmap Heap) Scan"
    r"(?: Backward)?(?: using \S+)? on (\S+)"
)


def tablesInQuery(conn, cur, query):
    """Return the tables the planner scans for *query*.

    Runs ``explain <query>`` on ``cur`` and pulls the relation name out of
    every sequential, index, index-only and bitmap-heap scan node in the
    plan text (the first column of each returned row).

    Args:
        conn: Unused; kept so the call signature stays unchanged.
        cur: Cursor-like object providing ``execute`` and ``fetchall``.
        query: SQL text to explain. It is concatenated into the statement
            as-is, so it must come from a trusted source.

    Returns:
        Table names in order of first appearance in the plan, without
        duplicates (a table scanned twice, e.g. in a self-join or a
        correlated subquery, is reported once).
    """
    cur.execute("explain " + query)
    tables = []
    seen = set()
    for plan_row in cur.fetchall():
        match = _RELATION_SCAN_RE.search(plan_row[0])
        if match is None:
            continue
        table = match.group(1)
        # Keep first-seen order while dropping repeats.
        if table not in seen:
            seen.add(table)
            tables.append(table)
    return tables
# Preprocess query: make sure every table the query touches has provenance
# set up, then build one rewritten query per table with a
# "formula(provenance(), '<table>_provtuple_mapping')," column inserted right
# after the first "select" found in the query text.
# Returns (tables, output): the table names and the rewritten queries, one
# per table, in the same order.
# NOTE(review): several statements below appear twice in slightly different
# forms; they look like the removed/added sides of a diff rendered without
# +/- markers - confirm which variant is live before relying on either.
def preprocess_query(query, cur, conn):
tables = tablesInQuery(query)
tables = tablesInQuery(conn, cur, query)
output = []
for table in tables:
print(table)
# First time this table is seen: run the three setup statements and remember it.
if table not in tablesWithProvenance:
cmdAddProvtuple = "select \"add_provtuple\"('{}')".format(table)
command1 = "select add_provenance('{}');".format(table)
command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table, table)
# Variant that lower-cases the table name used in the mapping name.
command2 = "select create_provenance_mapping('{}_provtuple_mapping','{}','provtuple');".format(table.lower(), table)
ut.commitAndExecute(conn, cur, cmdAddProvtuple)
ut.commitAndExecute(conn, cur, command1)
ut.commitAndExecute(conn, cur, command2)
tablesWithProvenance[table] = 1
formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table)
# Case-sensitive search for 'select'; +6 moves just past the keyword.
index = query.find('select') + 6
formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table.lower())
# Case-insensitive variant of the same search.
index = query.lower().find('select') + 6
# Splice the formula column in directly after the keyword.
tmp = query[:index] + formula + query[index:]
print(tmp)
output.append(tmp)
return (tables,output)
......@@ -201,7 +82,7 @@ def get_header(query):
def exe_query(query, out_file, cur, conn):
out_file.write(query + "\n")
print("executing query: " + query)
tables, queries = preprocess_query(query, cur, conn)
result_t = None
......@@ -232,6 +113,8 @@ def exe_query(query, out_file, cur, conn):
# Now print the processed query
header = get_header(query)
out_file.write(query + "\n")
out_file.write("provenance formula | " + header+ "provsql-token"+"\n\n")
for row in result:
line = " | ".join(str(elem) for elem in row)
......@@ -271,25 +154,30 @@ def main():
out_file = open(args.output, 'w')
conn = None
try:
print('connecting to databse...')
conn = psycopg2.connect(env_config)
cur = conn.cursor()
ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
print('connecting to databse...')
conn = psycopg2.connect(env_config)
cur = conn.cursor()
ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
for query in queries:
for query in queries:
try:
exe_query(query, out_file, cur, conn)
print("FINISHED: "+query)
print("Done!")
except (Exception, psycopg2.DatabaseError) as error:
print(traceback.format_exc())
exit(1)
finally:
out_file.close()
if conn is not None:
except (Exception, psycopg2.DatabaseError) as error:
print(traceback.format_exc())
#out_file.write(traceback.format_exc()+"\n")
conn.commit()
cur.close()
conn.close()
print('Database connection closed.')
conn = psycopg2.connect(env_config)
cur = conn.cursor()
ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
print("Done!")
out_file.close()
if conn is not None:
conn.close()
print('Database connection closed.')
if __name__ == '__main__':
main()
\ No newline at end of file
......@@ -4,5 +4,5 @@ createdb provdb
psql provdb < ../provsql/config/setup.sql
psql provdb < provdb.sql
psql provdb < ../provsql/config/func.sql
python3 provenance.py -q hw1-provenance/q1.txt -o hw1-provenance/q1_output.txt config.txt
python3 provenance.py -q Solutions/Q1-solution.sql -o hw1-provenance/q1_solution.txt config.txt
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment