Commit 1bf1e2c8 authored by haoyuz3's avatar haoyuz3
Browse files

q3 error

parent 2728356d
-- Render two formula fragments as the text "(<formula1> * <formula2>)".
-- STRICT: if either argument is NULL the result is NULL and the body is not evaluated,
-- so the plain || concatenation below never sees a NULL operand.
CREATE FUNCTION public.formula_semimod(formula1 text, formula2 text) RETURNS text
    LANGUAGE sql IMMUTABLE STRICT
    AS $$
    SELECT '(' || formula1 || ' * ' || formula2 || ')'
$$;
-- State-transition function of the public.formula_agg aggregate.
--   state : the formula accumulated so far, with fields formula and nbargs
--   value : the next text value to fold in
-- While nothing has been accumulated (state IS NULL or state.nbargs = 0) the
-- result is (value, 1). Otherwise value is appended to state.formula after a
-- ' , ' separator and nbargs is incremented by one.
-- The function is not declared STRICT, so it is also invoked for NULL values.
-- NOTE(review): public.formula_state is defined outside this view; the field
-- order (formula, nbargs) is taken from the row constructors below -- confirm.
CREATE FUNCTION public.formula_agg_state(state public.formula_state, value text) RETURNS public.formula_state
LANGUAGE plpgsql IMMUTABLE
AS $$
BEGIN
IF state IS NULL OR state.nbargs=0 THEN
RETURN (value,1);
ELSE
RETURN (concat(state.formula,' , ',value),state.nbargs+1);
END IF;
END
$$;
-- Aggregate over text values that accumulates a public.formula_state through
-- public.formula_agg_state. The initial state '(1,0)' carries nbargs = 0, which
-- makes the first call to the state function take its "nothing accumulated yet"
-- branch. No final function is attached here.
CREATE AGGREGATE public.formula_agg (text) (
    sfunc    = public.formula_agg_state,
    stype    = public.formula_state,
    initcond = '(1,0)'
);
-- Wrap an accumulated formula in the text "<fname>{ <formula> }".
--   state : accumulated public.formula_state; only its formula field is read
--   fname : label placed in front of the braces
-- STRICT: a NULL state or fname yields NULL. A NULL formula field inside a
-- non-NULL state is rendered as an empty string.
CREATE FUNCTION public.formula_agg_final(state public.formula_state, fname varchar) RETURNS text
    LANGUAGE sql IMMUTABLE STRICT
    AS
$$
    SELECT format('%s{ %s }', fname, state.formula);
$$;
-- Return the text produced by provsql.aggregation_evaluate for the given token.
--   token       : value that is cast to provsql.provenance_token
--   token2value : mapping relation handed through unchanged
-- All remaining arguments are fixed: the names of the formula_* routines and
-- the text constant used by this formula representation.
CREATE FUNCTION public.aggregation_formula(token anyelement, token2value regclass) RETURNS text
    LANGUAGE plpgsql
    AS $$
BEGIN
    RETURN provsql.aggregation_evaluate(
        CAST(token AS provsql.provenance_token),
        token2value,
        -- The next three names match routines created earlier in this file.
        'formula_agg_final',
        'formula_agg',
        'formula_semimod',
        CAST('𝟙' AS text),
        -- The routines named below are not defined in the visible part of this file.
        'formula_plus',
        'formula_times',
        'formula_monus',
        'formula_delta'
    );
END
$$;
......@@ -7,155 +7,34 @@ import time
import operator
import provenanceUtils as ut
import traceback
import re
#When creating the auxiliary provenance tables, we need to add a column called provtuple, which is a sorted array
#[t1, t2, t3...]
tablesWithProvenance = {}
twojoins = ["INNERJOIN", "LEFTJOIN", "RIGHTJOIN", "FULLJOIN", "NATURALJOIN"]
threejoins = ["LEFTOUTERJOIN", "RIGHTOUTERJOIN", "FULLOUTERJOIN"]
# Hand-rolled scan of the query text for table names (no SQL parser involved).
# The query is split on single spaces and walked with a three-state machine:
#   "not in"  - outside a table list; FROM switches to "from in", JOIN to "join in"
#   "from in" - reading the tables listed after FROM
#   "join in" - reading the table named after a JOIN keyword
# Returns the collected names with duplicates removed; the result goes through
# set(), so the order of the names is not preserved.
# NOTE(review): the indentation of this function is not visible in this view, so
# the exact nesting of the branches below (in particular the trailing idx
# increments) should be confirmed against the real file.
# NOTE(review): the look-aheads tokens[idx+1] .. tokens[idx+4] are not
# bounds-checked here -- presumably they can raise IndexError near the end of
# the query; verify.
def tablesInQuery(query):
output = []
tokens = query.split(" ")
# Drop the empty strings that consecutive spaces produce.
tokens = list(filter(lambda a: a != "", tokens))
state = "not in"
idx = 0
while idx < len(tokens):
if tokens[idx] == "":
idx += 1
continue
if state == "not in":
if tokens[idx].upper() == "FROM":
state = "from in"
if tokens[idx].upper() == "JOIN":
state = "join in"
elif state == "from in":
# WHERE or ON leaves the table list.
if tokens[idx].upper() in ["WHERE", "ON"]:
state = "not in"
idx += 1
continue
if tokens[idx].upper() == "JOIN":
state = "join in"
idx += 1
continue
# Two-word join keywords (see twojoins), matched by concatenating adjacent tokens.
if (tokens[idx]+tokens[idx+1]).upper() in twojoins:
state = "join in"
idx += 2
continue
# Three-word join keywords (see threejoins).
if (tokens[idx]+tokens[idx+1]+tokens[idx+2]).upper() in threejoins:
state = "join in"
idx += 3
continue
# Any token containing SELECT also leaves the table list.
if "SELECT" in tokens[idx].upper():
idx += 1
state = "not in"
continue
# "name," form: keep the part before the first comma.
if "," in tokens[idx]:
output.append(tokens[idx].split(",")[0])
else:
# Otherwise record the token as a table name and skip a varying number
# of following tokens, depending on what comes after it.
if tokens[idx + 1].upper() == "AS":
output.append(tokens[idx])
idx += 2
elif "," in tokens[idx + 1]:
output.append(tokens[idx])
idx += 1
elif tokens[idx+1].upper() == "WHERE" :
state = "not in"
output.append(tokens[idx])
idx += 1
elif tokens[idx+2].upper() == "WHERE":
state = "not in"
output.append(tokens[idx])
idx += 2
elif (tokens[idx+2]+tokens[idx+3]).upper() in twojoins:
output.append(tokens[idx])
idx += 1
elif (tokens[idx+2]+tokens[idx+3]+tokens[idx+4]).upper() in threejoins:
output.append(tokens[idx])
idx += 1
else:
output.append(tokens[idx])
elif state == "join in":
if tokens[idx].upper() in ["WHERE", "ON"]:
state = "not in"
idx += 1
continue
if tokens[idx].upper() == "JOIN":
state = "join in"
idx += 1
continue
if (tokens[idx]+tokens[idx+1]).upper() in twojoins:
state = "join in"
idx += 2
continue
if (tokens[idx]+tokens[idx+1]+tokens[idx+2]).upper() in threejoins:
state = "join in"
idx += 3
continue
if "SELECT" in tokens[idx].upper():
idx += 1
state = "not in"
continue
# The token two positions ahead is USING: record the current token as the table.
if tokens[idx+2].upper() == "USING":
output.append(tokens[idx])
idx += 2
state = "not in"
continue
if tokens[idx + 1].upper() == "AS":
output.append(tokens[idx])
idx += 2
elif tokens[idx+1].upper() in ["WHERE", "ON", "JOIN"] or (tokens[idx+1]+tokens[idx+2]).upper() in twojoins or (tokens[idx+1]+tokens[idx+2]+tokens[idx+3]).upper() in threejoins:
output.append(tokens[idx])
else:
output.append(tokens[idx])
idx+=1
idx += 1
# De-duplicate before returning.
return list(set(output))
'''parsed = sp.parse(query)[0]
output = []
state = ""
for token in parsed.tokens:
cur = token.value
if cur == " ":
continue
if cur.lower() == "from":
state = "first table"
continue
if state == "first table":
if "(" not in cur and "join" in cur:
state = "second table"
continue
if "(" in cur and ")" in cur:
#subquery
output.extend(tablesInQuery(cur[cur.index("(")+1 : len(cur) - 1 - cur[::-1].index(")")]))
else:
output.append(cur.split("as")[0].strip())
continue
if state == "second table":
if "(" in cur and ")" in cur:
#subquery
output.extend(tablesInQuery(cur[cur.index("(")+1 : len(cur) - 1 - cur[::-1].index(")")]))
else:
output.append(cur.split("as")[0].strip())
break
return list(set(output))'''
def tablesInQuery(conn, cur, query):
    """Return the tables the planner scans to answer ``query``.

    Runs ``EXPLAIN`` on the query and reads the relation name out of every
    table-scan node in the textual plan. Sequential scans, index scans,
    index-only scans and bitmap heap scans are recognised. A
    ``Bitmap Index Scan`` line names an index rather than a table, so it is
    deliberately not matched.

    Args:
        conn: Unused; kept so existing call sites keep working.
        cur: DB-API cursor; only ``execute`` and ``fetchall`` are used.
        query: SQL text. It is appended verbatim to ``"explain "`` (a statement
            cannot be bound as a parameter), so it must come from a trusted
            source.

    Returns:
        A list of table names in order of first appearance in the plan. Each
        table is listed once, even when the plan scans it several times
        (for example in a self-join).
    """
    scan_node = re.compile(
        r"(?:Seq Scan|Bitmap Heap Scan"
        r"|Index (?:Only )?Scan(?: Backward)? using \S+) on (\S+)"
    )
    cur.execute("explain " + query)
    tables = []
    for plan_row in cur.fetchall():
        # Each fetched row is a 1-tuple holding one line of the plan text.
        found = scan_node.search(plan_row[0])
        if found is None:
            continue
        name = found.group(1)
        if name not in tables:
            tables.append(name)
    return tables
#Preprocess query
def preprocess_query(query, cur, conn):
tables = tablesInQuery(query)
tables = tablesInQuery(conn, cur, query)
output = []
for table in tables:
print(table)
......@@ -170,6 +49,7 @@ def preprocess_query(query, cur, conn):
formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table.lower())
index = query.lower().find('select') + 6
tmp = query[:index] + formula + query[index:]
print(tmp)
output.append(tmp)
return (tables,output)
......@@ -202,7 +82,7 @@ def get_header(query):
def exe_query(query, out_file, cur, conn):
out_file.write(query + "\n")
print("executing query: " + query)
tables, queries = preprocess_query(query, cur, conn)
result_t = None
......@@ -233,6 +113,8 @@ def exe_query(query, out_file, cur, conn):
# Now print the processed query
header = get_header(query)
out_file.write(query + "\n")
out_file.write("provenance formula | " + header+ "provsql-token"+"\n\n")
for row in result:
line = " | ".join(str(elem) for elem in row)
......@@ -272,25 +154,30 @@ def main():
out_file = open(args.output, 'w')
conn = None
try:
print('connecting to databse...')
conn = psycopg2.connect(env_config)
cur = conn.cursor()
ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
print('connecting to databse...')
conn = psycopg2.connect(env_config)
cur = conn.cursor()
ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
for query in queries:
for query in queries:
try:
exe_query(query, out_file, cur, conn)
print("FINISHED: "+query)
print("Done!")
except (Exception, psycopg2.DatabaseError) as error:
print(traceback.format_exc())
exit(1)
finally:
out_file.close()
if conn is not None:
except (Exception, psycopg2.DatabaseError) as error:
print(traceback.format_exc())
#out_file.write(traceback.format_exc()+"\n")
conn.commit()
cur.close()
conn.close()
print('Database connection closed.')
conn = psycopg2.connect(env_config)
cur = conn.cursor()
ut.commitAndExecute(conn, cur, 'SET search_path TO public, provsql;')
print("Done!")
out_file.close()
if conn is not None:
conn.close()
print('Database connection closed.')
if __name__ == '__main__':
main()
main()
\ No newline at end of file
SELECT Students.FirstName, Students.LastName, Courses.Instructor FROM Students INNER JOIN Enrollments ON Students.NetId=Enrollments.NetId INNER JOIN Courses ON Enrollments.CRN=Courses.CRN WHERE (Students.FirstName LIKE 'A%' OR Students.LastName LIKE 'A%') AND (Courses.Instructor LIKE 'A%');
provenance formula | FirstName | LastName | Instructor | provsql-token
(students.t5 ⊗ enrollments.t10 ⊗ courses.t1) | Aero | AeroSmith | Abdu Alawini | f075f66a-1215-518c-a081-127e2af7978f
(students.t6 ⊗ enrollments.t11 ⊗ courses.t1) | Amy | Alan | Abdu Alawini | 314e60dc-db00-5560-9989-c4f776e59707
(students.t5 ⊗ enrollments.t10 ⊗ courses.t1) | Aero | AeroSmith | Abdu Alawini | b440ab49-79d6-55e6-9b60-e58cd002527f
(students.t6 ⊗ enrollments.t11 ⊗ courses.t1) | Amy | Alan | Abdu Alawini | 3d0d85b3-f3b5-5c08-9a9f-ea92b8a093e8
---------------------------
SELECT E.CRN,C.Title,C.Department, AVG(Score) FROM Enrollments E, Courses C WHERE E.CRN = C.CRN AND C.Department='ECE' GROUP BY E.CRN,C.Title,C.Department HAVING AVG(Score) > 70 ORDER BY AVG(Score) DESC;
\ No newline at end of file
......@@ -46,7 +46,8 @@ def preprocess_query(query, cur, conn):
ut.commitAndExecute(conn, cur, command1)
ut.commitAndExecute(conn, cur, command2)
tablesWithProvenance[table] = 1
formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table.lower())
#formula = " formula(provenance(), '{}_provtuple_mapping'), ".format(table.lower())
formula = " aggregation_formula(AVG(Score), '{}_provtuple_mapping'), ".format(table.lower())
index = query.lower().find('select') + 6
tmp = query[:index] + formula + query[index:]
print(tmp)
......
......@@ -2,7 +2,8 @@
# Recreate the provdb database from scratch and load the SQL files into it.
dropdb provdb
createdb provdb
psql provdb < ../provsql/config/setup.sql
# Load into provdb: this line used to target "provsb", a database this script never creates.
psql provdb < ../provsql/config/aggregation.sql
psql provdb < provdb.sql
psql provdb < ../provsql/config/func.sql
# Run the provenance tool on the Q1 solution and on q3, writing one output file each.
python3 provenance.py -q Solutions/Q1-solution.sql -o hw1-provenance/q1_solution.txt config.txt
python3 provenance.py -q hw1-provenance/q3.txt -o hw1-provenance/q3_output.txt config.txt
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment