Commit ea092031 authored by Haoyu Z's avatar Haoyu Z
Browse files

debug

parent 480b1351
......@@ -14,14 +14,37 @@ import traceback
tablesWithProvenance = {}
def tablesInQuery(query):
    """Return the distinct table names found after FROM in *query*.

    Only the first statement produced by ``sp.parse`` is inspected (``sp`` is
    a module-level name from elsewhere in this file, presumably sqlparse --
    confirm against the import block).  Its top-level tokens are scanned with
    a small state machine:

    * nothing is collected until a ``FROM`` token is seen;
    * after ``FROM``, every non-whitespace token is treated as a table
      expression until a token containing ``join`` (and no parenthesis)
      appears;
    * the first non-whitespace token after that join is treated as the second
      table expression and scanning stops.

    A table expression containing both ``(`` and ``)`` is treated as a
    subquery: the text between the first ``(`` and the last ``)`` is parsed
    recursively.  Otherwise a trailing ``AS alias`` is stripped and the
    remaining text is recorded.

    Args:
        query: SQL text.

    Returns:
        A list of unique strings in arbitrary order (the result is
        de-duplicated through a ``set``).  Empty when nothing parses or no
        FROM clause is found.
    """
    # Local import: the file's import block is outside the visible source.
    import re

    def _tables_from(fragment):
        # Expand one table expression into a list of table names.
        if "(" in fragment and ")" in fragment:
            # Subquery: recurse on the text between the outermost parentheses.
            inner = fragment[fragment.index("(") + 1:fragment.rindex(")")]
            return tablesInQuery(inner)
        # Strip the alias only at a whole-word AS (any case), so names that
        # merely contain the letters "as" (e.g. "tasks") are left intact.
        name = re.split(r"\s+as\s+", fragment, maxsplit=1, flags=re.IGNORECASE)[0]
        return [name.strip()]

    statements = sp.parse(query)
    if not statements:
        # Empty or whitespace-only input yields no statement to scan.
        return []

    output = []
    state = ""
    for token in statements[0].tokens:
        cur = token.value
        if not cur.strip():
            # Skip every whitespace token (spaces, tabs, newlines).
            continue
        if cur.lower() == "from":
            state = "first table"
            continue
        if state == "first table":
            if "(" not in cur and "join" in cur.lower():
                state = "second table"
                continue
            # NOTE(review): the state stays "first table", so trailing
            # clauses (WHERE ..., ORDER BY ..., ';') are also collected as
            # table expressions when the statement has no join -- confirm
            # whether callers rely on this before tightening it.
            output.extend(_tables_from(cur))
            continue
        if state == "second table":
            output.extend(_tables_from(cur))
            break
    return list(set(output))
#Preprocess query
......
# Code for table processing is from Haorong (haorong4@illinois.edu). Thanks to Haorong.
#
def commitAndExecute(conn, cur, command):
    """Execute *command* on cursor *cur*, then commit on connection *conn*.

    Args:
        conn: object exposing ``commit()``.
        cur: object exposing ``execute(command)``.
        command: statement passed unchanged to ``cur.execute``.

    Returns:
        None.  Any exception raised by ``execute`` propagates before the
        commit is attempted.
    """
    cur.execute(command)
    conn.commit()
......@@ -94,55 +91,4 @@ def combineSameRow(input):
output.append(row)
lastseen = row
return output
# Following code is from Haorong
############################################################################
from sqlparse.sql import IdentifierList, Identifier,Parenthesis, Token, Comparison, Where, Function
from sqlparse.tokens import Keyword, DML,Whitespace, Punctuation, Name
def is_subquery(parsed):
    """Tell whether *parsed* is a token group that directly contains SELECT.

    Args:
        parsed: a parsed token exposing ``is_group`` and, for groups,
            ``tokens`` whose members expose ``ttype`` and ``value``.

    Returns:
        True when *parsed* is a group and one of its direct child tokens has
        type ``DML`` with the value ``SELECT`` (case-insensitive); False
        otherwise, including for every non-group token.
    """
    if parsed.is_group:
        return any(
            child.ttype is DML and child.value.upper() == 'SELECT'
            for child in parsed.tokens
        )
    return False
def get_table_identifier(parsed):
    """Collect the tokens that follow FROM / INTO / UPDATE / VIEW in *parsed*.

    Walks ``parsed.tokens`` once, in order, recursing into group tokens.

    Args:
        parsed: a parsed token group exposing ``tokens``; each token exposes
            ``is_group``, ``ttype`` and ``value``.

    Returns:
        A list of token objects (not strings).  While the "from" flag is
        set, every token that is neither a subquery nor a stop keyword is
        appended as-is -- no whitespace or punctuation filtering happens
        here.
    """
    # NOTE(review): indentation was lost in the source this was recovered
    # from; the nesting below is reconstructed from the if/elif/else pairing
    # -- confirm against the original file.
    from_flag = False  # set after FROM / INTO / UPDATE has been seen
    view_flag = False  # set after VIEW has been seen
    result = []
    for item in list(parsed.tokens):
        # Always descend into group tokens, whatever the current flags are.
        if item.is_group:
            for x in get_table_identifier(item):
                result.append(x)
        if from_flag:
            if is_subquery(item):
                # NOTE(review): is_subquery() is only true for group tokens,
                # which were already expanded just above, so a subquery's
                # results are appended twice -- confirm this is intended.
                for x in get_table_identifier(item):
                    result.append(x)
            elif item.ttype is Keyword and item.value.upper() in ['ORDER', 'GROUP', 'BY', 'HAVING', 'GROUP BY', 'ON', 'WHERE', 'SET']:
                # A clause keyword ends the table list; stop scanning this
                # token list entirely.
                from_flag = False
                break
            else:
                result.append(item)
        elif view_flag and not item.ttype == Whitespace:
            # Only the first non-whitespace token after VIEW is collected.
            result.append(item)
            view_flag = False
        # Flags are updated after the current token is handled, so the
        # keyword token itself is never appended by the branches above.
        if (item.ttype is Keyword and item.value.upper()in ['FROM', 'INTO'] ) or ( item.ttype is DML and item.value.upper() == 'UPDATE' ):
            from_flag = True
        if item.ttype is Keyword and item.value.upper() == 'VIEW':
            view_flag = True
    return result
def decompose_identifiers(identifiers):
    """Flatten *identifiers*, expanding each IdentifierList into its members.

    Args:
        identifiers: iterable of parsed tokens.

    Returns:
        A new list in the original order.  Each ``IdentifierList`` is
        replaced by the items yielded by its ``get_identifiers()``; every
        other token is kept unchanged.
    """
    result = []
    for item in identifiers:
        if isinstance(item, IdentifierList):
            # Append the individual identifiers.  The previous code appended
            # the enclosing list itself once per member, so the list was
            # never actually decomposed.
            result.extend(item.get_identifiers())
        else:
            result.append(item)
    return result
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment