Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
SQL Provenance
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
haoyuz3
SQL Provenance
Commits
ea092031
Commit
ea092031
authored
4 years ago
by
Haoyu Z
Browse files
Options
Downloads
Patches
Plain Diff
debug
parent
480b1351
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
provsql/src/provenance.py
+31
-8
31 additions, 8 deletions
provsql/src/provenance.py
provsql/src/provenanceUtils.py
+0
-54
0 additions, 54 deletions
provsql/src/provenanceUtils.py
with
31 additions
and
62 deletions
provsql/src/provenance.py
+
31
−
8
View file @
ea092031
...
...
@@ -14,14 +14,37 @@ import traceback
# Module-level dictionary, initially empty.
# NOTE(review): judging by the name it tracks tables that already carry
# provenance, but its keys/values are not visible in this excerpt; confirm
# against the code that populates it.
tablesWithProvenance = {}
def _tablesFromToken(text):
    """Return the table names contributed by one FROM/JOIN operand.

    A parenthesised operand is treated as a sub-query: the text between the
    first "(" and the last ")" is handed back to tablesInQuery. Anything else
    is treated as ``name`` or ``name AS alias`` and only the name is kept.
    """
    import re  # local import so the helper does not depend on file-level imports

    if "(" in text and ")" in text:
        return tablesInQuery(text[text.index("(") + 1:text.rindex(")")])
    # Split on a whole-word, case-insensitive AS so names that merely contain
    # the letters "as" (e.g. "tasks") are not truncated.
    name = re.split(r"\s+as\s+", text, maxsplit=1, flags=re.IGNORECASE)[0]
    return [name.strip()]


def tablesInQuery(query):
    """Collect the table names referenced in the FROM clause of *query*.

    Only the first statement returned by ``sp.parse`` is inspected. Its
    top-level tokens are walked with a small state machine:

    * ``""``             -- nothing recorded until a FROM keyword is seen;
    * ``"first table"``  -- every non-blank token is recorded until a token
      containing "join" (and no parenthesis) is seen;
    * ``"second table"`` -- the next non-blank token is recorded and the
      scan stops.

    Parenthesised tokens are parsed recursively as sub-queries.

    NOTE(review): in the "first table" state every later top-level token is
    recorded too, so trailing clauses can end up in the result, and only a
    single JOIN is followed. Both limitations are inherited from the original
    logic and are left unchanged here.

    Returns a list of unique names in first-seen order; an empty list when
    *query* contains no statement.
    """
    statements = sp.parse(query)
    if not statements:
        # Empty input, e.g. the inside of "()" during recursion.
        return []

    output = []
    state = ""
    for token in statements[0].tokens:
        cur = token.value
        # Skip every blank token (spaces, tabs, newlines), not just one space.
        if not cur.strip():
            continue
        lowered = cur.lower()
        if lowered == "from":
            state = "first table"
            continue
        if state == "first table":
            # Match JOIN case-insensitively, consistent with the FROM check.
            if "(" not in cur and "join" in lowered:
                state = "second table"
                continue
            output.extend(_tablesFromToken(cur))
            continue
        if state == "second table":
            output.extend(_tablesFromToken(cur))
            break
    # dict.fromkeys removes duplicates while keeping a deterministic order.
    return list(dict.fromkeys(output))
#Preprocess query
...
...
This diff is collapsed.
Click to expand it.
provsql/src/provenanceUtils.py
+
0
−
54
View file @
ea092031
# Code for table processing is from Haorong (haorong4@illinois.edu). Thanks to Haorong.
#
def commitAndExecute(conn, cur, command):
    """Run *command* on the cursor *cur*, then commit on connection *conn*.

    Returns None. Any exception raised by ``cur.execute`` or ``conn.commit``
    propagates to the caller unchanged.
    """
    # Execute first, commit second: the commit must cover this statement.
    cur.execute(command)
    conn.commit()
...
...
@@ -94,55 +91,4 @@ def combineSameRow(input):
output
.
append
(
row
)
lastseen
=
row
return
output
# Following code is from Haorong
############################################################################
from
sqlparse.sql
import
IdentifierList
,
Identifier
,
Parenthesis
,
Token
,
Comparison
,
Where
,
Function
from
sqlparse.tokens
import
Keyword
,
DML
,
Whitespace
,
Punctuation
,
Name
def is_subquery(parsed):
    """Tell whether *parsed* is a token group that directly holds a SELECT.

    Returns False for anything whose ``is_group`` is falsy. Otherwise returns
    True as soon as one of ``parsed.tokens`` has ttype ``DML`` and the value
    "SELECT" (compared case-insensitively), else False.
    """
    # A non-group token has no children to inspect.
    if not parsed.is_group:
        return False
    return any(
        child.ttype is DML and child.value.upper() == 'SELECT'
        for child in parsed.tokens
    )
def get_table_identifier(parsed):
    """Collect the tokens that name tables (or a view) inside *parsed*.

    Walks ``parsed.tokens`` in order, keeping two flags:

    * the "table" flag is raised after a FROM or INTO keyword or an UPDATE
      DML token; while it is up, each following token is either expanded
      (when it is a sub-query), collected as-is, or -- when it is one of the
      clause keywords listed below -- ends the scan;
    * the "view" flag is raised after a VIEW keyword; the next token whose
      ttype is not Whitespace is collected and the flag is lowered.

    Every group token is additionally expanded recursively, regardless of the
    flags, so a sub-query met while the table flag is up is expanded twice.

    Returns the collected tokens as a list.
    """
    clause_keywords = ['ORDER', 'GROUP', 'BY', 'HAVING', 'GROUP BY', 'ON',
                       'WHERE', 'SET']
    table_keywords = ['FROM', 'INTO']

    collected = []
    expect_table = False
    expect_view = False

    for tok in parsed.tokens:
        # Groups are always searched, whatever state we are in.
        if tok.is_group:
            collected.extend(get_table_identifier(tok))

        if expect_table:
            if is_subquery(tok):
                collected.extend(get_table_identifier(tok))
            elif tok.ttype is Keyword and tok.value.upper() in clause_keywords:
                # A following clause starts: stop scanning this token list.
                expect_table = False
                break
            else:
                collected.append(tok)
        elif expect_view and not tok.ttype == Whitespace:
            collected.append(tok)
            expect_view = False

        # Flags are updated after the token is handled, so they take effect
        # from the next token onwards.
        starts_table_list = (
            tok.ttype is Keyword and tok.value.upper() in table_keywords
        ) or (
            tok.ttype is DML and tok.value.upper() == 'UPDATE'
        )
        if starts_table_list:
            expect_table = True
        if tok.ttype is Keyword and tok.value.upper() == 'VIEW':
            expect_view = True

    return collected
def decompose_identifiers(identifiers):
    """Flatten *identifiers* so IdentifierList entries become their members.

    Each item that is an ``IdentifierList`` is replaced by the identifiers it
    yields from ``get_identifiers()``; every other item is kept as-is. Order
    is preserved.

    The previous version appended the list itself once per member instead of
    the member, so nothing was actually decomposed.

    Returns a new list; *identifiers* is not modified.
    """
    result = []
    for item in identifiers:
        if isinstance(item, IdentifierList):
            result.extend(item.get_identifiers())
        else:
            result.append(item)
    return result
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment