Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import re
def check_if_address_valid(address):
    """
    Check whether an address looks like a Goodreads book page URL.

    The address must start with ``https://www.goodreads.com/book/show/``
    followed by at least one digit. Only the prefix is checked, so anything
    may follow the numeric id.

    :param address: the URL to check
    :return: True if the address has the expected prefix, False otherwise
    """
    if not isinstance(address, str):
        # re.search would raise TypeError; a non-string is simply not valid.
        return False
    # Dots are escaped so they match a literal "." and not any character.
    pattern = r"^https://www\.goodreads\.com/book/show/[0-9]+"
    return re.search(pattern, address) is not None
def parse_query_to_url(query):
    """
    Translate a textual query into its URL-encoded form.

    Supported shapes (``A.B`` is ``object.attribute``, ``C`` is the value):

    * ``A.B:C`` or ``A.B: C``
    * ``A.B: NOT C``, ``A.B: > C``, ``A.B: < C``
    * two of the above joined by ``AND`` or ``OR``

    The colon is emitted as ``%3A`` and the logical keyword is wrapped in
    ``%26``. Simple queries are passed through ``url_safe``, a helper that is
    defined outside this block.

    :param query: the query string typed by the user
    :return: the encoded query, or an empty string (after printing
        "Invalid query.") when the query cannot be parsed
    """
    elements = re.findall(r'[0-9A-Za-z_"><]+', query)
    count = len(elements)
    # "object.attribute:" prefix; attribute names may be longer than one char.
    field_prefix = r'^[0-9a-zA-Z_]+\.[0-9a-zA-Z_]+:'
    value_start = r'[0-9a-zA-Z_"]'

    if count == 3:
        # can only be A.B:C or wrong
        if re.search(field_prefix + r'\s*' + value_start, query):
            return url_safe(elements[0] + "." + elements[1] + "%3A" + elements[2])
    elif count == 4:
        # a pair and one of [NOT, >, <]; tokens are [A, B, operator, C]
        for operator in ("NOT", ">", "<"):
            pattern = field_prefix + r'\s' + re.escape(operator) + r'\s' + value_start
            if re.search(pattern, query):
                # elements[2] is the operator token itself; the value is elements[3].
                return url_safe(elements[0] + "." + elements[1] + "%3A" + operator + elements[3])
    elif 6 <= count <= 8:
        # AND or OR operator; AND is tried first.
        for keyword in ("AND", "OR"):
            # Split on the same whitespace rule used for detection so a match
            # always yields two parts.
            parts = re.split(r'\s' + keyword + r'\s', query, maxsplit=1)
            if len(parts) == 2:
                left = parse_query_to_url(parts[0])
                right = parse_query_to_url(parts[1])
                if left and right:
                    return left + "%26" + keyword + "%26" + right
                # The failing side has already reported "Invalid query.".
                return ""

    # Every unmatched shape, including unsupported token counts, ends here.
    print("Invalid query.")
    return ""
def parse_query_to_json(pair):
    """
    Convert one ``object.attribute <operator> value`` pair into a filter dict.

    The pair must contain exactly two tokens made of letters, digits, ``"``,
    ``_`` and ``.``: the dotted field and the value. The dict key is the part
    of the field after the first dot. The result depends on what else the
    pair contains, checked in this order:

    * ``:``   - numeric value -> ``{attr: float}``; quoted value ->
      ``{attr: value}`` (quotes are kept); otherwise a case-insensitive
      ``$regex`` match
    * ``NOT`` - ``$ne`` for a numeric value, otherwise ``$not`` of a
      case-insensitive ``$regex``
    * ``>``   - ``$gt`` with the value as float
    * ``<``   - ``$lt`` with the value as float

    NOTE(review): a pair containing ``:`` is always handled by the first
    rule, even if it also contains ``NOT``, ``>`` or ``<``, and with exactly
    two tokens required, ``NOT`` can only appear inside a token. Confirm the
    format the caller actually sends.

    :param pair: the query pair as text
    :return: the filter dict (presumably for MongoDB, judging by the
        operator keys), or ``{"wrong": "True"}`` when the pair cannot be
        parsed
    """
    print(pair)
    elements = re.findall(r'[0-9A-Za-z"_.]+', pair)
    if len(elements) != 2:
        print("Failed to parse query: invalid args number")
        return {"wrong": "True"}  # will never be found in database

    field_parts = elements[0].split(".")
    if len(field_parts) < 2:
        # No "object.attribute" form, so there is no attribute to filter on.
        print("Failed to parse query: invalid field name")
        return {"wrong": "True"}
    attribute = field_parts[1]
    value = elements[1]

    # Digits-and-dots values are numbers; strings such as "1.2.3" pass the
    # pattern but are not valid floats and are then treated as text.
    number = None
    if re.search(r"^[0-9.]*$", value):
        try:
            number = float(value)
        except ValueError:
            number = None

    # can be A.B: C or A.B: "C"
    if re.search(":", pair):
        if number is not None:
            return {attribute: number}
        if re.search(r'".*"', value):
            return {attribute: value}
        return {attribute: {"$regex": value, "$options": "i"}}

    if re.search("NOT", pair):
        if number is not None:
            return {attribute: {"$ne": number}}
        return {attribute: {"$not": {"$regex": value, "$options": "i"}}}

    if re.search(">", pair):
        comparison = "$gt"
    elif re.search("<", pair):
        comparison = "$lt"
    else:
        print("Failed to parse query: unknown operator")
        return {"wrong": "True"}

    try:
        return {attribute: {comparison: float(value)}}
    except ValueError:
        # Comparisons only make sense for numeric values.
        print("Failed to parse query: value is not a number")
        return {"wrong": "True"}