Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import re
def check_if_address_valid(address):
    """
    Tell whether ``address`` points at a Goodreads book page.

    :param address: URL string to validate.
    :return: ``True`` when ``address`` starts with
        ``https://www.goodreads.com/book/show/`` followed by at least one
        digit; ``False`` otherwise (including when ``address`` is not a string).
    """
    if not isinstance(address, str):
        return False
    # The dots are escaped so that only the literal host name is accepted;
    # an unescaped "." would also match look-alike hosts such as
    # "wwwXgoodreadsXcom".
    pattern = r"https://www\.goodreads\.com/book/show/[0-9]+"
    return re.match(pattern, address) is not None
def parse_query_to_url(query):
    """
    Translate a textual query into its URL-encoded form.

    Recognised shapes (``A`` and ``B`` are runs of letters, digits or
    underscores; ``C`` may additionally carry double quotes):

    * ``A.B:C``                    -> ``A.B%3AC``
    * ``A.B: NOT C``               -> ``A.B%3ANOTC``
    * ``A.B: > C`` / ``A.B: < C``  -> ``A.B%3A%3EC`` / ``A.B%3A%3CC``
    * ``<pair> AND <pair>`` / ``<pair> OR <pair>`` -> both encoded halves
      joined by ``%26AND%26`` / ``%26OR%26``

    :param query: query text to encode.
    :return: the encoded query, or ``""`` (after printing ``Invalid query.``)
        when the text matches none of the shapes above. A string is always
        returned, so results can be concatenated safely.
    """
    tokens = re.findall(r'[0-9A-Za-z_"><]+', query)
    count = len(tokens)
    # Every simple pair starts with "<name>.<name>:"; both names may have any
    # length, and whitespace after the colon is optional.
    field_prefix = r"^[0-9a-zA-Z_]+\.[0-9a-zA-Z_]+:\s*"
    value_start = r'[0-9a-zA-Z_"]'

    if count == 3:
        # can only be A.B:C or wrong
        if re.search(field_prefix + value_start, query):
            return url_safe(tokens[0] + "." + tokens[1] + "%3A" + tokens[2])
    elif count == 4:
        # a pair and one of [NOT, >, <]; tokens are [A, B, operator, C]
        for operator in ("NOT", ">", "<"):
            if re.search(field_prefix + operator + r"\s+" + value_start, query):
                return url_safe(
                    tokens[0] + "." + tokens[1] + "%3A" + operator + tokens[3]
                )
    elif 7 <= count <= 9:
        # Two pairs (3 or 4 tokens each) joined by AND / OR; the connective is
        # itself a token, hence 7..9 rather than 6..8.
        for connective in ("AND", "OR"):
            halves = re.split(r"\s" + connective + r"\s", query, maxsplit=1)
            if len(halves) != 2:
                continue
            left = parse_query_to_url(halves[0].strip())
            right = parse_query_to_url(halves[1].strip())
            if left and right:
                return left + "%26" + connective + "%26" + right
            # The failing half has already been reported; never hand back a
            # half-built URL.
            return ""

    print("Invalid query.")
    return ""
def parse_query_to_json(pair):
    """
    Convert one ``A.B: value`` query pair into a filter dictionary built from
    MongoDB-style query operators.

    Accepted forms (whitespace around the colon and operator is optional,
    except that ``NOT`` must be followed by whitespace):

    * ``A.B: C``      -> ``{"A.B": {"$regex": "C", "$options": "i"}}``
    * ``A.B: "C"``    -> ``{"A.B": "C"}`` (exact match; the surrounding quotes
      are removed and the quoted text may contain spaces)
    * ``A.B: NOT C``  -> ``{"A.B": {"$not": {"$regex": "C", "$options": "i"}}}``
    * ``A.B: > C``    -> ``{"A.B": {"$gt": float(C)}}``
    * ``A.B: < C``    -> ``{"A.B": {"$lt": float(C)}}``

    :param pair: text of a single query pair.
    :return: the filter dictionary, or ``{"wrong": "True"}`` (after printing a
        diagnostic) when ``pair`` cannot be parsed or a comparison value is
        not a number.
    """
    parsed = re.fullmatch(
        r'\s*([0-9A-Za-z_]+)\.([0-9A-Za-z_]+)\s*:\s*'
        # Optional operator. The lookahead keeps a value that merely starts
        # with "NOT" (e.g. "NOTES") from being read as NOT + "ES".
        r'(?:(NOT(?=\s)|[<>])\s*)?'
        # A double-quoted value or a single run of non-blank characters.
        r'("[^"]*"|\S+)\s*',
        pair,
    )
    if parsed is None:
        print("Failed to parse query: invalid args number")
        return {"wrong": "True"}  # will never be found in database

    collection, attribute, operator, value = parsed.groups()
    field = collection + "." + attribute
    quoted = len(value) >= 2 and value.startswith('"') and value.endswith('"')
    if quoted:
        value = value[1:-1]

    if operator is None:
        if quoted:
            return {field: value}
        return {field: {"$regex": value, "$options": "i"}}
    if operator == "NOT":
        return {field: {"$not": {"$regex": value, "$options": "i"}}}

    # Remaining operators are the numeric comparisons ">" and "<".
    try:
        bound = float(value)
    except ValueError:
        print("Failed to parse query: comparison value is not a number")
        return {"wrong": "True"}
    return {field: {"$gt" if operator == ">" else "$lt": bound}}
def url_safe(element):
    """
    Percent-encode the double-quote, ``>`` and ``<`` characters of ``element``.

    :param element: text to encode.
    :return: copy of ``element`` in which ``"`` became ``%22``, ``>`` became
        ``%3E`` and ``<`` became ``%3C``; every other character is unchanged.
    """
    escapes = str.maketrans({'"': "%22", ">": "%3E", "<": "%3C"})
    return element.translate(escapes)