import re

# Character class accepted for a field name such as ``book.title``.
_FIELD = r"[0-9a-zA-Z_.]+"
# Accepted first character of a value; the rest of the value is not validated.
_VALUE_START = r'[0-9a-zA-Z_".]'
# Single-pass escape table used by url_safe().
_URL_ESCAPES = str.maketrans(
    {'"': "%22", ">": "%3E", "<": "%3C", "&": "%26", ":": "%3A"}
)


def check_if_address_valid(address: str) -> bool:
    """
    Tell whether an address looks like a Goodreads book page URL.

    :param address: URL string to check.
    :return: True if the address starts with
        ``https://www.goodreads.com/book/show/`` followed by at least one
        digit, False otherwise.
    """
    # Dots are escaped so that only the literal host name is accepted.
    pattern = r"https://www\.goodreads\.com/book/show/[0-9]+"
    return re.match(pattern, address) is not None


def parse_query_to_url(query: str) -> str:
    """
    Convert a textual query into its URL-encoded form.

    Supported shapes (decided by the number of tokens found in the query):

    * ``A.B:C``                      -> ``A.B%3AC``
    * ``A.B: NOT C`` / ``> C`` / ``< C`` -> ``A.B%3ANOTC`` / ``%3E`` / ``%3C``
    * ``<query> AND <query>`` / ``<query> OR <query>`` -> both sides
      converted recursively and joined with ``%26AND%26`` / ``%26OR%26``.

    :param query: the query text typed by the user.
    :return: the URL-safe query string, or ``""`` (after printing a message)
        when the query does not match any supported shape.
    """
    elements = re.findall(r'[0-9A-Za-z_"><.]+', query)
    count = len(elements)

    if count == 2:
        # can only be A.B:C or wrong
        if re.search(rf"^{_FIELD}:{_VALUE_START}", query):
            return url_safe(elements[0] + "%3A" + elements[1])
        print("Invalid query1.")
        return ""

    if count == 3:
        # a pair and one of [NOT, >, <].
        for operator in ("NOT", ">", "<"):
            shape = rf"^{_FIELD}:\s*{operator}\s*{_VALUE_START}"
            if re.search(shape, query):
                return url_safe(elements[0] + "%3A" + operator + elements[2])
        print("Invalid query2.")
        return ""

    if 5 <= count <= 7:
        # AND or OR operator; AND is tried first.
        for operator in ("AND", "OR"):
            # Split with the same whitespace rule used for detection, so an
            # operator surrounded by tabs/newlines cannot pass detection and
            # then fail to split.
            parts = re.split(rf"\s{operator}\s", query, maxsplit=1)
            if len(parts) == 2:
                return (
                    parse_query_to_url(parts[0])
                    + f"%26{operator}%26"
                    + parse_query_to_url(parts[1])
                )
        print("Invalid query3.")
        return ""

    # Any other token count (0, 1, 4, 8+) is unsupported. Return "" like the
    # other failure paths so recursive callers can always concatenate.
    print("Invalid query: unsupported number of terms.")
    return ""


def _unmatchable() -> dict:
    """Return a fresh filter that will never be found in the database."""
    return {"wrong": "True"}


def _field_name(token: str):
    """Return the part after the first dot of ``A.B``, or None if no dot."""
    pieces = token.split(".")
    return pieces[1] if len(pieces) > 1 else None


def _as_number(token: str):
    """Return ``token`` as a float when it is digits/dots and parses, else None."""
    if not re.fullmatch(r"[0-9.]+", token):
        return None
    try:
        return float(token)
    except ValueError:
        # e.g. "1.2.3" or "." pass the character check but are not numbers.
        return None


def parse_query_to_json(pair: str) -> dict:
    """
    Convert a single ``field operator value`` pair into a MongoDB-style filter.

    Results by shape:

    * ``A.B: C``      -> ``{B: float(C)}`` when C is numeric and the field
      name does not contain ``id``; ``{B: C}`` when C is wrapped in double
      quotes; otherwise a case-insensitive ``$regex`` filter.
    * ``A.B: NOT C``  -> ``$ne`` for numeric C (same ``id`` rule), otherwise
      ``$not`` + ``$regex``.
    * ``A.B > C`` / ``A.B < C`` -> ``$gt`` / ``$lt`` with a float operand.

    :param pair: the query text for one field.
    :return: the filter dict, or ``{"wrong": "True"}`` (after printing a
        message) when the pair cannot be parsed.
    """
    elements = re.findall(r'[0-9A-Za-z"_.]+', pair)
    count = len(elements)
    if count not in (2, 3):
        print("Failed to parse query: invalid args number")
        return _unmatchable()  # will never be found in database

    field = _field_name(elements[0])
    if field is None:
        # Field must look like A.B; without a dot there is no attribute name.
        print("Failed to parse query: invalid field name")
        return _unmatchable()

    # Fields whose name contains "id" are never converted to numbers.
    numeric_allowed = not re.search("id", elements[0])

    if count == 3:
        # A.B: NOT C
        # NOTE(review): the middle token is not checked to be "NOT"; any
        # three-token pair takes this path.
        number = _as_number(elements[2]) if numeric_allowed else None
        if number is not None:
            return {field: {"$ne": number}}
        return {field: {"$not": {"$regex": elements[2], "$options": "i"}}}

    value = elements[1]
    if re.search(":", pair):
        # can be A.B: C or A.B: "C"
        number = _as_number(value) if numeric_allowed else None
        if number is not None:
            return {field: number}
        if re.search(r'".*"', value):
            # NOTE(review): the surrounding quotes are kept in the value.
            return {field: value}
        return {field: {"$regex": value, "$options": "i"}}

    # NOTE(review): comparisons are only recognised when the pair contains
    # no ':'; confirm callers strip it before passing "A.B > C".
    if re.search(">", pair):
        comparison = "$gt"
    elif re.search("<", pair):
        comparison = "$lt"
    else:
        print("Failed to parse query: unknown operator")
        return _unmatchable()

    try:
        return {field: {comparison: float(value)}}
    except ValueError:
        print("Failed to parse query: invalid number")
        return _unmatchable()


def url_safe(element: str) -> str:
    """
    Percent-encode the characters ``"``, ``>``, ``<``, ``&`` and ``:``.

    ``%`` itself is left untouched, so already-encoded sequences such as
    ``%3A`` in the input pass through unchanged.

    :param element: text to encode.
    :return: the encoded text.
    """
    return element.translate(_URL_ESCAPES)