import json
import os

from dotenv import load_dotenv
from pymongo import MongoClient

import DataBase.JsonParser as parser

# Message printed whenever ``opt`` does not name a known collection.
_WRONG_OPT_MESSAGE = "failed to get json file: wrong opt for selecting collection"


def get_db():
    """
    Return the database of the goodReads crawler.

    Environment variables are loaded with ``load_dotenv()`` and the server
    address is read from ``MONGODB_URL``. A new ``MongoClient`` is created on
    every call.

    :return: the "crawler_db" database handle
    """
    load_dotenv()
    url = os.getenv('MONGODB_URL')
    client = MongoClient(url)
    return client.get_database("crawler_db")


def _select_collection(db, opt):
    """
    Map the ``opt`` selector to a collection of ``db``.

    :param db: database handle as returned by ``get_db``
    :param opt: =0 means books collection; =1 means authors collection
    :return: the selected collection, or None when ``opt`` is neither 0 nor 1
    """
    if opt == 0:
        return db.books
    if opt == 1:
        return db.authors
    return None


def insert_dicts(dictionary, opt):
    """
    Insert books or authors collection in database.

    The dictionary is converted with ``parser.parse_book_dict_to_json`` (books)
    or ``parser.parse_author_dict_to_json`` (authors) and the result is stored
    with ``insert_many``. An invalid ``opt`` prints an error and inserts
    nothing; an empty conversion result is silently skipped.

    :param dictionary: the dictionary to insert to collection
    :param opt: =0 means books collection; =1 means authors collection
    :return: no return value
    """
    db = get_db()
    records = _select_collection(db, opt)
    # Compare with None explicitly: pymongo collections do not support
    # truth-value testing.
    if records is None:
        print(_WRONG_OPT_MESSAGE)
        return

    if opt == 0:
        json_list = parser.parse_book_dict_to_json(dictionary)
    else:
        json_list = parser.parse_author_dict_to_json(dictionary)

    if not json_list:
        # insert_many rejects an empty document list, so there is nothing to do.
        return
    records.insert_many(json_list)


def update_dicts(opt, identifier, content):
    """
    Update documents in a given collection.

    Every document matching ``identifier`` gets the fields of ``content``
    applied through a ``$set`` update; the matched and modified counts are
    printed. An invalid ``opt`` prints an error and updates nothing.

    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: the identifier (filter) of the documents we want to find
    :param content: the content to update
    :return: no return value
    """
    db = get_db()
    records = _select_collection(db, opt)
    if records is None:
        # Previously an unknown opt left ``records`` unbound and crashed with
        # UnboundLocalError; report it the same way the sibling functions do.
        print(_WRONG_OPT_MESSAGE)
        return

    result = records.update_many(
        identifier,
        {"$set": content},
    )
    print(f"matched documentation: {result.matched_count}")
    print(f"modified documentation: {result.modified_count}")


def get_documents_json(opt, identifier):
    """
    Find documents specified by the identifier and output them as JSON text.

    The result is a JSON object with a single key, "books" or "authors",
    holding the list of matching documents with their "_id" field removed.
    An invalid ``opt`` prints an error and yields an empty JSON object.

    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want, {} means locate the whole collection
    :return: JSON string of the selected documents
    """
    db = get_db()
    records = _select_collection(db, opt)
    if records is None:
        print(_WRONG_OPT_MESSAGE)
        return json.dumps({})

    type_name = "books" if opt == 0 else "authors"
    documents = []
    for item in records.find(identifier):
        # Drop the database-generated id before serialising the document.
        item.pop("_id")
        documents.append(item)
    return json.dumps({type_name: documents})


def download_collection(opt, identifier, name):
    """
    Download books collection or authors collection to a JSON file.

    The output path is the ``ROOT`` environment variable concatenated directly
    with ``name`` and ".json" (no path separator is inserted, so ``ROOT`` must
    already end with one if it names a directory).

    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want to download;
                       empty({}) means selected all documents in given collection
    :param name: file name of downloaded json
    :return: no return value; the JSON is written to disk
    """
    json_file = get_documents_json(opt, identifier)
    load_dotenv()
    root = os.getenv('ROOT')
    with open(root + name + ".json", "w") as output:
        output.write(json_file)


def clean(opt, identifier):
    """
    Delete specific documents in given collection.

    An invalid ``opt`` prints an error and deletes nothing.

    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want to delete;
                       empty({}) means selected all documents in given collection
    :return: no return value
    """
    db = get_db()
    records = _select_collection(db, opt)
    if records is None:
        print(_WRONG_OPT_MESSAGE)
        return
    records.delete_many(identifier)