    # mongoDB.py -- MongoDB helpers for the goodReads crawler database.
    import json
    import DataBase.JsonParser as parser
    import os
    from dotenv import load_dotenv
    from pymongo import MongoClient
    
    
    def get_db():
        """
        Return the "crawler_db" database of the goodReads crawler.

        The MongoClient is built on the first call only (from the MONGODB_URL
        environment variable, read after loading the .env file) and cached on
        the function object. Every helper in this module calls get_db(), so
        without the cache each operation would open another client that is
        never closed.
        :return: the pymongo database object for "crawler_db"
        """
        client = getattr(get_db, "_client", None)
        if client is None:
            load_dotenv()
            url = os.getenv('MONGODB_URL')
            client = MongoClient(url)
            # Remember the client so later calls reuse it.
            get_db._client = client
        return client.get_database("crawler_db")
    
    
    
    def insert_document(docu, opt):
        """
        Insert a single document into the books or authors collection
        :param docu: the document to insert
        :param opt: =0 means books collection; =1 means authors collection
        :return: no return value
        """
        db = get_db()
        if opt == 0:
            records = db.test_books
        elif opt == 1:
            # This branch was missing, so opt == 0 ended up writing to the
            # authors collection and opt == 1 was rejected as invalid.
            records = db.test_authors
        else:
            print("failed to get json file: wrong opt for selecting collection")
            return
        records.insert_one(docu)
    
    
    
    def insert_dicts(dictionary, opt):
        """
        Insert books or authors collection in database
        :param dictionary: the dictionary to insert to collection
        :param opt: =0 means books collection; =1 means authors collection
        :return: no return value
        """
        db = get_db()
        # Pick the target collection and the matching parser in one pass.
        if opt == 0:
            records = db.test_books
            json_list = parser.parse_book_dict_to_json(dictionary)
        elif opt == 1:
            records = db.test_authors
            json_list = parser.parse_author_dict_to_json(dictionary)
        else:
            print("failed to get json file: wrong opt for selecting collection")
            return
        if not json_list:
            # insert_many rejects an empty document list, so an empty parse
            # result is treated as "nothing to insert" rather than an error.
            return
        records.insert_many(json_list)
    
    
    def update_dicts(opt, identifier, content):
        """
        Update one document in a given collection and print how many
        documents were matched and modified
        :param opt: =0 means books collection; =1 means authors collection
        :param identifier: the filter used to find the document to update
        :param content: the fields to set on the matched document
        :return: no return value
        """
        db = get_db()
        if opt == 0:
            records = db.test_books
        elif opt == 1:
            records = db.test_authors
        else:
            print("failed to get json file: wrong opt for selecting collection")
            return
        # update_one only touches the first document matching the filter.
        result = records.update_one(identifier, {"$set": content})
        print(f"matched documentation: {result.matched_count}")
        print(f"modified documentation: {result.modified_count}")
    
    
    def get_documents_json(opt, identifier):
        """
        Find the documents selected by the identifier and return them as JSON
        :param opt: =0 means books collection; =1 means authors collection
        :param identifier: identifier of the documents we want, {} means locate the whole collection
        :return: JSON string of the form {"books": [...]} or {"authors": [...]};
        "{}" when opt is not 0 or 1
        """
        db = get_db()
        if opt == 0:
            records = db.test_books
            type_name = "books"
        elif opt == 1:
            records = db.test_authors
            type_name = "authors"
        else:
            print("failed to get json file: wrong opt for selecting collection")
            return json.dumps({})
        documents = []
        for item in records.find(identifier):
            # Drop the "_id" field before serializing (presumably because
            # its value is not JSON serializable -- confirm).
            item.pop("_id")
            documents.append(item)
        return json.dumps({type_name: documents})
    
    
    def download_collection(opt, identifier, name):
        """
        Write the selected books or authors documents to a JSON file
        :param opt: =0 means books collection; =1 means authors collection
        :param identifier: identifier of the documents we want to download;
        empty({}) means selected all documents in given collection
        :param name: file name of downloaded json (without the ".json" suffix)
        :return: no return value; the file FILE_ROOT + name + ".json" is written
        """
        json_file = get_documents_json(opt, identifier)
        load_dotenv()
        # FILE_ROOT is used as a raw prefix of the file name. When it is not
        # set, fall back to an empty prefix (path relative to the working
        # directory) instead of failing on None + str.
        file_root = os.getenv('FILE_ROOT') or ""
        with open(file_root + name + ".json", "w") as output:
            output.write(json_file)
    
    
    def clean(opt, identifier):
        """
        Delete specific documents in given collection
        :param opt: =0 means books collection; =1 means authors collection
        :param identifier: identifier of the documents we want to delete;
        empty({}) means selected all documents in given collection
        :return: no return value
        """
        db = get_db()
        if opt == 0:
            records = db.test_books
        elif opt == 1:
            records = db.test_authors
        else:
            print("failed to get json file: wrong opt for selecting collection")
            return
        # Removes every document matching the filter, not just the first.
        records.delete_many(identifier)