Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import json
import DataBase.JsonParser as parser
import os
from dotenv import load_dotenv
from pymongo import MongoClient
def get_db():
    """
    Return the database used by the goodReads crawler.

    Loads environment variables through ``load_dotenv()``, reads the
    connection string from ``MONGODB_URL`` and opens a new ``MongoClient``
    on every call.

    :return: the "crawler_db" database of the connected client
    """
    load_dotenv()
    mongo_client = MongoClient(os.getenv('MONGODB_URL'))
    return mongo_client.get_database("crawler_db")
def insert_dicts(dictionary, opt):
    """
    Insert the entries of a dictionary into the books or authors collection.

    ``opt`` is validated before anything else happens, so an invalid value
    only prints a message and returns without opening a database connection.

    :param dictionary: the dictionary to insert to collection
    :param opt: =0 means books collection; =1 means authors collection
    :return: no return value
    """
    # Resolve the collection and its matching parser in a single dispatch.
    if opt == 0:
        collection_name = "books"
        documents = parser.parse_book_dict_to_json(dictionary)
    elif opt == 1:
        collection_name = "authors"
        documents = parser.parse_author_dict_to_json(dictionary)
    else:
        # Message text kept as-is so existing output does not change.
        print("failed to get json file: wrong opt for selecting collection")
        return

    # pymongo's insert_many rejects an empty document list with a TypeError,
    # so skip the round trip when there is nothing to insert.
    if not documents:
        print("nothing to insert: parsed document list is empty")
        return

    records = getattr(get_db(), collection_name)
    records.insert_many(documents)
def update_dicts(opt, identifier, content):
    """
    Update documents in a given collection.

    Every document matched by ``identifier`` gets the fields in ``content``
    applied with a ``$set`` update; the matched and modified counts are
    printed afterwards.

    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: the identifier (filter) of the documents we want to find
    :param content: the content to update (field -> new value)
    :return: no return value
    """
    if opt == 0:
        collection_name = "books"
    elif opt == 1:
        collection_name = "authors"
    else:
        # Without this branch an unknown opt left the collection variable
        # unbound and the function crashed with UnboundLocalError.
        print("failed to update documents: wrong opt for selecting collection")
        return

    records = getattr(get_db(), collection_name)
    result = records.update_many(
        identifier,
        {"$set": content},
    )
    print("matched documentation: " + str(result.matched_count))
    print("modified documentation: " + str(result.modified_count))
def get_documents_json(opt, identifier):
    """
    Find the documents selected by the identifier and return them as JSON text.

    The result is a JSON object with a single key, "books" or "authors",
    holding the list of matching documents with their "_id" field removed.
    ``opt`` is validated before a database connection is opened.

    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want, {} means locate the whole collection
    :return: JSON string of the selected documents; "{}" when opt is invalid
    """
    if opt == 0:
        type_name = "books"
    elif opt == 1:
        type_name = "authors"
    else:
        print("failed to get json file: wrong opt for selecting collection")
        return json.dumps({})

    records = getattr(get_db(), type_name)
    documents = []
    for item in records.find(identifier):
        # Drop the database-generated "_id" before serialising (presumably
        # because it is not JSON serialisable); tolerate it being absent.
        item.pop("_id", None)
        documents.append(item)
    return json.dumps({type_name: documents})
def download_collection(opt, identifier, name):
    """
    Write the selected documents of a collection to a JSON file.

    The target path is the ``ROOT`` environment variable, followed directly
    by ``name`` and the ".json" suffix; no path separator is inserted, so
    ROOT has to carry its own trailing separator.

    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want to download;
    empty({}) means selected all documents in given collection
    :param name: file name of downloaded json (without extension)
    :return: no return value; the JSON text is written to disk
    """
    payload = get_documents_json(opt, identifier)
    load_dotenv()
    target_path = os.getenv('ROOT') + name + ".json"
    with open(target_path, "w") as out_file:
        out_file.write(payload)
def clean(opt, identifier):
    """
    Delete specific documents in the given collection.

    ``opt`` is validated before a database connection is opened, so an
    invalid value only prints a message and returns.

    :param opt: =0 means books collection; =1 means authors collection
    :param identifier: identifier of the documents we want to delete;
    empty({}) means selected all documents in given collection
    :return: no return value
    """
    if opt == 0:
        collection_name = "books"
    elif opt == 1:
        collection_name = "authors"
    else:
        # Message text kept as-is so existing output does not change.
        print("failed to get json file: wrong opt for selecting collection")
        return

    records = getattr(get_db(), collection_name)
    records.delete_many(identifier)