Commit 51d4a6fd authored by Haj Rezvan

Optimized the functions.

parent 88bb2e3d
import json import json
import os import os
import threading
def logs(cnt, i): def logs(cnt, i):
...@@ -14,30 +13,35 @@ def index(): ...@@ -14,30 +13,35 @@ def index():
index_file = open(f"./index/ii.json", "w", encoding="utf-8") index_file = open(f"./index/ii.json", "w", encoding="utf-8")
invert_index = dict() invert_index = dict()
os.chdir("./document tokens") os.chdir("./document tokens")
for tk in os.listdir(): files = os.listdir()
print(tk) counter = 0
pre_percent = 0
print("0% is Index made!")
for tk in files:
token_file = open(f"./{tk}", "r", encoding="utf-8") token_file = open(f"./{tk}", "r", encoding="utf-8")
backup = token_file.readline()
try: try:
tkn = json.load(token_file) tkn = json.load(token_file)
except (json.decoder.JSONDecodeError, Exception) as e: # backup = token_file.readline()
thread = threading.Thread(target=logs, args=(backup, tk,)) tkn = dict(tkn)
thread.start() tk = tk.replace(".json", "")
tkn = dict(tkn) tk = int(tk)
tk = tk.replace(".json", "")
tk = int(tk) for key in tkn.keys():
if key not in invert_index.keys():
for key in tkn.keys(): invert_index[str(key)] = [tk]
if key not in invert_index.keys(): else:
invert_index[str(key)] = [tk] indexes = invert_index[key]
else: indexes.append(tk)
indexes = invert_index[key] indexes.sort()
indexes.append(tk) token_file.close()
indexes.sort()
token_file.close() new_percent = int((counter / len(files)) * 100)
if new_percent != pre_percent:
# str_out = str(invert_index) print(f"{new_percent}% is Index made!")
# str_out = str_out.replace("\'", "\"") pre_percent = new_percent
# str_out = dict(str_out) counter = counter + 1
except Exception as e:
print(f"Exception in file {tk}\n{e.args}\n")
json.dump(invert_index, index_file) json.dump(invert_index, index_file)
print("Invert index made!") print("Invert index made!")
...@@ -38,17 +38,20 @@ def __retrieval(data, i, tag): ...@@ -38,17 +38,20 @@ def __retrieval(data, i, tag):
return obj_cnt return obj_cnt
except Exception as ignore: except Exception as ignore:
print(f"We have a Exception!! {ignore.with_traceback}") print(f"We have a Exception!! {ignore.with_traceback}")
pass
def __processor(file): def __processor(file):
data = json.load(file) data = json.load(file)
length = len(data)
for i in tqdm(range(14000), desc="Splatted: "): doc_ids = list(data.keys())
title = __retrieval(data, i, "title") for i in tqdm(range(length), desc="Splatted: "):
content = __retrieval(data, i, "content") doc_id = doc_ids[i]
url = __retrieval(data, i, "url") title = __retrieval(data, doc_id, "title")
tags = __retrieval(data, i, "tags") content = __retrieval(data, doc_id, "content")
date = __retrieval(data, i, "date") url = __retrieval(data, doc_id, "url")
category = __retrieval(data, i, "category") tags = __retrieval(data, doc_id, "tags")
date = __retrieval(data, doc_id, "date")
__writer(i, title, content, url, tags, date, category) category = __retrieval(data, doc_id, "category")
__writer(doc_id, title, content, url, tags, date, category)
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment