Commit e63793e9 authored by Haj Rezvan's avatar Haj Rezvan

Complete functions

parent e3f40329
...@@ -3,37 +3,8 @@ import os ...@@ -3,37 +3,8 @@ import os
import threading import threading
def subtract(a, b): def __printer():
return [*(item for item in a if item not in b)] print(os.listdir())
def union_two(a, b):
    """Merge two ascending-sorted lists into one sorted list.

    This is a plain merge step (duplicates are kept), not a set union.
    Ties take the element from ``a`` first, so the merge is stable.
    """
    merged = []
    i = j = 0
    # Walk both lists in lockstep, always taking the smaller head.
    while i < len(a) and j < len(b):
        if b[j] < a[i]:
            merged.append(b[j])
            j += 1
        else:
            # a[i] <= b[j]: prefer a on ties (stable merge).
            merged.append(a[i])
            i += 1
    # Exactly one of these slices is non-empty; append the leftovers.
    merged.extend(a[i:])
    merged.extend(b[j:])
    return merged
def normalize_list(lst): def normalize_list(lst):
...@@ -43,35 +14,6 @@ def normalize_list(lst): ...@@ -43,35 +14,6 @@ def normalize_list(lst):
return lst return lst
def union(inp):
    """Merge a list of ascending-sorted lists into a single sorted list.

    The original implementation was broken for three or more input lists:
    the recursive call used the slice ``vl[0: n - 2]`` (silently dropping
    element ``n - 2``) and then ``tmp.append(vl)`` nested the merged result
    as a single element, so ``union_two`` ended up comparing an element
    against a whole list. This version folds ``union_two`` left-to-right
    over the normalized inputs instead.

    :param inp: list of sorted lists (normalized via ``normalize_list``).
    :return: one sorted list containing every element of every input list;
             ``[]`` when there is nothing to merge.
    """
    vl = normalize_list(inp)
    if not vl:
        return []
    result = vl[0]
    # Pairwise-merge each remaining list into the running result.
    for nxt in vl[1:]:
        result = union_two(result, nxt)
    return result
def intersect(lst1, lst2):
    """Return the elements common to both lists.

    Duplicates collapse and ordering is arbitrary, since the work is
    done through set intersection.
    """
    common = set(lst1)
    common &= set(lst2)
    return list(common)
def operations(inp):
    """Map the first boolean keyword found in *inp* to its operator symbol.

    Checks "AND" before "NOT" (mutually exclusive, matching the original
    if/elif chain), so an input containing both yields only ["&"].
    Returns a list with at most one symbol; empty when neither appears.
    """
    if "AND" in inp:
        return ["&"]
    if "NOT" in inp:
        return ["!"]
    return []
def get_info(inp): def get_info(inp):
result = list() result = list()
if type(inp) == dict: if type(inp) == dict:
...@@ -84,14 +26,16 @@ def get_info(inp): ...@@ -84,14 +26,16 @@ def get_info(inp):
# Reading docs from docs dictionary. # Reading docs from docs dictionary.
out = list() out = list()
# __printer()
for docID in result: for docID in result:
file = open(f"./docs/{docID}.json", "r", encoding="utf-8") file = open(f'./docs/{docID}.json', "r", encoding="utf-8")
cnt = json.load(file) cnt = json.load(file)
cnt = dict(cnt) cnt = dict(cnt)
val = cnt.values() val = cnt.values()
out.append(list(val)) out.append(list(val))
file.close() file.close()
# os.chdir("..")
return out return out
...@@ -184,7 +128,7 @@ def __simple_check(i: int, query: list, dictionary: dict, doc_id: list): ...@@ -184,7 +128,7 @@ def __simple_check(i: int, query: list, dictionary: dict, doc_id: list):
selected = list() selected = list()
if dictionary.keys().__contains__(query[i]): if dictionary.keys().__contains__(query[i]):
for term in range(file_numbers): for term in range(file_numbers):
if dictionary[query[i]].keys().__contains__(term): if dictionary[query[i]].__contains__(term):
selected.append(term) selected.append(term)
doc_id = __intersection(i, doc_id, selected) doc_id = __intersection(i, doc_id, selected)
else: else:
...@@ -195,38 +139,59 @@ def __simple_check(i: int, query: list, dictionary: dict, doc_id: list): ...@@ -195,38 +139,59 @@ def __simple_check(i: int, query: list, dictionary: dict, doc_id: list):
file_numbers = 0 file_numbers = 0
def __checker(query: list, dictionary: dict): def __checker(query: str, dictionary: dict, size: int):
finish = False finish = False
i = 0 # For getting index of words in dictionary i = 0 # For getting index of words in dictionary
content = list() rsl, content = list(), list()
while i < len(query): lst_ctn = [[[]] for i in range(size)]
query = query.split()
while i < size:
if query[i][0] == '"': if query[i][0] == '"':
content = __double_quotation(i, query, dictionary, finish, content) content = (__double_quotation(i, query, dictionary, finish, content))
elif query[i][0] == '!': elif query[i][0] == '!':
content = __not(i, query, dictionary, content) content = (__not(i, query, dictionary, content))
else: else:
content = __simple_check(i, query, dictionary, content) content = __simple_check(i, query, dictionary, content)
lst_ctn[i] = content.copy()
i = i + 1 i = i + 1
return content for lst in lst_ctn:
rsl.append(lst)
return rsl
def enter(it): def enter(it: str):
t1 = threading.Thread(target=__write_logs, args=(it,)) t1 = threading.Thread(target=__write_logs, args=(it,))
t1.start() t1.start()
t2 = threading.Thread(target=__file_number, args=()) # t2 = threading.Thread(target=__file_number, args=())
t2.start() # t2.start()
spl = list(it.split(" ")) __file_number()
# __printer()
os.chdir("..")
file = open("./index/ii.json", "r", encoding="utf-8") file = open("./index/ii.json", "r", encoding="utf-8")
index = json.load(file) index = json.load(file)
dictionary = dict(index) dictionary = dict(index)
rs = __checker(it, dictionary) split = it.split(" ")
size = len(split)
rs = __checker(it, dictionary, size)
out_list = list()
intersect = [set() for _ in range(size)]
i = 0
for ls in rs:
while i < size:
intersect[i].update(ls)
i = i + 1
break
for ls in rs:
for item in ls:
out_list.append(item)
rs = set.intersection(*intersect)
ld = dict() out_data = get_info(list(rs))
for i in range(len(rs)):
ld[rs[i]] = index.get(rs[i])
print(ld[rs[i]])
out_data = get_info(ld)
t1.join() t1.join()
return out_data return out_data
...@@ -22,18 +22,18 @@ def stop_word(): ...@@ -22,18 +22,18 @@ def stop_word():
if __name__ == '__main__': if __name__ == '__main__':
print("بسم الله الرحمن الرحیم") print("بسم الله الرحمن الرحیم")
thread = threading.Thread(target=stop_word()) # thread = threading.Thread(target=stop_word())
thread.run() # thread.run()
# split_document.file_open() # Main Splitter for all of news. # split_document.file_open() # Main Splitter for all of news.
split_document.file_open_test() # Splitter for test dataset. # split_document.file_open_test() # Splitter for test dataset.
files = __file_finder() # files = __file_finder()
os.chdir("..") # os.chdir("..")
# tokenizer.get_file(files) # Main tokenizer. # tokenizer.get_file(files) # Main tokenizer.
tokenizer.get_file_test(files) # Tokenizer in test dataset. # tokenizer.get_file_test(files) # Tokenizer in test dataset.
index_maker.index() index_maker.index()
...@@ -62,7 +62,7 @@ ...@@ -62,7 +62,7 @@
<img <img
src="{{ url_for('static', filename='farsnewslogo.png') }}" src="{{ url_for('static', filename='farsnewslogo.png') }}"
style="width: 17%; position: absolute; right: 2rem; top: 1rem" style="width: 17%; position: absolute; right: 2rem; top: 1rem"
alt="" alt="Fars Logo"
/> />
<hr style="color: #80808024"/> <hr style="color: #80808024"/>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment