Commit e63793e9 authored by Haj Rezvan's avatar Haj Rezvan

Complete functions

parent e3f40329
......@@ -3,37 +3,8 @@ import os
import threading
def subtract(a, b):
    """Return the items of *a* that are not in *b*, preserving a's order.

    Posting-list difference: duplicates in *a* that survive the filter are
    kept.  Items must be hashable (they are doc IDs in this project).
    """
    # Build the exclusion set once: O(1) membership per item instead of a
    # linear scan of b for every element of a.
    exclude = set(b)
    return [item for item in a if item not in exclude]
def union_two(a, b):
    """Merge two sorted lists into one sorted list (duplicates kept).

    Classic two-pointer merge: on ties, the element from *a* is taken
    first.  Neither input is modified.
    """
    merged = []
    ia, ib = 0, 0
    len_a, len_b = len(a), len(b)
    # Walk both lists in lockstep, always emitting the smaller head.
    while ia < len_a and ib < len_b:
        if b[ib] < a[ia]:
            merged.append(b[ib])
            ib += 1
        else:
            # a[ia] <= b[ib]: a wins ties, matching the original behavior.
            merged.append(a[ia])
            ia += 1
    # One of the lists is exhausted; the other's tail is already sorted.
    merged.extend(a[ia:])
    merged.extend(b[ib:])
    return merged
def __printer():
    """Debug helper: print the contents of the current working directory."""
    entries = os.listdir()
    print(entries)
def normalize_list(lst):
......@@ -43,35 +14,6 @@ def normalize_list(lst):
return lst
def union(inp):
    # Combine a list of posting lists into one via repeated two-way merges.
    n = len(inp)
    # presumably normalize_list flattens/cleans the input - body not visible
    # here, confirm its contract before relying on the shape of vl.
    vl = normalize_list(inp)
    while n >= 2:
        if n == 2:
            # Base case: merge the two remaining lists into a flat result.
            vl = union_two(vl[0], vl[1])
        elif n > 2:
            tmp = list()
            tmp.append(vl[n - 1])
            # NOTE(review): the recursion slice vl[0: n - 2] excludes element
            # n - 2 entirely, and tmp becomes [last_element, recursive_result]
            # - a nested two-item list, not a merged flat list.  This looks
            # buggy; verify against callers before changing or reusing it.
            vl = union(vl[0: n - 2])
            tmp.append(vl)
            vl = tmp
        n = n - 1
    return vl
def intersect(lst1, lst2):
    """Return the elements common to both lists (order unspecified)."""
    common = set(lst1)
    common &= set(lst2)
    return list(common)
def operations(inp):
    """Map a boolean keyword found in the query to its operator flag.

    Returns ["&"] when "AND" occurs in *inp*, otherwise ["!"] when "NOT"
    occurs, otherwise an empty list.  "AND" deliberately shadows "NOT",
    mirroring the original if/elif order.
    """
    if "AND" in inp:
        return ["&"]
    if "NOT" in inp:
        return ["!"]
    return []
def get_info(inp):
result = list()
if type(inp) == dict:
......@@ -84,14 +26,16 @@ def get_info(inp):
# Reading docs from docs dictionary.
out = list()
# __printer()
for docID in result:
file = open(f"./docs/{docID}.json", "r", encoding="utf-8")
file = open(f'./docs/{docID}.json', "r", encoding="utf-8")
cnt = json.load(file)
cnt = dict(cnt)
val = cnt.values()
out.append(list(val))
file.close()
# os.chdir("..")
return out
......@@ -184,7 +128,7 @@ def __simple_check(i: int, query: list, dictionary: dict, doc_id: list):
selected = list()
if dictionary.keys().__contains__(query[i]):
for term in range(file_numbers):
if dictionary[query[i]].keys().__contains__(term):
if dictionary[query[i]].__contains__(term):
selected.append(term)
doc_id = __intersection(i, doc_id, selected)
else:
......@@ -195,38 +139,59 @@ def __simple_check(i: int, query: list, dictionary: dict, doc_id: list):
file_numbers = 0
def __checker(query: list, dictionary: dict):
def __checker(query: str, dictionary: dict, size: int):
finish = False
i = 0 # For getting index of words in dictionary
content = list()
while i < len(query):
rsl, content = list(), list()
lst_ctn = [[[]] for i in range(size)]
query = query.split()
while i < size:
if query[i][0] == '"':
content = __double_quotation(i, query, dictionary, finish, content)
content = (__double_quotation(i, query, dictionary, finish, content))
elif query[i][0] == '!':
content = __not(i, query, dictionary, content)
content = (__not(i, query, dictionary, content))
else:
content = __simple_check(i, query, dictionary, content)
lst_ctn[i] = content.copy()
i = i + 1
return content
for lst in lst_ctn:
rsl.append(lst)
return rsl
def enter(it):
def enter(it: str):
t1 = threading.Thread(target=__write_logs, args=(it,))
t1.start()
t2 = threading.Thread(target=__file_number, args=())
t2.start()
spl = list(it.split(" "))
# t2 = threading.Thread(target=__file_number, args=())
# t2.start()
__file_number()
# __printer()
os.chdir("..")
file = open("./index/ii.json", "r", encoding="utf-8")
index = json.load(file)
dictionary = dict(index)
rs = __checker(it, dictionary)
split = it.split(" ")
size = len(split)
rs = __checker(it, dictionary, size)
out_list = list()
intersect = [set() for _ in range(size)]
i = 0
for ls in rs:
while i < size:
intersect[i].update(ls)
i = i + 1
break
for ls in rs:
for item in ls:
out_list.append(item)
rs = set.intersection(*intersect)
ld = dict()
for i in range(len(rs)):
ld[rs[i]] = index.get(rs[i])
print(ld[rs[i]])
out_data = get_info(list(rs))
out_data = get_info(ld)
t1.join()
return out_data
......@@ -22,18 +22,18 @@ def stop_word():
if __name__ == '__main__':
print("بسم الله الرحمن الرحیم")
thread = threading.Thread(target=stop_word())
thread.run()
# thread = threading.Thread(target=stop_word())
# thread.run()
# split_document.file_open() # Main Splitter for all of news.
split_document.file_open_test() # Splitter for test dataset.
# split_document.file_open_test() # Splitter for test dataset.
files = __file_finder()
os.chdir("..")
# files = __file_finder()
# os.chdir("..")
# tokenizer.get_file(files) # Main tokenizer.
tokenizer.get_file_test(files) # Tokenizer in test dataset.
# tokenizer.get_file_test(files) # Tokenizer in test dataset.
index_maker.index()
......@@ -62,7 +62,7 @@
<img
src="{{ url_for('static', filename='farsnewslogo.png') }}"
style="width: 17%; position: absolute; right: 2rem; top: 1rem"
alt=""
alt="Fars Logo"
/>
<hr style="color: #80808024"/>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment