立即学习:https://edu.csdn.net/course/play/9460/199584?utm_source=blogtoedu
# utils.py
import GrobalParament
# Remove carriage returns and line feeds.
def delete_r_n(line):
    """Return *line* without CR/LF characters and without outer whitespace.

    Every ``"\\r"`` and ``"\\n"`` is removed wherever it occurs in the string
    (not only at the end), then ``str.strip()`` trims any remaining leading
    and trailing whitespace.

    Args:
        line: The string to clean.

    Returns:
        The cleaned string.
    """
    return line.replace("\r", "").replace("\n", "").strip()
# Load the stop words.
def get_stop_words(stop_words_dir):
    """Read a stop-word file and return its entries as a set.

    The file is opened in text mode with the encoding configured in
    ``GrobalParament.encoding`` and read line by line; each line is cleaned
    with ``delete_r_n`` before being stored.

    Args:
        stop_words_dir: Path handed directly to ``open`` (despite the name,
            it must point at a file, not a directory).

    Returns:
        A ``set`` holding one cleaned string per distinct line. A blank line
        in the file contributes the empty string ``""``.
    """
    with open(stop_words_dir, "r", encoding=GrobalParament.encoding) as f_reader:
        # Building the set directly removes duplicates as the file is read.
        return {delete_r_n(line) for line in f_reader}
if __name__ == "__main__":
    # Manual smoke test: load the configured stop-word file and show a sample.
    stop_words = get_stop_words(GrobalParament.stop_word_dir)
    print(len(stop_words))
    # A set has no order and cannot be sliced, so sort it first; this also
    # makes the 20-entry preview identical from run to run.
    print(sorted(stop_words)[:20])
相关资源:停用词和词向量.rar