-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinconnu.txt
36 lines (31 loc) · 1.23 KB
/
inconnu.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import math
import os
import string
import sys
import tokenize
from sys import argv

import pandas as pd
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud
os.chdir('/Users/philb/OneDrive/Projet Final/Repository/Elections/')
from spacy.lang.fr.stop_words import STOP_WORDS as fr_stop
# Build a TF-IDF weighted word cloud from five text files.
#
# The script reads '0.txt' .. '4.txt' from the current working directory,
# vectorises them with TF-IDF (stop words taken from `fr_stop`), sums each
# term's weight over the five documents and hands those totals to WordCloud.

# Number of command-line arguments. Kept from the original script, where it
# is computed but not used afterwards.
n = len(sys.argv)


def _read_flattened(path):
    """Return the UTF-8 content of *path* as a single line of text.

    Line breaks are replaced by a space rather than removed, so that the last
    word of one line and the first word of the next remain separate tokens
    when the text is vectorised.
    """
    with open(path, 'r', encoding="utf8") as f:
        return f.read().replace('\n', ' ')


# The five documents that make up the corpus.
texte1 = _read_flattened('0.txt')
texte2 = _read_flattened('1.txt')
texte3 = _read_flattened('2.txt')
texte4 = _read_flattened('3.txt')
texte5 = _read_flattened('4.txt')
corpus = [texte1, texte2, texte3, texte4, texte5]

# `fr_stop` is converted to a list because recent scikit-learn releases
# validate `stop_words` and only accept 'english', a list, or None.
vectorizer = TfidfVectorizer(stop_words=list(fr_stop))
vecs = vectorizer.fit_transform(corpus)
feature_names = vectorizer.get_feature_names_out()

# One row per document, one column per vocabulary term.
dense = vecs.todense()
lst1 = dense.tolist()
df = pd.DataFrame(lst1, columns=feature_names)

# Total TF-IDF weight of each term across all documents (Series indexed by
# term). Computed once and reused instead of being evaluated and discarded.
frequencies = df.T.sum(axis=1)

# Word cloud limited to at most 30 words on a white background.
# NOTE: the original script repeated this call on an undefined name `test`,
# which raised NameError; that duplicate, whose result was discarded, is gone.
Cloud = WordCloud(background_color="white", max_words=30).generate_from_frequencies(frequencies)