Spaces: Runtime error

Commit 567bb6c · 1 parent(s): a6177b6 · Update app.py

app.py CHANGED
@@ -1,19 +1,27 @@
 from textblob import TextBlob
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-from transformers import pipeline
+from transformers import pipeline, M2M100ForConditionalGeneration, M2M100Tokenizer, MBart50TokenizerFast, MBartForConditionalGeneration, AutoTokenizer, AutoModelForSequenceClassification
 import pandas as pd
 import re
 import streamlit as st
-from datasets import load_dataset

-def translate_text(text):
+def translate_text_blob(text):
     blob = TextBlob(text)
-    try:
-        return str(blob.translate(from_lang="pt", to="en"))
-    except:
-        return text
+    return str(blob.translate(from_lang="pt", to="en"))

-def sentiment_classification(sentence, vader_object):
+def translate_text_M2M100(text, model, tokenizer):
+    encoded = tokenizer(text, return_tensors="pt")
+    generated_tokens = model.generate(**encoded, forced_bos_token_id=tokenizer.get_lang_id("en"))
+    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+
+def translate_BART(text, model, tokenizer):
+    inputs = tokenizer(text, return_tensors='pt')
+    input_ids = inputs.input_ids
+    attention_mask = inputs.attention_mask
+    output = model.generate(input_ids, attention_mask=attention_mask, forced_bos_token_id=tokenizer.lang_code_to_id['en_XX'])
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+
+def sentiment_vader(sentence, vader_object):

     sentiment_dict = vader_object.polarity_scores(sentence)
     negative = sentiment_dict['neg']
@@ -30,56 +38,26 @@ def sentiment_classification(sentence, vader_object):
     else :
         overall_sentiment = "Neutral"

-    return overall_sentiment
-
-def theme_classification(text, text_classifier):
-    … (old lines 36–49, also removed, were not captured in this view)
-    #elif re.search(r"/524 Particip|Atompar|B Tech Eqi|Betapart |Cabinda Part|Caconde Part|Cemepe |Cims |Gama Part |Invest Bemge|Polpar |Prompt Part |Sudeste|Sul 116 Part|Ybyra/gm", text):
-        #return "Others"
-    elif re.search(r"/3r Petroleum|Cosan|Dommo|Enauta Part|Pet Manguinh|Petrobras|Petrorecsa|Petrorio|Ultrapar|Vibra|Lupatech|Oceanpact|Osx Brasil/gm", text):
-        return "Oil, Gas and Biofuels"
-    elif re.search(r"/Intelbras|Multilaser|Positivo Tec|Bemobi Tech|Brq|Enjoei|Getninjas|Infracomm|Locaweb|Meliuz|Mobly|Neogrid|Padtec|Quality Soft|Sinqia|Tc|Totvs |Wdc Networks|Westwing/gm", text):
-        return "Information Technology"
-    elif re.search(r"/Aes Brasil|Aes Sul|Aesoperacoes|Afluente T|Alupar|Ampla Energ|Auren|Cachoeira|Ceb|Ceee-d|Ceee-t|Celesc|Celgpar|Celpe|Cemig|Cemig Dist|Cemig Gt|Coelba|Coelce|Comerc Par|Copel|Cosern|Cpfl Energia|Cpfl Geracao|Cpfl Piratin|Cpfl Renovav|Ebe|Elektro|Eletrobras|Eletropar|Emae|Energias Br|Energisa|Energisa Mt|Enersul|Eneva|Engie Brasil|Eqtl Para|Eqtlmaranhao|Equatorial|Escelsa|Fgenergia|Ger Paranap|Itapebi|Light|Light S/a|Neoenergia|Omegaenergia|Paul F Luz|Proman|Rede Energia|Renova|Statkraft|Sto Antonio|Taesa|Term. Pe Iii|Termope|Tran Paulist|Uptick|Ambipar|Casan|Copasa|Igua Sa|Orizon|Sabesp|Sanepar|Sanesalto|Ceg|Comgas|Compass|AES|Cemig|CEMIG/gm", text):
-        return "Public utility"
-    else:
-        labels = [
-            "Industrial Goods",
-            "Communications",
-            "Cyclic Consumption",
-            "Non-cyclical Consumption",
-            "Financial",
-            "Basic Materials",
-            #"Others",
-            "Oil, Gas and Biofuels",
-            "Health",
-            #"Initial Sector",
-            "Information Technology",
-            "Public utility"
-        ]
-
-        template = "The economic sector of this set of words is {}."
-
-        results = text_classifier(text, labels, hypothesis_template=template)
-
-        index = results["scores"].index(max(results["scores"]))
-
-        return str(results["labels"][index])
+    return overall_sentiment.upper()
+
+def sentiment_finbert(text, pipeline):
+    result = pipeline(text)[0]
+    return result["label"].upper()
+
+m2m100 = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
+m2m100_token = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
+m2m100_token.src_lang = "pt"
+
+BART_token = MBart50TokenizerFast.from_pretrained('Narrativa/mbart-large-50-finetuned-opus-pt-en-translation')
+BART = MBartForConditionalGeneration.from_pretrained('Narrativa/mbart-large-50-finetuned-opus-pt-en-translation')
+BART_token.src_lang = 'pt_XX'
+
+finbert_token = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+finbert = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert", num_labels=3)
+finbert_pipeline = pipeline("sentiment-analysis", model=finbert, tokenizer=finbert_token)

 sid_obj = SentimentIntensityAnalyzer()
+
 classifier = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")

 sheet_id = "1IGFSKnnmQndKVmGOWMCbsvJJMU_2jvnm"
@@ -91,6 +69,7 @@ header = st.container()
 model = st.container()
 dataset = st.container()
 analysis = st.container()
+model_1, model_2 = st.columns(2)

 with st.sidebar:
     st.markdown("# Lorem Ipsum\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent sapien tortor, suscipit quis ornare ut, laoreet vitae nisi. Mauris quis consectetur risus, non blandit mauris. Sed ut odio tempor, ullamcorper leo eu, mollis eros. Curabitur pretium sollicitudin sapien, vel mattis augue convallis quis. Suspendisse eleifend turpis non nunc gravida, aliquet hendrerit orci viverra. Sed aliquet, nunc eu posuere tempor, libero ex dignissim velit, ut ultricies erat felis at urna. Proin metus augue, commodo in faucibus sed, aliquet ac eros. Nullam turpis leo, dictum eu tellus a, aliquam egestas velit. Suspendisse cursus augue a sem dapibus, eu faucibus nisl ultrices. Cras tortor ipsum, luctus vitae tincidunt id, dapibus id justo. Sed mi nunc, tempor eu iaculis in, tristique cursus massa. Integer metus felis, pulvinar ut aliquam ut, consectetur in nulla.")
@@ -99,13 +78,42 @@ with header:
     st.title("IC 2022 Classificação de Dados Financeiros")
     st.write("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent sapien tortor, suscipit quis ornare ut, laoreet vitae nisi. Mauris quis consectetur risus, non blandit mauris. Sed ut odio tempor, ullamcorper leo eu, mollis eros.")

+with model_1:
+    st.header("Modelo para Tradução e Classificação!")
+    translator = st.selectbox('Qual modelo você deseja usar para tradução?', ('TextBlob', 'M2M100', 'BART'))
+    sentimentor = st.selectbox('Qual modelo você deseja usar para a análise de sentimentos?', ('Vader', 'FinBERT'))
+    text = st.text_area("Coloque seu texto sobre mercado financeiro em português!", "As ações da Ultrafarma subiram em 98% no último bimestre, segundo os dados da revista!")
+    submit = st.button('Gerar!')
+
+with model_2:
+    if submit:
+        with st.spinner('Aguarde enquanto estamos processando...'):
+            if translator == "TextBlob":
+                text_en = translate_text_blob(text)
+            elif translator == "M2M100":
+                text_en = translate_text_M2M100(text, m2m100, m2m100_token)
+            elif translator == "BART":
+                text_en = translate_text_M2M100(text, BART, BART_token)
+
+            if sentimentor == "Vader":
+                sentiment = sentiment_vader(text_en, sid_obj)
+            elif sentimentor == "FinBERT":
+                sentiment = sentiment_finbert(text_en, finbert_pipeline)
+
+            st.write(text_en)
+            st.write(sentiment)
+
+
+
 with model:
     st.header("Modelo para Tradução e Classificação!")
     text = st.text_area("Coloque seu texto sobre mercado financeiro em português!")

+    submit = st.button('Generate')
+
     if text:
-        … (old lines 107–108, also removed, were not captured in this view)
+        text_en = translate_text(text)
+        st.write("*Translation:* {}".format(text_en))
         sentiment = sentiment_classification(text_en, sid_obj)
         st.write("*Sentiment:* {} - {}".format(sentiment[0], sentiment[1]))
         theme = theme_classification(text_en, classifier)
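As a quick sanity check of the translation and sentiment path this commit introduces, the sketch below exercises the same calls outside Streamlit: it loads the facebook/m2m100_418M checkpoint, mirrors translate_text_M2M100 from app.py, and scores the translated sentence with VADER and with the ProsusAI/finbert pipeline. This is a minimal standalone sketch, not part of the commit; the sample sentence is the default text from the new text_area, and the first run will download the model weights.

# Standalone smoke test mirroring the helpers added in this commit (not part of app.py).
from transformers import (M2M100ForConditionalGeneration, M2M100Tokenizer,
                          AutoTokenizer, AutoModelForSequenceClassification, pipeline)
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def translate_text_M2M100(text, model, tokenizer):
    # Same logic as the new function in app.py: encode Portuguese, force English output.
    encoded = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(**encoded, forced_bos_token_id=tokenizer.get_lang_id("en"))
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

if __name__ == "__main__":
    # Translation model, loaded with the same checkpoint as in the commit.
    m2m100 = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
    m2m100_token = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
    m2m100_token.src_lang = "pt"

    # Default sentence from the new text_area in app.py.
    sample = "As ações da Ultrafarma subiram em 98% no último bimestre, segundo os dados da revista!"
    text_en = translate_text_M2M100(sample, m2m100, m2m100_token)
    print("Translation:", text_en)

    # VADER polarity scores, as used by sentiment_vader.
    print("VADER:", SentimentIntensityAnalyzer().polarity_scores(text_en))

    # FinBERT label, as used by sentiment_finbert.
    finbert_pipeline = pipeline(
        "sentiment-analysis",
        model=AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert", num_labels=3),
        tokenizer=AutoTokenizer.from_pretrained("ProsusAI/finbert"),
    )
    print("FinBERT:", finbert_pipeline(text_en)[0]["label"].upper())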