I have following function. So I would like to take 2 fields and concatenate them but when my pipeline finishes it doesn´t work
the pipeline finishes as correct but when I see in bigquery the fields have not been concatenated
It will be great if u can help me.
it´s the code used in the function:
import apache_beam as beam
from ..tools import ProcessLogger
_logger = ProcessLogger()
class ConcatFieldsFn(beam.DoFn):
"""Concatena los valores de los campos especificados en una pCollection por un valor especificado"""
def __init__(self, process: str, data_name: str, parameters: dict):
# Configuracion del logger
self.logger_data_name = data_name
self.logger_process = process
self.logger_subprocess = "Concatenar valores"
_logger.data_name = self.logger_data_name
_logger.process = self.logger_process
_logger.subprocess = self.logger_subprocess
# Parametros del proceso
self._fields = [field.get("name") for field in parameters.get("fields", None)]
_logger.info(
f"Se aplica regla: {_logger.subprocess} con los parametros: {parameters}"
)
def process(self, element):
# Configuracion del logger
_logger.data_name = self.logger_data_name
_logger.process = self.logger_process
_logger.subprocess = self.logger_subprocess
for field in self._fields:
if element[field] != None:
try:
element[field] = "|".join(element[field])
except Exception as ex:
_logger.error(
f"No se pueden eliminar las letras de los campos seleccionados: {ex}"
)
return [element]
I´m trying to remove rows containing blank texts or in tweet texts column. But I have tried in different ways counting the rows that only contain whitespace or counting the leading spaces and trailing spaces but to get a criterion to eliminate it.
ID tweet WhiteSpaceCount HaveWhiteSpace
0 this is a text 0 False
1 0 False
2 Hello im fine 0 False
I want to delete all the rows that don´t have any information on the tweet column.
Code here:
def extractAndSave(api, name):
# Creamos una lista de tweets:
previous_date = date.today() - timedelta(days=1)
query_date = date.today()
name = name
tweets = API_EXTRACTOR.search(q=name + "-filter:retweets", result_type='recent', timeout=999999, count=200,
end_time=previous_date, tweet_mode='extended')
# Podemos crear un dataframe como sigue:
tweet_list = []
for tweet in tweets:
tweet_list.append(tweet.full_text)
datos = pd.DataFrame(data=tweet_list, columns=['TWEETS'])
# CREANDO COLUMNA DE ID
id_list = []
for id in tweets:
id_list.append(id.id)
id = pd.DataFrame(data=id_list, columns=['ID'])
# CREANDO COLUMNA DE ID
creado_list = []
for creado in tweets:
creado_list.append(creado.created_at)
creado = pd.DataFrame(data=creado_list, columns=['FECHA_CREACION'])
# CREANDO COLUMNA DE nombre de usuario
user_list = []
for usuario in tweets:
user_list.append(usuario.user.screen_name)
usuario = pd.DataFrame(data=user_list, columns=['USUARIO'])
# CREANDO COLUMNA DE FUENTE
fuente_list = []
for fuente in tweets:
fuente_list.append(fuente.source)
fuente = pd.DataFrame(data=fuente_list, columns=['FUENTE'])
# CREANDO COLUMNA DE ME GUSTA
like_list = []
for like in tweets:
like_list.append(like.favorite_count)
like = pd.DataFrame(data=like_list, columns=['ME_GUSTA'])
# CREANDO COLUMNA DE RT
rt_list = []
for rt in tweets:
rt_list.append(rt.retweet_count)
retweet = pd.DataFrame(data=rt_list, columns=['ME_GUSTA'])
# CREANDO COLUMNA DE IDIOMA
idioma_list = []
for idioma in tweets:
idioma_list.append(idioma.lang)
idioma = pd.DataFrame(data=idioma_list, columns=['IDIOMA'])
# CREANDO COLUMNA DE IDIOMA
quote_list = []
for quote in tweets:
quote_list.append(quote.is_quote_status)
quote = pd.DataFrame(data=quote_list, columns=['CITADO'])
# CREANDO COLUMNA DE IDIOMA
location_list = []
for location in tweets:
location_list.append(location.user.location)
location = pd.DataFrame(data=location_list, columns=['LOCACION'])
# CONCATENANDO DATAFRAMES
datos = pd.concat([datos, id, creado, usuario, fuente, like, retweet, quote, idioma, location], axis=1)
# Dropear toda la fila si la columna tweets viene vacia.
datos['pass/fail'] = np.where(datos['TWEETS'].astype(str).str.fullmatch(r"\s*"),'FAIL','PASS')
datos['CONTEO_ESPACIOS']= (datos['TWEETS'].str.startswith(" ") | datos['TWEETS'].str.endswith(" ")).sum()
# Hora de publicación
datos['HORA_PUBLICACION'] = datos['FECHA_CREACION'].dt.hour
datos['DIA_SEMANA'] = datos['FECHA_CREACION'].dt.day_name()
# Extrayendo solo los tweets del día anterior
datos['FECHA_CREACION'] = pd.to_datetime(datos['FECHA_CREACION']).dt.date
datos = datos[datos['FECHA_CREACION'] == previous_date]
print(datos)
# Guardando en dataframe.
return datos
Instead of removing rows that you don't need, keep only the ones you do need:
df = df[df["tweet"].str.strip().str.len()>0]
>>> df
ID tweet WhiteSpaceCount HaveWhiteSpace
0 0 this is a text 0 False
2 2 Hello im fine 0 False
So, what I need to do is: send e-mails to a list of e-mail that are on a spreadsheet. And I need to send them from Outlook.
I am really new at programming, actually, this the first time I try to program.
This is the code I have:
import win32com.client as win32
import pandas as pd
# criar a integração com o outlook
outlook = win32.Dispatch('outlook.application')
# criar um email
email = outlook.CreateItem(0)
email_list = pd.read_excel('D:\Projeto Zurich/email_list.xlsx')
cliente = email_list['EMAIL']
# configurar as informações do seu e-mail
email.To = "cliente"
email.Subject = "Informações sobre o seu sinistro!"
email.HTMLBody = """
<p>Prezado(a) segurado(a),</p>
<p>Foram realizadas diversas tentativas de contato sem sucesso para agendar a realização da visita da assistência técnica Electrolux, por esse motivo estamos cancelando o seu sinistro.</p>
<p>Para solicitar a reabertura, você pode entrar em contato pelos nossos canais de atendimento:</p>
<p>4020 4848 (capitais e regiões metropolitanas)</p>
<p>0800 285 4141 (demais localidades)</p>
<p>Ou através dos nossos canais digitais em https://www.zurich.com.br/pt-br/atendimento</p>
<p>Atenciosamente,</p>
<p>Zurich Seguros</p>
"""
email.Send()
print("Email Enviado")
And this is the spreadsheet I have:
Is there anything I can do to make this work?
PS.: The code works when I am not trying to use the spreadsheet, like when I add a real email address at email.To =
Clientele is a string. If you wanted to use clientele as a variable you'd have to delete the quotes around it. That being said I would overcome the problem with a simple for loop. It really depends on if you want to send one giant group email or if you want to send to the recipients individually. If you wanted to send it individually, the code could be updated as
for x in clientele:
email.To = x
email.Subject = "Informações sobre o seu sinistro!"
email.HTMLBody = """
<p>Prezado(a) segurado(a),</p>
<p>Foram realizadas diversas tentativas de contato sem sucesso para agendar a
realização da visita da assistência técnica Electrolux, por esse motivo estamos
cancelando o seu sinistro.</p>
<p>Para solicitar a reabertura, você pode entrar em contato pelos nossos canais de atendimento:</p>
<p>4020 4848 (capitais e regiões metropolitanas)</p>
<p>0800 285 4141 (demais localidades)</p>
<p>Ou através dos nossos canais digitais em https://www.zurich.com.br/pt-br/atendimento</p>
<p>Atenciosamente,</p>
<p>Zurich Seguros</p>
"""
email.Send()
print("Email Enviado")
This will substitute the TO address with each individual's email and send them one at a time.
I use the twilio API to write a simple program to remember my father to take his meds. Here is my code which can be replicated by anyone who has an account in twilio, one phone number and the sheet medications.csv(see bellow):
from twilio.rest import Client
import pandas as pd
from datetime import datetime
import re
account_sid = 'XXXXXXXXXXXXX' #INSERT YOUR SID HERE
auth_token = 'XXXXXXXXXXXXX' #INSERT YOUR TOKEN HERE
client = Client(account_sid, auth_token)
def to_list_integer(dt_time):
return [dt_time.year,dt_time.month,dt_time.day, dt_time.hour,dt_time.minute]
df=pd.read_csv('medications.csv')
while True:
data_lista = to_list_integer(datetime.now())
x=datetime(*data_lista)
hora = x.strftime("%Hh%M")
for i in range(len(df)):
if (datetime.now().hour==df['hour'][i]) & (datetime.now().minute==df['minute'][i]) & (datetime.now().second==1):
text = '''*Lembrete: {0}*\n
Olá sr. X, agora são {1},
\nestá na hora de tomar {2} comprimido(s) de {0}
\n\n*recomendação médica: {3}*'''.format(df['name'][i],hora,re.search(r'\d+',df['recomendation'][i]).group(),
df['recomendation'][i])
persons=['whatsapp:+XXXXXXXXXXXXX','whatsapp:+XXXXXXXXXXXXX','whatsapp:+XXXXXXXXXXXXX',
'whatsapp:+XXXXXXXXXXXXX', 'whatsapp:+XXXXXXXXXXXXX']
for person in persons:
message = client.messages.create(
body=text,
from_='whatsapp:+XXXXXXXXXXX',#INSERT YOUR TWILIO NUMBER HERE
to=person
)
print(message.sid)
medications.csv:
name,dosage,recomendation,hour,minute
Selozek,100mg,Tomar 1 comprimido pela manhã,6,9
Vasopril Plus,20/12.5 mg,Tomar 1 comprimido pela manhã,6,10
Glifage XR,500mg,Tomar 2 comprimidos após o café e após a última refeição da noite,7,10
Clopidogrel,75mg,Tomar 1 comprimido após o almoço,13,10
AAS,100mg,Tomar 1 comprimido após o almoço,13,11
Enalapril,10mg,Tomar 2 comprimidos à noite,20,10
Glifage XR,500mg,Tomar 2 comprimidos após o café e após a última refeição da noite,20,11
Rosuvastatina,20mg,Tomar 1 comprimido após a última refeição da noite,20,12
The code was working properly yesterday but today it is no longer working. The program runs without errors and print the sids for each message. However, none of the phones in the list persons are receiving the message. What is wrong here?
I'm getting the JSONDecodeError error when i try to run the below code. I am not sure how to fix it.
I am trying to stream tweets. It works fine for few tweets and all of a sudden I get the error and the stream stops please help how can I fix this?
UPDATED FULL CODE
from tweepy import API
from tweepy import Cursor
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from textblob import TextBlob
from googletrans import Translator
import json
import re
import csv
ACCESS_TOKEN = "2446723783-cqGTmWPdtJI5HhCT7UmmET2xVY7xlA1RqlBQdQl"
ACCESS_TOKEN_SECRET = "6ZRIqd9HDCgZxsR6iMUbt6eOgk1YVsz59lEsvL30eFnPf"
CONSUMER_KEY = "slWhR4Z6VOjp0R3ojx1B2a4pr"
CONSUMER_SECRET = "6OrySqqUIwQM8ioBycqbStWizM6KkLCMFecjXPvbVmsrsw6eNT"
Count = 0
translator = Translator()
class StdOutListener(StreamListener):
def on_data(self, data):
global Count
#Data has the JSON STring
y = json.loads(data)
print("--------------------")
try:
y = y['extended_tweet']['full_text']
y = str(y)
## Portuguese to English conversion block
translations = translator.translate(y, dest='en')
print(translations.origin, '\n -> \n', translations.text)
z = translations.text
a = translations.origin
row = [data,a,z]
with open(fetched_tweets_filename, 'a') as csvFile:
writer = csv.writer(csvFile)
writer.writerow(row)
Count = Count + 1
print(Count)
except KeyError:
y = y['text']
y = str(y)
## Portuguese to English conversion block
translations = translator.translate(y, dest='en')
print(translations.origin, '\n -> \n', translations.text)
z = translations.text
a = translations.origin
row = [data,a,z]
with open(fetched_tweets_filename, 'a') as csvFile:
writer = csv.writer(csvFile)
writer.writerow(row)
Count = Count + 1
print(Count)
### Sentiment Analysis block
analysis = TextBlob(z)
if analysis.sentiment.polarity > 0:
print("sentiment is positiv")
elif analysis.sentiment.polarity == 0:
print("sentiment is Neutral")
else:
print("sentiment is Negative")
print("--------------------\n")
return True
def on_error(self,status):
print(status)
if __name__ == "__main__":
listener=StdOutListener()
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
fetched_tweets_filename = "datas3.csv"
stream = Stream(auth, listener)
stream.filter(track=['como'],languages=['pt'])
Trackback
--------------------
RT #VBrasil_: Você decidiu chamar todos os meninos para assistir um filme na sua casa! Mas como cada um queria assistir algo, vocês decidir…
->
RT #VBrasil_: You decided to call all the boys to watch a movie in your house! But as everyone wanted to watch something, you decide ...
1
sentiment is Neutral
--------------------
--------------------
RT #queroqtefodas2: Como é ter uma turma unida ?
->
RT #myselfs: What is it like to have a group together?
2
sentiment is Neutral
--------------------
--------------------
RT #thiegonovais: FUTURO AMEAÇADO?
Entenda como é importante os fãs de Harry Potter se unirem mais do que nunca. Isso pode impactar tudo!…
->
RT #thiegonovais: FUTURE THREATENED?
Understand how important it is for Harry Potter fans to come together more than ever. This can impact everything! ...
3
sentiment is positiv
--------------------
--------------------
RT #nadiardgs: como assim você OUSA seguir no Instagram a conta do meu namorado de 27 anos desempregado streamer de lol e fã de Djonga?
tá…
->
RT #nadiardgs: how dare you follow on Instagram the account of my unemployed 27-year-old boyfriend lol streamer and Djonga fan?
OK…
4
sentiment is positiv
--------------------
--------------------
Amanda foi dormir na amiga dela, e eu fiquei sozinha como sempre.
->
Amanda went to bed with her friend, and I was alone as usual.
5
sentiment is Negative
--------------------
--------------------
Este é só um exemplo de "voto de cabresto" pelo interior do nosso Brasil. O #ptbrasil sabia fazer isto muito bem, entretanto...
->
This is just an example of a "halting vote" for the interior of our Brazil. #Ptbrasil knew how to do this very well, though ...
6
sentiment is positiv
--------------------
--------------------
E ainda tem gente que tira sarro de feminismo vtnc...
Com nossa luta há décadas ainda vemos notícias como essa diariamente, imagina sem!
->
And there are still people who make fun of vtnc feminism ...
With our struggle for decades we still see news like this every day, imagine without!
7
sentiment is positiv
--------------------
--------------------
RT #cvrlosmdz: Eu nem tenho palavras para quem quebra pinky promise smh como é que conseguem quebrar uma cena tão séria ? Oh pinky mm é a p…
->
RT #cvrlosmdz: I do not even have words for anyone who breaks pinky promise smh how can they break such a serious scene? Oh pinky mm is the p ...
8
sentiment is Negative
--------------------
--------------------
Se tem uma coisa que vou sentir saudade quando defender o mestrado é a refeição por 3 reais no RU, putz como eu amo pagar só treisss reaissssss numa refeição
QUANTO?
SÓ TREISSSSS REAISSSSS
->
If there is one thing I will miss when defending the masters is the meal for 3 reals in the UK, putz as I love to pay only real reals in a meal
HOW MUCH?
ONLY TREISSSSS REAISSSSS
9
sentiment is positiv
--------------------
--------------------
Estou doida p/ir pro cinema, porem cm o gato mas como eu não tenho kkk vai cm as manas mesmo!!!!
->
I'm crazy to go to the cinema, but the cat but I do not have kkk goes in the same manas !!!!
10
sentiment is Negative
--------------------
--------------------
#expedientefutebol futebol é assim, cabe ao jogador se portar como exemplo que é, e enquanto todos lamberem ele, irá continuar assim, e na seleção brasileira ele joga merda nenhuma
->
# football football is so, it's up to the player to be an example, and while everyone licks it, will continue like this, and in the Brazilian team he plays no shit
11
sentiment is positiv
--------------------
--------------------
KKKKKKKKKKK, nem como tanto assim
->
KKKKKKKKKKK, or how much
12
sentiment is positiv
--------------------
--------------------
Tô assistindo os vídeos antigos do Felipe Neto e lembrando de como eu era feliz
->
I'm watching Felipe Neto's old videos and remembering how happy I was
13
sentiment is positiv
--------------------
--------------------
a julia as vezes eh tao seca que qnd fala eu te amo eu fico boba
->
Julia sometimes it's so dry that you say I love you, I'm silly.
14
sentiment is Negative
--------------------
--------------------
RT #hellboyahs: gente me ajuda, como dizer para uma pessoa que não quer mais conversar com ela por ela ser chata mas sem magoar a pessoa?
->
RT #hellboyahs: People help me, how to tell a person who does not want to talk to her anymore because she is annoying but not hurting the person?
15
sentiment is Negative
--------------------
--------------------
#resistxre #CabelloJane458 Uai cada um tem sua opinião e tem pessoas que expressão sua opinião como vc expressou a sua . O RBR é uma produção tosca kkkkkkkkkkk da vergonha alheia pq poderia ser melhor a Record poderia fazer algo legal se não nem fizesse pra virar aquilo .
->
#resistxre # CabelloJane458 Uai everyone has their opinion and has people who express their opinion as you expressed yours. The RBR is a crude production kkkkkkkkkkk of the shame of others could be better Record could do something cool if it did not even make it turn.
16
sentiment is positiv
--------------------
--------------------
RT #Bultaourune: Lembando que o BTS é o #1 Ato Coreano >NA HISTÓRIA< indicado a Top Duo/Group no BBMAs, also eles serão tb o #1 k-Ato a gan…
->
RT #Bultaourune: Remembering that the BTS is the # 1 Korean Act & NA HISTORY & lt; indicated the Top Duo / Group in the BBMAs, also they will be tb o # 1 k-Ato a gan ...
17
sentiment is positiv
--------------------
--------------------
Traceback (most recent call last):
File "C:\Users\ManoharRaoN\Desktop\SentizAnalyzer\Version3.py", line 85, in <module>
stream.filter(track=['como'],languages=['pt'])
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 453, in filter
self._start(is_async)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 368, in _start
self._run()
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 300, in _run
six.reraise(*exc_info)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\six.py", line 693, in reraise
raise value
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 269, in _run
self._read_loop(resp)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 331, in _read_loop
self._data(next_status_obj)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 303, in _data
if self.listener.on_data(data) is False:
File "C:\Users\ManoharRaoN\Desktop\SentizAnalyzer\Version3.py", line 48, in on_data
translations = translator.translate(y, dest='en')
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\googletrans\client.py", line 172, in translate
data = self._translate(text, dest, src)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\googletrans\client.py", line 81, in _translate
data = utils.format_json(r.text)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\googletrans\utils.py", line 62, in format_json
converted = legacy_format_json(original)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\site-packages\googletrans\utils.py", line 54, in legacy_format_json
converted = json.loads(text)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\json\__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
[Finished in 13.7s with exit code 1]
[shell_cmd: python -u "C:\Users\ManoharRaoN\Desktop\SentizAnalyzer\Version3.py"]
[dir: C:\Users\ManoharRaoN\Desktop\SentizAnalyzer]
[path: C:\ProgramData\Oracle\Java\javapath;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\System32\OpenSSH\;C:\Program Files (x86)\Webex\Webex\Applications;C:\Program Files\IBM\SPSS\Modeler\18.1.1\ext\bin\spss.TMWBServer\bin;C:\Program Files (x86)\Sennheiser\SoftphoneSDK\;C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37;C:\Users\ManoharRaoN\AppData\Local\Programs\Python\Python37\Scripts;C:\Users\ManoharRaoN\AppData\Roaming\nltk_data;C:\Users\ManoharRaoN\AppData\Local\Microsoft\WindowsApps;C:\Users\ManoharRaoN\AppData\Local\Box\Box Edit\]
I think It happens on certain tweets, How can I pass this when it occurs.
Not sure I can be completely wrong.
I also get the below error when I try to write a string which contains a special character such as "te amo ❤️ "
Below is the error
--------------------
RT #PortalMessi: “Messi seria para mim, como um jogador que eu vi jogar, o número 1. Meu pai sempre disse que o melhor foi Pelé, mas eu não…
->
RT #PortalMessi: "Messi would be for me, as a player I saw play, the number 1. My father always said that the best was Pele, but I did not ...
1
sentiment is positiv
--------------------
--------------------
RT #felipeneto: Informo que a dívida de 170 mil reais do Botafogo comigo como pessoa física foi integralmente perdoada, junto com os juros…
->
RT #felipeneto: I report that Botafogo's debt of 170,000 reais with me as a natural person has been totally forgiven, along with interest ...
2
sentiment is positiv
--------------------
--------------------
#MaluMonteiro08 óh #deus como pode tamanha beleza?????
->
# MaluMonteiro08 ohh #deus how can such beauty ?????
3
sentiment is Neutral
--------------------
--------------------
Meu pai amava.
Descanse em paz, rainha! ❤️
->
My father loved it.
Rest in peace, queen! (I.e.
Traceback (most recent call last):
File "C:\Users\Stramzik\Desktop\SentizAnalyzer\Version3.py", line 32, in on_data
y = y['extended_tweet']['full_text']
KeyError: 'extended_tweet'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Stramzik\Desktop\SentizAnalyzer\Version3.py", line 84, in <module>
stream.filter(track=['como'],languages=['pt'])
File "C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 453, in filter
self._start(is_async)
File "C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 368, in _start
self._run()
File "C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 300, in _run
six.reraise(*exc_info)
File "C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\lib\site-packages\six.py", line 693, in reraise
raise value
File "C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 269, in _run
self._read_loop(resp)
File "C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 331, in _read_loop
self._data(next_status_obj)
File "C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 303, in _data
if self.listener.on_data(data) is False:
File "C:\Users\Stramzik\Desktop\SentizAnalyzer\Version3.py", line 55, in on_data
writer.writerow(row)
File "C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 5967-5968: character maps to <undefined>
[Finished in 5.5s with exit code 1]
[shell_cmd: python -u "C:\Users\Stramzik\Desktop\SentizAnalyzer\Version3.py"]
[dir: C:\Users\Stramzik\Desktop\SentizAnalyzer]
[path: C:\ProgramData\Oracle\Java\javapath;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\System32\OpenSSH\;C:\Program Files (x86)\Webex\Webex\Applications;C:\Program Files\IBM\SPSS\Modeler\18.1.1\ext\bin\spss.TMWBServer\bin;C:\Program Files (x86)\Sennheiser\SoftphoneSDK\;C:\Users\Stramzik\AppData\Local\Programs\Python\Python37;C:\Users\Stramzik\AppData\Local\Programs\Python\Python37\Scripts;C:\Users\Stramzik\AppData\Roaming\nltk_data;C:\Users\Stramzik\AppData\Local\Microsoft\WindowsApps;C:\Users\Stramzik\AppData\Local\Box\Box Edit\]
Check if the data you are getting is actually json, in case of some errors you may not be getting valid json data and also try putting the json.loads() inside the try/catch block.
Encode the data to bytes while writing to the file using a proper encoding.
import json
import csv
class StdOutListener(StreamListener):
def on_data(self, data):
global Count
#Data has the JSON STring
print("--------------------")
try:
y = json.loads(data)
y = y['extended_tweet']['full_text']
y = str(y)
## Portuguese to English conversion block
translations = translator.translate(y, dest='en')
print(translations.origin, '\n -> \n', translations.text)
z = translations.text.encode("utf-8")
a = translations.origin.encode("utf-8")
data = data.encode("utf-8")
row = [data,a,z]
with open(fetched_tweets_filename, 'a') as csvFile:
writer = csv.writer(csvFile)
writer.writerow(row)
Count = Count + 1
print(Count)
except KeyError:
y = y.get('text', '') # change this to get
y = str(y)
## Portuguese to English conversion block
translations = translator.translate(y, dest='en')
print(translations.origin, '\n -> \n', translations.text)
z = translations.text.encode("utf-8")
a = translations.origin.encode("utf-8")
row = [data, a, z]
with open(fetched_tweets_filename, 'a') as csvFile:
writer = csv.writer(csvFile)
writer.writerow(row)
Count = Count + 1
print(Count)
except Exception as e:
print(str(e))
### Sentiment Analysis block
analysis = TextBlob(z)
if analysis.sentiment.polarity > 0:
print("sentiment is positiv")
elif analysis.sentiment.polarity == 0:
print("sentiment is Neutral")
else:
print("sentiment is Negative")
print("--------------------\n")
return True
def on_error(self,status):
print(status)
if __name__ == "__main__":
listener=StdOutListener()
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
fetched_tweets_filename = "datas3.csv"
stream = Stream(auth, listener)
stream.filter(track=['como'],languages=['pt'])