Need help with fetchCurrenciesData - Python

import requests
from datetime import datetime
import time

class Bot:
    def __init__(self):
        self.url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
        self.params = {
            'start': '1',
            'limit': '100',
            'convert': 'USD'
        }
        self.headers = {
            'Accepts': 'application/json',
            'X-CMC-PRO-API-KEY': 'my key',
        }
        self.orders = []

    def fetchCurrenciesData(self):
        r = requests.get(url=self.url, headers=self.headers, params=self.params).json()
        return r['data']

    def canBuy(self):
        # Check that no buy positions are still open
        for order in self.orders:
            if order['status'] == 'open':
                return False
        return True
impactBot = Bot()

while True:
    now = datetime.now()
    currencies = impactBot.fetchCurrenciesData()
    print(currencies)
    i = 1   # percentage gain threshold
    r = 4   # value above which to start the operation
    n = 0   # number of currencies whose price gained more than i% since last time
    z = -1  # percentage below which to sell the currency
    bestCurrency = None  # currency with the largest price gain
    # logic
    if impactBot.canBuy():
        print('There are no open positions - Checking for currencies that gained more than (i)% in the last hour')
        for currency in currencies:
            if not bestCurrency or currency['quote']['USD']['percent_change_1h'] > bestCurrency['quote']['USD']['percent_change_1h']:
                bestCurrency = currency
            if currency['quote']['USD']['percent_change_1h'] > i:
                n = n + 1
                if n > 4:
                    print('I found more than four currencies - Creating a new order')
                    newOrder = {
                        'datetime': now,
                        'symbol': currency['symbol'],
                        'enterPrice': currency['quote']['USD']['price'],  # price we bought at
                        'exitPrice': None,
                        'status': 'open'
                    }
                    impactBot.orders.append(newOrder)
    else:
        print('Checking the orders still open - Selling if the depreciation condition is met')
        for currency in currencies:
            if currency['quote']['USD']['percent_change_1h'] < z:  # sell once the 1h change drops below z
                for order in impactBot.orders:
                    if order['status'] == 'open' and order['symbol'] == currency['symbol']:
                        # sell
                        order['status'] = 'close'
                        order['exitPrice'] = currency['quote']['USD']['price']
    # overview
    initialAmount = 10000
    profit = 0
    for order in impactBot.orders:
        if order['status'] == 'close':
            profit += initialAmount * (order['exitPrice'] / order['enterPrice'] - 1)  # gain (or loss) on the stake
    finalAmount = initialAmount + profit
    print(f'I made {len(impactBot.orders)} trades; I started with {initialAmount}$ and now I have {finalAmount}$')
    # routine
    minutes = 10
    seconds = minutes * 60
    time.sleep(seconds)
These are the errors:
Traceback (most recent call last):
  File "/Users/andyduma/PycharmProjects/esercitazioni/Bot.py", line 38, in <module>
    currencies = impactBot.fetchCurrenciesData()
  File "/Users/andyduma/PycharmProjects/esercitazioni/Bot.py", line 24, in fetchCurrenciesData
    return r['data']
KeyError: 'data'

When you make requests like these, you should test the result so you can see what is going on. I can't be positive that my way of testing the error code is exactly how you should do it, but it is definitely in the ballpark.
def fetchCurrenciesData(self):
    r = requests.get(url=self.url, headers=self.headers, params=self.params).json()
    status = r['status']
    if status.get('error_code', 0) == 0:
        # print(r)  # uncomment if the next line throws an error
        return r['data']
    raise ValueError(f"Error {status['error_code']}: {status['error_message']}")
and
while True:
    now = datetime.now()
    try:
        currencies = impactBot.fetchCurrenciesData()
        print(currencies)
    except ValueError as err:
        print(err)
        break
If you look at the Python example for the API, you are not doing it the way they tell you to. Your way may work regardless, but why invent your own solution when the company providing the API has given you a copy/paste solution to use? They want you to create a Session, and you haven't done that.
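For what it's worth, here is a minimal sketch of what a Session-based Bot could look like. It is adapted from memory of the CMC quickstart (double-check the official example, including the header name, which I recall is spelled X-CMC_PRO_API_KEY with an underscore), and it reuses the error check from above:

from requests import Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects

class Bot:
    def __init__(self):
        self.url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
        self.params = {'start': '1', 'limit': '100', 'convert': 'USD'}
        # One Session for the life of the bot: the headers ride along on
        # every request and the connection can be reused between polls
        self.session = Session()
        self.session.headers.update({
            'Accepts': 'application/json',
            'X-CMC_PRO_API_KEY': 'my key',
        })
        self.orders = []

    def fetchCurrenciesData(self):
        try:
            r = self.session.get(self.url, params=self.params).json()
        except (ConnectionError, Timeout, TooManyRedirects) as e:
            raise ValueError(str(e))
        status = r['status']
        if status.get('error_code', 0) == 0:
            return r['data']
        raise ValueError(f"Error {status['error_code']}: {status['error_message']}")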

Related

How can I delete all my tweets using the Twitter API

I'm trying to delete all the tweets/replies from my account. At first it worked, but at some point it stopped deleting and can no longer fetch the tweets, giving the error:
File "main.py", line 29, in fetch_tweets
    oldest = all_tweets[-1].id - 1
IndexError: list index out of range
On my account there are still 19.2k tweets to be deleted, even though they don't appear on the profile (I don't know why). Does anyone have any idea how to fix this?
code:
'''
Script to delete all my tweets older than a given date
'''
from keep_alive import keep_alive
import tweepy
from config import *
import datetime
import pandas as pd

client = tweepy.Client(bearer_token, api_key, api_secret, access_token, access_token_secret)
auth = tweepy.OAuth1UserHandler(api_key, api_secret, access_token, access_token_secret)
api = tweepy.API(auth)

def fetch_tweets(username):
    '''
    Downloads all the tweets of the user
    given in 'username'
    '''
    print("Fetching tweets. . .")
    all_tweets = []
    new_tweets = api.user_timeline(screen_name=username, tweet_mode='extended', count=200, exclude_replies=False)
    all_tweets.extend(new_tweets)
    # Save the id of the oldest tweet, minus one
    oldest = all_tweets[-1].id - 1
    while len(new_tweets) > 0:  # Keep fetching tweets until the request returns nothing
        # All later requests use max_id to page back through time
        new_tweets = api.user_timeline(screen_name=username, tweet_mode='extended', count=200, max_id=oldest)
        all_tweets.extend(new_tweets)
        # Update the id
        oldest = all_tweets[-1].id - 1
    # Transform the tweepy tweets into a 2D array that will populate the csv
    output = [
        [tweet.id,
         tweet.created_at,
         tweet.created_at.strftime("%d-%m-%Y"),
         tweet.retweet_count,
         tweet.favorite_count,
         username] for tweet in all_tweets
    ]
    for sublist in output:
        sublist.append(username)
    return output

def validate_date(date_text):
    '''
    Checks whether the date entered by the user
    is in the YYYY-MM-DD format. If it is not,
    raises an exception with an error message.
    '''
    try:
        datetime.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        raise ValueError("The date is not in the YYYY-MM-DD format. Run the program again.")

def filter_tweets(start, tweets):
    '''
    Uses the dataframe with all the tweets,
    plus the cutoff date after which the
    tweets should be kept, to build a list
    with the ids of the posts that should
    be removed.
    '''
    print("Filtering tweets. . .")
    now = datetime.datetime.now()
    start_date = pd.to_datetime(start, format="%Y-%m-%d")
    # Filter the date range of tweets I want to keep
    keep_dates = pd.date_range(start=start_date, end=now)
    keep_dates = [str(date)[:10] for date in keep_dates]
    # Build a list of ids whose tweet should be kept
    tweets_to_delete = [tweet[0] for tweet in tweets if str(pd.to_datetime(tweet[1]))[:10] not in keep_dates]
    return tweets_to_delete

def delete_tweets(tweet_ids):
    '''
    Deletes the tweets whose id numbers
    are in the list tweet_ids
    '''
    print("Deleting tweets. . .")
    # Start deleting:
    delete_count = 0
    for tweet_id in tweet_ids:
        try:
            api.destroy_status(tweet_id)
            print(tweet_id, 'deleted!', delete_count)
            delete_count += 1
        except:
            print(tweet_id, 'could not be deleted!')
    print('Done!', delete_count, 'tweets were deleted in total.')

##########################
###  Main execution   ###
##########################
def main():
    print("Starting. . .")
    username = "xxxxxxxxxx"
    start = "2022-10-25"
    while True:
        try:
            tweets = fetch_tweets(username)
            tweets = filter_tweets(start, tweets)
            delete_tweets(tweets)
        except tweepy.TweepyException as e:
            try:
                print(e)
            except:
                print("error")

keep_alive()
main()
I already tried changing the parameters and putting in a conditional to check whether the list is empty, but none of that worked.
The API can only go back so far in retrieving older Tweets.
Another option would be to request your Twitter Archive, which would contain the Tweet IDs and content - you can then potentially use the API to delete Tweets by ID.
Note that there are account and API rate limits that will restrict the speed at which you can run this operation.
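If you go the archive route, a rough sketch of the delete-by-ID loop might look like this (the tweets.js path and the way the file is parsed are assumptions based on the usual archive layout, so adjust to what your download actually contains):

import json
import time
import tweepy
from config import *

auth = tweepy.OAuth1UserHandler(api_key, api_secret, access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)  # let tweepy sleep through rate limits

# tweets.js in the archive is JavaScript; strip the assignment prefix to get JSON
with open('data/tweets.js', encoding='utf-8') as f:
    raw = f.read()
tweets = json.loads(raw[raw.index('['):])

for entry in tweets:
    tweet_id = entry['tweet']['id']
    try:
        api.destroy_status(tweet_id)
        print(tweet_id, 'deleted')
    except tweepy.TweepyException as e:
        print(tweet_id, 'skipped:', e)
    time.sleep(1)  # extra padding to stay clear of the delete rate limit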

More efficient way to manipulate large dataframe

It's my first real Python script, so feel free to make comments in order to improve my code.
The purpose of this script is to extract 2 Oracle tables with Python, store them in a dataframe and then join them with pandas.
But for queries returning more than 500k rows it feels slow. Do you know why?
import pandas as pd
from datetime import date
from sqlalchemy import create_engine
import cx_Oracle, time
import config

## Timer variable
start = time.time()

## User input on the command line
year = input('Enter a year: ')
month = input('Enter the month, in MM format: ')
societe_var = input('SA (APPLE,PEACH,BANANA,ANANAS,ALL) : ')

## SAs + the BUs belonging to each SA
sa_list = ['APPLE','PEACH','BANANA','ANANAS']
bu_list_APPLE = ['006111','1311402','1311403','1311404','1340115','13411106','1311407','1111','6115910','1166157','6811207','8311345','1111','1188100','8118101','8811102','8810113','8811104','8118105','8811106','8811107','8118108','1111']
bu_list_PEACH = ['131400','310254']
bu_list_BANANA = ['0151100','1110073','1007115','1311335','1113340','1311341','1113342','1331143']
bu_list_ANANAS = ['1211345','13111345','11113395','73111345']

# Points to the right list depending on the SA entered
bu_list_map = {
    'APPLE': bu_list_APPLE,
    'PEACH': bu_list_PEACH,
    'BANANA': bu_list_BANANA,
    'ANANAS': bu_list_ANANAS
}

if societe_var == 'ALL':
    print('not implemented yet')
elif societe_var in sa_list:
    bu_list = bu_list_map.get(societe_var)
    sa_var = societe_var
    i = 1
    for bu in bu_list:
        start_bu = time.time()
        ## Load the SQL query with the right variables for gla_va_parametre -- EPOST
        query1 = open('gla_va_parametre - VAR.sql', "r").read()
        query1 = query1.replace('#ANNEE', "'" + year + "'").replace('%MOIS%', "'" + month + "'").replace('%SA%', "'" + societe_var + "'").replace('%BUGL%', "'" + bu + "'").replace('%DIVISION%', '"C__en__PS_S1_D_OP_UNIT13".OPERATING_UNIT')
        ## Load the SQL query with the right variables for cle-gla_tva -- FPOST
        query2 = open('cle-gla_tva - VAR.sql', "r").read()
        query2 = query2.replace('#ANNEE', "'" + year + "'").replace('%MOIS%', "'" + month + "'").replace('%SA%', "'" + societe_var + "'").replace('%BUGL%', "'" + bu + "'").replace('%DIVISION%', 'OPERATING_UNIT')
        # Connection parameters
        connection_EPOST = cx_Oracle.connect(user=config.user_EPOST, password=config.password_EPOST, dsn=config.host_EPOST, )
        connection_FPOST = cx_Oracle.connect(user=config.user_FPOST, password=config.password_FPOST, dsn=config.host_FPOST, )
        ## Fetch the EPOST part
        with connection_EPOST:
            # Declare an empty list
            dfl = []
            # Declare an empty DataFrame
            dfs = pd.DataFrame()
            z = 1
            # Start chunking
            for chunk in pd.read_sql(query1, con=connection_EPOST, chunksize=25000):
                # Append each data chunk from the SQL result set to the list
                dfl.append(chunk)
                print('chunk num : ' + str(z))
                z = z + 1
            # Concatenate the list into the dataframe
            dfs = pd.concat(dfl, ignore_index=True)
            print('parameters fetched')
        ## Fetch the FPOST part
        with connection_FPOST:
            # Declare an empty list
            df2 = []
            # Declare an empty DataFrame
            dfs2 = pd.DataFrame()
            # Start chunking
            for chunk in pd.read_sql(query2, con=connection_FPOST, chunksize=10000):
                # Append each data chunk from the SQL result set to the list
                df2.append(chunk)
            # Concatenate the list into the dataframe
            dfs2 = pd.concat(df2, ignore_index=True)
            print('keys fetched')
        print('Starting the join')
        jointure = pd.merge(dfs, dfs2, how='left', left_on=['Code_BU_GL','Code_division','Code_ecriture','Date_comptable','Code_ligne_ecriture','UNPOST_SEQ'], right_on=['BUSINESS_UNIT','OPERATING_UNIT','JOURNAL_ID','JOURNAL_DATE','JOURNAL_LINE','UNPOST_SEQ']).drop(columns=['BUSINESS_UNIT','OPERATING_UNIT','JOURNAL_ID','JOURNAL_DATE','JOURNAL_LINE'])
        jointure.to_csv('out\gla_va_'+year+month+"_"+societe_var+"_"+bu+"_"+date.today().strftime("%Y%m%d")+'.csv', index=False, sep='|')
        print('File ' + str(i) + "/" + str(len(bu_list)) + ' generated in: ' + str(time.time() - start_bu) + ' seconds')
        i = i + 1
    print("Extracting the scope of SA " + societe_var + " took: " + str((time.time() - start)/60) + " min")

Correct way to implement threads in Python

I am implementing a simple soccer simulator in Python using threads and a lock. The app works fine, but I have doubts about the way I implement the threads: it seems to me that the first team has an advantage because it executes first.
def jugar(Equipo1, Equipo2):
    # Look up each team's probability of conceding
    prob_encajar_eq1 = Equipo1.probabilidad_encajar()
    prob_encajar_eq2 = Equipo2.probabilidad_encajar()

    def jugar_equipo1(defensa_rival):
        semaforo.acquire()
        if Equipo1.hacer_pases():
            Equipo1.shoot(defensa_rival)
        semaforo.release()

    def jugar_equipo2(defensa_rival):
        semaforo.acquire()
        if Equipo2.hacer_pases():
            Equipo2.shoot(defensa_rival)
        semaforo.release()

    hilo_equipo1 = threading.Thread(name='hilo_eq1', target=jugar_equipo1, args=(prob_encajar_eq2,))
    hilo_equipo2 = threading.Thread(name='hilo_eq2', target=jugar_equipo2, args=(prob_encajar_eq1,))
    hilo_equipo1.start()
    hilo_equipo2.start()
    hilo_equipo1.join()
    hilo_equipo2.join()
To give both teams several attempts, I run a loop for a few seconds around jugar(), which is the function that does the work with the threads. But this is where I have my doubts, because every time jugar() executes, the threads are declared again (a sketch of an alternative follows the main block below).
if __name__ == '__main__':
    cargar_informacion()
    eqA = Equipo(equipoA, ranking_eqA)
    eqB = Equipo(equipoB, ranking_eqB)
    probabilidades = porcenajes_ranking(ranking_eqA)
    eqA.cargar_probabilidades(probabilidades)
    probabilidades = porcenajes_ranking(ranking_eqB)
    eqB.cargar_probabilidades(probabilidades)
    starttime = time.time()
    tiempo = 0
    # Create the progress bar
    bar = progressbar.ProgressBar(widgets=[
        progressbar.Percentage(),
        progressbar.Bar(),
    ], max_value=100).start()
    # Make the game take roughly 10 seconds to simulate.
    while tiempo < 10:
        time.sleep(0.3 - ((time.time() - starttime) % 0.3))
        jugar(eqA, eqB)
        tiempo = time.time() - starttime
        bar += 2.8
    bar.finish()  # End the progress bar
    resultados_finales(eqA, eqB)  # Show the final result of the match.
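A minimal sketch of one way to create the threads just once, with the loop moved inside the workers. The team objects and method names are taken from the question's code; the 0.3s pacing and the Event-based stop are my assumptions, and this is untested against the real Equipo class:

import threading
import time

semaforo = threading.Lock()

def jugar_en_bucle(equipo, defensa_rival, fin_evento):
    # One long-lived thread per team: loop inside the worker instead of
    # re-creating the threads on every call to jugar()
    while not fin_evento.is_set():
        with semaforo:
            if equipo.hacer_pases():
                equipo.shoot(defensa_rival)
        time.sleep(0.3)

fin = threading.Event()
prob_encajar_eqA = eqA.probabilidad_encajar()
prob_encajar_eqB = eqB.probabilidad_encajar()
hilo1 = threading.Thread(target=jugar_en_bucle, args=(eqA, prob_encajar_eqB, fin))
hilo2 = threading.Thread(target=jugar_en_bucle, args=(eqB, prob_encajar_eqA, fin))
hilo1.start()
hilo2.start()
time.sleep(10)  # simulate for roughly 10 seconds
fin.set()
hilo1.join()
hilo2.join()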

Python2 EOF when reading a line

Hi everyone! I have this problem when I try to call a function that involves interaction with the user, and I don't know what the cause is. The function that requires input works perfectly when run separately from the other function.
I am using Jupyter Notebook [py2].
The function I am talking about is the following:
import numpy as np
import matplotlib.pyplot as plt

# Exponential distribution
def dist_exp():
    a = int(raw_input("Enter Lambda: "))
    b = int(raw_input("Enter how many numbers to generate: "))
    beta = 1./a
    exp = np.random.exponential((beta), b)  # the first value is Beta (1/Lambda)
    mediana_t = np.log(2)/(a*a)
    print exp  # prints the generated random numbers
    # prints the theoretical statistics
    print "Theoretical statistics: ", "Min=0", " Max=infinity", " Mean={}".format(beta), " Median={}".format(mediana_t), " Variance={}".format(1/(a*a))
    # prints the sample statistics
    print "Sample statistics: ", "Min={}".format(np.min(exp)), " Max={}".format(np.max(exp)), " Mean={}".format(np.mean(exp)), " Median={}".format(np.median(exp)), " Variance={}".format(np.var(exp))
    # bins are the classes for the histogram
    if b < 1000:
        bn = 20
    else:
        bn = 200
    h = plt.hist(exp, bins=bn, normed=True)  # bins are the classes for the histogram
    plt.show()  # displays the histogram
I am calling this function (and 4 other similar functions) from the following:
from ipywidgets import widgets, interactive
from IPython.display import display

print "Enter the desired distribution. The options are: Binomial, Exponencial, Geometrica, Lognormal and Triangular"
text = widgets.Text()
display(text)

def handle_submit(sender):
    print(text.value)
    if text.value == "Binomial":
        return dist_bin()
    elif text.value == "Exponencial":
        return dist_exp()
    elif text.value == "Geometrica":
        return dist_geom()
    elif text.value == "Lognormal":
        return dist_log()
    elif text.value == "Triangular":
        return dist_tri()
    else:
        print "Please enter a valid distribution. Pay attention to the options."

text.on_submit(handle_submit)
So, every time the user types a valid string in the textbox, I need to run a function, but I get this error instantly:
---------------------------------------------------------------------------
EOFError                                  Traceback (most recent call last)
<ipython-input-8-1e49bbab45fa> in handle_submit(sender)
     10     print(text.value)
     11     if text.value == "Binomial":
---> 12         return dist_bin()
     13     elif text.value == "Exponencial":
     14         return dist_exp()

<ipython-input-5-081f517da431> in dist_bin()
      1 #Distribución Binomial
      2 def dist_bin():
----> 3     n = int(raw_input("Ingrese n: "))  #número de intentos
      4     p = float(raw_input("Ingrese p: "))  #probabilidad de cada intento
      5     num = int(raw_input("Ingrese la cantidad de numeros a generar: "))

EOFError: EOF when reading a line
I will appreciate any help.
Thank you all!

Pandas + Python: More efficient code

This is my code:
import pandas as pd
import os
import glob as g

archivos = g.glob('C:\Users\Desktop\*.csv')
for archiv in archivos:
    nombre = os.path.splitext(archiv)[0]
    df = pd.read_csv(archiv, sep=",")
    d = pd.to_datetime(df['DATA_LEITURA'], format="%Y%m%d")
    df['FECHA_LECTURA'] = d.dt.date
    del df['DATA_LEITURA']
    df['CONSUMO'] = ""
    df['DIAS'] = ""
    df["SUMDIAS"] = ""
    df["SUMCONS"] = ""
    df["CONSANUAL"] = ""
    ordenado = df.sort_values(['NR_CPE','FECHA_LECTURA', 'HORA_LEITURA'], ascending=True)
    ## Group by the CPE
    agrupado = ordenado.groupby('NR_CPE')
    for name, group in agrupado:  # Walk through each group
        indice = group.index.values
        inicio = indice[0]
        fin = indice[-1]
        # Fill the first reading of each CPE with that reading (because there is no previous reading)
        ordenado.CONSUMO.loc[inicio] = 0
        ordenado.DIAS.loc[inicio] = 0
        cont = 0
        for i in indice:  # Walk through what is inside the groups, inside the CPEs (readings)
            if i > inicio and i <= fin:
                cont = cont + 1
                consumo = ordenado.VALOR_LEITURA[indice[cont]] - ordenado.VALOR_LEITURA[indice[cont-1]]
                dias = (ordenado.FECHA_LECTURA[indice[cont]] - ordenado.FECHA_LECTURA[indice[cont-1]]).days
                ordenado.CONSUMO.loc[i] = consumo
                ordenado.DIAS.loc[i] = dias
    # Do the sums; the result is a DataFrame object
    dias = agrupado['DIAS'].sum()
    consu = agrupado['CONSUMO'].sum()
    canu = (consu/dias) * 365
    # Counters with the number of occurrences of groups A, B and C
    conta = 0
    contb = 0
    contc = 0
    # Since it is a DF, I have to iterate over it to do the comparison
    print "Groups:"
    for ind, sumdias in dias.iteritems():
        if sumdias <= 180:
            grupo = "A"
            conta = conta + 1
        elif sumdias > 180 and sumdias <= 365:
            grupo = "B"
            contb = contb + 1
        elif sumdias > 365:
            grupo = "C"
            contc = contc + 1
    print "group A: ", conta
    print "group B: ", contb
    print "group C: ", contc
    # Format the fields so we don't show all the decimals
    Fdias = dias.map('{:.0f}'.format)
    Fcanu = canu.map('{:.2f}'.format)
    frames = [Fdias, consu, Fcanu]
    concat = pd.concat(frames, axis=1).replace(['inf','nan'], [0,0])
    with open('C:\Users\Documents\RPE_PORTUGAL\Datos.csv','a') as f:
        concat.to_csv(f, header=False, columns=['CPE','DIAS','CONSUMO','CONSUMO_ANUAL'])
    try:
        ordenado.to_excel(nombre+'.xls', columns=["NOME_DISTRITO",
            "NR_CPE","MARCA_EQUIPAMENTO","NR_EQUIPAMENTO","VALOR_LEITURA","REGISTADOR","TIPO_REGISTADOR",
            "TIPO_DADOS_RECOLHIDOS","FACTOR_MULTIPLICATIVO_FINAL","NR_DIGITOS_INTEIRO","UNIDADE_MEDIDA",
            "TIPO_LEITURA","MOTIVO_LEITURA","ESTADO_LEITURA","HORA_LEITURA","FECHA_LECTURA","CONSUMO","DIAS"],
            index=False)
        print (archiv)
        print ("===============================================")
        print ("*****The file was created successfully*****")
        print ("===============================================")
    except IOError:
        print ("===================================================")
        print ("!!!!! There was an error writing the file !!!!!")
        print ("===================================================")
This takes a file where I have energy-consumption readings from different dates for every light meter ('NR_CPE') and does some calculations:
Calculate the energy consumption for every 'NR_CPE' by subtracting each reading from the next one, putting the result in a new column named 'CONSUMO'.
Calculate the number of days covered by the readings and sum them up.
Add up the consumption for every 'NR_CPE' and calculate the annual consumption.
Finally, classify every light meter ('NR_CPE') by the number of days it has readings for: A if it has less than 180 days, B between 180 days and 1 year, and C more than a year.
Then write the results to two different files.
Any idea how I should re-code this to get the same output, but faster?
Thank you all.
BTW this is my dataset:
,NOME_DISTRITO,NR_CPE,MARCA_EQUIPAMENTO,NR_EQUIPAMENTO,VALOR_LEITURA,REGISTADOR,TIPO_REGISTADOR,TIPO_DADOS_RECOLHIDOS,FACTOR_MULTIPLICATIVO_FINAL,NR_DIGITOS_INTEIRO,UNIDADE_MEDIDA,TIPO_LEITURA,MOTIVO_LEITURA,ESTADO_LEITURA,DATA_LEITURA,HORA_LEITURA
0,GUARDA,A002000642VW,101,1865411,4834,001,S,1,1,4,kWh,1,1,A,20150629,205600
1,GUARDA,A002000642VW,101,1865411,4834,001,S,1,1,4,kWh,2,2,A,20160218,123300
2,GUARDA,A002000642VJ,122,204534,25083,001,S,1,1,5,kWh,1,1,A,20150629,205700
3,GUARDA,A002000642VJ,122,204534,27536,001,S,1,1,5,kWh,2,2,A,20160218,123200
4,GUARDA,A002000642HR,101,1383899,11734,001,S,1,1,5,kWh,1,1,A,20150629,205600
5,GUARDA,A002000642HR,101,1383899,11800,001,S,1,1,5,kWh,2,2,A,20160218,123000
6,GUARDA,A002000995VM,101,97706436,12158,001,S,1,1,5,kWh,1,3,A,20150713,155300
7,GUARDA,A002000995VM,101,97706436,12163,001,S,1,1,5,kWh,2,2,A,20160129,162300
8,GUARDA,A002000995VM,101,97706436,12163,001,S,1,1,5,kWh,2,2,A,20160202,195800
9,GUARDA,A2000995VM,101,97706436,12163,001,S,1,1,5,kWh,1,3,A,20160404,145200
10,GUARDA,A002000996LV,168,5011703276,3567,001,V,1,1,6,kWh,1,1,A,20150528,205900
11,GUARDA,A02000996LV,168,5011703276,3697,001,V,1,1,6,kWh,2,2,A,20150929,163500
12,GUARDA,A02000996LV,168,5011703276,1287,002,P,1,1,6,kWh,1,1,A,20150528,205900
Generally you want to avoid for loops in pandas.
For example, the first loop where you calculate total consumption and days could be rewritten as a groupby apply something like:
def last_minus_first(df):
    columns_of_interest = df[['VALOR_LEITURA', 'days']]
    diff = columns_of_interest.iloc[-1] - columns_of_interest.iloc[0]
    return diff

df['date'] = pd.to_datetime(df['DATA_LEITURA'], format="%Y%m%d")
df['days'] = (df['date'] - pd.datetime(1970,1,1)).dt.days  # create days column
df.groupby('NR_CPE').apply(last_minus_first)
(btw, I don't understand why you are subtracting each entry from the previous; surely for meter readings this is the same as last minus first?)
Then given the result of the above as consumption, you can replace your second for loop (for ind, sumdias in dias.iteritems()) with something like:
pd.cut(consumption.days, [-1, 180, 365, np.inf], labels=['a', 'b', 'c']).value_counts()
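As a quick illustration of that pd.cut pattern, with made-up day totals standing in for your groupby result:

import numpy as np
import pandas as pd

# Hypothetical per-meter day totals, standing in for the real groupby output
consumption = pd.DataFrame({'days': [90, 200, 400, 50, 700]})

counts = pd.cut(consumption.days, [-1, 180, 365, np.inf],
                labels=['a', 'b', 'c']).value_counts()
print(counts)  # two meters in 'a', one in 'b', two in 'c'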
