How to fix for loop in Python Visio Converter - python

I am trying to run a for loop that stops when it reaches a certain date. It does stop, but it draws multiple shapes on top of each other when I only want one. The number of shapes drawn seems to depend on the row number in Excel. I am not sure how to fix this; any help would be appreciated.
from PIL import Image, ImageDraw, ImageFont
import win32com.client
from win32com.client import constants as vis
app = win32com.client.gencache.EnsureDispatch( 'Visio.Application' )
current = datetime.datetime(*xlrd.xldate_as_tuple(sheet3.cell_value(7,9), wb.datemode))
currentDate = current.strftime('%m/%d')
dateList = []
for row in range(1,sheet3.nrows):
if sheet3.cell_value(row,13) == "":
continue
date = datetime.datetime(*xlrd.xldate_as_tuple(sheet3.cell_value(row,13), wb.datemode))
dateList.append(date.strftime('%m/%d'))
for date in dateList:
x1 = sheet3.cell_value(row,14)
x2 = sheet3.cell_value(row,15)
y1 = sheet3.cell_value(row,16)
y2 = sheet3.cell_value(row,17)
borderColor = 0
borderType = 0
colorValue = sheet3.cell_value(9,10)
colorFunc(x1,y1,x2,y2)
shape.Cells('FillforegndTrans').FormulaU = sheet3.cell_value(7,10)
if currentDate == date:
break

I have figured it out. Instead of the second for loop, I just need an if statement at the end: if currentDate is in dateList, then break.
dateList = []
for row in range(1,sheet3.nrows):
if sheet3.cell_value(row,13) == "":
continue
date = datetime.datetime(*xlrd.xldate_as_tuple(sheet3.cell_value(row,13), wb.datemode))
dateList.append(date.strftime('%m/%d'))
current = datetime.datetime(*xlrd.xldate_as_tuple(sheet3.cell_value(7,9), wb.datemode))
currentDate = current.strftime('%m/%d')
x1 = sheet3.cell_value(row,14)
x2 = sheet3.cell_value(row,15)
y1 = sheet3.cell_value(row,16)
y2 = sheet3.cell_value(row,17)
borderColor = 0
borderType = 0
colorValue = sheet3.cell_value(9,10)
colorFunc(x1,y1,x2,y2)
shape.Cells('FillforegndTrans').FormulaU = sheet3.cell_value(7,10)
if currentDate in dateList:
break

Related

How can I take the last value -1 pandas

I am trying to write a function that checks whether a date is in my Excel file and, if it is not, retrieves the closest earlier date.
I managed to do this for the following (later) date, and here is my code.
It is only for the earlier date that I can't get it to work.
I tried this for the earlier date:
def get_all_dates_between_2_dates_with_special_begin_substraction(Class, date_départ, date_de_fin, date_debut_analyse, exclus=False):
date_depart = date_départ
date_fin = date_de_fin
result_dates = []
inFile = "database/Calendar_US_Target.xlsx"
inSheetName = "Sheet1"
df =(pd.read_excel(inFile, sheet_name = inSheetName))
date_depart = datetime.datetime.strptime(date_depart, '%Y-%m-%d')
date_fin = datetime.datetime.strptime(date_fin, '%Y-%m-%d')
date_calcul_depart = datetime.datetime.strptime(date_debut_analyse, '%Y-%m-%d')
var_date_depart = date_depart
time_to_add = ""
if (Class.F0 == "mois"):
time_to_add = relativedelta(months=1)
if (Class.F0 == "trimestre"):
time_to_add = relativedelta(months=3)
if (Class.F0 == "semestre"):
time_to_add = relativedelta(months=6)
if (Class.F0 == "année"):
time_to_add = relativedelta(years=1)
while var_date_depart <= date_fin:
-------------------------------------------------------------
df['mask'] = (var_date_depart <= df['TARGETirs_holi']) # daybefore
print(df.head())
print(df[df.mask =="True"].head(1)) #want to check the last true value
------------------------------------------------------------------------------
if (result >= date_calcul_depart):
result = (str(result)[0:10])
result = result[8:10] + "/" + result[5:7] + "/" + result[0:4]
result_dates.append(str(result))
var_date_depart = var_date_depart + time_to_add
if (exclus == True):
result_dates = result_dates[1:-1]
return(result_dates)
In other words, I want to build a column (or a dataframe) that is True wherever the date in the file is earlier than or equal to the date I am looking for, and then take the last value that is True.
for example:
I have this array: [12-05-2022, 15-05-2022, 16-05-2022, 19-05-2022].
If I pass 15-05-2022, it gives me 15-05-2022, but if I pass 18-05-2022, it gives me 16-05-2022.
Thanks!

I am getting 'index out of bound error' when reading from csv in pandas but not when I extract the data via api. What could be the reason?

So for my bot, I am first extracting data via api and storing it in csv. When I run my for loop on data via api, it gives no error and runs smoothly.
But when the csv file is read and run, it gives out of bound error.
This is my function to generate data:
full_list = pd.DataFrame(columns=("date","open","high","low","close","volume","ticker","RSI","ADX","20_sma","max_100"))
def stock_data(ticker):
    """Build the per-ticker frame of candles plus indicator columns.

    Calls ``fetchOHLC(ticker, 'minute', 60)`` and adds the ``ticker``,
    ``RSI``, ``ADX``, ``20_sma`` and ``max_100`` columns. Rows containing
    NaN are dropped and the index is reset before the frame is returned.

    Args:
        ticker: Symbol passed to ``fetchOHLC`` and stored in the ``ticker``
            column.

    Returns:
        The frame returned by ``fetchOHLC``, modified in place.
    """
    create_data = fetchOHLC(ticker, 'minute', 60)
    create_data["ticker"] = ticker
    create_data["RSI"] = round(rsi(create_data, 25), 2)
    create_data["ADX"] = round(adx(create_data, 14), 2)
    # ``shift()`` moves each rolling value down one row, so a row only sees
    # statistics computed from the rows before it.
    # NOTE(review): the column is called "20_sma" but the window is 10 --
    # confirm which one is intended.
    create_data["20_sma"] = round(create_data.close.rolling(10).mean().shift(), 2)
    create_data["max_100"] = create_data.close.rolling(100).max().shift()
    # The rolling windows leave NaN in the leading rows; drop them and
    # renumber the remaining rows.
    create_data.dropna(inplace=True, axis=0)
    create_data.reset_index(inplace=True)
    return create_data
stocklist = open("stocklist.txt","r+")
tickers = stocklist.readlines()
for x in tickers:
try:
full_list = full_list.append(stock_data(x.strip()))
except:
print(f'{x.strip()} did not work')
full_list.to_csv("All_Data")
full_list
So when I run the same code below on dataframe created I got no error. But when I run the same code on the csv file, I get out of bound error.
list_tickers = full_list["ticker"].unique()
for y in list_tickers[:2]:
main = full_list[full_list["ticker"]==y]
pos = 0
num = 0
tick = y
signal_time = 0
signal_rsi = 0
signal_adx = 0
buy_time = 0
buy_price = 0
sl = 0
#to add trailing sl in this.
for x in main.index:
maxx = main.iloc[x]["max_100"]
rsi = main.iloc[x]["RSI"]
adx = main.iloc[x]["ADX"]
sma = main.iloc[x]["20_sma"]
close = main.iloc[x]["close"]
high = main.iloc[x]["high"]
if rsi > 80 and adx > 35 and close > maxx:
if pos == 0:
buy_price = main.iloc[x+1]["open"]
buy_time = main.iloc[x+1]["date"]
pos=1
signal_time = main.iloc[x]["date"]
signal_rsi = main.iloc[x]["RSI"]
signal_adx = main.iloc[x]["ADX"]
elif close < sma:
if pos == 1:
sell_time = main.iloc[x]["date"]
sell_price = sma*.998
pos=0
positions.loc[positions.shape[0]] = [y,signal_time,signal_rsi,signal_adx,buy_time,buy_price,sell_time,sell_price]
Any idea why?
Here is a cleanup and file call code:
full_list = pd.read_csv("All_data")
full_list.dropna(inplace=True, axis=0)
# "Unnamed: 0" is the index of the previous dataframe, written out by to_csv.
# drop() returns a new frame, so the result has to be assigned back.
full_list = full_list.drop(labels="Unnamed: 0", axis=1)
full_list.head(5)
Thanks

How can i create an input for choosing different files to access?

I am quite new to python so please bear with me.
Currently, this is my code:
import pandas as pd
import statistics
import matplotlib.pyplot as plt
import math
from datetime import datetime
start_time = datetime.now()
gf = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/exfileCLEAN2.csv",
skiprows=[1])
bf = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/2SeconddatasetCLEAN.csv",
skiprows=[1])
df = (input("Which data set? "))
magnitudes = (df['Magnitude '].values)
times = df['Time '].values
average = statistics.mean(magnitudes)
sd = statistics.stdev(magnitudes)
below = sd * 3
class data_set:
    """Window of 80 samples around row ``index`` of the module-level ``df``.

    Attributes:
        i: The row label the window is built around.
        mid_time: ``df['Time '][index]``.
        mid_mag: ``df['Magnitude '][index]``.
        times: ``df['Time ']`` values for rows ``index - 40`` .. ``index + 39``.
        mags: ``df['Magnitude ']`` values for the same rows.
    """

    def __init__(self, index):
        ran = 80
        # Integer division keeps the row label an int; ``ran / 2`` would
        # produce a float label in Python 3.
        # NOTE(review): no bounds check -- rows near either end of ``df``
        # will reference labels that may not exist.
        first = index - ran // 2
        self.mags = []
        self.i = index
        self.mid_time = df['Time '][index]
        self.mid_mag = df['Magnitude '][index]
        self.times = []
        for offset in range(ran):
            self.times.append(df['Time '][first + offset])
            self.mags.append(df['Magnitude '][first + offset])
data = []
today = float(input("What is the range? "))
i = 0
while (i < len(df['Magnitude '])):
if (abs(df['Magnitude '][i]) <= (average - below)):
# check if neighbours
t = df['Time '][i]
tt = True
for d in range(len(data)):
if abs(t - data[d].mid_time) <= today:
# check if closer to center
if df['Magnitude '][i] < data[d].mid_mag:
data[d] = data_set(i)
print("here")
tt = False
break
if tt:
data.append(data_set(i))
i += 1
print("found values")
# graphing
height = 2 # Change this for number of columns
width = math.ceil(len(data) / height)
if width < 2:
width = 2
fig, axes = plt.subplots(width, height, figsize=(30, 30))
row = 0
col = 0
for i in range(len(data)):
axes[row][col].plot(data[i].times, data[i].mags)
col += 1
if col > height - 1:
col = 0
row += 1
plt.show()
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))
Currently, the error produced is this:
/Users/aaronhuang/.conda/envs/EXTTEst/bin/python "/Users/aaronhuang/PycharmProjects/EXTTEst/Code sandbox.py"
Which data set? gf
Traceback (most recent call last):
File "/Users/aaronhuang/PycharmProjects/EXTTEst/Code sandbox.py", line 14, in <module>
magnitudes = int(df['Magnitude '].values)
TypeError: string indices must be integers
Process finished with exit code 1
I am trying to have the user be able to choose which file to access to perform the rest of the code on.
So if the user types gf I would like the code to access the first data file.
Any help would be appreciated. Thank you
Why not use an if-statement at the beginning? Try this:
instead of:
gf = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/exfileCLEAN2.csv",
skiprows=[1])
bf = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/2SeconddatasetCLEAN.csv",
skiprows=[1])
df = (input("Which data set? "))
Use this:
choice = input("Which data set? ")
if choice == "gf":
    df = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/exfileCLEAN2.csv",
                     skiprows=[1])
elif choice == "bf":
    df = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/2SeconddatasetCLEAN.csv",
                     skiprows=[1])
else:
    print("Error. Your choice is not valid")
    # ``break`` is only legal inside a loop; at module level, stop the
    # script so the rest of the code never runs without a dataframe.
    raise SystemExit(1)

Is python pandas dataframe too slow?

I have an interesting problem. I have two files, NYPD_Motor_Collisions.csv has 1.2M lines and weatherfinal.txt has 109K lines. The objective is to merge the temp and prec data from weatherfinal.txt to the Collisions files as two columns based on the latitudes and longitudes. I wrote the following code using dataframe in pandas python.
from math import cos, asin, sqrt
import pandas as pd
import numpy as np
import os
import re
import datetime
def distance(lat1, lon1, lat2, lon2):
    """Return the haversine (great-circle) distance between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance in kilometres (12742 km is twice the mean Earth
        radius of 6371 km).
    """
    p = 0.017453292519943295  # pi / 180: degrees -> radians
    a = 0.5 - cos((lat2-lat1)*p)/2 + cos(lat1*p)*cos(lat2*p) * (1-cos((lon2-lon1)*p)) / 2
    # Floating-point rounding can push ``a`` marginally outside [0, 1], which
    # would make sqrt/asin raise ValueError; clamp it to the valid domain.
    a = min(1.0, max(0.0, a))
    return 12742 * asin(sqrt(a))
def closest(data, v):
    """Return the entry of ``data`` that lies nearest to ``v``.

    Args:
        data: Iterable of mappings that each have ``'lat'`` and ``'lon'`` keys.
        v: Mapping with ``'lat'`` and ``'lon'`` keys for the reference point.

    Returns:
        The element of ``data`` with the smallest ``distance`` to ``v``.

    Raises:
        ValueError: If ``data`` is empty (raised by ``min``).
    """
    return min(data, key=lambda p: distance(v['lat'], v['lon'], p['lat'], p['lon']))
tempDataList = []
#v = {'lat': 39.7622290, 'lon': -86.1519750}
#print(closest(tempDataList, v))
print os.getcwd()
filed_ = open("weatherfinal.txt", 'r')
fileo_ = open("weatherfinal_updated.txt","w")
lines_ = filed_.readlines()
for line_ in lines_:
outline = re.sub(" +"," ",line_)
fileo_.write(outline + "\n")
fileo_.close()
df = pd.read_csv("NYPD_Motor_Vehicle_Collisions.csv")
colhead = np.append(df.columns.values,['TEMP', 'PREP'])
outdf = pd.DataFrame(columns=colhead)
df2 = pd.read_csv("weatherfinal_updated.txt",' ')
df2.set_index(['WBANNO', 'LST_DATE', 'LST_TIME'])
sensorIds = df2['WBANNO'].unique()
for ids_ in sensorIds:
longitude = df2.loc[df2['WBANNO']==ids_,'LONGITUDE'].iloc[0]
latitude = df2.loc[df2['WBANNO'] == ids_, 'LATITUDE'].iloc[0]
tempDataList.append({'lat':latitude,'lon':longitude,'SENSORID': ids_ })
print tempDataList
for index, row in df.iterrows():
lon_ = row['LONGITUDE']
lat_ = row['LATITUDE']
tdate = row['DATE']
ttime = row['TIME']
tcal = 5
pcal = 0
fwdate = datetime.datetime.strptime(str(tdate), '%m/%d/%Y').strftime('%Y%m%d')
fwtime = datetime.datetime.strptime(str(ttime), '%H:%M').strftime('%H%M')
ntime = float(fwtime) + float(100)
closests_ = closest(tempDataList, {'lat':lat_,'lon':lon_})
sensorid = closests_['SENSORID']
usedSensorId = sensorid
selectedWeatherRow = df2.loc[(df2.WBANNO == sensorid) & (df2.LST_DATE == float(fwdate)) & (df2.LST_TIME >= float(fwtime)) & (df2.LST_TIME < ntime) ,['T_CALC', 'P_CALC']]
if len(selectedWeatherRow.index) == 0:
for sensId in sensorIds:
if sensId == sensorid:
continue
selectedWeatherRow = df2.loc[(df2.WBANNO == sensId) & (df2.LST_DATE == float(fwdate)) & (df2.LST_TIME >= float(fwtime)) & (df2.LST_TIME < ntime), ['T_CALC', 'P_CALC']]
if len(selectedWeatherRow.index) == 0:
continue
else:
tcal = selectedWeatherRow['T_CALC'].values[0]
pcal = selectedWeatherRow['P_CALC'].values[0]
usedSensorId = sensId
break
else:
tcal = selectedWeatherRow['T_CALC'].values[0]
pcal = selectedWeatherRow['P_CALC'].values[0]
row['TEMP'] = tcal
row['PREP'] = pcal
outdf.loc[index] = row
print index, tcal, pcal, fwdate, fwtime, ntime, usedSensorId
print "Loop completed"
outdf.to_csv("NYPD_TRAFFIC_DATA.csv")
print "file completed"
This program has been running for days. Not sure why dataframe is too slow. I rewrote the program without dataframe using dictionaries and it completed in a few minutes. Not sure if dataframe is slow or I am not using it correctly. Just posting here for learning.

OverflowError mktime argument out of range

After solving a naive datetime problem I am facing a new problem on a view to generate graphs. Now I get mktime argument out of range.
I have no idea how to solve it. I didn't write the code; I am using it from a colleague of mine, and I can't seem to understand why it fails. I think it has to do with a function that runs over time, and then the error pops up.
#login_required(login_url='/accounts/login/')
# View that queries Repository to build several chart datasets (data, data2,
# data3, databurst, datastack) and renders them into 'loggedin.html'.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the loops below has to be inferred from the statement order.
def loggedin(request):
data = []
data2 = []
data3 = []
dicdata2 = {}
dicdata3 = {}
datainterior = []
today = timezone.localtime(timezone.now()+timedelta(hours=1)).date()
tomorrow = today + timedelta(1)
semana= today - timedelta(7)
# From here on ``today`` holds the previous day's date.
today = today - timedelta(1)
# NOTE(review): ``semana`` is never used below; semana_start is built from
# ``today``, not from ``semana`` -- confirm whether that is intended.
semana_start = datetime.combine(today, time())
semana_start = timezone.make_aware(semana_start, timezone.utc)
today_start = datetime.combine(today, time())
today_start = timezone.make_aware(today_start, timezone.utc)
today_end = datetime.combine(tomorrow, time())
today_end = timezone.make_aware(today_end, timezone.utc)
# One {"label", "value"} entry per distinct des_especialidade, with its row count.
for modulo in Repository.objects.values("des_especialidade").distinct():
dic = {}
mod = str(modulo['des_especialidade'])
dic["label"] = str(mod)
dic["value"] = Repository.objects.filter(des_especialidade__iexact=mod).count()
data.append(dic)
# One {"label", "value"} entry per distinct modulo, counting rows with dt_diag >= semana_start.
for modulo in Repository.objects.values("modulo").distinct():
dic = {}
mod = str(modulo['modulo'])
dic["label"] = str(mod)
dic["value"] = Repository.objects.filter(modulo__iexact=mod, dt_diag__gte=semana_start).count()
datainterior.append(dic)
# print mod, Repository.objects.filter(modulo__iexact=mod).count()
# data[mod] = Repository.objects.filter(modulo__iexact=mod).count()
dicdata2['values'] = datainterior
dicdata2['key'] = "Cumulative Return"
dicdata3['values'] = data
dicdata3['color'] = "#d67777"
dicdata3['key'] = "Diagnosticos Identificados"
data3.append(dicdata3)
data2.append(dicdata2)
#-------sunburst
# Nested name/children dictionaries: modulo -> des_especialidade -> cod_diagnosis,
# where each cod_diagnosis entry carries its row count as 'size'.
databurst = []
dictburst = {}
dictburst['name'] = "CHP"
childrenmodulo = []
for modulo in Repository.objects.values("modulo").distinct():
childrenmodulodic = {}
mod = str(modulo['modulo'])
childrenmodulodic['name'] = mod
childrenesp = []
for especialidade in Repository.objects.filter(modulo__iexact=mod).values("des_especialidade").distinct():
childrenespdic = {}
esp = str(especialidade['des_especialidade'])
childrenespdic['name'] = esp
childrencode = []
for code in Repository.objects.filter(modulo__iexact=mod,des_especialidade__iexact=esp).values("cod_diagnosis").distinct():
childrencodedic = {}
codee= str(code['cod_diagnosis'])
childrencodedic['name'] = 'ICD9 - '+codee
childrencodedic['size'] = Repository.objects.filter(modulo__iexact=mod,des_especialidade__iexact=esp,cod_diagnosis__iexact=codee).count()
childrencode.append(childrencodedic)
childrenespdic['children'] = childrencode
#childrenespdic['size'] = Repository.objects.filter(des_especialidade__iexact=esp).count()
childrenesp.append(childrenespdic)
childrenmodulodic['children'] = childrenesp
childrenmodulo.append(childrenmodulodic)
dictburst['children'] = childrenmodulo
databurst.append(dictburst)
# print databurst
# --------stacked area chart
# For each modulo, a list of [milliseconds-string, count] pairs, one per month,
# starting one year before the current month.
datastack = []
for modulo in Repository.objects.values("modulo").distinct():
datastackdic = {}
mod = str(modulo['modulo'])
datastackdic['key'] = mod
monthsarray = []
year = timezone.localtime(timezone.now()+timedelta(hours=1)).year
month = timezone.localtime(timezone.now()+timedelta(hours=1)).month
last = timezone.localtime(timezone.now()+timedelta(hours=1)) - relativedelta(years=1)
lastyear = int(last.year)
lastmonth = int(last.month)
#i = 1
# NOTE(review): lastmonth only ever takes values 1..12, so when ``month`` is 12
# the first clause is always true and this loop never ends on its own; lastyear
# keeps growing until a later call fails.
while lastmonth <= int(month) or lastyear<int(year):
date = str(lastmonth) + '/' + str(lastyear)
if (lastmonth < 12):
datef = str(lastmonth + 1) + '/' + str(lastyear)
else:
# December: the end of the interval is January of the following year.
# NOTE(review): ``01`` is Python 2 literal syntax; it is a SyntaxError in Python 3.
lastmonth = 01
lastyear = int(lastyear)+1
datef = str(lastmonth)+'/'+ str(lastyear)
# Reset to 0 so the ``lastmonth += 1`` at the end of the iteration yields 1.
lastmonth = 0
datainicial = datetime.strptime(date, '%m/%Y')
datainicial = timezone.make_aware(datainicial, timezone.utc)
datafinal = datetime.strptime(datef, '%m/%Y')
datafinal = timezone.make_aware(datafinal, timezone.utc)
#print "lastmonth",lastmonth,"lastyear", lastyear
#print "datainicial:",datainicial,"datafinal: ",datafinal
filtro = Repository.objects.filter(modulo__iexact=mod)
count = filtro.filter(dt_diag__gte=datainicial, dt_diag__lt=datafinal).count()
conv = datetime.strptime(date, '%m/%Y')
ms = datetime_to_ms_str(conv)
monthsarray.append([ms, count])
#i += 1
lastmonth += 1
datastackdic['values'] = monthsarray
datastack.append(datastackdic)
#print datastack
# NOTE(review): contador_novas is only assigned under this ``if``; if the
# return below sits outside it, a user whose last_login is None would hit an
# unbound name -- confirm against the original indentation.
if request.user.last_login is not None:
#print(request.user.last_login)
contador_novas = Repository.objects.filter(dt_diag__lte=today_end, dt_diag__gte=today_start).count()
return render_to_response('loggedin.html',
{'user': request.user.username, 'contador': contador_novas, 'data': data, 'data2': data2,
'data3': data3,
'databurst': databurst, 'datastack':datastack})
def datetime_to_ms_str(dt):
    """Return ``dt`` as a string of milliseconds since the epoch.

    Args:
        dt: A ``datetime`` whose ``timetuple()`` is passed to ``mktime``.

    Returns:
        ``str(1000 * mktime(dt.timetuple()))``. Assuming ``mktime`` is
        ``time.mktime``, the tuple is read as local time and the result is a
        float, so the string ends in ``".0"``.

    Raises:
        OverflowError: If ``mktime`` cannot represent the date, for example
            when the caller passes a year far in the future.
    """
    return str(1000 * mktime(dt.timetuple()))
I think the problem is with this condition.
while lastmonth <= int(month) or lastyear<int(year):
During December, month=12, so lastmonth <= int(month) will always be True. So the loop condition will always be True, even once lastyear is more than the current year.
You want to loop if the loop is in the previous year, or if the loop is in the current year and the month is not in the future. Therefore, I think you want to change it to the following:
while lastyear < year or (lastyear == year and lastmonth <= month):
To be sure that the code is working and to understand it, you need to add lots of print statements to the loops, see how lastmonth and lastyear change, and check that the loop exits when you expect it to. You also need to test it for other values of year and month so that it doesn't break next month. Ideally you want to extract this bit of the code into a separate function. It would be easier to understand the loop if it only returned a list of (month, year) integers, instead of doing lots of date formatting at the same time. Then it would be easier to add unit tests.

Categories