I am trying to write a function that checks whether a date is in my Excel file and, if it is not, retrieves the closest earlier date instead.
I succeeded with the after date and here is my code.
Only with the date before, I really can't do it.
i tried this for the day before:
def get_all_dates_between_2_dates_with_special_begin_substraction(Class, date_départ, date_de_fin, date_debut_analyse, exclus=False):
    """Step through a date range and snap each step to the previous calendar date.

    Starting at ``date_départ`` and advancing by the period named in
    ``Class.F0`` until ``date_de_fin`` is passed, each step is replaced by the
    latest date of the ``TARGETirs_holi`` column (sheet ``Sheet1`` of
    ``database/Calendar_US_Target.xlsx``) that is on or before it. A step that
    is itself in the calendar is kept unchanged.

    Args:
        Class: object whose ``F0`` attribute is one of ``"mois"``,
            ``"trimestre"``, ``"semestre"`` or ``"année"``.
        date_départ: first step, as a ``YYYY-MM-DD`` string.
        date_de_fin: last admissible step, as a ``YYYY-MM-DD`` string.
        date_debut_analyse: ``YYYY-MM-DD`` string; snapped dates earlier than
            this are dropped.
        exclus: when true, the first and last collected dates are removed.

    Returns:
        The collected dates as ``DD/MM/YYYY`` strings.

    Raises:
        ValueError: if ``Class.F0`` is not a supported frequency.
    """
    steps = {
        "mois": relativedelta(months=1),
        "trimestre": relativedelta(months=3),
        "semestre": relativedelta(months=6),
        "année": relativedelta(years=1),
    }
    # An unknown frequency used to leave the step as "" and fail much later.
    if Class.F0 not in steps:
        raise ValueError("Unsupported frequency: %r" % (Class.F0,))
    time_to_add = steps[Class.F0]

    inFile = "database/Calendar_US_Target.xlsx"
    inSheetName = "Sheet1"
    df = pd.read_excel(inFile, sheet_name=inSheetName)
    # NOTE(review): assumes this column already holds datetime values - confirm.
    calendar_dates = df['TARGETirs_holi']

    date_depart = datetime.datetime.strptime(date_départ, '%Y-%m-%d')
    date_fin = datetime.datetime.strptime(date_de_fin, '%Y-%m-%d')
    date_calcul_depart = datetime.datetime.strptime(date_debut_analyse, '%Y-%m-%d')

    result_dates = []
    var_date_depart = date_depart
    while var_date_depart <= date_fin:
        # "Date before": the largest calendar date that is <= the current step.
        on_or_before = calendar_dates[calendar_dates <= var_date_depart]
        if not on_or_before.empty:
            result = on_or_before.max()
            if result >= date_calcul_depart:
                result_dates.append(result.strftime("%d/%m/%Y"))
        var_date_depart = var_date_depart + time_to_add

    if exclus:
        result_dates = result_dates[1:-1]
    return result_dates
What I mean is: build a column (or a DataFrame) that is True wherever the calendar date is less than or equal to the date I am looking for, and then take the last row for which it is True.
for example:
I have this array [12-05-2022,15-05-2022,16-05-2022 and 19-05-2022]
If I pass 15-05-2022, I get 15-05-2022, but if I pass 18-05-2022, I get 16-05-2022.
I have a python script which has multiple static functions. I want to convert that complete python script into a python library
import pandas as pd
import numpy as np
import EA_Upload_config as cfg
import datetime
def clockPrint(sentence):
    """Print ``sentence`` prefixed with the current local time as HH:MM:SS."""
    stamp = datetime.datetime.now().strftime("%H:%M:%S")
    print(stamp + " : " + sentence)
def uploadToEA(df_,ds_api_name,operation_,instance,xmd_=None): #Upsert #Overwrite
    """Upload ``df_`` to an Einstein Analytics dataset on the given instance.

    Args:
        df_: DataFrame to upload.
        ds_api_name: API name of the target dataset.
        operation_: load operation passed through to ``load_df_to_EA``
            (the original comment lists "Upsert" and "Overwrite").
        instance: ``"Commercial"`` or ``"Analytical"`` (case-insensitive);
            selects the Salesforce environment URL.
        xmd_: optional XMD metadata passed through to ``load_df_to_EA``.

    Raises:
        ValueError: if ``instance`` is not one of the two known instances.
            Previously this surfaced as an UnboundLocalError on ``EAS``.
    """
    env_urls = {
        'commercial': 'https://spglobalratings.my.salesforce.com',
        'analytical': 'https://spglobalratingsae.my.salesforce.com',
    }
    # Validate before any import, logging or browser work happens.
    try:
        env_url = env_urls[instance.lower()]
    except KeyError:
        raise ValueError(
            "Unknown instance %r: expected 'Commercial' or 'Analytical'." % (instance,)
        ) from None

    # Imported lazily so the module can be imported without the EA package.
    import SalesforceEinsteinAnalytics as EA

    clockPrint("Upload Process Initiated for "+instance+" instance...")
    EAS = EA.salesforceEinsteinAnalytics(env_url=env_url, browser='chrome')
    # Original author's note: "Error because of fillna=False".
    EAS.load_df_to_EA(df_,dataset_api_name=ds_api_name, operation=operation_,xmd=xmd_,fillna=False)
    clockPrint("Upload Process Completed successfully for "+instance+" instance. Navigate to (Einstein Analytics --> Data Manager --> Monitor) to check progress.")
def processDate(date):
    """Format ``date`` as ``MM/DD/YYYY``; null input yields ``np.nan``."""
    if not pd.isnull(date):
        parsed = pd.to_datetime(date)
        return datetime.datetime.strftime(parsed, "%m/%d/%Y")
    return np.nan
if __name__ == '__main__':
    # Load the CSV named in the config and normalise it before uploading.
    df = pd.read_csv(cfg.FILE_PATH)

    # Columns explicitly configured as dates are rendered as MM/DD/YYYY strings.
    for c in cfg.DATE_COLUMNS:
        df[c] = df[c].apply(processDate)

    # Fill missing values per dtype. The result is assigned back to the column:
    # ``df[c].fillna(..., inplace=True)`` acts on a temporary selection and is
    # not guaranteed to modify ``df`` (it does not under pandas Copy-on-Write).
    for c in df.columns:
        if df[c].dtype == "O":
            df[c] = df[c].fillna('')
        elif np.issubdtype(df[c].dtype, np.number):
            df[c] = df[c].fillna(0)
        elif df[c].dtype == "datetime64[ns]":
            df[c] = df[c].apply(processDate).fillna("")
    df.fillna("", inplace=True)

    # Upload once per configured instance, using that instance's settings.
    for instance in cfg.INSTANCES:
        if instance.lower() == 'commercial':
            uploadToEA(df, cfg.COM_DATASET_API_NAME, cfg.COM_OPERATION, instance, cfg.COM_XMD)
        elif instance.lower() == 'analytical':
            uploadToEA(df, cfg.ANA_DATASET_API_NAME, cfg.ANA_OPERATION, instance, cfg.ANA_XMD)
        else:
            clockPrint("Update INSTANCES variable as ['Commercial'] or ['Analytical'] or ['Commercial','Analytical'].")
This is my complete python script which I want to convert it into a library. How should I do it?
I am using python3.7 and this is the current code base(apologies for putting so much code but thought it would help overall)
# Build and run the 'Trade Entry System' window: one labelled row per entry of
# `columns`, each paired with the matching widget from `core`, then loop on
# window events.
# `sg` is presumably PySimpleGUI - confirm against the file's imports.
# `empty_handler`, `error_handling`, `processing` and `security_creation` are
# defined outside this excerpt.
# NOTE(review): as pasted, the block has lost its indentation, the `core = [`
# list is never closed, and `if status == "ERROR":` has no body. These must be
# restored before the function can run.
# NOTE(review): no branch that leaves the `while True` loop (e.g. on window
# close) is visible in this excerpt.
def TRADE_ENTRY(df_names, df_underlyings,df_strategies, columns, param, out_path,recovery_path):
nameUpdate =0
sg.theme('Dark Brown 1')
# One text label per column name; `header` is built but not used in the visible code.
listing = [sg.Text(u, size = param) for u in columns]
header = [[x] for x in listing]
now = datetime.datetime.now()
# Input widgets, in the same order as `columns`; date and time default to "now".
core = [
sg.Input(f"{now.month}/{now.day}/{now.year}",size = param),
sg.Input(f"{now.hour}:{now.minute}:{now.second}",size = param),
sg.Listbox(list(df_strategies.STRATEGIES), size=(20,2), enable_events=False, key='_PLAYERS0_'),
sg.Listbox(['ETF', 'EQT', 'FUT', 'OPT', 'BOND'],enable_events=False,key='_PLAYERS20_',size = (20,2)),
sg.Listbox(list(df_names.NAMES), size=(20,4), enable_events=False,key='_PLAYERS6_'),
sg.Listbox( ['B', 'S'],size = (20,1),enable_events=False,key='_PLAYERS12_'),
sg.Input(size = param),
sg.Input(size = param),
sg.CalendarButton('Calendar', pad=None, font=('MS Sans Serif', 10, 'bold'),
button_color=('yellow', 'brown'), format=('%d/%m/%Y'), key='_CALENDAR_', target='_INP_'),
sg.Input(size = param),
sg.Listbox(list(df_underlyings.UNDERLYINGS), size=(20,4), enable_events=False,key='_PLAYERS2_'),
sg.Listbox(['C', 'P', 'N/A'],size = param),
# Pair every label with its widget; row 8 also gets the '_INP_' field that the
# calendar button targets.
mesh = [[x,y] for (x,y) in list(zip(listing, core))]
mesh[8].append(sg.Input(size = (10,2),key = '_INP_'))
layout =[[sg.Button("SEND"),sg.Button("NEW_NAME"), sg.Button("NEW_STRAT"), sg.Button("NEW_UND")] ]+ mesh
window = sg.Window('Trade Entry System', layout, font='Courier 12').Finalize()
while True:
# read() is called with timeout=500; the sg.TIMEOUT_KEY branch below handles the timeout event.
event, values = window.read(timeout=500)
#print('EVENT, VALUES', event, values)# all the inputs with extra information for compiler
if event == "SEND":
# Flatten the widget values: list values go through empty_handler, empty strings become "EMPTY".
data = values
a = list(data.values())
a = [x if isinstance(x, list) == False else empty_handler(x) for x in a]
a = [x if x !="" else "EMPTY" for x in a ]
#print('A', a)#all the inputs now in a list
# Build a one-row frame: values indexed by column name, then transposed.
df = pd.DataFrame(a, index = columns)
print('DF1', df)#columns dataframe with column names and then the values
df = df.transpose()
#print('DF2', df)#rows dataframe with column names and then the values
status = error_handling(df)
#print('STATUS', status)
if status == "ERROR":
elif status == "CORRECT":
#if a future then will overwrite its name
if df['TYPE'][0] == "FUT":
df['NAME'][0] = "F-"+ df['UNDERLYING'][0] + "-" +df['EXPIRATION'][0]
#if an option then will overwrite its name
elif df['TYPE'][0] =="OPT":
df['NAME'][0] = 'O-' + df['UNDERLYING'][0] + "--" + df['OPTION_TYPE'][0] +df['STRIKE'][0] +"--" +df['EXPIRATION'][0]
processing(df, recovery_path, out_path)
# The three NEW_* buttons delegate to security_creation with the matching workbook.
elif event == "NEW_NAME":
security_creation(r'Y:\NAMES.xlsx', "Sheet1", "NAME", param)
elif event == "NEW_STRAT":
security_creation(r'Y:\STRATEGIES.xlsx', "Sheet1", "STRATEGY", param)
elif event == "NEW_UND":
security_creation(r'Y:\UNDERLYINGS.xlsx', "Sheet1", "UNDERLYINGS", param)
# On every timeout event the three workbooks are re-read and pushed into the listboxes.
# NOTE(review): `.values.tolist()` yields one list per row, whereas the initial
# listboxes were filled from a single column (e.g. `list(df_names.NAMES)`) - confirm
# this is the intended shape.
elif event == sg.TIMEOUT_KEY:
df_names = pd.read_excel(r'Y:\NAMES.xlsx', "Sheet1")
df =df_names.values.tolist()
window['_PLAYERS6_'].update(values=df, set_to_index=0)
df_underlyings = pd.read_excel(r'Y:\UNDERLYINGS.xlsx', "Sheet1")
df =df_underlyings.values.tolist()
window['_PLAYERS2_'].update(values=df, set_to_index=0)
df_strategies = pd.read_excel(r'Y:\STRATEGIES.xlsx', "Sheet1")
df =df_strategies.values.tolist()
window['_PLAYERS0_'].update(values=df, set_to_index=0)
print("Listboxes updated !")
# Entry point: open the trade-entry window with the data loaded elsewhere.
TRADE_ENTRY(df_names, df_underlyings,df_strategies, columns, param,out_path, recovery_path)
Towards the end of the function there are three `elif` branches: NEW_NAME, NEW_STRAT and NEW_UND each handle the user submitting information to the corresponding Excel file, and the function security_creation actually updates those files. Below them I am trying to update the Listboxes, but with no luck.
Any help would be greatly appreciated since i am so confused
I've been using this script to get the prices from some cryptocurrencies using Binance API and this script:
The problem is that with this script I cannot control the date range: for example, I want to choose the period range between Dec. 2015 and Dec. 2020, or I want the DAILY PRICES from the first day trading for any crypto ...etc.
So I share with you the code I'm using (copied from the steemit code and modified a little bit)
How can I do it?
# https://steemit.com/python/#marketstack/how-to-download-historical-price-data-from-binance-with-python###
import requests
import json
import pandas as pd
import numpy as np
import datetime as dt
frequency = input("Please enter the frequency (1m/5m/30m/.../1h/6h/1d/ : ")


def get_bars(symbol, interval=frequency, start_time=None, end_time=None, limit=None):
    """Download klines for ``symbol`` from Binance as a DataFrame.

    Args:
        symbol: trading pair, e.g. ``'BTCUSDT'``.
        interval: kline interval; defaults to the frequency entered at start-up.
        start_time: optional start of the range, Unix time in milliseconds.
        end_time: optional end of the range, Unix time in milliseconds.
        limit: optional maximum number of rows for this request.

    Returns:
        DataFrame with one row per kline, indexed by the close time converted
        to a local ``datetime``.

    Raises:
        requests.HTTPError: if Binance answers with an error status.
    """
    # NOTE(review): other snippets in this file use /api/v3/klines; confirm
    # that the v1 path is still served before relying on it.
    root_url = 'https://api.binance.com/api/v1/klines'

    # Let requests build and escape the query string; optional filters are
    # only sent when the caller supplied them.
    params = {'symbol': symbol, 'interval': interval}
    if start_time is not None:
        params['startTime'] = int(start_time)
    if end_time is not None:
        params['endTime'] = int(end_time)
    if limit is not None:
        params['limit'] = int(limit)

    response = requests.get(root_url, params=params)
    # Fail loudly on an API error instead of building a frame from the error payload.
    response.raise_for_status()

    df = pd.DataFrame(response.json(), columns=[
        'open_time',
        'o', 'h', 'l', 'c', 'v',
        'close_time', 'qav', 'num_trades',
        'taker_base_vol', 'taker_quote_vol', 'ignore',
    ])
    df.index = [dt.datetime.fromtimestamp(x / 1000.0) for x in df.close_time]
    return df


btcusdt = get_bars('BTCUSDT')
ethusdt = get_bars('ETHUSDT')
Can anyone help me to optimize it?
I am using this out of the binance documentation : https://python-binance.readthedocs.io/en/latest/binance.html?highlight=get_historical_klines#binance.client.Client.get_historical_klines
import os
from binance.client import Client
import pandas as pd
import datetime, time
def GetHistoricalData(self, howLong):
    """Load the last ``howLong`` days of 1-minute BNBBTC klines into ``self.df``.

    Stores the query window in ``self.sinceThisDate`` / ``self.untilThisDate``,
    the raw response in ``self.candle`` and the trimmed frame, indexed by the
    formatted open time, in ``self.df``. Nothing is returned.
    """
    self.howLong = howLong

    # Query window: from `howLong` days ago until now.
    self.untilThisDate = datetime.datetime.now()
    self.sinceThisDate = self.untilThisDate - datetime.timedelta(days = self.howLong)

    # The client takes both bounds as strings.
    self.candle = self.client.get_historical_klines(
        "BNBBTC",
        Client.KLINE_INTERVAL_1MINUTE,
        str(self.sinceThisDate),
        str(self.untilThisDate),
    )

    # Name every field of the raw kline rows.
    kline_fields = ['dateTime', 'open', 'high', 'low', 'close', 'volume', 'closeTime', 'quoteAssetVolume', 'numberOfTrades', 'takerBuyBaseVol', 'takerBuyQuoteVol', 'ignore']
    self.df = pd.DataFrame(self.candle, columns=kline_fields)

    # Open time arrives in milliseconds; format it and use it as the index.
    open_times = pd.to_datetime(self.df['dateTime'], unit='ms')
    self.df['dateTime'] = open_times.dt.strftime(Constants.DateTimeFormat)
    self.df.set_index('dateTime', inplace=True)

    # Keep only the OHLCV columns.
    unused_fields = ['closeTime', 'quoteAssetVolume', 'numberOfTrades', 'takerBuyBaseVol','takerBuyQuoteVol', 'ignore']
    self.df = self.df.drop(columns=unused_fields)
I do hope this helps someone.
(Please note that this method is cut out of a class of mine, so you can drop all the `self.` prefixes.) You also need to set up your client beforehand with:
client = Client(api_key, api_secret)
Any improvements are of course welcome !
This is a function that I used.
Start and end are dates in Unix timestamp format. Interval is graph interval.
And keep in mind Binance did not exist in Dec 2015 :-)
def get_klines_iter(symbol, interval, start, end, limit=5000):
    """Download klines by paging backwards from ``end`` until ``start``.

    Args:
        symbol: trading pair, e.g. ``'BTCUSDT'``.
        interval: kline interval string.
        start: lower bound, Unix time in milliseconds.
        end: upper bound, Unix time in milliseconds.
        limit: rows requested per call.
            NOTE(review): the default of 5000 is above the per-request maximum
            mentioned elsewhere in this file (1000) - confirm how the API
            treats larger values.

    Returns:
        DataFrame of klines in ascending open-time order. The oldest page is
        not trimmed, so it may contain rows that open before ``start``.
    """
    columns = ['Opentime', 'Open', 'High', 'Low', 'Close', 'Volume', 'Closetime', 'Quote asset volume', 'Number of trades','Taker by base', 'Taker buy quote', 'Ignore']
    df = pd.DataFrame()
    startDate = end
    while startDate > start:
        # `limit` replaces the undefined name `iteration` used previously.
        url = ('https://api.binance.com/api/v3/klines?symbol=' + symbol
               + '&interval=' + interval
               + '&limit=' + str(limit)
               + '&endTime=' + str(startDate))
        df2 = pd.read_json(url)
        if df2.empty:
            # No older data: stop instead of failing on the column assignment.
            break
        df2.columns = columns
        df = pd.concat([df2, df], axis=0, ignore_index=True, keys=None)
        # Continue just before the oldest candle received so far, so it is not
        # fetched again and the cursor always moves backwards.
        startDate = int(df2.Opentime.iloc[0]) - 1
    df.reset_index(drop=True, inplace=True)
    return df
from datetime import datetime
import pandas as pd
import requests
from typing import *
import time
class BinanceClient:
    """Small client for Binance's public spot or futures market-data endpoints."""

    def __init__(self, futures=False):
        """Select the spot or futures base URL and load the symbol list.

        Args:
            futures: when true, use the futures API host and endpoints.
        """
        self.exchange = "BINANCE"
        self.futures = futures

        # Previously the spot URL was assigned unconditionally and overwrote
        # the futures one.
        if self.futures:
            self._base_url = "https://fapi.binance.com"
        else:
            self._base_url = "https://api.binance.com"

        self.symbols = self._get_symbols()

    def _make_request(self, endpoint: str, query_parameters: Dict):
        """GET ``endpoint`` and return the decoded JSON, or ``None`` on failure."""
        try:
            response = requests.get(self._base_url + endpoint, params=query_parameters)
        except Exception as e:
            # print() does not interpolate logging-style arguments; format explicitly.
            print("Connection error while making request to %s: %s" % (endpoint, e))
            return None

        if response.status_code == 200:
            return response.json()

        # Report the raw body: an error response is not guaranteed to be JSON.
        print("Error while making request to %s: %s (status code = %s)"
              % (endpoint, response.text, response.status_code))
        return None

    def _get_symbols(self) -> List[str]:
        """Return the symbols listed by exchangeInfo, or ``[]`` if the request failed."""
        params = dict()

        endpoint = "/fapi/v1/exchangeInfo" if self.futures else "/api/v3/exchangeInfo"
        data = self._make_request(endpoint, params)
        if data is None:
            return []

        return [x["symbol"] for x in data["symbols"]]

    def get_historical_data(self, symbol: str, interval: Optional[str] = "1m", start_time: Optional[int] = None, end_time: Optional[int] = None, limit: Optional[int] = 1500):
        """Fetch one page of klines.

        Args:
            symbol: trading pair.
            interval: kline interval string.
            start_time: optional ``startTime`` query value.
            end_time: optional ``endTime`` query value.
            limit: ``limit`` query value.

        Returns:
            A list of 6-tuples of floats built from the first six fields of
            each kline, or ``None`` if the request failed.
        """
        params = dict()
        params["symbol"] = symbol
        params["interval"] = interval
        params["limit"] = limit

        if start_time is not None:
            params["startTime"] = start_time
        if end_time is not None:
            params["endTime"] = end_time

        endpoint = "/fapi/v1/klines" if self.futures else "/api/v3/klines"
        raw_candles = self._make_request(endpoint, params)
        if raw_candles is None:
            return None

        # Only the first six fields of every kline are kept.
        return [tuple(float(value) for value in c[:6]) for c in raw_candles]
def ms_to_dt_utc(ms: int) -> datetime:
    """Convert a Unix timestamp in milliseconds to a naive ``datetime`` in UTC."""
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12. Build an
    # aware UTC value and drop the tzinfo so callers still get a naive datetime.
    from datetime import timezone

    return datetime.fromtimestamp(ms / 1000, tz=timezone.utc).replace(tzinfo=None)
def ms_to_dt_local(ms: int) -> datetime:
    """Convert a Unix timestamp in milliseconds to a naive local ``datetime``."""
    seconds = ms / 1000
    return datetime.fromtimestamp(seconds)
def GetDataFrame(data):
    """Turn kline tuples into a frame indexed by local timestamp.

    ``data`` is a sequence of rows ``(timestamp_ms, open, high, low, close,
    volume)``. The result is indexed by ``Timestamp`` and has the columns
    Date, Time, Open, High, Low, Close, Volume.
    """
    raw_fields = ['Timestamp', "Open", "High", "Low", "Close", "Volume"]
    frame = pd.DataFrame(data, columns=raw_fields)

    # Millisecond timestamps -> local datetimes, then split into date and time text.
    frame["Timestamp"] = frame["Timestamp"].apply(ms_to_dt_local)
    stamps = frame["Timestamp"].dt
    frame['Date'] = stamps.strftime("%d/%m/%Y")
    frame['Time'] = stamps.strftime("%H:%M:%S")

    frame = frame.set_index('Timestamp')
    return frame.reindex(columns=["Date", "Time", "Open", "High", "Low", "Close", "Volume"])
def GetHistoricalData(client, symbol, start_time, end_time, limit=1500, interval="1m"):
    """Page through ``client.get_historical_data`` from ``start_time`` to ``end_time``.

    Args:
        client: object exposing ``exchange`` and ``get_historical_data``.
        symbol: trading pair.
        start_time: range start, Unix time in milliseconds.
        end_time: range end, Unix time in milliseconds.
        limit: rows requested per call.
        interval: kline interval forwarded to the client; the default
            ``"1m"`` matches the client's own default.

    Returns:
        List of all candle tuples collected, in the order received.
    """
    collection = []

    while start_time < end_time:
        data = client.get_historical_data(symbol, interval=interval, start_time=start_time, end_time=end_time, limit=limit)
        # The client returns None on a failed request and [] when nothing is
        # left; either would crash the indexing below, so stop paging.
        if not data:
            break
        print(client.exchange + " " + symbol + " : Collected " + str(len(data)) + " initial data from "+ str(ms_to_dt_local(data[0][0])) +" to " + str(ms_to_dt_local(data[-1][0])))
        # Resume one second after the last candle's timestamp.
        start_time = int(data[-1][0] + 1000)
        collection +=data

    return collection
# Demo: one day of BTCUSDT candles from the spot API.
symbol = "BTCUSDT"
interval = "1m"
client = BinanceClient(futures=False)

# Both range bounds are local midnights, expressed in milliseconds.
fromDate, toDate = (
    int(datetime.strptime(day, '%Y-%m-%d').timestamp() * 1000)
    for day in ('2021-11-15', '2021-11-16')
)

data = GetHistoricalData(client, symbol, fromDate, toDate)
df = GetDataFrame(data)
based on Mike Malyi and isnvi23h4's answer:
Please use python >= 3.7, the code does not need to install any dependencies
import pandas as pd
from datetime import datetime, timezone, timedelta
import calendar
def get_klines_iter(symbol, interval, start, end = None, limit=1000, output_path='0y_eth_only17andnew.csv'):
    """Download klines between two dates, page by page, and save them as TSV.

    Args:
        symbol: trading pair, e.g. ``'ETHBUSD'``.
        interval: kline interval string, e.g. ``'30m'``.
        start: first day, ISO format ``YYYY-MM-DD``, interpreted as UTC.
        end: last day in the same format; ``None`` means "now".
        limit: rows requested per API call (the original comment gives 1000
            as the maximum per call).
        output_path: file the tab-separated result is written to.

    Returns:
        DataFrame with the columns Date, Time, Open, High, Low, Close, Volume.

    Raises:
        ValueError: if ``start`` is ``None``.
    """
    if start is None:
        raise ValueError('start time must not be None')
    start = calendar.timegm(datetime.fromisoformat(start).timetuple()) * 1000

    # Only parse `end` when one was given; otherwise use the current UTC time.
    if end is None:
        end = int(datetime.now(timezone.utc).timestamp()) * 1000
    else:
        end = calendar.timegm(datetime.fromisoformat(end).timetuple()) * 1000

    kline_columns = ['Opentime', 'Open', 'High', 'Low', 'Close', 'Volume', 'Closetime',
                     'Quote asset volume', 'Number of trades', 'Taker by base', 'Taker buy quote', 'Ignore']
    column_names = ["Date", "Time", "Open", "High", "Low", "Close", "Volume"]

    pages = []
    next_start = start
    while True:
        url = ('https://api.binance.com/api/v3/klines?symbol=' + symbol
               + '&interval=' + interval
               + '&limit=' + str(limit)
               + '&startTime=' + str(next_start)
               + '&endTime=' + str(end))
        df2 = pd.read_json(url)
        if df2.empty:
            # Nothing left in the range; without this the loop never ended
            # when `end` lay beyond the last available candle.
            break
        df2.columns = kline_columns

        # Remember the raw millisecond value before it is turned into text.
        last_time = int(df2['Opentime'].iloc[-1])

        opentime = pd.to_datetime(df2['Opentime'], unit='ms')
        df2['Date'] = opentime.dt.strftime("%d/%m/%Y")
        df2['Time'] = opentime.dt.strftime("%H:%M:%S")
        pages.append(df2.reindex(columns=column_names))

        if last_time >= end:
            break
        # Resume just after the last candle so it is not downloaded twice.
        next_start = last_time + 1

    if pages:
        df = pd.concat(pages, axis=0, ignore_index=True)
    else:
        df = pd.DataFrame(columns=column_names)
    df.to_csv(output_path, sep='\t', index=False)
    return df
# Example run: 30-minute ETHBUSD klines from 2022-01-01 to 2022-02-21 (ISO date strings).
get_klines_iter('ETHBUSD', '30m', '2022-01-01', '2022-02-21')
I do it like this:
# Fetch historical klines for `pair` through `Client.get_historical_klines` and
# return the open/high/low/close/volume columns as floats, indexed by the open
# time formatted as 'YYYY-MM-DD HH:MM:SS'.
# `Client` is presumably python-binance's client class - confirm against the imports.
# NOTE(review): the `intervals = {` mapping is never closed and its entries are
# missing from this excerpt; restore it before running.
# NOTE(review): `countdown` is not used anywhere in the visible body.
def get_binance_data(api_key, pair, countdown='open', interval='4h', start='1 Jan 2018', end=None):
client = Client(api_key=api_key)
intervals = {
# Unknown interval names fall back to '4h'.
interval = intervals.get(interval, '4h')
print(f'Historical interval {interval}')
klines = client.get_historical_klines(symbol=pair, interval=interval, start_str=start, end_str=end)
data = pd.DataFrame(klines)
data.columns = ['open_time','open', 'high', 'low', 'close', 'volume','close_time', 'qav','num_trades','taker_base_vol','taker_quote_vol', 'ignore']
# open_time is converted with unit='ms' and rendered as text for the index.
data.index = [pd.to_datetime(x, unit='ms').strftime('%Y-%m-%d %H:%M:%S') for x in data.open_time]
usecols=['open', 'high', 'low', 'close', 'volume']
data = data[usecols]
data = data.astype('float')
return data
# Example usage with the default 4h interval and start date.
api_key = 'хххх...xxx' # use your api-key
symbol = 'ETHUSDT'
eth = get_binance_data(api_key, symbol)
Historical interval 4h
open high low close volume
2018-01-01 00:00:00 733.01 737.99 716.80 734.50 8739.23361
2018-01-01 04:00:00 734.99 763.55 730.01 751.99 9492.34734
2018-01-01 08:00:00 751.77 759.00 730.58 741.01 8939.36851
2018-01-01 12:00:00 741.01 752.27 724.15 748.80 11284.08664
2018-01-01 16:00:00 748.27 749.98 733.00 746.23 7757.00362
import requests
# Minimal example: daily ETHEUR klines straight from the REST endpoint.
market = 'ETHEUR'
tick_interval = '1d'

url = 'https://api.binance.com/api/v3/klines?symbol={}&interval={}'.format(market, tick_interval)
data = requests.get(url).json()
I have an interesting problem. I have two files, NYPD_Motor_Collisions.csv has 1.2M lines and weatherfinal.txt has 109K lines. The objective is to merge the temp and prec data from weatherfinal.txt to the Collisions files as two columns based on the latitudes and longitudes. I wrote the following code using dataframe in pandas python.
from math import cos, asin, sqrt
import pandas as pd
import numpy as np
import os
import re
import datetime
def distance(lat1, lon1, lat2, lon2):
    """Haversine distance between two points given in degrees.

    The result is scaled by 12742; presumably the Earth's diameter in
    kilometres, which would make the result kilometres - confirm.
    """
    p = 0.017453292519943295  # degrees -> radians (pi / 180)
    lat_term = cos((lat2 - lat1) * p) / 2
    lon_term = cos(lat1 * p) * cos(lat2 * p) * (1 - cos((lon2 - lon1) * p)) / 2
    a = 0.5 - lat_term + lon_term
    return 12742 * asin(sqrt(a))
def closest(data, v):
    """Return the entry of ``data`` whose 'lat'/'lon' is nearest to those of ``v``."""
    def _distance_to_v(candidate):
        return distance(v['lat'], v['lon'], candidate['lat'], candidate['lon'])

    return min(data, key=_distance_to_v)
tempDataList = []
#v = {'lat': 39.7622290, 'lon': -86.1519750}
#print(closest(tempDataList, v))
print(os.getcwd())
# Collapse runs of spaces so the weather file can be parsed with a single-space
# separator. Both files are closed when the block ends, so the rewritten file
# is fully flushed to disk before it is read back.
with open("weatherfinal.txt", 'r') as filed_, open("weatherfinal_updated.txt", "w") as fileo_:
    lines_ = filed_.readlines()
    for line_ in lines_:
        outline = re.sub(" +"," ",line_)
        # Each input line keeps its own newline, so this also writes a blank line after it.
        fileo_.write(outline + "\n")
# Merge step (Python 2 syntax): for every collision row, find the nearest
# weather sensor, look up its T_CALC / P_CALC for the matching date and hour,
# and store them as TEMP / PREP in `outdf`.
# NOTE(review): indentation was lost in this excerpt and the fallback loop
# below appears to be missing lines (e.g. `continue` / `else` / `break`);
# restore the original structure before changing any logic.
df = pd.read_csv("NYPD_Motor_Vehicle_Collisions.csv")
colhead = np.append(df.columns.values,['TEMP', 'PREP'])
outdf = pd.DataFrame(columns=colhead)
df2 = pd.read_csv("weatherfinal_updated.txt",' ')
# NOTE(review): the result of set_index is discarded (not assigned, no inplace),
# so df2 keeps its original index.
df2.set_index(['WBANNO', 'LST_DATE', 'LST_TIME'])
# One {'lat', 'lon', 'SENSORID'} entry per sensor, using the first row seen for each WBANNO.
sensorIds = df2['WBANNO'].unique()
for ids_ in sensorIds:
longitude = df2.loc[df2['WBANNO']==ids_,'LONGITUDE'].iloc[0]
latitude = df2.loc[df2['WBANNO'] == ids_, 'LATITUDE'].iloc[0]
tempDataList.append({'lat':latitude,'lon':longitude,'SENSORID': ids_ })
print tempDataList
for index, row in df.iterrows():
lon_ = row['LONGITUDE']
lat_ = row['LATITUDE']
tdate = row['DATE']
ttime = row['TIME']
# Defaults used when no weather row is found.
tcal = 5
pcal = 0
# Convert to the weather file's formats: YYYYMMDD and HHMM; the lookup window is [HHMM, HHMM + 100).
fwdate = datetime.datetime.strptime(str(tdate), '%m/%d/%Y').strftime('%Y%m%d')
fwtime = datetime.datetime.strptime(str(ttime), '%H:%M').strftime('%H%M')
ntime = float(fwtime) + float(100)
closests_ = closest(tempDataList, {'lat':lat_,'lon':lon_})
sensorid = closests_['SENSORID']
usedSensorId = sensorid
# NOTE(review): each lookup is a boolean scan over the whole of df2, repeated for every collision row.
selectedWeatherRow = df2.loc[(df2.WBANNO == sensorid) & (df2.LST_DATE == float(fwdate)) & (df2.LST_TIME >= float(fwtime)) & (df2.LST_TIME < ntime) ,['T_CALC', 'P_CALC']]
# Fallback when the nearest sensor has no reading: the other sensors are tried in turn.
if len(selectedWeatherRow.index) == 0:
for sensId in sensorIds:
if sensId == sensorid:
selectedWeatherRow = df2.loc[(df2.WBANNO == sensId) & (df2.LST_DATE == float(fwdate)) & (df2.LST_TIME >= float(fwtime)) & (df2.LST_TIME < ntime), ['T_CALC', 'P_CALC']]
if len(selectedWeatherRow.index) == 0:
tcal = selectedWeatherRow['T_CALC'].values[0]
pcal = selectedWeatherRow['P_CALC'].values[0]
usedSensorId = sensId
tcal = selectedWeatherRow['T_CALC'].values[0]
pcal = selectedWeatherRow['P_CALC'].values[0]
row['TEMP'] = tcal
row['PREP'] = pcal
# NOTE(review): outdf grows by one row per iteration here; likely a major
# contributor to the slowness described below - consider collecting the values
# in lists and assigning the two columns once.
outdf.loc[index] = row
print index, tcal, pcal, fwdate, fwtime, ntime, usedSensorId
print "Loop completed"
print "file completed"
This program has been running for days, and I am not sure why the DataFrame version is so slow. I rewrote the program without DataFrames, using dictionaries, and it completed in a few minutes. I am not sure whether DataFrames are slow or whether I am using them incorrectly. I am posting this here to learn.