I'm struggling with the results of a ScannerSubscription.
For example, if I request:
qqq_id = 0
subscript = ScannerSubscription()
subscript.numberOfRows(15)
subscript.m_scanCode = 'HIGH_OPEN_GAP'
subscript.m_instrument = 'STK'
subscript.m_averageOptionVolumeAbove = ''
subscript.m_couponRateAbove = ''
subscript.m_couponRateBelow = ''
subscript.m_abovePrice = '5'
subscript.m_belowPrice = ''
subscript.m_marketCapAbove = ''
subscript.m_marketCapBelow = ''
subscript.m_aboveVolume = '100000'
subscript.m_stockTypeFilter = 'ALL'
subscript.locationCode('STK.US.MAJOR')
tws_conn.reqScannerSubscription(qqq_id, subscript)
tws_conn.reqScannerParameters()
I received a scannerData response like this:
<scannerData reqId=0, rank=0, contractDetails=<ib.ext.ContractDetails.ContractDetails object at 0x00000000036EFA58>, distance=None, benchmark=None, projection=None, legsStr=None>
etc...
But I cannot retrieve the result values. For example, the reqScannerParameters() XML result specifies <colId>390</colId> as the colId for the Gap value:
<ScanType>
  <displayName>Top Close-to-Open % Gainers</displayName>
  <scanCode>HIGH_OPEN_GAP</scanCode>
  <instruments>STK,STOCK.NA,STOCK.EU,STOCK.HK,FUT.US,FUT.HK,FUT.EU,FUT.NA</instruments>
  <absoluteColumns>false</absoluteColumns>
  <Columns varName="columns">
    <Column>
      <colId>390</colId>
      <name>Gap</name>
      <display>true</display>
      <section>m</section>
      <displayType>DATA</displayType>
    </Column>
How do I retrieve the GAP value?
Is this even possible?
Now I'm sure you're supposed to request data after getting the contract.
import pandas as pd
scans = 15
res = pd.DataFrame(index = range(scans), columns = ['sym','open','close','calc']).fillna(0)
msgs = []
from ib.ext.Contract import Contract
from ib.opt import ibConnection, message
from ib.ext.TickType import TickType as tt
def tickPrice(msg):
    global scans
    if msg.field in [tt.OPEN, tt.CLOSE]:
        res.loc[msg.tickerId,tt.getField(msg.field)] = msg.price
        op = res.loc[msg.tickerId,'open']
        cl = res.loc[msg.tickerId,'close']
        if op > 0 and cl > 0 and res.loc[msg.tickerId,'calc'] == 0:
            res.loc[msg.tickerId,'calc'] = ((op-cl)*100/cl)
            con.cancelMktData(msg.tickerId)
            scans -= 1
            if scans == 0:
                print(res)
                con.disconnect()

def snapshot(msg):
    res.loc[msg.rank,'sym'] = msg.contractDetails.m_summary.m_symbol
    # tt.OPEN (14) isn't coming with snapshot
    con.reqMktData(str(msg.rank), msg.contractDetails.m_summary, "", False)

def watcher(msg):
    # print(msg)
    msgs.append(msg)

def scanData(msg):
    snapshot(msg)

def scanDataEnd(msg):
    con.cancelScannerSubscription(qqq_id)
con = ibConnection(port=7497, clientId=888)
con.registerAll(watcher)
con.unregister(watcher, message.scannerData)
con.register(scanData, message.scannerData)
con.unregister(watcher, message.scannerDataEnd)
con.register(scanDataEnd, message.scannerDataEnd)
con.unregister(watcher, message.tickPrice)
con.register(tickPrice, message.tickPrice)
con.connect()
from ib.ext.ScannerSubscription import ScannerSubscription
qqq_id = 0
subscript = ScannerSubscription()
subscript.numberOfRows(15)
subscript.m_scanCode = 'HIGH_OPEN_GAP'
subscript.m_instrument = 'STK'
subscript.m_averageOptionVolumeAbove ='0'
subscript.m_abovePrice = '5'
subscript.m_aboveVolume = '100000'
con.reqScannerSubscription(qqq_id, subscript)
res at 1 PM EST:
sym open close calc
0 TAC 4.95 4.25 16.470588
1 CTRP 44.80 40.99 9.294950
2 IIIN 39.26 36.58 7.326408
3 LFC 14.60 13.63 7.116654
4 ACH 11.59 10.87 6.623735
5 KALV 9.01 8.38 7.517900
6 OMER 13.25 12.75 3.921569
7 DWTI 68.00 66.50 2.255639
8 WLDN 23.75 23.43 1.365770
9 BZQ 19.67 18.73 5.018687
10 JNUG 6.55 6.43 1.866252
11 GXP PRB 50.78 49.80 1.967871
12 AU 10.85 10.59 2.455146
13 USLV 13.07 12.81 2.029664
14 CBD 16.60 16.03 3.555833
I don't know why they don't come in rank order??
Code:
from datetime import date
from datetime import timedelta
from nsepy import get_history
import pandas as pd
end1 = date.today()
start1 = end1 - timedelta(days=25)
exp_date1 = date(2022,8,25)
exp_date2 = date(2022,9,29)
# stock = ['HDFCLIFE']
stock = ['RELIANCE','HDFCBANK','INFY','ICICIBANK','HDFC','TCS','KOTAKBANK','LT','SBIN','HINDUNILVR','AXISBANK',
'ITC','BAJFINANCE','BHARTIARTL','ASIANPAINT','HCLTECH','MARUTI','TITAN','BAJAJFINSV','TATAMOTORS',
'TECHM','SUNPHARMA','TATASTEEL','M&M','WIPRO','ULTRACEMCO','POWERGRID','HINDALCO','NTPC','NESTLEIND',
'GRASIM','ONGC','JSWSTEEL','HDFCLIFE','INDUSINDBK','SBILIFE','DRREDDY','ADANIPORTS','DIVISLAB','CIPLA',
'BAJAJ-AUTO','TATACONSUM','UPL','BRITANNIA','BPCL','EICHERMOT','HEROMOTOCO','COALINDIA','SHREECEM','IOC']
target_stocks = []
# oi_change = []
for stock in stock:
    stock_jan = get_history(symbol=stock,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date1)
    stock_feb = get_history(symbol=stock,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date2)
    delivery_per_age = get_history(symbol=stock,
                                   start=start1,
                                   end=end1)
    symbol_s = get_history(symbol=stock,
                           start=start1,
                           end=end1)
    oi_combined = pd.concat([stock_jan['Change in OI'] + stock_feb['Change in OI']])
    total_oi = pd.concat([stock_jan['Open Interest']+stock_feb['Open Interest']])
    delivery_vol = pd.concat([delivery_per_age['Deliverable Volume']])
    # delivery_per = pd.concat([delivery_per_age['%Deliverble']*100])
    na_me = pd.concat([symbol_s['Symbol']])
    close = pd.concat([delivery_per_age['Close']])
    df = pd.DataFrame(na_me)
    df['TOTAL_OPN_INT'] = total_oi
    df['OI_COMBINED'] = oi_combined
    df['%_CHANGE'] = ((df['OI_COMBINED'] / df['TOTAL_OPN_INT']) * 100).__round__(2)
    df['AVG_OI_COMBINED'] = df['OI_COMBINED'].rolling(5).mean()
    # df['DELIVERY_VOL'] = delivery_vol
    # df['AVG_DELIVERY_VOL'] = df['DELIVERY_VOL'].rolling(5).mean()
    # df['DELIVERY_PER'] = delivery_per
    # df['AVG_DELIVERY_%'] = df['DELIVERY_PER'].rolling(5).mean()
    df['_CLOSE_PRICE_'] = close
    pd.set_option('display.max_columns',8)
    pd.set_option('display.width',200)
    # print(df)
    cond = ((df.loc[df.index[-5:-1], '%_CHANGE'].agg(min) > 0) | (df.loc[df.index[-6:-1], '%_CHANGE'].agg(min) > 0)) & (df.loc[df.index[-1], '%_CHANGE'] < 0)
    if(cond):
        target_stocks.append(df)
print(target_stocks)
PRODUCT:
[ Symbol TOTAL_OPN_INT OI_COMBINED %_CHANGE AVG_OI_COMBINED _CLOSE_PRICE_
Date
2022-07-19 HINDUNILVR 1015800 313200 30.83 NaN 2567.95
2022-07-20 HINDUNILVR 1617900 602100 37.21 NaN 2604.50
2022-07-21 HINDUNILVR 2355000 737100 31.30 NaN 2607.45
2022-07-22 HINDUNILVR 3671400 1316400 35.86 NaN 2640.60
2022-07-25 HINDUNILVR 5421300 1749900 32.28 943740.0 2623.60
2022-07-26 HINDUNILVR 6886200 1464900 21.27 1174080.0 2547.10
2022-07-27 HINDUNILVR 8522700 1636500 19.20 1380960.0 2581.95
2022-07-28 HINDUNILVR 10300200 1777500 17.26 1589040.0 2620.10
2022-07-29 HINDUNILVR 10250100 -50100 -0.49 1315740.0 2637.40
2022-08-01 HINDUNILVR 10237200 -12900 -0.13 963180.0 2593.00
2022-08-02 HINDUNILVR 10178700 -58500 -0.57 658500.0 2635.25
2022-08-03 HINDUNILVR 10208400 29700 0.29 337140.0 2626.35
2022-08-04 HINDUNILVR 10289700 81300 0.79 -2100.0 2627.95
2022-08-05 HINDUNILVR 10334100 44400 0.43 16800.0 2645.40
2022-08-08 HINDUNILVR 10350000 15900 0.15 22560.0 2650.35
2022-08-10 HINDUNILVR 10422900 72900 0.70 48840.0 2642.80
2022-08-11 HINDUNILVR 10432800 9900 0.09 44880.0 2613.70
2022-08-12 HINDUNILVR 10378200 -54600 -0.53 17700.0 2594.95]
Process finished with exit code 0.
Problem:
When I ran the code on 12-Aug I got the output displayed above, which is a list. How can I convert that list of target_stocks into a pandas DataFrame?
When I tried df2 = pd.DataFrame(target_stocks), it throws the error: must pass 2-d input. shape (4, 18, 16).
You are appending each dataframe to a list, and pd.DataFrame() cannot build a single dataframe from that list of dataframes. Instead of having target_stocks = [], make it target_stocks = pd.DataFrame() (an empty dataframe). Then change:
if(cond):
    target_stocks.append(df)
to
if(cond):
    target_stocks = pd.concat([target_stocks, df])
To add a blank row at the end of the dataframe if the condition is met, add the code below. This finds the length of your data frame and adds a blank row (created by placing an empty value in every column):
target_stocks.loc[len(target_stocks)]=['']*len(target_stocks.columns)
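For example, on a small throwaway frame (toy data just to illustrate the mechanics, not your real data) the line above behaves like this:
import pandas as pd

toy = pd.DataFrame({'Symbol': ['AAA', 'BBB'], 'Close': [10.0, 20.0]})
# placing an empty string in every column creates a blank row at the end
toy.loc[len(toy)] = [''] * len(toy.columns)
print(toy)
# roughly:
#   Symbol Close
# 0    AAA  10.0
# 1    BBB  20.0
# 2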
All together:
from datetime import date
from datetime import timedelta
from nsepy import get_history
import pandas as pd
end1 = date.today()
start1 = end1 - timedelta(days=25)
exp_date1 = date(2022,8,25)
exp_date2 = date(2022,9,29)
# stock = ['HDFCLIFE']
stock = ['RELIANCE','HDFCBANK','INFY','ICICIBANK','HDFC','TCS','KOTAKBANK','LT','SBIN','HINDUNILVR','AXISBANK',
'ITC','BAJFINANCE','BHARTIARTL','ASIANPAINT','HCLTECH','MARUTI','TITAN','BAJAJFINSV','TATAMOTORS',
'TECHM','SUNPHARMA','TATASTEEL','M&M','WIPRO','ULTRACEMCO','POWERGRID','HINDALCO','NTPC','NESTLEIND',
'GRASIM','ONGC','JSWSTEEL','HDFCLIFE','INDUSINDBK','SBILIFE','DRREDDY','ADANIPORTS','DIVISLAB','CIPLA',
'BAJAJ-AUTO','TATACONSUM','UPL','BRITANNIA','BPCL','EICHERMOT','HEROMOTOCO','COALINDIA','SHREECEM','IOC']
target_stocks = pd.DataFrame()
# oi_change = []
for stock in stock:
    stock_jan = get_history(symbol=stock,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date1)
    stock_feb = get_history(symbol=stock,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date2)
    delivery_per_age = get_history(symbol=stock,
                                   start=start1,
                                   end=end1)
    symbol_s = get_history(symbol=stock,
                           start=start1,
                           end=end1)
    oi_combined = pd.concat([stock_jan['Change in OI'] + stock_feb['Change in OI']])
    total_oi = pd.concat([stock_jan['Open Interest']+stock_feb['Open Interest']])
    delivery_vol = pd.concat([delivery_per_age['Deliverable Volume']])
    # delivery_per = pd.concat([delivery_per_age['%Deliverble']*100])
    na_me = pd.concat([symbol_s['Symbol']])
    close = pd.concat([delivery_per_age['Close']])
    df = pd.DataFrame(na_me)
    df['TOTAL_OPN_INT'] = total_oi
    df['OI_COMBINED'] = oi_combined
    df['%_CHANGE'] = ((df['OI_COMBINED'] / df['TOTAL_OPN_INT']) * 100).__round__(2)
    df['AVG_OI_COMBINED'] = df['OI_COMBINED'].rolling(5).mean()
    # df['DELIVERY_VOL'] = delivery_vol
    # df['AVG_DELIVERY_VOL'] = df['DELIVERY_VOL'].rolling(5).mean()
    # df['DELIVERY_PER'] = delivery_per
    # df['AVG_DELIVERY_%'] = df['DELIVERY_PER'].rolling(5).mean()
    df['_CLOSE_PRICE_'] = close
    pd.set_option('display.max_columns',8)
    pd.set_option('display.width',200)
    # print(df)
    cond = ((df.loc[df.index[-5:-1], '%_CHANGE'].agg(min) > 0) | (df.loc[df.index[-6:-1], '%_CHANGE'].agg(min) > 0)) & (df.loc[df.index[-1], '%_CHANGE'] < 0)
    if(cond):
        target_stocks = pd.concat([target_stocks, df])
        target_stocks.loc[len(target_stocks)] = [''] * len(target_stocks.columns)
target_stocks
Output:
I have the following dataframe:
country_ID  ID  direction  date
ESP_1       0   IN         2021-02-28
ENG         0   IN         2021-03-03
ENG         0   OUT        2021-03-04
ESP_2       0   IN         2021-03-05
FRA         1   OUT        2021-03-07
ENG         1   OUT        2021-03-09
ENG         1   OUT        2021-03-10
ENG         2   IN         2021-03-13
I have implemented the following functionality:
def create_columns_analysis(df):
    df['visit_ESP'] = 0
    df['visit_ENG'] = 0
    df['visit_FRA'] = 0
    list_ids = []
    for i in range(len(df)):
        if df.loc[i,'country_ID'] == 'ENG':
            country_ID_ENG(df, i, list_ids)
        else:
            # case country_ID = {FRA, ESP_1, ESP_2}
            # other methods not specified
            pass
    return df
For each row with a specific country_ID, a similarly structured function is applied.
I would like to optimise or simplify the code of the country_ID_ENG function. The country_ID_ENG function is defined as follows:
def country_ID_ENG(df, i, list_ids):
    # If it is the first time the ID is detected
    if df.loc[i,'ID'] not in list_ids:
        # It adds up to one visit regardless of the direction of the ID
        df.loc[i,'visit_ENG'] = 1
        # Add the ID to the read list
        list_ids.append(df.loc[i, 'ID'])
        # Assigns the error column a start message
        df.loc[i,'error'] = 'ERROR:1'
    # If it is not the first time it detects that ID
    else:
        # Saves the information of the previous row
        prev_row = df.loc[i-1]
        # If the current row direction is 'IN'
        if df.loc[i,'direction'] == 'IN':
            # Add a visit
            df.loc[i,'visit_ENG'] = 1
            # Behaviour dependent on the previous row
            # If the current row direction is 'IN' and previous row is 'IN'
            if prev_row['direction'] == 'IN':
                if prev_row['country_ID'] == 'FRA':
                    df.loc[i,'error'] = 'ERROR:0'
                elif prev_row['country_ID'] in ['ESP_1','ESP_2']:
                    df.loc[i,'error'] = 'ERROR:2'
                    df.loc[i,'visit_FRA'] = 1
                else:
                    df.loc[i,'error'] = 'ERROR:3'
            # If the current row direction is 'IN' and previous row is 'OUT'
            else:
                if prev_row['country_ID'] == 'ENG':
                    df.loc[i,'error'] = 'ERROR:0'
                elif prev_row['country_ID'] in ['FRA','ESP_2']:
                    df.loc[i,'error'] = 'ERROR:4'
                    df.loc[i,'visit_FRA'] = 1
                else:
                    df.loc[i,'error'] = 'ERROR:5'
                    df.loc[i,'visit_ESP'] = 1
                    df.loc[i,'visit_FRA'] = 1
        # If the current row direction is 'OUT'
        else:
            # If the current row direction is 'OUT' and previous row is 'IN'
            if prev_row['direction'] == 'IN':
                # If it detects an output before an input of the same 'country_ID',
                # it calculates the visit time
                if prev_row['country_ID'] == 'ENG':
                    df.loc[i,'mean_time'] = df.loc[i,'date']-prev_row['date']
                    df.loc[i,'error'] = 'ERROR:0'
                elif prev_row['country_ID'] in ['ESP_1','ESP_2']:
                    df.loc[i,'error'] = 'ERROR:6'
                    df.loc[i,'visit_FRA'] = 1
                    df.loc[i,'visit_ENG'] = 1
                else:
                    df.loc[i,'error'] = 'ERROR:7'
                    df.loc[i,'visit_ENG'] = 1
            # If the current row direction is 'OUT' and previous row is 'OUT'
            else:
                df.loc[i,'visit_ENG'] = 1
                if prev_row['country_ID'] == 'ENG':
                    df.loc[i,'error'] = 'ERROR:8'
                elif prev_row['country_ID'] in ['FRA','ESP_2']:
                    df.loc[i,'error'] = 'ERROR:9'
                    df.loc[i,'visit_FRA'] = 1
                else:
                    df.loc[i,'error'] = 'ERROR:10'
                    df.loc[i,'visit_ESP'] = 1
                    df.loc[i,'visit_FRA'] = 1
The above function uses the information from the current row and the previous row (if any) to create new columns for visit_ENG, visit_ESP, visit_FRA, mean_time and error.
For the example dataframe, applying the function country_ID_ENG to the rows whose country_ID equals ENG should return the following result:
country_ID  ID  direction  date        visit_ENG  visit_FRA  visit_ESP  mean_time  error
ESP_1       0   IN         2021-02-28  -          -          -          -          -
ENG         0   IN         2021-03-03  0          1          0          NaN        ERROR:2
ENG         0   OUT        2021-03-04  0          0          0          1 days     ERROR:0
ESP_2       0   IN         2021-03-05  -          -          -          -          -
FRA         1   OUT        2021-03-07  -          -          -          -          -
ENG         1   OUT        2021-03-09  1          1          0          NaN        ERROR:9
ENG         1   OUT        2021-03-10  1          0          0          NaN        ERROR:8
ENG         2   IN         2021-03-13  1          0          0          NaN        ERROR:1
The function is very long, and the other functions for rows with country_ID equal to ESP or FRA will have the same complexity. I would like help simplifying or optimising this function so the same approach can be reused when defining the country_ID_ESP and country_ID_FRA functions. I appreciate your help.
I recently had to accomplish something similar. My solution was to create a custom class to iterate over, moving some of the logic out of the loop and into the class. It's not a complete solution, but it is enough to work with.
main.py
import pandas as pd

DATA = {
    'country_id': ['ESP_1', 'FRA', 'ENG', 'FRA'],
    'ID': [0, 1, 2, 0],
    'direction': ['IN', 'IN', 'OUT', 'OUT'],
    'date': ['2021-02-28', '2021-02-28', '2021-02-28', '2021-02-28']
}

class CountryIDs:
    def __init__(self, df: pd.DataFrame):
        self._list_ids = []
        self._country_ids = []
        self._df = df

    def __iter__(self):
        for tup in self._df.itertuples():
            yield tup, self._list_ids

    def update_list_ids(self, new_value):
        self._list_ids = [*self._list_ids, new_value.ID]
        self._country_ids = [*self._country_ids, new_value.country_id]

    def get_list(self):
        return [self._list_ids, self._country_ids]

def start():
    country_data = CountryIDs(pd.DataFrame(DATA))
    for named_tuple, list_ids in country_data:
        if named_tuple.ID not in list_ids:
            country_data.update_list_ids(named_tuple)
    print(country_data.get_list())

if __name__ == '__main__':
    start()
result
[[0, 1, 2], ['ESP_1', 'FRA', 'ENG']]
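Beyond the iterator class, here is a rough sketch of my own (illustrative only, not tested against the full rule set) of how the nested if/elif branches in country_ID_ENG could collapse into a lookup table keyed by the current and previous row; country_ID_ESP and country_ID_FRA would then only need their own tables. The names ENG_RULES and apply_rules are hypothetical, and only part of the ENG table is filled in here.
# Each key is (current direction, previous direction, previous country_ID);
# each value is the error code plus any extra visit columns to set.
ENG_RULES = {
    ('IN',  'IN',  'FRA'):   ('ERROR:0', {}),
    ('IN',  'IN',  'ESP_1'): ('ERROR:2', {'visit_FRA': 1}),
    ('IN',  'IN',  'ESP_2'): ('ERROR:2', {'visit_FRA': 1}),
    ('IN',  'OUT', 'ENG'):   ('ERROR:0', {}),
    # ... remaining combinations follow the same pattern
}

def apply_rules(df, i, prev_row, rules, default):
    # Look up the rule for this row/previous-row combination and apply it.
    key = (df.loc[i, 'direction'], prev_row['direction'], prev_row['country_ID'])
    error, extras = rules.get(key, default)
    df.loc[i, 'error'] = error
    for col, value in extras.items():
        df.loc[i, col] = value
The special cases (the first time an ID is seen, the unconditional visit column, the mean_time calculation) would still be handled before the lookup, but the bulk of the branching collapses into data.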
Fairly new to Python. I'm parsing an XML file and the following code returns undesired results. I can understand why I'm getting these results - there are two escalations in the XML for this deal and I'm getting results for each set. I need help updating my code so that it only returns the monthly rent for each escalation in the XML:
<RentEscalations>
  <RentEscalation ID="354781">
    <BeginIn>7</BeginIn>
    <Escalation>3.8</Escalation>
    <RecurrenceInterval>12</RecurrenceInterval>
    <EscalationType>bump</EscalationType>
  </RentEscalation>
  <RentEscalation ID="354782">
    <BeginIn>61</BeginIn>
    <Escalation>1.0</Escalation>
    <RecurrenceInterval>12</RecurrenceInterval>
    <EscalationType>bump</EscalationType>
  </RentEscalation>
</RentEscalations>
The rent starts at $3.00/sqft for the first 6 months. This XML block shows that, for each 12 months (RecurrenceInterval), the rent will be $6.80/sqft ($3.00 base + $3.80 escalation). The following twelve months will be $10.60 ($6.80 + $3.80). Each year, the amount per square foot will increase by $3.80 until the 61st month in the term. At that point, the rent will increase by $1.00/sqft for the remainder of the term. The entire term of the lease is 120 months.
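For reference, here is a small standalone sketch of the per-square-foot schedule as I read that description (the $3.00 base, the $3.80 and $1.00 escalations, and the 120-month term are hard-coded from the text above, not parsed from the XML):
# Illustrative only: build the rate-per-sqft schedule implied by the description.
rate = 3.00                                   # base rent, months 1-6
schedule = {}
for month in range(1, 121):                   # 120-month term
    if month >= 61 and (month - 61) % 12 == 0:
        rate += 1.00                          # second escalation, BeginIn=61
    elif 7 <= month < 61 and (month - 7) % 12 == 0:
        rate += 3.80                          # first escalation, BeginIn=7
    schedule[month] = round(rate, 2)
# schedule[1..6] == 3.00, schedule[7..18] == 6.80, schedule[19..30] == 10.60, ...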
My results include 114 rows based on the first escalation ($3.80/sqft), followed by 114 rows that look as if the rent starts at $3.00/sqft and increments by $1.00/sqft each year.
Any help is appreciated!
import xml.etree.ElementTree as ET
import pyodbc
import dateutil.relativedelta as rd
import datetime as dt
tree = ET.parse('C:\\FileLocation\\DealData.xml')
root = tree.getroot()
for deal in root.findall("Deals"):
    for dl in deal.findall("Deal"):
        dealid = dl.get("DealID")
        for dts in dl.findall("DealTerms/DealTerm"):
            dtid = dts.get("ID")
            darea = float(dts.find("RentableArea").text)
            dterm = int(dts.find("LeaseTerm").text)
            for brrent in dts.findall("BaseRents/BaseRent"):
                brid = brrent.get("ID")
                rent = float(brrent.find("Rent").text)
                darea = float(dts.find("RentableArea").text)
                per = brrent.find("Period").text
                dtstart = dts.find("CommencementDate").text
                startyr = int(dtstart[0:4])
                startmo = int(dtstart[5:7])
                startday = int(dtstart[8:])
                start = dt.date(startyr, startmo, startday)
                end = start + rd.relativedelta(months=dterm)
                if brrent.find("Duration").text is None:
                    duration = 0
                else:
                    duration = int(brrent.find("Duration").text)
                termbal = dterm - duration
                for resc in dts.findall("RentEscalations/RentEscalation"):
                    rescid = resc.get("ID")
                    esctype = resc.find("EscalationType").text
                    begmo = int(resc.find("BeginIn").text)
                    esc = float(resc.find("Escalation").text)
                    intrvl = int(resc.find("RecurrenceInterval").text)
                    if intrvl != 0:
                        pers = termbal / intrvl
                    else:
                        pers = 0
                    escst = start + rd.relativedelta(months=begmo - 1)
                    i = 0
                    x = begmo
                    newrate = rent
                    while i < termbal:
                        billdt = escst + rd.relativedelta(months=i)
                        if per == "rsf/year":
                            monthlyamt = (newrate + esc) * darea / 12.0
                        if per == "month":
                            monthlyamt = newrate + esc
                        if per == "year":
                            monthlyamt = (newrate + esc) / 12.0
                        if per == "rsf/month":
                            monthlyamt = (newrate + esc) * darea
                        try:
                            if i % intrvl == 0:
                                level = x + 1
                                newrent = monthlyamt
                                x += 1
                                newrate += esc
                            else:
                                level = x
                        except ZeroDivisionError:
                            break
                        i += 1
                        if dealid == "1254278":
                            print(dealid, dtid, rescid, dterm, darea, escst, rent, intrvl, esctype, termbal,
                                  monthlyamt, billdt, pers, level, newrate, newrent)
I have to join two tables and create a table with dates, but my code is way too long and I believe I did it the long way. Apparently the solution to this only has 22 lines. Is there another, shorter way to approach this problem? Here is the question.
Here is my code; again, I believe it is too long and I think there is a shorter way to do this.
import numpy as np
import pandas as pd
import datetime
#YOUR CODE GOES HERE#
def get_month(i):
    """this function returns the number of the month based on string input"""
    if i == "January":
        return 1
    elif i == "February":
        return 2
    elif i == "March":
        return 3
    elif i == "April":
        return 4
    elif i == "May":
        return 5
    elif i == "June":
        return 6
    elif i == "July":
        return 7
    elif i == "August":
        return 8
    elif i == "September":
        return 9
    elif i == "October":
        return 10
    elif i == "November":
        return 11
    elif i == "December":
        return 12

def get_reformatted_date(s):
    """this function reformats a datetime object to the output we're looking for"""
    return s.strftime("%d-%b-%y")
month_names = []
tab1 = pd.read_csv("data1.csv")
tab2 = pd.read_csv("data2.csv")
tab1_tweets = tab1['Tweet'].tolist()[::-1]
tab2_tweets = tab2['Tweet'].tolist()[::-1]
tab1_months = tab1['Month'].tolist()[::-1]
tab2_months = tab2['Month'].tolist()[::-1]
tab1_days = tab1['Day'].tolist()[::-1]
tab2_days = tab2['Day'].tolist()[::-1]
tab1_years = tab1['Year'].tolist()[::-1]
tab2_years = tab2['Year'].tolist()[::-1]
all_dates = []
all_tweets = []
tab1_count = 0
tab2_count = 0
for i in range(len(tab1_tweets) + len(tab2_tweets)):
    if(tab1_count < len(tab1_years) and tab2_count < len(tab2_years)):
        t1_date = datetime.date(tab1_years[tab1_count], tab1_months[tab1_count], tab1_days[tab1_count])
        t2_date = datetime.date(tab2_years[tab2_count], get_month(tab2_months[tab2_count]), tab2_days[tab2_count])
        if t1_date > t2_date:
            all_dates.append(t1_date)
            all_tweets.append(tab1_tweets[tab1_count])
            tab1_count += 1
        else:
            all_dates.append(t2_date)
            all_tweets.append(tab2_tweets[tab2_count])
            tab2_count += 1
    elif(tab2_count < len(tab2_years)):
        t2_date = datetime.date(tab2_years[tab2_count], get_month(tab2_months[tab2_count]), tab2_days[tab2_count])
        all_dates.append(t2_date)
        all_tweets.append(tab2_tweets[tab2_count])
        tab2_count += 1
    else:
        t1_date = datetime.date(tab1_years[tab1_count], tab1_months[tab1_count], tab1_days[tab1_count])
        all_dates.append(t1_date)
        all_tweets.append(tab1_tweets[tab1_count])
        tab1_count += 1
table_data = {'Date': all_dates, 'Tweet': all_tweets}
df = pd.DataFrame(table_data)
df['Date'] = df['Date'].apply(get_reformatted_date)
print(df)
data1.csv is
Tweet Month Day Year
Hello World 6 2 2013
I want ice-cream! 7 23 2013
Friends will be friends 9 30 2017
Done with school 12 12 2017
the data2.csv is
Month Day Year Hour Tweet
January 2 2015 12 Happy New Year
March 21 2016 7 Today is my final
May 30 2017 23 Summer is about to begin
July 15 2018 11 Ocean is still cold
I think that you can theoretically do this whole thing in one line:
finaldf = (pd.concat([pd.read_csv('data1.csv',
                                  parse_dates={'Date':['Year', 'Month', 'Day']}),
                      pd.read_csv('data2.csv',
                                  parse_dates={'Date':['Year', 'Month', 'Day']})
                      [['Date', 'Tweet']]])
           .sort_values('Date', ascending=False))
But for the sake of readability, it's better to split it into a few lines:
df1 = pd.read_csv('data1.csv', parse_dates={'Date':['Year', 'Month','Day']})
df2 = pd.read_csv('data2.csv', parse_dates={'Date':['Year', 'Month','Day']})
finaldf = (pd.concat([df1, df2[['Date', 'Tweet']]])
           .sort_values('Date', ascending=False))
I think that for what you're trying to do, the main things to read up about are the parse_dates argument of pandas read_csv, and pd.concat to concatenate dataframes.
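If it helps, here is a tiny self-contained demo of what parse_dates does with the split Year/Month/Day columns (toy inline data standing in for data2.csv; depending on your pandas version, combining columns this way may raise a deprecation warning):
import io
import pandas as pd

# Toy CSV with the same column layout as data2.csv; month names parse fine too.
csv = io.StringIO("Month,Day,Year,Tweet\n"
                  "January,2,2015,Happy New Year\n"
                  "March,21,2016,Today is my final\n")
demo = pd.read_csv(csv, parse_dates={'Date': ['Year', 'Month', 'Day']})
print(demo)
# roughly:
#         Date              Tweet
# 0 2015-01-02     Happy New Year
# 1 2016-03-21  Today is my final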
Edit: in order to get the dates in the correct format as you have in your example output, you can call this after the code above, using Series.dt.strftime():
finaldf['Date'] = finaldf['Date'].dt.strftime('%d-%b-%y')
Write a python program that will take 3 lists:
Name Wage Hours
Juan 7.50 35
Rae 11.00 41
Ivanna 18.25 26
Lilly 9.25 35
Robert 11.10 45
and use this logic:
An employee gets overtime when they have worked more than 40 hours
Overtime pay is calculated using this formula:
Gross Pay = (35*Wage) + ((Hours-35)*Wage*1.5)
Regular pay is calculated using this formula:
Gross Pay = (Hours*Wage)
Use a loop to process these lists.
Print each employee, their wages, Hours and gross pay.
I'm running this program and I have the for loop. That works fine, but the while loop, which is supposed to give the same output, is not giving me any output at all. Here's my code.
Name = ["Juan","Rae","Ivanna", "Lilly", "Robert"]
Hours = [35,41,26,35,45]
Wage = [7.5,11,18.25,9.25,11.1]
print ("Name\tWage\tHours\tGP")
for X in range(5):
    GP = 0
    if(Hours[X] > 40):
        GP = (35*Wage[X]) + ((Hours[X]-35)*Wage[X]*1.5)
    else:
        GP = Hours[X] * Wage[X]
    print (Name[X],"\t", Wage[X],"\t", Hours[X],"\t", GP)

Name = ["Juan","Rae","Ivanna", "Lilly", "Robert"]
Hours = [35,41,26,35,45]
Wage = [7.5,11,18.25,9.25,11.1]
print ("Name\tWage\tHours\tGP")
counter = 5
Y = 0
while (Y):
    if (Hours[Y] > 40):
        GP = (35*Wage[Y]) + ((Hours[Y]-35)*Wage[Y]*1.5)
    else:
        GP = Hours[Y] * Wage[Y]
print (Name[Y],"\t", Wage[Y],"\t", Hours[Y],"\t", GP)
My output comes out as:
Name Wage Hours GP
Juan 7.5 35 262.5
Rae 11 41 484.0
Ivanna 18.25 26 474.5
Lilly 9.25 35 323.75
Robert 11.1 45 555.0
Name Wage Hours GP
Juan 7.5 35 555.0
I don't know where the error is in the while loop.
Your last line needs to be indented. So instead of
while (Y):
    if (Hours[Y] > 40):
        GP = (35*Wage[Y]) + ((Hours[Y]-35)*Wage[Y]*1.5)
    else:
        GP = Hours[Y] * Wage[Y]
print (Name[Y],"\t", Wage[Y],"\t", Hours[Y],"\t", GP)
it needs to be
while (Y):
    if (Hours[Y] > 40):
        GP = (35*Wage[Y]) + ((Hours[Y]-35)*Wage[Y]*1.5)
    else:
        GP = Hours[Y] * Wage[Y]
    print (Name[Y],"\t", Wage[Y],"\t", Hours[Y],"\t", GP)
because right now the print is outside of the while loop, so it is only printing with the value Y=0.
You have to follow Python's indentation, and the while loop needs a proper condition along with an increment/decrement or boolean: your code needs to increment the value of Y.
Name = ["Juan","Rae","Ivanna", "Lilly", "Robert"]
Hours = [35,41,26,35,45]
Wage = [7.5,11,18.25,9.25,11.1]
print ("Name\tWage\tHours\tGP")
for X in range(5):
    GP = 0
    if(Hours[X] > 40):
        GP = (35*Wage[X]) + ((Hours[X]-35)*Wage[X]*1.5)
    else:
        GP = Hours[X] * Wage[X]
    print (Name[X],"\t", Wage[X],"\t", Hours[X],"\t", GP)

Name = ["Juan","Rae","Ivanna", "Lilly", "Robert"]
Hours = [35,41,26,35,45]
Wage = [7.5,11,18.25,9.25,11.1]
print ("Name\tWage\tHours\tGP")
counter = 5
Y = 0
while (Y < counter):
    if (Hours[Y] > 40):
        GP = (35*Wage[Y]) + ((Hours[Y]-35)*Wage[Y]*1.5)
    else:
        GP = Hours[Y] * Wage[Y]
    print (Name[Y],"\t", Wage[Y],"\t", Hours[Y],"\t", GP)
    Y = Y + 1