Unable to join a dataframe even after following an example - python

Imports modules:
import Quandl
import pandas as pd
from pandas.tools.plotting import df_unique
read api key:
api_key = open('quandlapikey.txt','r').read()
Currently the function reads a csv file to get the codes however I plan to change this to sqllite..
def stock_list():
#stocks = pd.read_csv('TID.csv'.rstrip())
stocks = open('TID.csv').readlines()
return stocks[0:]
Get stock codes from quandl this works a treat.
def getStockValues():
stocks = stock_list()
main_df = pd.DataFrame()
for abbrv in stocks:
query = "LSE/" + str(abbrv).strip()
df = Quandl.get(query, authtoken=api_key,start_date='2016-04-05', end_date='2016-04-10')
df = df['Price']
df.columns = [abbrv]
print(query)
print(df)
This statement causes the issues for some reason whilst looping it cannot join additional stock prices.
#This statement Prints as
print(df.tail(5))
#causes error
if main_df.empty:
main_df = df
else:
main_df = main_df.join(df)
# exit
print('Task done!')
getStockValues()
This is the output from the print statements and error from the join.
Result:
LSE/VOD
Date
2016-04-14 226.80
2016-04-15 229.75
<ETC for all stocks>
Traceback (most recent call last):
File "H:\Workarea\DataB\SkyDriveP\OneDrive\PyProjects\Learning\21 myPprojects\stockPrices.py", line 49, in <module>
getStockValues()
File "H:\Workarea\DataB\SkyDriveP\OneDrive\PyProjects\Learning\21 myPprojects\stockPrices.py", line 43, in getStockValues
main_df = main_df.join(df)
File "H:\APPS\Python35-32\lib\site-packages\pandas\core\generic.py", line 2669, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'Series' object has no attribute 'join'
Further tests show that the issue seems to be with the scope of the pandas data object this causes and issue:
main_df = pd.DataFrame()
for abbrv in stocks:
query = "LSE/" + str(abbrv).strip()
df = Quandl.get(query, authtoken=api_key,start_date='2016-03-05', end_date='2016-04-10')
df = df['Price']
df.columns = [abbrv]
#causes error
if main_df.empty:
main_df = df
else:
main_df = main_df.join(df)
However this does not cause an error however only returns one dataset:
for abbrv in stocks:
main_df = pd.DataFrame()
query = "LSE/" + str(abbrv).strip()
df = Quandl.get(query, authtoken=api_key,start_date='2016-03-05', end_date='2016-04-10')
df = df['Price']
df.columns = [abbrv]
if main_df.empty:
main_df = df
else:
main_df = main_df.join(df)

Seems to me that the issue with your code is somewhere around here:
...
df = df['Price'] ## <- you are turning the DataFrame to a Series here
df.columns = [abbrv] ## <- no effect whatsoever on a Series
print(query)
print(df)
What I would do instead is simply add the new row to your existing DataFrame.
## if main_df.empty: ## <- remove this line
## main_df = df ## This should be changed to the line below
main_df[abbrv] = df ## This will just add the new column to you df and use the Series as data
## else: ## <- remove this line
## main_df = main_df.join(df) ## <- remove this line

Related

Add new column to DataFrame with same default value

I would like to add a name column based on the 'lNames' list. But my code is overwriting the whole column in the last iteration as follows:
import pandas as pd
def consulta_bc(codigo_bcb):
url = 'http://api.bcb.gov.br/dados/serie/bcdata.sgs.{}/dados?formato=json'.format(codigo_bcb)
df = pd.read_json(url)
df['data'] = pd.to_datetime(df['data'], dayfirst=True)
df.set_index('data', inplace=True)
return df
lCodigos = [12, 11, 1, 21619, 21623, 12466]
lNames = ['CDI', 'SELIC', 'USD', 'EUR', 'GPB', 'IMAB']
iter_len = len(lCodigos)
saida = pd.DataFrame()
for i in range(iter_len):
saida = saida.append(consulta_bc(lCodigos[i]))
saida['nome']= lNames[i]
saida.to_csv('Indice', sep=';', index=True)
saida
Any help will be fully appreciated
Change the for loop in this way:
for i in range(iter_len):
df = consulta_bc(lCodigos[i])
df['nome'] = lNames[i]
saida = saida.append(df)

How to get a list of the name of every open window and place that into dataframe?

So I'm trying to use both win32gui and Pandas to get a dataframe (df) of windows that are open. Below is what I wrote. I end up with an error. How can I get one dataframe returned?
# info http://timgolden.me.uk/pywin32-docs/win32gui__EnumWindows_meth.html
import win32gui
import pandas as pd
def winEnumHandler( hwnd, dfx ):
if win32gui.IsWindowVisible( hwnd ) and len(win32gui.GetWindowText( hwnd ))>0 :
idv = hex(hwnd)
winv = win32gui.GetWindowText(hwnd)
df = pd.DataFrame({'ID' : idv , 'Window': winv}, index = ['0'])
frames = [dfx, df]
dfx = pd.concat(frames)
# print(dfx)
return dfx # Comment out this and it runs but not the result I want.
dfx= pd.DataFrame() # empty dataframe
win32gui.EnumWindows( winEnumHandler, dfx )
print(dfx)
Traceback
Traceback (most recent call last):
File "c:\Users\s...\Python\List of windows.py", line 19, in <module>
win32gui.EnumWindows( winEnumHandler, dfx )
TypeError: an integer is required (got type DataFrame)
So the key to getting the dataframe out of the function is to use a global variable. This variable must be state as global inside the function so there is not confusion and python does not consider it as local variable. Here is the code.
import win32gui
import pandas as pd
dfx = pd.DataFrame()
i = 0
def winEnumHandler( hwnd, x ):
global dfx, i
if win32gui.IsWindowVisible( hwnd ) and len(win32gui.GetWindowText( hwnd ))>0 :
idv = hex(hwnd)
winv = win32gui.GetWindowText(hwnd)
df = pd.DataFrame({'ID' : idv , 'Window': winv}, index = [i])
frames = [dfx, df]
dfx = pd.concat(frames)
i += 1
win32gui.EnumWindows( winEnumHandler, i )
print(dfx)

Overwriting one data with another data in pandas(dataframe)

Periodically (every 120 seconds) get data but recent data overwrites previous data in SQL DB. I want all data to be saved.In addition, is the timer correct?
import sqlalchemy as sa
import psycopg2
import requests as rq
import pandas as pd
import json
import time
start_time = time.time()
while True:
temp = pd.DataFrame()
df = pd.DataFrame()
vehicleList = {"SN63NBK", "YY67UTP"}
for ids in vehicleList:
r = rq.get('https://api.tfl.gov.uk/Vehicle/' + ids + '/Arrivals')
r = r.text
temp = pd.read_json(r)
temp['Type'] = 'ids'
df = pd.concat([df, temp], sort=False).reset_index(drop=True)
engine = sa.create_engine('postgresql+psycopg2://postgres:3434#127.0.0.1/postgres')
df['timing'] = list(map(lambda x: json.dumps(x), df['timing']))
df.to_sql('tfl_bus_pg6', engine, if_exists='replace', index=False)
time.sleep(120.0 - ((time.time() - start_time) % 120.0))
I changed your code slightly, but I think the main problem is in if_exists parameter which you should set to append, as #K753 have mentioned in the comments.
Also, YY67UTP id returns nothing, so I replaced it with another random id from the site to illustrate how code works.
def _data_gen(vehicles):
""" Yields a dataframe for each request """
for ids in vehicles:
time.sleep(1)
r = rq.get('https://api.tfl.gov.uk/Vehicle/' + ids + '/Arrivals')
temp = pd.read_json(r.text)
temp['Type'] = ids
yield temp
while True:
# how do you break from while loop if you need to?
vehicleList = {"SN63NBK", "YY67UTP"}
df = pd.concat(_data_gen(vehicleList), sort=False, ignore_index=True)
engine = sa.create_engine('postgresql+psycopg2://postgres:3434#127.0.0.1/postgres')
df['timing'] = list(map(lambda x: json.dumps(x), df['timing']))
df.to_sql('tfl_bus_pg6', engine, if_exists='append', index=False)
time.sleep(120)

Webscraping data from a json source, why i get only 1 row?

I'am trying to get some information from a website with python, from a webshop.
I tried this one:
def proba():
my_url = requests.get('https://www.telekom.hu/shop/categoryresults/?N=10994&contractType=list_price&instock_products=1&Ns=sku.sortingPrice%7C0%7C%7Cproduct.displayName%7C0&No=0&Nrpp=9&paymentType=FULL')
data = my_url.json()
results = []
products = data['MainContent'][0]['contents'][0]['productList']['products']
for product in products:
name = product['productModel']['displayName']
try:
priceGross = product['priceInfo']['priceItemSale']['gross']
except:
priceGross = product['priceInfo']['priceItemToBase']['gross']
url = product['productModel']['url']
results.append([name, priceGross, url])
df = pd.DataFrame(results, columns = ['Name', 'Price', 'Url'])
# print(df) ## print df
df.to_csv(r'/usr/src/Python-2.7.13/test.csv', sep=',', encoding='utf-8-sig',index = False )
while True:
mytime=datetime.now().strftime("%H:%M:%S")
while mytime < "23:59:59":
print mytime
proba()
mytime=datetime.now().strftime("%H:%M:%S")
In this webshop there are 9 items, but i see only 1 row in the csv file.
Not entirely sure what you intend as end result. Are you wanting to update an existing file? Get data and write out all in one go? Example of latter shown below where I add each new dataframe to an overall dataframe and use a Return statement for the function call to provide each new dataframe.
import requests
from datetime import datetime
import pandas as pd
def proba():
my_url = requests.get('https://www.telekom.hu/shop/categoryresults/?N=10994&contractType=list_price&instock_products=1&Ns=sku.sortingPrice%7C0%7C%7Cproduct.displayName%7C0&No=0&Nrpp=9&paymentType=FULL')
data = my_url.json()
results = []
products = data['MainContent'][0]['contents'][0]['productList']['products']
for product in products:
name = product['productModel']['displayName']
try:
priceGross = product['priceInfo']['priceItemSale']['gross']
except:
priceGross = product['priceInfo']['priceItemToBase']['gross']
url = product['productModel']['url']
results.append([name, priceGross, url])
df = pd.DataFrame(results, columns = ['Name', 'Price', 'Url'])
return df
headers = ['Name', 'Price', 'Url']
df = pd.DataFrame(columns = headers)
while True:
mytime = datetime.now().strftime("%H:%M:%S")
while mytime < "23:59:59":
print(mytime)
dfCurrent = proba()
mytime=datetime.now().strftime("%H:%M:%S")
df = pd.concat([df, dfCurrent])
df.to_csv(r"C:\Users\User\Desktop\test.csv", encoding='utf-8')

Script not working anymore, .group() used to work but now is throwing an error

This is the portion of the code that's causing trouble:
import pandas as pd
import re
df
df.columns = ['Campaigns', 'Impressions', 'Attempts', 'Spend']
Campaigns = df['Campaigns']
IDs = []
for c in Campaigns:
num = re.search(r'\d{6}',c).group()
IDs.append(num)
pieces = [df,pd.DataFrame(IDs)]
frame = pd.concat(pieces, axis=1, join='outer',ignore_index=False)
frame['ID'] = frame[0]
del frame[0]
frame
This is the error:
Error: 'NoneType' object has no attribute 'group'
When I try things individually in ipython everything works, for example:
in>> test = 'YP_WON2_SP8_115436'
in>> num = re.search(r'\d{6}',test)
in>> num.group()
out>> '115436'
I've tried splitting up the code as above and it still throws the same error.
Fixed the code:
df
df.columns = ['Campaigns', 'Impressions', 'Attempts', 'Spend']
Campaigns = df['Campaigns']
ID = []
for c in Campaigns:
m = re.search(r'\d{6}',c)
if m:
num = re.search(r'\d{6}',c).group()
ID.append(num)
else:
ID.append('No ID')
pieces = [df,pd.DataFrame(ID)]
frame = pd.concat(pieces, axis=1, join='outer',ignore_index=False)
frame['ID'] = frame[0]
del frame[0]
frame

Categories