i'm trying to use pytrend to get several keywords from different countries over weekly periods
i don't know why but a much simpler code works, I'm not sure if it's the for loop or something else.
this is the code:
from pytrends.request import TrendReq
import pandas as pd
data = pd.read_csv('data.csv')
pytrends = TrendReq(hl='en-US', tz=360, )
contr = ['US', 'UK', 'IE', 'AU','CA','NZ',
'PE', 'MX','VE', 'CO','AR', 'CL']
en_keyw = [['key1', 'key2', 'key3'],
['key4', 'key5', 'key6'],
['key7', 'key8', 'key9']]
weeks = pd.date_range('2018-12-1',
'2020-06-20',
freq='W'
).strftime('%Y-%m-%d')
for i in range(len(contr)):
for ii in range(9):
for iii in range(len(weeks)):
pytrends.build_payload(kw_list=en_keyw[ii],
cat=0,
timeframe='{} {}'.format(weeks[iii], weeks[iii + 1]),
geo=contr[i],
data = pytrends.interest_over_time(),
gprop='')
data1 = data.drop(labels=['isPartial'],axis='columns')
dataset.append(data1)
result = pd.concat(dataset, axis=1)
result.to_csv('trends.csv')
but I'm getting a 'request' KeyError:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
27 timeframe='{} {}'.format(weeks[iii], weeks[iii + 1]),
28 geo=contr[i],
----> 29 data = pytrends.interest_over_time(),
30 gprop='')
31 data = data.drop(labels=['isPartial'],axis='columns')
in interest_over_time(self)
204 over_time_payload = {
205 # convert to string as requests will mangle
--> 206 'req': json.dumps(self.interest_over_time_widget['request']),
207 'token': self.interest_over_time_widget['token'],
208 'tz': self.tz
KeyError: 'request'
I guess that pytrends.interest_over_time() should be outside of build_payload, i.e.:
pytrends.build_payload(kw_list=en_keyw[ii],
cat=0,
timeframe='{} {}'.format(weeks[iii], weeks[iii + 1]),
geo=contr[i],
data=pytrends.interest_over_time(),
gprop='')
data1 = data.drop(labels=['isPartial'], axis='columns')
dataset.append(data1)
should be something like:
pytrends.build_payload(kw_list=en_keyw[ii],
cat=0,
timeframe='{} {}'.format(weeks[iii], weeks[iii + 1]),
geo=contr[i],
gprop='')
data = pytrends.interest_over_time()
if not data.empty:
data1 = data.drop(labels=['isPartial'], axis='columns')
dataset.append(data1)
Related
I am trying to get EIA data using its API, however I encountered json errors when it's calling the series. I recalled it was working fine about 6 months ago, not sure if it is something changed in EIA's API. Could anyone shed some light how to fix this?
Here's the code:
import pandas as pd
import eia
def retrieve_data():
# Create EIA API using your specific API key
api_key = "YOUR_API_KEY"
api = eia.API(api_key)
# Retrieve Data By Series ID
series_ID='STEO.PASC_OECD_T3.M'
series_search = api.data_by_series(series=series_ID)
df = pd.DataFrame(series_search)
df.index.names = ['Date']
df.columns=[ "Price"]
df.index = df.index.str.replace('^([\d]{4})\s([\d]{2})([\d]
{2})\s[\d] {2}', r'\1-\2-\3',regex=True)
df.index = pd.to_datetime(df.index)
return df
data = retrieve_data()
print(data)
and the error message is as the following:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/tmp/ipykernel_942387/4124051913.py in <module>
17 return df
18
---> 19 data = retrieve_data()
20 print(data)
21 #data.to_csv('OK_WTI_Spot_Price_FOB.csv',index=True)
/tmp/ipykernel_942387/4124051913.py in retrieve_data()
9 # Retrieve Data By Series ID
10 series_ID='STEO.PASC_OECD_T3.M'
---> 11 series_search = api.data_by_series(series=series_ID)
12 df = pd.DataFrame(series_search)
13 df.index.names = ['Date']
~/miniconda3/lib/python3.7/site-packages/eia/api.py in data_by_series(self, series)
422 else:
423 lst_dates = [x[0][0:4] + " " + x[0][4:] + " " + x[0][6:8]
--> 424 for x in search.json()['series'][0]['data']]
425 lst_values = [x[1] for x in
426 search.json()['series'][0]['data']]
KeyError: 'series'
i want to obtain proper list of marks and models of boats from two dataset (one lambda an another of reference) with fuzzywuzzy (levensthein model in python) but i have an issue in my code that i don't understand
the two datasets:
https://www.transfernow.net/dl/202203070QxpVjYJ
there is my code :
#%%
from fuzzywuzzy import process
import pandas as pd
#%%
BASE_LAMBDA_PATH = '../ressources/marques_modeles_lambda_entier.csv'
BASE_REF_PATH = '../ressources/marques_modeles_ref_entier.csv'
#%%
lambda_df = pd.read_csv(BASE_LAMBDA_PATH, sep=";")
#%%
ref_df = pd.read_csv(BASE_REF_PATH, sep=";")
#%% j'ai créé ma liste de résultat (initée à vide)
df_result = pd.DataFrame(columns=['marque', 'lambda','ref','score'])
#%% je parcours ma table de modèles lambda
for ind in lambda_df.index:
marque = lambda_df['MARQUE_REF'][ind]
modele_lambda = lambda_df['MODELE'][ind]
ref_list = (ref_df[(ref_df['lib_marque'] == marque)]['lib_model']).to_list()
choices = process.extract(modele_lambda, ref_list, limit=1)
approx = choices[0][0]
score = choices[0][1]
df2 = pd.DataFrame(data = [(marque, modele_lambda, approx, score)],\
columns=['marque', 'lambda','ref','score'])
df_result = pd.concat([df_result, df2], axis=0, ignore_index=True)
df_result.to_csv('output_matching_groupe.csv', sep=';', index=False)
'''
tdep = time.time()
tfin = time.time()
print(f"duree de {tfin-tdep} secondes")
'''
# %%
the error:
IndexError Traceback (most recent call last)
c:\Users\boats\src\list_matching_groupe.py in <cell line: 1>()
20 ref_list = (ref_df[(ref_df['lib_marque'] == marque)]['lib_model']).to_list()
21 choices = process.extract(modele_lambda, ref_list, limit=1)
----> 22 approx = choices[0][0]
23 score = choices[0][1]
24 df2 = pd.DataFrame(data = [(marque, modele_lambda, approx, score)],\
25 columns=['marque', 'lambda','ref','score'])
IndexError: list index out of range
I don't understand it because choices[0][0] actually works i obtain: 'Guy Couach 1401'
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
df = pd.read_csv('pokemon_data.csv')
df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
df = df.loc[df['Total'] > 450]
df = df.loc[~df['Name'].str.contains('Mega')]
df = df.loc[~df['Name'].str.contains('Primal')]
df = df.drop(columns = ['Name'])
df = df.drop(columns = ['Generation'])
df = df.drop(columns = ['Legendary'])
df = df.drop(columns = ['Type 2'])
df = df.drop(columns = ['#'])
df_eval_sub = df.loc[df['Total'] < 500]
df_eval_over = df.loc[df['Total'] > 500]
y_train = df.pop('Type 1')
y_eval_sub = df_eval_sub.pop('Type 1')
y_eval_over = df_eval_over.pop('Type 1')
feature_columns = []
NUMERIC_COLUMNS = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']
for feature_name in NUMERIC_COLUMNS:
feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))
def make_input_fn(data_df, label_df, num_epochs = 10, shuffle = True, batch_size=32):
def input_function():
ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
if shuffle:
ds = ds.shuffle(1000)
ds = ds.batch(batch_size).repeat(num_epochs)
return ds
return input_function
train_input_fn = make_input_fn(df, y_train)
eval_input_fn = make_input_fn(df_eval_sub, y_eval_sub, num_epochs = 1, shuffle = False)
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_fn)
result = linear_est.evaluate(eval_input_fn)
clear_output()
print(result['accuracy'])
From the original file, all of the columns have numbers in them, aside from the 'Type 1' column. Whenever I tried to change Type 1 to numbers, I get further errors. The error gets triggered whenever train_input_fn gets called.
Error:
UnimplementedError: Cast string to float is not supported
[[{{node head/losses/Cast}}]]
During handling of the above exception, another exception occurred:
UnimplementedError Traceback (most recent call last)
<ipython-input-166-e9dbb248f085> in <module>
12
13 linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
---> 14 linear_est.train(train_input_fn)
15 result = linear_est.evaluate(eval_input_fn)
16
I can reproduce your error by using a tf.feature_column.numeric_column on a dataframe column that has string values.
import tensorflow as tf
import pandas as pd
import numpy as np
df = pd.DataFrame({
'float_values': np.random.rand(15),
'string_values': np.random.randint(0, 10, (15,))
})
df['string_values'] = df['string_values'].astype(str)
float_values float64
string_values object
ds = tf.data.Dataset.from_tensors(dict(df))
float_column = tf.feature_column.numeric_column('float_values')
string_column = tf.feature_column.numeric_column('string_values')
# This works, the 'float_values' column is numeric
float_layer = tf.keras.layers.DenseFeatures(float_column)
float_layer(next(iter(ds)))
# This doesn't work, the 'string_values' column is string
string_layer = tf.keras.layers.DenseFeatures(string_column)
string_layer(next(iter(ds)))
tensorflow.python.framework.errors_impl.UnimplementedError: Cast string to float is not supported [Op:Cast]
Make sure all your dataframe is of dtype float/int.
for col in NUMERIC_COLUMNS:
df[col] = pd.to_numeric(df[col])
Note that there might be a better to cast to numeric, I am admittedly not a Pandas expert.
I try to get data from google trends in a g sheet. First time it runned smoothly, second time not so much. I got an error called:
ValueError: No objects to concatenate
I searched this error on Stack Overflow before but couldn't find any solutions. I use the code displayed below:
!pip install Pytrends
!pip install pandas
!pip install pytrends --upgrade <---------Note: this solved a different error.
from pytrends.request import TrendReq
import pandas as pd
import time
startTime = time.time()
pytrend = TrendReq(hl='nl-NL', tz=360)
df = wb = gc.open_by_url('https://docs.google.com/spreadsheets/d/1QE1QilM-GDdQle6eVunepqG5RNWv39xO0By84C19Ehc/edit?usp=sharing')
sheet = wb.sheet1
df2 = sheet.col_values(5)
d_from = sheet.acell('B7').value
d_to = sheet.acell('B8').value
geo1 = sheet.acell('B10').value
dataset = []
for x in range(1,len(df2)):
keywords = [df2[x]]
pytrend.build_payload(
kw_list=keywords,
cat=0,
timeframe= str(d_from + " " + d_to),
geo= str(geo1))
data = pytrend.interest_over_time()
if not data.empty:
data = data.drop(labels=['isPartial'],axis='columns')
dataset.append(data)
result = pd.concat(dataset, axis=1)
result.to_csv('search_trends_DOWNLOAD_ME.csv')
!cp search_trends_DOWNLOAD_ME.csv "/content/drive/My Drive/Colab Notebooks/Output"
executionTime = (time.time() - startTime)
print('Execution time in sec.: ' + str(executionTime))
The error I got:
ValueError Traceback (most recent call last)
<ipython-input-5-b86c7b4df727> in <module>()
25 data = data.drop(labels=['isPartial'],axis='columns')
26 dataset.append(data)
---> 27 result = pd.concat(dataset, axis=1)
28 result.to_csv('search_trends_DOWNLOAD_ME.csv')
29 get_ipython().system('cp search_trends_DOWNLOAD_ME.csv "/content/drive/My Drive/Colab Notebooks/Output"')
1 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
327
328 if len(objs) == 0:
--> 329 raise ValueError("No objects to concatenate")
330
331 if keys is None:
ValueError: No objects to concatenate
The keywords I use are located in df = wb = gc.open_by_url. It is a g-sheet with the location, language and the keywords.
this happened to me earlier, it was just miss typing path\url of the file.
check the path again.
I am using this function to pull data from the Cryptocompare website into a pandas dataframe:
def daily_price_historical(symbol, comparison_symbol='USD', limit=1, aggregate=1, exchange='', allData='true'):
url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}&allData={}'\
.format(symbol.upper(), comparison_symbol.upper(), limit, aggregate, allData)
if exchange:
url += '&e={}'.format(exchange)
page = requests.get(url)
data = page.json()['Data']
df = pd.DataFrame(data)
df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
df.set_index('timestamp', inplace=True)
df['symbol'] = symbol
df['1dret'] = 100* df['close'].pct_change()
return df
This works fine for most symbols I pass in, but when I loop over a longer list of symbols I get the error: AttributeError: 'DataFrame' object has no attribute 'time'
I assume this is due to the API returning an error for certain symbols, e.g.:
https://min-api.cryptocompare.com/data/histoday?fsym=FAKE&tsym=USD
returns "Response":"Error" with no further data
I'm afraid I'm not very experienced with url requests/APIs. Is there code I can add to the function to skip the symbols that are causing the issue?
Thanks for your help!
Additional information:
Code used to loop over coins (which is a list of 130 symbols):
price_columns = ['close', 'high', 'low', 'open', 'time',
'volumefrom','volumeto', 'symbol', '1dret']
top_coin_prices = pd.DataFrame(columns=price_columns)
for coin in coins:
output = daily_price_historical(coin)
top_coin_prices = top_coin_prices.append(output)
Full Traceback:
AttributeError Traceback (most recent call last)
<ipython-input-277-126f5d1686b2> in <module>()
8 # populate df with data for all coins
9 for coin in coins:
---> 10 output = daily_price_historical(coin)
11 top_coin_prices = top_coin_prices.append(output)
12
<ipython-input-111-65b3fa76b4ab> in daily_price_historical(symbol, comparison_symbol, limit, aggregate, exchange, allData)
7 data = page.json()['Data']
8 df = pd.DataFrame(data)
----> 9 df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
10 df.set_index('timestamp', inplace=True)
11 df['symbol'] = symbol
/anaconda/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
2968 if name in self._info_axis:
2969 return self[name]
-> 2970 return object.__getattribute__(self, name)
2971
2972 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'time'