'numpy.ndarray' object has no attribute 'diff' - python

I'm trying this snippet (from web) in my script which calculates StochRSI.
The error output I get:
Traceback (most recent call last):
File "C:\Users\user2\PycharmProjects\xxx2\main.py", line 99, in <module>
main()
File "C:\Users\user2\PycharmProjects\xxx2\main.py", line 87, in main
stoch_rsi = StochRSI(closing_data)
File "C:\Users\user2\PycharmProjects\xxx2\main.py", line 56, in StochRSI
delta = series.diff().dropna()
AttributeError: 'numpy.ndarray' object has no attribute 'diff'
Tried searching for answers but couldn't find any to fix the problem. What's wrong here?
EDIT: Please check the full code below. I hope it clears out what I'm doing... I used numpy to get closing prices. I used that to get live EMA, and it worked fine - and now I'm trying to get StochRSI's live value.
import datetime
import config
import csv
import os.path
import sys
import numpy as np
import pandas as pd
import requests
import talib
from binance.client import Client
from binance.enums import *
from time import sleep
def get_data():
historical_data = client.get_historical_klines(symbol=config.SYMBOL, interval=config.TIME_PERIOD, start_str="1 year ago UTC", klines_type=HistoricalKlinesType.SPOT)
return_data = []
for each in historical_data:
kline = float(each[4])
return_data.append(kline)
return np.array(return_data)
def StochRSI(series, period=14, smoothK=3, smoothD=3):
# Calculate RSI
delta = series.diff().dropna()
ups = delta * 0
downs = ups.copy()
ups[delta > 0] = delta[delta > 0]
downs[delta < 0] = -delta[delta < 0]
ups[ups.index[period-1]] = np.mean(ups[:period]) #first value is sum of avg gains
ups = ups.drop(ups.index[:(period-1)])
downs[downs.index[period-1]] = np.mean(downs[:period]) #first value is sum of avg losses
downs = downs.drop(downs.index[:(period-1)])
rs = ups.ewm(com=period-1, min_periods=0, adjust=False, ignore_na=False).mean() / \
downs.ewm(com=period-1, min_periods=0, adjust=False, ignore_na=False).mean()
rsi = 100 - 100 / (1 + rs)
# Calculate StochRSI
stochrsi = (rsi - rsi.rolling(period).min()) / (rsi.rolling(period).max() - rsi.rolling(period).min())
stochrsi_K = stochrsi.rolling(smoothK).mean()
stochrsi_D = stochrsi_K.rolling(smoothD).mean()
return stochrsi, stochrsi_K, stochrsi_D
def main():
ema_200 = None
last_ema_200 = None
while True:
closing_data = get_data()
last_candle = closing_data[-1]
ema_200 = talib.EMA(closing_data, 10)[-1]
stoch_rsi = StochRSI(closing_data)
if last_candle > ema_200:
print(f"Price {last_candle} is above EMA 200 {ema_200} | RSI {stoch_rsi}")
elif last_candle < ema_200:
print(f"Price {last_candle} is below EMA 200 {ema_200} | RSI {stoch_rsi}")
if __name__ == "__main__":
# Client
client = Client(config.API_KEY, config.API_SECRET_KEY, tld='com')
print(f"Authenticated")
main()

This function is expecting a pandas DataFrame or Series but is getting a numpy ndarray.

Related

Fixing AttributeError in VectorBT (vbt) when using IndicatorFactory and from_talib()

New to VectorBT (vbt) here, but already loving it!
I am practising creating custom indicators using IndicatorFactory and from_talib().
However, I get an error that I can't find how to fix...
Here is the code:
import vectorbt as vbt
import numpy as np
import pandas as pd
import talib
from numba import njit
btc_price = vbt.YFData.download(symbols='BTC-GBP', period='100d', interval='1h').get("Close")
RSI = vbt.IndicatorFactory.from_talib('RSI')
#njit
def produce_signal(rsi, entry, exit):
trend = np.where(rsi > exit, -1, 0)
trend = np.where(rsi < entry, 1, trend)
return trend
def custom_indicator(close, rsi_window=14, entry=30, exit=70):
rsi = RSI.run(close, rsi_window).real.to_numpy()
return produce_signal(rsi, entry, exit)
ind = vbt.IndicatorFactory(
class_name="Combination",
short_name="comb",
input_names=["close"],
param_names=["rsi_window", "entry", "exit"],
output_names=["value"],
).from_apply_func(
custom_indicator,
rsi_window=14,
entry=30,
exit=70,
)
res = ind.run(btc_price,
rsi_window=np.arange(2, 40, step=2, dtype=int),
entry=np.arange(10, 40, step=2, dtype=int),
exit=np.arange(60, 90, step=2, dtype=int),
param_product = True
)
print(res.value)
entries = res.value == 1
exits = res.value == -1
pf = vbt.Portfolio.from_signals(btc_price,
entries,
exits,
freq='1h',
sl_stop=[0.02],
sl_trail=True,
)
print(pf.stats())
pf.plot().show()
And I get this error:
Traceback (most recent call last):
File "/Users/XXXXX/PycharmProjects/pythonProject/VectorBT test optimisation.py", line 41, in <module>
RSI = vbt.IndicatorFactory.from_talib('RSI')
File "/Users/XXXXX/PycharmProjects/pythonProject/venv/lib/python3.10/site-packages/vectorbt/indicators/factory.py", line 3441, in from_talib
info = abstract.Function(func_name)._Functioninfo
AttributeError: 'Function' object has no attribute '_Functioninfo'. Did you mean: '_Function__name'?
I checked on various forums, but can't find anyone else with this error.
So probably related to my own code, but can't find where...
Thanks for your help!

Stable Baselines3 Parameter Logits has invalid values

I am trying to run stable baselines on sports games but keep getting the following error
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 35, in <module>
model.learn(total_timesteps=10000)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/a2c/a2c.py", line 189, in learn
return super(A2C, self).learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 234, in learn
continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 166, in collect_rollouts
actions, values, log_probs = self.policy.forward(obs_tensor)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/policies.py", line 566, in forward
distribution = self._get_action_dist_from_latent(latent_pi, latent_sde=latent_sde)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/policies.py", line 607, in _get_action_dist_from_latent
return self.action_dist.proba_distribution(action_logits=mean_actions)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/distributions.py", line 326, in proba_distribution
self.distribution = [Categorical(logits=split) for split in th.split(action_logits, tuple(self.action_dims), dim=1)]
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/distributions.py", line 326, in <listcomp>
self.distribution = [Categorical(logits=split) for split in th.split(action_logits, tuple(self.action_dims), dim=1)]
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/torch/distributions/categorical.py", line 64, in __init__
super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/torch/distributions/distribution.py", line 53, in __init__
raise ValueError("The parameter {} has invalid values".format(param))
ValueError: The parameter logits has invalid values
I have removed all NaN's (replaced with 0) and normalised the data so that all data is between 0 and 1 but still cannot find the invalid value.
Here is my custom environment:
import gym
from gym import spaces
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
INITIAL_BALANCE = 100
class BettingEnv(gym.Env):
# metadata = {'render.modes': ['human']}
def __init__(self, df, results, INITIAL_BALANCE=100):
self.df = df
self.results = results
self.initial_balance = INITIAL_BALANCE
self.balance = INITIAL_BALANCE
self.profit = 0
self.starting_point = np.random.randint(len(self.df) - len(self.df) * 0.1) # Start anywhere but in the end 10%
self.timestep = 0
self.games_won = 0
self.game_bets = []
self.game_number = self.starting_point + self.timestep
self.action_space = spaces.MultiDiscrete([3,10])
self.observation_space = spaces.Box(
low = self.df.min().min(), # Lowest value found in df
high = self.df.max().max(), # Search the df for the max value (this may change with different data)
shape = (df.shape[1],), # shape of one row of the df
# dtype = np.float16
)
print('First ob: ',self.df.loc[self.game_number])
def _next_obs(self):
print('Get next obs')
# Get next game row
obs = self.df.loc[self.timestep]
print('next obs success')
return obs
def _print_bet_csv(self):
# Create bet_info_df
bet_info_df = pd.DataFrame(self.game_bets)
results_df = self.results.reset_index()
# #Merge dfs
self.merged_df = pd.merge(bet_info_df, results_df, on=['index', 'Home Odds', 'Vis Odds', 'Home Win'])
self.merged_df.set_index('index', inplace=True)
# #Print df
self.merged_df.to_csv('./temp/MLB Bot Betting DF.csv', index=True)
def _print_bet_chart(self):
x_axis = [i for i in range(self.timestep)]
plt.plot(x_axis, self.merged_df['Bankroll'])
plt.title('Bankroll')
plt.ylabel('Dollars')
plt.xlabel('Games')
plt.savefig('./temp/NBA_Bot_Betting.png')
def _take_action(self, action):
print('Start action')
# Init
action_type = action[0]
amount = action[1] + 1
self.game_number = self.starting_point + self.timestep
game_result = self.results['Home Win'][self.game_number]
odds = 0
bet_on = 'NA'
# VISITOR BET
if action_type == 0:
bet_on = 'False'
# Find vis odds
odds = self.results['Vis Odds'][self.game_number]
if odds == 0:
amount = 0
# Place bet
self.balance -= amount
# Check if win
if game_result == False:
self.balance += round(amount * odds, 2)
self.games_won += 1
# NO BET
if action_type == 1:
bet_on = 'No bet'
# HOME BET
if action_type == 2:
bet_on = 'True'
# Find home odds
odds = self.results['Home Odds'][self.game_number]
if odds == 0:
amount = 0
# Place bet
self.balance -= amount
# Check win
if game_result == True:
self.balance += round(amount * odds, 2)
self.games_won += 1
self.balance = round(self.balance, 2)
bet_info = {
'index': self.game_number,
'Home Odds': self.results['Home Odds'][self.game_number],
'Vis Odds': self.results['Vis Odds'][self.game_number],
'Bet on': bet_on,
'Home Win': game_result,
'Amount': amount,
'Odds': odds,
'Bankroll': self.balance
}
self.game_bets.append(bet_info)
print('Finish action')
return bet_info
def step(self, action):
print('Start step')
info = self._take_action(action)
self.timestep += 1
# Reward
gamma = (self.timestep / len(self.df)) # time discount
self.profit = self.balance - self.initial_balance
reward = self.profit * gamma
# Done
done = self.balance <= 0
# Obs
obs = self._next_obs()
# If last game, print results and start from beginning
#test the printing of csv
if self.timestep == 2500:
self._print_bet_csv()
self._print_bet_chart()
self.game_bets = []
print('Starting point: ',self.starting_point)
print('Chart printed')
print('Finished Step')
return obs, reward, done, info
def reset(self):
self.initial_balance = INITIAL_BALANCE
self.balance = INITIAL_BALANCE
self.profit = 0
self.starting_point = np.random.randint(len(self.df) - len(self.df) * 0.1) # Start anywhere but in the end 10%
self.timestep = 0
self.games_won = 0
self.game_bets = []
def render(self, mode='human', close=False):
print('Timestep: ', self.timestep)
print('Profit: ', self.profit)
print('Games Won: ', self.games_won)
print('Balance: ', self.balance)
Here is the file I run the environment from:
import time
start_time = time.time()
import os
import random
import json
import gym
from gym import spaces
import pandas as pd
import numpy as np
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import PPO, A2C
from Betting_env import BettingEnv
data = pd.read_csv('Scraping/Games and Stats.csv')
df = data.drop(['Date', 'Home', 'Visitor', 'Home PTS', 'Vis PTS', 'Home Points Dif', 'Home Win'], axis=1)
df = df.astype(float)
normed = (df-df.min())/(df.max()-df.min())
normed = normed.round(10)
env = DummyVecEnv([lambda: BettingEnv(normed, data, INITIAL_BALANCE=100)])
model = A2C('MlpPolicy', env, verbose=0)
model.learn(total_timesteps=10000)
save_path = os.path.join('Training', 'Saved Models', 'Betting_Model_A2C')
model.save(save_path)
end_time = time.time()
total_time = end_time - start_time
print(round(total_time / 60 / 60), ' Hours ', round(total_time / 60), ' Minutes')
UPDATE:
After using the VecCheckNan() and check_env() functions by stable_baselines3 I get the following error messages.
VecCheckNan() gives:
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 51, in <module>
model.learn(total_timesteps=10000)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/ppo/ppo.py", line 299, in learn
return super(PPO, self).learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 226, in learn
total_timesteps, callback = self._setup_learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/base_class.py", line 420, in _setup_learn
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/vec_env/vec_check_nan.py", line 46, in reset
self._check_val(async_step=False, observations=observations)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/vec_env/vec_check_nan.py", line 84, in _check_val
raise ValueError(msg)
ValueError: found nan in observations.
Originated from the environment observation (at reset)
I have printed out the first observations and there are no NaNs in there.
check_env() gives:
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 42, in <module>
check_env(env)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/env_checker.py", line 245, in check_env
assert isinstance(
AssertionError: Your environment must inherit from the gym.Env class cf https://github.com/openai/gym/blob/master/gym/core.py
I have gym.Env in my Betting_Env class.
I had the same error. In my case the problem was due to the custom reset funciton of my environment. In found that in the BaseAlgorithm class in base_class.py of stablebaselines in line 429 (in my case) is this line of code
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
, where my environment should return an observation or at least None.
So you could try returning an observation in your reset function.
Although you are replacing NaNs and ensuring that there are none, the error is mostly probably due to some of the values being np.inf or -np.inf.
Try:
df = df.replace([np.inf, -np.inf], np.nan).
Hope it works!
You have commented this out:
# dtype = np.float16
So you may already have tried this. But it is possible your dataframe contains a float32 and your observation space is expecting the wrong dtype. Try changing to:
dtype = np.float32

Function returning same values for supposedly different inputs

I'm using pyalgotrade to create a trading strategy. I'm going through a list of tickers(testlist) and adding them to a dictionary(list_large{}) alongside their score which I'm getting using a get_score function. My latest problem is that each ticker in the dictionary(list_large{}) is getting the same score. Any idea why?
Code:
from pyalgotrade import strategy
from pyalgotrade.tools import yahoofinance
import numpy as np
import pandas as pd
from collections import OrderedDict
from pyalgotrade.technical import ma
from talib import MA_Type
import talib
smaPeriod = 10
testlist = ['aapl','ddd','gg','z']
class MyStrategy(strategy.BacktestingStrategy):
def __init__(self, feed, instrument):
super(MyStrategy, self).__init__(feed, 1000)
self.__position = []
self.__instrument = instrument
self.setUseAdjustedValues(True)
self.__prices = feed[instrument].getPriceDataSeries()
self.__sma = ma.SMA(feed[instrument].getPriceDataSeries(), smaPeriod)
def get_score(self,slope):
MA_Score = self.__sma[-1] * slope
return MA_Score
def onBars(self, bars):
global bar
bar = bars[self.__instrument]
slope = 8
for instrument in bars.getInstruments():
list_large = {}
for tickers in testlist: #replace with real list when ready
list_large.update({tickers : self.get_score(slope)})
organized_list = OrderedDict(sorted(list_large.items(), key=lambda t: -t[1]))#organize the list from highest to lowest score
print list_large
def run_strategy(inst):
# Load the yahoo feed from the CSV file
feed = yahoofinance.build_feed([inst],2015,2016, ".") # feed = yahoofinance.build_feed([inst],2015,2016, ".")
# Evaluate the strategy with the feed.
myStrategy = MyStrategy(feed, inst)
myStrategy.run()
print "Final portfolio value: $%.2f" % myStrategy.getBroker().getEquity()
def main():
instruments = ['ddd','msft']
for inst in instruments:
run_strategy(inst)
if __name__ == '__main__':
main()
Check this code of the onBars() function:
slope = 8 # <---- value of slope = 8
for instrument in bars.getInstruments():
list_large = {}
for tickers in testlist: #replace with real list when ready
list_large.update({tickers : self.get_score(slope)})
# Updating dict of each ticker based on ^
Each time self.get_score(slope) is called, it returns the same value and hence, all the value of tickers hold the same value in dict
I do not know how you want to deal with slope and how you want to update it's value. But this logic can be simplified without using .update as:
list_large = {}
for tickers in testlist:
list_large[tickers] = self.get_score(slope)
# ^ Update value of `tickers` key

Python: invalid syntax: <string>, line 1, pos 16

I have developed a code in Python in which -in order to run the program- I need to take some arguments from the command line. But I am getting continuously the same error:
Traceback (most recent call last):
File "<string>", line 1, in <fragment>
invalid syntax: <string>, line 1, pos 16
I have the faintest idea what is wrong with my code. So, I present my code below in case someone could help me:
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkutil.DataAccess as da
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import math
import copy
import QSTK.qstkstudy.EventProfiler as ep
import csv
import sys
import argparse
def readData(li_startDate, li_endDate, ls_symbols):
#Create datetime objects for Start and End dates (STL)
dt_start = dt.datetime(li_startDate[0], li_startDate[1], li_startDate[2])
dt_end = dt.datetime(li_endDate[0], li_endDate[1], li_endDate[2])
#Initialize daily timestamp: closing prices, so timestamp should be hours=16 (STL)
dt_timeofday = dt.timedelta(hours=16)
#Get a list of trading days between the start and end dates (QSTK)
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
#Create an object of the QSTK-dataaccess class with Yahoo as the source (QSTK)
c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
#Keys to be read from the data
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
#Read the data and map it to ls_keys via dict() (i.e. Hash Table structure)
ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))
return [d_data, dt_start, dt_end, dt_timeofday, ldt_timestamps]
def marketsim(cash,orders_file,values_file):
orders = pd.read_csv(orders_file,index_col='Date',parse_dates=True,header=None)
ls_symbols = list(set(orders['X.4'].values))
df_lastrow = len(orders) - 1
dt_start = dt.datetime(orders.get_value(0, 'X.1'),orders.get_value(0, 'X.2'),orders.get_value(0, 'X.3'))
dt_end = dt.datetime(orders.get_value(df_lastrow, 'X.1'),orders.get_value(df_lastrow, 'X.2'),orders.get_value(df_lastrow, 'X.3') + 1 )
#d_data = readData(dt_start,dt_end,ls_symbols)
#Initialize daily timestamp: closing prices, so timestamp should be hours=16 (STL)
dt_timeofday = dt.timedelta(hours=16)
#Get a list of trading days between the start and end dates (QSTK)
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
#Create an object of the QSTK-dataaccess class with Yahoo as the source (QSTK)
c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
#Keys to be read from the data
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
#Read the data and map it to ls_keys via dict() (i.e. Hash Table structure)
df_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))
ls_symbols.append("_CASH")
trades = pd.Dataframe(index=list(ldt_timestamps[0]),columns=list(ls_symbols))
current_cash = cash
trades["_CASH"][ldt_timestamps[0]] = current_cash
current_stocks = dict()
for symb in ls_symbols:
current_stocks[symb] = 0
trades[symb][ldt_timestamps[0]] = 0
for row in orders.iterrows():
row_data = row[1]
current_date = dt.datetime(row_data['X.1'],row_data['X.2'],row_data['X.3'],16)
symb = row_data['X.4']
stock_value = d_data['close'][symb][current_date]
stock_amount = row_data['X.6']
if row_data['X.5'] == "Buy":
current_cash = current_cash - (stock_value*stock_amount)
trades["_CASH"][current_date] = current_cash
current_stocks[symb] = current_stocks[symb] + stock_amount
trades[symb][current_date] = current_stocks[symb]
else:
current_cash = current_cash + (stock_value*stock_amount)
trades["_CASH"][current_date] = current_cash
current_stocks[symb] = current_stocks[symb] - stock_amount
trades[symb][current_date] = current_stocks[symb]
#trades.fillna(method='ffill',inplace=True)
#trades.fillna(method='bfill',inplace=False)
trades.fillna(0)
#alt_cash = current_cash
#alt_cash = trades.cumsum()
value_data = pd.Dataframe(index=list(ldt_timestamps),columns=list("V"))
value_data = value_data.fillna(0)
value_data = value_data.cumsum(axis=0)
for day in ldt_timestamps:
value = 0
for sym in ls_symbols:
if sym == "_CASH":
value = value + trades[sym][day]
else:
value = calue + trades[sym][day]*d_data['close'][sym][day]
value_data["V"][day] = value
fileout = open(values_file,"w")
for row in value_data.iterrows():
file_out.writelines(str(row[0].strftime('%Y,%m,%d')) + ", " + str(row[1]["V"].round()) + "\n" )
fileout.close()
def main(argv):
if len(sys.argv) != 3:
print "Invalid arguments for marketsim.py. It should be of the following syntax: marketsim.py orders_file.csv values_file.csv"
sys.exit(0)
#initial_cash = int (sys.argv[1])
initial_cash = 1000000
ordersFile = str(sys.argv[1])
valuesFile = str(sys.argv[2])
marketsim(initial_cash,ordersFile,valuesFile)
if __name__ == "__main__":
main(sys.argv[1:])
The input I gave to the command line was:
python marketsim.py orders.csv values.csv
I guess that the problem lies either into the imports or probably into the main function(incl. the if below the def main(argv)
I have to point out that the files orders.csv and values.csv exist and are located into the same folder.
I hope have made everything clear.
So, I am looking forward to reading your answers community-mates! :D
Thank you!

object of type '_Task' has no len() error

I am using the parallel programming module for python I have a function that returns me an array but when I print the variable that contain the value of the function parallelized returns me "pp._Task object at 0x04696510" and not the value of the matrix.
Here is the code:
from __future__ import print_function
import scipy, pylab
from scipy.io.wavfile import read
import sys
import peakpicker as pea
import pp
import fingerprint as fhash
import matplotlib
import numpy as np
import tdft
import subprocess
import time
if __name__ == '__main__':
start=time.time()
#Peak picking dimensions
f_dim1 = 30
t_dim1 = 80
f_dim2 = 10
t_dim2 = 20
percentile = 80
base = 100 # lowest frequency bin used (peaks below are too common/not as useful for identification)
high_peak_threshold = 75
low_peak_threshold = 60
#TDFT parameters
windowsize = 0.008 #set the window size (0.008s = 64 samples)
windowshift = 0.004 #set the window shift (0.004s = 32 samples)
fftsize = 1024 #set the fft size (if srate = 8000, 1024 --> 513 freq. bins separated by 7.797 Hz from 0 to 4000Hz)
#Hash parameters
delay_time = 250 # 250*0.004 = 1 second#200
delta_time = 250*3 # 750*0.004 = 3 seconds#300
delta_freq = 128 # 128*7.797Hz = approx 1000Hz#80
#Time pair parameters
TPdelta_freq = 4
TPdelta_time = 2
#Cargando datos almacenados
database=np.loadtxt('database.dat')
songnames=np.loadtxt('songnames.dat', dtype=str, delimiter='\t')
separator = '.'
print('Please enter an audio sample file to identify: ')
userinput = raw_input('---> ')
subprocess.call(['ffmpeg','-y','-i',userinput, '-ac', '1','-ar', '8k', 'filesample.wav'])
sample = read('filesample.wav')
userinput = userinput.split(separator,1)[0]
print('Analyzing the audio sample: '+str(userinput))
srate = sample[0] #sample rate in samples/second
audio = sample[1] #audio data
spectrogram = tdft.tdft(audio, srate, windowsize, windowshift, fftsize)
mytime = spectrogram.shape[0]
freq = spectrogram.shape[1]
print('The size of the spectrogram is time: '+str(mytime)+' and freq: '+str(freq))
threshold = pea.find_thres(spectrogram, percentile, base)
peaks = pea.peak_pick(spectrogram,f_dim1,t_dim1,f_dim2,t_dim2,threshold,base)
print('The initial number of peaks is:'+str(len(peaks)))
peaks = pea.reduce_peaks(peaks, fftsize, high_peak_threshold, low_peak_threshold)
print('The reduced number of peaks is:'+str(len(peaks)))
#Store information for the spectrogram graph
samplePeaks = peaks
sampleSpectro = spectrogram
hashSample = fhash.hashSamplePeaks(peaks,delay_time,delta_time,delta_freq)
print('The dimensions of the hash matrix of the sample: '+str(hashSample.shape))
# tuple of all parallel python servers to connect with
ppservers = ()
#ppservers = ("10.0.0.1",)
if len(sys.argv) > 1:
ncpus = int(sys.argv[1])
# Creates jobserver with ncpus workers
job_server = pp.Server(ncpus, ppservers=ppservers)
else:
# Creates jobserver with automatically detected number of workers
job_server = pp.Server(ppservers=ppservers)
print ("Starting pp with", job_server.get_ncpus(), "workers")
print('Attempting to identify the sample audio clip.')
Here I call the function in fingerprint, the commented line worked, but when I try parallelize don't work:
timepairs = job_server.submit(fhash.findTimePairs, (database, hashSample, TPdelta_freq, TPdelta_time, ))
# timepairs = fhash.findTimePairs(database, hashSample, TPdelta_freq, TPdelta_time)
print (timepairs)
#Compute number of matches by song id to determine a match
numSongs = len(songnames)
songbins= np.zeros(numSongs)
numOffsets = len(timepairs)
offsets = np.zeros(numOffsets)
index = 0
for i in timepairs:
offsets[index]=i[0]-i[1]
index = index+1
songbins[i[2]] += 1
# Identify the song
#orderarray=np.column_stack((songbins,songnames))
#orderarray=orderarray[np.lexsort((songnames,songbins))]
q3=np.percentile(songbins, 75)
q1=np.percentile(songbins, 25)
j=0
for i in songbins:
if i>(q3+(3*(q3-q1))):
print("Result-> "+str(i)+":"+songnames[j])
j+=1
end=time.time()
print('Tiempo: '+str(end-start)+' s')
print("Time elapsed: ", +time.time() - start, "s")
fig3 = pylab.figure(1003)
ax = fig3.add_subplot(111)
ind = np.arange(numSongs)
width = 0.35
rects1 = ax.bar(ind,songbins,width,color='blue',align='center')
ax.set_ylabel('Number of Matches')
ax.set_xticks(ind)
xtickNames = ax.set_xticklabels(songnames)
matplotlib.pyplot.setp(xtickNames)
pylab.title('Song Identification')
fig3.show()
pylab.show()
print('The sample song is: '+str(songnames[np.argmax(songbins)]))
The function in fingerprint that I try to parallelize is:
def findTimePairs(hash_database,sample_hash,deltaTime,deltaFreq):
"Find the matching pairs between sample audio file and the songs in the database"
timePairs = []
for i in sample_hash:
for j in hash_database:
if(i[0] > (j[0]-deltaFreq) and i[0] < (j[0] + deltaFreq)):
if(i[1] > (j[1]-deltaFreq) and i[1] < (j[1] + deltaFreq)):
if(i[2] > (j[2]-deltaTime) and i[2] < (j[2] + deltaTime)):
timePairs.append((j[3],i[3],j[4]))
else:
continue
else:
continue
else:
continue
return timePairs
The complete error is:
Traceback (most recent call last):
File "analisisPrueba.py", line 93, in <module>
numOffsets = len(timepairs)
TypeError: object of type '_Task' has no len()
The submit() method submits a task to the server. What you get back is a reference to the task, not its result. (How could it return its result? submit() returns before any of that work has been done!) You should instead provide a callback function to receive the results. For example, timepairs.append is a function that will take the result and append it to the list timepairs.
timepairs = []
job_server.submit(fhash.findTimePairs, (database, hashSample, TPdelta_freq, TPdelta_time, ), callback=timepairs.append)
(Each findTimePairs call should calculate one result, in case that isn't obvious, and you should submit multiple tasks. Otherwise you're invoking all the machinery of Parallel Python for no reason. And make sure you call job_server.wait() to wait for all the tasks to finish before trying to do anything with your results. In short, read the documentation and some example scripts and make sure you understand how it works.)

Categories