I am trying to run stable baselines on sports games but keep getting the following error
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 35, in <module>
model.learn(total_timesteps=10000)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/a2c/a2c.py", line 189, in learn
return super(A2C, self).learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 234, in learn
continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 166, in collect_rollouts
actions, values, log_probs = self.policy.forward(obs_tensor)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/policies.py", line 566, in forward
distribution = self._get_action_dist_from_latent(latent_pi, latent_sde=latent_sde)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/policies.py", line 607, in _get_action_dist_from_latent
return self.action_dist.proba_distribution(action_logits=mean_actions)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/distributions.py", line 326, in proba_distribution
self.distribution = [Categorical(logits=split) for split in th.split(action_logits, tuple(self.action_dims), dim=1)]
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/distributions.py", line 326, in <listcomp>
self.distribution = [Categorical(logits=split) for split in th.split(action_logits, tuple(self.action_dims), dim=1)]
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/torch/distributions/categorical.py", line 64, in __init__
super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/torch/distributions/distribution.py", line 53, in __init__
raise ValueError("The parameter {} has invalid values".format(param))
ValueError: The parameter logits has invalid values
I have removed all NaN's (replaced with 0) and normalised the data so that all data is between 0 and 1 but still cannot find the invalid value.
Here is my custom environment:
import gym
from gym import spaces
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
INITIAL_BALANCE = 100
class BettingEnv(gym.Env):
# metadata = {'render.modes': ['human']}
def __init__(self, df, results, INITIAL_BALANCE=100):
self.df = df
self.results = results
self.initial_balance = INITIAL_BALANCE
self.balance = INITIAL_BALANCE
self.profit = 0
self.starting_point = np.random.randint(len(self.df) - len(self.df) * 0.1) # Start anywhere but in the end 10%
self.timestep = 0
self.games_won = 0
self.game_bets = []
self.game_number = self.starting_point + self.timestep
self.action_space = spaces.MultiDiscrete([3,10])
self.observation_space = spaces.Box(
low = self.df.min().min(), # Lowest value found in df
high = self.df.max().max(), # Search the df for the max value (this may change with different data)
shape = (df.shape[1],), # shape of one row of the df
# dtype = np.float16
)
print('First ob: ',self.df.loc[self.game_number])
def _next_obs(self):
print('Get next obs')
# Get next game row
obs = self.df.loc[self.timestep]
print('next obs success')
return obs
def _print_bet_csv(self):
# Create bet_info_df
bet_info_df = pd.DataFrame(self.game_bets)
results_df = self.results.reset_index()
# #Merge dfs
self.merged_df = pd.merge(bet_info_df, results_df, on=['index', 'Home Odds', 'Vis Odds', 'Home Win'])
self.merged_df.set_index('index', inplace=True)
# #Print df
self.merged_df.to_csv('./temp/MLB Bot Betting DF.csv', index=True)
def _print_bet_chart(self):
x_axis = [i for i in range(self.timestep)]
plt.plot(x_axis, self.merged_df['Bankroll'])
plt.title('Bankroll')
plt.ylabel('Dollars')
plt.xlabel('Games')
plt.savefig('./temp/NBA_Bot_Betting.png')
def _take_action(self, action):
print('Start action')
# Init
action_type = action[0]
amount = action[1] + 1
self.game_number = self.starting_point + self.timestep
game_result = self.results['Home Win'][self.game_number]
odds = 0
bet_on = 'NA'
# VISITOR BET
if action_type == 0:
bet_on = 'False'
# Find vis odds
odds = self.results['Vis Odds'][self.game_number]
if odds == 0:
amount = 0
# Place bet
self.balance -= amount
# Check if win
if game_result == False:
self.balance += round(amount * odds, 2)
self.games_won += 1
# NO BET
if action_type == 1:
bet_on = 'No bet'
# HOME BET
if action_type == 2:
bet_on = 'True'
# Find home odds
odds = self.results['Home Odds'][self.game_number]
if odds == 0:
amount = 0
# Place bet
self.balance -= amount
# Check win
if game_result == True:
self.balance += round(amount * odds, 2)
self.games_won += 1
self.balance = round(self.balance, 2)
bet_info = {
'index': self.game_number,
'Home Odds': self.results['Home Odds'][self.game_number],
'Vis Odds': self.results['Vis Odds'][self.game_number],
'Bet on': bet_on,
'Home Win': game_result,
'Amount': amount,
'Odds': odds,
'Bankroll': self.balance
}
self.game_bets.append(bet_info)
print('Finish action')
return bet_info
def step(self, action):
print('Start step')
info = self._take_action(action)
self.timestep += 1
# Reward
gamma = (self.timestep / len(self.df)) # time discount
self.profit = self.balance - self.initial_balance
reward = self.profit * gamma
# Done
done = self.balance <= 0
# Obs
obs = self._next_obs()
# If last game, print results and start from beginning
#test the printing of csv
if self.timestep == 2500:
self._print_bet_csv()
self._print_bet_chart()
self.game_bets = []
print('Starting point: ',self.starting_point)
print('Chart printed')
print('Finished Step')
return obs, reward, done, info
def reset(self):
self.initial_balance = INITIAL_BALANCE
self.balance = INITIAL_BALANCE
self.profit = 0
self.starting_point = np.random.randint(len(self.df) - len(self.df) * 0.1) # Start anywhere but in the end 10%
self.timestep = 0
self.games_won = 0
self.game_bets = []
def render(self, mode='human', close=False):
print('Timestep: ', self.timestep)
print('Profit: ', self.profit)
print('Games Won: ', self.games_won)
print('Balance: ', self.balance)
Here is the file I run the environment from:
import time
start_time = time.time()
import os
import random
import json
import gym
from gym import spaces
import pandas as pd
import numpy as np
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import PPO, A2C
from Betting_env import BettingEnv
data = pd.read_csv('Scraping/Games and Stats.csv')
df = data.drop(['Date', 'Home', 'Visitor', 'Home PTS', 'Vis PTS', 'Home Points Dif', 'Home Win'], axis=1)
df = df.astype(float)
normed = (df-df.min())/(df.max()-df.min())
normed = normed.round(10)
env = DummyVecEnv([lambda: BettingEnv(normed, data, INITIAL_BALANCE=100)])
model = A2C('MlpPolicy', env, verbose=0)
model.learn(total_timesteps=10000)
save_path = os.path.join('Training', 'Saved Models', 'Betting_Model_A2C')
model.save(save_path)
end_time = time.time()
total_time = end_time - start_time
print(round(total_time / 60 / 60), ' Hours ', round(total_time / 60), ' Minutes')
UPDATE:
After using the VecCheckNan() and check_env() functions by stable_baselines3 I get the following error messages.
VecCheckNan() gives:
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 51, in <module>
model.learn(total_timesteps=10000)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/ppo/ppo.py", line 299, in learn
return super(PPO, self).learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 226, in learn
total_timesteps, callback = self._setup_learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/base_class.py", line 420, in _setup_learn
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/vec_env/vec_check_nan.py", line 46, in reset
self._check_val(async_step=False, observations=observations)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/vec_env/vec_check_nan.py", line 84, in _check_val
raise ValueError(msg)
ValueError: found nan in observations.
Originated from the environment observation (at reset)
I have printed out the first observations and there are no NaNs in there.
check_env() gives:
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 42, in <module>
check_env(env)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/env_checker.py", line 245, in check_env
assert isinstance(
AssertionError: Your environment must inherit from the gym.Env class cf https://github.com/openai/gym/blob/master/gym/core.py
I have gym.Env in my Betting_Env class.
I had the same error. In my case the problem was due to the custom reset funciton of my environment. In found that in the BaseAlgorithm class in base_class.py of stablebaselines in line 429 (in my case) is this line of code
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
, where my environment should return an observation or at least None.
So you could try returning an observation in your reset function.
Although you are replacing NaNs and ensuring that there are none, the error is mostly probably due to some of the values being np.inf or -np.inf.
Try:
df = df.replace([np.inf, -np.inf], np.nan).
Hope it works!
You have commented this out:
# dtype = np.float16
So you may already have tried this. But it is possible your dataframe contains a float32 and your observation space is expecting the wrong dtype. Try changing to:
dtype = np.float32
With reference to the images in the attached files, i want to model using pyomo.
What I have done so far.
from pyomo.environ import *
from pyomo.opt import SolverFactory
import pyomo.environ
n=13
distanceMatrix=[[0,8,4,10,12,9,15,8,11,5,9,4,10],
[8,0,7,6,8,6,7,10,12,9,8,7,5],
[4,7,0,7,9,5,8,5,4,8,6 ,10,8],
[10,6 ,7,0,6,11,5 ,9,8,12,11,6,9],
[12,8 ,9,6, 0,7,9,6,9,8,4,11,10],
[9,6,5,11,7,0,10,4,3,10,6,5,7],
[15,7 ,8,5,9,10,0,10,9,8,5,9,10],
[8,10 ,5,9,6,4,10,0,11,5,9,6,7],
[11,12,4,8, 9,3,9,11,0, 9,11,11,6],
[5,9,8,12,8,10,8,5,9,0,6,7,5],
[9,8,6,11,4,6,5,9,11,6,0,10,7],
[4,7,10,6,11,5,9,6,11,7,10,0,9],
[10,5,8,9,10,7,10,7,6,5,7,9,0]]
travel_time=[[0,8,4,10,12,9,15,8,11,5,9,4,10],
[8,0,7,6,8,6,7,10,12,9,8,7,5],
[4,7,0,7,9,5,8,5,4,8,6 ,10,8],
[10,6 ,7,0,6,11,5 ,9,8,12,11,6,9],
[12,8 ,9,6, 0,7,9,6,9,8,4,11,10],
[9,6,5,11,7,0,10,4,3,10,6,5,7],
[15,7 ,8,5,9,10,0,10,9,8,5,9,10],
[8,10 ,5,9,6,4,10,0,11,5,9,6,7],
[11,12,4,8, 9,3,9,11,0, 9,11,11,6],
[5,9,8,12,8,10,8,5,9,0,6,7,5],
[9,8,6,11,4,6,5,9,11,6,0,10,7],
[4,7,10,6,11,5,9,6,11,7,10,0,9],
[10,5,8,9,10,7,10,7,6,5,7,9,0]]
Time_windows = [(1400,1500), (0000,2400), (0000,2400),(0700,2400),(0000,2400),(0000,0700),(0700,2400),(0700,2400),(0000,0700),(0000,2400),\
(0000,2400),(0000,2400),(0700,2400)]
Service_time = [0000, 1600,1600,180,30,120,120,60,30,30,90,120,330]
demand = [9999.00, 9999.00,9999.00,12.00, 4.00, 6.00, 8.00,16.00,6.00,16.00,12.00,24.00,8.00]
K = 4 # no. of vehicles
C = 280; # capacity
speed = 40; # default speed
M = 200;
startCity = 0
model = ConcreteModel()
# sets
#model.M = Set(initialize=range(1, n+1))
model.N = Set(initialize=range(1, n+1))
model.K = Set(initialize=range(1, K+1))
model.Nc = Set(initialize=range(3, n+1)) # set of customers
# Param
model.cost = Param(model.N, model.N, initialize=lambda model, i, j: distanceMatrix[i-1][j-1])
model.travel_time = Param(model.N, model.N,initialize=lambda model, i,j: travel_time[i-1][j-1])
model.Time_windows = Param(model.N, initialize=lambda model, i: travel_time[i-1]) # time_windows
model.Service_time = Param(model.N, initialize=lambda model, i: Service_time[i-1]) # Service time
model.demand = Param(model.N, initialize=lambda model, i: demand[i-1])
model.M = Param(initialize=M)
model.C = Param(initialize=C)
# variables
model.x_ijl = Var(model.N, model.N, model.K, within=Binary) # decision variable = 1 iff vehicle l in K uses arc (i,j) in A
model.d_il = Var(model.N, model.K, bounds=(0,None)) # the accumulative demand at node i in V for vehicle l in K
model.w_il = Var(model.N, model.K, bounds=(0,None)) # start time of service at node i in V for vehicle l in K
"""
Constriants
"""
# All l vehicles must leave the depot
def leave_depot(model,l):
return sum(model.x_ijl[0,j,l] for j in model.N) == 1
model.leave_depot = Constraint(model.K, rule=leave_depot)
# All l vehicles must return to the depot
def return_depot(model,l):
return sum(model.x_ijl[i,0,l] for i in model.N) == 1
model.return_depot = Constraint(model.K, rule=return_depot)
# ensures that all customers are serviced exactly once.
def customer_service(model, j):
return sum(sum(model.x_ijl[i,j,l] for l in model.K) for i in model.N) ==1
model.customer_service1 = Constraint(model.Nc, rule=customer_service)
# Inflow and outflow must be equal except for the depot nodes
def flow(model,j,l):
return sum(model.x_ijl[i,j,l] for i in model.N if i < j) == sum(model.x_ijl[j,i,l] for i in model.N if j < i)
model.flow1 = Constraint(model.N,model.K, rule=flow)
# Time windows
def time_windows1(model,i,l):
return model.Time_windows[i][0] <=model.w_il[i,l] <= model.Time_windows[i][1]
model.time_windows = Constraint(model.N,model.K, rule=time_windows1)
# service time
def service_time(model,i,j,l):
return model.w_il[i,l] + model.Service_time[i] + model.travel_time[i,j] <= model.w_il[j,l] + (1 - model.x_ijl[i,j,l])*200
model.service_time = Constraint(model.N, model.N, model.K, rule=service_time)
# vehicle must be empty at start and end of routes
def empty(model, l):
return model.d_il[0,l] + model.d_il[-1,l] == 0
model.empty = Constraint(model.K, rule=empty)
# accumulative demand for all nodes except disposal sites
def demands_forall_nodes(model,i,j,l):
return model.d_il[i,l] + model.demand[i] <= model.d_il[j,l]+(1 - model.x_ijl[i,j,l]*200)
model.demands_forall_nodes = Constraint(model.Nc, model.N,model.K,rule=demands_forall_nodes)
# Capacity contraints
def vehicle_capacity(model, i,l):
return model.d_il[i,l] <= model.C
model.vehicle_capacity = Constraint(model.N, model.K, rule=vehicle_capacity)
# Objective Function
def objective(model):
return sum(model.cost[i,j]*model.x_ijl[i,j,l] for i in model.N for j in model.N for l in model.K)
model.obj = Objective(rule=objective)
opt = SolverFactory("glpk")
results = opt.solve(model, tee=True)
results.write()
However, I got an error with constriant 2 (from image 2) which I know similar will apply to constraint 3 and constriant 9. The error is:
ERROR: Rule failed when generating expression for constraint leave_depot with
index 1: KeyError: "Index '(0, 1, 1)' is not valid for indexed component
'x_ijl'"
ERROR: Constructing component 'leave_depot' from data=None failed: KeyError:
"Index '(0, 1, 1)' is not valid for indexed component 'x_ijl'"
Traceback (most recent call last):
File "vrptwModel.py", line 81, in <module>
model.leave_depot = Constraint(model.K, rule=leave_depot)
File "/usr/local/lib/python2.7/dist-packages/pyomo/core/base/block.py", line 540, in __setattr__
self.add_component(name, val)
File "/usr/local/lib/python2.7/dist-packages/pyomo/core/base/block.py", line 980, in add_component
val.construct(data)
File "/usr/local/lib/python2.7/dist-packages/pyomo/core/base/constraint.py", line 793, in construct
ndx)
File "/usr/local/lib/python2.7/dist-packages/pyomo/core/base/misc.py", line 61, in apply_indexed_rule
return rule(model, index)
File "vrptwModel.py", line 80, in leave_depot
return sum(model.x_ijl[0,j,l] for j in model.N) == 1
File "vrptwModel.py", line 80, in <genexpr>
return sum(model.x_ijl[0,j,l] for j in model.N) == 1
File "/usr/local/lib/python2.7/dist-packages/pyomo/core/base/indexed_component.py", line 543, in __getitem__
index = self._validate_index(index)
File "/usr/local/lib/python2.7/dist-packages/pyomo/core/base/indexed_component.py", line 695, in _validate_index
% ( idx, self.name, ))
KeyError: "Index '(0, 1, 1)' is not valid for indexed component 'x_ijl'"
My problem is modeling constraint 2 and 3.
Please could someone help me to write these constraints correctly
The problem is that you started your indexing sets at 1 not 0. Change m.x_ijl[0,j,l] to m.x_ijl[1,j,l].