'ValueError: cannot set a row with mismatched columns' when adding a row to pandas DataFrame - python

While learning python, I decided to try create genetic algorithm and got stuck in the mutation step.
I will be glad for any advice both on solving this problem and in general on the architecture and style of the code.
one_generation = genlib.create_generation()
print(genlib.almost_generation(one_generation))
This code return error:
Traceback (most recent call last):
File "/home/rosrobot/PycharmProjects/gen2/main.py", line 23, in \<module\>
print(genlib.almost_generation(one_generation))
File "/home/rosrobot/PycharmProjects/gen2/genlib.py", line 83, in almost_generation
updated_generation.loc\[creature_index\] = sample\[updated_generation.columns\]
File "/home/rosrobot/PycharmProjects/gen2/venv/lib64/python3.10/site-packages/pandas/core/indexing.py", line 716, in __setitem__
iloc.\_setitem_with_indexer(indexer, value, self.name)
File "/home/rosrobot/PycharmProjects/gen2/venv/lib64/python3.10/site-packages/pandas/core/indexing.py", line 1682, in \_setitem_with_indexer
self.\_setitem_with_indexer_missing(indexer, value)
File "/home/rosrobot/PycharmProjects/gen2/venv/lib64/python3.10/site-packages/pandas/core/indexing.py", line 1998, in \_setitem_with_indexer_missing
raise ValueError("cannot set a row with mismatched columns")
ValueError: cannot set a row with mismatched columns
Process finished with exit code 1
Functions in 'genlib' file:
import random as rnd
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
pd.plotting.register_matplotlib_converters()
def create_creature(gen_length=10,
creature_name='one'):
creature = pd.Series(data=[rnd.randint(0, 1) for i in range(gen_length)],
name=creature_name)
return creature
def create_generation(generation_size=50,
gen_length=10):
generation = pd.DataFrame(data=[create_creature(creature_name=(str(i + 1)) + 'th',
gen_length=gen_length) for i in range(generation_size)])
generation['quality'] = generation.sum(axis=1)
return generation
def __indexes_of_quality(generation):
"""
:rtype: pd.Series
"""
for i in generation.quality.unique():
print('quality = ', i, ': ',
generation.loc[generation.quality == i, 'quality'].index.values,
'\n',
'__')
def create_many_generations(number_of_generations=10,
generation_size=50,
gen_length=10):
list_of_dataframes = pd.Series(data=[create_generation(generation_size=generation_size,
gen_length=gen_length
) for i in range(number_of_generations)],
name='creature_name')
return list_of_dataframes
def one_generation_pyplot(generation):
sns.barplot(x=generation.index,
y=generation.sort_values('quality').quality)
plt.show()
def many_generations_pyplot(list_of_generations):
qualities = [sum(generation.quality) for generation in list_of_generations]
sns.lineplot(data=qualities)
plt.show()
def __mutation(creature: pd.Series) -> pd.Series:
point = rnd.randint(0, len(creature))
creature[point] = int(not creature[point].values)
return creature
def almost_generation(generation):
sample = generation.sample()
sample = __mutation(sample)
updated_generation = pd.DataFrame(columns=generation.columns)
for creature_index in generation.index:
if creature_index == sample.index:
print(creature_index, ' == ', sample.index)
updated_generation.loc[creature_index] = sample[updated_generation.columns]
else:
updated_generation.loc[creature_index] = generation.loc[creature_index]
return updated_generation
I tried to convert "sample" to str, and also tried using loc, iloc and append

In your almost generation function, change the line within your if block to assign values:
if creature_index == sample.index:
print(creature_index, ' == ', sample.index)
updated_generation.loc[creature_index] = sample[updated_generation.columns].values
You can just simplify your entire function as follows:
def almost_generation(generation):
sample = generation.sample()
sample = __mutation(sample)
generation.loc[sample.index] = sample[generation.columns].values
return generation

Related

Blank Strings Are Returned in Python Dataframe

I wrote a code to convert PDF to CSV, read the CSV file, and export only relevant information from the CSV file. The function is supposed to return filtered information such as english_name: 'someones name', original_language_name: 'someones name' etc, but instead the command returned english_name: '', original_language_name: '' etc. Below is the code that I wrote:
import pandas as pd
import tabula
from pandas import DataFrame
from backend.classes import Shareholder, Officer
from typing import List
def strip_string(string):
return str(string).strip()
def get_float_without_thousands_separator(string, thousands_separator):
return float(string.replace(thousands_separator, ''))
def extract_officers_and_shareholders_lists_from_df(df, total_number_of_shares, no_data_placeholder, number_of_shares, thousands_separator):
officers = []
shareholders = []
NAME = 'Nama'
POSITION = 'Jabatan'
for row in range((df.shape[0])):
if str(df[POSITION][row]).strip() != no_data_placeholder:
original_language_name = strip_string(df[NAME][row])
english_name = strip_string(df[NAME][row])
position = strip_string(df[POSITION][row])
officer = Officer(english_name=english_name, original_language_name=original_language_name, position=position)
officers.append(officer)
elif str(df[number_of_shares][row]).strip() != no_data_placeholder:
original_language_name = strip_string(df[NAME][row])
english_name = strip_string(df[NAME][row])
number_of_shares_string = strip_string(df[number_of_shares][row])
number_of_shares_number = get_float_without_thousands_separator(number_of_shares_string, thousands_separator)
shareholding_percentage = (number_of_shares_number / total_number_of_shares) * 100
shareholder = Shareholder(english_name=english_name, original_language_name=original_language_name, shareholding_percentage=shareholding_percentage)
shareholders.append(shareholder)
return officers, shareholders
def get_officers_and_shareholders_lists(pdf_input_file):
NO_DATA_PLACEHOLDER = '-'
NUMBER_OF_SHARES = 'Jumlah Lembar Saham'
THOUSANDS_SEPARATOR = '.'
output_file_path = 'CSV/Officers_and_Shareholders.csv'
tabula.convert_into(pdf_input_file, output_file_path, output_format='csv', pages='all')
df = pd.read_csv(output_file_path, header=3, on_bad_lines='skip')
all_shares = df[NUMBER_OF_SHARES].to_list()
all_shares_strings = [strip_string(shares) for shares in all_shares if strip_string(shares) != NO_DATA_PLACEHOLDER]
all_shares_numbers = [get_float_without_thousands_separator(shares, THOUSANDS_SEPARATOR) for shares in all_shares_strings]
total_number_of_shares = sum(all_shares_numbers)
return extract_officers_and_shareholders_lists_from_df(
df=df,
total_number_of_shares=total_number_of_shares,
number_of_shares=NUMBER_OF_SHARES,
no_data_placeholder=NO_DATA_PLACEHOLDER,
thousands_separator=THOUSANDS_SEPARATOR)
The command call that I use for the codes on the above is python3 -m backend.officers_and_shareholders. Is there a method to pass in so that english_name returns a name, original_language_name returns a name?

Stable Baselines3 Parameter Logits has invalid values

I am trying to run stable baselines on sports games but keep getting the following error
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 35, in <module>
model.learn(total_timesteps=10000)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/a2c/a2c.py", line 189, in learn
return super(A2C, self).learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 234, in learn
continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 166, in collect_rollouts
actions, values, log_probs = self.policy.forward(obs_tensor)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/policies.py", line 566, in forward
distribution = self._get_action_dist_from_latent(latent_pi, latent_sde=latent_sde)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/policies.py", line 607, in _get_action_dist_from_latent
return self.action_dist.proba_distribution(action_logits=mean_actions)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/distributions.py", line 326, in proba_distribution
self.distribution = [Categorical(logits=split) for split in th.split(action_logits, tuple(self.action_dims), dim=1)]
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/distributions.py", line 326, in <listcomp>
self.distribution = [Categorical(logits=split) for split in th.split(action_logits, tuple(self.action_dims), dim=1)]
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/torch/distributions/categorical.py", line 64, in __init__
super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/torch/distributions/distribution.py", line 53, in __init__
raise ValueError("The parameter {} has invalid values".format(param))
ValueError: The parameter logits has invalid values
I have removed all NaN's (replaced with 0) and normalised the data so that all data is between 0 and 1 but still cannot find the invalid value.
Here is my custom environment:
import gym
from gym import spaces
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
INITIAL_BALANCE = 100
class BettingEnv(gym.Env):
# metadata = {'render.modes': ['human']}
def __init__(self, df, results, INITIAL_BALANCE=100):
self.df = df
self.results = results
self.initial_balance = INITIAL_BALANCE
self.balance = INITIAL_BALANCE
self.profit = 0
self.starting_point = np.random.randint(len(self.df) - len(self.df) * 0.1) # Start anywhere but in the end 10%
self.timestep = 0
self.games_won = 0
self.game_bets = []
self.game_number = self.starting_point + self.timestep
self.action_space = spaces.MultiDiscrete([3,10])
self.observation_space = spaces.Box(
low = self.df.min().min(), # Lowest value found in df
high = self.df.max().max(), # Search the df for the max value (this may change with different data)
shape = (df.shape[1],), # shape of one row of the df
# dtype = np.float16
)
print('First ob: ',self.df.loc[self.game_number])
def _next_obs(self):
print('Get next obs')
# Get next game row
obs = self.df.loc[self.timestep]
print('next obs success')
return obs
def _print_bet_csv(self):
# Create bet_info_df
bet_info_df = pd.DataFrame(self.game_bets)
results_df = self.results.reset_index()
# #Merge dfs
self.merged_df = pd.merge(bet_info_df, results_df, on=['index', 'Home Odds', 'Vis Odds', 'Home Win'])
self.merged_df.set_index('index', inplace=True)
# #Print df
self.merged_df.to_csv('./temp/MLB Bot Betting DF.csv', index=True)
def _print_bet_chart(self):
x_axis = [i for i in range(self.timestep)]
plt.plot(x_axis, self.merged_df['Bankroll'])
plt.title('Bankroll')
plt.ylabel('Dollars')
plt.xlabel('Games')
plt.savefig('./temp/NBA_Bot_Betting.png')
def _take_action(self, action):
print('Start action')
# Init
action_type = action[0]
amount = action[1] + 1
self.game_number = self.starting_point + self.timestep
game_result = self.results['Home Win'][self.game_number]
odds = 0
bet_on = 'NA'
# VISITOR BET
if action_type == 0:
bet_on = 'False'
# Find vis odds
odds = self.results['Vis Odds'][self.game_number]
if odds == 0:
amount = 0
# Place bet
self.balance -= amount
# Check if win
if game_result == False:
self.balance += round(amount * odds, 2)
self.games_won += 1
# NO BET
if action_type == 1:
bet_on = 'No bet'
# HOME BET
if action_type == 2:
bet_on = 'True'
# Find home odds
odds = self.results['Home Odds'][self.game_number]
if odds == 0:
amount = 0
# Place bet
self.balance -= amount
# Check win
if game_result == True:
self.balance += round(amount * odds, 2)
self.games_won += 1
self.balance = round(self.balance, 2)
bet_info = {
'index': self.game_number,
'Home Odds': self.results['Home Odds'][self.game_number],
'Vis Odds': self.results['Vis Odds'][self.game_number],
'Bet on': bet_on,
'Home Win': game_result,
'Amount': amount,
'Odds': odds,
'Bankroll': self.balance
}
self.game_bets.append(bet_info)
print('Finish action')
return bet_info
def step(self, action):
print('Start step')
info = self._take_action(action)
self.timestep += 1
# Reward
gamma = (self.timestep / len(self.df)) # time discount
self.profit = self.balance - self.initial_balance
reward = self.profit * gamma
# Done
done = self.balance <= 0
# Obs
obs = self._next_obs()
# If last game, print results and start from beginning
#test the printing of csv
if self.timestep == 2500:
self._print_bet_csv()
self._print_bet_chart()
self.game_bets = []
print('Starting point: ',self.starting_point)
print('Chart printed')
print('Finished Step')
return obs, reward, done, info
def reset(self):
self.initial_balance = INITIAL_BALANCE
self.balance = INITIAL_BALANCE
self.profit = 0
self.starting_point = np.random.randint(len(self.df) - len(self.df) * 0.1) # Start anywhere but in the end 10%
self.timestep = 0
self.games_won = 0
self.game_bets = []
def render(self, mode='human', close=False):
print('Timestep: ', self.timestep)
print('Profit: ', self.profit)
print('Games Won: ', self.games_won)
print('Balance: ', self.balance)
Here is the file I run the environment from:
import time
start_time = time.time()
import os
import random
import json
import gym
from gym import spaces
import pandas as pd
import numpy as np
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import PPO, A2C
from Betting_env import BettingEnv
data = pd.read_csv('Scraping/Games and Stats.csv')
df = data.drop(['Date', 'Home', 'Visitor', 'Home PTS', 'Vis PTS', 'Home Points Dif', 'Home Win'], axis=1)
df = df.astype(float)
normed = (df-df.min())/(df.max()-df.min())
normed = normed.round(10)
env = DummyVecEnv([lambda: BettingEnv(normed, data, INITIAL_BALANCE=100)])
model = A2C('MlpPolicy', env, verbose=0)
model.learn(total_timesteps=10000)
save_path = os.path.join('Training', 'Saved Models', 'Betting_Model_A2C')
model.save(save_path)
end_time = time.time()
total_time = end_time - start_time
print(round(total_time / 60 / 60), ' Hours ', round(total_time / 60), ' Minutes')
UPDATE:
After using the VecCheckNan() and check_env() functions by stable_baselines3 I get the following error messages.
VecCheckNan() gives:
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 51, in <module>
model.learn(total_timesteps=10000)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/ppo/ppo.py", line 299, in learn
return super(PPO, self).learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 226, in learn
total_timesteps, callback = self._setup_learn(
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/base_class.py", line 420, in _setup_learn
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/vec_env/vec_check_nan.py", line 46, in reset
self._check_val(async_step=False, observations=observations)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/vec_env/vec_check_nan.py", line 84, in _check_val
raise ValueError(msg)
ValueError: found nan in observations.
Originated from the environment observation (at reset)
I have printed out the first observations and there are no NaNs in there.
check_env() gives:
Traceback (most recent call last):
File "/home/dev/Desktop/Projects/AI/NBA2/stable_baselines_run.py", line 42, in <module>
check_env(env)
File "/home/dev/anaconda3/envs/sb/lib/python3.9/site-packages/stable_baselines3/common/env_checker.py", line 245, in check_env
assert isinstance(
AssertionError: Your environment must inherit from the gym.Env class cf https://github.com/openai/gym/blob/master/gym/core.py
I have gym.Env in my Betting_Env class.
I had the same error. In my case the problem was due to the custom reset funciton of my environment. In found that in the BaseAlgorithm class in base_class.py of stablebaselines in line 429 (in my case) is this line of code
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
, where my environment should return an observation or at least None.
So you could try returning an observation in your reset function.
Although you are replacing NaNs and ensuring that there are none, the error is mostly probably due to some of the values being np.inf or -np.inf.
Try:
df = df.replace([np.inf, -np.inf], np.nan).
Hope it works!
You have commented this out:
# dtype = np.float16
So you may already have tried this. But it is possible your dataframe contains a float32 and your observation space is expecting the wrong dtype. Try changing to:
dtype = np.float32

error while reading serially incoming data to plot live graph

My input data is like 36,45,32
so when I tried to replace comma by '.' i.e dot, so that it doesn't give me a "cannot convert to float" error, the conversion now would be 36.45.32, which is still not plottable on a live graph.
My code is:
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import tkinter as tk
import numpy as np
import serial as sr
#------global variables
data = np.array([])
cond = False
#-----plot data-----
def plot_data():
global cond, data
if (cond == True):
x= s.read()
y=x.decode()
a=y.replace('\r', '').replace('\n', '').replace(',','.')
print(a)
if(len(data) < 100):
data = np.append(data,float(a[0:4]))
else:
data[0:99] = data[1:100]
data[99] = float(a[0:4])
lines.set_xdata(np.arange(0,len(data)))
lines.set_ydata(data)
canvas.draw()
root.after(1,plot_data)
def plot_start():
global cond
cond = True
s.reset_input_buffer()
def plot_stop():
global cond
cond = False
#-----Main GUI code-----
root = tk.Tk()
root.title('Real Time Plot')
root.configure(background = 'light blue')
root.geometry("700x500") # set the window size
#------create Plot object on GUI----------
# add figure canvas
fig = Figure();
ax = fig.add_subplot(111)
#ax = plt.axes(xlim=(0,100),ylim=(0, 120)); #displaying only 100 samples
ax.set_title('Serial Data');
ax.set_xlabel('Sample')
ax.set_ylabel('Voltage')
ax.set_xlim(0,100)
ax.set_ylim(-0.5,6)
lines = ax.plot([],[])[0]
canvas = FigureCanvasTkAgg(fig, master=root) # A tk.DrawingArea.
canvas.get_tk_widget().place(x = 10,y=10, width = 500,height = 400)
canvas.draw()
#----------create button---------
root.update();
start = tk.Button(root, text = "Start", font = ('calbiri',12),command = lambda: plot_start())
start.place(x = 100, y = 450 )
root.update();
stop = tk.Button(root, text = "Stop", font = ('calbiri',12), command = lambda:plot_stop())
stop.place(x = start.winfo_x()+start.winfo_reqwidth() + 20, y = 450)
#----start serial port----
s = sr.Serial('COM5',9600);
s.reset_input_buffer()
root.after(1,plot_data)
root.mainloop()
Now the error is:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\tkinter\__init__.py", line 1883, in __call__
return self.func(*args)
File "C:\ProgramData\Anaconda3\lib\tkinter\__init__.py", line 804, in callit
func(*args)
File "C:\Users\Indu shekar\Desktop\ggg.py", line 26, in plot_data
data = np.append(data,float(a[0:4]))
ValueError: could not convert string to float: '.'
at times it gives me error " a bytes like object is required not string"
please suggest the changes in my code
EDIT: based on the comments, I'm significantly revising my answer.
I first thought "36,42,45" was meant to be float(36.42), ignoring the 45 part, but now you've clarified that it should be interpreted as x=36, y=42 and z=45.
The updated solution I propose is to split a on comma and map each part to a float:
x, y, z = (float(v) for v in a.split(","))
Taking it apart:
a.split(",") returns a list of the comma-separated parts -> ['36', '42', '45']
(float(v) for v in ...) maps each element to a float using a comprehension -> (36.0, 42.0, 45.0). Use int(v) instead of float(v) if you want the numbers to be used as ints instead of floats.
x, y, z = ... unpacks the values into three variables.
and then you can use x, y and z as you need to in your plotting code.
Warning: this will raise an exception if the list has either fewer or more than 3 comma-separated values, or if those values cannot be converted to floats.

Function returning same values for supposedly different inputs

I'm using pyalgotrade to create a trading strategy. I'm going through a list of tickers(testlist) and adding them to a dictionary(list_large{}) alongside their score which I'm getting using a get_score function. My latest problem is that each ticker in the dictionary(list_large{}) is getting the same score. Any idea why?
Code:
from pyalgotrade import strategy
from pyalgotrade.tools import yahoofinance
import numpy as np
import pandas as pd
from collections import OrderedDict
from pyalgotrade.technical import ma
from talib import MA_Type
import talib
smaPeriod = 10
testlist = ['aapl','ddd','gg','z']
class MyStrategy(strategy.BacktestingStrategy):
def __init__(self, feed, instrument):
super(MyStrategy, self).__init__(feed, 1000)
self.__position = []
self.__instrument = instrument
self.setUseAdjustedValues(True)
self.__prices = feed[instrument].getPriceDataSeries()
self.__sma = ma.SMA(feed[instrument].getPriceDataSeries(), smaPeriod)
def get_score(self,slope):
MA_Score = self.__sma[-1] * slope
return MA_Score
def onBars(self, bars):
global bar
bar = bars[self.__instrument]
slope = 8
for instrument in bars.getInstruments():
list_large = {}
for tickers in testlist: #replace with real list when ready
list_large.update({tickers : self.get_score(slope)})
organized_list = OrderedDict(sorted(list_large.items(), key=lambda t: -t[1]))#organize the list from highest to lowest score
print list_large
def run_strategy(inst):
# Load the yahoo feed from the CSV file
feed = yahoofinance.build_feed([inst],2015,2016, ".") # feed = yahoofinance.build_feed([inst],2015,2016, ".")
# Evaluate the strategy with the feed.
myStrategy = MyStrategy(feed, inst)
myStrategy.run()
print "Final portfolio value: $%.2f" % myStrategy.getBroker().getEquity()
def main():
instruments = ['ddd','msft']
for inst in instruments:
run_strategy(inst)
if __name__ == '__main__':
main()
Check this code of the onBars() function:
slope = 8 # <---- value of slope = 8
for instrument in bars.getInstruments():
list_large = {}
for tickers in testlist: #replace with real list when ready
list_large.update({tickers : self.get_score(slope)})
# Updating dict of each ticker based on ^
Each time self.get_score(slope) is called, it returns the same value and hence, all the value of tickers hold the same value in dict
I do not know how you want to deal with slope and how you want to update it's value. But this logic can be simplified without using .update as:
list_large = {}
for tickers in testlist:
list_large[tickers] = self.get_score(slope)
# ^ Update value of `tickers` key

Using tkinter to plot pandas dataframes

So I'm trying to write a small GUI that will allow an end use to plot X vs Y of any 2 columns in an Excel file. Here's my code:
import pandas as pd
import matplotlib.pyplot as plt
import tkinter as tk
my_base=pd.read_excel('my_base.xlsx', 'the_tab', index_col=None, na_values = ['NA'])
my_base_header = list(my_base.columns.values)
my_base['Generated Date'] = pd.to_datetime(my_base['Generated Date'])
main_win = tk.Tk()
def plot_graph():
print(option1.get())
print(option2.get())
my_base.plot(x = option1.get(), y = option2.get(), style = 'x')
plt.show()
option1 = tk.StringVar(main_win)
option1.set(my_base_header[0])
option2 = tk.StringVar(main_win)
option2.set(my_base_header[0])
opt1 = tk.OptionMenu(main_win, option1, *my_base_header)
opt1.pack()
opt2 = tk.OptionMenu(main_win, option2, *my_base_header)
opt2.pack()
runbtn = tk.Button(main_win, text = 'Plot', command = plot_graph)
runbtn.pack()
main_win.mainloop()
I can get the program to plot if I put the dataframe headers in directly like so:
my_base.plot(x = 'Generated Date', y = 'How many', style = 'x')
But when I use for example x = option1.get() in there I get this traceback
Traceback (most recent call last):
File "<pyshell#14>", line 1, in <module>
my_base.plot(x= x_ax, y = y_ax, style = 'x')
File "C:\Python34\lib\site-packages\pandas\tools\plotting.py", line 2485, in plot_frame
**kwds)
File "C:\Python34\lib\site-packages\pandas\tools\plotting.py", line 2325, in _plot
plot_obj.generate()
File "C:\Python34\lib\site-packages\pandas\tools\plotting.py", line 921, in generate
self._compute_plot_data()
File "C:\Python34\lib\site-packages\pandas\tools\plotting.py", line 997, in _compute_plot_data
'plot'.format(numeric_data.__class__.__name__))
TypeError: Empty 'Series': no numeric data to plot
It's as the error says: the data you're trying to plot is non-numeric, so it's probably a string or from the looks of it, maybe a datetime. If you include the data and which column is giving you this error, we could point out the problem.
If it is a datetime, you may need to convert it to a pandas timestamp type, like:
pandas.DatetimeIndex([yourDatetime])
Your code seems to work for me. I used this demo dataset:
http://www.contextures.com/xlSampleData01.html
If I try to plot something non-numeric, such as 'Region' or 'Rep', it gives me the same error you have (no numeric data to plot). If I plot 'Unit Cost' vs 'Total', or any other combination of 2 numeric datasets, it works.
import pandas as pd
import matplotlib.pyplot as plt
import Tkinter as tk
my_base=pd.read_excel('SampleData.xls', 'SalesOrders', index_col=None, na_values = ['NA'])
my_base_header = list(my_base.columns.values)
#print my_base
my_base['OrderDate'] = pd.to_datetime(my_base['OrderDate'])
main_win = tk.Tk()
def plot_graph():
print(option1.get())
print(option2.get())
my_base.plot(x = option1.get(), y = option2.get(), style = 'x')
plt.show()
option1 = tk.StringVar(main_win)
option1.set(my_base_header[0])
option2 = tk.StringVar(main_win)
option2.set(my_base_header[0])
opt1 = tk.OptionMenu(main_win, option1, *my_base_header)
opt1.pack()
opt2 = tk.OptionMenu(main_win, option2, *my_base_header)
opt2.pack()
runbtn = tk.Button(main_win, text = 'Plot', command = plot_graph)
runbtn.pack()
main_win.mainloop()

Categories