python warning: Boolean Series key will be reindexed to match DataFrame index - python

The code below reports a warning:
UserWarning: Boolean Series key will be reindexed to match DataFrame index.
ds = ds[(df[['ts']].diff() > threshold).any(axis=1)]
2019-08-31 08:18:57.731 python[58541:1317145] [QL] Can't get plugin bundle info at file:///Users/e12714/Library/QuickLook/NSQuickLookPlugin.qlgenerator
Code:
#!/usr/bin/env python
# Minimal reproduction: plot a value series, put x-axis ticks only on the rows
# where the value changes, then thin those ticks by a minimum time gap.
import pandas as pd
import numpy as np
from datetime import datetime,timedelta
import matplotlib.pyplot as plt
from collections import OrderedDict
# Sample data: timestamp string (month-day time, no year) -> value.
m = OrderedDict()
m["08-30 22:30:10.063"] = 5
m["08-30 22:30:15.023"] = 5
m["08-30 22:30:20.043"] = 5
m["08-30 22:30:25.015"] = 2
m["08-30 22:30:25.020"] = 2
m["08-30 22:30:26.025"] = 2
m["08-30 22:30:40.032"] = 5
m["08-30 22:30:45.045"] = 5
m["08-30 22:30:50.022"] = 5
df = pd.DataFrame(list(m.items()), columns = ['ts', 'value'])
# Parse the timestamp strings into datetime objects.
df['ts'] = [datetime.strptime(x,'%m-%d %H:%M:%S.%f') for x in df['ts']]
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(12,6))
ax.plot(df['ts'],df['value'],"--.")
# dl: rows whose value differs from the previous row.
dl = df[(df[['value']].shift() != df[['value']]).any(axis=1)]
# dr: rows whose value differs from the next row.
dr = df[(df[['value']].shift(-1) != df[['value']]).any(axis=1)]
# Combine both sets; ignore_index=True gives ds a fresh 0..n-1 index.
ds = pd.concat([dl,dr],ignore_index=True)
ds = ds.sort_values(['ts'])
threshold = timedelta(seconds=2)
# NOTE: the mask below is computed from df but used to index ds. ds got a new
# index from concat(ignore_index=True), so the mask's index does not line up
# with it and pandas has to reindex the mask -- this is the line named in the
# UserWarning.
ds = ds[(df[['ts']].diff() > threshold).any(axis=1)]
fig.autofmt_xdate()
# Use the remaining timestamps as the x-axis tick positions.
ax.xaxis.set_ticks(np.array(ds['ts']))
ax.yaxis.grid(True)
plt.show()
The output looks fine:
How can I fix this warning?

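The boolean mask is built from `df`, but it is applied to `ds`, whose index is different (it was reset by `pd.concat(..., ignore_index=True)`), so pandas has to reindex the mask and warns about it. Build the mask from `ds` itself, i.e. change the line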
ds = ds[(df[['ts']].diff() > threshold).any(axis=1)]
to
ds = ds[(ds[['ts']].diff() > threshold).any(axis=1)]

Related

How to create a dashboard with widgets (selector) and interactivity (tap stream) between plots in HoloViews/Bokeh?

I'm trying to create a dashboard that consists of two plots (heatmap and line graph) and one widget (selector):
When you select an option from widget both plots get updated;
When you tap on the first plot the second plot is updated based on tap info.
Currently I'm trying to do it in HoloViews. It seems that this should be very easy to do but I somehow can't wrap my head around it.
The code below shows what it should look like. However, the selector is not connected to the dashboard in any way, since I don't know how to do that.
import pandas as pd
import numpy as np
import panel as pn
import holoviews as hv
hv.extension('bokeh')
def create_test_df(k_features, n_tickers=5, m_windows=5):
    """Build a random test DataFrame with one row per (window, ticker) pair.

    Args:
        k_features: Number of ``feature_<i>`` columns to generate.
        n_tickers: Number of tickers (``ticker_0`` ... ``ticker_<n-1>``) per window.
        m_windows: Number of consecutive one-day windows, starting 2020-01-01.

    Returns:
        A DataFrame with columns ``window_dt``, ``ticker`` and
        ``feature_0`` ... ``feature_<k-1>``, holding ``m_windows * n_tickers``
        rows. Feature values are drawn with ``np.random.rand()``.
    """
    start_date = pd.Timestamp('01-01-2020')
    window_len = pd.Timedelta(days=1)
    cols = ['window_dt', 'ticker'] + [f'feature_{i}' for i in range(k_features)]
    # Column-oriented accumulator: one list per column, filled row by row.
    data = {c: [] for c in cols}
    for w in range(m_windows):
        window_dt = start_date + w*window_len
        for t in range(n_tickers):
            ticker = f'ticker_{t}'
            data['window_dt'].append(window_dt)
            data['ticker'].append(ticker)
            # One random value per feature for this (window, ticker) row.
            for f in range(k_features):
                data[f'feature_{f}'].append(np.random.rand())
    return pd.DataFrame(data)
k_features = 3
features = [f'feature_{i}' for i in range(k_features)]
df = create_test_df(k_features)
# Dropdown widget listing the feature column names.
selector = pn.widgets.Select(options=features)
# NOTE: selector.value is read once, here, when the heatmap is built. Nothing
# in this snippet re-reads it, so the heatmap is not tied to later selections.
heatmap = hv.HeatMap(df[['window_dt', 'ticker', f'{selector.value}']])
# Tap stream attached to the heatmap, with initial x/y values.
posxy = hv.streams.Tap(source=heatmap, x='01-01-2020', y='ticker_4')
def tap_heatmap(x, y):
    """Return a sine curve with a random frequency factor.

    Args:
        x: Tap x value supplied by the stream; not used by the body.
        y: Tap y value supplied by the stream; not used by the body.

    Returns:
        An ``hv.Curve`` of ``sin(scalar * t)`` sampled at 100 points on
        ``[-2*pi, 2*pi]``, where ``scalar`` is drawn with ``np.random.randn()``.
    """
    scalar = np.random.randn()
    # Separate local name so the tap coordinate ``x`` is not overwritten.
    xs = np.linspace(-2*np.pi, 2*np.pi, 100)
    data = list(zip(xs, np.sin(xs*scalar)))
    return hv.Curve(data)
pn.Row(heatmap, hv.DynamicMap(tap_heatmap, streams=[posxy]), selector)
OK, I finally got it. It turned out to be simple (just as expected) but not quite intuitive. Basically, a different approach to implementing the selector (dropdown menu) should be used. Working code for this example is below:
import pandas as pd
import numpy as np
import panel as pn
import holoviews as hv
hv.extension('bokeh')
def create_test_df(k_features, n_tickers=5, m_windows=5):
    """Build a random test DataFrame with one row per (window, ticker) pair.

    Args:
        k_features: Number of ``feature_<i>`` columns to generate.
        n_tickers: Number of tickers (``ticker_0`` ... ``ticker_<n-1>``) per window.
        m_windows: Number of consecutive one-day windows, starting 2020-01-01.

    Returns:
        A DataFrame with columns ``window_dt``, ``ticker`` and
        ``feature_0`` ... ``feature_<k-1>``, holding ``m_windows * n_tickers``
        rows. Feature values are drawn with ``np.random.rand()``.
    """
    start_date = pd.Timestamp('01-01-2020')
    window_len = pd.Timedelta(days=1)
    cols = ['window_dt', 'ticker'] + [f'feature_{i}' for i in range(k_features)]
    # Column-oriented accumulator: one list per column, filled row by row.
    data = {c: [] for c in cols}
    for w in range(m_windows):
        window_dt = start_date + w*window_len
        for t in range(n_tickers):
            ticker = f'ticker_{t}'
            data['window_dt'].append(window_dt)
            data['ticker'].append(ticker)
            # One random value per feature for this (window, ticker) row.
            for f in range(k_features):
                data[f'feature_{f}'].append(np.random.rand())
    return pd.DataFrame(data)
def load_heatmap(feature):
    """Build a heatmap of one feature column over ``window_dt`` and ``ticker``.

    Args:
        feature: Name of the column of the module-level ``df`` to display.

    Returns:
        An ``hv.HeatMap`` built from the ``window_dt``, ``ticker`` and
        ``feature`` columns of ``df``.
    """
    return hv.HeatMap(df[['window_dt', 'ticker', f'{feature}']])
def tap_heatmap(x, y):
    """Return a sine curve with a random frequency factor.

    Args:
        x: Tap x value supplied by the stream; not used by the body.
        y: Tap y value supplied by the stream; not used by the body.

    Returns:
        An ``hv.Curve`` of ``sin(scalar * t)`` sampled at 100 points on
        ``[-2*pi, 2*pi]``, where ``scalar`` is drawn with ``np.random.randn()``.
    """
    scalar = np.random.randn()
    # Separate local name so the tap coordinate ``x`` is not overwritten.
    xs = np.linspace(-2*np.pi, 2*np.pi, 100)
    data = list(zip(xs, np.sin(xs*scalar)))
    return hv.Curve(data)
k_features = 3
features = [f'feature_{i}' for i in range(k_features)]
df = create_test_df(k_features)
# Heatmap as a DynamicMap over a 'Feature' key dimension whose allowed values
# are the feature names; load_heatmap builds the plot for a given value.
# Per the answer text, this is what replaces the separate selector widget.
heatmap_dmap = hv.DynamicMap(load_heatmap, kdims='Feature').redim.values(Feature=features)
# Tap stream sourced from the dynamic heatmap, with initial x/y values.
posxy = hv.streams.Tap(source=heatmap_dmap, x='01-01-2020', y='ticker_0')
# Second plot: produced by tap_heatmap from the tap stream.
sidegraph_dmap = hv.DynamicMap(tap_heatmap, streams=[posxy])
# Put both plots into one panel row.
pn.Row(heatmap_dmap, sidegraph_dmap)

Plotting Results from For Iteration

I am new to Python and would like to ask how to plot a figure from the values computed in a for loop.
Here is the code!
import numpy as np #numerical python
import matplotlib.pyplot as plt #python plotting
from math import exp #exponential math directory
# Constants as given in the question.
T_initial = 293
T_reference = range(298,340,2)
R1_initial = 57500
R2_initial = 13300
R3_initial = 18000
R4_initial = 5600
Beta = 4150
Vin = 2.8
# Compute and print the rounded output voltage for every reference temperature.
for i in T_reference:
    # R1 scaled exponentially with the difference of inverse temperatures.
    R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
    # Difference between the two divider ratios.
    Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
    Vo = Vin*Rs
    Vo_round = round(Vo, 3)
    print(i,Vo_round)
You can plot the data like this:
# Plot one point per reference temperature, then show the figure once.
for i in T_reference:
    R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
    Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
    Vo = Vin*Rs
    Vo_round = round(Vo, 3)
    # Each call adds a single (temperature, voltage) point to the current axes.
    plt.scatter(i, Vo_round)
# Display the figure after all points have been added.
plt.show()
Is this what you were looking for?
Put the values you want to plot into two lists using the `append` method (one list for the x axis and one for the y axis).
Then plot the two lists with matplotlib.
It should look something like this:
# x values (temperatures) and y values (rounded voltages) collected in the loop.
is1 = []
vos = []
for i in T_reference:
    R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
    Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
    Vo = Vin*Rs
    Vo_round = round(Vo, 3)
    print(i,Vo_round)
    is1.append(i)
    vos.append(Vo_round)
# Draw one line through all collected points, after the loop has finished.
plt.plot(is1,vos)
# Display the figure, as the other snippets on this page do.
plt.show()
Here is a reference for plotting
Two options without a for-loop
Create a function
def v_o(T_reference):
    """Return the rounded output voltage for one reference temperature.

    Args:
        T_reference: Temperature value. It is inverted (``1/T_reference``),
            so it must be non-zero. Presumably in kelvin, like the constant
            ``T_initial = 293`` -- confirm.

    Returns:
        ``Vin * Rs`` rounded to 3 decimal places, where ``Rs`` is the
        difference between the two divider ratios computed below.
    """
    T_initial = 293
    R1_initial = 57500
    R2_initial = 13300
    R3_initial = 18000
    R4_initial = 5600
    Beta = 4150
    Vin = 2.8
    # R1 scaled exponentially with the difference of inverse temperatures.
    R1_refe = R1_initial*exp(Beta*((1/T_reference)-(1/T_initial)))
    Rs = (R2_initial/(R2_initial + R1_refe)) - (R4_initial/(R3_initial+R4_initial))
    Vo = Vin*Rs
    Vo_round = round(Vo, 3)
    return Vo_round
Option 1: Use a pandas dataframe
import pandas as pd
import matplotlib.pyplot as plt
# create the dataframe with T_reference
df = pd.DataFrame({'t_ref': [*range(298, 340,2)]})
# Call the function to calculate v_o
df['v_o'] = df.t_ref.apply(v_o)
# plot
df.plot('t_ref', 'v_o', legend=False)
plt.show()
Option 2: use map
T_reference = [*range(298, 340,2)]
# Store the results under their own name: assigning them to ``v_o`` would
# replace the function with a list, so it could not be called again.
v_o_values = list(map(v_o, T_reference))
plt.plot(T_reference, v_o_values)
plt.show()
Plot
The plot from both options looks like the following

I do not know why the graph is not shown and the code keeps executing for a long time

There is no error in the code (I believe),
but when I run the program, the graph does not appear in the plot window. It just says "executing code".
I have waited for about an hour, but nothing is shown.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
excel_df = pd.read_csv('data.csv', header=None)
# Element-wise mask of the values below 0.006.
bool_idx = excel_df < 0.006
# Indexing with a boolean frame keeps the shape; entries failing the mask become NaN.
valid_data = excel_df[bool_idx]
# Drop every row that contains a NaN.
true_data = valid_data.dropna()
# First column without its last element ...
tt = np.array(true_data.iloc[0:-1, 0])
print(tt)
# ... and without its first element, so tt[k] and tt2[k] come from consecutive rows.
tt2 = np.array(true_data.iloc[1:, 0])
print(tt2)
# Absolute difference between consecutive rows.
ts = abs(tt - tt2)
print(ts)
# NOTE(review): the mask is wrapped in a list, so np.where receives a 2-D
# (1, n) input and returns two index arrays; ind therefore ends up 2-D.
# np.where(ts < 0.001)[0] may have been intended -- confirm.
ind = np.array(np.where([ts < 0.001]))
# NOTE(review): this snippet never calls plt.show().
graph1 = plt.plot(ind)
print(ind)
# NOTE(review): iloc[0, ind] selects from row 0 using ind as column positions;
# presumably a row selection was intended -- confirm.
true_data0001 = true_data.iloc[0, ind]
print(true_data0001)
no error

index out of bounds// Python, dataframe, plot

I want to plot the point with max value from dataframe.
import pandas as pd
import matplotlib.pyplot as plt
# Column names are Polish: rok = year, kroliki = rabbits, lisy = foxes,
# marchewki = carrots.
dane = pd.read_table('C:\\xxx.txt', names=('rok', 'kroliki', 'lisy', 'marchewki'))
df = pd.DataFrame(dane)
# Skip the first row (presumably a header line -- confirm) and convert to float.
data = df[1:]
data=data.astype(float)
# Maximum of each series, truncated to int.
x = int(data['kroliki'].max())
y = int(data['lisy'].max())
z = int(data['marchewki'].max())
# 'rok' where the series equals its (truncated) maximum, NaN everywhere else.
p= data['rok'].where(data['kroliki'] == x)
q = data['rok'].where(data['lisy'] == y)
r = data['rok'].where(data['marchewki'] == z)
# Keep the non-null entry and convert it to int.
p1 = int(p[p.notnull()])
q1 = int(q[q.notnull()])
r1 = int(r[r.notnull()])
point = pd.DataFrame({'x':[p1],'y':[q1],'z':[r1]})
# NOTE: this is the call that fails; the tuples passed here hold data values
# (a year and a maximum), not names or positions of columns in point.
point.plot((p1,x),(q1,y),(r1,z))
I have such an error:
IndexError: index 1993 is out of bounds for axis 0 with size 4
Does anybody know what is wrong with this code?
Thanks
I think that when you plot through pandas, the arguments of `DataFrame.plot` are looked up as column labels or positions within the DataFrame itself; they are not treated as data values.
So, in your case, when you do:
point.plot(p1,x)
Pandas will look for the index 1993 in the x-direction, i.e, throughout all columns. In other words, you should have 1993 columns.
I tried to reproduce your problem as follows:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(0,100,size=(10, 4)), columns=('rok', 'kroliki', 'lisy', 'marchewki'))
data = df[1:]
data=data.astype(float)
x = int(data['kroliki'].max())
y = int(data['lisy'].max())
z = int(data['marchewki'].max())
p = data['rok'].where(data['kroliki'] == x)
q = data['rok'].where(data['lisy'] == y)
r = data['rok'].where(data['marchewki'] == z)
p1 = int(p[p.notnull()])
q1 = int(q[q.notnull()])
r1 = int(r[r.notnull()])
point = pd.DataFrame({'x':[p1],'y':[q1],'z':[r1]})
point.plot((p1,x),(q1,y),(r1,z))
I get the following error:
>>> AttributeError: 'tuple' object has no attribute 'lower'
And when I run each point separately:
>>>> IndexError: index 85 is out of bounds for axis 0 with size 3
To solve it:
import matplotlib.pyplot as plt
plt.plot((point.x, point.y, point.z), (x,y,z),'ko')
And I got the following result:
Hope it helps.

Q : Python CSV - Key Error when plotting

I'm trying to plot a DataFrame, but I'm encountering an error that I don't know how to solve.
Python Code:
import numpy as np
from datetime import date,time,datetime
import pandas as pd
import csv
df = pd.read_csv('MainD2.csv', parse_dates=['Time_Stamp'], infer_datetime_format=True)
df["Time_Stamp"] = pd.to_datetime(df["Time_Stamp"]) # convert to Datetime
df_filter = df[df["Curr"].le(3.0)] # rows where Curr is less than or equal to 3.0
#print(df_filter)
where = (df_filter[df_filter["Time_Stamp"].diff().dt.total_seconds() > 1] ["Time_Stamp"] - pd.Timedelta("1s")).astype(str).tolist() # timestamps (as strings) one second before each row that follows a gap of more than 1 second
df_filter2 = df[df["Time_Stamp"].isin(where)] # rows of df at those timestamps
#print(df_filter2)
df_filter2["AC_Input_Current"] = 0.0 # set AC_Input_Current to 0.0 on those rows
#df_filter2
# From here on, Time_Stamp is the index of both frames and no longer a column.
df = df.set_index("Time_Stamp")
df_filter2 = df_filter2.set_index("Time_Stamp")
# Write the zeroed rows back into df at the matching timestamps.
df.loc[df_filter2.index] = df_filter2
def getMask(start,end):
    """Return a boolean mask selecting rows with ``start < Time_Stamp <= end``.

    Args:
        start: Exclusive lower bound compared against ``df['Time_Stamp']``.
        end: Inclusive upper bound compared against ``df['Time_Stamp']``.

    Returns:
        The element-wise AND of the two comparisons on the module-level ``df``.

    Raises:
        KeyError: If ``df`` has no ``Time_Stamp`` column. NOTE: the surrounding
            script calls ``df.set_index("Time_Stamp")`` before this function
            runs, which turns the column into the index -- this is the
            ``KeyError: 'Time_Stamp'`` reported in the question.
    """
    mask = (df['Time_Stamp'] > start) & (df['Time_Stamp'] <= end)
    return mask
start = '2017-06-26 01:05:00'
end = '2017-06-26 01:20:00'
timerange = df.loc[getMask(start, end)]
timerange.plot(x='Time_Stamp', y='AC_Input_Current', style='-', color='black')*
*------------------ Plotting Part -------------------
timerange.plot(x='Time_Stamp', y='AC_Input_Current', style='-', color='black')
I have encountered this error when trying to plot :
KeyError: 'Time_Stamp'

Categories