ValueError: condition must be a 1-d array - python

Hi, I've been stuck on this error for a while now! I want to interpolate data in 3D and then display it in 2D (in Basemap). Unfortunately, I get this error when I try to plot grid[long], grid[lat] and the interpolation values with contourf:
ValueError: condition must be a 1-d array
I already tried to import the values as y = df['variable'].values.tolist() but this did not change the error. Unfortunately, as I am new to arrays, I do not have a good understanding of them and need to solve this error in a timely manner.
def load_data():
    """Read the CSV at the path ``"File"`` and return it as a DataFrame."""
    frame = pd.read_csv(r"File")
    return frame
def get_data(df):
    """Pick the coordinate and value columns out of *df*.

    Returns a dict whose keys "lons", "lats", "alts" and "values" hold the
    'Longitude', 'Latitude', 'Altitude' and 'O18' columns of *df*.
    """
    column_for_key = (
        ("lons", 'Longitude'),
        ("lats", 'Latitude'),
        ("alts", 'Altitude'),
        ("values", 'O18'),
    )
    return {key: df[column] for key, column in column_for_key}
def generate_grid(data, basemap, delta=1):
    """Build the longitude/latitude/altitude grid used for interpolation.

    The 1-d axes are stored under "lon", "lat" and "alt". Longitude always
    spans -180..180; latitude and altitude span the min..max of data["lats"]
    and data["alts"], all with step *delta*. A three-axis meshgrid
    ("ij" indexing) of those axes is stored under "x", "y", "z", where "x"
    and "y" are the values returned by calling *basemap* on the meshed
    longitude/latitude arrays.
    """
    latitudes = data["lats"]
    altitudes = data["alts"]

    grid = {}
    grid['lon'] = np.arange(-180, 180, delta)
    grid['lat'] = np.arange(np.amin(latitudes), np.amax(latitudes), delta)
    grid['alt'] = np.arange(np.amin(altitudes), np.amax(altitudes), delta)

    mesh_lon, mesh_lat, mesh_alt = np.meshgrid(
        grid["lon"], grid["lat"], grid["alt"], indexing="ij"
    )
    # Calling the basemap converts the meshed lon/lat into its x/y values.
    grid["x"], grid["y"] = basemap(mesh_lon, mesh_lat)
    grid["z"] = mesh_alt
    return grid
def interpolate(data, grid):
    """Krige data["values"] onto the grid with altitude as a specified drift.

    A UniversalKriging3D model (exponential variogram) is built from the
    longitude, latitude, altitude and value entries of *data*, then executed
    in "grid" mode on the 1-d axes grid["lon"], grid["lat"], grid["alt"],
    with grid["z"] passed as the drift array. Returns whatever
    ``execute`` returns.
    """
    altitudes = data["alts"]
    kriging = UniversalKriging3D(
        data["lons"],
        data["lats"],
        altitudes,
        data["values"],
        variogram_model='exponential',
        drift_terms=["specified"],
        specified_drift=[altitudes],
    )
    result = kriging.execute(
        "grid",
        grid["lon"],
        grid["lat"],
        grid["alt"],
        specified_drift_arrays=[grid["z"]],
    )
    return result
def prepare_map_plot():
    """Create a figure, its axes, and a 'robin'-projection Basemap on them.

    Returns the tuple ``(figure, axes, basemap)``.
    """
    figure, axes = plt.subplots(figsize=(10, 10))
    map_options = dict(
        projection='robin',
        lon_0=0,
        lat_0=0,
        resolution='h',
        area_thresh=1000,
        ax=axes,
    )
    basemap = Basemap(**map_options)
    return figure, axes, basemap
def plot_mesh_data(interpolation, grid, basemap):
    """Draw *interpolation* as 32-level filled contours and add a colour bar.

    grid["x"] and grid["y"] are passed to ``basemap.contourf`` as the
    coordinates; the colour bar is placed at the bottom of the map.
    """
    colormesh = basemap.contourf(grid["x"], grid["y"], interpolation, 32, cmap='RdBu_r')
    # Use the basemap that was passed in: the name `my_basemap` is not
    # defined anywhere and would raise NameError once contourf succeeds.
    basemap.colorbar(colormesh, location='bottom', pad="10%")
The error Message:
>>> plot_mesh_data(interpolation, grid,basemap)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 2, in plot_mesh_data
File "C:\Users\Name\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\mpl_toolkits\basemap\__init__.py", line 548, in with_transform
return plotfunc(self,x,y,data,*args,**kwargs)
File "C:\Users\Name \AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\mpl_toolkits\basemap\__init__.py", line 3666, in contourf
xl = xx.compress(condition).tolist()
ValueError: condition must be a 1-d array

Hmm..
Seems to be a problem with np.compress
condition needs to be a 1-d array of bools according to:
https://numpy.org/doc/stable/reference/generated/numpy.compress.html#numpy.compress
This is what happens to grid['x']
xx = x[x.shape[0]//2,:]
condition = (xx >= self.xmin) & (xx <= self.xmax)
So i would do this to your grid["x"] like:
```
def plot_mesh_data(interpolation, grid, basemap):
    # Reproduce the check from Basemap.contourf shown above so the offending
    # `condition` can be inspected before contourf raises.
    x = grid["x"]
    xx = x[x.shape[0]//2, :]
    # Inside Basemap these limits are self.xmin / self.xmax; in this plain
    # function there is no `self`, so read them from the basemap instance.
    condition = (xx >= basemap.xmin) & (xx <= basemap.xmax)
    print(condition)
    colormesh = basemap.contourf(grid["x"], grid["y"], interpolation, 32,
                                 cmap='RdBu_r')
    color_bar = basemap.colorbar(colormesh, location='bottom', pad="10%")
```
You can also run these lines outside of your function to see why condition is not a 1-D array of booleans.
The print should give you something like array([ True, False,  True]) or [True, False, True], i.e. a flat sequence of booleans.
Update, since the self reference was missing: this normally occurs when you try to call a method on a class without having correctly instantiated an object of it.
i.e.:
import Class as Class_imp
Class_imp.dosmt()
This will give you a "missing positional argument: self" error, since you did not do:
my_class_imp = Class_imp()
my_class_imp.dosmt()
Do you have a part in your complete script at bottom that does
if __name__ == '__main__':
    # Load the table, then pick out the columns the other functions expect.
    df = load_data()
    data = get_data(df)
    fig, ax, basemap = prepare_map_plot()
    # The grid must exist before interpolating, because interpolate() reads it.
    grid = generate_grid(data, basemap, delta=1)
    interpol = interpolate(data, grid)
    plot_mesh_data(interpol, grid, basemap)
you can run this like
>>> import runpy
>>> runpy.run_path(path_name='path_to_script.py')
Cheers

Related

Sequence item 0: expected str instance, numpy.int64 found

I keep getting this issue when I try and plot p values. I don't understand what is the sequence item 0. I found a couple of similar questions but I still dont understand what is causing this issue in my code below nor how to fix it.
from statannotations.Annotator import Annotator
cluster_0_wmd = Hub_all_data.loc[(Hub_all_data.Module_ID == 0), "Within_module_degree"].values
cluster_1_wmd = Hub_all_data.loc[(Hub_all_data.Module_ID == 1), "Within_module_degree"].values
cluster_2_wmd = Hub_all_data.loc[(Hub_all_data.Module_ID == 2), "Within_module_degree"].values
with sns.plotting_context('notebook', font_scale=1.4):
# Plot with seaborn
sns.violinplot(**plotting_parameters)
stat_results = [mannwhitneyu(cluster_0_wmd, cluster_1_wmd, alternative="two-sided"),
mannwhitneyu(cluster_0_wmd, cluster_2_wmd, alternative="two-sided"),
mannwhitneyu(cluster_1_wmd, cluster_2_wmd, alternative="two-sided")]
pvalues = [result.pvalue for result in stat_results]
xval = [0,1,2]
plotting_parameters = {
'data': Hub_all_data,
'x': 'Module_ID',
'y': 'Within_module_degree',
'palette': my_col}
pairs = [('cluster_0_wmd', 'cluster_1_wmd'),
('cluster_0_wmd', 'cluster_2_wmd'),
('cluster_1_wmd', 'cluster_2_wmd')]
pairs2 = [(0,1), (0,2), (1,2)]
formatted_pvalues = [f"p={p:.2e}" for p in pvalues]
annotator = Annotator(ax, pairs2, **plotting_parameters)
annotator.set_custom_annotations(formatted_pvalues)
annotator.annotate()
plt.show()
I get the error on literally the annotator.annotate() line. Here is the error line:
runcell(27, '/Users/albitcabanmurillo/N5_nwxwryan.py')
p-value annotation legend:
ns: p <= 1.00e+00
*: 1.00e-02 < p <= 5.00e-02
**: 1.00e-03 < p <= 1.00e-02
***: 1.00e-04 < p <= 1.00e-03
****: p <= 1.00e-04
Traceback (most recent call last):
File "/Users/albitcabanmurillo/N5_nwxwryan.py", line 421, in <module>
annotator.annotate()
File "/Users/albitcabanmurillo/opt/anaconda3/envs/caiman2/lib/python3.7/site-packages/statannotations/Annotator.py", line 222, in annotate
orig_value_lim=orig_value_lim)
File "/Users/albitcabanmurillo/opt/anaconda3/envs/caiman2/lib/python3.7/site-packages/statannotations/Annotator.py", line 506, in _annotate_pair
annotation.print_labels_and_content()
File "/Users/albitcabanmurillo/opt/anaconda3/envs/caiman2/lib/python3.7/site-packages/statannotations/Annotation.py", line 43, in print_labels_and_content
for struct in self.structs])
TypeError: sequence item 0: expected str instance, numpy.int64 found
Here is some code to try to reproduce your issue using some dummy data.
It seems statannotations doesn't like the numeric values in 'Module_ID'. You could try to change them to strings (also using them for pairs2). Depending on when you change them, you might also need to change the numeric values to strings in Hub_all_data.loc[(Hub_all_data.Module_ID == "0"), ...].
Note that in your example code, you used plotting_parameters before assigning a value (I assume you just want to move the assignment to before the call to sns.violinplot). The code also uses an unknown ax as the first parameter to Annotator(...); here I assume ax is the return value of sns.violinplot.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from statannotations.Annotator import Annotator
# first create some dummy data for testing
Hub_all_data = pd.DataFrame({'Module_ID': np.repeat(np.arange(3), 100),
'Within_module_degree': np.random.randn(300).cumsum()})
# change Module_ID from numeric to string
Hub_all_data['Module_ID'] = Hub_all_data['Module_ID'].astype(str)
plotting_parameters = {'data': Hub_all_data,
'x': 'Module_ID',
'y': 'Within_module_degree'}
with sns.plotting_context('notebook', font_scale=1.4):
ax = sns.violinplot(**plotting_parameters)
# also change these values to strings
pairs2 = [("0", "1"), ("0", "2"), ("1", "2")]
# just use some dummy data, as we don't have the original data nor the functions
pvalues = [0.001, 0.002, 0.03]
formatted_pvalues = [f"p={p:.2e}" for p in pvalues]
annotator = Annotator(ax, pairs2, **plotting_parameters)
annotator.set_custom_annotations(formatted_pvalues)
annotator.annotate()
sns.despine()
plt.tight_layout() # make all labels fit nicely
plt.show()
You could also update statannotations to version 0.5+, where it was fixed.

VALUE ERROR: x, y, and format string must not be None. (error while plotting hysteresis loop)

With the low cycle fatigue data, I'm trying to plot the Hysteresis loop. But I'm getting the following error:
[ -52.18297297 -45.58565338 16.9913185 ... -354.53630032 -295.50857248
-155.42088911]
[-0.01229182 -0.00891753 0.02256744 ... -0.33507242 -0.31283728
-0.24790212]
Traceback (most recent call last):
File "f:\I2M\LCF\Ep1_camp4_P4_TTH650 06-9-21 11 01 24\ep1_camp4_P4.py", line 16, in <module>
plt.plot(strain, Sigma, color = 'k')
File "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-
packages\matplotlib\pyplot.py", line 2840, in plot
return gca().plot(
File "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-
packages\matplotlib\axes\_axes.py", line 1743, in plot
lines = [*self._get_lines(*args, data=data, **kwargs)]
File "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-
packages\matplotlib\axes\_base.py", line 273, in __call__
yield from self._plot_args(this, kwargs)
File "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-
packages\matplotlib\axes\_base.py", line 379, in _plot_args
raise ValueError("x, y, and format string must not be None")
ValueError: x, y, and format string must not be None
And here is my code:
import matplotlib.pyplot as plt
import numpy as np
plt.style.use(['science','no-latex'])
x = np.loadtxt('F:\\I2M\\LCF\\Ep1_camp4_P4_TTH650 06-9-21 11 01 24\\data_1.csv',unpack = True,
skiprows = 2, usecols = 2, delimiter = ',')
y = np.loadtxt('F:\\I2M\\LCF\\Ep1_camp4_P4_TTH650 06-9-21 11 01 24\\data_1.csv',unpack = True,
skiprows = 2, usecols = 3, delimiter = ',')
stress = (x*1000)/28.27 #N/mm^2 = MPa
length = len(stress)
length = len(y)
plt.figure(figsize=(5, 5))
Sigma = print(stress[0:length:10]) #stress
strain = print(y[0:length:10])
plt.plot(strain, Sigma, color = 'k')
plt.show()
Data contains many rows. So I used some commands to access only particular values from the row
Your problem is here
Sigma = print(stress[0:length:10]) #stress
strain = print(y[0:length:10])
what you want plausibly is to sample every 10th data point, but what you get is … nothing or, from the point of view of Python: None, so that later your stack trace informs you that x, y, and format string must not be None.
Why this happens, and how you solve the problem?
When you make an assignment, the value of the expression on the right is saved and you can use the name on the left to use it later, so you save, e.g., the value returned by print(y[0:length:10]) to use it later under the name strain, but print() is used for its side effects (i.e., showing a bunch of characters on your terminal) and the value that is returned in these cases is by default None, not what was shown on your terminal.
If I have understood your intentions, you should omit the two lines above and just use
# Every 10th sample: strain (y) on the horizontal axis, stress on the vertical
# axis, matching the original plt.plot(strain, Sigma) call.
plt.plot(y[0:length:10], stress[0:length:10], color='k')
A side note, you have
length = len(stress)
length = len(y)
but you read them from the same file, one assignment should be enough…
PS
x, y = np.loadtxt('…\\data_1.csv', unpack=1, skiprows=2, usecols=[2,3], delimiter=',')

ValueError: x and y must have same first dimension, but have shapes (41,) and (1, 41)

I'm trying to plot a vs kappa_inv and I keep getting the error: ValueError: x and y must have same first dimension, but have shapes (41,) and (1, 41).
I saw a prev post about changing plt.plot square brackets to round ones but the error is still occurring. Can anyone see what I'm doing wrong?
import numpy
L = [20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
L = numpy.array(L)
delta = [0.5, 0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5]
delta = numpy.array(delta)
x = L/delta
a =[-0.5,0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5,6,6.5,7,7.5,8,8.5,9,9.5,10,10.5,11,11.5,12,12.5,13,13.5,14,14.5,15,15.5,16,16.5,17,17.5,18,18.5,19,19.5]
numpy.array(a)
#Force
F = 100 #kN
#calc sigma
y = 250 #mm
E = 32800 #MPa
I =1.837E9 #mm4
sig = y/(E*I)
print (sig)
kappa = []
b = []
y = 20
while y >= 0:
   b.append(y)
   y = y-0.5
numpy.array(b)
for val in a:
val = "{:.1f}".format(val)
val = float(val)
fraction = b/L
kappa_i = fraction * val
kappa.append(kappa_i)
b = b - delta
N = 4
Length = len(kappa)
pad_kappa = numpy.pad(kappa,(0,N),'constant', constant_values = 0)
print(pad_kappa)
#Calc bending moment list
BM = []
for k in range (0,Length):
bendingMoment = (pad_kappa[k]*F) + (pad_kappa[k+3]*F)
BM.append(bendingMoment)
print(BM)
Strain =[]
for j in range(0,len(BM)):
strain = (BM[j] * sig) * 10E6
Strain.append(strain)
kappa_inv = [ -x for x in kappa]
numpy.array(kappa_inv)
import matplotlib.pyplot as plt
plt.plot(a,kappa_inv)
plt.ylabel('KAPPA')
plt.xlabel('LENGTH ALONG BEAM')
plt.show()
#E = BM*10E6 * sigma
strainCalcReverse = []
for s in Strain:
bendYourMomLOL = s/sig * (1/10E6)
bendYourMomLOL.append(strainCalcReverse)
print(strainCalcReverse)
There's a lot of messy stuff in your code,
Lines like:
numpy.array(a) # doesn't change list a
numpy.array(b) # same
but
fraction = b/L # only works if b is an array, not a list.
Looks like this is trying to turn all elements in a to float, but that's not how a python loop works.
for val in a:
val = "{:.1f}".format(val)
val = float(val)
a = np.array(a) will produce a float array, so there's no need for this loop.
Anyways, it looks like kappa_i is an array. If so then the following demonstrates your error:
In [311]: kappa=[]
In [312]: kappa.append(np.arange(3))
In [313]: kappa
Out[313]: [array([0, 1, 2])]
In [314]: plt.plot([1,2,3], kappa)
Traceback (most recent call last):
File "<ipython-input-314-e15645b5613f>", line 1, in <module>
plt.plot([1,2,3], kappa)
File "/usr/local/lib/python3.8/dist-packages/matplotlib/pyplot.py", line 2988, in plot
return gca().plot(
File "/usr/local/lib/python3.8/dist-packages/matplotlib/axes/_axes.py", line 1605, in plot
lines = [*self._get_lines(*args, data=data, **kwargs)]
File "/usr/local/lib/python3.8/dist-packages/matplotlib/axes/_base.py", line 315, in __call__
yield from self._plot_args(this, kwargs)
File "/usr/local/lib/python3.8/dist-packages/matplotlib/axes/_base.py", line 501, in _plot_args
raise ValueError(f"x and y must have same first dimension, but "
ValueError: x and y must have same first dimension, but have shapes (3,) and (1, 3)
By using that list append, you made a list with one array element. When passed to plot, that produces a (1,n) array.
Correct your code, whether it's the actual code or the copy to the question. And pay closer attention to when variables are lists, or arrays, and if arrays, what's the shape and dtype.

Applying a function to meshgrids only under select polygon criteria?

My question is similar to this post but I'm having some trouble adapting it:
Ambiguous truth value for meshgrid and user-defined functions using if-statement
Essentially, I would like the conditional statement to not look like this:
import numpy as np
def test(x, y):
a = 1.0/(1+x*x)
b = np.ones(y.shape)
mask = (y!=0)
b[mask] = np.sin(y[mask])/y[mask]
return a*b
Rather, the "mask" to depend on whether x,y lie within a certain polygon. So every value in the resulting array is a 1, but a polygon between 4 values is generated. I only want the function to apply to points from the 2 meshgrid inputs (X,Y) which lay inside the polygon
x and y are real numbers that can be negative.
I'm not sure how to pass in the array items as singular values.
I ultimately want to plot Z on a colour plot
Thanks
i.e. points within a polygon undergo a transformation, points outside the polygon remain as 1
For example, I would expect my function to look like this
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
def f(x, y, poly):
    # Intended behaviour per the question: points inside `poly` get the
    # 1/(1+x^2) transformation, points outside stay at 1.
    a = 1.0/(1+x*x)
    b = np.ones(y.shape)
    # NOTE(review): when x and y are meshgrid arrays this is the line that
    # raises "only size-1 arrays can be converted to Python scalars" (see the
    # traceback in the question) - Point() is being handed whole arrays.
    mask = (Point(x,y).within(poly) == True)
    b[mask] = a*b
    return b
x and y are meshgrids of arbitrary dimensions
I should add that I get the following error:
"only size-1 arrays can be converted to Python scalars"
X and Y are generated and the function is called via
coords = [(0, 0), (4,0), (4,4), (0,4)]
poly = Polygon(coords)
x = np.linspace(0,10, 11, endpoint = True) # x intervals
y = np.linspace(0,10,11, endpoint = True) # y intervals
X, Y = np.meshgrid(x,y)
Z = f(X, Y, poly)
Thanks!
Error Message:
Traceback (most recent call last):
File "meshgrid_understanding.py", line 28, in <module>
Z = f(X, Y, poly)
File "meshgrid_understanding.py", line 16, in f
mask = (Point(x,y).within(poly) != True)
File "C:\Users\Nick\AppData\Local\Programs\Python\Python37\lib\site-packages\shapely\geometry\point.py", line 48, in __init__
self._set_coords(*args)
File "C:\Users\Nick\AppData\Local\Programs\Python\Python37\lib\site-packages\shapely\geometry\point.py", line 137, in _set_coords
self._geom, self._ndim = geos_point_from_py(tuple(args))
File "C:\Users\Nick\AppData\Local\Programs\Python\Python37\lib\site-packages\shapely\geometry\point.py", line 214, in geos_point_from_py
dx = c_double(coords[0])
TypeError: only size-1 arrays can be converted to Python scalars
Matplotlib has a function that accepts an array of points. Demo:
import numpy as np
from matplotlib.path import Path
coords = [(0, 0), (4,0), (4,4), (0,4)]
x = np.linspace(0, 10, 11, endpoint=True)
y = np.linspace(0, 10, 11, endpoint=True)
X, Y = np.meshgrid(x, y)
points = np.c_[X.ravel(), Y.ravel()]
mask = Path(coords).contains_points(points).reshape(X.shape)
You are passing an array to the function Point that accepts single values.

labeled intervals in matplotlib

I'm referring to the question "Plotting labeled intervals in matplotlib/gnuplot". The problem with the solution presented there is that it doesn't work when the file contains only one line of data. This is the code I'm trying:
#!/usr/bin/env python
#
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, MinuteLocator, SecondLocator
import numpy as np
from StringIO import StringIO
import datetime as dt
a=StringIO("""MMEX 2016-01-29T12:38:22 2016-01-29T12:39:03 SUCCESS
""")
#Converts str into a datetime object.
conv = lambda s: dt.datetime.strptime(s, '%Y-%m-%dT%H:%M:%S')
#Use numpy to read the data in.
data = np.genfromtxt(a, converters={1: conv, 2: conv},
names=['caption', 'start', 'stop', 'state'], dtype=None)
cap, start, stop = data['caption'], data['start'], data['stop']
#Check the status, because we paint all lines with the same color
#together
is_ok = (data['state'] == 'SUCCESS')
not_ok = np.logical_not(is_ok)
#Get unique captions and there indices and the inverse mapping
captions, unique_idx, caption_inv = np.unique(cap, 1, 1)
#Build y values from the number of unique captions.
y = (caption_inv + 1) / float(len(captions) + 1)
#Plot function
def timelines(y, xstart, xstop, color='b'):
    """Draw a horizontal bar at y from xstart to xstop, with end ticks, in the given color."""
    tick_top = y + 0.005
    tick_bottom = y - 0.005
    plt.hlines(y, xstart, xstop, color, lw=4)
    # Short vertical ticks mark the start and the stop of each interval.
    for x_edge in (xstart, xstop):
        plt.vlines(x_edge, tick_top, tick_bottom, color, lw=2)
#Plot ok tl black
timelines(y[is_ok], start[is_ok], stop[is_ok], 'k')
#Plot fail tl red
timelines(y[not_ok], start[not_ok], stop[not_ok], 'r')
#Setup the plot
ax = plt.gca()
ax.xaxis_date()
myFmt = DateFormatter('%Y-%m-%dT%H:%M:%S')
ax.xaxis.set_major_formatter(myFmt)
ax.xaxis.set_major_locator(SecondLocator(interval=3600)) # used to be SecondLocator(0, interval=20)
#To adjust the xlimits a timedelta is needed.
delta = (stop.max() - start.min())/10
plt.yticks(y[unique_idx], captions)
plt.ylim(0,1)
plt.xlim(start.min()-delta, stop.max()+delta)
plt.xlabel('Time')
plt.xticks(rotation=70)
plt.show(block=True)
When I try this code, I get the following error:
Traceback (most recent call last):
File "./testPlot.py", line 49, in <module>
timelines(y[is_ok], start[is_ok], stop[is_ok], 'k')
ValueError: boolean index array should have 1 dimension
Also, when I try to add a dummy line to the data, let's say "MMEX 2016-01-01T00:00:00 2016-01-01T00:00:00 SUCCESS", the plot works but doesn't look good.
Any suggestions? I tried to put this question on the same post when I found the solution, but I don't have enough reputation...
Thanks in advance
The issue is that when you only read 1 item with np.genfromtxt, it is producing scalars (0-dimensions). We need them to be at least 1D.
You can add these lines just above where you define your timelines function, and then everything works ok.
This makes use of the numpy function np.atleast_1d(), to turn the scalars into 1D numpy arrays.
#Check the dimensions are at least 1D (for 1-item data input)
if start.ndim < 1:
    start = np.atleast_1d(start)
if stop.ndim < 1:
    stop = np.atleast_1d(stop)
if is_ok.ndim < 1:
    is_ok = np.atleast_1d(is_ok)
if not_ok.ndim < 1:
    # Promote not_ok itself; reusing is_ok here would mark the entry as both
    # ok and not ok, so it would be drawn twice (black and red).
    not_ok = np.atleast_1d(not_ok)
The output:

Categories