ValueError: cannot reshape array - contour plot python - python

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
# Load a space-delimited three-column table and try to contour column 2
# over the coordinates held in columns 0 and 1.
data = np.genfromtxt('file1.txt',delimiter=' ')
## lat => y
lats = data[:,0]
## lon => x
lons = data[:,1]
## values => z
values = data[:,2]
### count the distinct coordinates on each axis
# NOTE(review): set() does not preserve order, so lat_uniq / lon_uniq are
# unsorted; only their lengths are used below.
lat_uniq = list(set(lats.tolist()))
nlats = len(lat_uniq)
print(nlats)
print(lat_uniq)
lon_uniq = list(set(lons.tolist()))
print(lon_uniq)
nlons = len(lon_uniq)
print(nlons)
print (lats.shape, nlats, nlons)
# reshape() only succeeds when the number of rows equals nlats * nlons,
# i.e. when every (lat, lon) combination occurs exactly once in the file.
# This is the statement that raises the ValueError quoted below.
yre = lats.reshape(nlats,nlons)
xre = lons.reshape(nlats,nlons)
zre = values.reshape(nlats,nlons)
#### later in the defined map
fig,ax=plt.subplots(1,1)
cp = ax.contourf(xre, yre, zre)
fig.colorbar(cp)
plt.savefig('f1.pdf')
file1.txt
1 2 3
4 5 6
7 8 9
10 11 12
..
First column - x values,
Second - y values,
third - z values
I'm using the code to make a contour plot in python, but getting the following error:
Traceback (most recent call last):
File "./yut.py", line 21, in
yre = lats.reshape(nlats,nlons)
ValueError: cannot reshape array of size 4 into shape (4,4)
Could you please help to fix this error? Thanks in advance!

Matplotlib expects a contour plot to receive data in a specific format. Your approach does not provide the data in this format; you have to transform your data like this:
import numpy as np
import matplotlib.pyplot as plt
#from matplotlib.colors import LogNorm
from scipy.interpolate import griddata

# Columns of the space-delimited table: 0 -> lat (y), 1 -> lon (x), 2 -> value (z).
table = np.genfromtxt('test.txt', delimiter=' ')
lats, lons, values = table[:, 0], table[:, 1], table[:, 2]

# Sorted distinct coordinates, plus each row's position within them.
lat_uniq, lat_idx = np.unique(lats, return_inverse=True)
lon_uniq, lon_idx = np.unique(lons, return_inverse=True)

# 2-D coordinate grids required by contourf.  Grid cells that have no sample
# in the file stay NaN; fill with a known default instead (for example
# np.full(xre.shape, 0.0)) if the data has one.
xre, yre = np.meshgrid(lon_uniq, lat_uniq)
zre = np.full(xre.shape, np.nan)
zre[lat_idx, lon_idx] = values
print(zre)

# Alternatively, estimate the missing cells by linear interpolation of the
# scattered samples onto the same grid.
zre_interpolated = griddata((lons, lats), values, (xre, yre), method="linear")
print(zre_interpolated)

# Side-by-side comparison: raw grid on the left, interpolated grid on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
panels = (
    (ax1, zre, "data are not interpolated"),
    (ax2, zre_interpolated, "interpolated data"),
)
for axis, grid, title in panels:
    filled = axis.contourf(xre, yre, grid, levels=4)
    plt.colorbar(filled, ax=axis)
    axis.set_title(title)
plt.show()
Example output:
The example output was generated using the following data in the txt file:
1 1 1
1 2 2
2 4 9
4 5 2
6 1 1
6 2 8
6 4 9
6 5 2
2 5 3
4 2 5
4 3 8
4 4 5
1 3 4
1 5 2
2 1 1
2 3 4

Related

Why I am getting different plots for the same data file in python?

txt file
1 1 1
1 2 2
2 4 9
4 5 2
6 1 1
6 2 8
6 4 9
6 5 2
2 5 3
4 2 5
4 3 8
4 4 5
1 3 4
1 5 2
2 1 1
2 3 4
I got Fig 1 using the following code
import numpy as np
import matplotlib.pyplot as plt
# Place the scattered (lat, lon, value) rows on a regular grid and draw a
# filled contour plot.  Grid cells with no row in the file are left as NaN.
data = np.genfromtxt('file1.txt', delimiter=' ')
lats = data[:, 0]
lons = data[:, 1]
values = data[:, 2]
# Sorted distinct coordinates and, for each row, its index into them.
lat_uniq, lat_idx = np.unique(lats, return_inverse=True)
lon_uniq, lon_idx = np.unique(lons, return_inverse=True)
xre, yre = np.meshgrid(lon_uniq, lat_uniq)
zre = np.full(xre.shape, np.nan)
zre[lat_idx, lon_idx] = values
print(zre)
fig, (ax1) = plt.subplots(1, 1, figsize=(10, 5))
cp1 = ax1.contourf(xre, yre, zre, levels=4)
plt.colorbar(cp1, ax=ax1)
ax1.set_title("data are not interpolated")
plt.show()
and I got fig2 by using the below code and same data file
import numpy as np
import matplotlib.pyplot as plt
# Place the scattered (lat, lon, value) rows on a regular grid and draw a
# filled contour plot.  Grid cells with no row in the file are filled with 0,
# which the contouring treats as real data (unlike a NaN fill).
data = np.genfromtxt('test.txt', delimiter=' ')
lats = data[:, 0]
lons = data[:, 1]
values = data[:, 2]
# Sorted distinct coordinates and, for each row, its index into them.
lat_uniq, lat_idx = np.unique(lats, return_inverse=True)
lon_uniq, lon_idx = np.unique(lons, return_inverse=True)
xre, yre = np.meshgrid(lon_uniq, lat_uniq)
# Alternative fill: zre = np.full(xre.shape, np.nan)
# Use a float fill value: np.full(shape, 0) would create an integer array and
# silently truncate any fractional z values assigned into it.
zre = np.full(xre.shape, 0.0)
zre[lat_idx, lon_idx] = values
print(zre)
fig, (ax1) = plt.subplots(1, 1, figsize=(10, 5))
cp1 = ax1.contourf(xre, yre, zre, levels=4)
plt.colorbar(cp1, ax=ax1)
ax1.set_title("data are not interpolated")
plt.show()
Which code and contour plot is correct? The first, second and third columns in the txt file represent the x, y and z values, and I want to create a contour plot from the above data file. Thanks.

generate a 3d plot from data contained in a three columns file

I have a three columns data file structured in this way (example) :
X Y Z
0 0 0.2
0 1 0.3
0 2 0.1
1 0 0.2
1 1 0.3
1 2 0.9
2 0 0.6
2 1 0.8
2 2 0.99
I don't know what this kind of file is called ... but I did not find any example of plotting it as a 3d wireframe or 3d surface plot ...
EDIT: But is there a way to produce a wireframe or surface plot with data structured in this way?
In order to create a surface plot, you first need to transform your x, y and z data into 2d arrays. Then you can plot it easily:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
# Read the three-column table, skipping the "X Y Z" header line.
data = np.genfromtxt("data.txt", skip_header=1)
x_data, y_data, z_data = data[:, 0], data[:, 1], data[:, 2]

# plot_surface needs 2-D arrays.  The reshape below assumes the rows describe
# a complete dim x dim grid listed in row-major order (one coordinate constant
# while the other runs through all of its values, as in the example file).
dim = int(round(np.sqrt(len(z_data))))
if dim * dim != len(z_data):
    raise ValueError(
        "expected a square grid of points, got {} rows".format(len(z_data))
    )

# Reshape the real coordinates together with z so that every z value stays
# paired with its own (x, y); building x/y from np.arange + meshgrid would
# swap the axes and discard the actual coordinate values.
x = x_data.reshape((dim, dim))
y = y_data.reshape((dim, dim))
z = z_data.reshape((dim, dim))

# Create the 3-D axes through the figure so they are actually attached to it
# (constructing Axes3D(fig) directly no longer adds the axes on current
# matplotlib releases).
fig = plt.figure()
ax = fig.add_subplot(projection="3d")

# plot
ax.plot_surface(x, y, z, alpha=0.75)
plt.show()

Python: Plot from second and third columns while picking parameter values from the first one

I have three-column data in a file named "sample1.dat" and a code that reads the columns and tries to plot the 3rd column against the 2nd column. I pick up parameter values from the 1st column elements as long as their values remain the same.
"sample1.dat" reads
0 1 1
0 2 4
0 3 9
0 4 16
0 5 25
0 6 36
1 1 1
1 2 8
1 3 27
1 4 64
1 5 125
1 6 216
2 1 1
2 2 16
2 3 81
2 4 256
2 5 625
2 6 1296
And my code:
import matplotlib.pyplot as plt
import numpy as np
# Plot column 3 against column 2, intending one colour and one legend entry
# per distinct value of column 1.  Indentation is restored here; the logic is
# kept exactly as posted, including the two problems described after the plot.
data = np.loadtxt('sample1.dat')
x = data[:,0]
y = data[:,1]
z = data[:,2]
L = len(data)
col = ['r','g','b']
x0 = x[0]; j=0; jold=-1
for i in range(L):
    print('j, col[j]=',j, col[j])
    if x[i] == x0:
        print('y[i], z[i]=',y[i],z[i])
        if i==0 or j != jold:  # j-index decides new or the same parameter
            label = 'parameter = {}'.format(x0)
        else:
            label = ''
        print('label =',label)
        # Each call draws a single point, so there is nothing for a line to join.
        plt.plot(y[i], z[i], color=col[j], marker='o', label=label)
    else:
        x0 = x[i]  # Update when x-value changes,
                   # i.e. pick up the next parameter value
        # NOTE: rebinding i does not rewind the for-loop; range() hands out the
        # next index regardless, so the first point of each new x-value is skipped.
        i -= 1  # Shift back else we miss the 1st point for new x-value
        j += 1; jold = j
plt.legend()
plt.xlabel('2nd column')
plt.ylabel('3rd column')
plt.savefig('sample1.png')
plt.show()
The plot outcome:
One can clearly see that two issues persist:
The legends appear only for the first parameter though I tried to avoid the repetition in my code.
The default linestyle is not appearing though the legends show line plus marker plots.
How could I resolve these or is there a smarter way of coding to fulfill the same purpose.
The first issue is due to some strange logic involving j, jold and x0. The code can be simplified by drawing all y, z for each x-value at once. Numpy allows selecting the y's corresponding to a given x0 as y[x == x0].
The second issue can be solved by explicitly setting the desired linestyle, i.e. ls=''.
import matplotlib.pyplot as plt
import numpy as np
# Plot column 3 against column 2, one colour and one legend entry per
# distinct value of column 1.
data = np.loadtxt('sample1.dat')
x = data[:, 0]
y = data[:, 1]
z = data[:, 2]
colors = ['r', 'g', 'b']
# A boolean mask selects all rows of one parameter value at once, so each
# parameter is drawn (and labelled) with a single plot call.
# zip() stops at the shorter sequence: extend `colors` if there are more than
# three distinct parameter values.
for x0, color in zip(np.unique(x), colors):
    # ls='' suppresses the connecting line so only the markers are drawn.
    plt.plot(y[x == x0], z[x == x0], color=color, marker='o', ls='', label=f'parameter = {x0:.0f}')
plt.legend()
plt.xlabel('2nd column')
plt.ylabel('3rd column')
plt.show()
An alternative approach would use the seaborn library, which does the selecting and coloring without a lot of intervention, for example:
import seaborn as sns
sns.scatterplot(x=y, y=z, hue=x, palette=['r', 'g', 'b'])
Seaborn can automatically add labels if the data is organized as a dictionary or a pandas dataframe:
data = {'first column': x.astype(int),
'second column': y,
'third column': z}
sns.scatterplot(data=data, x='second column', y='third column', hue='first column', palette=['r', 'g', 'b'])
You can get the result you want in a few lines by using pandas and seaborn.
If you add column names (for instance A, B, and C) to the data in the sample1.dat file as follows:
A B C
0 1 1
0 2 4
0 3 9
0 4 16
0 5 25
0 6 36
1 1 1
1 2 8
1 3 27
1 4 64
1 5 125
1 6 216
2 1 1
2 2 16
2 3 81
2 4 256
2 5 625
2 6 1296
You can then load your data in a pandas dataframe and plot it with seaborn:
import pandas as pd
import seaborn as sns
df=pd.read_fwf('sample1.dat')
col = ['r','g','b']
sns.scatterplot(data=df,x='B',y='C',hue='A',palette=col)
And the output gives:

Reorder xmin, xmax, ymin, and ymax for each column in CSV file into new columns

I am new to python, and am struggling with a calculation. I have several thousand rows of data in a CSV table in the following format:
Link to image table
This data is in the wrong format in that several of my xmin/ymin values are higher than the xmax/ymax values (examples can be seen in the image link above). I need to create new columns and use either numpy or pandas to reorder the data so that they are in the correct format, such as using this code:
import numpy as np
xmin_new = np.min(xmin, xmax)
xmax_new = np.max(xmin, xmax)
ymin_new = np.min(ymin, ymax)
ymax_new = np.max(ymin, ymax)
The trouble is that I'm having trouble defining a column in a CSV and iterating through rows to do this. Can anyone suggest how I could modify this script to accomplish this?
import pandas
import numpy as np
import os
import csv
# Question script (Python 2 syntax: `r.next()` and the `print` statement).
# It reads only the header row of train.csv, appends four new column names to
# that in-memory list, and then tries to compute the reordered bounds.
#Set cwd
os.chdir("C:\\Users\\desired_directory")
#Open desired csv file
v = open("train.csv")
r = csv.reader(v)
# First row of the file, i.e. the header
row0 = r.next()
#print header to look at file
print row0
# These appends change only the Python list, not the file on disk
row0.append('xmin_new')
row0.append('xmax_new')
row0.append('ymin_new')
row0.append('ymax_new')
#Check appends
print row0
# NOTE(review): xmin, xmax, ymin and ymax are never assigned anywhere in this
# script, so these lines cannot run as written.  Also, np.min / np.max take an
# axis as their second positional argument; the element-wise pairwise
# functions are np.minimum / np.maximum.
xmin_new = np.min(xmin, xmax)
xmax_new = np.max(xmin, xmax)
ymin_new = np.min(ymin, ymax)
ymax_new = np.max(ymin, ymax)
#Errors occur here saying that the "xmin_new" column is undefined.
#Also looking to save the file to the directory, but unsure of how to do this properly.
If you are looking for speed, numpy is a good way to go. I assume you know how to read the whole data into a DataFrame (look up pandas.read_csv()).
# First, make a reproducible example
# In your case, you would read the df instead
# (assumes numpy is imported as np and pandas as pd)
n = 6
# Fixed seed so the random frame is the same on every run; the order of the
# two random draws below therefore matters.
np.random.seed(0)
cols = 'xmin xmax ymin ymax'.split()
# n rows of random integers in [0, 10) for the four bound columns, plus an
# unrelated column `foo` holding random letters.
df = pd.DataFrame(
np.random.randint(0, 10, (n,4)),
columns=cols,
).assign(foo=np.random.choice(list('abcd'), n))
>>> df
xmin xmax ymin ymax foo
0 5 0 3 3 a
1 7 9 3 5 d
2 2 4 7 6 a
3 8 8 1 6 d
4 7 7 8 1 b
5 5 9 8 9 c
Then, the actual bit:
# reorder min/max for both x and y
#
# Note: cols must be ['xmin', 'xmax', 'ymin', 'ymax']
# or ['ymin', 'ymax', 'xmin', 'xmax']
z = df[cols].values.reshape(-1, 2)
df[cols] = np.c_[z.min(1), z.max(1)].reshape(-1, 4)
And now:
>>> df
xmin xmax ymin ymax foo
0 0 5 3 3 a
1 7 9 3 5 d
2 2 4 6 7 a
3 8 8 1 6 d
4 7 7 1 8 b
5 5 9 8 9 c
Note: if instead, you want to create new columns as per your question, consider this instead:
cols_new = [f'{k}_new' for k in cols]
z = df[cols].values.reshape(-1, 2)
df[cols_new] = np.c_[z.min(1), z.max(1)].reshape(-1, 4)
There is a slightly more verbose way in pandas-only:
df = df.assign(
xmin=df[['xmin', 'xmax']].min(1),
xmax=df[['xmin', 'xmax']].max(1),
ymin=df[['ymin', 'ymax']].min(1),
ymax=df[['ymin', 'ymax']].max(1),
)
Same remark as before, if you intend to create new columns instead, then df.assign(xmin_new=...) etc.

Scatter Pie Plot Python Pandas

"Scatter Pie Plot" ( a scatter plot using pie charts instead of dots). I require this as I have to represent 3 dimensions of data.
1: x axis (0-6)
2: y axis (0-6)
3: Category lets say (A,B,C - H)
If two x and y values are the same I want a pie chart to be in that position representing that Category.
Similar to the graph seen in this link:
https://matplotlib.org/gallery/lines_bars_and_markers/scatter_piecharts.html#sphx-glr-gallery-lines-bars-and-markers-scatter-piecharts-py
or this image from Tableau:
[![enter image description here][1]][1]
As I am limited to only use python I have been struggling to manipulate the code to work for me.
Could anyone help me with this problem? I would be very grateful!
Example data:
XVAL YVAL GROUP
1.3 4.5 A
1.3 4.5 B
4 2 E
4 6 A
2 4 A
2 4 B
1 1 G
1 1 C
1 2 B
1 2 D
3.99 4.56 G
The final output should have 6 pie charts on the X & Y with 1 containing 3 groups and 2 containing 3 groups.
My attempt:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def draw_pie(dist,
             xpos,
             ypos,
             size,
             ax=None):
    """Draw a small pie chart as a set of scatter markers at one data point.

    Each entry of ``dist`` becomes one wedge, drawn as a custom polygon
    marker through ``ax.scatter`` at ``(xpos, ypos)``.

    Parameters
    ----------
    dist : sequence of numbers
        Wedge weights (a list, NumPy array or pandas Series).  They are
        normalised by their total, so counts can be passed directly.
    xpos, ypos : float
        Data coordinates of the pie centre.
    size : float
        Marker size, passed to ``scatter`` as ``s``.
    ax : Axes, optional
        Axes to draw on.  A new 10x8 inch figure is created when omitted.

    Returns
    -------
    The axes that were drawn on.

    Raises
    ------
    ValueError
        If ``dist`` is empty or does not sum to a positive value.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(10, 8))

    # Work on a plain float array so positional indexing is well defined even
    # when dist is a pandas Series with non-integer labels (value_counts()).
    weights = np.asarray(dist, dtype=float).ravel()
    if weights.size == 0:
        raise ValueError("dist must contain at least one value")
    cumulative = np.cumsum(weights)
    total = cumulative[-1]
    if not total > 0:
        raise ValueError("dist must sum to a positive value")

    # Cumulative fractions 0 = f0 <= f1 <= ... <= 1 delimiting the wedges.
    bounds = [0] + (cumulative / total).tolist()

    for start, stop in zip(bounds[:-1], bounds[1:]):
        angles = np.linspace(2 * np.pi * start, 2 * np.pi * stop)
        # Wedge outline: the centre point followed by the arc.
        x = [0] + np.cos(angles).tolist()
        y = [0] + np.sin(angles).tolist()
        xy = np.column_stack([x, y])
        ax.scatter([xpos], [ypos], marker=xy, s=size)

    return ax
fig, ax = plt.subplots(figsize=(40,40))
draw_pie([Group],'xval','yval',10000,ax=ax)
draw_pie([Group], 'xval', 'yval', 20000, ax=ax)
draw_pie([Group], 'xval', 'yval', 30000, ax=ax)
plt.show()
I'm not sure how to get 6 pie charts. If we group on XVAL and YVAL, there are 7 unique pairs. You can do something down this line:
fig, ax = plt.subplots(figsize=(40, 40))
# One pie per distinct (XVAL, YVAL) position: the wedges are the counts of
# each GROUP at that position, and the marker size grows with the row count.
for (x, y), d in df.groupby(['XVAL', 'YVAL']):
    dist = d['GROUP'].value_counts()
    draw_pie(dist, x, y, 10000 * len(d), ax=ax)
plt.show()
Output:

Categories