How to draw line between point using python - python

How can I draw lines between 2 or 3 points?
I have 2 text files. The first text file is a list of positions for each point.
point long lat
A 115 12
B 89 13
C 100 13
etc.
and the second file is like this:
3, 4
A, B, C
R, X, Y
V, P, O
J, M, N
2, 3
Q, S
H, K
T, W
4, 1
E, D, F, G
And I want draw the lines like this pic:
Actually, I'm not sure with my code. This is my code::
import psycopg2
import psycopg2.extensions
import matplotlib.pyplot as plt
import itertools
import collections
import numpy as np
def readRules(_dir):
    """Load the rule records stored in ``rule3.csv`` inside *_dir*.

    The file is read as a sequence of records. Each record consists of a
    size line ``columns, rows``, a header line whose comma-separated fields
    become the dictionary key (as a tuple), and then ``rows`` further lines
    that are each parsed into a tuple of ints.

    Every line is passed through ``strip_comment`` (defined elsewhere;
    presumably it removes trailing comments - verify) before splitting.

    Returns a dict mapping each header tuple to its list of int tuples.
    """
    rules_path = os.path.join(_dir, 'rule3.csv')
    records = {}
    with open(rules_path, 'rb') as handle:
        for size_line in handle:
            # The record starts with its "matrix size".
            columns, rows = (int(field)
                             for field in strip_comment(size_line).split(','))
            # The following line is the header of this record.
            header = tuple(strip_comment(next(handle)).split(','))
            # Then come the data rows that belong to this record.
            body = []
            for _ in range(rows):
                fields = strip_comment(next(handle)).split(',')
                body.append(tuple(int(field) for field in fields))
            records[header] = body
    return records
# Plot every point referenced by the rules, grouped and coloured by
# data[point][0], label each point, and connect the points of each rule row.
data = getDataReference(cur)  # to get location for each point
myPoints = {}
myLines = []
mydict = readRules(_dir)
for value in mydict.values():
    for x in value:
        for point in x:
            myPoints[point] = data[point][0]
        # A row with a single point has nothing to connect.
        if len(x) > 1:
            myLines.append(x)

# Invert the mapping: group value -> list of points that carry it.
myPoints_reversed = collections.defaultdict(list)
for number, string in myPoints.items():
    myPoints_reversed[string].append(number)

colors = plt.cm.Spectral(np.linspace(0, 1, len(myPoints_reversed)))
myplt = {}
for k, col in zip(myPoints_reversed.keys(), colors):
    Long = [data[x][2] for x in myPoints_reversed[k]]
    Lat = [data[x][1] for x in myPoints_reversed[k]]
    myplt[k] = plt.plot(Lat, Long, 'o', markerfacecolor=col,
                        markeredgecolor='k', markersize=10, label=k)
plt.legend(loc=3, ncol=2, borderaxespad=0.)

for point in myPoints:
    plt.annotate(point, xy=getLatLong(point, data))

for line in myLines:
    # Join every consecutive pair so that rows with three or more points
    # (A-B-C) get all of their segments, not only the first one.
    for start, end in zip(line, line[1:]):
        plotLine(start, end, data)
plt.show()

Related

Skip first several rows when plotting a CSV-file

For a project at work I'm working on a code that reads a csv-file and generates a plot.
My problem is, I work with multiple csv-files but all of them contain 10-40 rows in the beginning, filled with device and sensor information.
I would like my code to detect where the first line of values is and to start reading the values into my arrays from there. But since I have very little experience with Python, I couldn't find a good solution.
If you can recommend me specific methods or change my code, feel free to comment.
Thanks to everyone taking their time to help me
import matplotlib.pyplot as plt
import csv
# One list per CSV column; only a and b are filled while the other
# sensors are commented out.
a, b, c, d, e = [], [], [], [], []

with open('PATH', 'r') as csvfile:
    lines = csv.reader(csvfile, delimiter=',')
    for row in lines:
        # Every row is converted, so non-numeric header rows raise here.
        a.append(float(row[0]))
        b.append(float(row[1]))
        #c.append(float(row [2]))
        #d.append(float(row [3]))
        #e.append(float(row [4]))

# 12 x 8 inch figure.
f = plt.figure(figsize=(12, 8))

#plt.plot(X-Achse, Y-Achse, linewidth=* , color = ' ', label = " ")
plt.plot(a, b, linewidth=0.35, color='b', label="Sensor 1")
#plt.plot(a, c, linewidth=0.35, color = 'g', label = "Sensor 2")
plt.title('Pressure Report', fontsize=20)
plt.xlabel('Time(s)')
plt.ylabel('Pressure(bar)')
plt.grid()
plt.legend()
plt.show()
You can skip the lines using conditional statements as below:
# Number of device/sensor information rows at the top of the file.
# Set this to the header length of the file being read.
header_rows = 40
count = 1  # 1-based number of the row currently being read
for row in lines:
    # Rows 1..header_rows are skipped; only the rows after them hold values.
    if count > header_rows:
        a.append(float(row[0]))
        b.append(float(row[1]))
    count += 1
The following changes work for me. It may not be the fastest or best solution, but it does what it's supposed to.
import matplotlib.pyplot as plt
import csv
path = 'PATH'
a = []
b = []
c = []
d = []
e = []

# Find the first data row: the first line that contains "0.000000000"
# (presumably the initial time value - adjust the marker if the files differ).
# firstline is the number of header lines in front of that row.
firstline = None
with open(path, 'r') as read_obj:
    for line_number, line in enumerate(read_obj, start=1):
        if "0.000000000" in line:
            firstline = line_number - 1
            break  # the first match is all that is needed
if firstline is None:
    raise ValueError("no line containing '0.000000000' found in " + path)

# Second pass: parse only the rows after the header.
with open(path, 'r') as csvfile:
    lines = csv.reader(csvfile, delimiter=',')
    for count, row in enumerate(lines, start=1):
        if count > firstline:
            a.append(float(row[0]))
            b.append(float(row[1]))
            #c.append(float(row [2]))
            #d.append(float(row [3]))
            #e.append(float(row [4]))

f = plt.figure()
f.set_figwidth(12)
f.set_figheight(8)
#plt.plot(X-Achse, Y-Achse, linewidth=* , color = ' ', label = " ")
plt.plot(a, b, linewidth=0.35, color='b', label="Sensor 1")
#plt.plot(a, c, linewidth=0.35, color = 'g', label = "Sensor 2")
plt.title('Pressure Report', fontsize=20)
plt.xlabel('Time(s)')
plt.ylabel('Pressure(bar)')
#plt.axis([x_min, x_max, y_min, y_max])
#plt.axis([350, 380, -6, 2.2])
plt.grid()
plt.legend()
plt.show()

Fitting dictionary into normal distribution curve

Here is the dictionary:
l= {31.2: 1,35.1: 4,39.0: 13,42.9: 33,46.8: 115,50.7: 271,54.6: 363,58.5:381,62.4:379,66.3:370,70.2:256,74.1: 47,78.0: 2}
So this means that 31.2 has occurred 1 time, 35.1 has occurred 4 times and so on.
I tried:
fig, ax = plt.subplots(1, 1)
# x: the measured values (dict keys); y: how often each occurred (dict values).
# values() has to be called; passing the bound method l.values does not plot.
ax.scatter(list(l.keys()), list(l.values()))
ax.set_xlabel('Key')
ax.set_ylabel('Length of value')
Also I found mean and std by
# Statistics over the distinct keys only; the occurrence counts stored as
# the dictionary values are not used here.
keys = list(l)
np.mean(keys)
np.std(keys)
Is this the right way to find the mean and std for this data? I doubt it, because it does not take the number of occurrences of each value into account. I want to see the normal curve for this data. Also, is there a way to know how often a value occurs? For example, if I extend the curve until it touches 0 on the x axis, I would like to know how many data points (or what probability) correspond to a value of 0.
Here is a way to draw a normal gauss curve to fit the data:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
l = {31.2: 1, 35.1: 4, 39.0: 13, 42.9: 33, 46.8: 115, 50.7: 271, 54.6: 363, 58.5: 381, 62.4: 379, 66.3: 370, 70.2: 256, 74.1: 47, 78.0: 2}

# Expand the value -> count mapping into one list entry per occurrence so
# that the statistics are weighted by the counts.
l_list = []
for value, occurrences in l.items():
    l_list.extend([value] * occurrences)
mu = np.mean(l_list)
sigma = np.std(l_list)

# Left axis: the raw counts.
fig, ax = plt.subplots(1, 1)
ax.scatter(l.keys(), l.values())
ax.set_xlabel('Key')
ax.set_ylabel('Length of value')

# Right axis: normal density with the sample mean and std, drawn over
# mu +/- 4 sigma.
ax2 = ax.twinx()
u = np.linspace(mu - 4 * sigma, mu + 4 * sigma, 100)
ax2.plot(u, stats.norm.pdf(u, mu, sigma), color='crimson')
ax2.set_ylabel('normal curve')
plt.show()
Here's a way to get the mean and std:
l = {31.2: 1, 35.1: 4, 39.0: 13, 42.9: 33, 46.8: 115, 50.7: 271, 54.6: 363, 58.5: 381, 62.4: 379, 66.3: 370, 70.2: 256, 74.1: 47, 78.0: 2}
# One sub-list per key, holding that key repeated "count" times.
ll = [[value] * occurrences for value, occurrences in l.items()]
# Concatenate the sub-lists into the full sample list.
flat_list = []
for run in ll:
    flat_list.extend(run)
np.mean(flat_list), np.std(flat_list)
which prints (59.559194630872476, 7.528353520785996).
You could do a histogram with np.histogram(flat_list) to evaluate the frequency of each occurrence.

TypeError: list indices must be integers or slices, not list - matplotlib (scatter)

I am plotting data using matplotlib. I am following this example as base to plot with four labels. Below you can find the code. However, I am getting this error,
Traceback (most recent call last):
File "visualization_SH_Male_female.py", line 86, in <module>
main()
File "visualization_SH_Male_female.py", line 58, in main
plt.scatter(x_list[indices], y_list[indices], marker=markers[i], color=colors[j])
TypeError: list indices must be integers or slices, not list
in this scatter plot. Can someone point how I can transform indices into integers?
import matplotlib
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import csv
import numpy as np
from sklearn import preprocessing
def parse_features_from_csv(csv_file):
    """Read feature vectors and numeric labels from a comma-separated file.

    Each row holds float features followed by an identifier in the last
    column. The identifier is replaced by a label that depends on whether it
    is one of five listed names and on the row position:

    * rows 0-49:  listed name -> 0, any other -> 2
    * rows 50+:   listed name -> 1, any other -> 3

    Returns ``(feat_lst, id_lst)``: the list of float feature lists and the
    list of labels, in file order.
    """
    listed_names = ('Alan_Cumming', 'Chiwetel_Ejiofor', 'James_Purefoy',
                    'Johnathon_Schaech', 'Will_Poulter')
    feat_lst, id_lst = [], []
    with open(csv_file) as fr:
        for count, row in enumerate(csv.reader(fr, delimiter=',')):
            raw_features, name = row[:-1], row[-1]
            in_second_part = count >= 50
            if name in listed_names:
                identifier = 1 if in_second_part else 0
            else:
                identifier = 3 if in_second_part else 2
            feat_lst.append([float(value) for value in raw_features])
            id_lst.append(identifier)
    return feat_lst, id_lst
def main():
    """Embed the features of ``comb.csv`` with t-SNE and scatter-plot them.

    One scatter call is made per (marker, colour) combination.
    """
    feat_lst, labels = parse_features_from_csv('comb.csv')
    labels = np.array(labels)
    X_embedded = TSNE(n_components=2).fit_transform(feat_lst)

    # Split the 2-D embedding into plain Python lists of x and y values.
    x_list, y_list = [], []
    for x, y in X_embedded:
        x_list.append(x)
        y_list.append(y)

    # generate a list of markers and another of colors
    markers = ["o", "<"]
    colors = ['r', 'g']
    for i in range(len(markers)):
        for j in range(len(colors)):
            lab = i + j
            # 0/1 flag per sample: 1 where the sample's label equals lab.
            indices = [int(flag) for flag in labels == lab]
            print(indices)
            # NOTE: x_list and y_list are Python lists, so indexing them with
            # the list `indices` raises "TypeError: list indices must be
            # integers or slices, not list".
            plt.scatter(x_list[indices], y_list[indices], marker=markers[i], color=colors[j])
    plt.legend(['0', '1', '2', '3'])
    plt.grid()
    plt.show()
In python, this won't work:
a = [1,2,3,4]
b = [2,3]
# Raises TypeError: a list cannot be indexed with another list.
c = a[b]
because your index ([]) needs to be an integer or slice, not a list.
Simplest method would be to create a sub-list, only containing the items you need by list comprehension. In your case, this is one way to do that:
# Boolean mask with one entry per sample: True where its label equals lab.
indices = labels == lab
# Keep the coordinates whose mask entry is True. The mask is paired with the
# data position by position; it is not a list of positions to look up.
x_sublist = [x for x, keep in zip(x_list, indices) if keep]
y_sublist = [y for y, keep in zip(y_list, indices) if keep]
plt.scatter(x_sublist, y_sublist, marker=markers[i], color=colors[j])
The problem seems to be that you use python lists instead of numpy arrays. Since the code isn't runnable, the following is a minimal example:
import numpy as np
import matplotlib.pyplot as plt
x = np.array([.4, .8, 1.2, 1.6, 2.0, 2.4])
y = np.array([.1, .2, .3, .7, .6, .5])
lab = np.array([1, 1, 2, 2, 1, 2])

# One scatter call per distinct label; a boolean mask selects its points.
for group in np.unique(lab):
    selected = lab == group
    plt.scatter(x[selected], y[selected], label=str(group))
plt.legend()
plt.show()

Errors with matplotlib plot, python

I get this horrible massive error when trying to plot using matplotlib:
Traceback (most recent call last):
File "24oct_specanal.py", line 90, in <module>
main()
File "24oct_specanal.py", line 83, in main
plt.plot(Svar,Sav)
File "/usr/lib64/python2.6/site-packages/matplotlib/pyplot.py", line 2458, in plot
ret = ax.plot(*args, **kwargs)
File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 3849, in plot
self.add_line(line)
File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 1443, in add_line
self._update_line_limits(line)
File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 1451, in _update_line_limits
p = line.get_path()
File "/usr/lib64/python2.6/site-packages/matplotlib/lines.py", line 644, in get_path
self.recache()
File "/usr/lib64/python2.6/site-packages/matplotlib/lines.py", line 392, in recache
x = np.asarray(xconv, np.float_)
File "/usr/lib64/python2.6/site-packages/numpy/core/numeric.py", line 235, in asarray
return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.
This is the code I am using:
import numpy as np
import numpy.linalg
import random
import matplotlib.pyplot as plt
import pylab
from scipy.optimize import curve_fit
from array import array
def makeAImatrix(n):
    """Return ``(A, I)`` for a system of size *n*.

    ``A`` is an n x n symmetric matrix with ones on the diagonal and
    ``random.random()`` values off the diagonal; ``I`` is a vector of n ones.
    """
    # Draw the strictly lower triangle column by column.
    lower = np.zeros((n, n))
    for col in range(n):
        for row in range(col + 1, n):
            lower[row, col] = random.random()
    # Mirror it into the upper triangle, then set the diagonal.
    A = lower + lower.T
    np.fill_diagonal(A, 1)
    return (A, np.ones(n))
# For each of t random matrices from makeAImatrix: solve A x = I, remove the
# species with the most negative solution until no solution is negative,
# record the removed and surviving rows, then plot Svar against Sav.
def main():
n=5 #number of species
t=1 # number of matrices to check
Aflat = []
Aflatlist = [] #list of matrices
Aflatav = []
Aflatvar = []
Aflatskew = []
remspec = []
Afreeze = [] #this is a LIST OF VECTORS that stores the vector corresponding to each extinct species as
#it is taken out. it is NOT the same as the original A matrix as it is only
#coherant in one direction. it is also NOT A SQUARE.
Sex = [] # (Species extinct) this is a vector that corresponds to the Afreeze matrix. if a species is extinct then
#the value stored here will be -1.
Sav = [] # (Species average) The average value of the A cooefficiants for each species
Svar = [] # (Species variance)
for k in range (0,t):
allpos = 0
A, I = makeAImatrix(n)
while allpos !=1: #while all solutions are not positive
x = numpy.linalg.solve(A,I)
if any(t<0 for t in x): #if any of the solutions in x are negative
p=np.where(x==min(x)) # find the most negative solution, p is the position
#now store the A coefficiants of the extinct species in the Afreeze list
# NOTE: p is the tuple returned by np.where, so A[p] is a 2-D array and not
# a single 1-D row.
Afreeze.append(A[p])
Sex.append(-1) #given -1 value as species is extinct.
x=np.delete(x, p, 0)
A=np.delete(A, p, 0)
A=np.delete(A, p, 1)
I=np.delete(I, p, 0)
else:
allpos = 1 #set allpos to one so loop is broken
l=len(x)
#now fill Afreeze and Sex with the remaining species that have survived
for m in range (0, l):
Afreeze.append(A[m])
Sex.append(1) # value of 1 as this species has survived
#now time to analyse the coefficiants for each species.
for m in range (0, len(Sex)):
X1 = sum(Afreeze[m])/len(Afreeze[m]) # this is the mean
# NOTE(review): X2 accumulates the plain elements, not their squares, so it
# appears to equal X1 rather than a second moment - confirm intent.
X2 = 0
for p in range (len(Afreeze[m])):
X2 = X2 + Afreeze[m][p]
X2 = X2/len(Afreeze[m])
Sav.append(X1)
Svar.append(X2 - X1*X1)
spec = []
for b in range(0,n):
spec.append(b)
# Sav/Svar entries computed from a 2-D Afreeze element are arrays, not numbers.
plt.plot(Svar,Sav)
plt.show()
#plt.scatter(spec, Sav)
#plt.show()
if __name__ == '__main__':
main()
I cannot figure this out at all! I think it was working before but then just stopped working. Any ideas?
Your problem is in this section:
if any(t<0 for t in x): #if any of the solutions in x are negative
p=np.where(x==min(x)) # find the most negative solution, p is the position
#now store the A coefficiants of the extinct species in the Afreeze list
Afreeze.append(A[p])
You're indexing a 2D array, and the result is still a 2D array. So, your Afreeze will get a 2D array appended, instead of a 1D array. Later, where you sum the separate elements of Afreeze, a summed 2D array will result in a 1D array, and that gets added to Sav and Svar. By the time you feed these variables to plt.plot(), matplotlib will get an array as one of the elements instead of a single number, which it of course can't cope with.
You probably want:
if any(t<0 for t in x):
p=np.where(x==min(x))
# A[p] is 2-D here; [0] takes its first row so that a 1-D array is appended.
Afreeze.append(A[p][0])
but I haven't tried to follow the logic of the script very much; that's up to you.
Perhaps good to see if this is indeed what you want: print the value of A[p][0] in the line before it gets appended to Afreeze.
I noted that because of the random.random() in the matrix creation, the if statement isn't always true, so the problem doesn't always show up. Minor detail, but could confuse people.
Fix your indentation?
import numpy as np
import numpy.linalg
import random
import matplotlib.pyplot as plt
import pylab
from scipy.optimize import curve_fit
from array import array
def main():
    """Tally how many species survive in random symmetric matrices.

    For each of ``t`` random ``n`` x ``n`` matrices (unit diagonal,
    ``random.random()`` off-diagonal) the system ``A x = I`` is solved and the
    species with the most negative solution is removed, until no solution is
    negative. ``B[l]`` ends up as the fraction of matrices that finished with
    ``l`` species.

    The result is written to ``results.txt`` (one "count fraction" pair per
    line) and shown as a histogram and as a line plot.
    """
    n = 20   # number of species
    t = 100  # number of matrices to check

    # Possible final species counts 0..n; B has one bin for each of them.
    spec = np.arange(n + 1)
    B = np.zeros(n + 1)

    for k in range(t):
        # Random symmetric matrix with ones on the diagonal.
        A = np.zeros((n, n))
        I = np.ones(n)
        for i in range(n):
            for j in range(i + 1, n):
                A[j, i] = random.random()
                A[i, j] = A[j, i]
            A[i, i] = 1

        # Remove the most negative species until every solution is >= 0.
        while True:
            x = numpy.linalg.solve(A, I)
            if not (x < 0).any():
                break
            p = int(np.argmin(x))  # position of the most negative solution
            A = np.delete(A, p, 0)
            A = np.delete(A, p, 1)
            I = np.delete(I, p, 0)
        B[len(x)] += 1

    # Convert counts to fractions of the t matrices that were checked.
    B = B / t

    with open("results.txt", "w") as resfile:
        for count, fraction in zip(spec, B):
            resfile.write("%d " % count)
            resfile.write("%0.6f \n" % fraction)

    plt.hist(B, bins=n)
    plt.title("Histogram")
    plt.show()
    plt.plot(spec, B)
    plt.xlabel("final number of species")
    plt.ylabel("fraction of total matrices")
    plt.title("plot")
    plt.show()


if __name__ == '__main__':
    main()
Got this:

How do i annotate with a subscripted text in matplotlib?

I'm attempting to plot some data with matplotlib and I need some of the annotations to be formatted like math/chemistry formulas.
here is some of my code.
#!/usr/bin/python2
import numpy as np
import matplotlib.pyplot as pytl
from matplotlib import rc
# Render all text through LaTeX with a sans-serif (Helvetica) font.
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
rc('text', usetex=True)
recdt = np.dtype([('compound', str, 4), ('H_v', 'f4'), ('B_o', 'f4')])

# File 'tra': three whitespace-separated columns (label, number, number).
# unpack=True returns one array per column.
gat = np.loadtxt('tra', dtype=object, usecols=(0, 1, 2), unpack=True)

# Collect the columns row by row: labels, y values, x values.
gct, ght, gbt = [], [], []
for c, h, b in np.transpose(gat):
    gct.append(c)
    ght.append(h)
    gbt.append(b)
# The builtin float replaces the np.float alias, which NumPy has removed.
ght = np.array(ght, dtype=float)
gbt = np.array(gbt, dtype=float)

hard = pytl
four = hard #####
four.scatter(gbt, ght)
hard.title( 'physical stuff' )
hard.xlabel('physical prop 1')
hard.ylabel('physical prop2 ')
# Label every point, with an arrow from the offset text to the point.
for l, x1, y2 in zip(gct, gbt, ght):
    hard.annotate(l, xy=(x1, y2), xytext=(-24, 12),
                  textcoords='offset points',
                  arrowprops=dict(arrowstyle='->',
                                  connectionstyle='arc3,rad=0'),
                  rotation=0)
hard.ylim([0, 10])
hard.savefig('hardcomp.png')
hard.show()
and here is some test data
ZrC 6.8 1
NbC 8 2
NbN 7 13
RuB2 30 5
BP 3 1
AlP 9.4 3
InSb 2.2 47
C 6 4
the data is in three columns one text the other two are numbers.
I'd like the '2' in 'RuB2' to end up as a subscript.
We can display the 2 in 'RuB2' as a subscript by using TeX notation: $\tt{RuB_{2}}$. In the code, you just have to modify c:
import re
# Wrap every run of digits in the label in a TeX subscript ("2" -> "_{2}")
# and put the whole label in math mode inside \tt{...}.
for c,h,b in np.transpose(gat):
c = r'$\tt{{{c}}}$'.format(c = re.sub(r'(\d+)',r'_{\1}', c))
which yields
import re
import numpy as np
import matplotlib.pyplot as pytl
from matplotlib import rc
# Render all text through LaTeX with a sans-serif (Helvetica) font.
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
rc('text', usetex=True)
recdt = np.dtype([('compound', str, 4), ('H_v', 'f4'), ('B_o', 'f4')])

# File 'tra': three whitespace-separated columns (label, number, number).
# unpack=True returns one array per column.
gat = np.loadtxt('tra', dtype=object, usecols=(0, 1, 2), unpack=True)

gct, ght, gbt = [], [], []
for c, h, b in np.transpose(gat):
    # Turn every run of digits into a TeX subscript and typeset the label
    # in math mode, e.g. "RuB2" -> "$\tt{RuB_{2}}$".
    c = r'$\tt{{{c}}}$'.format(c=re.sub(r'(\d+)', r'_{\1}', c))
    gct.append(c)
    ght.append(h)
    gbt.append(b)
# The builtin float replaces the np.float alias, which NumPy has removed.
ght = np.array(ght, dtype=float)
gbt = np.array(gbt, dtype=float)

hard = pytl
four = hard #####
four.scatter(gbt, ght)
hard.title( 'physical stuff' )
hard.xlabel('physical prop 1')
hard.ylabel('physical prop2 ')
# Label every point, with an arrow from the offset text to the point.
for l, x1, y2 in zip(gct, gbt, ght):
    print(l, x1, y2)
    hard.annotate(
        l, xy=(x1, y2), xytext=(-24, 12), textcoords='offset points',
        arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'),
        rotation=0)
hard.ylim([0, 10])
hard.savefig('hardcomp.png')
hard.show()

Categories