Plot a graph by reading coulmns from CSV - python

I have a csv file which contains four columns. The first column in time, the second, third and fourth columns are Accelerometer readings. I want to plot Time on X-Axis and the Accelerometer reading on Y-Axis.
Sample Data:
0 1.0969 9.7721 0.614
20 1.1146 9.7501 0.7444
40 1.1146 9.7501 0.7444
60 1.0124 9.7151 0.7169
79 1.0124 9.7151 0.7169
100 1.0927 9.7324 0.7356
120 1.0927 9.7324 0.7356
Here is what I have so far.
from numpy import genfromtxt
import csv
import matplotlib.pyplot as plt
#import numpy as np
# Open the desired file for reading
f = open('walk-shoe.csv', "rb")
# create a object of csv class and read the file
# use ',' as a delimiter
reader = csv.reader(f, delimiter=',')
time_row = 0
accel_1_row = 0
accel_2_row = 0
accel_3_row = 0
time = []
accel_1 = []
accel_2 = []
accel_3 = []
# create a list of 'Time in ms'
for row in reader:
# Skip the first row
time_row = time_row + 1
if time_row == 1:
continue
time.append(row[0])
accel_1.append(row[1])
accel_2.append(row[2])
accel_3.append(row[3])
# print the contents of the list
# print time
#print accel_1
#print accel_2
#print accel_3
# append all the list accelerometer list together
final_accel = []
final_accel.append(accel_1)
final_accel.append(accel_2)
final_accel.append(accel_3)
#print final_accel
# plot the graph
for i in range(len(final_accel)):
plt.plot(time,[pt[i] for pt in final_accel],label = 'id %s'%i)
plt.legend()
plt.show()
I want to plot all the sensor readings on one graph on y axis and time in x axis

You seem to be importing numpy in the code you give, therefore I will take that to mean that library is available to you. Numpy lets you read in data very easily using numpy.loadtxt().
You can then create a for loop which goes through columns 1 to 3 and plots data against column 0 (time).
import numpy as np
import matplotlib.pyplot as plt
data = np.loadtxt('walk-shoe.csv', delimiter=',', dtype=float)
print (data)
#[[ 0. 1.0969 9.7721 0.614 ]
# [ 20. 1.1146 9.7501 0.7444]
# [ 40. 1.1146 9.7501 0.7444]
# [ 60. 1.0124 9.7151 0.7169]
# [ 79. 1.0124 9.7151 0.7169]
# [ 100. 1.0927 9.7324 0.7356]
# [ 120. 1.0927 9.7324 0.7356]]
for i in range(1,data.shape[1]):
plt.plot(data[:,0], data[:,i], label='id %s' %i)
plt.legend()
plt.show()

Related

Clustering near Lines using coordinates in Python

I have a list with x- and y-coordinates of start and Endpoints of some lines.Lines as csv
331,178,486,232
185,215,386,308
172,343,334,419
406,128,570,165
306,106,569,166
159,210,379,299
236,143,526,248
303,83,516,178
409,62,572,106
26,287,372,427
31,288,271,381
193,228,432,330
120,196,432,329
136,200,374,297
111,189,336,289
284,186,560,249
333,202,577,254
229,194,522,219
349,111,553,165
121,322,342,416
78,303,285,391
103,315,340,415
The lines look like this on my example image. Lines plotted
I want to group lines which are close to each other into clusters and create one line for each cluster. For this example i would like to have 5 clusters. After that i want to calculate the distance from each clusterline to the next.
import csv, math
file = open("lines.csv")
csvreader = csv.reader(file)
lines = []
for data in csvreader:
lines.append({'x1':int(data[0]), 'y1':int(data[1]), 'x2':int(data[2]), 'y2':int(data[3])})
def point_delta(p1, p2):
return abs(p1 - p2)
for line in lines[:2]:
for line_rev in lines:
#x_start_delta = abs(line['x1'] - line_rev['x1'])
x_start_delta = point_delta(line['x1'], line_rev['x1'])
y_start_delta = abs(line['y1'] - line_rev['y1'])
start_distance = math.sqrt(x_start_delta**2 + y_start_delta**2)
x_end_delta = abs(line['x2'] - line_rev['x2'])
y_end_delta = abs(line['y2'] - line_rev['y2'])
end_distance = math.sqrt(x_end_delta**2 + y_end_delta**2)
avg_distance = (start_distance + end_distance)/2
cluster = 0
if avg_distance < 100:
print(f"distance: {avg_distance}")
print("############## next line ##############")
I have written some code to calculate the distance between each line but cant find a way to save the lines which are near to each other in different lists.
Does somebody know how to do this or is there another way to create clusters? Im also thinking about using the middlepoint instead of the start-/endpoint
You could throw a clustering on it, but it has trouble with the lonely line at the end
data = [[331,178,486,232],
[185,215,386,308],
[172,343,334,419],
[406,128,570,165],
[306,106,569,166],
[159,210,379,299],
[236,143,526,248],
[303,83,516,178],
[409,62,572,106],
[26,287,372,427],
[31,288,271,381],
[193,228,432,330],
[120,196,432,329],
[136,200,374,297],
[111,189,336,289],
[284,186,560,249],
[333,202,577,254],
[229,194,522,219],
[349,111,553,165],
[121,322,342,416],
[78,303,285,391],
[103,315,340,415]]
import pandas as pd
import sklearn
from sklearn.cluster import MiniBatchKMeans
import numpy as np
lines = pd.DataFrame(data)
CLUSTERS = 5
X = lines.values
kmeans = MiniBatchKMeans(n_clusters=CLUSTERS,max_no_improvement=100).fit(X)
import numpy as np
import pylab as pl
from matplotlib import collections as mc
lines_segments = [ [ (l[0],l[1]),([l[2],l[3]]) ] for l in lines.values]
center_segments = [ [ (l[0],l[1]),([l[2],l[3]]) ] for l in kmeans.cluster_centers_]
line_collection = mc.LineCollection(lines_segments, linewidths=2)
centers = mc.LineCollection(center_segments, colors='red', linewidths=4, alpha=1)
fig, ax = pl.subplots()
ax.add_collection(line_collection)
ax.add_collection(centers)
ax.autoscale()
ax.margins(0.1)
You can see the centers with
kmeans.cluster_centers_

Plotting inside a loop gives me empty plots

I got this CSV:
and this code:
import csv
import numpy as np
import matplotlib.pyplot as plt
filename = '../dataset/data_validation/annotationValidation.csv'
fields = ['Image', 'Color', 'Validator']
imageIds = ['photo-1612694875299-4c379cb55ae2.jpg', 'photo-1611308382871-971045fcff4e.jpg', 'photo-1621329564823-1e0555fea622.jpg', 'photo-1624828002048-2681f0e67aed.jpg']
with open(filename, 'r') as csv_file:
dict_reader = csv.DictReader(csv_file)
headers = dict_reader.fieldnames
for imageId in imageIds:
# Color
blackAndWhite, colorful = 0, 0
count = 0
for row in dict_reader:
count = count + 1
print(count)
if imageId == row['Image']:
print(imageId)
# Color
if (row['Color']) == 'Black & White':
blackAndWhite = blackAndWhite + 1
else:
colorful = colorful + 1
labels = ['Black & White', 'Colorful']
counts = np.array([blackAndWhite, colorful])
# creating the bar plot
plt.barh(labels, counts, color='maroon')
plt.xlabel("No of Answers")
plt.ylabel("Labels")
plt.title("Metrics")
plt.ioff()
plt.show()
and I am trying to plot a bar for each image I find on my csv. In this example I have 3 images on my imageIds array, so 3 plots in total, showing the distribution between colorful and b&w.
The thing is, only my first plot appears, all the others are coming empty.
I am not sure if it something with my 2 loops or a matplotlib related thing.
Thank you
Ok found the error. I need to open csv before the second loop, meaning reopening csv from the start, cause probably once all the lines are read then the csv closes. Thank you all

Making python plots in a for loop that gives each plot a different title

....
1.I am making a python code that creates plots of data imported from a CITIfile. I want to run the code such that each plot made will have a different title. For example, plot one will have the title S11 Log Magnitude, the second plot will have the title S12 Log Magntitude, the third plot S12 Log Magnitude, and the fourth plot with the title S22 Log magnitude. The code I have written now will produce titles 0, 1, 2, and 3, using plt.title(str(i)). What modifications can I make to this code so that it will produce the desired plot titles in this sequence?
....
# modified based on https://github.com/feph/citidata
import citidata
import glob
import numpy as np
from numpy import *
import matplotlib.pyplot as plt
keyslist = [] # data name
datalist = [] # data arrays
M = N = 0
all_my_files = glob.glob("*.citi")
for filename in all_my_files:
M += 1
print("=== %s ===" % filename)
citi_file = citidata.genfromfile(filename)
for package in citi_file.packages:
print(package)
print(package.indep)
#print(package.deps) # suppress screen output
for key in package.deps:
N += 1
value = package.deps[key] # get data field
keyslist.append(key) # append key
datalist.append(value['data']) # append np array data
print('\n ', M, 'files read;', N, 'datasets recorded.')
print('dataset : name')
#plt.figure(0)
w = []
x = np.linspace(8, 12, 201)
for i in range(N):
fig = plt.figure(i)
print(i, ':', keyslist[i])
y = datalist[i] # data
# print(y)
test = np.abs(y)
f = sqrt(test)
mag = 20*log10(f)
print(mag)
# [S11, S21, S12,S22]
# y = np.append(mag)
plt.xlabel('Frequancy (Hz')
plt.ylabel('Log Magnitude (dB')
plt.plot(x, mag)
plt.title(str(i))
I think the only way to do this is by dictionary as there is no sequence in the name. Create a dictionary with integer key and the value being the name of the graph in the global scope:
name_dict = {
0: "S11 Log Magnitude",
1: "S12 Log Magntitude",
2: "S12 Log Magnitude",
3: "S22 Log magnitude"
}
After that, you just change the last code line to
plt.title(name_dict[i])
I hope this was helpful!
EDIT 1:
Sorry, I have changed the key number to start from 0.
EDIT 2:
Forgot commas in the dictionary and just added them

improve fit of polyval and polyfit

Can anyone help me to get a better fit on this curve? The plot shows test data for compression on an elastomer. The blue dots are the data, the red line is the 3rd order fit. If I increase the order of the fit the situation doesn't improve much.
I can supply the test data file if desired (There are 236 rows and 3 columns).
My code is given here:
import csv
import numpy as np
from matplotlib import pyplot as plt
import numpy.polynomial.polynomial as poly
from itertools import islice
with open('neoprene1.csv') as f:
readCSV = csv.reader(f, delimiter=',')
Cs = []
Rs = []
for row in islice(readCSV,1,None):
R = row[1] if row[1] != '' else 0.0 # Strain
C = row[2] if row[2] != '' else 0.0 # Stress
Rs.append(R)
Cs.append(C)
q = np.array([Rs],dtype = float).transpose()
s = np.array([Cs],dtype = float).transpose()
q1 = q[:,0]
s1 = s[:,0]
plt.cla()
z = poly.polyfit(q1,s1,3)
zz = poly.polyval(q,z)
plt.title('Neoprene True Stress-Strain')
plt.xlabel('Strain (%)')
plt.ylabel('Stress (MPa)')
aa = plt.plot(s1,zz,'r-', label = 'Fitting function')
bb = plt.plot(s1,q1,'bo', label = 'Raw data')

Create a weighted adjacency list from an alphanumeric edgelist in Python

I've been working on this dataset of protein-protein interactions. I have the edgelist in the following format:
AIG676464 AIG8475985 0.00035. Protein 1, Protein 2, weight.
I've tried several methods and can't get it to output the matrix. What I am hoping to get is the matrix form of the interactions. Any help would be greatly appreciated. Python or R is fine.
I've tried networkx:
import networkx as nx
fh = open("InWeb29.txt", 'rb')
#d = fh.write(textline)
#fh.close()
G = nx.read_edgelist(fh)
G = nx.Graph([()])
A = nx.adjacency_matrix(G)
print(A.todense())
A.setdiag(A.diagonal()*2)
print(A.todense())
Here is my other code so far:
import csv
import pandas as pd
"Load in data file"
"""Read in the data file"""
df = pd.read_csv("datafile.txt", sep= '\t', header=0)
headers = list(df)
prot1 = df[df.columns[0]]
prot2 = df[df.columns[1]]
weight = df[df.columns[2]]
print prot1
with open("datafile.txt") as f:
next(f)
data = [tuple(map(str,row)) for row in csv.reader(f)]
n = max(max(prot1, prot2) for prot1, prot2, weight in data)
matrix = [[None]* n for i in range(n)]
for prot1, prot2 in data:
matrix[prot1][prot2]= weight
for row in matrix:
print(row)
It NetworkX you can use read_weighted_edgelist
import networkx as nx
import StringIO
s = StringIO.StringIO("AIG676464 AIG8475985 0.00035")
G = nx.read_weighted_edgelist(s)
A = nx.adjacency_matrix(G)
print A.todense()
Output
[[ 0. 0.00035]
[ 0.00035 0. ]]

Categories