I am working on a dataset and need to plot its CDF. I have already done so, but because of the large number of data points the x-axis tick labels overlap each other. Could anyone help me with this? My code is:
import csv
import os
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import collection
# Plot the empirical CDF of the second column of the CSV file.
x = []
y = []
row = []
with open('SCPS-ADAPTIVE-1e8.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        # csv.reader yields strings, so y holds the raw text of column 1.
        y.append(row[1])

N = len(y)
# NOTE(review): the values are still strings here, so the sort is
# lexicographic and matplotlib presumably draws one categorical tick per
# value, which is what crowds the x-axis. Converting with float(row[1])
# would give a numeric axis; confirm the column is numeric first.
data = np.sort(y)
# Cumulative fraction for each sorted sample: 0, 1/N, ..., (N-1)/N.
P = np.arange(N) / float(N)
plt.plot(data, P, marker='o')
plt.show()
You didn't provide data so we cannot replicate your chart. However, this is what I would try:
# increase or decrease this variable to satisfy your needs
# (it is the spacing between labelled points: every num_labels-th one is kept)
num_labels = 30
# Positions 0, num_labels, 2*num_labels, ... below N.
ticks = list(range(0, N, num_labels))
# The data values found at those same positions become the tick texts.
labels = list(data[::num_labels])
plt.xticks(ticks, labels)
Related
I want to plot a cardiac signal from the forrestgump dataset on OpenNeuro. I opened the .tsv file and plotted the signal, then removed the noise with a median filter. However, the signal appears to have baseline drift, and I can't work out how to remove it from the figure. The signal should lie flat along the x-axis.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
import csv
import math
# Read the third column of a space-delimited file and plot its moving average.
x = []
y = []
tsv_file = 'tsvfile'
with open(tsv_file, 'r') as tsvfile:
    lines = csv.reader(tsvfile, delimiter=" ")
    for index, row in enumerate(lines):
        x.append(index)
        y.append(row[2])

window_size = 200
# np.float was only an alias for the builtin float and no longer exists in
# current NumPy, so the builtin is used for the conversion.
yy = np.array(y).astype(float)
print(len(yy))
# One average per full window; the list is empty when there are fewer than
# window_size samples.
moving_averages = [
    np.sum(yy[start:start + window_size]) / window_size
    for start in range(len(yy) - window_size + 1)
]
yd = moving_averages
xd = np.arange(len(yd))
print(len(yd))
# Only the first 2000 averaged points are drawn.
plt.plot(xd[0:2000], yd[0:2000], color='g', linestyle='dashed', marker='.', label="Weather Data")
plt.show()
I'm trying to plot some measurement data with Matplotlib.
With the code shown below I get the plot window and GUI, but no plot is drawn. If I change the plot style to circles or crosses, it works just fine.
# coding=utf-8
import matplotlib.pyplot as plt
import csv
# Collect column 0 (frequency) and column 4 (phase) of every CSV row as floats.
with open("AgPVP8.2.171g1L#2.csv") as csvfile:
    frequencies, phases = [], []
    for record in csv.reader(csvfile, delimiter=","):
        freq_value, phase_value = float(record[0]), float(record[4])
        frequencies.append(freq_value)
        phases.append(phase_value)

# NOTE(review): each list is wrapped in one more list here, so plot() receives
# nested one-row sequences rather than the flat lists; this is presumably why
# no connecting line shows up while markers do.
plt.plot([frequencies], [phases], "b-")
plt.xscale("log")
plt.show()
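The problem is that frequencies and phases only exist in the scope of with open(...), so you must place the plotting calls within it. (Strictly speaking, a with block does not introduce a new scope in Python; the change that matters in the code below is that the lists are passed to plt.plot directly rather than wrapped in extra brackets.)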
import matplotlib.pyplot as plt
import csv
# Read and plot entirely inside the with-block; the flat lists are handed to
# plot() as they are.
with open("AgPVP8.2.171g1L#2.csv") as csvfile:
    frequencies, phases = [], []
    for fields in csv.reader(csvfile, delimiter=","):
        # column 0 is the frequency, column 4 the phase
        freq, ph = float(fields[0]), float(fields[4])
        frequencies.append(freq)
        phases.append(ph)
    plt.plot(frequencies, phases, "-b")
    plt.xscale("log")
    plt.show()
plt.plot(x,y) requires x and y to be lists (or in general sequences) or arrays.
Here, you are trying to plot a list of a list, i.e. [x] is not the same as x.
So in your code you need to replace plt.plot([frequencies], [phases], "b-") with
plt.plot(frequencies, phases, "-b")
The complete code should then look like:
import matplotlib.pyplot as plt
import csv
# Parse the two needed columns of every row first, then split them into the
# x and y lists.
with open("AgPVP8.2.171g1L#2.csv") as csvfile:
    pairs = [
        (float(entry[0]), float(entry[4]))
        for entry in csv.reader(csvfile, delimiter=",")
    ]
frequencies = [freq for freq, _ in pairs]
phases = [phase for _, phase in pairs]

# Plain lists (not lists of lists) go to plot().
plt.plot(frequencies, phases, "b-")
plt.xscale("log")
plt.show()
I would suggest having a look at numpy.loadtxt or numpy.genfromtxt. Both make it much easier to read in a CSV file, e.g. in this case:
import matplotlib.pyplot as plt
import numpy as np
# Load only columns 0 and 4 of the comma-separated file; unpack=True hands
# back one array per selected column.
frequencies, phases = np.loadtxt(
    "AgPVP8.2.171g1L#2.csv",
    delimiter=",",
    usecols=(0, 4),
    unpack=True,
)
plt.plot(frequencies, phases, "b-")
plt.xscale("log")
plt.show()
Here is the plot I have currently:
The 'time' strings I import are like this: 08:12:46, so I would like to cut the zeros at the end, but I can't seem to find the problem. Also, is there a way to show the floats on the Y axis in the exponential format, which is the one I am importing from the csv?
I just started to look into matplotlib and numpy for work, so if you have some advice it would be fantastic.
Thank you in advance!
import numpy as np
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
# Show which style names are available, then switch to ggplot.
print(plt.style.available)
style.use('ggplot')
# One figure holding a single axes; animate() redraws into ax1.
fig, ax1 = plt.subplots()
def animate(i):
    """Re-read the cycle CSV and redraw ``ax1`` with its latest samples.

    The first 7 lines of the file are skipped, as are lines of at most one
    character. From every remaining ``;``-separated line the first field is
    parsed as an ``%H:%M:%S`` time (x value) and the third field is kept as
    text (y value). Only the last 100 points are plotted.

    Args:
        i: frame argument supplied by the animation; not used.

    Raises:
        ValueError: if a data line has fewer than three fields or its first
            field does not match ``%H:%M:%S``.
    """
    # The with-block closes the handle on every call; this function runs
    # repeatedly, so an unclosed open() would leak one handle per frame.
    with open('C:\\Users\\arzuffi pc test\\Desktop\\VMI WIP - Copia (2)\\Cycle info\\_Current Cycle.csv', 'r') as cycle_file:
        graph_data = cycle_file.read()
    lines = graph_data.split('\n')
    xs = []
    ys = []
    for line in lines[7:]:
        if len(line) <= 1:
            continue
        # Only the time (field 0) and pc (field 2) columns are used; the
        # remaining columns of the line are ignored.
        time_text, _, pc, *_ = line.split(';')
        print(time_text)
        print(pc)
        xs.append(dt.datetime.strptime(time_text, '%H:%M:%S'))
        # NOTE(review): pc stays a string, so the y values are plotted as
        # text; confirm the number format before converting to float.
        ys.append(pc)
    print(xs)
    # Keep the 100 most recent points (a shorter list is kept whole).
    xs = xs[-100:]
    ys = ys[-100:]
    ax1.clear()
    ax1.plot(xs, ys)
    plt.gcf().autofmt_xdate()
# Call animate() on fig every 1000 ms; the result is bound to a name so the
# animation object stays referenced while the window is open.
ani = animation.FuncAnimation(fig=fig, func=animate, interval=1000)
plt.show()
these are the data:
You can specify the format to be used as follows:
xs = matplotlib.dates.date2num(xs) # You need to keep this line
# Render major x ticks as hours:minutes:seconds.
ax1.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%H:%M:%S'))
ax1.plot(xs, ys) # You have this already
This would give you an output as follows:
What is the most idiomatic way to normalize each row of a pandas DataFrame? Normalizing the columns is easy, so one (very ugly!) option is:
(df.T / df.T.sum()).T
Pandas broadcasting rules prevent df / df.sum(axis=1) from doing this
To overcome the broadcasting issue, you can use the div method:
df.div(df.sum(axis=1), axis=0)
See pandas User Guide: Matching / broadcasting behavior
I would suggest to use Scikit preprocessing libraries and transpose your dataframe as required:
'''
Created on 05/11/2015
#author: rafaelcastillo
'''
import matplotlib.pyplot as plt
import pandas
import random
import numpy as np
from sklearn import preprocessing
def create_cos(number_graphs, length, amp):
    """Generate noisy cosine curves for testing.

    Args:
        number_graphs: number of curves (rows) to generate.
        length: number of points per curve.
        amp: scale of the interval the cosine is sampled over; the sample
            positions run linearly from ``0.3 * pi**2 * amp`` down to
            ``-0.3 * pi**2 * amp``.

    Returns:
        A tuple ``(x, yfinal)``. ``x`` is ``np.arange(length)``. ``yfinal``
        is a float array of shape ``(number_graphs, length)`` whose entries
        are ``2 * cos(position)`` plus ``random.random() * 0.1`` of noise.
        The result is 2-D for every ``number_graphs``, including 0 and 1.
    """
    x = np.arange(length)
    half_span = np.pi * 0.3 * (np.pi * amp)
    xx = np.linspace(half_span, -half_span, length)
    # The noiseless curve is the same for every row, so compute it once.
    base_curve = 2 * np.cos(xx)
    # Pre-sizing the result keeps it 2-D even for zero or one curve.
    yfinal = np.empty((number_graphs, length), dtype=float)
    for row_index in range(number_graphs):
        # Builtin float replaces np.float, which no longer exists in NumPy.
        noise = np.fromiter(
            (random.random() * 0.1 for _ in range(length)),
            float,
            count=length,
        )
        yfinal[row_index] = base_curve + noise
    return x, yfinal
def _plot_rows(frame, columns, row_count, title):
    """Draw the first ``row_count`` rows of ``frame`` on a new figure and show it."""
    figure, axes = plt.subplots()
    for row_index in range(row_count):
        axes.plot(columns, frame.iloc[row_index])
    axes.set_title(title)
    plt.show()
    return figure, axes


# 70 test curves with 24 points each, one curve per DataFrame row.
x, y = create_cos(70, 24, 3)
data = pandas.DataFrame(y)
x_values = data.columns.values
num_rows = data.shape[0]
fig, ax = _plot_rows(data, x_values, num_rows, 'Raw data')

# The scaler is fitted on the transposed frame and the result is transposed
# back, so the scaling is applied along each original row.
std_scale = preprocessing.MinMaxScaler()
df_std = std_scale.fit_transform(data.transpose())
data = pandas.DataFrame(df_std.T)
fig, ax = _plot_rows(data, x_values, num_rows, 'Data Normalized')
I have a lot of binary and ASCII files in one folder, which I read using the glob module. I process the binary data so that it can be plotted, and then try to plot the simplified binary data in one subplot and the corresponding ASCII file in another subplot. The problem is that the plots are generated correctly for each binary file, but for the ASCII files the data is simply overwritten by the previous files, so the same plot is always produced. Here is a simplified version of the code as an example:
import glob
import numpy as np
from struct import unpack
import matplotlib.pyplot as plt
chi = sorted(glob.glob('C:/Users/Desktop/bin/*.chi'))
for index, fh in enumerate(chi):
    data = np.genfromtxt(fh, dtype=float)
    x = [row[0] for row in data]
    y = [row[1] for row in data]
# NOTE: x and y are rebound on every pass, so after this loop they hold only
# the columns of the last .chi file.

binary = sorted(glob.glob('C:/Users/Desktop/bin/*.bin'))
for count, FILE in enumerate(binary):
    # The with-block closes the file even if unpack() raises.
    with open(FILE, 'rb') as F:
        # 1023183 four-byte floats = 1043 * 981; a repeat count in the format
        # replaces the million-character 'f' * 1023183 string.
        B = unpack('%df' % 1023183, F.read(4*1023183))
    A = np.array(B).reshape(1043, 981)
    #a = something column 1 # some further processing
    #b = something column 2 # and generates 1D data
    fig = plt.figure(figsize=(11, 8.0))
    # facecolor is the current name of the former axisbg keyword.
    ax1 = fig.add_subplot(211, facecolor='w')
    ax1.plot(a, b)
    ax2 = fig.add_subplot(212, facecolor='w')
    ax2.plot(x, y)
    plt.show()
Can somebody please explain why the files are replacing each other during plotting only for one set of data where the other set is plotting correctly?
The structure of the loops in your example is not correct: the plot command must be inside the loop over the ASCII files, otherwise only the last file is plotted. This should work:
try it like this:
import glob
import numpy as np
from struct import unpack
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(11, 8.0))
# Create each subplot once, before the loops, instead of calling
# add_subplot() again on every pass. facecolor is the current name of the
# former axisbg keyword.
ax1 = fig.add_subplot(211, facecolor='w')
ax2 = fig.add_subplot(212, facecolor='w')

chi = sorted(glob.glob('C:/Users/Desktop/bin/*.chi'))
for index, fh in enumerate(chi):
    data = np.genfromtxt(fh, dtype=float)
    x = [row[0] for row in data]
    y = [row[1] for row in data]
    # Plot inside the loop so every .chi file is drawn, not only the last.
    ax1.plot(x, y)

binary = sorted(glob.glob('C:/Users/Desktop/bin/*.bin'))
for count, FILE in enumerate(binary):
    # The with-block closes the file even if unpack() raises.
    with open(FILE, 'rb') as F:
        # 1023183 four-byte floats = 1043 * 981; a repeat count in the format
        # replaces the million-character 'f' * 1023183 string.
        B = unpack('%df' % 1023183, F.read(4*1023183))
    A = np.array(B).reshape(1043, 981)
    #a = something column 1 # some further processing
    #b = something column 2 # and generates 1D data
    ax2.plot(a, b)
plt.show()