How to remove baseline drift after denoising the signal? - python

I want to plot a cardiac signal from forrestgump dataset in openneuro.I opened the tsv. file and I plot the signal.then I removed the noise by a median filter.But the signal in my opinion has baseline drift.I can't find out how I can remove the baseline drift from the figure.the figure must be straight in x axis
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
import csv
import math
x = []
y = []
tsv_file ='tsvfile'
with open(tsv_file, 'r') as tsvfile:
lines = csv.reader(tsvfile, delimiter=" ")
for index, row in enumerate(lines):
x.append(index)
y.append(row[2])
window_size = 200
i = 0
moving_averages = []
yy=np.array(y).astype(np.float)
print(len(yy))
while i < len(yy) - window_size + 1:
window_average = np.sum(yy[i:i+window_size])/window_size
moving_averages.append(window_average)
i += 1
yd=moving_averages
xd = np.arange(len(yd))
print(len(yd))
plt.plot(xd[0:2000], yd[0:2000], color='g', linestyle='dashed', marker='.', label="Weather Data")
plt.show()

Related

x-axis get over written in matplot

I am working on my dataset and I have to plot the cdf of it. I already did but due to large number of data the x-axis showing some overwritten values. Could anyone help me in this regard. my code is
import csv
import os
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import collection
x = []
y=[]
row=[]
with open('SCPS-ADAPTIVE-1e8.csv', 'r') as file:
reader = csv.reader(file)
for row in reader:
y.append(row[1])
N=len(y)
data = np.sort(y)
P = np.arange(N) / float(N)
plt.plot(data, P, marker='o')
plt.show (
You didn't provide data so we cannot replicate your chart. However, this is what I would try:
# increase or decrease this variable to satisfy your needs
num_labels = 30
ticks = [t for t in range(N) if t % num_labels == 0]
labels = [l for i, l in enumerate(data) if i % num_labels == 0]
plt.xticks(ticks, labels)

How to create a trendline using pandas and matplotlib on financial stock data

I am trying to create a trendline on financial stock data using the following code and getting all kinds of errors. Any suggestions are most appreciated.
import pandas as pd
from pandas_datareader import data
import numpy as np
import matplotlib.pyplot as plt
df = data.DataReader(name = "GHC", data_source = "google", start = "2010-01-01", end = "2017-11-01")
#reset the index
df['ID'] = " "
df.reset_index(inplace = True)
df.set_index("ID", inplace = True)
#print(df.head(10))
#create new df for plotting
data = df[['Date', 'Close']]
#print(data.head(10))
#plot stock data
x = data['Date']
y = data['Close']
plt.scatter(x, y)
#create and plot a trendline
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
plt.plot(x, p(x), "r--")
plt.show()
mylist = [1, 2, 3, 4, 5, 6, 7]
N = 3
cumsum, moving_aves = [0], []
for i, x in enumerate(mylist, 1):
cumsum.append(cumsum[i-1] + x)
if i>=N:
moving_ave = (cumsum[i] - cumsum[i-N])/N
#can do stuff with moving_ave here
moving_aves.append(moving_ave)
I came up with this solution:
import pandas as pd
from pandas_datareader import data
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 25,16
from datetime import datetime
%matplotlib inline
df = data.DataReader(name = "GHC", data_source = "google", start = "2017-01-01", end = "2017-11-01")
x = list(range(0, len(df.index.tolist()), 1))
y = df['Close']
date_x = df.index
fit = np.polyfit(x,y, 1)
fit_fn = np.poly1d(fit)
plt.plot(date_x, fit_fn(x), 'k-')
plt.plot(date_x, y, 'go', ms=2)
But now the dates on the x axis don't come out right. Any suggestions on how to keep the original format of 2017-01-01 etc?

Matplotlib - when plotting time it puts decimal zeros after the seconds

Here is the plot I have currently:
The 'time' strings I import are like this: 08:12:46, so I would like to cut the zeros at the end, but I can't seem to find the problem. Also, is there a way to show the floats on the Y axis in the exponential format, which is the one I am importing from the csv?
I just started to look into matplotlib and numpy for work, so if you have some advice it would be fantastic.
Thank you in advance!
import numpy as np
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
print(plt.style.available)
style.use('ggplot')
fig = plt.figure()
ax1 = fig.add_subplot(1,1,1)
def animate(i):
graph_data = open('C:\\Users\\arzuffi pc test\\Desktop\\VMI WIP - Copia (2)\\Cycle info\\_Current Cycle.csv','r').read()
#graph_data = open('C:\\Users\\arzuffi pc test\\Desktop\\Visual Machine Interface Alpha 1.4.3\\Cycle info\\_Current Cycle.csv','r').read()
lines = graph_data.split('\n')
xs = []
ys = []
skip = 0
for line in lines:
if skip < 7:
skip += 1
else:
if len(line) > 1:
time, cycle, pc, pd, co, hv, cr, ph, gd_volt, gd_amp, gd_power, eva_amp, eva_volt, p_rpm, p_amp, r1_rpm, r1_amp, r2_rpm, r2_amp, hmdso, gas, ahc, diff_l, diff_r = line.split(';')
#x, y = line.split(';')
print(time)
print(pc)
xs.append(dt.datetime.strptime(time,'%H:%M:%S'))#.date())
ys.append(pc)
#print(i)
#xs = matplotlib.dates.date2num(xs)
print(xs)
if len (xs) > 100:
xs = xs[-100:]
if len (ys) > 100:
ys = ys[-100:]
ax1.clear()
ax1.plot(xs, ys)
plt.gcf().autofmt_xdate()
ani = animation.FuncAnimation(fig, animate,interval = 1000)
plt.show()
these are the data:
You can specify the format to be used as follows:
xs = matplotlib.dates.date2num(xs) # You need to keep this line
hfmt = matplotlib.dates.DateFormatter('%H:%M:%S')
ax1.xaxis.set_major_formatter(hfmt)
ax1.plot(xs, ys) # You have this already
This would give you an output as follows:

Normalizing pandas DataFrame rows by their sums

What is the most idiomatic way to normalize each row of a pandas DataFrame? Normalizing the columns is easy, so one (very ugly!) option is:
(df.T / df.T.sum()).T
Pandas broadcasting rules prevent df / df.sum(axis=1) from doing this
To overcome the broadcasting issue, you can use the div method:
df.div(df.sum(axis=1), axis=0)
See pandas User Guide: Matching / broadcasting behavior
I would suggest to use Scikit preprocessing libraries and transpose your dataframe as required:
'''
Created on 05/11/2015
#author: rafaelcastillo
'''
import matplotlib.pyplot as plt
import pandas
import random
import numpy as np
from sklearn import preprocessing
def create_cos(number_graphs,length,amp):
# This function is used to generate cos-kind graphs for testing
# number_graphs: to plot
# length: number of points included in the x axis
# amp: Y domain modifications to draw different shapes
x = np.arange(length)
amp = np.pi*amp
xx = np.linspace(np.pi*0.3*amp, -np.pi*0.3*amp, length)
for i in range(number_graphs):
iterable = (2*np.cos(x) + random.random()*0.1 for x in xx)
y = np.fromiter(iterable, np.float)
if i == 0:
yfinal = y
continue
yfinal = np.vstack((yfinal,y))
return x,yfinal
x,y = create_cos(70,24,3)
data = pandas.DataFrame(y)
x_values = data.columns.values
num_rows = data.shape[0]
fig, ax = plt.subplots()
for i in range(num_rows):
ax.plot(x_values, data.iloc[i])
ax.set_title('Raw data')
plt.show()
std_scale = preprocessing.MinMaxScaler().fit(data.transpose())
df_std = std_scale.transform(data.transpose())
data = pandas.DataFrame(np.transpose(df_std))
fig, ax = plt.subplots()
for i in range(num_rows):
ax.plot(x_values, data.iloc[i])
ax.set_title('Data Normalized')
plt.show()

Plot Time values with matplotlib

I have a sample text file as follows:(test.txt)
06:00:41 2
06:10:41 4
06:20:41 6
06:25:41 8
I want to plot this taking time value for X-Axis and the 2nd column to the y axis.
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates
import time
import numpy as np
f2 = open('test.txt', 'r')
lines = f2.readlines()
f2.close()
x1 = []
y1 = []
for line in lines:
p = line.split()
a = time.strptime((p[0]),"%H:%M:%S")
x1.append((a))
y1.append(float(p[1]))
xv = np.array(x1)
yv = np.array(y1)
plt.plot(xv, yv)
plt.show()
This is how I plot it. But the X-Axis shows 500,1000,1500 etc. Instead I want to show the time values as 06:00:41,06:10:41,06:20:41 etc. Please help me?
Have you tried plot_date?
It may help if you put your time series data as datetime values
from datetime import datetime
for line in lines:
p = line.split()
xv.append(datetime.strptime(p[0], '%H:%M:%S'))
fig, ax = plt.subplots()
ax.plot_date(xv, yv)
If you want a line plot, try changing the linestyle (docs)
ax.plot_date(xv, yv, linestyle='-')

Categories