Advice on plotting large amount of data - python

I'm working on a very cheap seismometer mainly for educational purposes and some research. I would like to show every few hours the seismic signal of one of the channels as the image I have attached, using matplotlib.
The problem is that I get 100 data points every second, and when I plot this data on a Raspberry Pi it usually hangs and stops working.
The way I plot the data for each 4 hours subplot is reading again all the data and plotting only between the limits of the subplot, but I find this not efficient and probably the cause of the raspberry hanging.
I've been thinking for days how I could do this to avoid using a lot of memory for each subplot, but I can't find an answer as I'm a geologist and programming is a big issue for me.
Does anybody have a better idea for doing this?
import matplotlib.pyplot as plt
import time
import os.path
import datetime
import sys
import numpy
import pytz
import matplotlib.dates as mdates
import ftplib
from pylab import *
import numpy as np
from itertools import islice
from time import sleep
from matplotlib.pyplot import specgram
from scipy.signal import medfilt
import csv
archivo='sismo1545436800'
def subirftp(archivosubir):
    """Upload ``<archivosubir>.png`` over FTP and request a remote listing.

    Parameters
    ----------
    archivosubir : str
        File name without extension. ``.png`` is appended both for the local
        file that is read and for the remote name used in the ``STOR`` command.

    Notes
    -----
    ``retrlines('LIST')`` is called without a callback, so the listing is
    printed by ftplib rather than returned.
    """
    nombre_png = archivosubir + '.png'
    # Context managers make sure the local file is closed and the FTP session
    # is ended even when the transfer raises part-way through.
    with ftplib.FTP('---', 's--- ', '----') as session, \
            open(nombre_png, 'rb') as imagen:
        session.storbinary('STOR ' + nombre_png, imagen)  # send the file
        session.retrlines('LIST')
# Font settings kept from the original script (no plot call below uses them).
font = {'family': 'serif',
        'color': 'darkred',
        'weight': 'normal',
        'size': 16,
        }

ruta_datos = 'Z:/nchazarra/sismografos/' + str(archivo) + '.txt'

# First pass: count the lines so that the final one can be left out below.
# NOTE(review): presumably the last line may still be being written by the
# logger -- confirm; the original script skipped it in the same way.
with open(ruta_datos) as fu:
    nr_of_lines = sum(1 for line in fu)
print(nr_of_lines)

empiezo = time.time()

# Second pass: keep the timestamps as plain epoch seconds. Building one
# datetime object per sample for the whole day is what exhausts memory on a
# small machine, so datetimes are only created per 4-hour window further down.
marcas = []
valora = []
with open(ruta_datos) as f:
    for row in islice(csv.reader(f), 0, max(nr_of_lines - 1, 0)):
        marcas.append(float(row[0]))
        try:
            valora.append(int(row[1]))
        except (IndexError, ValueError):
            # A missing or malformed sample is recorded as 0.
            valora.append(0)

epoca = np.asarray(marcas, dtype=float)
valores2 = np.asarray(valora, dtype=float)
del marcas, valora  # release the Python lists before plotting

# Normalise exactly as before (scale to [-1, 0], then centre on the median),
# but vectorised instead of looping over every sample in Python.
minimo = valores2.min()
maximo = valores2.max()
rango = maximo - minimo
if rango == 0:
    # Flat signal: avoid dividing by zero; everything sits on the median.
    valores2 = np.zeros_like(valores2)
else:
    valores2 = (valores2 - minimo) / rango - 1
    valores2 = valores2 - np.median(valores2)

# One entry per subplot: axis start, axis end, line colour (same limits and
# colours as the six hand-written subplots this loop replaces).
franjas = [
    (datetime.datetime(2018, 12, 22, 0, 0), datetime.datetime(2018, 12, 22, 3, 59, 59), 'red'),
    (datetime.datetime(2018, 12, 22, 4, 0), datetime.datetime(2018, 12, 22, 8, 0), 'green'),
    (datetime.datetime(2018, 12, 22, 8, 0), datetime.datetime(2018, 12, 22, 12, 0), 'blue'),
    (datetime.datetime(2018, 12, 22, 12, 0), datetime.datetime(2018, 12, 22, 16, 0), 'red'),
    (datetime.datetime(2018, 12, 22, 16, 0), datetime.datetime(2018, 12, 22, 20, 0), 'green'),
    (datetime.datetime(2018, 12, 22, 20, 0), datetime.datetime(2018, 12, 22, 23, 59, 59), 'blue'),
]

for numero, (desde, hasta, color) in enumerate(franjas, start=1):
    franja = plt.subplot(6, 1, numero)
    franja.axis([desde, hasta, -0.05, 0.05])
    # The samples are UTC timestamps, so the window limits are read as UTC too.
    t_desde = desde.replace(tzinfo=pytz.utc).timestamp()
    t_hasta = hasta.replace(tzinfo=pytz.utc).timestamp()
    en_franja = (epoca >= t_desde) & (epoca <= t_hasta)
    # Only the samples inside this window are converted and handed to
    # matplotlib, instead of the full day for every subplot.
    tiempos = [datetime.datetime.fromtimestamp(t, tz=pytz.utc)
               for t in epoca[en_franja]]
    franja.plot(tiempos, valores2[en_franja], lw=0.2, color=color)
    franja.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    del tiempos

acabo = time.time()
cuantotardo = acabo - empiezo
print('Madre mía, he tardado en hacer esto '+str(cuantotardo)+' segundos')
plt.savefig(archivo + ".png", dpi=300)
subirftp(archivo)
plt.show()

Do you need to plot every data point? You could consider plotting every 100 or so. As long as the frequency of your signal isn't too high, I think it could work. Something like this:
import matplotlib.pyplot as plt
import numpy as np
# Synthetic example: one period of a sine wave with Gaussian noise on top.
n_points = 10000
X = np.arange(n_points) / n_points * 2 * np.pi
noise = np.random.normal(size=n_points) / 10
Y = np.sin(X) + noise
# Plot only every 100th sample instead of all of them.
step = 100
plt.plot(X[::step], Y[::step])
versus all points:

You can save a fair bit of memory by sub-setting the arrays before you plot them:
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
# One synthetic sample every 10 ms for a full day.
n_times = 24 * 60 * 60 * 100
day_start = datetime.datetime(2018, 12, 22, 0, 0)
times = [day_start + datetime.timedelta(milliseconds=10 * x)
         for x in range(n_times)]
tiempo2 = np.array(times)
valores2 = np.random.normal(size=n_times)

# Six stacked 4-hour bands; each one plots only the samples that fall in
# [lower, upper) rather than the whole day.
band_colors = ('red', 'green', 'blue', 'red', 'green', 'blue')
for slot, band_color in enumerate(band_colors):
    lower = day_start + datetime.timedelta(hours=4 * slot)
    upper = lower + datetime.timedelta(hours=4)
    franja = plt.subplot(6, 1, slot + 1)
    index = np.logical_and(tiempo2 >= lower, tiempo2 < upper)
    franja.plot(tiempo2[index], valores2[index], lw=0.2, color=band_color)
    franja.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
plt.show()

Related

how plt.show() gets the parameters here?

from the book data-science for supply chain forecast by Nicolas Vandeput
In the first chapter, in this code snippet, plt.show() displays the data, but I cannot find the point at which the arguments are passed to it.
from chap_01_05_moving_average_function import (
moving_average,
)
import matplotlib.pyplot as plt
# Demand history used as input for the forecast.
d = [28, 19, 18, 13, 19, 16, 19, 18, 13, 16, 16, 11, 18, 15, 13, 15, 13, 11, 13, 10, 12]
# moving_average is imported from chap_01_05_moving_average_function and
# returns a DataFrame with "Demand", "Forecast" and "Error" columns.
df = moving_average(d)
print(df)
# .plot() is called on the two selected columns, so the data comes from the
# frame itself; this first call uses default settings.
df[["Demand", "Forecast"]].plot()
df.index.name = "Periods"
print("index", df.index.name)
# Second plot of the same columns, this time with explicit size, title,
# y-range and line styles.
df[["Demand", "Forecast"]].plot(
figsize=(8, 3), title="Moving average", ylim=(0, 30), style=["-", "--"]
)
# plt.show() is given no data here. NOTE(review): it presumably displays the
# figures that the .plot() calls above created through pyplot -- confirm
# against the pandas/matplotlib documentation.
plt.show()
this is chap_01_05_moving_average_function.py:
# page 5 - moving average
import numpy as np
import pandas as pd
def moving_average(d, extra_periods=1, n=3):
    """Forecast demand with an n-period moving average.

    Parameters
    ----------
    d : sequence of numbers
        Historical demand, oldest first.
    extra_periods : int, default 1
        Number of future periods appended after the history. The first one
        gets the average of the last ``n`` observations; any further ones
        repeat that value.
    n : int, default 3
        Number of past periods averaged for each forecast.

    Returns
    -------
    pandas.DataFrame
        Columns ``Demand`` (history padded with NaN for the future periods),
        ``Forecast`` (NaN for the first ``n`` periods, and everywhere when the
        history is shorter than ``n``) and ``Error`` (``Demand - Forecast``).
    """
    d = np.array(d)
    cols = len(d)
    d = np.append(d, [np.nan] * extra_periods)
    f = np.full(cols + extra_periods, np.nan)

    # Last index that can receive a forecast: index `cols` (the first future
    # period) only exists when extra_periods >= 1.
    last_t = min(cols, len(f) - 1)
    for t in range(n, last_t + 1):
        f[t] = np.mean(d[t - n : t])

    # Repeat the last forecast for any remaining future periods. Skipped when
    # the history was too short to produce a forecast at all.
    if last_t >= n:
        f[cols + 1 :] = f[last_t]

    df = pd.DataFrame.from_dict({"Demand": d, "Forecast": f, "Error": d - f})
    return df
# numpy - adding two arrays is element-wise: prints [11 22 33 44 55 66]
ts = np.array([1, 2, 3, 4, 5, 6])
ts2 = np.array([10, 20, 30, 40, 50, 60])
print(ts + ts2)
# numpy - np.mean also accepts a plain list: prints 2.0
alist = [1, 2, 3]
alistmean = np.mean(alist)
print(alistmean)
# indexing - a single position (0-based) in a list and in an array
alist = ["cat", "dog", "mouse"]
print(alist[1])
anarray = np.array([1, 2, 3])
print(anarray[0])
# slicing - start:end; [1:] is everything from index 1, [:1] only the first item
alist = ["cat", "dog", "mouse"]
print(alist[1:])
anarray = np.array([1, 2, 3])
print(anarray[:1])
# slicing - negative indices count from the end: [-1] is the last item,
# [:-1] is everything except the last item
alist = ["cat", "dog", "mouse"]
print(alist[-1])
print(alist[:-1])
I am new to Python and I don't know if there is an implicit way of passing parameters.

an efficient iterator for getting the top k minimum of a list

i have a list of many unsorted numbers, for example :
# Test data: N random integers, each drawn with random.randint(0, N).
N=1000000
x = [random.randint(0,N) for i in range(N)]
I only want the top k minimum values, currently this is my approach
def f1(x,k): # O(nlogn)
    """Return the k smallest items of x, ascending, by sorting a full copy."""
    ordered = list(x)
    ordered.sort()
    return ordered[:k]
This performs lots of redundant operations, as we are sorting the remaining N-k elements too. Enumerating doesn't work either:
def f2(x,k): # O(nlogn)
    """Return the first k items of sorted(x), collected one at a time.

    The whole input is still sorted before the first item is taken, so this
    does the same work as slicing the sorted list.
    """
    ordered = sorted(x)
    y = []
    idx = 0
    while idx < len(ordered) and idx != k:
        y.append(ordered[idx])
        idx += 1
    return y
Verifying enumerating doesn't help:
# Wall-clock timing of one f1 call with k=3; the number in the trailing
# comment is the asker's recorded result.
if 1 : ## Time taken = 0.6364126205444336
st1 = time.time()
y = f1(x,3)
et1 = time.time()
print('Time taken = ', et1-st1)
# Same measurement for f2, on the same data and k.
if 1 : ## Time taken = 0.6330435276031494
st2 = time.time()
y = f2(x,3)
et2 = time.time()
print('Time taken = ', et2-st2)
Probably i need a generator that continually returns the next minimum of the list, and since getting the next minimum should be O(1) operation, the function f3() should be just O(k) right ?
What GENERATOR function will work best in this case?
# Template for the solution being asked for. GENERATOR is a placeholder that
# is not defined anywhere in this snippet: it stands for an iterable that
# yields the values of x in ascending order. The O(k) on the def line is the
# asker's hoped-for cost, not something this template establishes.
def f3(x,k): # O(k)
y = []
for idx,val in enumerate( GENERATOR ):
if idx == k: break
y.append(val)
return y
EDIT 1 :
The analysis shown here are wrong, please ignore and jump to Edit 3
Lowest bound possible: In terms of time complexity I think this is the lower bound achievable, but as it will modify the original list, it isn't the solution for my problem.
def f3(x,k):
    """Return the k smallest items of x in ascending order, removing them from x.

    Every round scans the list with min() and again with remove(), so the
    cost grows with k times the length of x rather than with k alone. The
    caller's list is modified.
    """
    y = []
    remaining = k
    while remaining > 0:
        curr_min = min(x)
        x.remove(curr_min)  # This removes from the original list
        y.append(curr_min)
        remaining -= 1
    return y
# Wall-clock timing of one f3 call with k=3; the number in the trailing
# comment is the asker's recorded result.
if 1 : ## Time taken = 0.07096505165100098
st3 = time.time()
y = f3(x,3)
et3 = time.time()
print('Time taken = ', et3-st3)
O(N) Time | O(N) Storage : Best solution so far, however it requires a copy of the original list, hence resulting in O(N) time and storage, having an iterator that gets the next minimum, for k times, will be O(1) storage and O(k) time.
def f3(x,k): # O(k*N) Time | O(N) Storage
    """Return the k smallest items of x in ascending order without modifying x.

    The repeated min()/remove() rounds run on a private copy, which is where
    the O(N) storage comes from. When k exceeds len(x) all items are returned
    instead of failing on an empty list.
    """
    pool = list(x)  # work on a copy so the caller's list stays intact
    y = []
    for _ in range(min(k, len(pool))):
        curr_min = min(pool)
        pool.remove(curr_min)
        y.append(curr_min)
    return y
# Wall-clock timing of one f3 call with k=3; the number in the trailing
# comment is the asker's recorded result.
if 1 : ## Time taken = 0.0814204216003418
st3 = time.time()
y = f3(x,3)
et3 = time.time()
print('Time taken = ', et3-st3)
EDIT 2 :
Thanks for pointing out my above mistakes, getting minimum of a list should be O(n), not O(1).
EDIT 3 :
Here's a full script of analysis after using the recommended solution. Now this raised more questions
1) Constructing x as a heap using heapq.heappush is slower than using list.append x to a list, then to heapq.heapify it ?
2) heapq.nsmallest slows down if x is already a heap?
3) Current conclusion : don't heapq.heapify the current list, then use heapq.nsmallest.
import time, random, heapq
import numpy as np
class Timer:
    """Context manager that prints how long its ``with`` body took."""

    def __init__(self, description):
        # Label echoed (in single quotes) in the report line.
        self.description = description

    def __enter__(self):
        # perf_counter reading taken on entry to the with-block.
        self.start = time.perf_counter()
        return self

    def __exit__(self, *args):
        # Nothing is returned, so an exception raised in the body propagates.
        elapsed = time.perf_counter() - self.start
        print(f"The time for '{self.description}' took: {elapsed}.")
def f3(x,k):
    """Return the k smallest items of x in ascending order, removing them from x.

    Each round does a full min() scan plus a remove(); callers that need to
    keep their list pass in a copy.
    """
    y = []
    remaining = k
    while remaining > 0:
        curr_min = min(x)
        x.remove(curr_min)
        y.append(curr_min)
        remaining -= 1
    return y
def f_sort(x, k):
    """Return the first k items of sorted(x); x itself is left untouched."""
    ordered = sorted(x)
    y = []
    idx = 0
    while idx < len(ordered) and idx != k:
        y.append(ordered[idx])
        idx += 1
    return y
def f_heapify_pop(x, k):
    """Heapify x in place, then pop and return its k smallest items in order.

    x is modified: it is reordered into a heap and loses the popped items.
    """
    heapq.heapify(x)
    smallest = []
    for _ in range(k):
        smallest.append(heapq.heappop(x))
    return smallest
def f_heap_pop(x, k):
    """Pop and return k items from x using heapq.heappop.

    No heapify is done here, so x has to be a valid heap already for the
    popped items to be the k smallest. x loses the popped items.
    """
    popped = []
    for _ in range(k):
        popped.append(heapq.heappop(x))
    return popped
def f_heap_nsmallest(x, k):
    """Return the k smallest items of x, ascending, via heapq.nsmallest."""
    smallest = heapq.nsmallest(k, x)
    return smallest
def f_np_partition(x, k):
    """Return the k smallest items of x as a NumPy array, in no particular order.

    Parameters
    ----------
    x : list or numpy.ndarray
        Values to select from. NOTE(review): assumed to be 1-D -- confirm.
    k : int
        Number of items wanted.

    Returns
    -------
    numpy.ndarray
        The k smallest values (unsorted). When k is at least len(x) a copy of
        all values is returned.
    """
    if k >= len(x):
        # np.partition rejects a kth index outside the array, and there is
        # nothing to partition anyway when every item is wanted.
        return np.array(x)
    return np.partition(x, k)[:k]
# Times four ways of building the test containers from the same random data.
if True : ## Constructing list vs heap
N=1000000
# N= 500000
x_main = [random.randint(0,N) for i in range(N)]
# Baseline: plain list built with append.
with Timer('constructing list') as t:
x=[]
for curr_val in x_main:
x.append(curr_val)
# Heap built incrementally, one heappush per value.
with Timer('constructing heap') as t:
x_heap=[]
for curr_val in x_main:
heapq.heappush(x_heap, curr_val)
# Plain list first, then a single heapify call at the end.
with Timer('heapify x from a list') as t:
x_heapify=[]
for curr_val in x_main:
x_heapify.append(curr_val)
heapq.heapify(x_heapify)
# Conversion of the plain list to a NumPy array.
with Timer('x list to numpy') as t:
x_np = np.array(x)
"""
N=1000000
The time for 'constructing list' took: 0.2717265225946903.
The time for 'constructing heap' took: 0.45691753178834915.
The time for 'heapify x from a list' took: 0.4259336367249489.
The time for 'x list to numpy' took: 0.14815033599734306.
"""
# Runs every candidate TRIALS times with k=30. Each call receives a fresh
# copy of its input, so the copy is part of every measured time.
if True : ## Performing experiments on list vs heap
TRIALS = 10
## Experiments on x as list :
with Timer('f3') as t:
for _ in range(TRIALS):
y = f3(x.copy(), 30)
print(y)
with Timer('f_sort') as t:
for _ in range(TRIALS):
y = f_sort(x.copy(), 30)
print(y)
# x is a plain list here; it is handed to np.partition as-is.
with Timer('f_np_partition on x') as t:
for _ in range(TRIALS):
y = f_np_partition(x.copy(), 30)
print(y)
## Experiments on x as list, but converted to heap in place :
with Timer('f_heapify_pop on x') as t:
for _ in range(TRIALS):
y = f_heapify_pop(x.copy(), 30)
print(y)
# NOTE(review): f_heap_nsmallest does not heapify, so this run works on the
# plain list copy despite the section heading -- confirm that is intended.
with Timer('f_heap_nsmallest on x') as t:
for _ in range(TRIALS):
y = f_heap_nsmallest(x.copy(), 30)
print(y)
## Experiments on x_heap as heap :
with Timer('f_heap_pop on x_heap') as t:
for _ in range(TRIALS):
y = f_heap_pop(x_heap.copy(), 30)
print(y)
with Timer('f_heap_nsmallest on x_heap') as t:
for _ in range(TRIALS):
y = f_heap_nsmallest(x_heap.copy(), 30)
print(y)
## Experiments on x_np as numpy array :
with Timer('f_np_partition on x_np') as t:
for _ in range(TRIALS):
y = f_np_partition(x_np.copy(), 30)
print(y)
#
"""
Experiments on x as list :
[0, 1, 1, 4, 5, 5, 5, 6, 6, 7, 7, 7, 10, 10, 11, 11, 12, 12, 12, 13, 13, 14, 18, 18, 19, 19, 21, 22, 24, 25]
The time for 'f3' took: 10.180440502241254.
[0, 1, 1, 4, 5, 5, 5, 6, 6, 7, 7, 7, 10, 10, 11, 11, 12, 12, 12, 13, 13, 14, 18, 18, 19, 19, 21, 22, 24, 25]
The time for 'f_sort' took: 9.054768254980445.
[ 1 5 5 1 0 4 5 6 7 6 7 7 12 12 11 13 11 12 13 18 10 14 10 18 19 19 21 22 24 25]
The time for 'f_np_partition on x' took: 1.2620676811784506.
Experiments on x as list, but converted to heap in place :
[0, 1, 1, 4, 5, 5, 5, 6, 6, 7, 7, 7, 10, 10, 11, 11, 12, 12, 12, 13, 13, 14, 18, 18, 19, 19, 21, 22, 24, 25]
The time for 'f_heapify_pop on x' took: 0.8628390356898308.
[0, 1, 1, 4, 5, 5, 5, 6, 6, 7, 7, 7, 10, 10, 11, 11, 12, 12, 12, 13, 13, 14, 18, 18, 19, 19, 21, 22, 24, 25]
The time for 'f_heap_nsmallest on x' took: 0.5187360178679228.
Experiments on x_heap as heap :
[0, 1, 1, 4, 5, 5, 5, 6, 6, 7, 7, 7, 10, 10, 11, 11, 12, 12, 12, 13, 13, 14, 18, 18, 19, 19, 21, 22, 24, 25]
The time for 'f_heap_pop on x_heap' took: 0.2054140530526638.
[0, 1, 1, 4, 5, 5, 5, 6, 6, 7, 7, 7, 10, 10, 11, 11, 12, 12, 12, 13, 13, 14, 18, 18, 19, 19, 21, 22, 24, 25]
The time for 'f_heap_nsmallest on x_heap' took: 0.6638103127479553.
[ 1 5 5 1 0 4 5 6 7 6 7 7 12 12 11 13 11 12 13 18 10 14 10 18 19 19 21 22 24 25]
The time for 'f_np_partition on x_np' took: 0.2107151597738266.
"""
This is a classic problem for which the generally accepted solution is a data structure known as a heap. Below I have done 10 trials for each algorithm f3 and f_heap. As the value for the second argument, k, gets larger the discrepancy between the two performances become even greater. For k = 3, we have algorithm f3 taking .76 seconds and algorithm f_heap taking .54 seconds. But with k = 30 these values become respectively 6.33 seconds and .54 seconds.
import time, random, heapq
class Timer:
    """Context manager that prints how long its ``with`` body took."""

    def __init__(self, description):
        # Label echoed in the report line.
        self.description = description

    def __enter__(self):
        # perf_counter reading taken on entry to the with-block.
        self.start = time.perf_counter()
        return self

    def __exit__(self, *args):
        # Nothing is returned, so an exception raised in the body propagates.
        elapsed = time.perf_counter() - self.start
        print(f"The time for {self.description} took: {elapsed}.")
def f3(x,k):
    """Return the k smallest items of x in ascending order, removing them from x.

    Each round does a full min() scan plus a remove(), so the cost grows with
    k times the length of x. Callers that need to keep their list pass a copy.
    """
    y = []
    remaining = k
    while remaining > 0:
        curr_min = min(x)
        x.remove(curr_min)
        y.append(curr_min)
        remaining -= 1
    return y
def f_heap(x, k):
    """Return the k smallest items of x in ascending order using a heap.

    x is heapified in place and the returned items are popped from it, so the
    caller's list is modified. The heapq documentation describes heapify as
    linear time; each of the k pops then costs O(log n).

    If the heap does not need to be retained and only the k smallest values
    are wanted, heapq.nsmallest(k, x) can be used instead.
    """
    heapq.heapify(x)
    smallest = []
    for _ in range(k):
        smallest.append(heapq.heappop(x))
    return smallest
# Benchmark: N random integers, each algorithm run TRIALS times with k=30.
# Both functions modify their argument, so every call gets a fresh copy of x
# (the copy is included in the measured time for both).
N=1000000
x = [random.randint(0,N) for i in range(N)]
TRIALS = 10
with Timer('f3') as t:
for _ in range(TRIALS):
y = f3(x.copy(), 30)
print(y)
print()
with Timer('f_heap') as t:
for _ in range(TRIALS):
y = f_heap(x.copy(), 30)
print(y)
Prints:
The time for f3 took: 6.3301973.
[0, 1, 1, 7, 9, 11, 11, 13, 13, 14, 17, 18, 18, 18, 19, 20, 20, 21, 23, 24, 25, 25, 26, 27, 28, 28, 29, 30, 30, 31]
The time for f_heap took: 0.5372357999999995.
[0, 1, 1, 7, 9, 11, 11, 13, 13, 14, 17, 18, 18, 18, 19, 20, 20, 21, 23, 24, 25, 25, 26, 27, 28, 28, 29, 30, 30, 31]
A Python Demo
Update
Selecting the k smallest using numpy.partition as suggested by #user2357112supportsMonica is indeed very fast if you are already dealing with a numpy array. But if you are starting with an ordinary list and factor in the time to convert it to a numpy array just to use the numpy.partition method, then it is slower than using the heapq methods:
def f_np_partition(x, k):
    """Return the k smallest items of x as an ascending list.

    np.partition moves the k smallest values to the front without fully
    sorting; only those k values are then sorted. When k is at least len(x)
    the whole input is sorted instead, because np.partition rejects a kth
    index outside the array.

    NOTE(review): x is assumed to be a 1-D list or array -- confirm.
    """
    if k >= len(x):
        return sorted(np.asarray(x))
    return sorted(np.partition(x, k)[:k])
# The list-to-array conversion is done inside the timed loop on purpose, so
# the measurement includes the cost of getting from a plain list to NumPy.
with Timer('f_np_partition') as t:
for _ in range(TRIALS):
x_np = np.array(x)
y = f_np_partition(x_np.copy(), 30) # don't really need to copy
print(y)
The relative timings:
The time for f3 took: 7.2039111.
[0, 2, 2, 3, 3, 3, 5, 6, 6, 6, 9, 9, 10, 10, 10, 11, 11, 12, 13, 13, 14, 16, 16, 16, 16, 17, 17, 18, 19, 20]
The time for f_heap took: 0.35521280000000033.
[0, 2, 2, 3, 3, 3, 5, 6, 6, 6, 9, 9, 10, 10, 10, 11, 11, 12, 13, 13, 14, 16, 16, 16, 16, 17, 17, 18, 19, 20]
The time for f_np_partition took: 0.8379164999999995.
[0, 2, 2, 3, 3, 3, 5, 6, 6, 6, 9, 9, 10, 10, 10, 11, 11, 12, 13, 13, 14, 16, 16, 16, 16, 17, 17, 18, 19, 20]

How do I customize the colours in the bars using custom number set in matplotlib?

I am trying to add colors to the bar according to the integer value, lets say the values are 1 to 20, 1 will be the lightest and 20 will be the darkest, but none of the colors can be the same, so far I am at using an incorrect colorbar method:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The asker's attempt, kept as posted (they describe it as incorrect).
df = pd.DataFrame({'values': [17, 16, 16, 15, 15, 15, 14, 13, 13, 13]})
# Horizontal bar chart of the single 'values' column.
df.plot(kind='barh')
# NOTE(review): imshow renders the frame itself as an image and the colorbar
# then refers to that image, not to the bars -- presumably the cause of the
# "strange result"; confirm.
plt.imshow(df)
plt.colorbar()
plt.show()
But it gives a strange result of:
How do I fix it?
I just realized using plt.barh and colormaps provide better plots, use:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'values': [0, 0, 0, 0, 0, 17, 16, 16, 15, 15, 15, 14, 13, 13, 13]})
# Ascending order, with a fresh 0..n-1 index to use as bar positions.
df = df.sort_values(by='values').reset_index(drop=True)

# Zeros are replaced by the smallest non-zero value so that they do not
# stretch the colour scale.
smallest_nonzero = df.loc[df['values'] != 0, 'values'].min()
s = df['values'].replace(0, smallest_nonzero)
s = s - s.min()

# Colours are passed as number strings: 1 for the smallest value down to 0
# for the largest.
shade = 1 - (s / s.max())
colors = shade.astype(str).tolist()

plt.barh(df.index, df['values'].values, color=colors)
plt.show()
Which gives:

How to randomly select a specific sequence from a list?

I have a list of hours starting from (0 is midnight).
hour = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
I want to generate a sequence of 3 consecutive hours randomly. Example:
[3,6]
or
[15, 18]
or
[23,2]
and so on. random.sample does not achieve what I want!
import random
# The asker's attempt: two distinct hours drawn from 1..23 and sorted, so the
# pair is neither a fixed 3 hours apart nor able to wrap past midnight.
hourSequence = sorted(random.sample(range(1,24), 2))
Any suggestions?
I'm not exactly sure what you want, but probably:
import random
# Random start hour in 0..23 (randint includes both ends).
s = random.randint(0, 23)
# End hour is 3 hours later; % 24 wraps past midnight (e.g. 23 -> 2).
r = [s, (s+3)%24]
# Bare expression: echoes the value in an interactive session.
r
Out[14]: [16, 19]
Note: None of the other answers take into consideration the possible sequence [23,0,1].
Please note the following approach, which uses itertools from the Python standard library:
from itertools import islice, cycle
from random import choice
hours = list(range(24))  # the 24 hours of a day, 0 = midnight
start_offset = choice(hours)  # random position to start from
# cycle() repeats the hours endlessly and islice() skips ahead to the random
# start, so reading on from hour 23 continues with 0, 1, ...
select_init = islice(cycle(hours), start_offset, None)
# Take the next three consecutive hours from that position.
select_range = [next(select_init) for _ in range(3)]
print(select_range)
This will print sequences of three values on your hours list in a circular way, which will also include on your results for example the [23,0,1].
You can try this:
import random
hour = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
# Any position may be the start, including the last ones.
index = random.randrange(len(hour))
# The end point is 3 places later; the modulo wraps around the end of the
# list (e.g. 23 -> 2) instead of indexing past it.
l = [hour[index], hour[(index + 3) % len(hour)]]
print(l)
You can get a random number from the array you already created hour and take the element that is 3 places afterward:
import random
def random_sequence_endpoints(l, span):
    """Return ``[start, end]`` for a random run of ``span`` steps within ``l``.

    Parameters
    ----------
    l : sequence
        Items to choose from; it is treated as circular.
    span : int
        Distance in positions between the start and end items.

    Returns
    -------
    list
        The item at a random position and the item ``span`` places after it,
        wrapping around the end of ``l``.
    """
    i = random.choice(range(len(l)))
    # Index into the argument itself (not a module-level list) so that the
    # function works for any sequence passed in.
    return [l[i], l[(i + span) % len(l)]]
hour = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
result = random_sequence_endpoints(hour, 3)
This will work not only for the above hours list example but for any other list contain any other elements.

Formatting timestamps on x axis using matplotlib

How can I format the timestamps on the x axis as "%Y-%m-%d %H:%M"? ts is a list of timestamps, and I want the x axis to show only these values:
"2018-5-23 14:00", "2018-5-23 14:15" and "2018-5-23 14:30".
My current chart shows:
23 14:00, 23 14:05, 23 14:10, 23 14:15, 23 14:20, 23 14:25, 23 14:30.
import datetime
import matplotlib.pyplot as plt
from matplotlib import style
# The asker's script: three timestamps 15 minutes apart, plotted with the
# default date ticks and labels.
style.use('fivethirtyeight')
ts = [datetime.datetime(2018, 5, 23, 14, 0), datetime.datetime(2018, 5, 23, 14, 15), datetime.datetime(2018, 5, 23, 14, 30)]
values =[3, 7, 6]
# 'o-' draws a marker at every point and joins them with a line.
plt.plot(ts, values, 'o-')
plt.show()
Firstly, you need to set your x ticks so that only the values you want will be displayed. This can be done using plt.xticks(tick_locations, tick_labels).
To get the dates in the right format you need to specify a DateFormatter and apply it to your x axis.
Your code would look like:
import datetime
import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib.dates import DateFormatter
style.use('fivethirtyeight')
ts = [datetime.datetime(2018, 5, 23, 14, 0), datetime.datetime(2018, 5, 23, 14, 15), datetime.datetime(2018, 5, 23, 14, 30)]
values =[3, 7, 6]
plt.plot(ts, values, 'o-')
# Put a tick at each of the three timestamps only.
plt.xticks(ts, ts) # set the x ticks to your dates
# Tick label format: year-month-day hour:minute.
date_formatter = DateFormatter("%Y-%m-%d %H:%M") # choose desired date format
# The formatter is installed on the x axis of the current axes.
ax = plt.gca()
ax.xaxis.set_major_formatter(date_formatter)
plt.show()

Categories