label ticks in Matplotlib colormap - python

I am trying to make a colormap plotting a function that calculates the pH of the ocean from a set of values for CO2 and DIC (Carbon Dioxide and Dissolved Inorganic Carbon), for CO2 (x axis) and DIC (y axis) I make a set of values between 0.000090 and 0.001100, 0.001526 and 0.002100 respectively. I used linspace to make 100 points.
When I make the plot, in the x and y axis the ticks show values from 0 to 100 in both axis. The function works OK and the expected output plot makes sense but I do not know why the ticks labels show those values. Any help will be appreciated.
Below you have the code tha makes the plot.
import numpy as np
def K0_Weiss(S, TC):
TK=TC+273.15 #Temperatura de Celsius a Kelvin
lnK0 = 9345.17/TK - 60.2409 + 23.3585 * np.log(TK/100) + S * (0.023517 - 0.00023656 * TK + 4.7036e-07 *
TK * TK)
K0=np.exp(lnK0)
return K0
"""
Created on Mon Sep 3 11:51:00 2018
#author: fergomez
Esta funcion calcula K1 y K2 del sistema CO2-H20 segun
Roy et al., (1993), The dissociation constant of carbonic acid in sewater
at salinities of 5 to 45 and temperatures of 0 to 45 desgrees Celsius.
Marine Chemistry 44(2-4), 249-267.
Unidades
K1, K2=mol*kg-soln-1
Ejemplo de uso:
Input
K1, K2=K1_K2_Roy(35, 25)
print(K1, K2)
Output:
1.3921075396202872e-06 1.1887254858040348e-09
"""
def K1_K2_Roy(S, TC):
TK=TC+273.15 #Temperatura de Celsius a Kelvin
#K1 usando Roy et al., 1993
tmp1 = 2.83655 - 2307.1266/TK - 1.5529413 * np.log(TK)
tmp2 = -(0.207608410 + 4.0484/TK) * np.sqrt(S)
tmp3 = 0.08468345 * S - 0.00654208 * S**1.5
tmp4 = np.log(1 - 0.001005 * S)
lnK1roy = tmp1 + tmp2 + tmp3 + tmp4
K1 = np.exp(lnK1roy)
#K2 usando Roy et al., 1993
tmp1 = -9.226508 - 3351.6106/TK - 0.2005743 * np.log(TK)
tmp2 = (-0.106901773 - 23.9722/TK) * np.sqrt(S)
tmp3 = 0.1130822 * S - 0.00846934 * S**1.5 + np.log(1 -
0.001005 * S)
lnK2roy = tmp1 + tmp2 + tmp3
K2 = np.exp(lnK2roy)
return K1, K2
def K1_K2_SBY(S):
pK1=6.1568-0.00352*S
pK2=8.5503-0.0080*S
K1=10**(-pK1)
K2=10**(-pK2)
return K1, K2
"""
Created on Mon Sep 3 11:53:53 2018
#author: fergomez
Esta funcion calcula K0 del sistema CO2-H20 segun recomienda Dickson et al 2007
Guide for best practices for ocean CO2 measurements. PICES SP 3, 191 pp.
Conforme a Mucci, A., 1983. The solubility of calcite and aragonite in seawater
at various salinities and temperatures and one atmosphere of total pressure.
American journal of Science, 283, 780-799.
Ejemplo de uso:
Input
Ksp_a=Kspa_Mucci(35, 25)
print(Ksp_a)
Output:
6.48175906801198e-07
"""
def Kspa_Mucci(S, TC):
TK=TC+273.15 #Temperatura de Celsius a Kelvin
tmp1 = -171.945 - 0.077993 * TK + 2903.293/TK + 71.595 * np.log10(TK)
tmp2 = +(-0.068393 + 0.0017276 * TK + 88.135/TK) * np.sqrt(S)
tmp3 = -0.10018 * S + 0.0059415 * S**1.5
log10Kspa = tmp1 + tmp2 + tmp3
Ksp_a=10**(log10Kspa)
return Ksp_a
"""
Created on Mon Sep 3 11:54:42 2018
#author: fergomez
Esta funcion calcula K0 del sistema CO2-H20 segun recomienda Dickson et al 2007
Guide for best practices for ocean CO2 measurements. PICES SP 3, 191 pp.
Conforme a Mucci, A., 1983. The solubility of calcite and aragonite in seawater
at various salinities and temperatures and one atmosphere of total pressure.
American journal of Science, 283, 780-799.
Ejemplo de uso:
Input
Ksp_c=Kspc_Mucci(35, 25)
print(Ksp_c)
Output:
4.2723509278626e-07
"""
def Kspc_Mucci(S, TC):
TK=TC+273.15 #Temperatura de Celsius a Kelvin
tmp1 = -171.9065 - 0.077993 * TK + 2839.319/TK + 71.595 * np.log10(TK)
tmp2 = +(-0.77712 + 0.0028426 * TK + 178.34/TK) * np.sqrt(S)
tmp3 = -0.07711 * S + 0.0041249 * S**1.5
log10Kspc = tmp1 + tmp2 + tmp3
Ksp_c=10**(log10Kspc)
return Ksp_c
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 3 11:55:56 2018
Calcula el factor de correccion de la presion para las constantes de
equilibrio Ki usando Millero, F.J., (1995). Thermodynamics of the carbon
dioxide system in the oceans. Geochimica et Cosmochimica Acta 59:661-677.
#author: fergomez
Ejemplo de uso:
A presion atmosferica Kspa=4.27 e-07
Input (para aragonita):
fcorr=P_corr_Millero(-45.96, 0.5304, 0.0, -11.76e-03, 0.3692e-03, 0.0, 25, 300)
print(fcorr)
Output:
1.4787577790538065
entonces Kspa(presion atmosferica)*fcorr=Kspa_corregido
4.27 e-07*1.4787577790538065= 9.58 e-07
"""
#pagina 186 Andreas Hoffman PhD Thesis
def P_corr_Millero(a0, a1, a2, b0, b1, b2, TC, P):
TK=TC+273.15 #Temperatura de Celsius a Kelvin
R=83.131
delta_V= a0+a1*TC+a2*TC*TC
delta_k= (b0+b1*TC+b2*TC*TC)
lnKp=-(delta_V/(R * TK))*P+0.5*(delta_k/(R*TK))*P*P
fcorr=np.exp(lnKp)
return fcorr
#Llamamos las librerias que utilizaremos
import numpy as np
#import matplotlib.pyplot as plt
import math
from numpy import exp,arange
from pylab import meshgrid,cm,imshow,contour, clabel,colorbar,axis,title,show, contourf
#from constantes import K0_Weiss, K1_K2_Roy, Kspc_Mucci, Kspa_Mucci, P_corr_Millero
import matplotlib.pyplot as plt
#============================================================================
#Inicio de seccion de INPUT==================================================
#============================================================================
S=6.88 #ppt
TC=6.42 #Celsius
P=1.0 #atmosferas
K0=K0_Weiss(S, TC)
K1, K2=K1_K2_Roy(S, TC)
Ksp_c=Kspc_Mucci(S, TC)
Ksp_a=Kspa_Mucci(S, TC)
#============================================================================
#Fin de seccion de INPUT======================================================
#============================================================================
#=============================================================================
#Estimacion de Calcio de Tyrrell et al., 2008 y valores de ejemplo agua de mar
#============================================================================
#Ca=0.0103 #valores agua de mar
Ca=(0.331*S+0.392)*1e-03 #Mar Baltico
#Ca=(0.375*S+0.0368)*1e-03 # Bothnian Bay (parte norte del mar Baltico)
#=============================================================================
#def co2_t(pco2, K0):
# co2=pco2*K0
# return co2
#=============================================================================
#vco2_t = np.vectorize(co2_t)
#Pasamos de pco2 a co2
#co2=co2_t(pco2, K0)
#=============================================================================
#=============================================================================
#Calculo del factor de correccion por presion
#=============================================================================
fcorr_K1=P_corr_Millero(-25.50, 0.1271, 0.0, -3.08e-03, 0.0877e-03, 0.0, TC, P)
fcorr_K2=P_corr_Millero(-15.82, -0.219, 0.0, 1.136e-03, -0.1475e-03, 0.0, TC, P)
fcorr_Kspc=P_corr_Millero(-48.76, 0.5304, 0.0, -11.76e-03, 0.3692e-03, 0.0, TC, P)
fcorr_Kspa=P_corr_Millero(-45.96, 0.5304, 0.0, -11.76e-03, 0.3692e-03, 0.0, TC, P)
#=============================================================================
#=============================================================================
#Correccion de ctes de equilibrio por presion
#=============================================================================
K1=K1*fcorr_K1
K2=K2*fcorr_K2
Ksp_c=Ksp_c*fcorr_Kspc
Ksp_a=Ksp_a*fcorr_Kspa
#=============================================================================
#Funcion que realiza calculos del sistema carbonato y omega
#=============================================================================
def carbEq(co2, dic, Ca):
#-----------------------------------------------
# Resolvemos para obtener H+ (cf. a Zeebe and Wolf-Gladrow, 2000)
a1=co2-dic
a2=K1*co2
a3=K1*K2*co2
p= [a1, a2, a3]
r = np.roots(p)
h= max(np.real(r)) # Esto es para seleccionar la raiz real mas grande
#
# Calculamos HCO3, CO3 and CO2aq, usando DIC, AlK y H+
hco3=dic/(1+h/K1+K2/h)
co3=dic/(1+h/K2+h*h/(K1*K2))
#co2=dic/(1+K1/h+K1*K2/(h*h))
fco2=co2 / K0
pH=-math.log10(h)
alk=2*co3+hco3
#Saturacion de Calcita y Aragonita
omega_ar=Ca*co3/Ksp_a
omega_cal=Ca*co3/Ksp_c
return fco2, pH, co2, hco3, co3, h, dic, alk, omega_ar, omega_cal
vcarbEq = np.vectorize(carbEq)
#=============================================================================
#Uso de la funcion - ejemplo
#=============================================================================
#co2=co2*1000000
#dic=dic*1000000
pco2=np.linspace(0.000090, 0.001100, 100)
co2=pco2*K0
dic=np.linspace(0.001526, 0.002100, 100) #moles (para pasar de micromoles a moles, 1587*e-06)
#pco2=pco2*1000
#dic=dic*1000
co2, dic=meshgrid(co2*1000000, dic*1000000)
fco2, pH, co2, hco3, co3, h, dic, alk, omega_ar, omega_cal=vcarbEq(co2=co2, dic=dic, Ca=Ca)
#Ejemplo con valores de referencia de uso para agua de mar normal
#fco2, pH, co2, hco3, co3, h, dic, alk, omega_ar, omega_cal=carbEq(co2=0.00001032997, dic=0.002108, Ca=Ca)
#para este usar calcio=0.0103
#=============================================================================
#Grafico de los resultados
#=============================================================================
print(co2)
print('====================================')
print(dic)
print('====================================')
print(pH)
#print(omega_cal)
#fig, ax = plt.subplots()
im = imshow(pH, cmap=cm.RdBu) # dibujo la funcion
plt.xlabel('CO2')
plt.ylabel('DIC')
plt.ylim(0, 100)
plt.scatter(40, 40, color='k')
#ax.set_xlim(0.00000525, 0.000065)
#ax.set_ylim(0.001, 0.0021)
#plt.xlim(min(co2), max(co2))
# agrego lineas de contorno y rotulos
cset = contour(pH, arange(6.0,9.0,0.2),linewidths=2, cmap=cm.Set2)
clabel(cset,inline=True,fmt='%1.1f',fontsize=10)
colorbar(im) # agrego la barra de colores al costado

Related

Tridiagonal matrix algorithm : math are correct but i don't get the right results

I'm trying to code tdma. I have a working program for most of the process, but I can't get the results asked.
I'm supposed to get [2,3,-1,4,-2] as a result, but I get : [-0.5120543981481481, -0.1787210648148148, 0.4824421296296296, 0.4879012345679012, -3.7802469135802474].
I've checked one by one the coefficients for each step, and they're good. I think the problem comes from resolve2, but it worked for gauss pivot. It still can be math, but I'm pretty sure it's not.
import numpy as np
A=np.array([[3.,-3.,0,0,0],[2.,8.,4.,0,0],[0,4.,-8.,3.,0],[0,0,-7.,5.,1.],[0,0,0,-1.,3.]])
B0=[-3.,3.,25.,13.,-10.]
B = np.array([B0]).reshape(len(B0),1)
def det(A):
return np.linalg.det(A)
def op_linescal(A,i,x):
n = len(A[0]) # nbre de colonnes de A
for k in range(n):
A[i,k] = x*A[i,k] # la ligne Li devient la ligne x*Li
return A #retourne la nouvelle matrice
def op_linecombi(A,i,j,x):
n = len(A[0]) # nbre de colonnes de A
for k in range(n):
A[i,k] = A[i,k] + x*A[j,k] # la ligne Li devient la ligne Li + x*Lj
return A #retourne la nouvelle matrice
def tdma1(a,b):
ne = A.shape[0] #donne le nombre d'équations donc de ligne
if ne < 3:
return print("On ne peut pas résoudre avec l'algorithme de Thomas")
if det(A) == 0: # vérifie condition pour appliquer Thomas (matrice inversible : det(A) != 0)
return print("On ne peut pas utiliser cet algorithme de Thomas, la matrice A est singulière.")
l = A.copy() #création de la matrice à modifier
b = B.copy()
for i in range(ne):
# print("{}\n{}".format(a,b))
if i == 0 :
x = 1/l[i,i]
op_linescal(l,i,x) # on divise L1 par b1
op_linescal(b,i,x) # reproduction en b
elif i != 0 and i < ne-1: # i = 1,...,N-1
x1 = -l[i,i-1]
op_linecombi(l,i,i-1,x1) # Li => Li - ai*Li-1
op_linecombi(b,i,i-1,x1)
x2 = 1/l[i,i] # où a[i,i] = bi - ai*ci-1
op_linescal(l,i,x2) # Li => Li / (bi')
op_linescal(b,i,x2)
else: # i = N
x1 = -l[i,i-1]
op_linecombi(l,i,i-1,x1) # Li => Li - ai*Li-1
op_linecombi(b,i,i-1,x1)
print('\n',np.round(a,3))
print('\n{}\n'.format(np.round(b,2)))
return a,b
def resolve2(a,b):
"Renvoie la solution du système Ax = b lorsque A est triangulaire supérieure inversible"
n =len(a[0])
x = [0 for i in range(n)]
x[n-1] = b[n-1,0]/a[n-1,n-1]
for i in range(n-2,-1,-1):
s = 0
for j in range(i+1, n):
s = s + a[i,j]*x[j]
x[i] = (b[i,0] - s)/ a[i,i]
return x
def thomas(a, b):
if det(a) == 0: # vérifie condition pour appliquer Gauss
return print("On ne peut pas utiliser Gauss, la matrice A n'est pas inversible.")
a1,b1 = tdma1(a, b)
x = resolve2(a1, b1)
return x
print(thomas(A, B))

Program is meant to picture a graph with vectors but output returns: "TypeError: 'int' object is not subscriptable" for last plt.arrow()

this python program is meant to picture a graph with vectors but python returns this: "TypeError: 'int' object is not subscriptable" for last plt.arrow(). I don't know if anyone can help me but I would be glad if someone could resolve this problem. Thanks in advance for your help !
# cellule 1: importation des différentes bibliothèques
from lycee import *
import matplotlib.pyplot as plt
import numpy as np
# cellule 2: coordonnées de la position du satellite
R = 42164000 # rayon en mètre
T = 84164 # période de révolution
t = np.arange(0, 84164, 500) # dates en secondes
a = 2*np.pi/T*t # α=2π/T*t
x = R*np.cos(a) # x=Rcosα
y = R*np.sin(a) # y=Rsinα
# cellule 3: coordonnées du vecteur vitesse (vx,vy)
def coordvit(t, u):
vu = []
for i in range(len(u)-1):
vui = (u[i+1]-u[i]/(t[i+1]-t[i]))
vu.append(vui)
return vu
vx = coordvit(x, t)
vy = coordvit(y, t)
# cellule 4: coordonnées du vecteur accélération
def coordaccelerer(vu, t):
au = []
for j in range(len(vu)-1):
auj = (vu[j+1]-vu[j]/(t[j+1]-t[j]))
au.append(auj)
return au
ax = coordaccelerer(vx, t)
ay = coordaccelerer(vy, t)
# cellule 5: coordonnées du vecteur force gravitationnelle
MT = 5.972*10**24 # masse de la Terre en kg
m = 4192 # masse du satellite après avoir consommé 1000kg de carburant, en kg
G = 6.67408*10**(-11) # consante de gravitation universelle en m^3.kg^-1.s^-2
Fx = (G*MT*m/(R**2))*(-x/R)
Fy = 0
# cellule 6: tracé du graphique (trajectoire du satellite, vecteurs accélération et force gravitationnelle)
plt.figure("acceleration et fg d'un satellite", figsize=(10, 10))
plt.plot(x, y, "r+-")
plt.xlabel("x en m")
plt.ylabel("y en m")
plt.xlim(-50000000, 50000000)
plt.ylim(-50000000, 50000000)
plt.title("Mouvement d'un satellite géostationnaire autour de la Terre \nComparaison des vecteurs accélération et ""force gravitationnelle")
for k in range(len(ax)):
plt.arrow(x[k], y[k], 50*ax[k], 500*ay[k], facecolor="b", edgecolor="b", width=20000, head_width=1000000, length_includes_head=True) # tracé vecteur accélération
plt.arrow(x[k], y[k+1], 50*Fx[k], 500*Fy[k], facecolor="r", edgecolor="r", width=20000, head_width=1000000, length_includes_head=True) # tracé vecteur fg
plt.show()
The last plt.arrow is where it doesn't work !
Response to an answer to my initial question by Green Cloak Guy:
I'm pretty sure it's the same. Here is my friend's code which runs perfectly:
# importation des différentes bibliothèques
from lycee import *
import matplotlib.pyplot as plt
import numpy as np
# cellule 2 : coordonnées de la position du satellite
R = 42164000 # rayon en mètre
T = 84164 # période de révolution en seconde
t = np.arange(0, 84164, 500)
alpha = 2 * np.pi / T * t # α=2π/T*t
x = R * np.cos(alpha) # x=Rcosα
y = R * np.sin(alpha) # y=Rsinα
# cellule 3 : coordonnées du vecteur vitesse (vx,vy) à l'aide de la création d'une fonction
def coordvit(t, u):
vu = []
for i in range(len(u) - 1):
vui = (u[i + 1] - u[i]) / (t[i + 1] - t[i])
vu.append(vui)
return vu
vx = coordvit(t, x)
vy = coordvit(t, y)
# cellule 4 : Coordonnées du vecteur accélération
def coordaccelerer(vu, t):
au = []
for j in range(len(vu) - 1):
auj = (vu[j + 1] - vu[j]) / (t[j + 1] - t[j])
au.append(auj)
return au
ax = coordaccelerer(vx, t)
ay = coordaccelerer(vy, t)
# cellule 5 : Coordonnées du vecteur force gravitationnelle
MT = 5.972 * 10 ** 24 # masse de la Terre en kg
m = 4192 # masse du satellite après avoir consommé 1000 kg de carburant
G = 6.67408 * 10 ** (-11) # constante de gravitation universelle en m^3.kg^-1.s^-2
Fx = (G * MT * m / (R ** 2)) * (-x / R)
Fy = (G * MT * m / (R ** 2)) * (-y / R)
# cellule 6 : Tracé du graphique permettant de visualiser la trajectoire du satellite, les vecteurs accélération et
# force gravitationnelle
plt.figure("Vecteurs acceleration et fg d'un satellite de la Terre", figsize=(10, 10))
plt.plot(x, y, "+", label="y=f(x)")
plt.xlabel("x en m")
plt.ylabel("y en m")
plt.xlim(-50000000, 50000000) # extrémités des échelles sur x et y
plt.ylim(-50000000, 50000000)
plt.title(
"Mouvement d'un satellite géostationnaire autour de la Terre \nComparaison des vecteurs accélération et force "
"gravitationnelle")
for k in range(len(ax)):
plt.arrow(x[k], y[k], 50000000 * ax[k], 50000000 * ay[k], facecolor="b", edgecolor="b", width=200000,
head_width=1000000, length_includes_head=True) # Tracé du vecteur accélération
plt.arrow(x[k], y[k], 20000 * Fx[k], 20000 * Fy[k], facecolor="r", edgecolor="r", width=200000, head_width=1000000,
length_includes_head=True) # Tracé du vecteur force gravitationnelle
plt.show()

Resampling after filtering on successive blocks with initial conditions (to avoid discontinuity)

I actually work on a real time graphic equalizer on python. I'm using pyaudio module, scipy, numpy. My equalizer is based on a third octave band filter bank from 25 Hz to 20 kHz (so 30 bands). This filter bank divides an input signal into 30 filtered signals (centered on the center frequency of each third octave band). Also, the streaming is implemented block by block (using pyaudio and callback method).
I used filtfilt from scipy.signal module but I had some discontinuities between each block (some audible click). So, I've followed Continuity issue when applying an IIR filter on successive time-frames and it works well for high frequencies.
But for low frequencies I need to follow these the steps :
1) downsampling input signal (to keep a good definition's filter);
2) filtering with lfilter_zi to keep continuity between each block (for streaming);
3) upsampling the filtered signal.
My problem is the upsampling because that breaks the continuity between each block (see figure below)
Discontinuities between 2 blocks when downsampling and upsampling a signal (sinus at 1000Hz here)
Also, here is my code of third octave band filter bank :
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 19 16:14:09 2019
#author: William
"""
from __future__ import division
import numpy as np
import scipy.signal as sc
def oct3_dsgn(fc, fs, type_filt='cheby2_bandpass', output='ba'):
"""
Calcul les coefficients B et A d'un filtre passe-bande pour chacune des
bandes de tiers d'octave entre 25 Hz et 20 kHz.
La fonction cheb2ord permet d'optimiser l'ordre et la bande passante du
filtre en fonction des fréquences de coupures et de leurs gains respectifs
(gpass, gstop) et de la fréquence d'échantillonnage.
"""
#------- Définition des fréquences inférieures et supérieures de la bande n
fc1 = fc / 2**(1/6)
fc2 = fc * 2**(1/6)
#------- Définition des fréquences centrales des bandes n-1 et n+1
fm1 = fc1 / 2**(1/6)
fm2 = fc2 * 2**(1/6)
#------- Définition des fréquences normalisées par rapport à f_nyquist
W1 = fc1/(fs/2)
W2 = fc2/(fs/2)
Wm1 = fm1/(fs/2)
Wm2 = fm2/(fs/2)
#------- Définition des filtres passe-bande, passe-bas et passe-haut
if type_filt == 'cheby2_bandpass':
gpass = 20*np.log10(np.sqrt(2)) # Équivalent à 3dB
gstop = 45
n_filt, Wn = sc.cheb2ord([W1, W2], [Wm1, Wm2], gpass, gstop)
if output=='ba':
B, A = sc.cheby2(n_filt, gstop, Wn, btype='band', output='ba')
elif output=='sos':
sos = sc.cheby2(n_filt, gstop, Wn, btype='band', output='sos')
elif type_filt == 'butter':
gpass = 20*np.log10(np.sqrt(2)) # Équivalent à 3dB
gstop = 30
n_filt, Wn = sc.buttord([W1, W2], [Wm1, Wm2], gpass, gstop)
if output=='ba':
B, A = sc.butter(n_filt, Wn, btype='band', output='ba')
elif output=='sos':
sos = sc.cheby2(n_filt, gstop, Wn, btype='band', output='sos')
elif type_filt == 'cheby2_low':
gpass = 20*np.log10(np.sqrt(2)) # Équivalent à 3dB
gstop = 45
n_filt, Wn = sc.cheb2ord(W2, Wm2, gpass, gstop)
if output=='ba':
B, A = sc.cheby2(n_filt, gstop, Wn, btype='low', output='ba')
elif output=='sos':
sos = sc.cheby2(n_filt, gstop, Wn, btype='low', output='sos')
elif type_filt == 'cheby2_high':
gpass = 20*np.log10(np.sqrt(2)) # Équivalent à 3dB
gstop = 45
n_filt, Wn = sc.cheb2ord(W1, Wm1, gpass, gstop)
if output=='ba':
B, A = sc.cheby2(n_filt, gstop, Wn, btype='high', output='ba')
elif output=='sos':
sos = sc.cheby2(n_filt, gstop, Wn, btype='high', output='sos')
if output == 'ba':
return B, A
elif output == 'sos':
return sos
def oct3_filter(signal, fs, gain_general = 0, gain_bande = np.zeros((30, )), plot=False):
"""
Calcul le signal filtré à partir du signal initial grâce une reconstruction
parfaite bande par bande. Le signal initial est filtré pour chacune des bandes.
Lorsque les fréquences sont trop basses, un sous-échantillonnage est opéré
pour gagner en résolution fréquentielle et en bande passante.
Pour la bande à 25 Hz, un filtre passe-bas est utilisé. La bande à 25 Hz
comprend donc toutes les fréquences inférieures ou égales à 25 Hz.
De même, pour la bande à 20 kHz un filtre passe-haut est utlisé. La bande à
20 kHz comprend donc toutes les fréquences au-dessus de 18 kHz.
"""
#------- Définition des fréquences centrales exactes en base 2
fc_oct3 = (1000) * (2**(1/3))**np.arange(-16, 14)
n_bande = len(fc_oct3)
n_signal = len(signal)
#------- Définition des matrices de stockage des signaux et des gains
signal_filt = np.zeros((n_signal, n_bande))
Gain = 10**(gain_bande/20)
#------- Affichage de la réponse des filtres 1/3 d'octaves
if plot == True:
import matplotlib.pyplot as plt
plt.figure(0, figsize=(10,5))
#------- Boucle sur les bandes de 10 Hz à 20 kHz
for ii in range(0, n_bande):
if ii == n_bande-1 :
## bande à 20 kHz
sos = oct3_dsgn(fc_oct3[ii], fs, type_filt='cheby2_high', output='sos')
signal_filt[:, ii] = Gain[ii] * sc.sosfilt(sos, signal)
## affichage de la réponse du filtre
if plot == True:
w, h = sc.freqz(sos)
plt.semilogx(w/2/np.pi*fs, 20*np.log10(abs(h)), 'k')
elif ii == 0 :
## bande à 25 Hz
n_decimate = 32
x = sc.decimate(signal, n_decimate)
sos = oct3_dsgn(fc_oct3[ii], fs//n_decimate, type_filt='cheby2_low', output='sos')
x = sc.sosfilt(sos, x)
signal_filt[:, ii] = Gain[ii] * sc.resample(x, n_signal)
## affichage de la réponse du filtre
if plot == True:
w, h = sc.freqz(sos)
plt.semilogx(w/2/np.pi*(fs//n_decimate), 20*np.log10(abs(h)), 'k')
elif n_bande-5 <= ii < n_bande-1:
## de 8 kHz à 16 kHz
sos = oct3_dsgn(fc_oct3[ii], fs, output='sos')
signal_filt[:, ii] = Gain[ii] * sc.sosfilt(sos, signal)
## affichage de la réponse du filtre
if plot == True:
w, h = sc.freqz(sos)
plt.semilogx(w/2/np.pi*fs, 20*np.log10(abs(h)), 'k')
elif n_bande-10 <= ii < n_bande-5:
## de 2,5 kHz à 6,3 kHz
n_decimate = 2
x = sc.decimate(signal, n_decimate)
sos = oct3_dsgn(fc_oct3[ii], fs//n_decimate, output='sos')
x = sc.sosfilt(sos, x)
signal_filt[:, ii] = Gain[ii] * sc.resample(x, n_signal)
## affichage de la réponse du filtre
if plot == True:
w, h = sc.freqz(sos)
plt.semilogx(w/2/np.pi*(fs//n_decimate), 20*np.log10(abs(h)), 'k')
elif n_bande-15 <= ii < n_bande-10:
## de 800 Hz à 2 kHz
n_decimate = 4#round(fs/(2*fmax))-1
x = sc.decimate(signal, n_decimate)
sos = oct3_dsgn(fc_oct3[ii], fs//n_decimate, output='sos')
x = sc.sosfilt(sos, x)
signal_filt[:, ii] = Gain[ii] * sc.resample(x, n_signal)
## affichage de la réponse du filtre
if plot == True:
w, h = sc.freqz(sos)
plt.semilogx(w/2/np.pi*(fs//n_decimate), 20*np.log10(abs(h)), 'k')
elif n_bande-20 <= ii < n_bande-15:
## de 250 Hz à 630 Hz
n_decimate = 8#round(fs/(2*fmax))-1
x = sc.decimate(signal, n_decimate)
sos = oct3_dsgn(fc_oct3[ii], fs//n_decimate, output='sos')
x = sc.sosfilt(sos, x)
signal_filt[:, ii] = Gain[ii] * sc.resample(x, n_signal)
### affichage de la réponse du filtre
if plot == True:
w, h = sc.freqz(sos)
plt.semilogx(w/2/np.pi*(fs//n_decimate), 20*np.log10(abs(h)), 'k')
elif n_bande-25 <= ii < n_bande-20:
## de 80 Hz à 200 Hz
n_decimate = 16#round(fs/(2*fmax))-1
x = sc.decimate(signal, n_decimate)
sos = oct3_dsgn(fc_oct3[ii], fs//n_decimate, output='sos')
x = sc.sosfilt(sos, x)
signal_filt[:, ii] = Gain[ii] * sc.resample(x, n_signal)
## affichage de la réponse du filtre
if plot == True:
w, h = sc.freqz(sos)
plt.semilogx(w/2/np.pi*(fs//n_decimate), 20*np.log10(abs(h)), 'k')
elif n_bande-29 <= ii < n_bande-25:
## de 25 Hz à 63 Hz
n_decimate = 32#round(fs/(2*fmax))-1
x = sc.decimate(signal, n_decimate)
sos = oct3_dsgn(fc_oct3[ii], fs//n_decimate, output='sos')
x = sc.sosfilt(sos, x)
signal_filt[:, ii] = Gain[ii] * sc.resample(x, n_signal)
## affichage de la réponse du filtre
if plot == True:
w, h = sc.freqz(sos)
plt.semilogx(w/2/np.pi*(fs//n_decimate), 20*np.log10(abs(h)), 'k')
if plot == True:
plt.grid(which='both', linestyle='-', color='grey')
# plt.xticks([20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000],
# ["20", "50", "100", "200", "500", "1K",
# "2K", "5K", "10K", "20K"])
plt.xlabel('Fréquence [Hz]'), plt.ylabel('Gain [dB]')
plt.title('Réponse en fréquence des filtres 1/3 d\'octaves')
plt.xlim((10, 22e3)), plt.ylim((-5, 1))
plt.show()
#------- Sommation des signaux filtrés pour recomposer le signal d'origine
S = signal_filt.sum(axis=1)
S = S - np.mean(S)
## tuckey_window = sc.tukey(len(S), alpha=0.01)
## S = tuckey_window * S
G = 10**(gain_general/20)
return G * S

Histogram representing number of substitutions, insertions and deleting in sequences

l have two columns that represent : right sequence and predicted sequence. l want to make statistics on the number of deletion, substitution and insertion by comparing each right sequence with its predicted sequence.
l did the levenstein distance to get the number of characters which are different (see the function below) and error_dist function to get the most common errors (in terms of substitution) :
here is a sample of my data :
de de
date date
pour pour
etoblissemenls etablissements
avec avec
code code
communications communications
r r
seiche seiche
titre titre
publiques publiques
ht ht
bain bain
du du
ets ets
premier premier
dans dans
snupape soupape
minimum minimum
blanc blanc
fr fr
nos nos
au au
bl bl
consommations consommations
somme somme
euro euro
votre votre
offre offre
forestier forestier
cs cs
de de
pour pour
de de
paye r
cette cette
votre votre
valeurs valeurs
des des
gfda gfda
tva tva
pouvoirs pouvoirs
de de
revenus revenus
offre offre
ht ht
card card
noe noe
montant montant
r r
comprises comprises
quantite quantite
nature nature
ticket ticket
ou ou
rapide rapide
de de
sous sous
identification identification
du du
document document
suicide suicide
bretagne bretagne
tribunal tribunal
services services
cif cif
moyen moyen
gaec gaec
total total
lorsque lorsque
contact contact
fermeture fermeture
la la
route route
tva tva
ia ia
noyal noyal
brie brie
de de
nanterre nanterre
charcutier charcutier
semestre semestre
de de
rue rue
le le
bancaire bancaire
martigne martigne
recouvrement recouvrement
la la
sainteny sainteny
de de
franc franc
rm rm
vro vro
here is my code
import pandas as pd
import collections
import numpy as np
import matplotlib.pyplot as plt
import distance
def error_dist():
df = pd.read_csv('data.csv', sep=',')
df = df.astype(str)
df = df.replace(['é', 'è', 'È', 'É'], 'e', regex=True)
df = df.replace(['à', 'â', 'Â'], 'a', regex=True)
dictionnary = []
for i in range(len(df)):
if df.manual_raw_value[i] != df.raw_value[i]:
text = df.manual_raw_value[i]
text2 = df.raw_value[i]
x = len(df.manual_raw_value[i])
y = len(df.raw_value[i])
z = min(x, y)
for t in range(z):
if text[t] != text2[t]:
d = (text[t], text2[t])
dictionnary.append(d)
#print(dictionnary)
dictionnary_new = dict(collections.Counter(dictionnary).most_common(25))
pos = np.arange(len(dictionnary_new.keys()))
width = 1.0
ax = plt.axes()
ax.set_xticks(pos + (width / 2))
ax.set_xticklabels(dictionnary_new.keys())
plt.bar(range(len(dictionnary_new)), dictionnary_new.values(), width, color='g')
plt.show()
enter image description here
and the levenstein distance :
def levenstein_dist():
df = pd.read_csv('data.csv', sep=',')
df=df.astype(str)
df['string diff'] = df.apply(lambda x: distance.levenshtein(x['raw_value'], x['manual_raw_value']), axis=1)
plt.hist(df['string diff'])
plt.show()
enter image description here
Now l want to make a histograms showing three bins : number of substitution, number of insertion and number of deletion . How can l proceed ?
Thank you
Thanks to the suggestions of #YohanesGultom the answer for the problem can be found here :
http://www.nltk.org/_modules/nltk/metrics/distance.html
or
https://gist.github.com/kylebgorman/1081951

Error when I try to iterate more than once

I've got this program which calculate k-means for IA
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from random import sample
from itertools import repeat
from math import sqrt
# Parametros
k = 6
maxit = 2
def leeValoracionesFiltradas (nomFichero = "valoracionesFiltradas.data"):
lineas = [(l.strip()).split("\t") for l in (open(nomFichero).readlines())]
diccio = {}
for l in lineas:
diccio[int(l[0])] = {}
for l in lineas:
diccio[int(l[0])][int(l[1])] = (float(l[2]),float(l[3]))
return diccio
def distEuclidea(dic1, dic2):
# Se calcula la suma de cuadrados de los elementos comunes a los dos diccionarios
sum2 = sum([pow(dic1[elem]-dic2[elem], 2)
for elem in dic1 if elem in dic2])
return sqrt(sum2)
def similitudEuclidea(dic1, dic2):
return 1/(1+distEuclidea(dic1, dic2))
def coefPearson(dic1, dic2):
# Se consiguen los elementos comunes en ambos diccionarios
comunes = [x for x in dic1 if x in dic2]
nComunes = float(len(comunes))
# Si no hay comunes -> cero
if nComunes==0:
return 0
# Calculo de las medias de cada diccionario
media1 = sum([dic1[x][1] for x in comunes]) / nComunes
media2 = sum([dic2[x][1] for x in comunes]) / nComunes
# Numerador y denominador
num = sum([(dic1[x][1] - media1) * (dic2[x][1] - media2) for x in comunes])
den1 = sqrt(sum([pow(dic1[x][1] - media1, 2) for x in comunes]))
den2 = sqrt(sum([pow(dic2[x][1] - media2, 2) for x in comunes]))
den = den1 * den2
# Caculo del coeficiente
if den==0:
return 0
return num/den
# Dado un diccionario {key1 : {key2 : valor}} calcula el agrupamiento k-means
# con k clusters (grupo), ejecutando maxit iteraciones, con la funcion de similitud especificada
# Retorna una tupla
# -{key1:numero de clusters} con las asignaciones de clusters (a que clusters pertenece cada elemento)
# -[{key2:valores}] una lista con los k centroides (media de los valores para cada clusters)
def kmeans (diccionario, k, maxit, similitud = coefPearson):
# K puntos aleatorios son elegidos como centroides incialmente
# Cada centroide es {key2 : valor}
centroides = [diccionario[x] for x in sample(diccionario.keys(), k)]
# Se asigna cada key1 a un numero de cluster
previo = None
asignacion = {}
# En cada iteracion se asignan puntos a los centroides y se calculan nuevos centroides
for it in range(maxit):
# Se asignan puntos a los centroides mas cercanos
for key1 in diccionario:
similitudes = map(similitud,repeat(diccionario[key1],k), centroides)
asignacion[key1] = similitudes.index(max(similitudes))
# Si no hay cambios en la asignacion, se termina
if previo == asignacion: break
previo = asignacion
# Se recalculan los centroides (se anotan los valores de cada key a cada centroide)
valores = {x : {} for x in range(k)}
contadores = {x : {} for x in range(k)}
for key1 in diccionario:
grupo = asignacion[key1]
for key2 in diccionario[key1]:
if not valores[grupo].has_key(key2):
valores [grupo][key2] = 0
contadores [grupo][key2] = 0
valores [grupo][key2] += diccionario[key1][key2][1]
contadores[grupo][key2] += 1
# Se calculan las medias (nuevos centroides)
centroides = []
for grupo in valores:
centro = {}
for key2 in valores[grupo]:
centro[key2] = round((valores[grupo][key2] / contadores[grupo][key2]),2)
centroides.append(centro)
if None in centroides: break
return (asignacion, centroides)
# Se obtiene el diccionario de valoraciones (las valoraciones ya han sido filtradas)
diccionario = leeValoracionesFiltradas()
# Se obtienen las asignaciones y los centroides con la correlacion de Pearson
tupla = kmeans (diccionario, k, maxit)
asignaciones = tupla[0]
centroids = tupla[1]
print asignaciones
print centroids
And when I execute this for example for maxit = 2, it throws:
File "kmeans_dictio.py", line 46, in coefPearson
media2 = sum([dic2[x][1] for x in comunes]) / nComunes
TypeError: 'float' object has no attribute '__getitem__'
How can I fix this?
It looks like you have a dictionary (dic2) of floats and a dictionary of dictionaries of floats (dic1) that you are pulling an item out of with this line:
comunes = [x for x in dic1 if x in dic2]
Then you are trying to iterate over this float here:
media2 = sum([dic2[x][1] for x in comunes]) / nComunes
To fix this look at dic1 and dic2 and how they are defined.

Categories