How to add noise to a wav file in Python - python

I just started working with matplotlib and numpy. I need to add noise to an audio signal, in Python.
To do that, I need a function that receives the original audio signal and the noise amplitude, and then returns the original audio signal with the noise added to it.
I have to use the rand function from numpy.random. The amplitude is what is supposed to help me get those values.
So far this is what I have:
import as spiowf
import sounddevice as sd
import matplotlib.pyplot as plt
import numpy as np
def noise(data, samplerate, amplitude=0.1):
    """Return a copy of ``data`` with uniform random noise added.

    Args:
        data: Audio samples as an array-like of any shape, for example
            ``(ns,)`` for mono or ``(ns, nc)`` for multi-channel audio.
        samplerate: Sampling rate in Hz. Kept so existing callers keep
            working; the noise itself does not depend on it.
        amplitude: Peak absolute value of the added noise, expressed in the
            same units as ``data``. ``0`` returns the signal unchanged.

    Returns:
        A float array with the same shape as ``data``.
    """
    samples = np.asarray(data, dtype=float)
    # rand() is uniform on [0, 1); recentre it to [-1, 1) so the noise has
    # zero mean, then scale it to the requested peak amplitude.
    disturbance = amplitude * (2.0 * np.random.rand(*samples.shape) - 1.0)
    return samples + disturbance
def main():
    """Load ``saxriff.wav`` and pass its samples through ``noise``.

    Returns:
        Whatever ``noise`` returns for the loaded samples.
    """
    # Imported locally so this entry point does not rely on the module alias
    # being set up correctly at the top of the file.
    import scipy.io.wavfile as spiowf

    fName = "saxriff.wav"
    samplerate, data = spiowf.read(fName)
    return noise(data, samplerate)


if __name__ == "__main__":
    main()
How can I do the rest?


plot wavfile with python

I would like to plot a WAV file with Python. The plot should show a sine wave.
This is my code so far:
import matplotlib.pyplot as plt
import numpy as np
import wave
import sys

# wave.open works as a context manager, so the file is closed even if
# reading the frames fails.
with wave.open("sin.wav", "r") as spf:
    n_channels = spf.getnchannels()
    sample_width = spf.getsampwidth()
    # Extract Raw Audio from Wav File
    signal = spf.readframes(-1)

# If Stereo
if n_channels == 2:
    print("Just mono files")
    sys.exit(0)

# 8-bit WAV samples are unsigned; wider samples are signed little-endian.
sample_dtypes = {1: "u1", 2: "<i2", 4: "<i4"}
if sample_width not in sample_dtypes:
    raise ValueError("unsupported sample width: %d bytes" % sample_width)
signal = np.frombuffer(signal, dtype=sample_dtypes[sample_width])

plt.figure(1)
plt.title("Signal Wave...")
plt.plot(signal)
plt.show()
I got this Error: WAV header is invalid: nAvgBytesPerSec must equal product of nSamplesPerSec and nBlockAlign, but file has nSamplesPerSec = 16384, nBlockAlign = 1, and nAvgBytesPerSec = 44100
Does anyone know how to get this working ?

How to convert a numpy array to a mp3 file

I am using the soundcard library to record my microphone input, it records in a NumPy array and I want to grab that audio and save it as an mp3 file.
import soundcard as sc
import numpy
import threading
speakers = sc.all_speakers() # Gets a list of the system's speakers
default_speaker = sc.default_speaker() # Gets the default speaker
mics = sc.all_microphones() # Gets a list of all the microphones
# NOTE: this asks for a microphone by its device name, not for the system default.
default_mic = sc.get_microphone('Headset Microphone (Arctis 7 Chat)') # Looks up the microphone with this name
# Records the default microphone
def record_mic():
    """Repeatedly record audio blocks from ``default_mic``.

    Opens a recorder on ``default_mic`` and a player on ``default_speaker``
    (both at 48000 Hz) and calls ``mic.record`` in a very long loop. Each
    recorded block is bound to ``data`` and then discarded; saving it is
    still to be implemented.
    """
    with default_mic.recorder(samplerate=48000) as mic, default_speaker.player(samplerate=48000) as sp:
        for _ in range(1000000000000):
            data = mic.record(numframes=None)  # 'None' creates zero latency
            # Save the mp3 file here
recordThread = threading.Thread(target=record_mic)
With Scipy (to wav file)
You can easily convert to wav and then separately convert wav to mp3. More details here.
import numpy as np
from scipy.io.wavfile import write

samplerate = 44100
fs = 100  # frequency of the generated tone in Hz (not a sampling rate)

# endpoint=False keeps the sample spacing at exactly 1 / samplerate seconds,
# so one second of audio holds exactly `samplerate` samples.
t = np.linspace(0., 1., samplerate, endpoint=False)
amplitude = np.iinfo(np.int16).max
data = amplitude * np.sin(2. * np.pi * fs * t)
write("example.wav", samplerate, data.astype(np.int16))
With pydub (to mp3)
Try this function from this excellent thread -
import pydub
import numpy as np
def write(f, sr, x, normalized=False):
    """numpy array to MP3

    Args:
        f: Output path or file object handed to ``AudioSegment.export``.
        sr: Frame rate (samples per second) of ``x``.
        x: Audio samples; a 2-D array with two columns is written as stereo,
            anything else as a single channel.
        normalized: When true, ``x`` holds floats in [-1, 1) that are scaled
            to the 16-bit range; otherwise ``x`` is cast to int16 as-is.
    """
    channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
    if normalized:  # normalized array - each item should be a float in [-1, 1)
        # Clip before casting so a sample of exactly 1.0 (or slightly out of
        # range) saturates instead of wrapping around to a negative value.
        y = np.int16(np.clip(x * 2 ** 15, -2 ** 15, 2 ** 15 - 1))
    else:
        y = np.int16(x)
    song = pydub.AudioSegment(y.tobytes(), frame_rate=sr, sample_width=2, channels=channels)
    song.export(f, format="mp3", bitrate="320k")
#[[-225 707]
# [-234 782]
# [-205 755]
# ...,
# [ 303 89]
# [ 337 69]
# [ 274 89]]
write('out2.mp3', sr, x)
Note: The output MP3 will of course be 16-bit, because this function always writes 16-bit samples. However, you can set sample_width=3, as suggested by @Arty, for 24-bit input.
As of now, the accepted answer produces extremely distorted sound, at least in my case, so here is an improved version:
#librosa read
#pydub read
# NOTE(review): `sound` is not defined in the visible snippet; presumably a
# pydub AudioSegment loaded just before this point -- confirm.
channel_sounds = sound.split_to_mono()
# One sample sequence per mono segment.
samples = [s.get_array_of_samples() for s in channel_sounds]
# Stack the per-channel sequences, transpose, and convert to float32.
fp_arr = np.array(samples).T.astype(np.float32)
# Scale by the largest value of the integer type named by the first channel's typecode.
fp_arr /= np.iinfo(samples[0].typecode).max
# Keep only the first entry of every row.
fp_arr=np.array([x[0] for x in fp_arr])
#i normalize the pydub waveform with librosa for comparison purposes
So you can read the audio file with any library to get a waveform, and then export it to any pydub-supported codec with the code below. I also tried it with a waveform read by librosa, and it works perfectly.
import io

from scipy.io import wavfile

# Write the waveform as WAV into an in-memory buffer so pydub can parse it.
wav_io = io.BytesIO()
wavfile.write(wav_io, sample_rate, waveform)
wav_io.seek(0)  # rewind so pydub reads the WAV data from the start
sound = AudioSegment.from_wav(wav_io)
with open("file_exported_by_pydub.mp3", 'wb') as af:
    sound.export(af, format='mp3')

Can anyone guide me on how to calculate the frequency in hz from wav file? The wave file is of 50 secs

I am using the numpy library to calculate freq = np.fft.rfftfreq(len_data, 1.0 / rate). If I am not mistaken, this frequency has no unit. How can I convert it into hertz? I am using the following code:
import numpy as np
from scipy import signal
def read_wav_file(file_name):
    """Read a WAV file.

    Args:
        file_name: Path of the WAV file to read.

    Returns:
        A ``(sample_rate, Data_audio)`` tuple as produced by
        ``scipy.io.wavfile.read``.
    """
    # Imported locally so the function works even if the module-level import
    # of the wav reader is missing.
    from scipy.io import wavfile

    sample_rate, Data_audio = wavfile.read(file_name)
    return sample_rate, Data_audio
def getFFT(Data_audio, sample_rate):
    """Compute the magnitude spectrum of a Hamming-windowed signal.

    Args:
        Data_audio: Samples with time along the first axis. One-dimensional
            (mono) input and multi-channel input such as ``(n, channels)``
            are both accepted.
        sample_rate: Sampling rate in samples per second.

    Returns:
        A ``(freq, fft, ret_len_FFT)`` tuple: the bin frequencies, the
        magnitude of the real FFT taken along the first axis, and the number
        of frequency bins. Because the sample spacing passed to ``rfftfreq``
        is ``1 / sample_rate`` seconds, ``freq`` is in cycles per second (Hz).
    """
    samples = np.asarray(Data_audio, dtype=float)
    len_data = len(samples)
    # Shape the window as (n, 1, ...) so it broadcasts over any channel axes.
    window = np.hamming(len_data).reshape((-1,) + (1,) * (samples.ndim - 1))
    fft = np.abs(np.fft.rfft(samples * window, axis=0))
    ret_len_FFT = len(fft)
    freq = np.fft.rfftfreq(len_data, 1.0 / sample_rate)
    return freq, fft, ret_len_FFT
sample_rate_rec, Data_audio_rec = read_wav_file('2020rec.wav')
frequency_rec, fft_rec, ret_lenFFT_rec = getFFT(Data_audio_rec, sample_rate_rec)
print("frequency_rec: " + str(frequency_rec) )
frequency_rec: [0.00000000e+00 8.33481508e-03 1.66696302e-02 ... 2.39999833e+04
2.39999917e+04 2.40000000e+04]
# To convert frequencies into float format
print("frequency_rec: " + str(frequency_rec) )
frequency_rec: [0.000000 0.008307 0.016614 ... 23999.983386 23999.991693 24000.000000]
The numpy FFT package has a built-in function to calculate the frequency vector that goes along with your FFT output. Note that scipy outputs the sample rate while numpy wants the sample spacing, so you must invert it first.
import numpy as np
def getFrequencies(Data_audio, sample_rate_rec):
    """Return the FFT bin frequencies for ``Data_audio``.

    Args:
        Data_audio: The sampled signal; only its length is used.
        sample_rate_rec: Sampling rate in samples per second.

    Returns:
        The array from ``np.fft.fftfreq`` for ``len(Data_audio)`` points. The
        sample spacing passed in is ``1 / sample_rate_rec`` seconds, so the
        values are in cycles per second (Hz).
    """
    # fftfreq expects the sample spacing, i.e. the inverse of the sample rate.
    return np.fft.fftfreq(n=len(Data_audio), d=1.0 / sample_rate_rec)
Since the sample rate returned by scipy.io.wavfile.read is in samples/sec, the output of np.fft.fftfreq will be in cycles/sec, a.k.a. Hertz.

How to convert a .wav file to a spectrogram in python3

I am trying to create a spectrogram from a .wav file in python3.
I want the final saved image to look similar to this image:
I have tried the following:
This stack overflow post:
Spectrogram of a wave file
This post worked, somewhat. After running it, I got
However, This graph does not contain the colors that I need. I need a spectrogram that has colors. I tried to tinker with this code to try and add the colors however after spending significant time and effort on this, I couldn't figure it out!
I then tried this tutorial.
This code crashed(on line 17) when I tried to run it with the error TypeError: 'numpy.float64' object cannot be interpreted as an integer.
line 17:
samples = np.append(np.zeros(np.floor(frameSize/2.0)), sig)
I tried to fix it by casting
samples = int(np.append(np.zeros(np.floor(frameSize/2.0)), sig))
and I also tried
samples = np.append(np.zeros(int(np.floor(frameSize/2.0)), sig))
However neither of these worked in the end.
I would really like to know how to convert my .wav files to spectrograms with color so that I can analyze them! Any help would be appreciated!!!!!
Please tell me if you want me to provide any more information about my version of python, what I tried, or what I want to achieve.
Use scipy.signal.spectrogram.
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile

sample_rate, samples = wavfile.read('path-to-mono-audio-file.wav')
frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)

# pcolormesh draws the power values as a coloured time/frequency grid.
plt.pcolormesh(times, frequencies, spectrogram)
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [sec]')
plt.show()
Be sure that your wav file is mono (single channel) and not stereo (dual channel) before trying to do this. I highly recommend reading the scipy documentation at
Putting plt.pcolormesh before plt.imshow seems to fix some issues, as pointed out by @Davidjb. If an unpacking error occurs, follow the steps by @cgnorthcutt below.
I have fixed the errors you are facing for
This implementation is better because you can change the binsize (e.g. binsize=2**8)
import numpy as np
from matplotlib import pyplot as plt
import as wav
from numpy.lib import stride_tricks
""" short time fourier transform of audio signal """
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Short-time Fourier transform of a one-dimensional signal.

    Args:
        sig: Input samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of a frame shared with the next frame.
        window: Callable that takes the frame size and returns the window.

    Returns:
        A complex array with one row per frame and ``frameSize // 2 + 1``
        columns (the real FFT of each windowed frame).
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)
    # cols for windowing
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize)) + 1)
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    # Build overlapping frames as a strided view, then copy so that applying
    # the window does not write through to the shared sample buffer.
    step = samples.strides[0]
    frames = np.lib.stride_tricks.as_strided(
        samples, shape=(cols, frameSize), strides=(step * hopSize, step)
    ).copy()
    frames *= win

    return np.fft.rfft(frames)
""" scale frequency axis logarithmically """
def logscale_spec(spec, sr=44100, factor=20.):
    """Re-bin the frequency axis of a spectrogram on a power-law scale.

    Args:
        spec: Array of shape ``(timebins, freqbins)``.
        sr: Sampling rate used to compute the centre frequency of each bin.
        factor: Exponent applied to the linear 0..1 ramp; ``1.0`` keeps the
            original linear bins.

    Returns:
        A ``(newspec, freqs)`` tuple: the complex spectrogram with the
        frequency bins merged, and a list holding the mean frequency of each
        merged bin.
    """
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins - 1) / max(scale)
    # Rounding makes several ramp values collapse onto the same bin index;
    # unique() keeps each lower edge once.
    edges = np.unique(np.round(scale)).astype(int)

    allfreqs = np.abs(np.fft.fftfreq(freqbins * 2, 1. / sr)[:freqbins + 1])

    # create spectrogram with new freq bins
    newspec = np.zeros([timebins, len(edges)], dtype=np.complex128)
    # list center freq of bins
    freqs = []
    for i, start in enumerate(edges):
        # The last bin has no upper edge: it runs to the end of the axis.
        stop = edges[i + 1] if i + 1 < len(edges) else None
        newspec[:, i] = np.sum(spec[:, start:stop], axis=1)
        freqs.append(np.mean(allfreqs[start:stop]))

    return newspec, freqs
""" plot spectrogram"""
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
    """Plot the spectrogram of a WAV file.

    Args:
        audiopath: Path of the WAV file to analyse.
        binsize: Frame size passed to ``stft``.
        plotpath: If given, the figure is saved to this path; otherwise it is
            shown on screen.
        colormap: Matplotlib colormap name used for the image.

    Returns:
        The plotted array (time bins by frequency bins) of decibel values.
    """
    # Imported locally so the function does not depend on the module-level
    # alias for the wav reader.
    from scipy.io import wavfile

    samplerate, samples = wavfile.read(audiopath)

    s = stft(samples, binsize)
    sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)

    ims = 20. * np.log10(np.abs(sshow) / 10e-6)  # amplitude to decibel

    timebins, freqbins = np.shape(ims)
    print("timebins: ", timebins)
    print("freqbins: ", freqbins)

    plt.figure(figsize=(15, 7.5))
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")

    plt.xlabel("time (s)")
    plt.ylabel("frequency (hz)")
    plt.xlim([0, timebins - 1])
    plt.ylim([0, freqbins])

    # Label five evenly spaced time bins with their position in seconds.
    xlocs = np.float32(np.linspace(0, timebins - 1, 5))
    plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs * len(samples) / timebins) + (0.5 * binsize)) / samplerate])
    # Label ten evenly spaced frequency bins with their mean frequency.
    ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 10)))
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    if plotpath:
        plt.savefig(plotpath, bbox_inches="tight")
    else:
        plt.show()

    return ims
ims = plotstft(filepath)
import os
import wave
import pylab
def graph_spectrogram(wav_file):
    """Draw a spectrogram of ``wav_file`` on a new pylab figure.

    Args:
        wav_file: Path of the WAV file; it is also used in the figure title.
    """
    sound_info, frame_rate = get_wav_info(wav_file)
    pylab.figure(num=None, figsize=(19, 12))
    pylab.title('spectrogram of %r' % wav_file)
    pylab.specgram(sound_info, Fs=frame_rate)
def get_wav_info(wav_file):
    """Read all samples and the frame rate from a WAV file.

    Args:
        wav_file: Path of the WAV file to read.

    Returns:
        A ``(sound_info, frame_rate)`` tuple: the raw frames interpreted as
        16-bit integers, and the file's frame rate.
    """
    import numpy as np

    # The context manager closes the file even if reading fails.
    with wave.open(wav_file, 'r') as wav:
        frames = wav.readframes(-1)
        frame_rate = wav.getframerate()
    # NOTE: the frames are interpreted as 16-bit samples regardless of the
    # file's actual sample width. frombuffer returns a read-only view, so
    # copy to hand back a writable array.
    sound_info = np.frombuffer(frames, dtype='<i2').copy()
    return sound_info, frame_rate
for A Capella Science - Bohemian Gravity! this gives:
Use graph_spectrogram(path_to_your_wav_file).
I don't remember the blog from where I took this snippet. I will add the link whenever I see it again.
Beginner's answer above is excellent. I don't have 50 rep, so I can't comment on it, but if you want the correct amplitude in the frequency domain, the stft function should look like this:
import numpy as np
from matplotlib import pyplot as plt
import as wav
from numpy.lib import stride_tricks
""" short time fourier transform of audio signal """
def stft(sig, frameSize, overlapFac=0, window=np.hanning):
    """Amplitude-corrected short-time Fourier transform.

    Args:
        sig: Input samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of a frame shared with the next frame.
        window: Callable that takes the frame size and returns the window.

    Returns:
        A complex array with one row per frame and ``frameSize // 2 + 1``
        columns, scaled by the window and FFT correction factors.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)
    # cols for windowing
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize)) + 1)
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    # Build overlapping frames as a strided view, then copy so that applying
    # the window does not write through to the shared sample buffer.
    step = samples.strides[0]
    frames = np.lib.stride_tricks.as_strided(
        samples, shape=(cols, frameSize), strides=(step * hopSize, step)
    ).copy()
    frames *= win

    fftResults = np.fft.rfft(frames)

    # This is amplitude correct (1/mean(window)). Energy correction is
    # 1/rms(window). The correction is taken from the window actually
    # applied, so a non-default `window` is scaled correctly too.
    windowCorrection = 1 / np.mean(win)
    FFTcorrection = 2 / frameSize

    scaledFftResults = fftResults * windowCorrection * FFTcorrection
    return scaledFftResults
You can use librosa for your mp3 spectrogram needs. Here is some code I found, thanks to Parul Pandey from Medium. The code I used is this:
# Method described here
import librosa
import librosa.display
from pydub import AudioSegment
import matplotlib.pyplot as plt
from import wavfile
from tempfile import mktemp
def plot_mp3_matplot(filename):
    """
    plot_mp3_matplot -- using matplotlib to simply plot time vs amplitude waveplot

    Arguments:
    filename -- filepath to the file that you want to see the waveplot for

    Returns -- None
    """
    # sr is for 'sampling rate'
    # Feel free to adjust it
    x, sr = librosa.load(filename, sr=44100)
    plt.figure(figsize=(14, 5))
    # Newer librosa releases replaced waveplot with waveshow; use whichever
    # one the installed version provides.
    draw_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
    draw_wave(x, sr=sr)
def convert_audio_to_spectogram(filename):
    """
    convert_audio_to_spectogram -- using librosa to simply plot a spectogram

    Arguments:
    filename -- filepath to the file that you want to see the spectrogram for

    Returns -- None
    """
    # sr == sampling rate
    x, sr = librosa.load(filename, sr=44100)

    # stft is short time fourier transform
    X = librosa.stft(x)

    # convert the slices to amplitude
    Xdb = librosa.amplitude_to_db(abs(X))

    # ... and plot, magic!
    plt.figure(figsize=(14, 5))
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
# same as above, just changed the y_axis from hz to log in the display func
def convert_audio_to_spectogram_log(filename):
    """
    convert_audio_to_spectogram_log -- plot a spectogram with a log-scaled frequency axis

    Arguments:
    filename -- filepath to the file that you want to see the spectrogram for

    Returns -- None
    """
    x, sr = librosa.load(filename, sr=44100)
    X = librosa.stft(x)
    Xdb = librosa.amplitude_to_db(abs(X))
    plt.figure(figsize=(14, 5))
    # Only difference from the linear version: y_axis is 'log' instead of 'hz'.
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')

How to change audio speed without changing pitch?

I need to apply audio to a video at a certain time and with a certain duration, but some audio clips are longer (or shorter) than needed. How can I change the speed of the audio without changing its pitch? I tried changing the fps (multiplying it by the ratio of the needed duration to the audio duration), but it does not work as I want.
original = VideoFileClip("orig.mp4")
clips = []
subs = []  # some array
i = 0
for sub in subs:
    # Fill the running index into the file name so each subtitle loads its
    # own audio file.
    clip = AudioFileClip("\\temp{}.mp3".format(i))
    # Ratio of the clip's real duration to the slot it has to fit into.
    mult = clip.duration / (sub.end - sub.start) + 0.00001
    clip = AudioArrayClip(clip.to_soundarray(buffersize=500, fps=24000/mult), fps=24000).set_start(sub.start).set_end(sub.end)
    # Collect the fitted clip; otherwise the composite below would be empty.
    clips.append(clip)
    i += 1
final = CompositeAudioClip(clips)
you can use librosa module:
import librosa
import numpy as np
from scipy.io import wavfile

song, fs = librosa.load("song.wav")

# Passing `rate` by keyword works across librosa versions; newer releases
# no longer accept it positionally.
song_2_times_faster = librosa.effects.time_stretch(song, rate=2)

wavfile.write("song_2_times_faster.wav", fs, song_2_times_faster)  # save the song
Using wave: Change the sampling rate
import wave

swidth = 2
Change_RATE = 2

# Read every frame plus the parameters needed to describe the output file.
with wave.open('VOZ.wav', 'rb') as spf:
    channels = spf.getnchannels()
    rate = spf.getframerate()
    signal = spf.readframes(-1)

# Write the same frames back with a scaled frame rate. This changes the
# playback speed and the pitch together.
with wave.open('changed.wav', 'wb') as wf:
    wf.setnchannels(channels)
    wf.setsampwidth(swidth)
    wf.setframerate(rate * Change_RATE)
    wf.writeframes(signal)
