I'm new to working with audio files. I have several 60-second-long files that I want to split into 15-second files (or any other length). I'm able to split files into 1-second-long files (so 60 files), but I can't seem to get 15-second intervals to work. How can I create the intervals I'm looking for?
import os
import numpy as np
import librosa
import librosa.display
audio_dir = r'data\acoustics\recordings'
out_dir = r'data\acoustics\splits'
os.makedirs(out_dir, exist_ok=True)
audio_file = os.path.join(audio_dir, 'rec_20220729T160547Z.wav')
wave, sr = librosa.load(audio_file, sr=None)
num_sections = int(np.ceil(len(wave) / sr))
split = []
for i in range(num_sections):
    t = wave[i * sr : i * sr + sr]
    split.append(t)
for i in range(num_sections):
    recording_name = os.path.basename(audio_file[:-4])
    out_file = f"{recording_name}_{str(i)}.wav"
    sf.write(os.path.join(out_dir, out_file), split[i], sr)
What you have done is mostly correct; it just needs minor changes.
First is loading the data, which you have done correctly.
import os
import numpy as np
import librosa
import librosa.display
import soundfile as sf # Missing import
audio_dir = r'data\acoustics\recordings'
out_dir = r'data\acoustics\splits'
os.makedirs(out_dir, exist_ok=True)
audio_file = os.path.join(audio_dir, 'rec_20220729T160547Z.wav')
wave, sr = librosa.load(audio_file, sr=None)
Calculate the length of a segment (in samples):
segment_dur_secs = 15
segment_length = sr * segment_dur_secs
Breaking up the data and saving to file:
num_sections = int(np.ceil(len(wave) / segment_length))
split = []
for i in range(num_sections):
    t = wave[i * segment_length: (i + 1) * segment_length]
    split.append(t)

for i in range(num_sections):
    recording_name = os.path.basename(audio_file[:-4])
    out_file = f"{recording_name}_{str(i)}.wav"
    sf.write(os.path.join(out_dir, out_file), split[i], sr)
Alternatively:
split = []
for s in range(0, len(wave), segment_length):
    t = wave[s: s + segment_length]
    split.append(t)

recording_name = os.path.basename(audio_file[:-4])
for i, segment in enumerate(split):
    out_file = f"{recording_name}_{i}.wav"
    sf.write(os.path.join(out_dir, out_file), segment, sr)
Edit: There is an issue with the code here because sf is not defined. (Fixed the import)
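One more thing to be aware of: when the recording length is not an exact multiple of the segment length, the final chunk will be shorter than 15 seconds. If every output file must have exactly the same length, one option is to zero-pad the last chunk. A minimal sketch, reusing the split list and segment_length from above:

# pad the final segment with silence so every file has the same duration
last = split[-1]
if len(last) < segment_length:
    split[-1] = np.pad(last, (0, segment_length - len(last)))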
I want to resample a recording from 32000 Hz to 16000 Hz. I have done this with the code below, but the output audio is somewhat messed up.
You can find the original audio as well as the output in the following folder:
https://drive.google.com/drive/folders/1vr-ib8zvZagH_QeE4JSUtAUpp3EG75va
Any ideas what I am doing wrong?
import os
import librosa
import soundfile as sf
folder_name = "trial_sess"
os.chdir(os.path.join("process",folder_name))
for file in os.listdir():
    if file.endswith(".m4a") or file.endswith(".mp4") or file.endswith(".mp3"):
        nm, ext = file.split(".")
        sr = librosa.get_samplerate(file)
        y, sr = librosa.load(file, sr=sr)
        sf.write(os.path.join(zoom_loc, "sessions", folder_name, "output_resampled_audio" + "." + "wav"), data=y, samplerate=16000)
There is no resampling performed in your code, and the incorrect samplerate is passed to write.
librosa.load will resample on-demand if the sr argument is different from that of the original file. So the code should be something like:
target_sr = 16000
y, sr = librosa.load(file, sr=target_sr)
assert sr == target_sr # check that librosa did resample
out_path = os.path.join(zoom_loc,"sessions",folder_name,"output_resampled_audio" + "." + "wav")
sf.write(out_path, data = y, samplerate=target_sr)
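For completeness, a sketch of how this could look inside the original loop (zoom_loc and the folder layout are taken from the question and assumed to be defined elsewhere; writing one output name per input file avoids overwriting the same file on every iteration, which the original code would do):

import os
import librosa
import soundfile as sf

target_sr = 16000
folder_name = "trial_sess"
os.chdir(os.path.join("process", folder_name))

for file in os.listdir():
    if file.endswith((".m4a", ".mp4", ".mp3")):
        nm, ext = os.path.splitext(file)
        y, sr = librosa.load(file, sr=target_sr)  # librosa resamples to 16 kHz here
        # zoom_loc is assumed to be defined elsewhere, as in the question
        out_path = os.path.join(zoom_loc, "sessions", folder_name, nm + "_resampled.wav")
        sf.write(out_path, y, samplerate=target_sr)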
I want to read all .mat files from the main folder, which contains seven subfolders. Each subfolder is named with its class number.
import glob
import os
import hdf5storage
import numpy as np
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
files = glob.glob(DATASET_PATH + "**/*.mat", recursive= True)
class_labels = [i.split(os.sep)[-2] for i in files]
for label in range(0, len(class_labels)):
    class_labels[label] = int(class_labels[label])
files variable contains the following:
Class labels contains the following:
I want to ask a couple of things:
1) When I read the .mat files, each one comes in as a dict, and each dict contains a different variable name. How can I read the key and assign its value to an array?
array_store = []
for f in files:
    mat = hdf5storage.loadmat(f)
    arrays = np.array(mat.keys())
    array_store.append(arrays)
2) files = glob.glob(DATASET_PATH + "**/*.mat", recursive=True) — is it possible to randomly read a specific number of files from each folder inside the main folder, like 60% for training and 40% for testing?
UPDATE
I have tried what @vopsea suggested in the answer.
The output looks like this for the train variable.
How do I make the final array of images for keys 1 - 7 (an array of 256 x 256 x 11 x total number of images) and the labels (total number of images x 1)? The labels will be the same as the key values; for example, all files associated with key 1 (188 files) will have label 1 (188 x 1).
UPDATE
Resolving the issue of making labels and accessing keys without knowing the key name.
import os
import random
import hdf5storage
import numpy as np
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
train_images = []
test_images = []
train_label = list()
test_label = list()
percent_train = 0.4
class_folders = next(os.walk(DATASET_PATH))[1]
for x in class_folders:
    files = os.listdir(os.path.join(DATASET_PATH, x))
    random.shuffle(files)
    n = int(len(files) * percent_train)
    train_i = []
    test_i = []
    for i, f in enumerate(files):
        abs_path = os.path.join(DATASET_PATH, x, f)
        mat = hdf5storage.loadmat(abs_path)
        if i < n:
            train_i.append(mat.values())
            train_label.append(x)
        else:
            test_i.append(mat.values())
            test_label.append(x)
    train_images.append(train_i)
    test_images.append(test_i)
1) Could you explain a bit more what you want in question 1? What is being appended? I might be misunderstanding, but it's easy to read unknown key, value pairs:
for key, value in mat.items():
    print(key, value)
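If the goal is to get at the data without knowing the variable name in advance, you can take the value straight out of the dict. A rough sketch, assuming each .mat file holds a single data variable (and skipping any metadata keys the loader might add); it reuses the files list from the question:

import hdf5storage
import numpy as np

def first_array(mat_dict):
    # return the first non-metadata entry as a numpy array
    for key, value in mat_dict.items():
        if not key.startswith('__'):
            return np.asarray(value)
    raise ValueError("no data variable found in .mat file")

arrays = [first_array(hdf5storage.loadmat(f)) for f in files]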
2) I did this without glob: shuffle the files of each class and slice them into two lists according to the training percentage. It is probably best to have the same (or a similar) number of files for each class so that training doesn't especially favor one.
import os
import random
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
train = {}
test = {}
percent_train = 0.4
class_folders = next(os.walk(DATASET_PATH))[1]
for x in class_folders:
    files = os.listdir(os.path.join(DATASET_PATH, x))
    random.shuffle(files)
    n = int(len(files) * percent_train)
    train[x] = files[:n]
    test[x] = files[n:]
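Usage note: percent_train controls the fraction of each class that ends up in train; for the 60% training / 40% testing split mentioned in the question it would presumably be

percent_train = 0.6  # 60% of each class for training, the remaining 40% for testing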
EDIT 2:
Is this what you mean?
import os
import random
import hdf5storage
import numpy as np
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
train_images = []
test_images = []
train_label = []
test_label = []
percent_train = 0.4
class_folders = next(os.walk(DATASET_PATH))[1]
for x in class_folders:
    files = os.listdir(os.path.join(DATASET_PATH, x))
    random.shuffle(files)
    n = int(len(files) * percent_train)
    for i, f in enumerate(files):
        abs_path = os.path.join(DATASET_PATH, x, f)
        mat = hdf5storage.loadmat(abs_path)
        if i < n:
            train_images.append(mat.values())
            train_label.append(x)
        else:
            test_images.append(mat.values())
            test_label.append(x)
EDIT 3: Using dict for simplicity
Notice how simple it is to run through the images at the end. The alternative is storing two lists (data and labels), one of which will have many duplicate items, and you then have to iterate through them both at the same time.
Although depending on what you're doing with this later, two lists could be the right choice.
import os
import random
import hdf5storage
import numpy as np
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
train_images = {}
test_images = {}
percent_train = 0.4
class_folders = next(os.walk(DATASET_PATH))[1]
for x in class_folders:
    files = os.listdir(os.path.join(DATASET_PATH, x))
    random.shuffle(files)
    n = int(len(files) * percent_train)
    for i, f in enumerate(files):
        abs_path = os.path.join(DATASET_PATH, x, f)
        mat = hdf5storage.loadmat(abs_path)
        # keep a list of loaded files per class key; assigning train_images[x] = mat.values()
        # directly would overwrite the key on every iteration and keep only the last file
        if i < n:
            train_images.setdefault(x, []).append(mat.values())
        else:
            test_images.setdefault(x, []).append(mat.values())

for img_class, img_data in train_images.items():
    print(img_class, img_data)
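If you then want the single image array and label vector described in the question, the per-class dict can be flattened afterwards. A rough sketch, assuming every file holds one variable of identical shape (e.g. 256 x 256 x 11) and that the class folder names are integers:

images, labels = [], []
for img_class, file_values in train_images.items():
    for vals in file_values:
        arr = np.asarray(next(iter(vals)))  # assumes a single variable per .mat file
        images.append(arr)
        labels.append(int(img_class))

X = np.stack(images, axis=-1)           # (256, 256, 11, total number of images)
y = np.asarray(labels).reshape(-1, 1)   # (total number of images, 1)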
I am new to Python and machine learning. I have a huge image dataset of cars with more than 27000 images and labels. I am trying to create a dataset so I can use it in my training classifier, but of course handling this amount of data is a real pain for memory, and that's where I am stuck. At first I was trying to do something like this.
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpg
import cv2
import gc
import numpy as np
from sklearn.preprocessing import normalize
import resource
import h5py
bbox = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/labels"
imagepath = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/image"
training_data = []
training_labels = []
count = 0
for root, _, files in os.walk(bbox):
    cdp = os.path.abspath(root)
    for rootImage, _, fileImage in os.walk(imagepath):
        cdpimg = os.path.abspath(rootImage)  # was os.path.abspath(r); r is not defined
        for f in files:
            ct = 0
            name, ext = os.path.splitext(f)
            for fI in fileImage:
                n, e = os.path.splitext(fI)
                if name == n and ext == ".txt" and e == ".jpg":
                    cip = os.path.join(cdp, f)
                    cipimg = os.path.join(cdpimg, fI)
                    txt = open(cip, "r")
                    for q in txt:
                        ct = ct + 1
                        if ct == 3:
                            x1 = int(q.rsplit(' ')[0])
                            y1 = int(q.rsplit(' ')[1])
                            x2 = int(q.rsplit(' ')[2])
                            y2 = int(q.rsplit(' ')[3])
                            try:
                                read_img = mpg.imread(cipimg)
                                read_img = read_img.astype('float32')
                                read_img_bbox = read_img[y1:y2, x1:x2, :]
                                resize_img = cv2.resize(read_img_bbox, (300, 300))
                                resize_img /= 255.0
                                training_data.append(resize_img)
                                print("len Of Training_data", len(training_data))
                                # append the label only once (the Windows-style split('\\') duplicate was removed)
                                training_labels.append(int(cipimg.split('/')[8]))
                                del resize_img
                                print("len Of Training Labels", len(training_labels))
                                gc.collect()
                            except Exception as e:
                                print("Error", str(e), cip)
                            count = count + 1
                            print(count)
                    txt.flush()
                    txt.close()

np.save('/run/media/fdai5182/LAMA MADAN/Training_Data_4000Samples', training_data)
np.save('/run/media/fdai5182/LAMA MADAN/Training_Labels_4000Samples', training_labels)
print("DONE")
But it always gives me a huge memory error after reading the images, even on 32 GB of RAM.
So I want to take a different approach that may use less memory and get this working.
The steps I want to take are as follows (see the sketch after this list):
1) Allocate a np array X of shape (N, 150, 150, 3) or (N, 300, 300, 3) with dtype float32 (allocated as float32, not converted with astype).
2) Iterate through the images and fill each row of X with the 150x150x3 image pixels.
3) Normalize in place: X /= 255.
4) Write to a file (.npy format).
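One way to follow these steps without ever holding a Python list of images in memory is to back the array with an .npy file on disk through numpy's memmap support. A rough sketch (the output filename and N are placeholders, not taken from the original code):

import numpy as np

N = 27000  # total number of images, counted beforehand (placeholder value)
X = np.lib.format.open_memmap('training_data.npy', mode='w+',
                              dtype='float32', shape=(N, 150, 150, 3))

# inside the image loop, write each preprocessed image straight into row i:
#     X[i] = resize_image / 255.0
#     i += 1

X.flush()  # make sure everything is written to disk; the .npy file is already saved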
What I have done so far is:
import os
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpg
import numpy as np
bbox = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/labels"
imagepath = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/image"
for root, _, files in os.walk(bbox):
    cdp = os.path.abspath(root)
    for rootImage, _, fileImage in os.walk(imagepath):
        cdpimg = os.path.abspath(rootImage)
        for f in files:
            ct = 0
            name, ext = os.path.splitext(f)
            for fI in fileImage:
                n, e = os.path.splitext(fI)
                if name == n and ext == ".txt" and e == ".jpg":
                    # allocate directly as float32 instead of reinterpreting bytes with .view()
                    nparrayX = np.zeros((150, 150, 3), dtype='float32')
                    cip = os.path.join(cdp, f)
                    cipImg = os.path.join(cdpimg, fI)
                    read_image = mpg.imread(cipImg)  # read the image file, not the label file
                    resize_image = cv2.resize(read_image, (150, 150))
Am I on the right path?
Also, how can I fill each row of the image array with 150x150x3 image pixels? I don't want to use lists anymore, as they take more memory and are time consuming.
Please help me through this.
Also, as a new member, if the question does not follow the rules of Stack Overflow, please tell me and I will edit it further.
Thank you,
Both tensorflow/keras and pytorch provide dataset/generator classes, which you can use to construct memory-efficient data loaders.
For tensorflow/keras there is an excellent tutorial created by Stanford's Shervine Amidi.
For pytorch you can find a good tutorial on the project's main page.
I would strongly suggest making use of these frameworks for your implementation, since they let you avoid writing boilerplate code and make your training scalable.
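For illustration, a minimal sketch of such a generator with tf.keras.utils.Sequence, assuming the image paths and integer labels have already been collected into two lists (the class name and parameters here are made up for illustration):

import cv2
import numpy as np
import tensorflow as tf

class CarDataset(tf.keras.utils.Sequence):
    """Loads and preprocesses one batch of images at a time instead of all 27000 at once."""

    def __init__(self, image_paths, labels, batch_size=32, size=(150, 150)):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.size = size

    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, idx):
        batch_paths = self.image_paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_labels = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch = np.zeros((len(batch_paths), *self.size, 3), dtype='float32')
        for i, path in enumerate(batch_paths):
            img = cv2.imread(path)            # BGR, uint8
            img = cv2.resize(img, self.size)
            batch[i] = img.astype('float32') / 255.0
        return batch, np.asarray(batch_labels)

# model.fit(CarDataset(train_paths, train_labels), epochs=10) would then stream batches from disk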
Thank you for your help. But I wanted to do it manually to see how we can do it without using other generators. Below is my code.
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpg
import numpy as np
import os
N = 0
training_labels = []
bbox = "D:/Morethan4000samples/data/labels"
imagepath = "D:/Morethan4000samples/data/image/"
for root, _, files in os.walk(imagepath):
    cdp = os.path.abspath(root)
    for f in files:
        name, ext = os.path.splitext(f)
        if ext == ".jpg":
            cip = os.path.join(cdp, f)
            N += 1
print(N)
imageX = np.zeros((N,227,227,3), dtype='float32')
i = 0
for root, _, files in os.walk(imagepath):
    cdp = os.path.abspath(root)
    print(cdp)
    for f in files:
        ct = 0
        name, ext = os.path.splitext(f)
        if ext == ".jpg":
            cip = os.path.join(cdp, f)
            read = mpg.imread(cip)
            cipLabel = cip.replace('image', 'labels')
            cipLabel = cipLabel.replace('.jpg', '.txt')
            nameL, extL = os.path.splitext(cipLabel)
            if extL == '.txt':
                boxes = open(cipLabel, 'r')
                for q in boxes:
                    ct = ct + 1
                    if ct == 3:
                        x1 = int(q.rsplit(' ')[0])
                        y1 = int(q.rsplit(' ')[1])
                        x2 = int(q.rsplit(' ')[2])
                        y2 = int(q.rsplit(' ')[3])
                        readimage = read[y1:y2, x1:x2]
                        resize = cv2.resize(readimage, (227, 227))
                        resize = cv2.GaussianBlur(resize, (5, 5), 0)
                        imageX[i] = resize
                        # training_labels.append(int(cip.split('\\')[4]))
                        training_labels.append(int(cip.split('/')[8]))
                        print(len(training_labels), len(imageX))
                        i += 1
                        print(i)
imageX /= 255.0
plt.imshow(imageX[10])
plt.show()
print(imageX.shape)
print(len(training_labels))
np.save("/run/media/fdai5182/LAMA MADAN/Morethan4000samples/227227/training_images", imageX)
np.save("/run/media/fdai5182/LAMA MADAN/Morethan4000samples/227227/trainin_labels",training_labels)
Saving each of your images as a row of a matrix with the same dimensions is the most efficient way to do it.
I have two programs: the first converts .h5 files to .tiff files, and the second predicts temperature using an ARIMA model.
Here is the first program, which converts the .h5 files into min and max GeoTIFF (.tif) files.
from osgeo import gdal
import numpy as np
import os
import h5py
from collections import defaultdict
from osgeo import osr
import datetime
in_dir = r'/Users/sunnybhargav/Desktop/jan'
out_dir = r'/Users/sunnybhargav/Desktop/new_output'
#in_dir = input('Enter input directory path where hdf files are stored: ')
#out_dir = input('Enter output directory path where geotiff files are to be stored: ')
def arrayToTif(array, tifFilePath, proj, transform, nodatavalue):
    with open(tifFilePath, 'a') as file:
        pass
    # write raster
    out_ds = gdal.GetDriverByName('GTiff').Create(tifFilePath,
                                                  array.shape[1],
                                                  array.shape[0],
                                                  1,  # number of bands
                                                  gdal.GDT_Float32)
    out_ds.GetRasterBand(1).WriteArray(array)
    out_ds.GetRasterBand(1).SetNoDataValue(nodatavalue)
    # close tif to write it to disk (free the tif file)
    out_ds = None
dates_dict = defaultdict(list)
for root, directories, filenames in os.walk(in_dir):
    for filename in filenames:
        if filename.endswith('.h5'):
            hdffileDate = filename[6:15]
            hdfdate = int(hdffileDate[0:2])
            dates_dict[hdfdate].append(filename)
            print(filename)

for key in dates_dict.keys():
    file_list = dates_dict[key]
    min_lst = 1000 * np.ones((2816, 2805))
    max_lst = -1000 * np.ones((2816, 2805))
    for v in file_list:
        hdf_ds = h5py.File(os.path.join(in_dir, v))
        lst = np.array(hdf_ds['LST'])[0, :, :]
        hdf_ds = gdal.Open(os.path.join(in_dir, v))
        metadata = hdf_ds.GetMetadata_Dict()
        lst = lst.astype('Float32')
        max_lst = np.maximum(max_lst, lst)
        lst[lst == -999] = 999
        min_lst = np.minimum(min_lst, lst)
    min_lst[min_lst == 999] = -999
    transform = (0, 1, 0, 0, 0, -1)
    proj = None
    nodatavalue = -999
    tiffileDate = v[6:15]
    MinName = 'MIN' + v[0:2] + str.lower(tiffileDate) + '.tif'
    MaxName = 'MAX' + v[0:2] + str.lower(tiffileDate) + '.tif'
    arrayToTif(max_lst, os.path.join(out_dir, MaxName), proj, transform, nodatavalue)
    arrayToTif(min_lst, os.path.join(out_dir, MinName), proj, transform, nodatavalue)
    del lst
    del min_lst
    del max_lst
Second program:
The second program reads each TIFF into an ndarray, extracts the maximum temperature at a particular point, and then predicts the next 5 days.
import pandas as pd
import seaborn as sns
import matplotlib
import numpy as np
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from numpy import genfromtxt
import csv
import datetime
from datetime import datetime
import time
from matplotlib import pyplot
from pandas import Series
from statsmodels.tsa.arima_model import ARIMA
import numpy
from statsmodels.tsa.stattools import adfuller
import matplotlib.pylab as plt
from statsmodels.tsa.stattools import acf, pacf
from sklearn.metrics import mean_squared_error
import numpy as np
import subprocess
import gdal,osr
from gdalconst import *
import os
import numpy as np
from PIL import Image
import scipy.misc
from datetime import datetime
# import timeseries as ts
count = 1
max_temp = []
min_temp = []
filename = []
filenamer = []
max_temp_points = []
min_temp_points = []
source = r'/Volumes/bhargav 1/data/NEW_MAX'
for root, dirs, filenames in os.walk(source):
    print(filenames)
    for f in filenames:
        print(f)
        dataset = gdal.Open(source + '//' + f, gdal.GA_ReadOnly)
        # print(dataset)
        geotransform = dataset.GetGeoTransform()
        band = dataset.GetRasterBand(1)
        data = band.ReadAsArray(0, 0, dataset.RasterXSize, dataset.RasterYSize).astype(np.float64)
        # print(np.histogram(data, bins=500))
        print(np.shape(data))
        max_temp_point = data[793][1160]
        max_temp_point = max_temp_point - 273
        print(max_temp_point)
        print("Count:", count)
        max_temp_points.append(max_temp_point)
        count = count + 1

print(np.shape(max_temp_points))
print(np.mean(max_temp_points))
count = 1
np.save("Max_temp_points_1",max_temp_points)
X = max_temp_points
model = ARIMA(X, order=(5,0,4))
model_fit = model.fit(disp=-1)
# print summary of fit model
print(model_fit.summary())
forecast = model_fit.predict()
print (forecast)
# plot
start_index = len(X)
end_index = start_index + 6
predict_val = model_fit.predict(start=start_index, end=end_index)
print('Prediction:',predict_val)
pyplot.plot(X)
pyplot.plot(forecast, color='red')
pyplot.show()
I need to apply audio to a video at a certain time with a certain duration, but some of the audio clips are longer (or shorter) than needed. How can I change the speed of the audio without changing the pitch? I tried changing the fps (by multiplying by the ratio of the needed duration to the audio duration), but it does not work the way I want.
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
from moviepy.audio.AudioClip import AudioArrayClip

original = VideoFileClip("orig.mp4")
clips = [original.audio.volumex(0.3)]
subs = []  # some array of objects with .start and .end times
i = 0
for sub in subs:
    clip = AudioFileClip("\\temp{}.mp3".format(i))
    mult = clip.duration / (sub.end - sub.start) + 0.00001
    clip = AudioArrayClip(clip.to_soundarray(buffersize=500, fps=24000 / mult),
                          fps=24000).set_start(sub.start).set_end(sub.end)
    clips.append(clip)
    i += 1
final = CompositeAudioClip(clips)
final.write_audiofile("final.mp3")
You can use the librosa module:
from scipy.io import wavfile
import librosa, numpy as np
song, fs = librosa.load("song.wav")
song_2_times_faster = librosa.effects.time_stretch(song, rate=2)
wavfile.write("song_2_times_faster.wav", fs, song_2_times_faster)  # save the song
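To match a target duration rather than a fixed factor, the stretch rate can be computed from the clip's current length. A small sketch, assuming a mono clip loaded with librosa (the filenames and target duration are placeholders):

import librosa
import soundfile as sf

y, sr = librosa.load("temp0.mp3", sr=None)   # placeholder input clip
current_dur = len(y) / sr
target_dur = 4.0                             # seconds the clip should occupy (placeholder)
rate = current_dur / target_dur              # > 1 speeds up, < 1 slows down
stretched = librosa.effects.time_stretch(y, rate=rate)
sf.write("temp0_stretched.wav", stretched, sr)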
Using wave: change the sampling rate (note that raising the frame rate speeds the audio up but also shifts the pitch).
import wave
CHANNELS = 1
swidth = 2
Change_RATE = 2
spf = wave.open('VOZ.wav', 'rb')
RATE=spf.getframerate()
signal = spf.readframes(-1)
wf = wave.open('changed.wav', 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(swidth)
wf.setframerate(RATE*Change_RATE)
wf.writeframes(signal)
wf.close()