Segmentation fault (core dumped) in scikit-learn Dictionary Learning - python

I have written a code for Dictionary Learning. It is working fine for 100 Images but if I use 200 Images to learn Dictionary I am getting Segment fault error .
Here is my code =>
from time import time
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.feature_extraction.image import reconstruct_from_patches_2d
from sklearn.utils.fixes import sp_version
from sklearn.datasets import load_sample_image
from scipy import ndimage
from skimage import color
from skimage import io
from PIL import Image
from sklearn.decomposition import SparseCoder
from sklearn.decomposition import sparse_encode
#from skimage import data,restoration
from scipy.misc import imfilter, imread
from scipy.signal import convolve2d as conv2
from skimage import data, img_as_float
from scipy import ndimage as ndi
from skimage import feature
from scipy.misc import imsave
from os import listdir
patch_size = (8,8)
reshape_size = (512,512)
c = np.asarray(Image.open('test/ILSVRC2012_test_00016644.JPEG').resize((512,512), Image.ANTIALIAS))
data = extract_patches_2d(c,(8,8))
total_img = 0
for f in listdir('test/'):
if f.endswith('.JPEG') and f.startswith('I'):
if (total_img <= 200):
total_img = total_img + 1
print(f)
print('data shape : ',data.shape)
print('total file : ',total_img)
A = np.asarray(Image.open('test/'+f).resize(reshape_size, Image.ANTIALIAS))
X = extract_patches_2d(A,patch_size)
data = np.append(data,X,axis=0)
print('patch shape',X.shape)
else:
break
print('Total number of Image : ',total_img)
print('Total Patches : ',data.shape)
print('total size of the array : ',data.nbytes)
n0 = np.asarray(Image.open('data2/004.jpg').resize((512,512), Image.ANTIALIAS))
n0 = n0 / 255
height, width, channel = n0.shape
n0 = n0 + 0.075 * np.random.randn(height, width,3)
n0 = n0 * 255
imsave('gray.png',n0)
patchsize = (8,8)
t0 = time()
data = data.reshape(data.shape[0], -1)
print('Extract patch shape :',data.shape)
data = data - np.mean(data, axis=0)
data = data / np.std(data, axis=0)
t1 = time()
print('Total time : ',round((t1-t0),2),' sec')
print('Learning the dictionary ....')
t2 = time()
n_iter = 5000
dico = MiniBatchDictionaryLearning(n_components=100,alpha=3,n_iter=n_iter)
V = dico.fit(data).components_
print('Dic shape : ',V.shape)
t3 = time()
print('No of iteration : ',n_iter)
print('Total time taken for Dictionary learning : ',round((t3-t2),2),' sec')
I have sufficient RAM around 500GB. But I am getting segment fault error.
How do I Fix this??

Related

Trying to Plot FFT for an Image Array

I'm trying to create a signal plot for an array of pictures using the following code:
import numpy as np
import sys
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
imgArr = {}
stnArr = {}
frmArr = {}
sgnArr = {}
for i in range(1,2397):
imgArr[i] = mpimg.imread("20210209_themis_rank"+ str(i)+ ".png")
stnArr[i] = np.mean([imgArr[i]]/std(imgArr[i]))
frmArr[i] = i
signal = np.fft.fft(imgArr[i])
for i in range(1,2397):
plt.plot(frmArr,np.abs(signal))
plt.show()
However, I keep on running into the following error. How can I get it to work?
raise ValueError(f"x and y must have same first dimension, but "
ValueError: x and y must have same first dimension, but have shapes (1,) and (600, 600, 4)

To fix an error : could not broadcast input array from shape (5,5) into shape (5,5,4)

I want to preprocess such an image dataset using an unsupervised wiener algorithm. But it doesn't work properly. when I run the code, it shows me a value attribute error. For convenience, my code is given below -
import cv2
import glob
from matplotlib import pyplot as plt
from skimage import io, restoration, img_as_float
import scipy.stats as st
import numpy as np
dataset = glob.glob('input/train/*.png')
directory = 'output/train/'
for img_id, img_path in enumerate(dataset):
img = img_as_float(io.imread(img_path))
def gkern(kernlen=21, nsig=2): #Returns a 2D Gaussian kernel.
lim = kernlen//2 + (kernlen % 2)/2
x = np.linspace(-lim, lim, kernlen+1)
kern1d = np.diff(st.norm.cdf(x))
kern2d = np.outer(kern1d, kern1d)
return kern2d/kern2d.sum()
psf = gkern(5,3) #Kernel length and sigma
deconvolved, _ = restoration.unsupervised_wiener(img, psf)
cl2 = cv2.resize(deconvolved, (512,512), interpolation = cv2.INTER_CUBIC)
plt.imsave(f"output/unsupervised_{img_id}.png", cl2, cmap='gray')
I am getting the error :
File "C:\Users\Junaed\.spyder-py3\unsupervised_wiener.py", line 33, in <module>
deconvolved, _ = restoration.unsupervised_wiener(img, psf)
ValueError: could not broadcast input array from shape (5,5) into shape (5,5,4)
How could I fix this issue, Can someone help me here?

normalizing mel spectrogram to unit peak amplitude?

I am new to both python and librosa. I am trying to follow this method for a speech recognizer: acoustic front end
My code:
import librosa
import librosa.display
import numpy as np
y, sr = librosa.load('test.wav', sr = None)
normalizedy = librosa.util.normalize(y)
stft = librosa.core.stft(normalizedy, n_fft = 256, hop_length=16)
mel = librosa.feature.melspectrogram(S=stft, n_mels=32)
melnormalized = librosa.util.normalize(mel)
mellog = np.log(melnormalized) - np.log(10**-5)
The problem is that when I apply librosa.util.normalize to variable mel, I expect values to be between 1 and -1, which they aren't. What am I missing?
If you want your output to be log-scaled and normalized to between -1 and +1, you should log-scale first, then normalize:
import librosa
import librosa.display
import numpy as np
y, sr = librosa.load('test.wav', sr = None)
normalizedy = librosa.util.normalize(y)
stft = librosa.core.stft(normalizedy, n_fft = 256, hop_length=16)
mel = librosa.feature.melspectrogram(S=stft, n_mels=32)
mellog = np.log(mel + 1e-9)
melnormalized = librosa.util.normalize(mellog)
# use melnormalized

python program to convert mapper and reducer program for hadoop

I have two programs that the first program is about to convert .h5 file to .tiff file and second program is predict temperature using arima model.
here is first program for converting .h5 file to .tiff file and min and max .tiff and it also call geo tiff.
from osgeo import gdal
import numpy as np
import os
import h5py
from collections import defaultdict
from osgeo import osr
import datetime
in_dir = r'/Users/sunnybhargav/Desktop/jan'
out_dir = r'/Users/sunnybhargav/Desktop/new_output'
#in_dir = input('Enter input directory path where hdf files are stored: ')
#out_dir = input('Enter output directory path where geotiff files are to be stored: ')
def arrayToTif(array,tifFilePath,proj,transform,nodatavalue):
with open(tifFilePath,'a') as file:
pass
# write raster
out_ds = gdal.GetDriverByName('GTiff').Create(tifFilePath,
array.shape[1],
array.shape[0],
1, #Number of bands
gdal.GDT_Float32)
out_ds.GetRasterBand(1).WriteArray(array)
out_ds.GetRasterBand(1).SetNoDataValue(nodatavalue)
# close tif to write into disk (free tif file)
out_ds = None
dates_dict = defaultdict(list)
for root,directories,filenames in os.walk(in_dir):
for filename in filenames:
if (filename.endswith('.h5')):
hdffileDate = filename[6:15]
hdfdate = (int(hdffileDate[0:2]))
dates_dict[hdfdate].append(filename)
print(filename)
for key in dates_dict.keys():
file_list = dates_dict[key]
min_lst = 1000*np.ones((2816,2805))
max_lst = -1000*np.ones((2816,2805))
for v in file_list:
hdf_ds = h5py.File(os.path.join(in_dir,v))
lst = np.array(hdf_ds['LST'])[0,:,:]
hdf_ds = gdal.Open(os.path.join(in_dir,v))
metadata = hdf_ds.GetMetadata_Dict()
lst = lst.astype('Float32')
max_lst = np.maximum(max_lst,lst)
lst[lst==-999] = 999
min_lst = np.minimum(min_lst,lst)
min_lst[min_lst==999] = -999
transform = (0,1,0,0,0,-1)
proj = None
nodatavalue = -999
tiffileDate = v[6:15]
MinName = 'MIN' +v[0:2]+str.lower(tiffileDate) + '.tif'
MaxName = 'MAX' +v[0:2]+str.lower(tiffileDate) + '.tif'
arrayToTif
(max_lst,os.path.join(out_dir,MaxName),proj,transform,nodatavalue)
arrayToTif
(min_lst,os.path.join(out_dir,MinName),proj,transform,nodatavalue)
del lst
del min_lst
del max_lst
second program
in second program is for tiff to get ndarray and then get out put for particular all max temprature and predict next 5 day prediction.
import pandas as pd
import seaborn as sns
import matplotlib
import numpy as np
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from numpy import genfromtxt
import csv
import datetime
from datetime import datetime
import time
from matplotlib import pyplot
from pandas import Series
from statsmodels.tsa.arima_model import ARIMA
import numpy
from statsmodels.tsa.stattools import adfuller
import matplotlib.pylab as plt
from statsmodels.tsa.stattools import acf, pacf
from sklearn.metrics import mean_squared_error
import numpy as np
import subprocess
import gdal,osr
from gdalconst import *
import os
import numpy as np
from PIL import Image
import scipy.misc
from datetime import datetime
# import timeseries as ts
count = 1
max_temp = []
min_temp = []
filename = []
filenamer = []
max_temp_points = []
min_temp_points = []
source = r'/Volumes/bhargav 1/data/NEW_MAX'
for root, dirs, filenames in os.walk(source):
print(filenames)
for f in filenames:
print (f)
dataset = gdal.Open( source + '//' + f ,gdal.GA_ReadOnly)
#print(dataset)
geotransform = dataset.GetGeoTransform()
band = dataset.GetRasterBand(1)
data = band.ReadAsArray(0,0,dataset.RasterXSize,dataset.RasterYSize).astype(np.float64)
#print(np.histogram(data,bins=500))
print(np.shape(data))
max_temp_point = data[793][1160]
max_temp_point = max_temp_point - 273
print(max_temp_point)
print("Count:",count)
max_temp_points.append(max_temp_point)
count = count + 1
print(np.shape(max_temp_points))
print(np.mean(max_temp_points))
count = 1
np.save("Max_temp_points_1",max_temp_points)
X = max_temp_points
model = ARIMA(X, order=(5,0,4))
model_fit = model.fit(disp=-1)
# print summary of fit model
print(model_fit.summary())
forecast = model_fit.predict()
print (forecast)
# plot
start_index = len(X)
end_index = start_index + 6
predict_val = model_fit.predict(start=start_index, end=end_index)
print('Prediction:',predict_val)
pyplot.plot(X)
pyplot.plot(forecast, color='red')
pyplot.show()

Is that the expected result on ndimage.gaussian_filter?

I'm trying to calculate the discrete derivative using gaussian_filter from scipy.ndimage and so the output is presenting some strange behavior with boundary conditions. The code is below:
from scipy import ndimage
import numpy as np
import matplotlib.pyplot as plt
y = np.linspace(0.2*np.pi,0.7*np.pi,100)
U = np.sin(y)
sg = 1
Uy = ndimage.gaussian_filter1d(U, sigma=sg,order=1,mode='constant',cval=0)
Uy2 = ndimage.gaussian_filter1d(U, sigma=sg,order=1,mode='nearest')
Uy3 = ndimage.gaussian_filter1d(U, sigma=sg,order=1,mode='reflect')
Uy4 = ndimage.gaussian_filter1d(U, sigma=sg,order=1,mode='mirror')
Uy5 = ndimage.gaussian_filter1d(U, sigma=sg,order=1,mode='wrap')
fig,(a1,a2) = plt.subplots(1,2)
a1.plot(U , y,label='data')
a2.plot(Uy, y,label='constant')
a2.plot(Uy2,y,label='nearest')
a2.plot(Uy3,y,label='reflect')
a2.plot(Uy4,y,label='mirror')
a2.plot(Uy5,y,label='wrap')
a1.legend(loc='best')
a2.legend(loc='best')
What happened? Constant mode should result cval on boudary? Is that the expected result?

Categories