Below is the snippet that is giving me a MemoryError when the counter reaches about 53009525. I am running this on an Ubuntu virtual machine with 12GB of memory.
from collections import defaultdict
from collections import OrderedDict
import math
import numpy as np
import operator
import time
from itertools import product
class State():
    def __init__(self, field1, field2, field3, field4, field5, field6):
        self.lat = field1
        self.lon = field2
        self.alt = field3
        self.temp = field4
        self.ws = field5
        self.wd = field6
...
trans = defaultdict(dict)
freq = {}
...
matrix_col = {}
matrix_col = OrderedDict(sorted(freq.items(), key=lambda t: t[0].lat, reverse=True))
trans_mat = []
counter = 0
for u1, u2 in product(matrix_col, matrix_col):
    print counter, time.ctime()
    if (u1 in trans) and (u2 in trans[u1]):
        trans_mat.append([trans[u1][u2]*1.0])
    else:
        trans_mat.append([[0.0001]])
    counter += 1
trans_mat = np.asarray(trans_mat)
trans_mat = np.reshape(trans_mat, (10734, 10734))
print trans_mat
Both freq and trans are keyed by State objects. Any help is appreciated. Here is the error:
...
53009525 Mon Oct 12 18:11:16 2015
Traceback (most recent call last):
File "hmm_freq.py", line 295, in <module>
trans_mat.append([[0.0001]])
MemoryError
It looks as though on each iteration you are appending a Python float inside two nested lists to trans_mat. You can check the size of each element using sys.getsizeof:
import sys
# each of the two nested lists is 80 bytes
print(sys.getsizeof([]))
# 80
# a Python float object is 24 bytes
print(sys.getsizeof(0.0001))
# 24
On each iteration you are appending 2 * 80 + 24 = 184 bytes to trans_mat. After 53009525 iterations you will have appended 9753752600 bytes or 9.75GB.
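For reference, the same arithmetic in code:
per_iteration = 2 * 80 + 24       # two list objects plus one float
print(per_iteration * 53009525)
# 9753752600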
A very simple way to make this much more memory-efficient would be to store the results directly to a numpy array rather than in nested lists:
trans_mat = np.empty(10734 * 10734, np.double)
counter = 0
for u1, u2 in product(matrix_col, matrix_col):
    print counter, time.ctime()
    if (u1 in trans) and (u2 in trans[u1]):
        # you may need to check that this line yields a float
        # (it's hard for me to tell exactly what `trans` is from your code snippet)
        trans_mat[counter] = trans[u1][u2]*1.0
    else:
        trans_mat[counter] = 0.0001
    counter += 1
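After the loop you can restore the square shape, as in your original code:
trans_mat = trans_mat.reshape((10734, 10734))
print trans_mat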
For reference:
# the size of the Python container is 80 bytes
print(sys.getsizeof(trans_mat))
# 80
# the size of the array buffer is 921750048 bytes or 921 MB
print(trans_mat.nbytes)
# 921750048
I have a FASTA file that looks like this:
>Spike|hCoV-19/Wuhan/WIV04/2019|2019-12-30|EPI_ISL_402124|Original|hCoV-19^^Hubei|Human|Wuhan Jinyintan Hospital|Wuhan Institute of Virology|Shi|China
MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*
>Spike|hCoV-19/Philippines/PH-PGC-03696/2020|2020-12-23|EPI_ISL_2155626|Original|hCoV-19^^Central Luzon|Human|Research Institute for Tropical Medicine|Philippine Genome Center|Tablizo|Philippines
MFVFLVLLPLVFSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYYPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*
>Spike|hCoV-19/Belgium/UZA-UA-8350/2021|2021-01-22|EPI_ISL_940774|Original|hCoV-19^^Berchem|Human|Platform BIS UZA/UAntwerpen|UAntwerp|Xavier|Belgium
MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNTVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAQHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCT*
I need to sort these sequences based on the date column. I found this code on Stack Overflow but it doesn't do the job for me:
from Bio.SeqIO.FastaIO import SimpleFastaParser
import pandas as pd
with open('F:/newone.fasta') as fasta_file:
    identifiers = []
    lengths = []
    seq = []
    for title, sequence in SimpleFastaParser(fasta_file):
        identifiers.append(title.split(None, 3)[0])
        lengths.append(len(sequence))
        seq.append(sequence)

#converting lists to pandas Series
s1 = pd.Series(identifiers, name='ID')
s2 = pd.Series(lengths, name='length')
s3 = pd.Series(seq, name='seq')
Qfasta = pd.DataFrame(dict(ID=s1, length=s2)).set_index(['ID'])
This is the error that happens with the second piece of code, and I don't know why:
IndexError Traceback (most recent call last)
in <module>
12 SeqIO.write(records, output_file, "fasta")
13
---> 14 sort_fasta(input_file, output_file)
in sort_fasta(input_file, output_file)
8 def get_data(id_name):
9 return (id_name.split("|")[2], seguid(id_name))
---> 10 dict_fasta = SeqIO.index(input_file, "fasta", key_function=get_data)
11 records = (dict_fasta[i] for i in sorted(list(dict_fasta), reverse=True, key = lambda d: list(map(int, d[0].split('-')))))
12 SeqIO.write(records, output_file, "fasta")
~\anaconda3\envs\deeplearning\lib\site-packages\Bio\SeqIO\__init__.py in index(filename, format, alphabet, key_function)
873 key_function,
874 )
--> 875 return _IndexedSeqFileDict(
876 proxy_class(filename, format), key_function, repr, "SeqRecord"
877 )
~\anaconda3\envs\deeplearning\lib\site-packages\Bio\File.py in __init__(self, random_access_proxy, key_function, repr, obj_repr)
185 offset_iter = random_access_proxy
186 offsets = {}
--> 187 for key, offset, length in offset_iter:
188 # Note - we don't store the length because I want to minimise the
189 # memory requirements. With the SQLite backend the length is kept
~\anaconda3\envs\deeplearning\lib\site-packages\Bio\File.py in <genexpr>(.0)
181 self._obj_repr = obj_repr
182 if key_function:
--> 183 offset_iter = ((key_function(k), o, l) for (k, o, l) in random_access_proxy)
184 else:
185 offset_iter = random_access_proxy
in get_data(id_name)
7 def sort_fasta(input_file, output_file):
8 def get_data(id_name):
----> 9 return (id_name.split("|")[2], seguid(id_name))
10 dict_fasta = SeqIO.index(input_file, "fasta", key_function=get_data)
11 records = (dict_fasta[i] for i in sorted(list(dict_fasta), reverse=True, key = lambda d: list(map(int, d[0].split('-')))))
What should I do about this?
With the following code, the FASTA entries in the input file are sorted by date and saved to the output file using the SeqIO.index function, so it should also work with a file too big to fit in memory. Headers without a parsable date get the sentinel key '0001-01-01' and are skipped when the sorted records are written.
import re
from Bio import SeqIO
from Bio.SeqUtils.CheckSum import seguid
input_file = "fasta.fasta"
output_file = "out.fasta"
def sort_fasta(input_file: str, output_file: str) -> None:
    def get_index_key(id_name: str) -> tuple:
        try:
            key = (re.search(r'\d{4}-\d{2}-\d{2}', id_name).group(), seguid(id_name))
        except AttributeError:
            key = ('0001-01-01', seguid(id_name))
        return key

    dict_fasta = SeqIO.index(input_file, "fasta", key_function=get_index_key)
    sorted_keys_by_date = sorted(dict_fasta, reverse=True, key=lambda d: list(map(int, d[0].split('-'))))
    records = (dict_fasta[i] for i in sorted_keys_by_date if i[0] != '0001-01-01')
    SeqIO.write(records, output_file, "fasta")

sort_fasta(input_file, output_file)
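As a quick check, the key function's regex pulls the date out of headers like yours (first header from your file, truncated here):
header = "Spike|hCoV-19/Wuhan/WIV04/2019|2019-12-30|EPI_ISL_402124|Original"
print(re.search(r'\d{4}-\d{2}-\d{2}', header).group())
# 2019-12-30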
You can split the string on \n>, and sort on the extracted date using a combination of sorted and re.search to set the date as the key.
Pass reverse=True to sorted if you want the most recent date first.
I am assuming the string fasta as input here.
import re
sorted_fasta = ('>' + '\n>'.join(sorted(fasta[1:].strip().split('\n>'),
                                        key=lambda s: re.search(r'\|\d{4}-\d{2}-\d{2}\|',
                                                                s).group())))
example input:
>xxx|2020-12-30|xxx
NNN
>yyy|2020-12-23|yyy
NNN
>zzz|2021-01-22|zzz
NNN
matching output (ascending by date, since reverse=True was not passed):
>yyy|2020-12-23|yyy
NNN
>xxx|2020-12-30|xxx
NNN
>zzz|2021-01-22|zzz
NNN
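With reverse=True passed to sorted, the same input comes out newest first:
>zzz|2021-01-22|zzz
NNN
>xxx|2020-12-30|xxx
NNN
>yyy|2020-12-23|yyy
NNN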
I am trying to find the global minimum of a function using scipy.optimize methods and keep running into NoneType issues. I have tried multiple algorithms, including differential_evolution, shgo, and brute, but keep running into errors.
Here is the setup:
def sizing_trade_study(ranges, payload):
    with open("config.yml", "r") as yml:
        cfg = yaml.load(yml)
    first_int = True
    km = []
    for range in ranges:
        km.append(range * 1000)
    print(km)
    params = (km, payload)
    if first_int:
        x0 = [float(cfg['design_variables']['initial_guess']['prop_radius']),
              float(cfg['design_variables']['initial_guess']['speed']),
              float(cfg['design_variables']['initial_guess']['battery_mass']),
              float(cfg['design_variables']['initial_guess']['motor_mass']),
              float(cfg['design_variables']['initial_guess']['mtow'])]
    lb = [float(cfg['design_variables']['lower_bound']['prop_radius']),
          float(cfg['design_variables']['lower_bound']['speed']),
          float(cfg['design_variables']['lower_bound']['battery_mass']),
          float(cfg['design_variables']['lower_bound']['motor_mass']),
          float(cfg['design_variables']['lower_bound']['mtow'])] # Min cruise at 1.3 * VStall
    ub = [float(cfg['design_variables']['upper_bound']['prop_radius']),
          float(cfg['design_variables']['upper_bound']['speed']),
          float(cfg['design_variables']['upper_bound']['battery_mass']),
          float(cfg['design_variables']['upper_bound']['motor_mass']),
          float(cfg['design_variables']['upper_bound']['mtow'])]
    # bounds = (slice(lb[0], ub[0]), slice(lb[1], ub[1]), slice(lb[2], ub[2]), slice(lb[3], ub[3]), slice(lb[4], ub[4]))
    # bounds = [(lb[0], ub[0]), (lb[1], ub[1]), (lb[2], ub[2]), (lb[3], ub[3]), (lb[4], ub[4])]
    bounds = optimize.Bounds(lb, ub)
    result = optimize.differential_evolution(objective_function, bounds, args=(params,))
    print(result)

def objective_function(x, *params):
    global trials
    trials = trials+1
    print(trials)
    performance.compute_performance(x, params[0][0], params[0][1])
Here is the function I am trying to optimize:
import yaml
import simple_mission
import reserve_mission
import config_weight
def compute_performance(x, range, payload):
    rprop = x[0]
    speed = x[1]
    battery = x[2]
    motors = x[3]
    mtow = x[4]
    w = mtow * 9.8
    with open("config.yml", "r") as yml:
        cfg = yaml.load(yml)
    bat_energy_density = int(cfg['performance']['bat_energy_density'])
    motor_power_density = int(cfg['performance']['motor_power_density'])
    discharge_depth = float(cfg['performance']['discharge_depth'])
    e_nominal, flight_time, hover_output, cruise_output = simple_mission.run_simple_mission(rprop, speed, w, range)
    reserve_e = reserve_mission.reserve_mission(rprop, speed, w, range)
    mass = config_weight.config_weight(battery, motors, rprop, w, mtow, hover_output, cruise_output, payload)
    batt = reserve_e - battery * bat_energy_density * discharge_depth / 1000
    motor = hover_output.pow_hover / 1000 - motors * motor_power_density
    weight = mass - w
    return batt + motor + weight
The failure doesn't happen immediately but after a number of evaluations of the objective function. For example, with differential_evolution it always happens after the 75th trial.
Here is the stacktrace:
69
70
71
72
73
74
75
76
Traceback (most recent call last):
File "sizing_trade_study.py", line 62, in <module>
sizing_trade_study(args.ranges, args.payload)
File "sizing_trade_study.py", line 42, in sizing_trade_study
result = optimize.differential_evolution(objective_function, bounds, args=(params,))
File "/usr/local/anaconda3/envs/simple_mission/lib/python3.7/site-packages/scipy/optimize/_differentialevolution.py", line 308, in differential_evolution
ret = solver.solve()
File "/usr/local/anaconda3/envs/simple_mission/lib/python3.7/site-packages/scipy/optimize/_differentialevolution.py", line 759, in solve
next(self)
File "/usr/local/anaconda3/envs/simple_mission/lib/python3.7/site-packages/scipy/optimize/_differentialevolution.py", line 1082, in __next__
self.constraint_violation[candidate]):
File "/usr/local/anaconda3/envs/simple_mission/lib/python3.7/site-packages/scipy/optimize/_differentialevolution.py", line 1008, in _accept_trial
return energy_trial <= energy_orig
TypeError: '>=' not supported between instances of 'float' and 'NoneType'
Any help is greatly appreciated!
The issue is that one of the values derived from your bounds or objective_function is None, which then reaches energy_orig inside differential_evolution() and is compared against a float.
Source: https://github.com/scipy/scipy/blob/master/scipy/optimize/_differentialevolution.py
if feasible_orig and feasible_trial:
    return energy_trial <= energy_orig
You should make sure that none of the key values from your config.yml or the other function parameters are empty. It's hard to tell from here which one is the problem, but in the meantime you can wrap the call in a try/except so it doesn't just stop on the 75th trial:
try:
    result = optimize.differential_evolution(
        objective_function,
        bounds,
        args=(params,),
    )
except TypeError:
    import pdb; pdb.set_trace()
I've set pdb, which will let you inspect the values of each parameter; feel free to swap it out for pass if you need to continue swiftly.
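That said, one concrete source of None is visible in the snippet you posted: objective_function calls performance.compute_performance(...) but never returns the result, so every trial evaluates to None. With the default popsize of 15 and 5 design variables, the initial population is 75 evaluations, which would explain the failure right after trial 75, when the first trial candidate is compared against the initial energies. A minimal fix:
def objective_function(x, *params):
    global trials
    trials = trials + 1
    print(trials)
    # return the objective value instead of discarding it
    return performance.compute_performance(x, params[0][0], params[0][1])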
I'm using pyalgotrade to create a trading strategy. I'm going through a list of tickers (testlist) and adding them to a dictionary (list_large) alongside their score, which I'm getting using a get_score function. My latest problem is that each ticker in the dictionary (list_large) is getting the same score. Any idea why?
Code:
from pyalgotrade import strategy
from pyalgotrade.tools import yahoofinance
import numpy as np
import pandas as pd
from collections import OrderedDict
from pyalgotrade.technical import ma
from talib import MA_Type
import talib
smaPeriod = 10
testlist = ['aapl','ddd','gg','z']
class MyStrategy(strategy.BacktestingStrategy):
    def __init__(self, feed, instrument):
        super(MyStrategy, self).__init__(feed, 1000)
        self.__position = []
        self.__instrument = instrument
        self.setUseAdjustedValues(True)
        self.__prices = feed[instrument].getPriceDataSeries()
        self.__sma = ma.SMA(feed[instrument].getPriceDataSeries(), smaPeriod)

    def get_score(self, slope):
        MA_Score = self.__sma[-1] * slope
        return MA_Score

    def onBars(self, bars):
        global bar
        bar = bars[self.__instrument]
        slope = 8
        for instrument in bars.getInstruments():
            list_large = {}
            for tickers in testlist: #replace with real list when ready
                list_large.update({tickers : self.get_score(slope)})
            organized_list = OrderedDict(sorted(list_large.items(), key=lambda t: -t[1])) #organize the list from highest to lowest score
            print list_large

def run_strategy(inst):
    # Load the yahoo feed from the CSV file
    feed = yahoofinance.build_feed([inst], 2015, 2016, ".")
    # Evaluate the strategy with the feed.
    myStrategy = MyStrategy(feed, inst)
    myStrategy.run()
    print "Final portfolio value: $%.2f" % myStrategy.getBroker().getEquity()

def main():
    instruments = ['ddd','msft']
    for inst in instruments:
        run_strategy(inst)

if __name__ == '__main__':
    main()
Check this part of the onBars() function:
slope = 8 # <---- value of slope = 8
for instrument in bars.getInstruments():
    list_large = {}
    for tickers in testlist: #replace with real list when ready
        list_large.update({tickers : self.get_score(slope)})
        # Updating dict of each ticker based on ^
Each time self.get_score(slope) is called it returns the same value, and hence every ticker key holds the same value in the dict.
I don't know how you want to deal with slope or how you want to update its value, but the dict-building logic can be simplified without using .update:
list_large = {}
for tickers in testlist:
    list_large[tickers] = self.get_score(slope)
    # ^ update the value of the `tickers` key
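Or, equivalently, as a dict comprehension (the scores will still be identical until get_score actually depends on the ticker):
list_large = {ticker: self.get_score(slope) for ticker in testlist}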
I am using the parallel programming module for Python (pp). I have a function that returns an array, but when I print the variable that holds the result of the parallelized function, it prints "pp._Task object at 0x04696510" instead of the value of the matrix.
Here is the code:
from __future__ import print_function
import scipy, pylab
from scipy.io.wavfile import read
import sys
import peakpicker as pea
import pp
import fingerprint as fhash
import matplotlib
import numpy as np
import tdft
import subprocess
import time
if __name__ == '__main__':
    start = time.time()
    #Peak picking dimensions
    f_dim1 = 30
    t_dim1 = 80
    f_dim2 = 10
    t_dim2 = 20
    percentile = 80
    base = 100 # lowest frequency bin used (peaks below are too common/not as useful for identification)
    high_peak_threshold = 75
    low_peak_threshold = 60
    #TDFT parameters
    windowsize = 0.008 #set the window size (0.008s = 64 samples)
    windowshift = 0.004 #set the window shift (0.004s = 32 samples)
    fftsize = 1024 #set the fft size (if srate = 8000, 1024 --> 513 freq. bins separated by 7.797 Hz from 0 to 4000Hz)
    #Hash parameters
    delay_time = 250 # 250*0.004 = 1 second#200
    delta_time = 250*3 # 750*0.004 = 3 seconds#300
    delta_freq = 128 # 128*7.797Hz = approx 1000Hz#80
    #Time pair parameters
    TPdelta_freq = 4
    TPdelta_time = 2
    #Loading the stored data
    database = np.loadtxt('database.dat')
    songnames = np.loadtxt('songnames.dat', dtype=str, delimiter='\t')
    separator = '.'
    print('Please enter an audio sample file to identify: ')
    userinput = raw_input('---> ')
    subprocess.call(['ffmpeg','-y','-i',userinput, '-ac', '1','-ar', '8k', 'filesample.wav'])
    sample = read('filesample.wav')
    userinput = userinput.split(separator,1)[0]
    print('Analyzing the audio sample: '+str(userinput))
    srate = sample[0] #sample rate in samples/second
    audio = sample[1] #audio data
    spectrogram = tdft.tdft(audio, srate, windowsize, windowshift, fftsize)
    mytime = spectrogram.shape[0]
    freq = spectrogram.shape[1]
    print('The size of the spectrogram is time: '+str(mytime)+' and freq: '+str(freq))
    threshold = pea.find_thres(spectrogram, percentile, base)
    peaks = pea.peak_pick(spectrogram,f_dim1,t_dim1,f_dim2,t_dim2,threshold,base)
    print('The initial number of peaks is:'+str(len(peaks)))
    peaks = pea.reduce_peaks(peaks, fftsize, high_peak_threshold, low_peak_threshold)
    print('The reduced number of peaks is:'+str(len(peaks)))
    #Store information for the spectrogram graph
    samplePeaks = peaks
    sampleSpectro = spectrogram
    hashSample = fhash.hashSamplePeaks(peaks,delay_time,delta_time,delta_freq)
    print('The dimensions of the hash matrix of the sample: '+str(hashSample.shape))
    # tuple of all parallel python servers to connect with
    ppservers = ()
    #ppservers = ("10.0.0.1",)
    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        # Creates jobserver with ncpus workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ppservers=ppservers)
    print("Starting pp with", job_server.get_ncpus(), "workers")
    print('Attempting to identify the sample audio clip.')
Here is where I call the function from fingerprint. The commented-out line worked, but when I try to parallelize it, it doesn't:
    timepairs = job_server.submit(fhash.findTimePairs, (database, hashSample, TPdelta_freq, TPdelta_time, ))
    # timepairs = fhash.findTimePairs(database, hashSample, TPdelta_freq, TPdelta_time)
    print(timepairs)
    #Compute number of matches by song id to determine a match
    numSongs = len(songnames)
    songbins = np.zeros(numSongs)
    numOffsets = len(timepairs)
    offsets = np.zeros(numOffsets)
    index = 0
    for i in timepairs:
        offsets[index] = i[0]-i[1]
        index = index+1
        songbins[i[2]] += 1
    # Identify the song
    #orderarray=np.column_stack((songbins,songnames))
    #orderarray=orderarray[np.lexsort((songnames,songbins))]
    q3 = np.percentile(songbins, 75)
    q1 = np.percentile(songbins, 25)
    j = 0
    for i in songbins:
        if i > (q3+(3*(q3-q1))):
            print("Result-> "+str(i)+":"+songnames[j])
        j += 1
    end = time.time()
    print('Time: '+str(end-start)+' s')
    print("Time elapsed: ", +time.time() - start, "s")
    fig3 = pylab.figure(1003)
    ax = fig3.add_subplot(111)
    ind = np.arange(numSongs)
    width = 0.35
    rects1 = ax.bar(ind, songbins, width, color='blue', align='center')
    ax.set_ylabel('Number of Matches')
    ax.set_xticks(ind)
    xtickNames = ax.set_xticklabels(songnames)
    matplotlib.pyplot.setp(xtickNames)
    pylab.title('Song Identification')
    fig3.show()
    pylab.show()
    print('The sample song is: '+str(songnames[np.argmax(songbins)]))
The function in fingerprint that I am trying to parallelize is:
def findTimePairs(hash_database, sample_hash, deltaTime, deltaFreq):
    "Find the matching pairs between sample audio file and the songs in the database"
    timePairs = []
    for i in sample_hash:
        for j in hash_database:
            if (i[0] > (j[0]-deltaFreq) and i[0] < (j[0] + deltaFreq)):
                if (i[1] > (j[1]-deltaFreq) and i[1] < (j[1] + deltaFreq)):
                    if (i[2] > (j[2]-deltaTime) and i[2] < (j[2] + deltaTime)):
                        timePairs.append((j[3], i[3], j[4]))
                    else:
                        continue
                else:
                    continue
            else:
                continue
    return timePairs
The complete error is:
Traceback (most recent call last):
File "analisisPrueba.py", line 93, in <module>
numOffsets = len(timepairs)
TypeError: object of type '_Task' has no len()
The submit() method submits a task to the server. What you get back is a reference to the task, not its result. (How could it return its result? submit() returns before any of that work has been done!) You should instead provide a callback function to receive the results. For example, timepairs.append is a function that will take the result and append it to the list timepairs.
timepairs = []
job_server.submit(fhash.findTimePairs, (database, hashSample, TPdelta_freq, TPdelta_time, ), callback=timepairs.append)
(Each findTimePairs call should calculate one result, in case that isn't obvious, and you should submit multiple tasks. Otherwise you're invoking all the machinery of Parallel Python for no reason. And make sure you call job_server.wait() to wait for all the tasks to finish before trying to do anything with your results. In short, read the documentation and some example scripts and make sure you understand how it works.)
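For example, a sketch of one way to split the work into one task per worker over row chunks of database (assuming database is a 2-D numpy array, which np.loadtxt gives you):
timepairs = []
# one chunk of database rows per worker (np.array_split handles uneven splits)
chunks = np.array_split(database, job_server.get_ncpus())
for chunk in chunks:
    # each task returns a list of pairs, so collect results with extend
    job_server.submit(fhash.findTimePairs,
                      (chunk, hashSample, TPdelta_freq, TPdelta_time),
                      callback=timepairs.extend)
job_server.wait()  # block until all submitted tasks have finished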
I have written a Python (2.7) script, but it uses a lot of memory, so I get an out-of-memory error. Is it possible to use less memory?
My code (also on GitHub):
from itertools import combinations
import numpy
# Find the unused members and put them in another group
def findMembers(listIn, listMembers):
    lengthlist2 = (len(listMembers)-len(listIn[0]))
    group2 = [0] * lengthlist2 #making the other group based on the length of the first group
    for i in listIn:
        wichRow = 0
        for x in listMembers:
            if not (x in i):
                group2[wichRow] = x
                wichRow += 1
    listIn.append(group2)
    return listIn

#you give a list of members and the number of groups
#you get back all the possibilities of combinations
def findCombinations(listMembers, numbersOfGroups):
    groupTemp = [] #list needed to save correctly all the combinations
    group = [] #list needed to keep it simple
    newGroup = [] #list that will be returned
    for listPossibilities in combinations(listMembers, (len(listMembers)/numbersOfGroups)):
        groupTemp.append(list(listPossibilities))
        group.append(groupTemp) #saving all the possibilities
        groupTemp = []
    for k in group:
        # place the unused members in group2
        k = (findMembers(k, listMembers))
        if numbersOfGroups > 2:
            groupTemp = []
            groupTemp = findCombinations(k[1], numbersOfGroups-1)
            for i in groupTemp:
                listTemp = []
                listTemp.append(k[0])
                listTemp.extend(i)
                newGroup.append(listTemp)
        else:
            newGroup = group
    return newGroup

# Calculate the happiness of the group
def findHappiness(tabel, listIn):
    happiness = 0
    for i in listIn:
        for j in i:
            for k in i:
                happiness += tabel[j][k]
    return happiness

def buildTabel(members): #build a random survey
    tabel = numpy.random.random((members, members))
    return tabel

def calculateHappiness(group):
    print "Finding all the happiness: "
    maxhappiness = 0
    i = 0
    for x in group:
        happiness = findHappiness(tabel, x)
        if happiness > maxhappiness:
            maxhappiness = happiness
            y = x
        progress = int(round((((i)*1.0/(len(group)))*100.0)))
        update_progress(progress)
        i += 1
    print "\n Best solution: ", y, " with: ", maxhappiness, " happiness"

def update_progress(progress):
    print '\r[{0}] {1}%'.format('#'*(progress/5), progress),

if __name__ == "__main__":
    members = 24 # members of the group
    numbersOfGroups = 3
    tabel = buildTabel(members) #preferences will be stored here
    listMembers = (range(members)) #members of the group that need to be divided
    print "Searching all the combinations..."
    group = findCombinations(listMembers, numbersOfGroups) #find all the combinations (recursive)
    print len(group), " combinations"
    calculateHappiness(group) #calculate the happiest group and print it
The error:
Searching all the combinations...
Traceback (most recent call last):
File "main.py", line 75, in <module>
calculateHappiness(group) #calculate the most happiest group and print
File "main.py", line 38, in findCombinations
newGroup = group
MemoryError
I'm using Windows 10 64-bit with 6GB of RAM. Is it possible to use virtual RAM or disk space from my hard drive?