could not convert string to float in tabu search algorithm - python

I am having this error for the part in bold:
Traceback (most recent call last):
File "C:/Users/appan/OneDrive/Documents/Year 3/AI Assignment Semester 1/Tabu Search/Tabu-search-on-Travelling-Salesman-Problem-master/TabuSearch2.py", line 238, in
solution, value, exec_time = tabu_search("five_d.txt")
File "C:/Users/appan/OneDrive/Documents/Year 3/AI Assignment Semester 1/Tabu Search/Tabu-search-on-Travelling-Salesman-Problem-master/TabuSearch2.py", line 175, in tabu_search
graph, max_weight = read_data(input_file_path)
File "C:/Users/appan/OneDrive/Documents/Year 3/AI Assignment Semester 1/Tabu Search/Tabu-search-on-Travelling-Salesman-Problem-master/TabuSearch2.py", line 64, in read_data
link.append(float(tmp[0]))
ValueError: could not convert string to float:
Process finished with exit code 1
Can you help, please?
import math
from random import randint
import time
from random import shuffle
#import numpy as np
### Data Format is dict:
# data[node_name] = gives you a list of link info
# data[link_index][0] = name of node that edge goes to
# data[link_index][1] = weight of that edge
def read_data(path):
    linkset = []
    links = {}
    max_weight = 0
    '''
    with open(path, "r") as f:
        for line in f:
            print(line)
            link = []
            #tmp = list(map(float,line.strip().split(' ')))
            tmp = line.strip().split(' ')
            arr = np.array(tmp)
            print(arr)
            link.append(float(tmp[0]))
            link.append(float(tmp[1]))
            link.append(float(tmp[2]))
            linkset.append(link)
            if float(tmp[2]) > max_weight:
                max_weight = float(tmp[2])

            link.append(int(tmp[0]))
            link.append(int(tmp[1]))
            link.append(int(tmp[2]))
            linkset.append(link)
            if int(tmp[2]) > max_weight:
                max_weight = int(tmp[2])
    '''
    # ---- the part in bold starts here ----
    with open(path, 'r') as f:
        for line in f:
            #print(line)
            link = []
            #tmp = list(map(float,line.strip().split(' ')))
            tmp = line.strip().split(' ')
            #tmp = np.array()
            print(tmp)
            '''
            for i in tmp:
                link.append([i])
            '''
            link.append(float(tmp[0]))
            link.append(float(tmp[1]))
            link.append(float(tmp[2]))
            linkset.append(link)
            #print(link)
            '''
            link.append(list(map(float,tmp[0])))
            link.append(list(map(float,tmp[1])))
            link.append(list(map(float,tmp[2])))
            linkset.append(link)
            '''
            if float(tmp[2]) > max_weight:
                max_weight = float(tmp[2])
    # ---- the part in bold ends here ----

    for link in linkset:
        try:
            linklist = links[str(link[0])]
            linklist.append(link[1:])
            links[str(link[0])] = linklist
        except:
            links[str(link[0])] = [link[1:]]
    return links, max_weight
def getNeighbors(state):
    # return hill_climbing(state)
    return two_opt_swap(state)

def hill_climbing(state):
    node = randint(1, len(state) - 1)
    neighbors = []
    for i in range(len(state)):
        if i != node and i != 0:
            tmp_state = state.copy()
            tmp = tmp_state[i]
            tmp_state[i] = tmp_state[node]
            tmp_state[node] = tmp
            neighbors.append(tmp_state)
    return neighbors

def two_opt_swap(state):
    global neighborhood_size
    neighbors = []
    for i in range(neighborhood_size):
        node1 = 0
        node2 = 0
        while node1 == node2:
            node1 = randint(1, len(state) - 1)
            node2 = randint(1, len(state) - 1)
        if node1 > node2:
            swap = node1
            node1 = node2
            node2 = swap
        tmp = state[node1:node2]
        tmp_state = state[:node1] + tmp[::-1] + state[node2:]
        neighbors.append(tmp_state)
    return neighbors
def fitness(route, graph):
    path_length = 0
    for i in range(len(route)):
        if (i + 1 != len(route)):
            dist = weight_distance(route[i], route[i + 1], graph)
            if dist != -1:
                path_length = path_length + dist
            else:
                return max_fitness  # there is no such path
        else:
            dist = weight_distance(route[i], route[0], graph)
            if dist != -1:
                path_length = path_length + dist
            else:
                return max_fitness  # there is no such path
    return path_length

# not used in this code, but some datasets have 2-or-more dimensional data points, in which case it is usable
def euclidean_distance(city1, city2):
    return math.sqrt((city1[0] - city2[0]) ** 2 + ((city1[1] - city2[1]) ** 2))

def weight_distance(city1, city2, graph):
    global max_fitness
    neighbors = graph[str(city1)]
    for neighbor in neighbors:
        if neighbor[0] == int(city2):
            return neighbor[1]
    return -1  # there can't be a negative distance, so -1 means that city or edge was not found in the graph
def tabu_search(input_file_path):
    global max_fitness, start_node
    graph, max_weight = read_data(input_file_path)
    ## Below, get the keys (node names) and shuffle them, and make start_node as start
    s0 = list(graph.keys())
    shuffle(s0)
    if int(s0[0]) != start_node:
        for i in range(len(s0)):
            if int(s0[i]) == start_node:
                swap = s0[0]
                s0[0] = s0[i]
                s0[i] = swap
                break
    # max_fitness will act like infinite fitness
    max_fitness = ((max_weight) * (len(s0))) + 1
    sBest = s0
    vBest = fitness(s0, graph)
    bestCandidate = s0
    tabuList = []
    tabuList.append(s0)
    stop = False
    best_keep_turn = 0
    start_time = time.time()
    while not stop:
        sNeighborhood = getNeighbors(bestCandidate)
        bestCandidate = sNeighborhood[0]
        for sCandidate in sNeighborhood:
            if (sCandidate not in tabuList) and ((fitness(sCandidate, graph) < fitness(bestCandidate, graph))):
                bestCandidate = sCandidate
        if (fitness(bestCandidate, graph) < fitness(sBest, graph)):
            sBest = bestCandidate
            vBest = fitness(sBest, graph)
            best_keep_turn = 0
        tabuList.append(bestCandidate)
        if (len(tabuList) > maxTabuSize):
            tabuList.pop(0)
        if best_keep_turn == stoppingTurn:
            stop = True
        best_keep_turn += 1
    exec_time = time.time() - start_time
    return sBest, vBest, exec_time
## Tabu Search Takes edge-list in a given format:
# nodefrom nodeto weight
# 0 1 5
# 3 2 4
# 1 0 3
# Undirected edges should be written twice, once for each direction.
# maxTabuSize = 10000
maxTabuSize = 500
neighborhood_size = 500
stoppingTurn = 500
max_fitness = 0
start_node = 0
# solution, value, exec_time = tabu_search("test.txt")
solution, value, exec_time = tabu_search("five_d.txt")
print(solution)
print(value)
print(exec_time)
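The empty value after the colon in the ValueError means float() was handed an empty string, which typically happens when five_d.txt contains blank lines or values separated by more than one space (split(' ') keeps the empty tokens between repeated spaces). Below is a minimal, hedged sketch of a more defensive reader, assuming the whitespace-separated "nodefrom nodeto weight" layout described in the comments above; it is an illustration, not the original assignment code:

def read_data(path):
    # Defensive variant: split() without an argument collapses repeated
    # whitespace, and short/blank lines are skipped instead of crashing float().
    linkset = []
    max_weight = 0.0
    with open(path) as f:
        for line in f:
            tmp = line.split()
            if len(tmp) < 3:          # blank or malformed line
                continue
            src, dst, weight = float(tmp[0]), float(tmp[1]), float(tmp[2])
            linkset.append([src, dst, weight])
            if weight > max_weight:
                max_weight = weight
    links = {}
    for src, dst, weight in linkset:
        # key by the integer node name as a string ('0', '1', ...) so that
        # int(key) in tabu_search keeps working
        links.setdefault(str(int(src)), []).append([dst, weight])
    return links, max_weight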

Related

Flajolet Martin Algorithm Implementation

I am trying to implement the Flajolet-Martin algorithm. I have a dataset with over 6000 records, but the output of the following code is 4096. Please help me understand the mistake I am making.
import xxhash
import math

def return_trailing_zeroes(s):
    s = str(s)
    rev = s[::-1]
    count = 0
    for i in rev:
        if i is '0':
            count = count + 1
        else:
            break
    return count

def gethash(line):
    num = abs(xxhash.xxh32(line).intdigest())
    return num

fp = open("/content/drive/MyDrive/Data.txt", "r")
h_max = 0
for line in fp:
    hash_value_1 = gethash(line)
    binary_1 = format(hash_value_1, '032b')
    t1 = return_trailing_zeroes(binary_1)
    if t1 > h_max:
        h_max = t1
fp.close()
print(2**h_max)
I tried this implementation of HyperLogLog algorithm and the output of the following code is 2560.
def return_trailing_zeroes(s):
    s = str(s)
    rev = s[::-1]
    count = 0
    for i in rev:
        if i is '0':
            count = count + 1
        else:
            break
    return count

h1_m = 0
h2_m = 0
h3_m = 0
h4_m = 0
fp = open("/content/drive/MyDrive/Data.txt", "r")
for line in fp:
    hash_value_1 = abs(xxhash.xxh32(line).intdigest())
    hash_value_2 = abs(hash32(line))
    hash_value_3 = abs(jhashcode.hashcode(line))
    hash_value_4 = abs(mmh3.hash(line))
    binary_1 = format(hash_value_1, '032b')
    binary_2 = format(hash_value_2, '032b')
    binary_3 = format(hash_value_3, '032b')
    binary_4 = format(hash_value_4, '032b')
    t1 = return_trailing_zeroes(binary_1)
    t2 = return_trailing_zeroes(binary_2)
    t3 = return_trailing_zeroes(binary_3)
    t4 = return_trailing_zeroes(binary_4)
    if t1 > h1_m:
        h1_m = t1
    if t2 > h2_m:
        h2_m = t2
    if t3 > h3_m:
        h3_m = t3
    if t4 > h4_m:
        h4_m = t4
fp.close()
avg_hash12 = (2**(h1_m) + 2**(h2_m)) / float(2)
avg_hash34 = (2**(h3_m) + 2**(h4_m)) / float(2)
distinct_elements = math.ceil(statistics.median([avg_hash12, avg_hash34]))
print(distinct_elements)
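With a single hash function, Flajolet-Martin can only ever output a power of two, so 4096 for roughly 6000 distinct records is within its expected error rather than an outright bug. The usual remedy is to average the trailing-zero maxima over many seeded hash functions and apply a correction factor (phi ~= 0.77351). A hedged sketch of that idea, assuming only mmh3 is installed; the seed count and file path are illustrative, not taken from the question:

import math
import mmh3

def trailing_zeroes(x):
    # Number of trailing zero bits in a 32-bit unsigned integer.
    if x == 0:
        return 32
    count = 0
    while x & 1 == 0:
        x >>= 1
        count += 1
    return count

def fm_estimate(path, num_hashes=64):
    max_r = [0] * num_hashes
    with open(path) as fp:
        for line in fp:
            for seed in range(num_hashes):
                h = mmh3.hash(line, seed) & 0xFFFFFFFF   # unsigned 32-bit hash
                max_r[seed] = max(max_r[seed], trailing_zeroes(h))
    # Average the exponents across hash functions, then apply one common
    # Flajolet-Martin correction (phi ~= 0.77351).
    mean_r = sum(max_r) / float(num_hashes)
    return 2 ** mean_r / 0.77351

print(fm_estimate("/content/drive/MyDrive/Data.txt"))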

IndexError: index 4 is out of bounds for axis 0 with size 4

I am getting this IndexError while trying to composite events. My indices start at 0, not 1, and although I have tried a number of things (like appending i + 1), I have not been able to fix it.
There is apparently something wrong with this specific line of code: dset_IDX[offset:offset_next] = event_id[file_indices]
The .py file is over 1000 lines, so I cannot show all of it, but I can show the part of the function that is giving me the error.
def count_events(files):
    # Because we want to remove events with 0 hits,
    # we need to count the events beforehand (to create the h5 file).
    # This function counts and indexes the events with more than 0 hits.
    # Files need to be iterated in the same order to use the indexes.
    """ This is where we manually specify the file"""
    num_events = 0
    nonzero_file_events = []
    for file_index, f in enumerate(files):
        data = np.load(f, allow_pickle=True)
        nonzero_file_events.append([])
        hits = data['digi_hit_pmt']
        for i in range(len(hits)):
            if len(hits[i]) != 0:
                nonzero_file_events[file_index].append(i)
                num_events += 1
    return num_events, nonzero_file_events
def GenMapping(csv_file):
    mPMT_to_index = {}
    with open(csv_file) as f:
        rows = f.readline().split(",")[1:]
        rows = [int(r.strip()) for r in rows]
        for line in f:
            line_split = line.split(",")
            col = int(line_split[0].strip())
            for row, value in zip(rows, line_split[1:]):
                value = value.strip()
                if value:  # If the value is not empty
                    mPMT_to_index[int(value)] = [col, row]
    npmap = np.zeros((max(mPMT_to_index) + 1, 2), dtype=np.int)
    for k, v in mPMT_to_index.items():
        npmap[k] = v
    return npmap
def GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2):
    """
    Inputs:
        avg_mu_per_ev == Poisson distribution mean for number of muons in each spill
        sigma_time_offset == Width of spill (Gaussian) in nanoseconds
    """
    files = ['event998.npz']
    # Remove whitespace
    files = [x.strip() for x in files]
    # Check that files were provided
    if len(files) == 0:
        raise ValueError("No files provided!!")
    print("Merging " + str(len(files)) + " files")
    # Start merging
    num_nonzero_events, nonzero_event_indexes = count_events(files)
    print(num_nonzero_events)
    # np.random.poisson( avg_mu_per_ev, number_of_throws )
    num_muons = np.random.poisson(avg_mu_per_ev, num_nonzero_events - 2954)
    # creates h5 file to generate the h5 file
    dtype_events = np.dtype(np.float32)
    dtype_labels = np.dtype(np.int32)
    dtype_energies = np.dtype(np.float32)
    dtype_positions = np.dtype(np.float32)
    dtype_IDX = np.dtype(np.int32)
    dtype_PATHS = h5py.special_dtype(vlen=str)
    dtype_angles = np.dtype(np.float32)
    # sets h5 file to be written
    h5_file = h5py.File('multimuonfile(2).h5', 'w')
    dset_event_data = h5_file.create_dataset("event_data",
                                             shape=(num_nonzero_events,) + IMAGE_SHAPE,
                                             dtype=dtype_events)
    dset_labels = h5_file.create_dataset("labels",
                                         shape=(num_nonzero_events,),
                                         dtype=dtype_labels)
    dset_energies = h5_file.create_dataset("energies",
                                           shape=(num_nonzero_events, 1),
                                           dtype=dtype_energies)
    dset_positions = h5_file.create_dataset("positions",
                                            shape=(num_nonzero_events, 1, 3),
                                            dtype=dtype_positions)
    dset_IDX = h5_file.create_dataset("event_ids",
                                      shape=(num_nonzero_events,),
                                      dtype=dtype_IDX)
    dset_PATHS = h5_file.create_dataset("root_files",
                                        shape=(num_nonzero_events,),
                                        dtype=dtype_PATHS)
    dset_angles = h5_file.create_dataset("angles",
                                         shape=(num_nonzero_events, 2),
                                         dtype=dtype_angles)
    # 22 -> gamma, 11 -> electron, 13 -> muon
    # corresponds to labelling used in CNN with only barrel
    # IWCDmPMT_4pi_full_tank_gamma_E0to1000MeV_unif-pos-R371-y521cm_4pi-dir_3000evts_329.npz has an event
    # with pid 11 though....
    # pid_to_label = {22:0, 11:1, 13:2}
    offset = 0
    offset_next = 0
    mPMT_to_index = GenMapping(PMT_LABELS)
    # Loop over files
    for file_index, filename in enumerate(files):
        data = np.load(filename, allow_pickle=True)
        nonzero_events_in_file = len(nonzero_event_indexes[file_index])
        x_data = np.zeros((nonzero_events_in_file,) + IMAGE_SHAPE,
                          dtype=dtype_events)
        digi_hit_pmt = data['digi_hit_pmt']
        # digi_hit_charge = data['digi_hit_charge']
        # digi_hit_time = data['digi_hit_time']
        # digi_hit_trigger = data['digi_hit_trigger']
        # trigger_time = data['trigger_time']
        delay = 0
        # Loop over events in file
        # Loop over number of muons in each event
        event_id = np.array([], dtype=np.int32)
        root_file = np.array([], dtype=np.str)
        pid = np.array([])
        position = np.array([])
        direction = np.array([])
        energy = np.array([])
        labels = np.array([])
        # with open("ResultFile.txt", "w") as text_file:
        # sys.stdout = open("Result2.txt", "w")
        for i, nmu in enumerate(num_muons):
            # np.savetxt(text_file, i, nmu, fmt="%d")
            # text_file.write("processing output entry " + str(i) + " with " + nmu + " muons")
            print("processing output entry ", i, " with ", nmu, " muons")
            indices = np.random.randint(0, len(digi_hit_pmt), max(1, nmu))
            time_offs = [0.]
            if nmu > 1:
                time_offs = np.append(time_offs, np.random.normal(0., sigma_time_offset, nmu - 1))
            hit_pmts, charge, time = SumEvents(indices, time_offs, data, nmu == 0)
            hit_mpmts = hit_pmts // 19
            pmt_channels = hit_pmts % 19
            rows = mPMT_to_index[hit_mpmts, 0]
            cols = mPMT_to_index[hit_mpmts, 1]
            x_data[i - delay, rows, cols, pmt_channels] = charge
            x_data[i - delay, rows, cols, pmt_channels + 19] = time
            # fix below!!!
            idx0 = indices[0]
            event_id = np.append(event_id, data['event_id'][idx0])
            root_file = np.append(root_file, data['root_file'][idx0])
            pid = np.append(pid, data['pid'][idx0])
            position = np.append(position, data['position'][idx0])
            direction = np.append(direction, data['direction'][idx0])
            energy = np.append(energy, np.sum(data['energy'][indices]))
            labels = np.append(labels, nmu)
        offset_next += nonzero_events_in_file
        file_indices = nonzero_event_indexes[file_index]
        dset_IDX[offset:offset_next] = event_id[file_indices]
        dset_PATHS[offset:offset_next] = root_file[file_indices]
        dset_energies[offset:offset_next, :] = energy[file_indices].reshape(-1, 1)
        dset_positions[offset:offset_next, :, :] = position[file_indices].reshape(-1, 1, 3)
        dset_labels[offset:offset_next] = labels[file_indices]
        print(event_id)
        direction = direction[file_indices]
        polar = np.arccos(direction[:, 1])
        azimuth = np.arctan2(direction[:, 2], direction[:, 0])
        dset_angles[offset:offset_next, :] = np.hstack((polar.reshape(-1, 1), azimuth.reshape(-1, 1)))
        dset_event_data[offset:offset_next, :] = x_data
        offset = offset_next
        print("Finished file: {}".format(filename))
    #sys.stdout.close()
    print("Saving")
    #h5_file.close()
    print("Finished")

# In[ ]:
GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2)
Traceback
Merging 1 files
2958
processing output entry 0 with 3 muons
processing output entry 1 with 1 muons
processing output entry 2 with 3 muons
processing output entry 3 with 3 muons
Traceback (most recent call last):
File "C:/Users/abdul/OneDrive/Desktop/ISSP/ISSP-AA/TriumfCNN-AA/EventDisplay.py", line 1068, in <module>
GenerateMultiMuonSample_h5(avg_mu_per_ev=2.5, sigma_time_offset=21.2)
File "C:/Users/abdul/OneDrive/Desktop/ISSP/ISSP-AA/TriumfCNN-AA/EventDisplay.py", line 1044, in GenerateMultiMuonSample_h5
dset_IDX[offset:offset_next] = event_id[file_indices]
IndexError: index 4 is out of bounds for axis 0 with size 4
Not much info is provided, but from what I understand:
the error says that axis 0 has size 4 and you are trying to access index 4, which is not possible with size 4 because indexing starts at 0, so the maximum valid index is 3.
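In the posted code, num_muons has length num_nonzero_events - 2954 (4 here, since 2958 events were counted), so event_id ends up with only 4 entries, while file_indices holds all 2958 nonzero event indexes; fancy-indexing the short array with those indexes triggers the error. A tiny reproduction with hypothetical values, not the poster's data:

import numpy as np

event_id = np.array([10, 11, 12, 13], dtype=np.int32)   # only 4 ids were collected
file_indices = [0, 1, 2, 4]                              # an index >= 4 sneaks in
dset = event_id[file_indices]                            # IndexError: index 4 is out of bounds for axis 0 with size 4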

time complexity issues of my program

My Viterbi program runs in exponential time. Can you help me find the place I can change to make it a dynamic program? I need to remember and use only the two previous tags of words.
Thanks a lot.
from collections import defaultdict
import sys
import re
import feature_maker as fm

bla = ''
all_states = set()
# distribution over all of the corpus
POS_probability = fm.load_obj('probas')
POS_probability['START'] = 1.0

def cpd_tagwords(words, tag):
    pattern = re.compile("\W")  # to check for .,: etc.
    if pattern.match(words) and tag == words:
        return 1
    elif pattern.match(tag):
        return 0
    for word in emle.split("\n"):
        if word.__contains__(words) and word.__contains__(tag):
            return word[word.index(":") + 2:]
    # if we don't have data about the word with the tag, just return the probability
    # of the tag over all of the words in the corpus.
    return POS_probability[tag]

def cpd_tags(early, prev, current):
    lambda1 = 0
    lambda3 = 0
    lambda6 = 0
    for word in qmle.split("\n"):
        word1 = word.split()
        if len(word1) > 0:
            if word1[0].__contains__(current):  # for tuple of 1
                if len(word1) == 2:
                    lambda1 = word[word.index("]:") + 3:]
                if len(word1) > 2 and word1[1].__contains__(prev):  # for tuple of 2
                    if len(word1) == 3:
                        lambda3 = word[word.index("]:") + 3:]
                if len(word1) > 3 and word1[2].__contains__(early):  # for tuple of 3
                    if len(word1) == 4:
                        lambda6 = word[word.index("]:") + 3:]
    return (0.6 * float(lambda6)) + (0.3 * float(lambda3)) + (0.1 * float(lambda1))

# map: popular_copules['POS'] = list of all POS tags that can come before it.
popular_copules = fm.load_obj('popular_copules')
# Viterbi Algo
def viterbi(sentence, tags1):
    def findSet(index, tag):
        if tag == 'ALL':
            return tags1
        if index in range(1, len(sentence) + 1):
            possible_tags = set(popular_copules[tag])
            if possible_tags == set([]):
                return tags1
            return set(popular_copules[tag])
        elif index == 0 or index == -1:
            return {'START'}

    # stores (word:tag) in this whole sentence
    sentence_with_tag = defaultdict(str)

    # inner function to compute pi values -- start
    def pi_viterbi(k, u, v, sentence):  # here is the start of the bad sequence
        prob = defaultdict(float)
        # initialization
        if k == 0 and u == 'START' and v == 'START':
            return (1., 'START')
        else:
            for w in findSet(k - 2, u):
                prev = pi_viterbi(k - 1, w, u, sentence)[0]
                # tuple((w,u,v))
                q = cpd_tags(w, u, v)
                e = cpd_tagwords(sentence[k - 1].lower(), v)
                probability = float(prev) * q * float(e)
                prob[tuple((w, u))] = probability
            # here is the end of the bad sequence
            max_tuple = max(prob.items(), key=lambda x: x[1])
            # print (max_tuple[1], max_tuple[0][0])
            return max_tuple[1], max_tuple[0][0]
    # inner function to compute pi values -- end

    sentence_with_tag = list()
    backpointer = defaultdict(str)
    tags = defaultdict(str)
    k = len(sentence)
    u_glob = ''
    v_glob = ''
    glob = 0.
    for i in range(1, k + 1):
        prob = defaultdict(float)
        # for the current word we check all the tags
        """ changed from: for u in findSet(i - 1): """
        for u in findSet(i, 'ALL'):
            # going backwards we call findSet with u so it gives us only
            # tags v that go together a lot with u (this is pruning)
            """ changed from: for v in findSet(i) """
            for v in findSet(i - 1, u_glob):
                # switched u and v
                value, w = pi_viterbi(i, v, u, sentence)  # the v recursion in the algorithm
                prob[tuple((i, u, v))] = value
                backpointer[tuple((i, u, v))] = w  # bp from the algorithm
        max_tuple = max(prob.items(), key=lambda x: x[1])
        backpointer[tuple((i, max_tuple[0][1], max_tuple[0][-1]))] = max_tuple[0][1]  # bp (k,u,v) = tag w
        # sentence_with_tag.append(max_tuple[0][-1])
        u_glob = max_tuple[0][-2]
        v_glob = max_tuple[0][-1]
        glob = max_tuple[1]
        print('Max', max_tuple)
    tags[k - 1] = u_glob
    tags[k] = v_glob
    for i in range((k - 2), 0, -1):
        tag = backpointer[tuple(((i + 2), tags[i + 1], tags[i + 2]))]
        tags[i] = tag
    tag_list = list()
    for i in range(1, len(tags) + 1):
        tag_list.append(tags[i])
    file = open(sys.argv[4], 'w')
    file.truncate()
    for word in tag_list:
        file.write(word)
    # tag list as results
    return tag_list
file=open(sys.argv[1],"r+")
fQ = open(sys.argv[2], 'r')
qmle = fQ.read()
fQ.close()
f = open("tags.txt",'r+')
tags = f.read()
f.close()
fe = open(sys.argv[3], 'r')
emle = fe.read()
distinct_tags = set()
# what is the list of all tags?
for word in tags.split():
distinct_tags.add(word)
sentence = []
sentence1 = []
sentence1 = file.read()
sentence = sentence1.split()
file.close()
file = open(sys.argv[4], 'w')
file.truncate()
viterbi(sentence, distinct_tags)
How can I reduce the time complexity?
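The blow-up comes from pi_viterbi calling itself recursively for every candidate tag without remembering results, so the same (k, u, v) state is recomputed an exponential number of times. The standard fix is to memoise (or fill a table over) those states so each position/tag-pair is evaluated once. A hedged sketch of the memoised version follows; cpd_tags, cpd_tagwords and findSet refer to the poster's own helpers and are assumed to be in scope, and sentence is captured by the closure so the cache key is just (k, u, v):

from functools import lru_cache

def viterbi_dp(sentence, tags1):
    @lru_cache(maxsize=None)
    def pi(k, u, v):
        # Best probability of any tag sequence ending in (u, v) at position k,
        # plus the tag w at position k-2 that achieves it (the backpointer).
        if k == 0 and u == 'START' and v == 'START':
            return 1.0, 'START'
        best_prob, best_w = 0.0, 'START'
        for w in findSet(k - 2, u):
            prev_prob = pi(k - 1, w, u)[0]                      # computed once, then cached
            q = cpd_tags(w, u, v)
            e = float(cpd_tagwords(sentence[k - 1].lower(), v))
            p = float(prev_prob) * q * e
            if p > best_prob:
                best_prob, best_w = p, w
        return best_prob, best_w
    return pi

With T tags and a sentence of length n, the memoised trigram recursion does roughly O(n * T^3) work instead of growing exponentially with n.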

python script showing different results (with one error) on two similar input files

The script, originally taken and modified from (http://globplot.embl.de/):
#!/usr/bin/env python
# Copyright (C) 2003 Rune Linding - EMBL
# GlobPlot TM
# GlobPlot is licensed under the Academic Free license
from string import *
from sys import argv
from Bio import File
from Bio import SeqIO
import fpformat
import sys
import tempfile
import os
from os import system, popen3
import math

# Russell/Linding
RL = {'N':0.229885057471264,'P':0.552316012226663,'Q':-0.187676577424997,'A':-0.261538461538462,'R':-0.176592654077609, \
      'S':0.142883029808825,'C':-0.0151515151515152,'T':0.00887797506611258,'D':0.227629796839729,'E':-0.204684629516228, \
      'V':-0.386174834235195,'F':-0.225572305974316,'W':-0.243375458622095,'G':0.433225711769886,'H':-0.00121743364986608, \
      'Y':-0.20750516775322,'I':-0.422234699606962,'K':-0.100092289621613,'L':-0.337933495925287,'M':-0.225903614457831}

def Sum(seq, par_dict):
    sum = 0
    results = []
    raws = []
    sums = []
    p = 1
    for residue in seq:
        try:
            parameter = par_dict[residue]
        except:
            parameter = 0
        if p == 1:
            sum = parameter
        else:
            sum = sum + parameter  #*math.log10(p)
        ssum = float(fpformat.fix(sum, 10))
        sums.append(ssum)
        p += 1
    return sums
def getSlices(dydx_data, DOM_join_frame, DOM_peak_frame, DIS_join_frame, DIS_peak_frame):
    DOMslices = []
    DISslices = []
    in_DOMslice = 0
    in_DISslice = 0
    beginDOMslice = 0
    endDOMslice = 0
    beginDISslice = 0
    endDISslice = 0
    for i in range(len(dydx_data)):
        # close dom slice
        if in_DOMslice and dydx_data[i] > 0:
            DOMslices.append([beginDOMslice, endDOMslice])
            in_DOMslice = 0
        # close dis slice
        elif in_DISslice and dydx_data[i] < 0:
            DISslices.append([beginDISslice, endDISslice])
            in_DISslice = 0
        # elseif inSlice expandslice
        elif in_DOMslice:
            endDOMslice += 1
        elif in_DISslice:
            endDISslice += 1
        # if not in slice and dydx !== 0 start slice
        if dydx_data[i] > 0 and not in_DISslice:
            beginDISslice = i
            endDISslice = i
            in_DISslice = 1
        elif dydx_data[i] < 0 and not in_DOMslice:
            beginDOMslice = i
            endDOMslice = i
            in_DOMslice = 1
    # last slice
    if in_DOMslice:
        DOMslices.append([beginDOMslice, endDOMslice])
    if in_DISslice:
        DISslices.append([beginDISslice, endDISslice])
    k = 0
    l = 0
    while k < len(DOMslices):
        if k+1 < len(DOMslices) and DOMslices[k+1][0]-DOMslices[k][1] < DOM_join_frame:
            DOMslices[k] = [ DOMslices[k][0], DOMslices[k+1][1] ]
            del DOMslices[k+1]
        elif DOMslices[k][1]-DOMslices[k][0]+1 < DOM_peak_frame:
            del DOMslices[k]
        else:
            k += 1
    while l < len(DISslices):
        if l+1 < len(DISslices) and DISslices[l+1][0]-DISslices[l][1] < DIS_join_frame:
            DISslices[l] = [ DISslices[l][0], DISslices[l+1][1] ]
            del DISslices[l+1]
        elif DISslices[l][1]-DISslices[l][0]+1 < DIS_peak_frame:
            del DISslices[l]
        else:
            l += 1
    return DOMslices, DISslices

def SavitzkyGolay(window, derivative, datalist):
    SG_bin = 'sav_gol'
    stdin, stdout, stderr = popen3(SG_bin + '-D' + str(derivative) + ' -n' + str(window)+','+str(window))
    for data in datalist:
        stdin.write(`data`+'\n')
    try:
        stdin.close()
    except:
        print stderr.readlines()
    results = stdout.readlines()
    stdout.close()
    SG_results = []
    for result in results:
        SG_results.append(float(fpformat.fix(result,6)))
    return SG_results
def reportSlicesTXT(slices, sequence, maskFlag):
    if maskFlag == 'DOM':
        coordstr = '|GlobDoms:'
    elif maskFlag == 'DIS':
        coordstr = '|Disorder:'
    else:
        raise SystemExit
    if slices == []:
        # by default the sequence is in uppercase which is our search space
        s = sequence
    else:
        # insert seq before first slide
        if slices[0][0] > 0:
            s = sequence[0:slices[0][0]]
        else:
            s = ''
        for i in range(len(slices)):
            # skip first slice
            if i > 0:
                coordstr = coordstr + ', '
            coordstr = coordstr + str(slices[i][0]+1) + '-' + str(slices[i][1]+1)
            # insert the actual slice
            if maskFlag == 'DOM':
                s = s + lower(sequence[slices[i][0]:(slices[i][1]+1)])
                if i < len(slices)-1:
                    s = s + upper(sequence[(slices[i][1]+1):(slices[i+1][0])])
                # last slice
                elif slices[i][1] < len(sequence)-1:
                    s = s + lower(sequence[(slices[i][1]+1):(len(sequence))])
            elif maskFlag == 'DIS':
                s = s + upper(sequence[slices[i][0]:(slices[i][1]+1)])
                # insert untouched seq between disorder segments, 2-run labelling
                if i < len(slices)-1:
                    s = s + sequence[(slices[i][1]+1):(slices[i+1][0])]
                # last slice
                elif slices[i][1] < len(sequence)-1:
                    s = s + sequence[(slices[i][1]+1):(len(sequence))]
    return s, coordstr
def runGlobPlot():
    try:
        smoothFrame = int(sys.argv[1])
        DOM_joinFrame = int(sys.argv[2])
        DOM_peakFrame = int(sys.argv[3])
        DIS_joinFrame = int(sys.argv[4])
        DIS_peakFrame = int(sys.argv[5])
        file = str(sys.argv[6])
        db = open(file, 'r')
    except:
        print 'Usage:'
        print '    ./GlobPipe.py SmoothFrame DOMjoinFrame DOMpeakFrame DISjoinFrame DISpeakFrame FASTAfile'
        print '    Optimised for ELM: ./GlobPlot.py 10 8 75 8 8 sequence_file'
        print '    Webserver settings: ./GlobPlot.py 10 15 74 4 5 sequence_file'
        raise SystemExit
    for cur_record in SeqIO.parse(db, "fasta"):
        # uppercase is searchspace
        seq = upper(str(cur_record.seq))
        # sum function
        sum_vector = Sum(seq, RL)
        # Run Savitzky-Golay
        smooth = SavitzkyGolay('smoothFrame', 0, sum_vector)
        dydx_vector = SavitzkyGolay('smoothFrame', 1, sum_vector)
        # test
        sumHEAD = sum_vector[:smoothFrame]
        sumTAIL = sum_vector[len(sum_vector)-smoothFrame:]
        newHEAD = []
        newTAIL = []
        for i in range(len(sumHEAD)):
            try:
                dHEAD = (sumHEAD[i+1]-sumHEAD[i])/2
            except:
                dHEAD = (sumHEAD[i]-sumHEAD[i-1])/2
            try:
                dTAIL = (sumTAIL[i+1]-sumTAIL[i])/2
            except:
                dTAIL = (sumTAIL[i]-sumTAIL[i-1])/2
            newHEAD.append(dHEAD)
            newTAIL.append(dTAIL)
        dydx_vector[:smoothFrame] = newHEAD
        dydx_vector[len(dydx_vector)-smoothFrame:] = newTAIL
        globdoms, globdis = getSlices(dydx_vector, DOM_joinFrame, DOM_peakFrame, DIS_joinFrame, DIS_peakFrame)
        s_domMask, coordstrDOM = reportSlicesTXT(globdoms, seq, 'DOM')
        s_final, coordstrDIS = reportSlicesTXT(globdis, s_domMask, 'DIS')
        sys.stdout.write('>'+cur_record.id+coordstrDOM+coordstrDIS+'\n')
        print s_final
        print '\n'
    return

runGlobPlot()
My input and output files are here: link
This script takes an input (input1.fa) and gives the following output: output1.txt
But when I try to run this script with a similar but larger input file (input2.fa), it shows the following error:
Traceback (most recent call last):
File "final_script_globpipe.py", line 207, in <module>
runGlobPlot()
File "final_script_globpipe.py", line 179, in runGlobPlot
smooth = SavitzkyGolay('smoothFrame',0, sum_vector)
File "final_script_globpipe.py", line 105, in SavitzkyGolay
stdin.write(`data`+'\n')
IOError: [Errno 22] Invalid argument
I have no idea where the problem is. Any suggestion is appreciated.
I am using Python 2.7 on a Windows 7 machine. I have also attached the Savitzky-Golay module, which is needed to run the script.
Thanks
UPDATE:
After trying to reproduce the error on Linux, it shows similar behaviour: it works fine with the first file, but with the second it returns Errno 32.
Traceback:
Traceback (most recent call last):
File "Glob.py", line 207, in <module>
runGlobPlot()
File "Glob.py", line 179, in runGlobPlot
smooth = SavitzkyGolay('smoothFrame',0, sum_vector)
File "Glob.py", line 105, in SavitzkyGolay
stdin.write(`data`+'\n')
IOError: [Errno 32] Broken pipe
Update:
Some calls to the SG_bin binary report that the -n parameter is the wrong type:
Wrong type of parameter for flag -n. Has to be unsigned,unsigned
This parameter comes from the window variable that is passed to the SavitzkyGolay function.
Surrounding the stdin.write call with a try/except block reveals that it breaks a handful of times:
try:
    for data in datalist:
        stdin.write(repr(data)+'\n')
except:
    print "It broke"

Python: "MemoryErrorUnhandled exception in thread started by"

I find my code unable to run because of a "MemoryErrorUnhandled exception in thread started by " error.
This is my code:
import cv2
import numpy as np

def waterMark(surface, hidden, structure=(2,1,5)):
    if sum(structure) == 8 and len(structure) == 3:
        B = int(structure[0])
        G = int(structure[1])
        R = int(structure[2])
        for i in xrange(surface.shape[1]):
            for j in xrange(surface.shape[0]):
                if i < hidden.shape[0] and j < hidden.shape[1]:
                    surface[i,j][0] = surface[i,j][0][:8-B] + hidden[i,j][:B]
                    surface[i,j][1] = surface[i,j][2][:8-G] + hidden[i,j][:G]
                    surface[i,j][2] = surface[i,j][2][:8-R] + hidden[i,j][:R]
    else:
        print 'the param must be a 3-dim list or tuple, and its sum must be 8'
    return surface

def to_Bin(array):
    b_list = []
    if len(array.shape) == 2:
        for i in range(array.shape[0]):
            InterVariable = []
            for j in range(array.shape[1]):
                binnum = bin(array[i,j])[2:]
                InterVariable.append((8-len(binnum))*'0' + binnum)
            b_list.append(InterVariable)
    elif len(array.shape) == 3:
        for i in xrange(array.shape[0]):
            InterVariable = []
            for j in xrange(array.shape[1]):
                InterVariable.append(
                    [(8-len(bin(array[i,j][c])[2:]))*'0' + \
                     bin(array[i,j][c])[2:] for c in range(3)]
                )
            b_list.append(InterVariable)
    return np.array(b_list)

def MED(orgpath, datapath):
    base_img = cv2.imread(orgpath)
    hidden_img = cv2.imread(datapath)
    gray_hidden = cv2.cvtColor(hidden_img, cv2.COLOR_BGR2GRAY)
    while base_img.size < 2*hidden_img.size:
        base_img = cv2.pyrUp(base_img)
    print 'base_img%d' % (base_img.size)
    gray_b_hidden = to_Bin(gray_hidden)
    bgr_b_base = to_Bin(base_img)
    wm = waterMark(bgr_b_base, gray_b_hidden, (3,2,3))
    encry_img = to_Dec(wm)
    return encry_img

file1 = 'im.jpg'
file2 = 'IMG_0284.jpg'
img = MED(file1, file2)
The size of file2 is only 1.2 MB and file1 is 20 KB; I don't know why the memory is not enough. I want to know how to make it fit in memory. Thank you.
The traceback:
Traceback (most recent call last):
File "H:\signature.py", line 146, in
img = MED(file1,file2)
File "H:\signature.py", line 139, in MED
bgr_b_base = to_Bin(base_img)
MemoryError
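to_Bin replaces every pixel of the pyrUp-enlarged base image with Python string objects (one 8-character string per channel), which costs orders of magnitude more memory than the original uint8 array, and that is where the MemoryError appears. Below is a hedged sketch of the same bit-plane embedding done with numpy integer operations, so no per-pixel strings are built; the pyrUp enlargement step is left out, and the function name and output file are illustrative, not the poster's code:

import cv2
import numpy as np

def watermark(surface, hidden, structure=(3, 2, 3)):
    # Embed the top bits of `hidden` into the low bits of each BGR channel of
    # `surface` using integer masks instead of per-pixel binary strings.
    h = min(surface.shape[0], hidden.shape[0])
    w = min(surface.shape[1], hidden.shape[1])
    out = surface.copy()
    for channel, bits in enumerate(structure):        # structure = (B, G, R) bit counts
        mask = 0xFF ^ ((1 << bits) - 1)               # clears the low `bits` bits
        top = hidden[:h, :w] >> (8 - bits)            # top `bits` bits of the hidden image
        out[:h, :w, channel] = (out[:h, :w, channel] & mask) | top
    return out

base = cv2.imread('im.jpg')
secret = cv2.cvtColor(cv2.imread('IMG_0284.jpg'), cv2.COLOR_BGR2GRAY)
cv2.imwrite('watermarked.png', watermark(base, secret))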
