I find my code unable to run because of a "MemoryErrorUnhandled exception in thread started by " error.
This is my code:
def waterMark(surface,hidden,structure=(2,1,5)):
if sum(structure) == 8 and len(structure) == 3:
B = int(structure[0])
G = int(structure[1])
R = int(structure[2])
for i in xrange(surface.shape[1]):
for j in xrange(surface.shape[0]):
if i < hidden.shape[0] and j < hidden.shape[1]:
surface[i,j][0] = surface[i,j][0][:8-B] + hidden[i,j][:B]
surface[i,j][1] = surface[i,j][2][:8-G] + hidden[i,j][:G]
surface[i,j][2] = surface[i,j][2][:8-R] + hidden[i,j][:R]
else:
print 'the param must be 3-dim list or turtle ,and its sum is 8'
return surface
def to_Bin(array):
b_list = []
if len(array.shape) == 2:
for i in range(array.shape[0]):
InterVariable = []
for j in range(array.shape[1]):
binnum = bin(array[i,j])[2:]
InterVariable.append((8-len(binnum))*'0'+ binnum)
b_list.append(InterVariable)
elif len(array.shape) == 3:
for i in xrange(array.shape[0]):
InterVariable = []
for j in xrange(array.shape[1]):
InterVariable.append(
[(8-len(bin(array[i,j][c])[2:]))*'0'+ \
bin(array[i,j][c])[2:] for c in range(3)]
)
b_list.append(InterVariable)
return np.array(b_list)
def MED(orgpath,datapath):
base_img = cv2.imread(orgpath)
hidden_img = cv2.imread(datapath)
gray_hidden = cv2.cvtColor(hidden_img,cv2.COLOR_BGR2GRAY)
while base_img.size < 2*hidden_img.size:
base_img = cv2.pyrUp(base_img)
print 'base_img%d' %(base_img.size)
gray_b_hidden = to_Bin(gray_hidden)
bgr_b_base = to_Bin(base_img)
wm = waterMark(bgr_b_base,gray_b_hidden,(3,2,3))
encry_img = to_Dec(wm)
return encry_img
file1 = 'im.jpg'
file2 = 'IMG_0284.jpg'
img = MED(file1,file2)
The size of file2 is only 1.2M and file1 is 20K, I don't know why the memory is not enough. I want to know how to fit it in memory. Thank you.
The feedback:
Traceback (most recent call last):
File "H:\signature.py", line 146, in
img = MED(file1,file2)
File "H:\signature.py", line 139, in MED
bgr_b_base = to_Bin(base_img)
MemoryError
Related
I am having this error for the part in bold:
Traceback (most recent call last):
File "C:/Users/appan/OneDrive/Documents/Year 3/AI Assignment Semester 1/Tabu Search/Tabu-search-on-Travelling-Salesman-Problem-master/TabuSearch2.py", line 238, in
solution, value, exec_time = tabu_search("five_d.txt")
File "C:/Users/appan/OneDrive/Documents/Year 3/AI Assignment Semester 1/Tabu Search/Tabu-search-on-Travelling-Salesman-Problem-master/TabuSearch2.py", line 175, in tabu_search
graph, max_weight = read_data(input_file_path)
File "C:/Users/appan/OneDrive/Documents/Year 3/AI Assignment Semester 1/Tabu Search/Tabu-search-on-Travelling-Salesman-Problem-master/TabuSearch2.py", line 64, in read_data
link.append(float(tmp[0]))
ValueError: could not convert string to float:
Process finished with exit code 1
can you help please
import math
from random import randint
import time
from random import shuffle
#import numpy as np
### Data Format is dict:
# data[node_name] = gives you a list of link info
# data[link_index][0] = name of node that edge goes to
# data[link_index][1] = weight of that edge
def read_data(path):
linkset = []
links = {}
max_weight = 0
'''
with open(path, "r") as f:
for line in f:
print (line)
link = []
#tmp = list(map(float,line.strip().split(' ')))
tmp=line.strip().split(' ')
arr=np.array(tmp)
print(arr)
link.append(float(tmp[0]))
link.append(float(tmp[1]))
link.append(float(tmp[2]))
linkset.append(link)
if float(tmp[2]) > max_weight:
max_weight = float(tmp[2])
link.append(int(tmp[0]))
link.append(int(tmp[1]))
link.append(int(tmp[2]))
linkset.append(link)
if int(tmp[2]) > max_weight:
max_weight = int(tmp[2])
'''
**with open(path,'r') as f:
for line in f:
#print(line)
link = []
#tmp = list(map(float,line.strip().split(' ')))
tmp = line.strip().split(' ')
#tmp = np.array()
print(tmp)
'''
for i in tmp:
link.append([i])
'''
link.append(float(tmp[0]))
link.append(float(tmp[1]))
link.append(float(tmp[2]))
linkset.append(link)
#print(link)
'''
link.append(list(map(float,tmp[0])))
link.append(list(map(float,tmp[1])))
link.append(list(map(float,tmp[2])))
linkset.append(link)
'''
if float(tmp[2]) > max_weight:
max_weight = float(tmp[2])**
for link in linkset:
try:
linklist = links[str(link[0])]
linklist.append(link[1:])
links[str(link[0])] = linklist
except:
links[str(link[0])] = [link[1:]]
return links, max_weight
def getNeighbors(state):
# return hill_climbing(state)
return two_opt_swap(state)
def hill_climbing(state):
node = randint(1, len(state) - 1)
neighbors = []
for i in range(len(state)):
if i != node and i != 0:
tmp_state = state.copy()
tmp = tmp_state[i]
tmp_state[i] = tmp_state[node]
tmp_state[node] = tmp
neighbors.append(tmp_state)
return neighbors
def two_opt_swap(state):
global neighborhood_size
neighbors = []
for i in range(neighborhood_size):
node1 = 0
node2 = 0
while node1 == node2:
node1 = randint(1, len(state) - 1)
node2 = randint(1, len(state) - 1)
if node1 > node2:
swap = node1
node1 = node2
node2 = swap
tmp = state[node1:node2]
tmp_state = state[:node1] + tmp[::-1] + state[node2:]
neighbors.append(tmp_state)
return neighbors
def fitness(route, graph):
path_length = 0
for i in range(len(route)):
if (i + 1 != len(route)):
dist = weight_distance(route[i], route[i + 1], graph)
if dist != -1:
path_length = path_length + dist
else:
return max_fitness # there is no such path
else:
dist = weight_distance(route[i], route[0], graph)
if dist != -1:
path_length = path_length + dist
else:
return max_fitness # there is no such path
return path_length
# not used in this code but some datasets has 2-or-more dimensional data points, in this case it is usable
def euclidean_distance(city1, city2):
return math.sqrt((city1[0] - city2[0]) ** 2 + ((city1[1] - city2[1]) ** 2))
def weight_distance(city1, city2, graph):
global max_fitness
neighbors = graph[str(city1)]
for neighbor in neighbors:
if neighbor[0] == int(city2):
return neighbor[1]
return -1 # there can't be minus distance, so -1 means there is not any city found in graph or there is not such edge
def tabu_search(input_file_path):
global max_fitness, start_node
graph, max_weight = read_data(input_file_path)
## Below, get the keys (node names) and shuffle them, and make start_node as start
s0 = list(graph.keys())
shuffle(s0)
if int(s0[0]) != start_node:
for i in range(len(s0)):
if int(s0[i]) == start_node:
swap = s0[0]
s0[0] = s0[i]
s0[i] = swap
break;
# max_fitness will act like infinite fitness
max_fitness = ((max_weight) * (len(s0))) + 1
sBest = s0
vBest = fitness(s0, graph)
bestCandidate = s0
tabuList = []
tabuList.append(s0)
stop = False
best_keep_turn = 0
start_time = time.time()
while not stop:
sNeighborhood = getNeighbors(bestCandidate)
bestCandidate = sNeighborhood[0]
for sCandidate in sNeighborhood:
if (sCandidate not in tabuList) and ((fitness(sCandidate, graph) < fitness(bestCandidate, graph))):
bestCandidate = sCandidate
if (fitness(bestCandidate, graph) < fitness(sBest, graph)):
sBest = bestCandidate
vBest = fitness(sBest, graph)
best_keep_turn = 0
tabuList.append(bestCandidate)
if (len(tabuList) > maxTabuSize):
tabuList.pop(0)
if best_keep_turn == stoppingTurn:
stop = True
best_keep_turn += 1
exec_time = time.time() - start_time
return sBest, vBest, exec_time
## Tabu Search Takes edge-list in a given format:
# nodefrom nodeto weight
# 0 1 5
# 3 2 4
# 1 0 3
# Undirectional edges should be written 2 times for both nodes.
# maxTabuSize = 10000
maxTabuSize = 500
neighborhood_size = 500
stoppingTurn = 500
max_fitness = 0
start_node = 0
# solution, value, exec_time = tabu_search("test.txt")
solution, value, exec_time = tabu_search("five_d.txt")
print(solution)
print(value)
print(exec_time)
This is the code:
import numpy as np
import cv2
import os
big_img_path = 'bloodredboy.jpg'
resolution = 0.05
small_img_path = 'Minecraft'
def LoadImages():
global small_img_path
imagelist = list(os.walk(small_img_path))[0][2]
images = {}
for im in imagelist:
img = cv2.imread(r'C:\Users\VVA\Desktop\\'+small_img_path+'\\'+im)
r = 0
g = 0
b = 0
count = 0
for y in img:
for x in y:
r += x[0]
g += x[1]
b += x[2]
count += 1
r /= count
b /= count
g /= count
images[r'C:\Users\VVA\Videos\Desktop\\'+small_img_path+'\\'+im] = [r, g, b]
return images
def change_resolution(img, resolution):
res = []
height = len(img)
width = len(img[0])
yy = 0
for y in range(0, height, round(1/resolution)):
yy += 1
xx = 0
res.append([])
for x in range(0, width, round(1/resolution)):
res[yy-1].append(img[y][x])
xx += 1
return np.array(res)
def ask_block(rgb):
global images
record_block = None
record_value = 99999999999999
for image in images.items():
r = abs(rgb[0] - image[1][0])
g = abs(rgb[1] - image[1][1])
b = abs(rgb[2] - image[1][2])
if r+g+b < record_value:
record_block = image[0]
record_value = r+g+b
return record_block
def make_img_out_of_blocks(img):
count = 0
total = len(img) * len(img[0])
rows = []
yy = 0
for y in img:
yy += 1
xx = 0
row = []
for x in y:
block = ask_block(x)
block = cv2.imread(block)
try:
row = np.concatenate((row, block), axis=1)
except:
row = block
xx += 1
count += 1
rows.append(row)
print(f'{round(count/total*100, 2)}% Completed')
for row in rows:
try:
res = np.concatenate((res, row), axis=0)
except:
res = row
return res
print('Loading Images...')
images = LoadImages()
img = cv2.imread(big_img_path)
print('Done')
if resolution != 1:
print('Changing resolution...')
img = change_resolution(img, resolution)
print('Done')
print('Transforming to Blocks...')
img = make_img_out_of_blocks(img)
print('Done')
cv2.imwrite('output.jpg', img)
cv2.imshow('Result', img)
cv2.waitKey(0)
and this is the error:
I tried to change the file path file name folder images
but no progress.
As you can see i am using numpy and cv2
Loading Images...
Traceback (most recent call last):
File "C:\Users\VVA\Desktop\Image_out_of_Minecraft_Blocks.py", line 94, in
images = LoadImages()
File "C:\Users\VVA\Desktop\Image_out_of_Minecraft_Blocks.py", line 20, in LoadImages
for y in img:
TypeError: 'NoneType' object is not iterable
I am trying to write a list to an excel column and encounter an error. I am attempting to write each value of matchingName to the worksheet aSheet in column V.
Traceback (most recent call last):
File "C:/Users/PycharmProjects/smartCompare/excelmain.py", line 40, in
aSheet[V] = matchingName[i3]
File
"C:\Users\AppData\Local\Programs\Python\Python36\lib\site-packages\openpyxl\worksheet\worksheet.py", line 380, in setitem
self[key].value = value
File "C:\Users\AppData\Local\Programs\Python\Python36\lib\site-packages\openpyxl\worksheet\worksheet.py", line 357, in getitem
min_col, min_row, max_col, max_row = range_boundaries(key)
File "C:\Users\AppData\Local\Programs\Python\Python36\lib\site-packages\openpyxl\utils\cell.py", line 129, in range_boundaries
raise ValueError("{0} is not a valid coordinate or range")
ValueError: {0} is not a valid coordinate or range
Process finished with exit code 1
This error seems to be occurring in the for loop. I checked the openpyxl documentation but no luck on solving it. Any advice?
import openpyxl
from difflib import SequenceMatcher
fruit = []
fruit2 = []
compareScore = []
matchingName = []
matchingRatioNum = []
wb = openpyxl.load_workbook('test.xlsx')
aSheet = wb.get_sheet_by_name('AMIS')
cSheet = wb.get_sheet_by_name('CMMS')
for col in aSheet['F']:
fruit.append(col.value)
for col in cSheet['E']:
fruit2.append(col.value)
length = 5
length2 = 5
i = 0
i2 = 0
for i in range(0, length):
for i2 in range(0, length2):
ratio = SequenceMatcher(None, fruit[i], fruit2[i2]).ratio()
compareScore.append(ratio)
i2 += 1
matchRatio = compareScore.index(max(compareScore))
match = fruit2[matchRatio]
ratioNum = compareScore[matchRatio]
matchingName.append(match)
matchingRatioNum.append(ratioNum)
compareScore = []
i += 1
i3 = 0
for i3 in range(0, length):
V = "'" + 'V' + str(i3+1) + "'"
aSheet[V] = matchingName[i3]
del V
i3 += 1
i4 = 0
for i4 in range(0, length):
W = "'" + 'W' + str(i4+1) + "'"
aSheet[W] = matchingRatioNum[i4]
del W
i4 += 1
wb.save('test.xlsx')
You're creating lookups like this ws["'W4'"] which are invalid coordinates, you want ws["W4"]. You should always use ws.cell() for programmatic access.
The script, originally taken and modified from (http://globplot.embl.de/):
#!/usr/bin/env python
# Copyright (C) 2003 Rune Linding - EMBL
# GlobPlot TM
# GlobPlot is licensed under the Academic Free license
from string import *
from sys import argv
from Bio import File
from Bio import SeqIO
import fpformat
import sys
import tempfile
import os
from os import system,popen3
import math
# Russell/Linding
RL = {'N':0.229885057471264,'P':0.552316012226663,'Q':-0.187676577424997,'A':-0.261538461538462,'R':-0.176592654077609, \
'S':0.142883029808825,'C':-0.0151515151515152,'T':0.00887797506611258,'D':0.227629796839729,'E':-0.204684629516228, \
'V':-0.386174834235195,'F':-0.225572305974316,'W':-0.243375458622095,'G':0.433225711769886,'H':-0.00121743364986608, \
'Y':-0.20750516775322,'I':-0.422234699606962,'K':-0.100092289621613,'L':-0.337933495925287,'M':-0.225903614457831}
def Sum(seq,par_dict):
sum = 0
results = []
raws = []
sums = []
p = 1
for residue in seq:
try:
parameter = par_dict[residue]
except:
parameter = 0
if p == 1:
sum = parameter
else:
sum = sum + parameter#*math.log10(p)
ssum = float(fpformat.fix(sum,10))
sums.append(ssum)
p +=1
return sums
def getSlices(dydx_data, DOM_join_frame, DOM_peak_frame, DIS_join_frame, DIS_peak_frame):
DOMslices = []
DISslices = []
in_DOMslice = 0
in_DISslice = 0
beginDOMslice = 0
endDOMslice = 0
beginDISslice = 0
endDISslice = 0
for i in range( len(dydx_data) ):
#close dom slice
if in_DOMslice and dydx_data[i] > 0:
DOMslices.append([beginDOMslice, endDOMslice])
in_DOMslice = 0
#close dis slice
elif in_DISslice and dydx_data[i] < 0:
DISslices.append([beginDISslice, endDISslice])
in_DISslice = 0
# elseif inSlice expandslice
elif in_DOMslice:
endDOMslice += 1
elif in_DISslice:
endDISslice += 1
# if not in slice and dydx !== 0 start slice
if dydx_data[i] > 0 and not in_DISslice:
beginDISslice = i
endDISslice = i
in_DISslice = 1
elif dydx_data[i] < 0 and not in_DOMslice:
beginDOMslice = i
endDOMslice = i
in_DOMslice = 1
#last slice
if in_DOMslice:
DOMslices.append([beginDOMslice, endDOMslice])
if in_DISslice:
DISslices.append([beginDISslice,endDISslice])
k = 0
l = 0
while k < len(DOMslices):
if k+1 < len(DOMslices) and DOMslices[k+1][0]-DOMslices[k][1] < DOM_join_frame:
DOMslices[k] = [ DOMslices[k][0], DOMslices[k+1][1] ]
del DOMslices[k+1]
elif DOMslices[k][1]-DOMslices[k][0]+1 < DOM_peak_frame:
del DOMslices[k]
else:
k += 1
while l < len(DISslices):
if l+1 < len(DISslices) and DISslices[l+1][0]-DISslices[l][1] < DIS_join_frame:
DISslices[l] = [ DISslices[l][0], DISslices[l+1][1] ]
del DISslices[l+1]
elif DISslices[l][1]-DISslices[l][0]+1 < DIS_peak_frame:
del DISslices[l]
else:
l += 1
return DOMslices, DISslices
def SavitzkyGolay(window,derivative,datalist):
SG_bin = 'sav_gol'
stdin, stdout, stderr = popen3(SG_bin + '-D' + str(derivative) + ' -n' + str(window)+','+str(window))
for data in datalist:
stdin.write(`data`+'\n')
try:
stdin.close()
except:
print stderr.readlines()
results = stdout.readlines()
stdout.close()
SG_results = []
for result in results:
SG_results.append(float(fpformat.fix(result,6)))
return SG_results
def reportSlicesTXT(slices, sequence, maskFlag):
if maskFlag == 'DOM':
coordstr = '|GlobDoms:'
elif maskFlag == 'DIS':
coordstr = '|Disorder:'
else:
raise SystemExit
if slices == []:
#by default the sequence is in uppercase which is our search space
s = sequence
else:
# insert seq before first slide
if slices[0][0] > 0:
s = sequence[0:slices[0][0]]
else:
s = ''
for i in range(len(slices)):
#skip first slice
if i > 0:
coordstr = coordstr + ', '
coordstr = coordstr + str(slices[i][0]+1) + '-' + str(slices[i][1]+1)
#insert the actual slice
if maskFlag == 'DOM':
s = s + lower(sequence[slices[i][0]:(slices[i][1]+1)])
if i < len(slices)-1:
s = s + upper(sequence[(slices[i][1]+1):(slices[i+1][0])])
#last slice
elif slices[i][1] < len(sequence)-1:
s = s + lower(sequence[(slices[i][1]+1):(len(sequence))])
elif maskFlag == 'DIS':
s = s + upper(sequence[slices[i][0]:(slices[i][1]+1)])
#insert untouched seq between disorder segments, 2-run labelling
if i < len(slices)-1:
s = s + sequence[(slices[i][1]+1):(slices[i+1][0])]
#last slice
elif slices[i][1] < len(sequence)-1:
s = s + sequence[(slices[i][1]+1):(len(sequence))]
return s,coordstr
def runGlobPlot():
try:
smoothFrame = int(sys.argv[1])
DOM_joinFrame = int(sys.argv[2])
DOM_peakFrame = int(sys.argv[3])
DIS_joinFrame = int(sys.argv[4])
DIS_peakFrame = int(sys.argv[5])
file = str(sys.argv[6])
db = open(file,'r')
except:
print 'Usage:'
print ' ./GlobPipe.py SmoothFrame DOMjoinFrame DOMpeakFrame DISjoinFrame DISpeakFrame FASTAfile'
print ' Optimised for ELM: ./GlobPlot.py 10 8 75 8 8 sequence_file'
print ' Webserver settings: ./GlobPlot.py 10 15 74 4 5 sequence_file'
raise SystemExit
for cur_record in SeqIO.parse(db, "fasta"):
#uppercase is searchspace
seq = upper(str(cur_record.seq))
# sum function
sum_vector = Sum(seq,RL)
# Run Savitzky-Golay
smooth = SavitzkyGolay('smoothFrame',0, sum_vector)
dydx_vector = SavitzkyGolay('smoothFrame',1, sum_vector)
#test
sumHEAD = sum_vector[:smoothFrame]
sumTAIL = sum_vector[len(sum_vector)-smoothFrame:]
newHEAD = []
newTAIL = []
for i in range(len(sumHEAD)):
try:
dHEAD = (sumHEAD[i+1]-sumHEAD[i])/2
except:
dHEAD = (sumHEAD[i]-sumHEAD[i-1])/2
try:
dTAIL = (sumTAIL[i+1]-sumTAIL[i])/2
except:
dTAIL = (sumTAIL[i]-sumTAIL[i-1])/2
newHEAD.append(dHEAD)
newTAIL.append(dTAIL)
dydx_vector[:smoothFrame] = newHEAD
dydx_vector[len(dydx_vector)-smoothFrame:] = newTAIL
globdoms, globdis = getSlices(dydx_vector, DOM_joinFrame, DOM_peakFrame, DIS_joinFrame, DIS_peakFrame)
s_domMask, coordstrDOM = reportSlicesTXT(globdoms, seq, 'DOM')
s_final, coordstrDIS = reportSlicesTXT(globdis, s_domMask, 'DIS')
sys.stdout.write('>'+cur_record.id+coordstrDOM+coordstrDIS+'\n')
print s_final
print '\n'
return
runGlobPlot()
My input and output files are here: link
This script takes a input (input1.fa) and gives following output output1.txt
But when I try to run this script with similar type but larger input file (input2.fa) .. It shows following error:
Traceback (most recent call last):
File "final_script_globpipe.py", line 207, in <module>
runGlobPlot()
File "final_script_globpipe.py", line 179, in runGlobPlot
smooth = SavitzkyGolay('smoothFrame',0, sum_vector)
File "final_script_globpipe.py", line 105, in SavitzkyGolay
stdin.write(`data`+'\n')
IOError: [Errno 22] Invalid argument
I have no idea where the problem is. Any type of suggestion is appriciated.
I am using python 2.7 in windows 7 machine. I have also attached the Savitzky Golay module which is needed to run the script.
Thanks
UPDATE:
After trying to reproduce the error on linux it's showing a similar behavior, working fine with the first file but with the second is returning Errno32.
Traceback:
Traceback (most recent call last):
File "Glob.py", line 207, in <module>
runGlobPlot()
File "Glob.py", line 179, in runGlobPlot
smooth = SavitzkyGolay('smoothFrame',0, sum_vector)
File "Glob.py", line 105, in SavitzkyGolay
stdin.write(`data`+'\n')
IOError: [Errno 32] Broken pipe
Update:
Some calls of the SG_bin return that the -n parameter is the wrong type.
Wrong type of parameter for flag -n. Has to be unsigned,unsigned
This parameter comes from the window variable that is passed to the SavitzkyGolay function.
Surrounding the stdin.write with a trycatch block reveals that it breaks a hadnfull of times.
try:
for data in datalist:
stdin.write(repr(data)+'\n')
except:
print "It broke"
I'm running a piece of freely available python code used to detect CNVs in single cell sequencing data:
#!/usr/bin/env python
import sys
def main():
infilename = sys.argv[1]
outfilename = sys.argv[2]
statfilename = sys.argv[3]
chrominfo = ("/path/hg19.chrom.sizes.txt", 0)
bins = ("/path/hg19.bin.boundaries.50k.bowtie.k50.sorted.txt", 0)
INFILE = open(infilename, "r")
OUTFILE = open(outfilename, "w")
STATFILE = open(statfilename, "w")
binCounts = []
for i in range(len(bins)):
binCounts.append(0)
print len(binCounts)
print len(bins)
counter = 0
totalReads = 0
prevChrompos = ""
for x in INFILE:
arow = x.rstrip().split("\t")
thisChrom = arow[2]
thisChrompos = arow[3]
if thisChrom.find("_") > -1:
#print thisChrom
continue
if thisChrom == "chrM":
#print thisChrom
continue
if thisChrom == "":
continue
if chrominfo.has_key(thisChrom):
pass
else:
continue
totalReads += 1
thisChrominfo = chrominfo[thisChrom]
thisAbspos = long(thisChrompos) + long(thisChrominfo[2])
counter += 1
indexUp = len(bins) - 1
indexDown = 0
indexMid = int((indexUp - indexDown) / 2.0)
while True:
if thisAbspos >= long(bins[indexMid][2]):
indexDown = indexMid + 0
indexMid = int((indexUp - indexDown) / 2.0) + indexMid
else:
indexUp = indexMid + 0
indexMid = int((indexUp - indexDown) / 2.0) + indexDown
if indexUp - indexDown < 2:
break
binCounts[indexDown] += 1
prevChrompos = thisChrompos
for i in range(len(binCounts)):
thisRatio = float(binCounts[i]) / (float(counter) / float(len(bins)))
OUTFILE.write("\t".join(bins[i][0:3]))
OUTFILE.write("\t")
OUTFILE.write(str(binCounts[i]))
OUTFILE.write("\t")
OUTFILE.write(str(thisRatio))
OUTFILE.write("\n")
binCounts.sort()
STATFILE.write("TotalReads\tMedianBinCount\n")
STATFILE.write(str(totalReads))
STATFILE.write("\t")
STATFILE.write(str(binCounts[len(bins)/2]))
STATFILE.write("\n")
INFILE.close()
OUTFILE.close()
STATFILE.close()
def fileToDictionary(inputFile, indexColumn):
input = open(inputFile, "r")
rd = dict()
# input.readline()
for x in input:
arow = x.rstrip().split("\t")
id = arow[indexColumn]
if rd.has_key(id):
#rd[id].append(arow)
print "duplicate knowngene id = " + id
print "arow = " + str(arow)
print "rd[id] = " + str(rd[id])
else:
rd[id] = arow
input.close()
return(rd)
def fileToArray(inputFile, skipFirst):
input = open(inputFile, "r")
ra = []
for i in range(skipFirst):
input.readline()
for x in input:
arow = x.rstrip().split("\t")
ra.append(arow)
input.close()
return(ra)
if __name__ == "__main__":
main()
I'm getting an error on line 40:
Traceback (most recent call last):
File "/path/varbin.50k.sam.py", line 129, in <module>
main()
File "/path/varbin.50k.sam.py", line 40, in main
**if chrominfo.has_key(thisChrom):
AttributeError: 'tuple' object has no attribute 'has_key'**
I don't work regularly in Python, can someone offer a suggestion?
Where do I begin?
Your code is expecting a dictionary and getting a tuple. I think you've missed a step: You need to change
chrominfo = ("/path/hg19.chrom.sizes.txt", 0)
To
chrominfo = fileToDictionary("/path/hg19.chrom.sizes.txt", 0)
Note also that if dict.has_key(key) has been deprecated in favour of if key in dict.keys()