I know that python 2.7 str and bytes are the same(according to Changing string to byte type in Python 2.7)
but I'm using savReaderWriter to write in a .sav file, that is, first column will be a string, and the others are numbers..
my definition is basically the same as in the documentation of savReaderWriter:
savFileName = 'someFile.sav'
records = [[b'Test1', 1, 1], [b'Test2', 2, 1]]
varNames = ['var1', 'v2', 'v3']
varTypes = {'var1': 5, 'v2': 0, 'v3': 0}
with SavWriter(savFileName, varNames, varTypes) as writer:
for record in records:
writer.writerow(record)
on this, they use b'' to convert the string to bytes, but that's a string example, that was written with the code..
but I have a list of strings strings = ["string1", "string2", ...] and each of the strings will be on the my "records"...something like this:
records = [
[strings[0], 0, 0, ...],
[strings[1], 1, 0, ...],
...
]
so, how do I convert the variable to bytes?
I'm getting 'str' object does not support item assigment
Edit: my code: and the traceback "error" that my exception generates is what i've wrote before..
def exporta_sav(matrizes, listas):
#matrizes = [matriz_grupo, matriz_n8]
#listas = [cpcs_grupo, patentes_grupo, cpcs_n8, patentes_n8]
savFile1 = "matrizgrupo.sav"
savFile2 = "matrizsubgrupo.sav"
varTypesGrupo = {}
varTypesNivel8 = {}
stringpatente = 'PATENTE'
#Cria lista auxiliar para fazer a matriz corretamente no formato .sav
varTypesGrupo[stringpatente] = 7
varTypesNivel8[stringpatente] = 7
for i in xrange(len(listas[0])):
varTypesGrupo[listas[0][i]] = 0
for i in xrange(len(listas[2])):
varTypesNivel8[listas[2][i]] = 0
cpcsGrupoAux = [stringpatente]
for i in xrange(len(listas[0])):
cpcsGrupoAux.append(listas[0][i])
cpcsN8Aux = [stringpatente]
for i in xrange(len(listas[2])):
cpcsN8Aux.append(listas[2][i])
#Com as colunas definidas, vai criar listas auxiliares para cada linha
matrizGrupoAux = []
matrizN8Aux = []
if len(matrizes[0]) == len(listas[1]):
for i in xrange(len(matrizes[0])):
matrizGrupoAux.append([ bytes(listas[1][i]) ])
for j in xrange(len(matrizes[0][i])):
matrizGrupoAux[i].append(matrizes[0][i][j])
if len(matrizes[1]) == len(listas[3]):
for i in xrange(len(matrizes[1])):
matrizN8Aux.append([ bytes(listas[3][i]) ])
for j in xrange(len(matrizes[1][i])):
matrizN8Aux[i].append(matrizes[1][i][j])
del listas
del matrizes
#Escrever nos arquivos
try:
with SavWriter(savFile1, cpcsGrupoAux, varTypesGrupo) as writer:
for patente in matrizGrupoAux:
writer.writerows(patente)
with SavWriter(savFile2, cpcsN8Aux, varTypesNivel8) as writer:
for patente in matrizN8Aux:
writer.writerows(patente)
except Exception as error:
print("Ocorreu um erro ao escrever nos arquivos: " + str(error))
Since it's asked, the full traceback(I removed the handling):
Traceback (most recent call last):
File "matrizbinaria.py", line 242, in (module)
exporta_sav(resultados[0], resultados[1])
File "matrizbinaria.py", line 233, in exporta_sav
writer.writerows(patente)
File "C:\Python27\lib\site-packages\savReaderWriter\savWriter.py", line 429 in writerows
self.writerow(record)
File "C:\Python27\lib\site-packages\savReaderWriter\savWriter.py", line 387 in writerow
self._pyWriterow(record)
File "C:\Python27\lib\site-packages\savReaderWriter\savWriter.py", line 379 in _pyWriterow
record[i] = value
TypeError: 'str' object does not support item assignment
Related
I have written out my code and when I run it, I get a KeyError:
Traceback (most recent call last):
File "C:/Users/sagar/Desktop/Sagar CS131B Files/convert_to_fixed.py", line 21, in <module>
birthdate = sample['Birthdate']
KeyError: 'Birthdate'
my code:
inputFile = 'raw.data.py'
data = list()
columns = ['First name','Last name','Telephone','Address','City','State','Birthdate']
for line in open(inputFile):
# Assuming comments in the text file as '#'
if line.startswith('#'): continue
row = line.strip().split(':')
data.append(dict(zip(columns, row)))
#print(data)
formatted_data = list()
for sample in data:
birthdate = sample['Birthdate']
mm,dd,yy = birthdate.split('/')
if len(yy)==2:
yy = '19' + yy
birthdate = '/'.join([mm,dd,yy])
sample['Birthdate'] = birthdate
modified_row = ':'.join(
[sample['Last name'], sample['First name'],
sample['Telephone'], sample['Address'],
sample['City'], sample['State'], sample['Birthdate']])
formatted_data.append(modified_row + '\n')
with open('fixed.data','w') as f:
f.writelines(formatted_data)
I have looked up how to fix it, just not sure on the execution of a try-except function. If someone could help me out with this that would be amazing..
This is what is inside the file given:
'Betty:Boop:245-836-8357:635 Cutesy Lane:Hollywood:CA:6/23/1923',
'Ephram:Hardy:293-259-5395:235 Carlton Lane:Joliet:IL:8/12/1920',
'Fred:Fardbarkle:674-843-1385:20 Parak Lane:DeLuth:MN:4/12/23',
'Igor:Chevsky:385-375-8395:3567 Populus Place:Caldwell:NJ:6/18/68',
'James:Ikeda:834-938-8376:23445 Aster Ave.:Allentown:NJ:12/1/1938',
'Jennifer:Cowan:548-834-2348:408 Laurel Ave.:Kingsville:TX:10/1/35',
'Jesse:Neal:408-233-8971:45 Rose Terrace:San Francisco:CA:2/3/2001',
'Jon:DeLoach:408-253-3122:123 Park St.:San Jose:CA:7/25/53',
'Jose:Santiago:385-898-8357:38 Fife Way:Abilene:TX:1/5/58',
'Karen:Evich:284-758-2867:23 Edgecliff Place:Lincoln:NB:11/3/35',
'Lesley:Kirstin:408-456-1234:4 Harvard Square:Boston:MA:4/22/2001',
'Lori:Gortz:327-832-5728:3465 Mirlo Street:Peabody:MA:10/2/65',
'Norma:Corder:397-857-2735:74 Pine Street:Dearborn:MI:3/28/45',
'Paco:Gutierrez:835-365-1284:454 Easy Street:Decatur:IL:2/28/53',
'Popeye:Sailor:156-454-3322:945 Bluto Street:Anywhere:USA:3/19/35',
'Sir:Lancelot:837-835-8257:474 Camelot Boulevard:Bath:WY:5/13/69',
'Steve:Blenheim:238-923-7366:95 Latham Lane:Easton:PA:11/12/1956',
'Tommy:Savage:408-724-0140:1222 Oxbow Court:Sunnyvale:CA:5/19/66',
'Vinh:Tranh:438-910-7449:8235 Maple Street:Wilmington:VM:9/23/63',
'William:Kopf:846-836-2837:6937 Ware Road:Milton:PA:9/21/46',
'Yukio:Takeshida:387-827-1095:13 Uno Lane:Ashville:NC:7/1/29',
'Zippy:Pinhead:834-823-8319:2356 Bizarro Ave.:Farmount:IL:1/1/67',
'Andy:Warhol:212-321-7654:231 East 47th Street:New York City:NY:8/6/1928'
zip() only produces results up to the shorter iterables length:
print(list(zip([1,2],[1,2,3,4,5,6]))) # [(1, 1), (2, 2)]
Your source data somehow at least one line with less elements in it that is why one of your dicts does not have the 'Birthdate' key (the last one).
You can guard against it:
data = list()
columns = ['First name', 'Last name', 'Telephone',
'Address', 'City', 'State', 'Birthdate']
# use a context manager for file open
with open(inputFile) as f:
for line in f:
# Assuming comments in the text file as '#'
if line.startswith('#'):
continue
# ignore empty lines (you can combine with above)
if not line.strip():
continue
row = line.strip().split(':')
# raise exception if not enough data found
if len(row) != len(columns):
raise AttributeError("Not enough datapoints in line: ", line)
data.append(dict(zip(columns, row)))
Code to extract sequences
from Bio import SeqIO
def get_cds_feature_with_qualifier_value(seq_record, name, value):
for feature in genome_record.features:
if feature.type == "CDS" and value in feature.qualifiers.get(name, []):
return feature
return None
genome_record = SeqIO.read("470.8208.gbk", "genbank")
db_xref = ['fig|470.8208.peg.2198', 'fig|470.8208.peg.2200', 'fig|470.8208.peg.2203', 'fig|470.8208.peg.2199', 'fig|470.8208.peg.2201', 'fig|470.8208.peg.2197', 'fig|470.8208.peg.2202', 'fig|470.8208.peg.2501', 'fig|470.8208.peg.2643', 'fig|470.8208.peg.2193', 'fig|470.8208.peg.2670', 'fig|470.8208.peg.2695', 'fig|470.8208.peg.2696', 'fig|470.8208.peg.2189', 'fig|470.8208.peg.2458', 'fig|470.8208.peg.2191', 'fig|470.8208.peg.2190', 'fig|470.8208.peg.2188', 'fig|470.8208.peg.2192', 'fig|470.8208.peg.2639', 'fig|470.8208.peg.3215', 'fig|470.8208.peg.2633', 'fig|470.8208.peg.2682', 'fig|470.8208.peg.3186', 'fig|470.8208.peg.2632', 'fig|470.8208.peg.2683', 'fig|470.8208.peg.3187', 'fig|470.8208.peg.2764', 'fig|470.8208.peg.2686', 'fig|470.8208.peg.2638', 'fig|470.8208.peg.2680', 'fig|470.8208.peg.2685', 'fig|470.8208.peg.2684', 'fig|470.8208.peg.2633', 'fig|470.8208.peg.2682', 'fig|470.8208.peg.3186', 'fig|470.8208.peg.2632', 'fig|470.8208.peg.2683', 'fig|470.8208.peg.3187', 'fig|470.8208.peg.2640', 'fig|470.8208.peg.3221', 'fig|470.8208.peg.3222', 'fig|470.8208.peg.3389', 'fig|470.8208.peg.2764', 'fig|470.8208.peg.2653', 'fig|470.8208.peg.3216', 'fig|470.8208.peg.3231', 'fig|470.8208.peg.2641', 'fig|470.8208.peg.2638', 'fig|470.8208.peg.2680', 'fig|470.8208.peg.2637', 'fig|470.8208.peg.2642', 'fig|470.8208.peg.2679', 'fig|470.8208.peg.3230', 'fig|470.8208.peg.2676', 'fig|470.8208.peg.2677', 'fig|470.8208.peg.1238', 'fig|470.8208.peg.2478', 'fig|470.8208.peg.2639', 'fig|470.8208.peg.854', 'fig|470.8208.peg.382', 'fig|470.8208.peg.383']
with open("nucleotides.fasta", "w") as nt_output, open("proteins.fasta", "w") as aa_output:
for xref in db_xref:
print ("Looking at " + xref)
cds_feature = get_cds_feature_with_qualifier_value (genome_record, "db_xref", xref)
gene_sequence = cds_feature.extract(genome_record.seq)
protein_sequence = gene_sequence.translate(table=11, cds=True)
# This is asking Python to halt if the translation does not match:
assert protein_sequence == cds_feature.qualifiers["translation"][0]
# Output FASTA records - note \n means insert a new line.
# This is a little lazy as it won't line wrap the sequence:
nt_output.write(">%s\n%s\n" % (xref, gene_sequence))
aa_output.write(">%s\n%s\n" % (xref, gene_sequence))
print("Done")
getting following error
/usr/local/lib/python3.7/dist-packages/Bio/GenBank/Scanner.py:1394: BiopythonParserWarning: Truncated LOCUS line found - is this correct?
:'LOCUS CP027704 3430798 bp DNA linear UNK \n'
BiopythonParserWarning,
Looking at fig|470.8208.peg.2198
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-32-323ff320990a> in <module>()
15 print ("Looking at " + xref)
16 cds_feature = get_cds_feature_with_qualifier_value (genome_record, "db_xref", xref)
---> 17 gene_sequence = cds_feature.extract(genome_record.seq)
18 protein_sequence = gene_sequence.translate(table=11, cds=True)
19
AttributeError: 'NoneType' object has no attribute 'extract'
You have a space between the get_cds_feature_with_qualifier_value call and its parameters (genome_record, "db_xref", xref), so the call is probably failing, leaving cds_feature as None.
Generally, you should provide reproducible examples so that someone else (who doesn't have the gbk file you're referencing) can still reproduce and troubleshoot your error.
Solved the problem.
'''
from Bio import SeqIO
def get_cds_feature_with_qualifier_value(seq_record, name, value):
for feature in genome_record.features:
if feature.type == "CDS" and value in feature.qualifiers.get(name, []):
return feature
return None
genome_record = SeqIO.read("470.8208.gbk", "genbank")
da_xref = ['fig|470.8208.peg.2198', 'fig|470.8208.peg.2200', 'fig|470.8208.peg.2203', 'fig|470.8208.peg.2199', 'fig|470.8208.peg.2201', 'fig|470.8208.peg.2197', 'fig|470.8208.peg.2202', 'fig|470.8208.peg.2501', 'fig|470.8208.peg.2643', 'fig|470.8208.peg.2193', 'fig|470.8208.peg.2670', 'fig|470.8208.peg.2695', 'fig|470.8208.peg.2696', 'fig|470.8208.peg.2189', 'fig|470.8208.peg.2458', 'fig|470.8208.peg.2191', 'fig|470.8208.peg.2190', 'fig|470.8208.peg.2188', 'fig|470.8208.peg.2192', 'fig|470.8208.peg.2639', 'fig|470.8208.peg.3215', 'fig|470.8208.peg.2633', 'fig|470.8208.peg.2682', 'fig|470.8208.peg.3186', 'fig|470.8208.peg.2632', 'fig|470.8208.peg.2683', 'fig|470.8208.peg.3187', 'fig|470.8208.peg.2764', 'fig|470.8208.peg.2686', 'fig|470.8208.peg.2638', 'fig|470.8208.peg.2680', 'fig|470.8208.peg.2685', 'fig|470.8208.peg.2684', 'fig|470.8208.peg.2633', 'fig|470.8208.peg.2682', 'fig|470.8208.peg.3186', 'fig|470.8208.peg.2632', 'fig|470.8208.peg.2683', 'fig|470.8208.peg.3187', 'fig|470.8208.peg.2640', 'fig|470.8208.peg.3221', 'fig|470.8208.peg.3222', 'fig|470.8208.peg.3389', 'fig|470.8208.peg.2764', 'fig|470.8208.peg.2653', 'fig|470.8208.peg.3216', 'fig|470.8208.peg.3231', 'fig|470.8208.peg.2641', 'fig|470.8208.peg.2638', 'fig|470.8208.peg.2680', 'fig|470.8208.peg.2637', 'fig|470.8208.peg.2642', 'fig|470.8208.peg.2679', 'fig|470.8208.peg.3230', 'fig|470.8208.peg.2676', 'fig|470.8208.peg.2677', 'fig|470.8208.peg.1238', 'fig|470.8208.peg.2478', 'fig|470.8208.peg.2639', 'fig|470.8208.peg.854', 'fig|470.8208.peg.382', 'fig|470.8208.peg.383']
db_xref=[]
for xref in da_xref:
db_xref.append('SEED:' + xref)
with open("nucleotides.fasta", "w") as nt_output, open("proteins.fasta", "w") as aa_output:
for xref in db_xref:
print ("Looking at", xref)
cds_feature = get_cds_feature_with_qualifier_value(genome_record, "db_xref", xref)
gene_sequence = cds_feature.extract(genome_record.seq)
protein_sequence = gene_sequence.translate(table=11, cds=True)
# This is asking Python to halt if the translation does not match:
assert protein_sequence == cds_feature.qualifiers["translation"][0]
# Output FASTA records - note \n means insert a new line.
# This is a little lazy as it won't line wrap the sequence:
nt_output.write(">%s\n%s\n" % (xref, gene_sequence))
aa_output.write(">%s\n%s\n" % (xref, gene_sequence))
print("Done")
'''
I've been messing around with this code for some hours, and it still give me errors. I want to do an 'automark' for tests, where you introduce the good answers and the bad ones, and it calculates the final mark.
Code:
class puntuacion:
def __init__(self, aciertos, errores):
self.aciertos = int(input("Introduce el numero de aciertos"))
if self.aciertos > 40:
print("Numero maximo de aciertos: 40")
exit()
self.errores = int(input("Introduce el numero de errores: "))
def calculo(self):
totalac = self.aciertos * 0.25
print(totalac)
while self.errores > 3:
totalerr += -0.25
self.errores - 3
print'Nota final: ', totalac-totalerr
calculo({})
Error:
C:\Users\Admin>python C:\Users\Admin\Documents\python\test2.py
Traceback (most recent call last):
File "C:\Users\Admin\Documents\python\test2.py", line 1, in <module>
class puntuacion:
File "C:\Users\Admin\Documents\python\test2.py", line 17, in puntuacion
calculo({})
File "C:\Users\Admin\Documents\python\test2.py", line 10, in calculo
totalac = self.aciertos * 0.25
AttributeError: 'dict' object has no attribute 'aciertos'
You can't call an instance method directly: you have to instantiate the class first:
p = puntuacion(something, something_else)
p.calculo()
The calculo() method does not take any arguments: the first argument, selfis implicit, and corresponds to the class instance. You don't pass it.
You should also remove the input() functions from your __init__; you're already passing aciertos and errores as arguments, and generally you don't want to do "complicated" stuff (like user interaction) in the initialization of a class:
...
def __init__(self, aciertos, errores):
self.aciertos = aciertos
self.errores = errores
...
and then:
aciertos = int(input("Introduce el numero de aciertos"))
if aciertos > 40:
sys.exit("Numero maximo de aciertos: 40")
errores = int(input("Introduce el numero de errores: "))
p = puntuacion(aciertos, errores)
p.calculo()
I don't managed to make leastsq run with
The programme etude is run 5 times but there is no change of the parameters p[0] and p[1].
I launch the etude file witch produce a file of data witch I read by the Read_And_Save_Value fonction. The data print depend on the p[0] p[&] paramaters.
The template is launch by Lancer_Etude it remplace the etude file by the etude.template with the caractère $A and $B are remplaced by their actual value.
def Read_And_Save_Value(p,x):
fichier = open( "sechage_granger.dat", "r") # ouverture en lecture
txt = fichier.read()
fichier.close()
b1 = zeros(23)
b2 = zeros(23)
for i in range(6, 29):
print 'i', i
line = txt.split("\n")[i] # iere ligne de donnees
line = line.split(" ") # le separateur utilise a l'ecriture
b1[i-6] = float(line[1])
b2[i-6] = float(line[3])
print 'b1', b1
print 'b2', b2
c= [0,24, 46, 143, 166, 190, 388, 646, 843, 1011, 2015,
2853, 3554, 4514, 5187, 6046, 6525, 7222,
7923, 8877, 10415, 11068, 12317, 13251, 14757, 15843,
17447, 18459, 19874, 22325, 24216,
27071, 33651, 39987, 103995, 108660,109000]
def f_int(p,x):
return UnivariateSpline(b1, b2, s=2)(x)
#x = s(c)
plt.plot(b1, b2, '.-')
#plt.plot(c, x)
plt.show()
return f_int(p,x)
def Lancer_Etude(p,x,C0,Ceq):
file_in = open('etude.template', 'r')
file_out = open('etude', 'w')
dico = dict(A=p[0], B=p[1],C0=C0, Ceq=Ceq)
for line_0 in file_in.readlines():
line_1 = Template(line_0).substitute(dico)
file_out.write(line_1)
file_in.close()
file_out.close()
os.system('bash ./launch_study.sh')
return Read_And_Save_Value(p,x)
# Define the error
def residuals(p,x, y):
fichier.write(str(p[0])+";"+str(p[1])+"\n") # ecriture d'une ligne de donnees (choix arbitraire du format)
print 'x'*80
print 'fin etude'
#print 'fres', fres
print 'y', y
err = y - Lancer_Etude(p,x,150.,50.)
fichier.write('erreur -> ')
fichier.write(str(sum(np.multiply(err, err)))+"\n")
# print 'err', err
return err
I'm new in python and I would like to know how could I write a new CSV file which contains a simple list. Then I will use this file in an Excel Worksheet.
My ENTIRE code:
import csv
import urllib
from bs4 import BeautifulSoup
sock = urllib.urlopen("http://www.fatm.com.es/Datos_Equipo.asp?Cod=03CA0007")
htmlSource = sock.read()
sock.close()
soup = BeautifulSoup(htmlSource)
form = soup.find("form", {'id': "FORM1"})
table = form.find("table")
entidad = [item.text.strip() for item in table.find_all('td')]
valores = [item.get('value') for item in form.find_all('input')]
lista = entidad
i = 0
x = 1
while i <=10:
lista.insert(i+x, valores[i])
i += 1
x += 1
print lista
w = csv.writer(file(r'C:\Python27\yo.csv','wb'),dialect='excel')
w.writerows(lista)
lista = [u'Club',
u'CLUB TENIS DE MESA PORTUENSE',
u'Nombre Equipo',
u'C.T.M. PORTUENSE',
u'Telefono fijo',
u'630970055',
u'Telefono Movil',
u'630970055',
u'E_Mail',
u'M.LOPEZ_70#HOTMAIL.COM',
u'Local de Juego',
u'INSTITUTO MAR DE CADIZ',
u'Categoria',
u'DIVISION HONOR ANDALUZA',
u'Grupo',
u'GRUPO 9',
u'Delegado',
u'SANCHEZ SANTANO, JOSE MARIA',
u'*Dias de Juego',
u'SABADO',
u'*Hora de Juego',
u'17:00']
My results: an empty CSV file. :(
Thanks in advance!!!
Here you go:
import csv
lista = [u'Club',
u'CLUB TENIS DE MESA PORTUENSE',
u'Nombre Equipo',
u'C.T.M. PORTUENSE',
u'Telefono fijo',
u'630970055',
u'Telefono Movil',
u'630970055', u'E_Mail', u'M.LOPEZ_70#HOTMAIL.COM', u'Local de Juego', u'INSTITUTO MAR DE CADIZ', u'Categoria', u'DIVISION HONOR ANDALUZA', u'Grupo', u'GRUPO 9', u'Delegado', u'SANCHEZ SANTANO, JOSE MARIA', u'*Dias de Juego', u'SABADO', u'*Hora de Juego', u'17:00']
header = []
row = []
for i, val in enumerate(lista):
if i%2==0:
header.append(val)
else:
row.append(val)
out = open('file.csv', 'w')
w = csv.writer(out, dialect='excel')
w.writerow(header)
w.writerow(row)
out.close()
As a follow up side question. What I would do in your place is to create one list for your column names, like:
header = ['col_name1', 'col_name2', ... ]
and a list of lists for the values like:
values = [
[row1_val1, row1_val2, ...],
[row2_val1, row2_val2, ...],
...
]
Then you can do:
w.writerow(header)
for row in values:
w.writerow(row)
Check the doc of the csv module, there might be a way to write all rows in one go. I've never used it myself.