Python code, not able to write into xls - python

When the below code is executed in Python 3.5 and xlwt 1.2.0, following error is generated:
"Cannot convert byte objects to str implicitly"
The code works fine for python 2.7.
Anyone please let me know what could be the problem.
Thanks in Advance!!
import xlwt
import re
import os
wb = xlwt.Workbook()
ws = wb.add_sheet('A Test Sheet')
ws_1 = wb.add_sheet('A Test Sheet_B')
cnt_row = 0
cnt_col_1 = 0
cnt_col_2 = 0
path = "E:\Python_Scripts"
files = os.listdir("E:\Python_Scripts")
for filename in files:
if filename.endswith(".ptu"):
fo = open(os.path.join(path, filename), 'r')
while(1):
str = fo.readline()
if (str == ""):
print ("file finished")
break
else:
matchObj = re.match(r'\s* TEST (.*?).*', str)
if (matchObj):
str = str.split('TEST', 1)[1]
ws.write(cnt_row, 0, str)
matchObj_Author = re.match(r' (.*) Author (.*?).*', str)
if (matchObj_Author):
str = str.split('(', 1)[1]
str = str.rsplit(')', 1)
ws.write(cnt_row, 1, str)
cnt_row = cnt_row + 1
fo.close()
wb.save('example.xls')

Your data input has changed. And one or more of its lines contain multiple strings.
If you're reading a file where a line has multiple entires then your str will be a list not a string. If it is a list, this will cause the error when invoking wb.save('example.xls'): TypeError: must be str, not bytes
Here's a pared down version of your program that I used to test this out:
import xlwt
wb = xlwt.Workbook()
ws = wb.add_sheet('A Test Sheet')
ws_1 = wb.add_sheet('A Test Sheet_B')
cnt_row = 0
cnt_col_1 = 0
cnt_col_2 = 0
f = open('<an xml file with one string per line except the last line which has two strings', 'r', encoding='utf-8')
while 1:
str = f.readline()
wb.save('example.xls')
if str == "":
print ("file finished")
break
str = str.split('<', 1)[1]
str = str.rsplit('<', 1)
ws.write(cnt_row, 1, str)
cnt_row = cnt_row + 1
print('debug:last')
print(str)
print(type(str))
wb.save('example.xls')
f.close()

Related

I Want to Compare two XML Files Using Python and Print Common attribute Values and Uncommon attribute Values in both the files

As Iam New to Python I need some help to compare two XML files.
These are the Following Conditions:
To print Common fullPath Name and Name (fullPath and Name are the attributes present in the XML file) between the two XML files.
To print the values which is present in only first file and not in second file.
To print the values which is present in only second file and not in first file.
Later, Have to print this output in excel file having different sheets.
for example (1st condition in sheet 1, 2nd condition in sheet2, 3rd condition in sheer3 of the same excel file.)
Can please anyone help me with the code that satisfies the above condition which I have mentioned.
This is the code which I have tried.
from lxml import etree
Base = etree.parse('Base.xml')
Target = etree.parse('Target.xml')
Base_fullPath = Base.xpath("//Member/#fullPath")
Target_fullPath = Target.xpath("//Member/#fullPath")
Base_name = Base.xpath("//Member/#name")
Target_name = Target.xpath("//Member/#name")
def match(Base_fullPath, Target_fullPath, Base_name,Target_name):
Base_fullPath_set = set(Base_fullPath)
Target_fullPath_set = set(Target_fullPath)
Base_name_set = set(Base_name)
Target_name_set = set(Target_name)
if (Base_fullPath_set & Target_fullPath_set, Base_name_set & Target_name_set):
x = open('C:\\Users\\pvl\\Desktop\\New folder\\Common_FullPath.csv', 'w')
y=(Base_fullPath_set & Target_fullPath_set)
z=(Base_name_set & Target_name_set)
print("common details Full Path: \n", *y, sep='\n', file = x)
print("\n")
x = open('C:\\Users\\pvl\\Desktop\\New folder\\Common_name.csv', 'w')
print("\n common details Name: \n", *z, sep='\n', file=x)
else:
print("No Matches Found")
match(Base_fullPath, Target_fullPath, Base_name,Target_name)
def non_match_elements(list_base, list_target):
non_match_base = []
non_match_target = []
for i in list_base:
if i not in list_target:
non_match_base.append(i)
for i in list_target:
if i not in list_base:
non_match_target.append(i)
return non_match_base
return non_match_target
list_base = Base.xpath("//Member/#*")
list_target = Target.xpath("//Member/#*")
non_match_base = non_match_elements(list_base, list_target)
x = open('C:\\Users\\pvl\\Desktop\\New folder\\Present_in_base.csv', 'w')
print("\n Base Details: \n", *non_match_base, sep='\n', file = x)
non_match_target = non_match_elements(list_target, list_base)
x = open('C:\\Users\\pvl\\Desktop\\New folder\\Present_in_target.csv', 'w')
print("\n Target Details: \n", *non_match_target, sep='\n', file = x)
import pandas as pd
df = pd.read_csv('C:\\Users\\pvl\\Desktop\\New folder\\Common_FullPath.csv')
df1 = pd.read_csv('C:\\Users\\pvl\\Desktop\\New folder\\Common_name.csv')
df2 = pd.read_csv('C:\\Users\\pvl\\Desktop\\New folder\\Present_in_base.csv', delimiter=';;', on_bad_lines = 'skip', engine = 'python' )
df3 = pd.read_csv('C:\\Users\\pvl\\Desktop\\New folder\\Present_in_target.csv', delimiter=';', on_bad_lines = 'skip', engine = 'python')
with pd.ExcelWriter("C:\\Users\\pvl\\Desktop\\New folder\\combined.xlsx") as writer:
df1.to_excel(writer, sheet_name="Common_name", index=False)
df2.to_excel(writer, sheet_name="base_Details", index=False)
df3.to_excel(writer, sheet_name = "target_Details", index=Fal

CS50 PSET6 - DNA - Works fine on SMALL but not for LARGE database

I'm taking CS50 and got stuck on this pset6.
I made this code and it's working fine for 'small' given database.
On 'large' one i get wrong values in my DNA sequence.
Like, using debug50 i got that Albus sequence should be 15,49,38... and my seq is 21, 55, 64...
whats wrong? AND why it works fine on small database and not in large one?
Thanks for the help!
# Import ARGV and CSV library
from sys import argv, exit
import pandas as pd
import csv
# Check if argv has 3 arguments (program name, cvs file and dna sequence)
while True:
if len(argv) != 3:
print("Usage: python dna.py data.csv sequence.txt")
exit(1)
else:
break
with open(argv[2], 'r', encoding="UTF-8") as txt:
dna_seq = txt.read()
#Find the number of STR - AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG
AGATC = dna_seq.count("AGATC")
TTTTTTCT = dna_seq.count("TTTTTTCT")
AATG = dna_seq.count("AATG")
TCTAG = dna_seq.count("TCTAG")
GATA = dna_seq.count("GATA")
TATC = dna_seq.count("TATC")
GAAA = dna_seq.count("GAAA")
TCTG = dna_seq.count("TCTG")
name = 0
if argv[1] == "databases/small.csv":
with open(argv[1], 'r') as csv_file:
reader = csv.DictReader(csv_file)
for row in reader:
dna_db = row['name'], row['AGATC'], row['AATG'], row['TATC']
dna_db = list(dna_db)
seq = [AGATC, AATG, TATC]
seq = [str(x) for x in seq]
if dna_db[1:4] == seq:
name = dna_db[:1]
break
else:
name = "No match"
elif argv[1] == "databases/large.csv":
with open(argv[1], 'r') as csv_file:
reader = csv.DictReader(csv_file)
for row in reader:
dna_db = row['name'], row['AGATC'], row['TTTTTTCT'], row['AATG'], row['TCTAG'],
row['GATA'], row['TATC'], row['GAAA'], row['TCTG']
dna_db = list(dna_db)
seq = [AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG]
seq = [str(x) for x in seq]
if dna_db[1:9] == seq:
name = dna_db[:1]
break
else:
name = "No match"
print(name)

pyExcelerator and utf8?

I have stolen found the following code on stackoverflow (but forgot where, sorry):
#!/usr/local/bin/python
import string
import sys
import getopt
import re
import os
import os.path
import csv
from pyExcelerator import *
def usage():
""" Display the usage """
print "Usage: " + sys.argv[0] + " [OPTIONS] csvfile"
print "OPTIONS:"
print "--title|-t: If set, the first line is the title line"
print "--lines|-l n: Split output into files of n lines or less each"
print "--sep|-s c [def:,] : The character to use for field delimiter"
print "--output|o : output file name/pattern"
print "--help|h : print this information"
sys.exit(2)
def openExcelSheet(outputFileName):
""" Opens a reference to an Excel WorkBook and Worksheet objects """
workbook = Workbook()
worksheet = workbook.add_sheet("Sheet 1")
return workbook, worksheet
def writeExcelHeader(worksheet, titleCols):
""" Write the header line into the worksheet """
cno = 0
for titleCol in titleCols:
worksheet.write(0, cno, titleCol)
cno = cno + 1
def writeExcelRow(worksheet, lno, columns):
""" Write a non-header row into the worksheet """
cno = 0
for column in columns:
worksheet.write(lno, cno, column)
cno = cno + 1
def closeExcelSheet(workbook, outputFileName):
""" Saves the in-memory WorkBook object into the specified file """
workbook.save(outputFileName)
def getDefaultOutputFileName(inputFileName):
""" Returns the name of the default output file based on the value
of the input file. The default output file is always created in
the current working directory. This can be overriden using the
-o or --output option to explicitly specify an output file """
baseName = os.path.basename(inputFileName)
rootName = os.path.splitext(baseName)[0]
return string.join([rootName, "xls"], '.')
def renameOutputFile(outputFileName, fno):
""" Renames the output file name by appending the current file number
to it """
dirName, baseName = os.path.split(outputFileName)
rootName, extName = os.path.splitext(baseName)
backupFileBaseName = string.join([string.join([rootName, str(fno)], '-'), extName], '')
backupFileName = os.path.join(dirName, backupFileBaseName)
try:
os.rename(outputFileName, backupFileName)
except OSError:
print "Error renaming output file:", outputFileName, "to", backupFileName, "...aborting"
sys.exit(-1)
def validateOpts(opts):
""" Returns option values specified, or the default if none """
titlePresent = False
linesPerFile = -1
outputFileName = ""
sepChar = ","
for option, argval in opts:
if (option in ("-t", "--title")):
titlePresent = True
if (option in ("-l", "--lines")):
linesPerFile = int(argval)
if (option in ("-s", "--sep")):
sepChar = argval
if (option in ("-o", "--output")):
outputFileName = argval
if (option in ("-h", "--help")):
usage()
return titlePresent, linesPerFile, sepChar, outputFileName
def main():
""" This is how we are called """
try:
opts,args = getopt.getopt(sys.argv[1:], "tl:s:o:h", ["title", "lines=", "sep=", "output=", "help"])
except getopt.GetoptError:
usage()
if (len(args) != 1):
usage()
inputFileName = args[0]
try:
inputFile = open(inputFileName, 'r')
except IOError:
print "File not found:", inputFileName, "...aborting"
sys.exit(-1)
titlePresent, linesPerFile, sepChar, outputFileName = validateOpts(opts)
if (outputFileName == ""):
outputFileName = getDefaultOutputFileName(inputFileName)
workbook, worksheet = openExcelSheet(outputFileName)
fno = 0
lno = 0
titleCols = []
reader = csv.reader(inputFile, delimiter=sepChar)
for line in reader:
if (lno == 0 and titlePresent):
if (len(titleCols) == 0):
titleCols = line
writeExcelHeader(worksheet, titleCols)
else:
writeExcelRow(worksheet, lno, line)
lno = lno + 1
if (linesPerFile != -1 and lno >= linesPerFile):
closeExcelSheet(workbook, outputFileName)
renameOutputFile(outputFileName, fno)
fno = fno + 1
lno = 0
workbook, worksheet = openExcelSheet(outputFileName)
inputFile.close()
closeExcelSheet(workbook, outputFileName)
if (fno > 0):
renameOutputFile(outputFileName, fno)
if __name__ == "__main__":
main()
My problem is, when using it to convert a utf8 file (containing e.g. 'LATIN SMALL LETTER O WITH ACUTE' (U+00F3) "ó"), it produces the byte sequence 0x43 0x04, which is rendered by both open office and ms excel as a "y" (0x43, while just dropping the 0x04).
does anyone know what I or pyExcelerator are doing wrong?
You should manually encode/decode data from utf-8 file:
reader = csv.reader(inputFile, delimiter=sepChar)
for line in reader:
if (lno == 0 and titlePresent):
if (len(titleCols) == 0):
titleCols = line
writeExcelHeader(worksheet, titleCols)
else:
# writeExcelRow(worksheet, lno, line)
# in unicode function source file encoding should be passed
writeExcelRow(worksheet, lno, [unicode(cell, 'utf-8') for cell in line])
def writeExcelRow(worksheet, lno, columns):
""" Write a non-header row into the worksheet """
cno = 0
for column in columns:
#worksheet.write(lno, cno, column)
worksheet.write(lno, cno, column.encode('utf-8'))
cno = cno + 1
Check examples here (unicode_csv_reader, utf_8_encoder): http://docs.python.org/2/library/csv.html#examples

python chinese character not writing to file correctly…..with some programs

So I've been working with the CC-CEDICT, a free downloadable Chinese-English dictionary. I've been using python to make some small changes and reformat the dictionary. When I ran code that just reorganized the dictionary as a csv file, I had no issues and the characters were written into the file as expected. Here is the code for that:
filename = 'cedict_ts.u8.txt'
newname = 'cedict_ts.u8.csv'
f = open(filename,'r')
allLines = f.readlines()
f.close()
newf = open(newname, 'w')
endofhash = False
for i in range(0, len(allLines)):
curLine = allLines[i]
if curLine[0] == '#':
newf.write(curLine)
else:
if(not endofhash):
newarr = ['Traditional','Simplified','Pinyin','Definition(s)\r\n']
newline = ','.join(newarr)
newf.write(newline)
endofhash = True
firstws = curLine.find(' ')
lsbrack = curLine.find('[')
rsbrack = curLine.find(']')
fslash = curLine.find('/')
lslash = curLine.rfind('/')
trad = curLine[0:firstws]
simp = curLine[firstws+1:lsbrack-1]
piny = curLine[lsbrack+1:rsbrack]
defin = curLine[fslash+1:lslash]
defin = defin.replace('/','; ')
defin = defin + '\r\n'
newarr = [trad, simp, piny, defin]
newline = ','.join(newarr)
newf.write(newline)
newf.close()
However, when I run a program that also changes the pinyin system and adds it to the dictionary, the content of the text file is gobbly-gook. But, as a test I had the program print out each line before it was written to the text file, and it prints to the terminal as expected. Here is the code that does that:
from pinyinConverter import *
filename = 'cedict_ts.u8.txt'
newname = 'cedict_ts_wpym.u8.csv'
f = open(filename,'r')
allLines = f.readlines()
f.close()
apy = readPinyinTextfile('pinyinchars.txt')
newf = open(newname, 'w')
endofhash = False
for i in range(0, len(allLines)):
curLine = allLines[i]
if curLine[0] == '#':
newf.write(curLine)
else:
if(not endofhash):
newarr = ['Traditional','Simplified','Pinyin','PinyinWithMarks','Definition(s)\r\n']
newline = ','.join(newarr)
newf.write(newline)
endofhash = True
firstws = curLine.find(' ')
lsbrack = curLine.find('[')
rsbrack = curLine.find(']')
fslash = curLine.find('/')
lslash = curLine.rfind('/')
trad = curLine[0:firstws]
simp = curLine[firstws+1:lsbrack-1]
piny = curLine[lsbrack+1:rsbrack]
split_piny = piny.split(' ')
for i in range(0, len(split_piny)):
curPin = split_piny[i]
newPin = convertPinyinSystem(curPin, apy)
split_piny[i] = newPin
pnwm = ' '.join(split_piny)
defin = curLine[fslash+1:lslash]
defin = defin.replace('/','; ')
defin = defin + '\r\n'
newarr = [trad, simp, piny, pnwm, defin]
newline = ','.join(newarr)
newf.write(newline)
newf.close()
And here is the pinyinConverter file code:
def convertPinyinSystem(inputString, allPinyin):
chars = ['a','e', 'i', 'o','u','u:']
tone = grabTone(inputString)
toneIdx = (tone - 1) * 2
hasIdx = -1
for i in range(0, len(chars)):
if(chars[i] in inputString):
hasIdx = i
newString = inputString
newString = newString.replace(str(tone),'')
if(not ('iu' in inputString)):
newChar = allPinyin[hasIdx][toneIdx:toneIdx+2]
else:
newChar = allPinyin[4][toneIdx:toneIdx+2]
newString = newString.replace(chars[hasIdx],newChar)
if(tone == 5):
newString = inputString
newString = newString.replace(str(tone),'')
return newString
elif(tone == -1):
return inputString
else:
return newString
def readPinyinTextfile(pinyintextfile):
f = open(pinyintextfile, 'r')
allLines = f.readlines()
f.close()
for i in range(0, len(allLines)):
curLine = allLines[i]
curLine = curLine[0:len(curLine)-1]
allLines[i] = curLine
return allLines
def grabTone(inputText):
isToneIdx = False
idx = 0
while(not isToneIdx):
isToneIdx = is_int(inputText[idx])
if(isToneIdx):
break
else:
idx += 1
if(idx == len(inputText)):
return -1
return int(inputText[idx])
def is_int(s):
try:
int(s)
return True
except ValueError:
return False
And the content of the pinyin chars.txt file is this:
āáăà
ēéĕè
īíĭì
ōóŏò
ūúŭù
ǖǘǚǜ
I'm on a 2009 MacBook Pro, running OSX version 10.8.5, python is version 2.7.6 and the coding of the dictionary is UTF-8. Also I know some of the code for doing the pinyin conversion is not optimized, but for this it doesn't really matter.
If your pinyin file is encoded as utf-8, you might want to try using the codecs package, which is part of the standard library, like this:
import codecs
...
def readPinyinTextfile(pinyintextfile):
f = codecs.open(pinyintextfile, 'r', 'utf-8')
If it looks okay in the terminal, then it's likely that you need to specifically change the writing function to use the codecs package:
apy = readPinyinTextfile('pinyinchars.txt')
newf = codecs.open(newname, 'w', 'utf-8')

Parsing through newline characters in Python

I am working on a fairly basic encoder/decoder where you can input your own text file (as a string) and your own encoder (also as a string: it must be a text file).
Here is my decoder function:
def cDecode(file_name, encoder='standard_encoder.txt', save_new=True): # does not decode multi-lines correctly -- everything goes on a single line. See next comment
'''Decodes <'file_name'> with the reverse method of <'encoder'>.'''
if type(file_name) != str or type(encoder) != str: raise TypeError("<'file_name'> and <'encoder'> must be of type <'str'>.")
if type(save_new) != bool: raise TypeError("<'save_new'> must be of type <'bool'>.")
if file_name[-4:] != '.txt': file_name += '.txt'
if encoder[-4:] != '.txt': encoder += '.txt'
decoder_set = {}
try:
with open(encoder, 'r') as encoding_file:
for line in encoding_file:
line_parts = line.split(': ')
my_key, my_value = line_parts[1], line_parts[0]
I think the error is in here:
I have to remove the '\n' because every character (in the decoding file) is on a new line, like such: 'A: Ð'.
if '\n' in my_key:
loc = my_key.find('\n') # this may be the cause of the single-line of the decoding.
my_key = my_key[:loc] + my_key[loc + 1:]
decoder_set[my_key] = my_value
encoding_file.close()
except IOError:
encoder = 'standard_encoder.txt'
with open(encoder, 'r') as encoding_file:
for line in encoding_file:
line_parts = line.split(': ')
my_key, my_value = line_parts[1], line_parts[0]
# every key has a new line character automatically because it's on a different line
if '\n' in my_key:
loc = my_key.find('\n')
my_key = my_key[:loc] + my_key[loc + 1:]
decoder_set[my_key] = my_value
encoding_file.close()
decodingKeys = decoder_set.keys()
Here is the rest of the function:
if save_new:
try:
decoded_file_name = file_name[:-12] + '_decoded' + file_name[-4:]
encoded_file = open(decoded_file_name, 'a+')
with open(file_name, 'r') as my_file:
for line in my_file:
de_line = ''
for char in line:
if char in decodingKeys: de_char = decoder_set[char]
else: de_char = char
de_line += de_char
encoded_file.write(de_line)
except IOError:
raise NameError(file_name + ' was not found. Decoding process terminated.')
else:
try:
import os
encoded_file = file_name[:-12] + '_decoded' + file_name[-4:]
with open(file_name, 'r+') as my_file:
for line in my_file:
de_line = ''
for char in line:
if char in decodingKeys: en_char = decoding_set[char]
else: de_char = char
de_line += de_char
encoded_file.write(de_line)
os.remove(file_name)
os.rename(encoded_file, file_name)
except IOError:
raise NameError(file_name + ' was not found. Decoding process terminated.')
Say I have a multi-line text-file:
This is a test.
As is this one.
Good bye!
When encoded and then decoded afterward, it shows up like this: This is a test.As is this one.Good bye!.
How can I fix this? I'm expecting it to show up like:
This is a test.
As is this one.
Good bye!
Thanks!
Add a '\n' while writing back the line to file:
encoded_file.write(de_line+'\n')

Categories