I am trying to write something in an existing workbook with the openpyxl tools.
But I am getting Errno 22 and don't know why.
My script looks like this:
# Reading & writing to a workbook.
# Fills a "Tabelle 1" sheet so that every cell in columns 1..39 and rows
# 1..599 holds its own coordinate, adds a "Pi" sheet, then saves the workbook.
from openpyxl import Workbook
from openpyxl.compat import range
from openpyxl.cell import get_column_letter
wb = Workbook()
# NOTE(review): this is not a raw string, so the "\b" in "\book2" is read as
# the backspace escape (\x08) -- that is the "\x08ook2.xls" shown in the
# IOError below. "\P" is not a recognised escape and is kept as written.
# NOTE(review): the traceback shows the file being written through ZipFile,
# so the ".xls" extension is presumably meant to be ".xlsx" -- confirm.
dest_filename = 'J:\Python_Script\book2.xls'
ws = wb.active
ws.title = "Tabelle 1"
# Column indices 1..39 (the end of the range is exclusive).
for col_idx in range(1, 40):
col = get_column_letter(col_idx)
# Row numbers 1..599.
for row in range(1, 600):
# The value written to each cell is its own coordinate string, e.g. "A1".
ws.cell('%s%s'%(col, row)).value = '%s%s' % (col, row)
# Second sheet holding a single value.
ws = wb.create_sheet()
ws.title = 'Pi'
ws['F5'] = 3.14
wb.save(filename = dest_filename)
And this is the console output with the error message I got:
//------------------
Traceback (most recent call last):
File "J:/Python_Script/xlsx_test.py", line 26, in <module>
wb.save(filename = dest_filename)
File "build\bdist.win32\egg\openpyxl\workbook\workbook.py", line 281, in save
save_workbook(self, filename)
File "build\bdist.win32\egg\openpyxl\writer\excel.py", line 214, in save_workbook
writer.save(filename)
File "build\bdist.win32\egg\openpyxl\writer\excel.py", line 196, in save
archive = ZipFile(filename, 'w', ZIP_DEFLATED)
File "C:\Python27\lib\zipfile.py", line 752, in __init__
self.fp = open(file, modeDict[mode])
IOError: [Errno 22] invalid mode ('wb') or filename: 'J:\\Python_Script\x08ook2.xls'
//----------------------
I am not sure why the file path is different now; the file name also differs from the one I set at the top of the script.
thanks
EDIT:
Solved. Just had to change from \ to / in the path.
In Python the \ is used in strings to escape characters. You can avoid this by using "raw strings" by prefixing with "r". So r'J:\Python_Script\book2.xls' should work.
However, when working with paths it's most common to use the os.path module to make sure they are correct.
import os

# A bare drive letter ("J:") is drive-relative on Windows: joining onto it
# gives "J:Python_Script\book2.xlsx", i.e. a path relative to the current
# directory of drive J. Including the root separator makes the path absolute.
dest_filename = os.path.join("J:\\", "Python_Script", "book2.xlsx")
This is invaluable when writing portable code.
Related
I'm trying to find a way to combine multiple .xls files into a single .xls file with separate sheets (so 1.xls goes under Sheet1, etc.).
Here's my code
# Ask the user for the directory that holds the input files.
mypath = raw_input("Please enter the directory path for the input files: ")
from os import listdir
from os.path import isfile, join
# Full paths of the regular files in mypath whose name contains '.txt'.
# NOTE(review): "'.txt' in f" is a substring test, so a name such as
# "notes.txt.bak" is selected too -- confirm whether a suffix test was meant.
textfiles = [ join(mypath,f) for f in listdir(mypath) if isfile(join(mypath,f)) and '.txt' in f]
def is_number(s):
    """Return True if *s* can be converted to a float, otherwise False.

    Anything float() accepts counts as a number (including surrounding
    whitespace, exponents, "nan" and "inf"). Values float() cannot handle
    at all, such as None, are reported as not-a-number instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable text; TypeError: unsupported type (e.g. None).
        return False
    return True
import xlwt
import xlrd
# Number format applied to every numeric cell written below.
style = xlwt.XFStyle()
style.num_format_str = '#,###0.00'
# Step 1: convert each text file into its own workbook with one 'Sheet1'.
for textfile in textfiles:
# NOTE(review): f is never closed in the visible code.
f = open(textfile, 'r+')
row_list = []
for row in f:
# Fields are tab-separated.
row_list.append(row.split('\t'))
# Transpose rows into columns (zip stops at the shortest row).
column_list = zip(*row_list)
workbook = xlwt.Workbook()
worksheet = workbook.add_sheet('Sheet1')
# i is the output column index; item is the output row index.
i = 0
for column in column_list:
for item in range(len(column)):
value = column[item].strip()
# Numeric-looking fields are written as floats with the number format above;
# everything else is written as text.
if is_number(value):
worksheet.write(item, i, float(value), style=style)
else:
worksheet.write(item, i, value)
i+=1
# The output path is the input path with '.txt' replaced by '.xls'.
workbook.save(textfile.replace('.txt', '.xls'))
# Step 2: copy every .xls workbook found by the glob into one sheet of a
# combined workbook.
import glob, os
import pandas as pd
# NOTE(review): these Windows paths are not raw strings, so the "\f" in
# "\forpythonscript" is read as the form-feed escape. The glob pattern then
# presumably matches no files, the loop below writes no sheet, and that would
# explain the IndexError raised from writer.save() -- confirm. Raw strings
# (r'...') or forward slashes avoid the problem.
# NOTE(review): the combined workbook is written into the same folder the
# glob reads from, so a second run would presumably pick it up as an input.
writer = pd.ExcelWriter('C:\Users\xxx\Desktop\forpythonscript\minonna.xls')
# i numbers the output sheets: 'sheet1', 'sheet2', ...
i=1
for xlsfile in glob.glob(os.path.abspath('C:\Users\xxx\Desktop\forpythonscript\*.xls')):
df = pd.read_excel(xlsfile)
df.to_excel(writer, 'sheet%s' % i)
i +=1
writer.save()
Here's the error I get when I run it in Anaconda:
Traceback (most recent call last):
File "C:\Users\xxx\Desktop\provaimport.py", line 51, in
writer.save()
File "C:\Users\xxx\Anaconda2\lib\site-packages\pandas\io\excel.py", line 1423, in save
return self.book.save(self.path)
File "C:\Users\xxx\Anaconda2\lib\site-packages\xlwt\Workbook.py", line 710, in save
doc.save(filename_or_stream, self.get_biff_data())
File "C:\Users\xxx\Anaconda2\lib\site-packages\xlwt\Workbook.py", line 680, in get_biff_data
self.__worksheets[self.__active_sheet].selected = True
IndexError: list index out of range
I can't comment, but I think it's caused by one of two possible issues.
Do the saved workbooks in the folder only have a single sheet? Typically with read_excel you also select a sheet name to read from.
Also, have you tried setting the following?
df.to_excel(writer, 'sheet%s' % i, index=False)
Hi I am trying to run a utility script i found in github
https://gist.github.com/Athmailer/4cdb424f03129248fbb7ebd03df581cd
Update 1:
Hi, I modified the logic a bit more, so that rather than splitting the CSV into multiple CSVs again, I am creating a single Excel file with multiple sheets containing the splits. Below is my code:
import os
import csv
import openpyxl
import argparse
def find_csv_filenames( path_to_dir, suffix=".csv" ):
    """Return the names of the entries in *path_to_dir* ending with *suffix*.

    Only bare names are returned (no directory component), in the order
    os.listdir() yields them. The match is case-sensitive and directories
    are not filtered out.
    """
    return [ name for name in os.listdir(path_to_dir) if name.endswith( suffix ) ]
def is_binary(filename):
    """
    Return true if the given filename appears to be binary.
    File is considered to be binary if it contains a NULL byte.
    An empty file is reported as not binary.
    FIXME: This approach incorrectly reports UTF-16 as binary.
    """
    with open(filename, 'rb') as f:
        # Read fixed-size chunks rather than iterating "lines": binary data
        # may contain no newline at all, which would pull the whole file into
        # memory as a single line.
        while True:
            block = f.read(65536)
            if not block:
                return False
            # Bytes literal: the file is opened in binary mode, and testing a
            # text string against bytes raises TypeError on Python 3.
            if b'\0' in block:
                return True
# Copy the rows of an open CSV file into a single workbook, starting a new
# "Split<N>" sheet each time row_limit rows have been written, and save the
# workbook as output_path/output_name_template.
#   filehandler          -- open file object handed to csv.reader
#   delimiter            -- field delimiter of the input
#   row_limit            -- number of data rows per sheet
#   output_name_template -- file name of the workbook
#   output_path          -- directory the workbook is saved in
#   keep_headers         -- when true, the first input row is written at the
#                           top of every sheet
def split(filehandler, delimiter=',', row_limit=5000,
output_name_template='.xlsx', output_path='.', keep_headers=True):
# csv.excel dialect with a caller-supplied delimiter.
class MyDialect(csv.excel):
def __init__(self, delimiter=','):
self.delimiter = delimiter
lineterminator = '\n'
my_dialect = MyDialect(delimiter=delimiter)
reader = csv.reader(filehandler, my_dialect)
# index is never changed below, so every create_sheet() call passes index=0.
index = 0
current_piece = 1
# Create a new Excel workbook
# Create a new Excel sheet with name Split1
current_out_path = os.path.join(
output_path,
output_name_template
)
# NOTE(review): presumably the workbook also keeps the default sheet that
# Workbook() creates, in addition to the "Split<N>" sheets -- confirm.
wb = openpyxl.Workbook()
ws = wb.create_sheet(index=index, title="Split" + str(current_piece))
current_limit = row_limit
if keep_headers:
# NOTE(review): reader.next() is the Python 2 iterator method (the traceback
# below shows python2.7); next(reader) works on both Python 2 and 3.
headers = reader.next()
ws.append(headers)
# i counts the rows read after the header, starting at 0.
for i, row in enumerate(reader):
# Once the current sheet holds row_limit rows, start the next sheet and raise
# the limit to the cumulative row count at which that sheet will be full.
if i + 1 > current_limit:
current_piece += 1
current_limit = row_limit * current_piece
ws = wb.create_sheet(index=index, title="Split" + str(current_piece))
if keep_headers:
ws.append(headers)
# This is the call that raises IllegalCharacterError in the traceback below.
ws.append(row)
# NOTE(review): the indentation was lost in this paste, so it is unclear
# whether save() runs once after the loop or on every row -- confirm.
wb.save(current_out_path)
# Script entry point: log the row count of every CSV file in the current
# directory, then write each non-binary CSV to an .xlsx workbook under
# 'Output' via split().
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Splits a CSV file into multiple pieces.',
prefix_chars='-+')
parser.add_argument('-l', '--row_limit', type=int, default=5000,
help='The number of rows you want in each output file. (default: 5000)')
args = parser.parse_args()
# Check if the output path exists, else create a new output folder
output_path='Output'
if not os.path.exists(output_path):
os.makedirs(output_path)
# Opened in append mode, so the log accumulates across runs.
with open('Logger.log', 'a+') as logfile:
logfile.write('Filename --- Number of Rows\n')
logfile.write('#Unsplit\n')
# Get the list of all CSVs in the current folder
filenames = find_csv_filenames(os.getcwd())
filenames.sort()
# Names of files that look binary; they are logged and excluded from the split.
rem_filenames = []
for filename in filenames:
if is_binary(filename):
logfile.write('{} --- binary -- skipped\n'.format(filename))
rem_filenames.append(filename)
else:
# Count the rows by reading the whole file once with ';' as delimiter.
with open(filename, 'rb') as infile:
reader_file = csv.reader(infile,delimiter=";",lineterminator="\n")
value = len(list(reader_file))
logfile.write('{} --- {} \n'.format(filename,value))
filenames = [item for item in filenames if item not in rem_filenames]
filenames.sort()
logfile.write('#Post Split\n')
for filename in filenames:
#try:
with open(filename, 'rb') as infile:
# Workbook name is the text before the FIRST dot of the file name, so
# "a.b.csv" becomes "a.xlsx".
name = filename.split('.')[0]
# NOTE(review): the literal 'Output' is passed here rather than the
# output_path variable set above; both currently hold the same value.
split(filehandler=infile,delimiter=';',row_limit=args.row_limit,output_name_template= name + '.xlsx',output_path='Output')
I have a folder called 'CSV Files' which contains a lot of csv's which need to be split.
I am keeping this utility script in the same folder
Getting the following error on running the script:
Traceback (most recent call last):
File "csv_split.py", line 96, in <module>
split(filehandler=infile,delimiter=';',row_limit=args.row_limit,output_name_template= name + '.xlsx',output_path='Output')
File "csv_split.py", line 57, in split
ws.append(row)
File "/home/ramakrishna/.local/lib/python2.7/site-packages/openpyxl/worksheet/worksheet.py", line 790, in append
cell = Cell(self, row=row_idx, col_idx=col_idx, value=content)
File "/home/ramakrishna/.local/lib/python2.7/site-packages/openpyxl/cell/cell.py", line 114, in __init__
self.value = value
File "/home/ramakrishna/.local/lib/python2.7/site-packages/openpyxl/cell/cell.py", line 294, in value
self._bind_value(value)
File "/home/ramakrishna/.local/lib/python2.7/site-packages/openpyxl/cell/cell.py", line 191, in _bind_value
value = self.check_string(value)
File "/home/ramakrishna/.local/lib/python2.7/site-packages/openpyxl/cell/cell.py", line 156, in check_string
raise IllegalCharacterError
openpyxl.utils.exceptions.IllegalCharacterError
Can someone let me know whether I have to add another for loop that goes through each cell in the row and appends it to the sheet, or whether it can be done in a single go? Also, I seem to have made this logic rather clumsy; can it be optimized further?
Folder structure for your reference
You must pass just a name of the file as command line argument:
python splitter.py 'Sports & Outdoors 2017-08-26'
Also, I tried running the above script, and no matter which CSV I run it on, it doesn't return the first line (which should normally be the header) even though keep_headers = True. Setting keep_headers = False, by contrast, does print the header line, which is a bit counterintuitive.
This script is meant to read a single CSV. If you want to read every CSV in a directory, you want to make another script that will loop through all the files in that directory.
import os

import splitter as sp

# Directory that holds the CSV files to split.
directory = '/your/directory'
# os.listdir() returns bare names, so each one is joined back onto the
# directory; otherwise open() would look in the current working directory.
files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.csv')]
for csv_path in files:
    with open(csv_path, 'r') as f:
        sp.split(f)
Attempting to extract .xlsx docs from a file and compile the data into a single worksheet.
I am receiving an IOError even though the files exist.
Program is as follows
#-------------- loop that pulls in files from folder--------------
import os
# Root directory from which to pull the files
rootdir = r'C:\Users\username\Desktop\Mults'
# Print the full path of every file under rootdir.
# After the walk, `files` still holds the bare file names of the LAST
# directory visited; the merge loop below reuses that list.
for subdir, dir, files in os.walk(rootdir):
for file in files:
print os.path.join(subdir,file)
#----------------------merge work books-----------------------
import xlrd
import xlsxwriter
# NOTE(review): presumably xlsxwriter writes the .xlsx format, so the '.xls'
# extension here looks unintended -- confirm.
wb = xlsxwriter.Workbook('merged.xls')
ws = wb.add_worksheet()
for file in files:
# NOTE(review): `file` is a bare name without its directory, so this only
# works when the script runs from that folder -- this is the IOError below.
r = xlrd.open_workbook(file)
head, tail = os.path.split(file)
# First pass: count the sheets that contain at least one row.
count = 0
for sheet in r:
if sheet.number_of_rows()>0:
count += 1
# Second pass: copy every non-empty sheet into the merged workbook. A workbook
# with exactly one non-empty sheet is named after the file alone.
for sheet in r:
# NOTE(review): "number_of_rosw" is spelled differently from the
# "number_of_rows" used in the first pass -- looks like a typo.
if sheet.number_of_rosw()>0:
if count == 1:
sheet_name = tail
else:
# NOTE(review): the % operator is missing between the format string and the
# tuple, so this line calls a str object instead of formatting it.
sheet_name = "%s_%s" (tail, sheet.name)
# NOTE(review): the workbook above was populated with add_worksheet();
# whether create_sheet / write_reader exist on these objects is not shown
# here -- confirm against the xlsxwriter API.
new_sheet = wb.create_sheet(sheet_name)
new_sheet.write_reader(sheet)
new_sheet.close()
wb.close()
Return error as follows
doc1.xlsx
doc2.xlsx
doc3.xlsx
doc4.xlsx
Traceback (most recent call last):
File "C:\Users\username\Desktop\Work\Python\excel practice\xlsx - loops files - 09204.py", line 23, in <module>
r = xlrd.open_workbook(file)
File "C:\Python27\lib\site-packages\xlrd\__init__.py", line 394, in open_workbook
f = open(filename, "rb")
IOError: [Errno 2] No such file or directory: 'doc1.xlsx'
Any suggestions or changes?
Also, any advice if I'm heading in the right direction?
I'm new to the python world, so any advice will be much appreciated!
Thank you!!
You are opening the plain filename without the path; you are ignoring the directory component.
Don't just print the os.path.join() result, actually use it:
filename = os.path.join(subdir, file)
r = xlrd.open_workbook(filename)
For the first problem...
Instead of:
r = xlrd.open_workbook(file)
Use:
r = xlrd.open_workbook(os.path.join(subdir,file))
For the TypeError:
Instead of:
for sheet in r:
if sheet.number_of_rows()>0:
count += 1
Use:
for nsheet in r.sheet_names():  # you need a list of sheet names to loop through
    sheet = r.sheet_by_name(nsheet)  # then you create a sheet object with each name in the list
    if sheet.nrows > 0:  # use the nrows property of the sheet object to count the number of rows
        count += 1
Do the same for the second for loop.
Is there an easy way to find out what the BOF record error messages mean?
Is there a list or something similar where they can be looked up?
I have just installed XLRD 0.9.2, in addition to XLutils 1.6.0. (I know it's overkill, so could that be the problem?) I am reading through a huge bunch of Excel files, all of which I know are Excel files, at least according to their filenames.
The error message seems to show up even though I've put in a try/except test. Here's the code where the error shows:
# Build a dict mapping each file name in file_list to
# [lenghtvalue, dispvalue, Froudemax, Froudemin], where the first two are read
# from fixed cells of the workbook's sheet at index 5 and the last two come
# from Fdict. file_list, start_dir and Fdict are defined outside this function.
def locate_vals():
val_dict = {}
Fcount = 0
for filename in file_list:
try:
wb = xlrd.open_workbook(os.path.join(start_dir, filename))
sheet = wb.sheet_by_index(5) # can also select sheet_by_name('name')
# model = sheet.cell_value(2, 3)
lenghtvalue = sheet.cell_value(9, 7) # (y,x)
dispvalue = sheet.cell_value(15, 7)
try:
# NOTE(review): max() compares by string representation (key=str) while
# min() uses the default ordering -- confirm that asymmetry is intended.
Froudemax = max(Fdict.get(filename)[Fcount - 1], key=str)
Froudemin = min(Fdict.get(filename)[Fcount - 1])
Fcount += 1
# NOTE(review): the bare except swallows every error from the lookup above
# and substitutes the placeholder values below.
except:
Froudemax = 5555555555555555555555555555
Froudemin = 6666666666666666666666666666
print 'Froudemax(5) eller Froudemin(6) har problem'
val_dict[filename] = [lenghtvalue, dispvalue, Froudemax, Froudemin]
# NOTE(review): "XLRDError and IndexError" is a boolean expression; a class
# is truthy, so it evaluates to IndexError and this clause catches ONLY
# IndexError. An XLRDError (as in the traceback below) is not handled here.
# To catch both, use a tuple: except (XLRDError, IndexError):
# The bare name XLRDError also has to be imported; no import is visible here.
except XLRDError and IndexError:
print 'Problem in locate_vals with:', filename
return val_dict
val_dict = locate_vals()
My errormessage says:
Traceback (most recent call last):
File "C:\Documents and Settings\OPC\My Documents\Haavard_Refvik_Workspace\STT_ComparisonTool\Run_Comparison_Tool.py", line 129, in <module>
val_dict = locate_vals()
File "C:\Documents and Settings\OPC\My Documents\Haavard_Refvik_Workspace\STT_ComparisonTool\Run_Comparison_Tool.py", line 111, in locate_vals
wb = xlrd.open_workbook(os.path.join(start_dir, filename))
File "C:\Python27\lib\site-packages\xlrd-0.9.2-py2.7.egg\xlrd\__init__.py", line 435, in open_workbook
ragged_rows=ragged_rows,
File "C:\Python27\lib\site-packages\xlrd-0.9.2-py2.7.egg\xlrd\book.py", line 91, in open_workbook_xls
biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
File "C:\Python27\lib\site-packages\xlrd-0.9.2-py2.7.egg\xlrd\book.py", line 1258, in getbof
bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
File "C:\Python27\lib\site-packages\xlrd-0.9.2-py2.7.egg\xlrd\book.py", line 1252, in bof_error
raise XLRDError('Unsupported format, or corrupt file: ' + msg)
xlrd.biffh.XLRDError: Unsupported format, or corrupt file: Expected BOF record; found '\x03OPC '
EDIT: OPC is the username on the computer.
I added a try/except test and found the files that caused the problem. It seems they were some old "automatic security save" files that had been saved in the same folder as the rest of my files, but were hidden. The names of these files looked like this:
~$test spreadsheet modelxxx.xlsx
I should of course have run the test earlier, but I had put the try statement one indent level too far to the left, so I never got to see the file that caused the error. Thanks for the response.
I would still appreciate if anyone could point me in the direction of a table of error-messages though, if such a table exists.
I'm new to Python and to the bioinformatics field. I'm using python-2.6. I'm trying to select all fastq.gz files, open each with gzip.open (reading just a few lines, because the files are huge and reading them fully wastes time), count the occurrences of 'J', and then pick out the files whose 'J' count is NOT equal to 0.
The following is my code:
#!/usr/bin/python
# Print the name of every *.recal.fastq.gz file under `path` whose first ten
# lines contain 'J'.
import os,sys,re,gzip
path = "/home/XXX/nearline"
for file in os.listdir(path):
if re.match('.*\.recal.fastq.gz', file):
# NOTE(review): os.listdir() returns bare names, so this opens the file
# relative to the current working directory, not `path` -- this is the
# IOError below. readlines() also reads the whole file before slicing.
text = gzip.open(file,'r').readlines()[:10]
# NOTE(review): `text` is a list of lines, and a list has no split() method.
word_list = text.split()
# NOTE(review): with the "+ 1", number can never be 0.
number = word_list.count('J') + 1
# NOTE(review): "!==" is not a Python operator (syntax error); use "!=".
if number !== 0:
print file
But I got some errors:
Traceback (most recent call last):
File "fastqfilter.py", line 9, in <module>
text = gzip.open(file,'r').readlines()[:10]
File "/share/lib/python2.6/gzip.py", line 33, in open
return GzipFile(filename, mode, compresslevel)
File "/share/lib/python2.6/gzip.py", line 79, in __init__
fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
IOError: [Errno 2] No such file or directory: 'ERR001268_1.recal.fastq.gz'
What's this traceback: File......
Is there anything wrong with gzip here?
And why can't it find ERR001268_1.recal.fastq.gz? It's the first fastq file in the list, and it DOES exist there.
I hope you can give me some clues and point out any other errors in the script.
THanks a lot.
Edit: thx everyone. I followed Dan's suggestion. And I tried on ONE fastq file first. My script goes like:
#!/usr/bin/python
# Single-file trial: read the first ten lines of one fastq.gz file and print
# its name if they contain 'J'.
import os,sys
import gzip
import itertools
file = gzip.open('/home/xug/nearline/ERR001274_1.recal.fastq.gz','r')
# NOTE(review): the gzip file object has no xreadlines() (see the
# AttributeError below), and the list built here is not assigned to anything.
list(itertools.islice(file.xreadlines(),10))
# NOTE(review): `list` here is the built-in type, which has no split().
word_list = list.split()
# NOTE(review): with the "+ 1", number can never be 0.
number = word_list.count('J') + 1
if number != 0:
print 'ERR001274_1.recal.fastq.gz'
Then errors are:
Traceback (most recent call last):
File "try2.py", line 8, in <module>
list(itertools.islice(text.xreadlines(),10))
AttributeError: GzipFiles instance has no attribute 'xreadlines'
Edit again: Thx Dan, I've solved the problem yesterday. Seems GzipFiles don't support xreadlines. So I tried the similar way as you suggested later. And it works. See below:
#!/usr/bin/python
# Print the name of every *.recal.fastq.gz file under `path` whose first
# 1000 lines contain at least one 'J'.
import os,sys,re
import gzip
from itertools import islice

path = "/home/XXXX/nearline"
for file in os.listdir(path):
    # Raw string with escaped dots and an end anchor, so only names that
    # really end in ".recal.fastq.gz" match.
    if re.match(r'.*\.recal\.fastq\.gz$', file):
        # os.listdir() returns bare names; join the directory back on.
        fullpath = os.path.join(path, file)
        myfile = gzip.open(fullpath, 'r')
        try:
            # islice reads only the first 1000 lines instead of the whole file.
            head = list(islice(myfile, 1000))
        finally:
            # try/finally rather than "with": GzipFile is not a context
            # manager on Python 2.6.
            myfile.close()
        # Bytes literal: gzip yields byte strings (identical to str on Python 2).
        number = sum(line.count(b'J') for line in head)
        if number != 0:
            print(file)
on this line:
text = gzip.open(file,'r').read()
file is a filename not a full path so
fullpath = os.path.join(path, file)
text = gzip.open(fullpath,'r').read()
About F.readlines()[:10]: it reads the whole file into a list of lines and only then takes the first 10.
import itertools
list(itertools.islice(F.xreadlines(),10))
this will not read the whole file into memory and will only read the first 10 lines into a list
But gzip.open returns an object that doesn't have .xreadlines(); since files are iterable over their lines, just:
list(itertools.islice(F,10))
would work as this test shows:
>>> import gzip,itertools
>>> list(itertools.islice(gzip.open("/home/dan/Desktop/rp718.ps.gz"),10))
['%!PS-Adobe-2.0\n', '%%Creator: dvips 5.528 Copyright 1986, 1994 Radical Eye Software\n', '%%Title: WLP-94-int.dvi\n', '%%CreationDate: Mon Jan 16 16:24:41 1995\n', '%%Pages: 6\n', '%%PageOrder: Ascend\n', '%%BoundingBox: 0 0 596 842\n', '%%EndComments\n', '%DVIPSCommandLine: dvips -f WLP-94-int.dvi\n', '%DVIPSParameters: dpi=300, comments removed\n']
Change your code to:
#!/usr/bin/python
# Print the name of every *.recal.fastq.gz file under `path` whose first ten
# lines contain at least one 'J'.
import os,sys,re,gzip
from itertools import islice

path = "/home/XXX/nearline"
for file in os.listdir(path):
    if re.match(r'.*\.recal\.fastq\.gz$', file):
        # Join the directory back on: os.listdir() returns bare names, and
        # opening those looks in the working directory instead of `path`.
        handle = gzip.open(os.path.join(path, file), 'r')
        try:
            # Read only the first ten lines rather than the whole file.
            head = list(islice(handle, 10))
        finally:
            handle.close()
        # Count 'J' across those lines (bytes literal: gzip yields byte strings).
        number = sum(line.count(b'J') for line in head)
        if number != 0:
            print(file)
It's trying to open ERR001268_1.recal.fastq.gz from the working directory, not from /home/XXX/nearline.