So, I have a QTableWidget that I want to save to an .xls file using the xlwt module...
Here's the code:
# Question version: ask for a destination path, then export the table to a
# new xlwt workbook containing a single sheet named "sheet".
def savefile(self):
# The dialog result is converted to a unicode string before use.
filename = unicode(QtGui.QFileDialog.getSaveFileName(self, 'Save File', '', ".xls(*.xls)"))
wbk = xlwt.Workbook()
# The sheet is kept on self so that add2() can write into it.
self.sheet = wbk.add_sheet("sheet")
# Start coordinates handed to add2().
self.row = 0
self.col = 0
self.add2(self.row, self.col)
wbk.save(filename)
# Question version: loop over the table and write cell texts to self.sheet.
# The loop variables i and x are not used as indexes; the cell position comes
# from the row/col counters, which start at the values passed in.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# counter updates below is not visible here.
def add2(self, row, col):
for i in range(self.tableWidget.columnCount()):
for x in range(self.tableWidget.rowCount()):
try:
# presumably item() yields None for an empty cell, making .text() raise
# AttributeError — TODO confirm
teext = str(self.tableWidget.item(row, col).text())
self.sheet.write(row, col, teext)
row += 1
except AttributeError:
# The error is ignored and no counter is changed in this branch.
pass
col += 1
But that writes out only the text from cell 0,0 and nothing else...
I think that I have made some serious mistake...
Update:
# Updated version: ask for a destination path, create a workbook with one
# sheet (cell_overwrite_ok=True), fill it through add2() and save it.
def savefile(self):
filename = unicode(QtGui.QFileDialog.getSaveFileName(self, 'Save File', '', ".xls(*.xls)"))
wbk = xlwt.Workbook()
# The sheet is stored on self because add2() takes no arguments here.
self.sheet = wbk.add_sheet("sheet", cell_overwrite_ok=True)
self.add2()
wbk.save(filename)
# Updated version: the row/col counters are local, row is advanced in both the
# success branch and the AttributeError branch, and row is set back to 0
# before col is advanced.
# NOTE(review): exact nesting is not visible because indentation was lost.
def add2(self):
row = 0
col = 0
for i in range(self.tableWidget.columnCount()):
for x in range(self.tableWidget.rowCount()):
try:
teext = str(self.tableWidget.item(row, col).text())
self.sheet.write(row, col, teext)
row += 1
except AttributeError:
# Nothing is written for this cell, but the row counter still moves on.
row += 1
row = 0
col += 1
Solved the problem...
You might also find it more concise and easier to use the output of the range (or xrange) as the indexes for your tableWidget.item call rather than worrying about incrementing your own counters. You might be using the sheet itself in other places in code, but if you're not, it would save you some memory to not assign the sheet to be an attribute variable of your class:
def savefile(self):
    """Prompt for a destination and export the table widget to an .xls file.

    A workbook with a single sheet named "sheet" is created, filled by
    ``self.add2(sheet)`` and saved under the chosen file name.  Nothing is
    written when the dialog returns an empty file name (for example because
    the user cancelled it); previously the empty name was passed straight
    to ``wbk.save``.
    """
    filename = unicode(QtGui.QFileDialog.getSaveFileName(
        self, 'Save File', '', ".xls(*.xls)"))
    if not filename:
        # No target was chosen, so there is nowhere to save to.
        return
    wbk = xlwt.Workbook()
    # The sheet stays a local: only add2() needs it.
    sheet = wbk.add_sheet("sheet", cell_overwrite_ok=True)
    self.add2(sheet)
    wbk.save(filename)
def add2(self, sheet):
    """Copy the text of every populated cell of ``self.tableWidget`` to ``sheet``.

    Cells are written at the same (row, column) position they have in the
    table widget.  Cells for which ``item()`` returns ``None`` are skipped.

    Args:
        sheet: Object with a ``write(row, col, value)`` method, e.g. the
            worksheet returned by ``Workbook.add_sheet``.
    """
    table = self.tableWidget
    for currentColumn in range(table.columnCount()):
        for currentRow in range(table.rowCount()):
            item = table.item(currentRow, currentColumn)
            if item is None:
                # Empty cell: test for it explicitly instead of relying on
                # the AttributeError raised by None.text().
                continue
            sheet.write(currentRow, currentColumn, str(item.text()))
Because you are using the range command, the currentColumn variable will run from 0 to columnCount() - 1 and currentRow will run from 0 to rowCount() - 1, which are exactly the valid cell indexes of the table.
Related
I'm trying to read in data from a text file, create one long string of everything contained in that text file, and split it by space. I'm then trying to send that list split by space to printStringArray and have it print to an Excel sheet. I'm having trouble figuring out how to keep track of what row and column the data should be input into.
# Module-level row counter shared with printStringArray() through `global`.
rowNum = 1
# Read the file at theFilePath line by line, split each line on whitespace and
# pass the resulting word list to printStringArray().
# NOTE(review): whether that call happens per line or once after the loop is
# not visible here because indentation was lost.
def createStringArray(theFilePath):
# Assigned but not used anywhere in this version.
theFinalString = ""
with open(theFilePath) as file_in:
for line in file_in:
lineToString = str(line)
theCompleteString = lineToString.split()
printStringArray(theCompleteString)
# Write the entries of theStringArray into consecutive columns of row rowNum
# and advance the global row counter.
# Every call constructs a new Workbook for the same file name
# 'testingThis.xlsx'. NOTE(review): presumably this replaces whatever an
# earlier call wrote — confirm against the xlsxwriter documentation.
def printStringArray(theStringArray):
with xlsxwriter.Workbook('testingThis.xlsx') as workbook:
worksheet = workbook.add_worksheet()
# Writing starts at column index 1 in this version.
colNum = 1
global rowNum
for data in theStringArray:
worksheet.write(rowNum, colNum, data)
colNum = colNum + 1
rowNum = rowNum + 1
EDITED:
# Edited version: writes start at column index 0, each value is converted with
# str(), and the coordinates and value are echoed with print() for debugging.
# A new Workbook for 'testingThis.xlsx' is still constructed on every call.
# NOTE(review): presumably each call therefore replaces the previous file —
# confirm against the xlsxwriter documentation.
def printStringArray(theStringArray):
with xlsxwriter.Workbook('testingThis.xlsx') as workbook:
worksheet = workbook.add_worksheet()
colNum = 0
global rowNum
for data in theStringArray:
worksheet.write(rowNum, colNum, str(data))
# Debug output of the position and value just written.
print(rowNum)
print(colNum)
print(data)
colNum = colNum + 1
rowNum = rowNum + 1
I added some prints in to see where I was going wrong, but all the numbers are exactly what I want them to be for rowNum, colNum, and data. Right now it is only printing the very last line.
EDIT #2
# Second edit: the row counter starts at 0 and the workbook and worksheet are
# created once at module level.
# NOTE(review): whether the definitions below sit inside the `with` block is
# not visible here because indentation was lost.
rowNum = 0
with xlsxwriter.Workbook('testingThis.xlsx') as workbook:
worksheet = workbook.add_worksheet()
# Read theFilePath line by line, hand the words of a line to
# printStringArray() and also join them into one "--"-separated string that
# is printed for debugging.
def createStringArray(theFilePath):
theFinalString = ""
with open(theFilePath) as file_in:
for line in file_in:
lineToString = str(line)
theCompleteString = lineToString.split()
printStringArray(theCompleteString)
for aString in theCompleteString:
theFinalString = theFinalString + aString + "--"
print(theFinalString)
# Write theStringArray into row rowNum of the module-level worksheet, one entry
# per column starting at column 0, echoing each coordinate and value, then
# advance the global row counter.
def printStringArray(theStringArray):
colNum = 0
global rowNum
# Fixed marker cell (row 15, column 15) written on every call.
worksheet.write(15, 15, "Aapple")
for data in theStringArray:
worksheet.write(rowNum, colNum, str(data))
print(rowNum)
print(colNum)
print(data)
colNum = colNum + 1
rowNum = rowNum + 1
Here is my attempt to read a text file and write its content to an Excel sheet; maybe this can help.
I use the write_row() function to write a list of strings to a row. This way I don't need to take care of the correct column numbers.
import xlsxwriter
# Read the whole text file into a list of raw lines.
# path2txt is not defined in this snippet; it must be set by the caller.
with open(path2txt, 'r') as txtfile:
lines_raw = txtfile.readlines()
# Remove the trailing newline and surrounding whitespace from every line.
lines = [line.strip() for line in lines_raw]
# Write the lines to a worksheet. write_row() takes the list of words and
# places them in consecutive columns starting at (row, col), so no column
# counter has to be maintained by hand.
with xlsxwriter.Workbook('test.xlsx') as workbook:
sheet = workbook.add_worksheet()
row, col = 0, 0
for line in lines:
parts = line.split()
if len(parts)>0: # only write nonempty lines
sheet.write_row(row, col, parts)
# NOTE(review): indentation was lost, so it is not visible here whether the
# row counter also advances for empty lines.
row += 1
I have a fragment of code which loads data from a .csv file.
It was written for Python 2.7, but it does not work in Python 3.6.
# Question version (written for Python 2.7): read a ';'-separated CSV file
# into a pandas DataFrame, post-process it and return it.
def load_new_data(self):
full = list()
# Binary mode: under Python 3 this makes csv.reader receive bytes instead of
# strings, which the csv module rejects.
with open(self.filename, 'rb') as csv_in:
myreader2 = csv.reader(csv_in, delimiter=';')
# count tracks the row number: row 0 supplies the headers (first field
# dropped), row 1 is skipped, and later rows are stored without their first
# and last field.
count = 0
for row in myreader2:
if count == 0:
headers = row[1:]
count += 1
elif count == 1:
count += 1
else:
current_row = row[1:-1]
full.append(current_row)
count += 1
new_df = pd.DataFrame.from_records(full, columns=headers)
# Drop the first record and keep at most the first 80 columns.
new_df = new_df.iloc[1:, :80]
# The return value is not used — presumably new_df is modified in place;
# TODO confirm.
self.fill_in_blanks(new_df)
new_df = dp.remove_inc_variables(new_df, .1)
# Python 2 print statement; this is a syntax error under Python 3.
print '\t Removing incomplete variables.'
# Best-effort float conversion: any failure leaves the column unchanged.
for i in new_df.columns:
try:
new_df.loc[:, i] = new_df.loc[:, i].astype(float)
except:
pass
return new_df
the error I get is:
212
213 count = 0
--> 214 for row in myreader2:
215 if count == 0:
216 headers = row[1:]
Error: iterator should return strings, not bytes (did you open the file in
text mode?)
I did try changing the 'rb' to 'r' and 'rt' and even deleting it, as other posts here suggest, but with no success...
try this
def load_new_data(self):
    """Load ``self.filename`` (a ';'-separated CSV file) into a DataFrame.

    The first CSV row supplies the column headers (without its first
    field), the second row is skipped, and every later row contributes its
    fields without the first and the last one.  The resulting frame loses
    its first record, is cut to at most 80 columns, and is then passed
    through ``self.fill_in_blanks`` and ``dp.remove_inc_variables``.
    Finally every column that can be converted to float is converted.

    Returns:
        The post-processed ``pandas.DataFrame``.

    Raises:
        ValueError: If the file contains no header row.
    """
    # Text mode with newline='' is what the csv module asks for on
    # Python 3, so the reader handles line endings itself.
    with open(self.filename, 'r', newline='') as csv_in:
        myreader2 = csv.reader(csv_in, delimiter=';')
        try:
            headers = next(myreader2)[1:]
        except StopIteration:
            # An empty file used to surface later as an unbound ``headers``.
            raise ValueError('%s contains no header row' % self.filename)
        next(myreader2, None)  # the second row is not data
        full = [row[1:-1] for row in myreader2]
    new_df = pd.DataFrame.from_records(full, columns=headers)
    new_df = new_df.iloc[1:, :80]
    # The return value is not used — presumably new_df is modified in place;
    # TODO confirm.
    self.fill_in_blanks(new_df)
    new_df = dp.remove_inc_variables(new_df, .1)
    print('\t Removing incomplete variables.')
    for i in new_df.columns:
        try:
            new_df.loc[:, i] = new_df.loc[:, i].astype(float)
        except (ValueError, TypeError):
            # Non-numeric column: keep it as it is.  Only conversion errors
            # are ignored so that unrelated failures are not hidden.
            pass
    return new_df
You should try the codecs module to open the file. Be careful to specify the file's actual encoding.
Sample:
def load_new_data(self):
    """Load ``self.filename`` (a ';'-separated CSV file) via ``codecs.open``.

    The first CSV row supplies the column headers (without its first
    field), the second row is skipped, and every later row contributes all
    fields after the first one.  The resulting frame loses its first
    record, is cut to at most 80 columns, and is then passed through
    ``self.fill_in_blanks`` and ``dp.remove_inc_variables``.  Finally every
    column that can be converted to float is converted.

    Returns:
        The post-processed ``pandas.DataFrame``.

    Raises:
        ValueError: If the file contains no header row.
    """
    # codecs.open decodes while reading, so csv.reader receives text.
    # cp1251 is only an example: replace it with the file's real encoding.
    with codecs.open(self.filename, 'r', encoding="cp1251") as csv_in:
        myreader2 = csv.reader(csv_in, delimiter=';')
        try:
            headers = next(myreader2)[1:]
        except StopIteration:
            # An empty file used to escape as a bare StopIteration.
            raise ValueError('%s contains no header row' % self.filename)
        next(myreader2, None)  # the second row is not data
        # NOTE(review): keeps every field after the first, including the
        # last one — confirm the rows have no trailing empty field.
        full = [row[1:] for row in myreader2]
    new_df = pd.DataFrame.from_records(full, columns=headers)
    new_df = new_df.iloc[1:, :80]
    # The return value is not used — presumably new_df is modified in place;
    # TODO confirm.
    self.fill_in_blanks(new_df)
    new_df = dp.remove_inc_variables(new_df, .1)
    print('\t Removing incomplete variables.')
    for i in new_df.columns:
        try:
            new_df.loc[:, i] = new_df.loc[:, i].astype(float)
        except (ValueError, TypeError):
            # Non-numeric column: keep it as it is.  Only conversion errors
            # are ignored so that unrelated failures are not hidden.
            pass
    return new_df
I don't have any problems converting this CSV or downloading it; my problem is saving the result to a FileField on a Django model.
The minimized sample code:
# Question version: download a CSV report from S3, convert it to an .xlsx
# workbook and (intended) store the result on a Doc model's file field.
# The helpers below appear to be nested functions; indentation was lost in
# this listing.
def download_convert_reports_s3_temp():
# Build a URL for the report key that expires after 600 seconds.
def get_report_url():
bucket_name = 'temp_bucket'
conn = boto.connect_s3(AWS_ACCESS_KEY_ID,
AWS_SECRET_ACCESS_KEY)
bucket = conn.get_bucket(bucket_name)
key = bucket.get_key('TEMP_2017-01-10.csv')
return key.generate_url(expires_in=600)
# Fetch the Doc owned by the first User.
def get_doc():
return Doc.objects.get(owner=User.objects.first())
# Copy the response body into a named temporary file and wrap it in File.
def get_file(file):
file_temp = NamedTemporaryFile(delete=True)
file_temp.write(file.content)
file_temp.flush()
return File(file_temp)
def convert_csv_to_xlsx():
request = requests.get(get_report_url())
csvfile = get_file(request)
from django.conf import settings
# NOTE(review): the call opened on the next line is never closed — a ')' is
# missing after the options dict, so this snippet does not parse as shown.
excelFile = xlsxwriter.Workbook('report.xlsx', {
'strings_to_numbers': True,
'default_date_format': 'yy/mm/dd',
# NOTE(review): '%s' is a %-style placeholder but str.format() is used, so
# nothing is substituted; `file` is not a local name in this function.
'tmpdir': settings.MEDIA_ROOT +
'/documents/%s'.format(file.name.rsplit('.')[0] + '.xlsx')
}
# Rebinds excelFile to the model's file field, discarding the Workbook
# created above.
excelFile = get_doc().file
worksheet = excelFile.add_worksheet()
# Header row.
worksheet.write('A1', 'data')
worksheet.write('B1', 'data')
worksheet.write('C1', 'data')
worksheet.write('D1', 'data')
worksheet.write('E1', 'data')
# Data starts on the second row (index 1). Rows and columns are zero indexed.
row = 1
col = 0
# NOTE(review): csvfile is the File object returned by get_file(), not a
# path, yet it is passed to open().
with open(csvfile, 'rb') as f:
content = csv.reader(f)
# Iterate over the data and write it out row by row.
for row_data in content:
for data in row_data:
worksheet.write(row, col, data)
col += 1
row += 1
col = 0
# f is already managed by the with statement above.
f.close()
excelFile.close()
return convert_csv_to_xlsx()
Now the problem is that I really don't know how to save this Excel file to doc.file.
I tried Django's FieldFile.save(), and got:
---> 19 read = property(lambda self: self.file.
AttributeError: 'Workbook' object has no attribute 'read'
Any suggestion Thanks
def convert_csv_to_xlsx():
    """Convert the CSV from ``get_file()`` to .xlsx and store it on the Doc.

    A workbook is written to a temporary path under ``settings.MEDIA_ROOT``:
    a formatted header row followed by the first five fields of every CSV
    row.  The finished file is saved to ``get_doc().file`` under a dated
    ``RECHARGE_<year>-<month>-<day>.xlsx`` name, and the temporary workbook
    is removed afterwards, even if saving fails.
    """
    from django.conf import settings

    csvfile = get_file()
    # Computed once; the original repeated this expression four times.
    # NOTE(review): there is no '/' between 'documents' and the name —
    # presumably csvfile.name is an absolute path; confirm.
    xlsx_path = settings.MEDIA_ROOT + '/documents%s' % (
        csvfile.name.rsplit('.')[0] + '.xlsx')

    excelFile = xlsxwriter.Workbook(filename=xlsx_path)
    bold = excelFile.add_format({'bold': 1, 'align': 'left', 'bg_color': 'red', 'color': 'white'})
    worksheet = excelFile.add_worksheet()
    worksheet.set_column(0, 4, width=15)

    headers = ('Sender MSISDN', 'Reciever MSISDN', 'Amount',
               'Transaction ID', 'Datetime')
    # Row 0, columns 0-4 are the cells 'A1'..'E1'.
    for col, title in enumerate(headers):
        worksheet.write(0, col, title, bold)

    # Data starts on the second row.  Only as many fields as there are
    # headers are written, as in the former five-branch if/elif chain.
    for row, row_data in enumerate(csv.reader(csvfile), 1):
        for col, data in enumerate(row_data[:len(headers)]):
            worksheet.write(row, col, data)
    csvfile.close()
    # The workbook must be closed before its file is read back below.
    excelFile.close()

    doc = get_doc()
    now = datetime.now()
    try:
        # An .xlsx file is binary data: read it in 'rb' mode, and let the
        # with block close the handle (it was never closed before).
        with open(xlsx_path, 'rb') as xlsx_file:
            doc.file.save(
                name='RECHARGE_%d-%s-%s.xlsx' % (now.year,
                                                 validate_date(now.month),
                                                 validate_date(now.day)),
                content=File(xlsx_file)
            )
    finally:
        # The temporary workbook is no longer needed, whether or not the
        # save succeeded.
        os.remove(xlsx_path)
I am currently working with creating an Excel document in Python. I create the excel document but I'm not sure what is wrong with the code that it is not resizing the columns correctly. Does anyone have any ideas?
# Question version: write vals into row self.numrows of the active sheet, one
# value per column starting at column 1, and try to widen each column to fit.
def writerow(self, vals):
ws = self.workbook.active
this_row = self.numrows
this_col = 1
for v in vals:
cell = ws.cell(row = this_row, column = this_col)
cell.value = v
# The entry of ws.column_dimensions itself is compared with an integer and
# then replaced by one; no attribute of the entry (such as a width) is
# read or set here.
if ws.column_dimensions[get_column_letter(this_col)] < len(str(v)):
ws.column_dimensions[get_column_letter(this_col)] = len(str(v))
this_col += 1
# Advance the row counter and remember the sheet on the instance.
self.numrows += 1
self.worksheet = ws
I found what I needed for what I am working on.
I needed to add ".width" to the areas where I was checking or assigning column widths.
def writerow(self, vals):
    """Write ``vals`` into the next row of the active sheet, widening columns.

    Values go into row ``self.numrows``, one per column starting at column
    1.  A column's ``width`` is raised to the length of ``str(value)``
    whenever that is larger than the current width.  Afterwards
    ``self.numrows`` is advanced and the sheet is stored on
    ``self.worksheet``.

    Args:
        vals: Iterable of cell values for the row.
    """
    ws = self.workbook.active
    this_row = self.numrows
    for this_col, v in enumerate(vals, 1):
        ws.cell(row=this_row, column=this_col).value = v
        # Look the dimension up once instead of three times per value.
        dimension = ws.column_dimensions[get_column_letter(this_col)]
        needed = len(str(v))
        # NOTE(review): width may be unset (None) depending on the openpyxl
        # version — comparing None with an int fails on Python 3; confirm.
        if dimension.width is None or dimension.width < needed:
            dimension.width = needed
    self.numrows += 1
    self.worksheet = ws
I have a directory with 5+ invalid CSV files. I have no problems reading the files and then writing them as "good" CSV files one at a time. But when I try to process a second file I get "IndexError: array index out of range".
import xlrd
import csv, sys, os
import datetime, time
import logging
import Gmail_email
# Script configuration.
# Program name passed to Gmail_email.email() when reporting errors.
program = "CleanCSV"
# Reference date that spreadsheet day counts are added to when date cells are
# converted in cleanBadFile().
date = datetime.datetime(1899, 12, 30)
# sys.argv is overwritten with hard-coded values, so real command-line
# arguments are ignored.
argv0=""
argv1 = 'c:/tmp/checkEmail/' #input directory
# Used as a path prefix: the output file name is appended directly to it.
argv2 = "f:/foo/in/bar-" #output directory
sys.argv = [argv0, argv1, argv2]
inDir = sys.argv[1]#input directory
outDir = sys.argv[2] #output directory
# Module-level list: names appended by processFiles() persist between calls.
lList = [] #holder list to hold names of files to be processed
# Question version (Python 2): collect the regular files found in inDir into
# lList, drop 'Thumbs.db', then run testFile() on each collected name.
def processFiles():
try: #Makes list of local files in lDir, Populates lList
if os.listdir(inDir) == []: #checks for files in lDir
logging.info('No Files to upload')
exit()
else:
for file_name in os.listdir(inDir):
#print file_name
# inDir is joined by plain concatenation, so it must end with a separator.
if os.path.isfile(inDir+file_name):
lList.append(file_name) # populate local dir list
if 'Thumbs.db' in lList: #remove windows thumbs file
lList.remove('Thumbs.db')
logging.info('Files to be checked')
logging.info('%s', lList )
#print lList, 'lList'
# Python 2 except syntax. Any failure is e-mailed, logged and ends the run.
except Exception, e:
Gmail_email.email(e, program)
logging.warning('Error with local files')
logging.warning('%s', e)
exit()
for each in lList: #calls on cleanup method for each file in lLIst
filePath= inDir+each
print filePath, "filepath"
testFile(filePath)
# Question version (Python 2): try to read filePath as CSV (delimiter ' ',
# quote character '|'), iterating over every row without using it. A failure
# is logged, and cleanBadFile() is called for the file.
# NOTE(review): whether cleanBadFile() runs only after a failure or for every
# file is not visible here because indentation was lost.
def testFile(filePath):
try:
with open(filePath, "rb") as csvfile:
spamreader= csv.reader(csvfile, delimiter=' ', quotechar='|')
for row in spamreader:
#print "good file, most likely"
pass
except Exception, e:
logging.warning('Error with local files')
logging.warning('%s', e)
#print "cleaing bad file", filePath
cleanBadFile(filePath)
# Question version (Python 2): open filePath with xlrd and copy the rows of
# the sheet named 'Sheet' to a new, timestamp-named CSV file under outDir.
# NOTE(review): indentation was lost in this listing, so loop nesting is not
# visible here.
def cleanBadFile(filePath):
# The timestamp has one-second resolution and names the output file.
timestr = time.strftime("%Y%m%d-%H%M%S")
#print "bad file trying to clean"
# Output is opened in append-binary mode.
f = open(outDir+timestr+".csv", 'ab')
try: #can i read the file
workbook = xlrd.open_workbook(filePath)
#will error here if bad xlrd cannot open it
print workbook.sheet_names()
#print workbook
# The error is swallowed; if open_workbook() failed, `workbook` is not bound
# when it is used right after this handler.
except Exception, e:
#print e, " error"
pass
worksheet = workbook.sheet_by_name('Sheet')
# Index of the last row and of the last column.
num_rows = worksheet.nrows - 1
num_cells = worksheet.ncols - 1
#print worksheet.ncols, 'num cells'
# Starts at -1 because the counter is incremented before each use.
curr_row = -1
while curr_row < num_rows: #goes over every row
num_cells = worksheet.ncols - 1
curr_row += 1
row = worksheet.row(curr_row)
print row, "row"
# curr_cell is -1 here and is passed to cell_type()/cell_value() below
# before it is incremented.
curr_cell = -1
print worksheet.row_len(curr_row), "row len"
print curr_row, curr_cell, "curr row, curr cell"
cell_type = worksheet.cell_type(curr_row, curr_cell)
cell_value = worksheet.cell_value(curr_row, curr_cell)
print ' ', cell_type, ':', cell_value
# Collects the values of the current row.
values= []
if cell_type == 0: #tests if first value in row is data
#assuming that good rows will have a value in the first cell of each row
#if no data row is not copied to new file
print "bad line"
pass
else:
while curr_cell < num_cells:
curr_cell += 1
# Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank
print curr_row, "; ",curr_cell, " row and cell"
cell_type = worksheet.cell_type(curr_row, curr_cell)
cell_value = worksheet.cell_value(curr_row, curr_cell)
#print cell_type, ":", cell_value
# Date cells: the integer part of the value is taken as a day count, added
# to the module-level reference `date`, and the first 10 characters of the
# string form are kept.
if cell_type == xlrd.XL_CELL_DATE:
cell_value=datetime.timedelta(int(cell_value))
cell_value = str(date + cell_value)[:10]
#print cell_value, "cell value, cell date"
values.append(cell_value)
#print values, "values"
# A csv.writer is constructed for this writerow() call; the delimiter and the
# quote character are both ','.
csv.writer(f, delimiter=',',
quotechar=',', quoting=csv.QUOTE_MINIMAL).writerow( values )
f.close()
print f.closed
print "ah"
# Counters are set to 0 at the end of the function.
curr_cell= 0
curr_row = 0
#print "checking file:", readFile
# Script entry point: process every file found in the input directory.
processFiles()
#print "exit"
# Bare name without parentheses: exit is referenced here but not called.
exit
The error message:
Traceback (most recent call last):
File "F:\cleanCSV.py", line 132, in <module>
processFiles()
File "F:\cleanCSV.py", line 51, in processFiles
testFile(filePath)
File "F:\cleanCSV.py", line 64, in testFile
cleanBadFile(filePath)
File "F:\cleanCSV.py", line 106, in cleanBadFile
cell_type = worksheet.cell_type(curr_row, curr_cell)
File "C:\Python27\lib\site-packages\xlrd\sheet.py", line 413, in cell_type
return self._cell_types[rowx][colx]
IndexError: array index out of range
I feel like I need to "reset" a counting variable, but I think I already reset them all. I don't know what to do.
Two lines before the line causing the exception curr_cell is set to -1 which can't be a valid cell index. A comment some lines further down suggests you expect that to be the first cell in the row, so the index should be 0 instead of -1.
I moved my +1 (curr_cell+=1) down 3 lines.
while curr_cell < num_cells:
# Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank
#print curr_row, "; ",curr_cell, " row and cell"
cell_type = worksheet.cell_type(curr_row, curr_cell)
cell_value = worksheet.cell_value(curr_row, curr_cell)
print cell_type, ":", cell_value
curr_cell += 1
if cell_type == xlrd.XL_CELL_DATE:
cell_value=datetime.timedelta(int(cell_value))
cell_value = str(date + cell_value)[:10]
#print cell_value, "cell value, cell date"