I need to remove timestamps in my file. It should only return the name. My source text file looks like this
7:52:01 AM sherr
hello GOOD morning .
おはようございます。
7:52:09 AM sherr
Who ?
誰?
7:52:16 AM sherr
OK .
わかりました
and my code looks like this
from openpyxl import Workbook
import copy
# Copy the chat log into a worksheet, three consecutive text lines per row.
# Each line is written as-is (only surrounding whitespace is stripped), so
# the first column still contains the timestamp.
wb = Workbook()
with open('chat_20220207131707.txt', encoding='utf-8') as sherr:
    ws = wb.active
    row, column = 1, 1
    for line in sherr:
        ws.cell(row=row, column=column, value=line.strip())
        column += 1
        if column > 3:
            # Three columns are filled: continue at the start of the next row.
            row += 1
            column = 1

# Turn on text wrapping for every cell that was written.
for cells in ws.iter_rows():
    for cell in cells:
        wrapped = copy.copy(cell.alignment)
        wrapped.wrapText = True
        cell.alignment = wrapped

wb.save('sherrplease.xlsx')
If the file always has the same structure, which it certainly looks like, this can be done with simple splitting of the string in question.
from openpyxl import Workbook
import copy
# Copy the chat log into a worksheet, three consecutive text lines per row,
# dropping the leading "<time> <AM/PM>" from the first line of each group so
# that only the name remains in column 1.
wb = Workbook()
with open('chat_20220207131707.txt', encoding='utf-8') as sherr:
    row = 1
    column = 1
    ws = wb.active
    for line in sherr:
        if column == 1:
            # Split on runs of whitespace at most twice: the first two fields
            # are the time and the AM/PM marker, everything after them is the
            # name. Splitting on whitespace runs (rather than a single ' ')
            # keeps this correct when fields are separated by tabs or by
            # several spaces. Lines with fewer than three fields yield "".
            value = " ".join(line.strip().split(maxsplit=2)[2:])
        else:
            value = line.strip()
        ws.cell(row=row, column=column, value=value)
        if (column := column + 1) > 3:
            # Three columns are filled: continue at the start of the next row.
            row += 1
            column = 1

# Turn on text wrapping for every cell that was written.
for cells in ws.iter_rows():
    for cell in cells:
        alignment = copy.copy(cell.alignment)
        alignment.wrapText = True
        cell.alignment = alignment

wb.save('sherrplease.xlsx')
Related
I'm trying to read in data from a text file, create one long string of everything contained in that text file, and split it by space. I'm then trying to send that list split by space to printStringArray and have it print to an Excel sheet. I'm having trouble figuring out how to keep track of what row and column the data should be input into.
# Worksheet row that the next printStringArray call writes to.
rowNum = 1


def createStringArray(theFilePath):
    """Split every line of the file on whitespace and hand it to printStringArray."""
    theFinalString = ""
    with open(theFilePath) as file_in:
        for line in file_in:
            words = str(line).split()
            printStringArray(words)


def printStringArray(theStringArray):
    """Write the items of theStringArray side by side into row ``rowNum``.

    Columns start at index 1. The module-level ``rowNum`` is advanced by one
    after the row has been written.

    NOTE: the workbook 'testingThis.xlsx' is opened anew on every call.
    """
    global rowNum
    with xlsxwriter.Workbook('testingThis.xlsx') as workbook:
        worksheet = workbook.add_worksheet()
        for colNum, data in enumerate(theStringArray, start=1):
            worksheet.write(rowNum, colNum, data)
        rowNum = rowNum + 1
EDITED:
def printStringArray(theStringArray):
    """Write the items of theStringArray, as strings, into row ``rowNum``.

    Columns start at index 0. Row, column and value are printed for each
    item as debug output, and the module-level ``rowNum`` is advanced by one
    afterwards.

    NOTE: the workbook 'testingThis.xlsx' is opened anew on every call.
    """
    global rowNum
    with xlsxwriter.Workbook('testingThis.xlsx') as workbook:
        worksheet = workbook.add_worksheet()
        for colNum, data in enumerate(theStringArray):
            worksheet.write(rowNum, colNum, str(data))
            # Debug trace of the target cell and its content.
            print(rowNum)
            print(colNum)
            print(data)
        rowNum = rowNum + 1
I added some prints in to see where I was going wrong, but all the numbers are exactly what I want them to be for rowNum, colNum, and data. Right now it is only printing the very last line.
EDIT #2
# NOTE(review): indentation was lost in this paste, so the nesting of the
# definitions below relative to the `with` block cannot be determined here.
# Worksheet row that the next printStringArray call writes to.
rowNum = 0
# The workbook and worksheet are now created once, at module level, and are
# shared by the functions below.
with xlsxwriter.Workbook('testingThis.xlsx') as workbook:
worksheet = workbook.add_worksheet()
# Reads the file line by line, splits each line on whitespace and passes the
# resulting list to printStringArray; also builds a "--"-joined debug string.
def createStringArray(theFilePath):
theFinalString = ""
with open(theFilePath) as file_in:
for line in file_in:
lineToString = str(line)
theCompleteString = lineToString.split()
printStringArray(theCompleteString)
for aString in theCompleteString:
theFinalString = theFinalString + aString + "--"
print(theFinalString)
# Writes the items of theStringArray into row `rowNum` of the shared
# worksheet (columns from index 0), then advances `rowNum`.
def printStringArray(theStringArray):
colNum = 0
global rowNum
# Fixed marker cell, presumably to check that writing works at all.
worksheet.write(15, 15, "Aapple")
for data in theStringArray:
worksheet.write(rowNum, colNum, str(data))
print(rowNum)
print(colNum)
print(data)
colNum = colNum + 1
rowNum = rowNum + 1
Here is my attempt to read a text file and write the content to an Excel sheet; maybe this can help.
I use the write_row() function to write a list of strings to a row. This way I don't need to take care of the correct column numbers.
import xlsxwriter
# Load the whole text file into memory, one list entry per line.
with open(path2txt, 'r') as txtfile:
    lines_raw = txtfile.readlines()

# Drop the trailing newline and any surrounding whitespace from each line.
lines = [raw.strip() for raw in lines_raw]

# Write one worksheet row per non-empty line. "write_row()" places the words
# of a line in consecutive columns, so no column counter is needed.
with xlsxwriter.Workbook('test.xlsx') as workbook:
    sheet = workbook.add_worksheet()
    col = 0
    row = 0
    for parts in map(str.split, lines):
        if not parts:
            # Blank line: nothing to write, and the row counter stays put.
            continue
        sheet.write_row(row, col, parts)
        row += 1
So I have this code that cleans up an Excel file and it all works pretty good, but something odd is happening where some of my cells randomly do not have their style updated. I have columns 10-12 set to date_style as defined in my code, but sometimes the cells in the sheet are not updating. Here's the code:
from datetime import datetime
from openpyxl.styles import NamedStyle
from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font
import openpyxl, os
# Clean up the 'OpportunityList' sheet: convert the text dates in columns
# 10-12 to date objects, apply a date style, delete outdated rows, format the
# header row and widen all columns.
wb = openpyxl.load_workbook('Opportunities.xlsx', data_only=True)
sheet = wb['OpportunityList']
date_style = NamedStyle(name='datetime', number_format='MM/DD/YYYY')
bold = Font(bold=True)
no_fill = PatternFill(fill_type=None)

# Remove the last row of the sheet before any processing.
last_row = sheet.max_row
print('Deleting row ' + str(last_row))
sheet.delete_rows(last_row)

# Cut-off dates used by the deletion rules below.
column_10_cutoff = datetime.strptime('7/31/2017', "%m/%d/%Y").date()
open_column_11_cutoff = datetime.strptime('12/31/2016', "%m/%d/%Y").date()

# Walk the data rows from the bottom up (row 1 is the header).
for rowNum in range(sheet.max_row, 1, -1):
    date_cells = [sheet.cell(row=rowNum, column=idx) for idx in (10, 11, 12)]
    # Text -> date for all three columns first, then the style.
    for date_cell in date_cells:
        date_cell.value = datetime.strptime(date_cell.value, '%m/%d/%Y').date()
    for date_cell in date_cells:
        date_cell.style = date_style

    # NOTE(review): the checks below re-read the cells at index rowNum after
    # a possible delete_rows(rowNum); presumably they then see whatever row
    # has moved into that position - confirm this is intended.
    if sheet.cell(row=rowNum, column=10).value <= column_10_cutoff:
        print('Deleting row ' + str(rowNum))
        sheet.delete_rows(rowNum)

    status = sheet.cell(row=rowNum, column=4).value
    if status == 'Open' and \
            sheet.cell(row=rowNum, column=11).value <= open_column_11_cutoff:
        print('Deleting row ' + str(rowNum))
        sheet.delete_rows(rowNum)

    # Column 11 is raised to column 10's date when it is later than it.
    column_10_value = sheet.cell(row=rowNum, column=10).value
    if column_10_value < sheet.cell(row=rowNum, column=11).value:
        sheet.cell(row=rowNum, column=11).value = \
            sheet.cell(row=rowNum, column=10).value

# Header row: bold font, no fill.
for header_cell in sheet['1:1']:
    header_cell.font = bold
    header_cell.fill = no_fill

# Give every column the same fixed width.
for col in sheet.columns:
    column = col[0].column  # column identifier of the first cell
    sheet.column_dimensions[column].width = 20

wb.save('Updated_Opportunities.xlsx')
See a screenshot to see what I'm talking about:
As you can see, some cells maintain the default 'yyyy-mm-dd' style and I don't know why. All 3 column are originally text columns, so the for loop is definitely updating them to a date type, it's just the format part that sometimes fails. Almost all the cells in all 3 columns update, but there's a few outliers and I don't have an explanation for it. It's not a big deal, but I'm a beginner, so perhaps I'm doing something wrong in the for loop without knowing.
Thanks in advance
I want to open a file using Python and paste query results from Oracle into a specific sheet. I found a way to do this with xlsxwriter, but that is not the right tool for the job.
I can get my query to execute and append to a list. I have both strings and integers in the result. I cannot get this to transfer to an excel file. Any help would be great.
The error I'm getting is:
line 201, in _bind_value
raise ValueError("Cannot convert {0} to Excel".format(value))
ValueError: Cannot convert ('20 GA GV-CS-CT-DRY-G90 60xC COIL', 2, 848817, 982875, 1.15793510261929) to Excel
Code:
import cx_Oracle
import openpyxl
# Run the query and keep every result row (one tuple per row) in a list.
con = cx_Oracle.connect('example', 'example', "example")
cur = con.cursor()
statement = """ select * from example"""
cur.arraysize = 2000
cur.execute(statement)
heatmap_data = list(cur)
con.close()

# Open the target workbook and write the results into "Sheet1".
file = "path/Test.xlsx"
wb = openpyxl.load_workbook(filename=file)
ws = wb.get_sheet_by_name("Sheet1")
row = 1
col = 1
# Only the records from index 2 onwards are written.
for record in heatmap_data[2:]:
    # The whole result tuple is assigned to a single cell here.
    ws.cell(row=row, column=col).value = record
    # NOTE: this assigns +1 to row; it does not increment it.
    row = +1
wb.save(file)
Maybe openpyxl doesn't convert iterables (which is what looks to be passed) to ws.cell.value.
Try:
# Write each result tuple into one cell as a comma-separated string.
# Every field is converted with str() first because the records mix strings
# and numbers, and str.join() only accepts strings.
for rowNum, data in enumerate(heatmap_data):
    ws.cell(row=rowNum + 2, column=col).value = ", ".join(str(field) for field in data)
# Noticed the range you're choosing skips the first 2 values of your data.
# Looks like you're not incrementing the column. Meh. Guess not.
Heh.
Resolved the issue with the following code:
# Write the first five fields of every record into columns 1-5, one
# worksheet row per record, starting at row 1.
row = 1
for i in heatmap_data:
    print(i[0], i[1], i[2], i[3], i[4])
    for column_index in range(1, 6):
        ws.cell(row=row, column=column_index).value = i[column_index - 1]
    row += 1
I am able to find the last row and I have my variable set; however, I'm getting this error: This object does not support enumeration
## Export Severity to Qualy Data ##
## access excel application and open active worksheet ##
# NOTE(review): indentation was lost in this paste, so the extent of the
# `for` and `if` bodies below cannot be determined here.
Excel = win32.gencache.EnsureDispatch('Excel.application')
Excel.Visible = False
wb = Excel.Workbooks.Open('Qualys Data.xlsx')
sh = wb.ActiveSheet
## Find last row ##
# The row count of the used range is taken as the last row.
lastRow = sh.UsedRange.Rows.Count
next_Row = lastRow+1
print("Next row to print: ", next_Row)
## Can i loop through the document, if on last_Row / print to last row?
# Iterating the worksheet object is what raises the enumeration error
# described in the question.
for line in sh:
# `desired_row` and `reportDate` are not defined in this snippet.
if desired_row == next_Row:
sh.Range("A1:A1").Value = reportDate
print("Exporting to data to new spreadsheet...")
time.sleep(1)
# Alerts are switched off around SaveAs and switched back on afterwards.
Excel.DisplayAlerts = 0
wb.SaveAs('Qualys Data.xlsx')
Excel.DisplayAlerts = True
Excel.Quit()
Basically I want to write to the last row if on the last row, next_Row will print the row number to write to
It is not possible to iterate on wb.ActiveSheet which is why you are getting the error message.
If you are just trying to append a single cell to the bottom of the first column, you can write directly to that cell; no enumeration is needed:
import win32com.client as win32
from datetime import datetime
import time
# Append the current date and time below the used range, in column 1.
reportDate = datetime.now()
Excel = win32.gencache.EnsureDispatch('Excel.application')
Excel.Visible = False
wb = Excel.Workbooks.Open(r'e:\python temp\Qualys Data.xlsx')
ws = wb.ActiveSheet
## Find last row ##
# NOTE(review): this takes the row count of the used range as the last row,
# which presumably assumes the used range starts at row 1 - confirm.
lastRow = ws.UsedRange.Rows.Count
next_Row = lastRow + 1
print("Next row to print: ", next_Row)
# Write straight to the target cell; no iteration over the sheet is needed.
ws.Cells(next_Row, 1).Value = reportDate
print("Exporting to data to new spreadsheet...")
time.sleep(1)
# Alerts are switched off around SaveAs and switched back on afterwards.
# NOTE(review): the workbook is opened from an absolute path but saved under
# a bare file name - confirm the save location is the intended one.
Excel.DisplayAlerts = 0
wb.SaveAs('Qualys Data.xlsx')
Excel.DisplayAlerts = True
Excel.Quit()
This is done using ws.Cells(next_Row, 1).Value = reportDate
In this example, it just appends the current date and time.
To write a list to the last row use the following:
# Values to place in consecutive columns of the new row.
data = ["cell 1", "cell 2", "cell 3", "cell 4"]
# Assign the list to a range in row next_Row spanning columns 1..len(data).
ws.Range(ws.Cells(next_Row, 1), ws.Cells(next_Row, len(data))).Value = data
I am trying to copy a sheet, default_sheet, into a new sheet new_sheet in the same workbook.
I did manage to create a new sheet and copy the values from the default sheet. How can I also copy the style of each cell into the new_sheet cells?
# NOTE(review): indentation was lost in this paste; the nesting of the loops
# below cannot be determined here.
# Create the target sheet and name it.
new_sheet = workbook.create_sheet()
new_sheet.title = sheetName
# Look up the source sheet and the newly created target sheet by name.
default_sheet = workbook.get_sheet_by_name('default')
new_sheet = workbook.get_sheet_by_name(sheetName)
# starting_col is computed from the highest column index but is not used in
# the lines shown here.
for row in default_sheet.rows:
col_idx = float(default_sheet.get_highest_column())
starting_col = chr(65 + int(col_idx))
# Copy every cell value to the same coordinate in the new sheet.
for row in default_sheet.rows:
for cell in row:
new_sheet[cell.get_coordinate()] = cell.value
<copy also style of each cell>
I am at the moment using openpyxl 1.8.2, but i have in mind to switch to 1.8.5.
One solution is with copy:
from copy import copy, deepcopy
# Copy the style entry stored for this cell's coordinate into the new sheet.
# NOTE(review): `_styles` is a private attribute, so this presumably only
# works on the openpyxl version the answer was written for - confirm.
new_sheet._styles[cell.get_coordinate()] = copy(
default_sheet._styles[cell.get_coordinate()])
As of openpyxl 2.5.4, python 3.4: (subtle changes over the older version below)
from copy import copy

# Style attributes that are copied from each source cell, in this order.
_STYLE_ATTRIBUTES = (
    'font',
    'border',
    'fill',
    'number_format',
    'protection',
    'alignment',
)

# Copy every cell of the 'default' sheet - value and, where present, style -
# to the same position in a newly created sheet.
new_sheet = workbook.create_sheet(sheetName)
default_sheet = workbook['default']
for source_row in default_sheet.rows:
    for cell in source_row:
        new_cell = new_sheet.cell(row=cell.row, column=cell.col_idx,
                                  value=cell.value)
        if not cell.has_style:
            continue
        for attribute in _STYLE_ATTRIBUTES:
            setattr(new_cell, attribute, copy(getattr(cell, attribute)))
For openpyxl 2.1
# Copy every cell of the 'default' sheet - value and, where present, style -
# to the same position in a newly created sheet. The style objects are
# assigned directly, without copying.
new_sheet = workbook.create_sheet(sheetName)
default_sheet = workbook['default']
for source_row in default_sheet.rows:
    for cell in source_row:
        new_cell = new_sheet.cell(row=cell.row_idx,
                                  col=cell.col_idx, value=cell.value)
        if not cell.has_style:
            continue
        for attribute in ('font', 'border', 'fill', 'number_format',
                          'protection', 'alignment'):
            setattr(new_cell, attribute, getattr(cell, attribute))
The StyleableObject implementation stores styles in a single list, _style, and style properties on a cell are actually getters and setters to this array. You can implement the copy for each style individually but this will be slow, especially if you're doing it in a busy inner loop like I was.
If you're willing to dig into private class attributes there is a much faster way to clone styles:
# Clone all style information in one step by copying the private `_style`
# attribute, instead of assigning each style property separately.
if cell.has_style:
new_cell._style = copy(cell._style)
FWIW this is how the optimized WorksheetCopy class does it in the _copy_cells method.
Maybe this is the most convenient way for most people.
from openpyxl import load_workbook
from openpyxl import Workbook
# Copy the value and the style of cell A1 from the active sheet of an
# existing workbook into the active sheet of a new workbook.
read_from = load_workbook('path/to/file.xlsx')
read_sheet = read_from.active
write_to = Workbook()
write_sheet = write_to.active

source_cell = read_sheet['A1']
target_cell = write_sheet['A1']
target_cell.value = source_cell.value
target_cell.style = source_cell.style

write_to.save('save/to/file.xlsx')
I organized the answers above and the code below works for me.
(it copies the cell value and the cell format)
from openpyxl import load_workbook
from copy import copy
# Unmerge every merged range in the sheet and fill each cell of the former
# range with the value and formatting of its top-left cell.
wb = load_workbook(filename='unmerge_test.xlsx')  # your file name
ws = wb['sheet_merged']  # your sheet name in the file above

# Iterate over a snapshot, because unmerging changes ws.merged_cells.ranges.
for group in list(ws.merged_cells.ranges):
    min_col, min_row, max_col, max_row = group.bounds
    cell_start = ws.cell(row=min_row, column=min_col)
    top_left_cell_value = cell_start.value
    ws.unmerge_cells(str(group))

    for i_row in range(min_row, max_row + 1):
        for j_col in range(min_col, max_col + 1):
            target = ws.cell(row=i_row, column=j_col,
                             value=top_left_cell_value)
            # Carry over the formatting of the top-left cell.
            target.alignment = copy(cell_start.alignment)
            target.border = copy(cell_start.border)
            target.font = copy(cell_start.font)

wb.save("openpyxl_unmerged.xlsx")
Hope this helps!