Unable to open docx in python - python

The problem is that a file that I created with Microsoft Word 2010 but can't be opened with python. Some open and some don't. They're all created the same way.
At first I tried to open them via path, it didn't work so I tried to do it the simple. Still no success. This is the error that I get:docx.opc.exceptions.PackageNotFoundError: Package not found at 'COMANDA_TRANSPORT_-_Grecia_SRL.docx'
Here's my spaghetti code:
import os
import re
import Database
import mysql.connector as mysql
from docx import Document
from docx.shared import Inches
from Database import tables
#=============================================#
# == Search for file == #
director = os.path.dirname(os.path.abspath(__file__))
lista_directoare = os.listdir(director)
print(lista_directoare)
print(lista_directoare.sort())
# == Last file== #
ultimul_fisier = lista_directoare[-1]
print('Last file: ' + ultimul_fisier)
def sort(fisier):
fisier = re.search(r'\d+',ultimul_fisier).group()
print(fisier)
string_ultimFisier = str(ultimul_fisier)
print(string_ultimFisier)
print(director + "\\" + string_ultimFisier)
#fisier = open('{}'.format(ultimul_fisier),"rb")
#fisier = open(director + "\\" + string_ultimFisier,"rb")
#document = Document(fisier)
document = Document('COMANDA_TRANSPORT_-_Grecia_SRL.docx')
for paragraph in document.paragraphs:
if "pip" in paragraph.text:
print("Am gasit")
break
else:
print('Nu am gasit')
break
for table in tables:
print(table)
document.save('test.docx')

Related

Print command response into .csv table

I don't have much programming skills, but I need to send a output of a command into .csv table. I managed to create this, but it prints only 1st line of the table instead whole table, and I don't know how to procceed futher with turning it into csv.
Any help would be much appreciated.
from __future__ import print_function
from datetime import date
import sys
import os
import time
today1 = date.today().strftime('%Y_%m_%d')
strTime = time.strftime('%Y_%m_%d')
command = 'My command here'
cmd = session.command()
response = cmd.execute(command)
element_group = response.get_output()
table = element_group.groups()[0]
for cell in table[0]:
print(cell.labels()[0] + ' , ' + '\t', end='')
print('\n')
for cell in table[5]:
print(cell.value() + ' , ', end='')
print('\n')
I have tried script in description. I was expecting to print whole table and turning it into .csv file.
I have figured it out. Here is the script I wanted.
from __future__ import print_function
import csv
from datetime import date
import sys
import os
import time
today1 = date.today().strftime('%Y_%m_%d')
strTime = time.strftime('%Y_%m_%d')
command = 'My command here'
cmd = session.command()
response = cmd.execute(command)
element_group = response.get_output()
table = element_group.groups()[0]
header = [cell.labels()[0] for cell in table[0]]
rows = [[cell.value() for cell in row] for row in table]
directory = 'location'
filename = directory + 'filename' + today1 + '.csv'
with open(filename, mode='w') as file:
writer = csv.writer(file)
writer.writerow(header)
writer.writerows(rows)

How to print a chart from an Excel Chartsheet using Python

I'm completely new to coding (it's just for fun and hopefully to save some time at work) and I've been trying to make my first lines of code working.
Specifically, I want my code to open a certain Excel workbook, find certain worksheets which are actually chartsheets (each one with only one chart in it) and print them as pdf/jpeg files in a specific folder. I went for the ExportAsFixedFormat, but I encountered the following error.
AttributeError: 'Chartsheet' object has no attribute 'ExportAsFixedFormat'
Could you please help me? Is there any way to print/save a Chartsheet?
I went through the Chartsheet Object's methods, but I couldn't find anything helpful. I'm sure I'm missing something.
Some info about my configuration:
Windows 10 Home x64
Excel for Microsoft 365 MSO (16.0.13628.20318) 64 bit
Python 3.8 32 bit
Pywin32 version 227
Below the chunk of code that I'm having problems with.
[Edit]: below the whole code I wrote, maybe the error is not where I think it is.
Thank you in advance and sorry for my broken English.
First of all, I've imported a ton of things, I'm aware I most probably need just half of them.
import plotly
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import win32com.client as win32
import openpyxl
import os, sys
import math
import openpyxl
from openpyxl import Workbook, load_workbook
from openpyxl import chart
from openpyxl import chartsheet
from openpyxl.chartsheet.publish import WebPublishItem, WebPublishItems
from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing
#from .drawings import find_images
from openpyxl.chartsheet import Chartsheet
import openpyxl.chart
import win32com.client
from pdf2image import convert_from_path
from pathlib import Path
import xlsxwriter
And here is the code I wrote:
path_filePy = Path(__file__).resolve()
current_folder = path_filePy.parent
image_folder_name = "Immages"
image_folder_path = os.path.join(current_folder, image_folder_name)
try:
os.mkdir(image_folder_path)
except OSError:
files = os.listdir(image_folder_path)
for f in files:
os.remove(image_folder_path + '\\'+ f)
folder_list = os.listdir(current_folder)
excel_list=[]
for l in folder_list:
if l.endswith('.xlsx'):
excel_list.append(l)
chartsheets_names=['Chartsheet1', 'Chartsheet2', 'Chartsheet3', 'Chartsheet4']
excel = win32.gencache.EnsureDispatch('Excel.Application')
for excelfile in excel_list:
wb = load_workbook(os.path.join(current_folder, excelfile))
for sheet in chartsheets_names:
ws=wb[sheet]
image_file_name = excelfile[:-5]+'_'+sheet+'.pdf'
image_file_path = os.path.join(image_folder_path,image_file_name)
ws.ExportAsFixedFormat(0, image_file_path)
convert_from_path(image_file_path, dpi=300, output_folder=image_folder_path,fmt='jpeg')
wb.Close()
I managed to get what I wanted in the end. Below is the code I'm using now, maybe it could be helpful to someone else too.
I think I was messing with code related to win32com and code related to openpxl.
Now I would like my Chartsheets to stretch all over the printing area prior to printing (I tried to set margins to zero, it does not work). I think I should use wb_sheet.PageSetup.ChartSize with the value FullPage, but I do not get how to assign it.
import os
import sys
from pathlib import Path
import win32com.client as w3c
from pdf2image import convert_from_path
# find the parent folder of the .py file
path_filePy = Path(__file__).resolve()
current_folder = path_filePy.parent
print(current_folder)
# create the destination folder or empty it if existing
image_folder_name = "Immages"
image_folder_path = os.path.join(current_folder, image_folder_name)
#print(image_folder_path)
try:
os.mkdir(image_folder_path)
except OSError:
files = os.listdir(image_folder_path)
for f in files:
os.remove(image_folder_path + '\\'+ f)
# list of file in the folder
folder_list = os.listdir(current_folder)
# list of only *.xlsx files
excel_list=[]
for l in folder_list:
if l.endswith('.xlsx'):
excel_list.append(l)
# listof sheets' names I want to print
chartsheets_names=['Sheet1', 'Sheet2', 'Sheet3', 'Sheet4']
o = w3c.Dispatch("Excel.Application")
o.Visible = False
# for each sheet names as in my list, in each xlsx file, it prints in both pdf and jpeg
for excel_file in excel_list:
try:
wb_path = os.path.join(os.path.abspath(current_folder), excel_file)
wb = o.Workbooks.Open(wb_path)
for wb_sheet in wb.Sheets:
if wb_sheet.Name in chartsheets_names:
path_to_pdf = os.path.join(os.path.abspath(image_folder_path), excel_file[:-5] + ' - ' + str(wb_sheet.Name) + '.pdf')
wb_sheet.SaveAs(path_to_pdf, FileFormat=57)
convert_from_path(
path_to_pdf, # the input pdf file
dpi=300,
output_folder=image_folder_path,
fmt='jpeg',
output_file=str(excel_file[:-5] + ' - ' + str(wb_sheet.Name)),
poppler_path = r"C:\where\your\poppler\bin folder is",
use_pdftocairo=False)
else: next
wb.Close(False)
except OSError:
next
o.Quit
`
See updated answer further down.
See further update below.
After pip install comtypes this works for me:
import os
import comtypes.client
SOURCE_DIR = r'C:\Users\xyz\SO-samples' # adjust to your needs
TARGET_DIR = r'C:\Users\xyz\SO-samples' # adjust to your needs
app = comtypes.client.CreateObject('Excel.Application')
app.Visible = False
infile = os.path.join(os.path.abspath(SOURCE_DIR), 'an-excel-file.xlsx')
outfile = os.path.join(os.path.abspath(TARGET_DIR), 'an-excel-file.pdf')
doc = app.Workbooks.Open(infile)
doc.ExportAsFixedFormat(0, outfile, 1, 0)
doc.Close()
app.Quit()
Updated answer - selectable sheets:
import os
import win32com.client
SOURCE_DIR = r'C:\Users\xyz\SO-samples' # adjust
TARGET_DIR = r'C:\Users\xyz\SO-samples' # adjust
wb_path = os.path.join(os.path.abspath(SOURCE_DIR), 'xyzzy.xlsx')
wb = o.Workbooks.Open(wb_path)
o = win32com.client.Dispatch("Excel.Application")
o.Visible = False
# print 1 sheet to 1 file
path_to_pdf = os.path.join(os.path.abspath(TARGET_DIR), 'xyzzy2.pdf')
ws_index_list = [2] # say you want to print this sheet
wb.WorkSheets(ws_index_list).Select()
wb.ActiveSheet.ExportAsFixedFormat(0, path_to_pdf)
# print 2 sheets to 1 file
path_to_pdf = os.path.join(os.path.abspath(TARGET_DIR), 'xyzzy-1-3.pdf')
ws_index_list = [1,3] # say you want to print these sheets
wb.WorkSheets(ws_index_list).Select()
wb.ActiveSheet.ExportAsFixedFormat(0, path_to_pdf)
# print 3 sheets to 1 file each
ws_index_list = [1,2,3] # say you want to print these sheets
for ws_index in ws_index_list:
path_to_pdf = os.path.join(os.path.abspath(TARGET_DIR), 'xyzzy-' + str(ws_index) + '.pdf')
wb.WorkSheets([ws_index]).Select()
wb.ActiveSheet.ExportAsFixedFormat(0, path_to_pdf)
# select sheet by name, print 1 sheet to 1 file
ws_sheet_name = 'named_sheet'
path_to_pdf = os.path.join(os.path.abspath(TARGET_DIR), 'xyzzy-' + ws_sheet_name + '.pdf')
wb.WorkSheets(ws_sheet_name).Select()
wb.ActiveSheet.ExportAsFixedFormat(0, path_to_pdf)
Further update - printing sheet names, select sheet by name:
import win32com.client as w3c
import os, sys
SOURCE_DIR = r'C:\Users\xyz\SO-samples'
TARGET_DIR = r'C:\Users\xyz\SO-samples'
wb_path = os.path.join(os.path.abspath(SOURCE_DIR), 'xyzzy.xlsx')
o = w3c.Dispatch("Excel.Application")
o.Visible = False
wb = o.Workbooks.Open(wb_path)
for wb_sheet in wb.Sheets:
print(wb_sheet.Name)
### this works
ws_sheet_name = [1,3]
path_to_pdf = os.path.join(os.path.abspath(TARGET_DIR), 'xyzzy' + '.pdf')
wb.Worksheets(ws_sheet_name).Select()
wb.ActiveSheet.ExportAsFixedFormat(0, path_to_pdf)
### this works
ws_sheet_name = 'xyzzy'
path_to_pdf = os.path.join(os.path.abspath(TARGET_DIR), 'xyzzy-xyzzy' + '.pdf')
wb.Worksheets(ws_sheet_name).Select()
wb.ActiveSheet.ExportAsFixedFormat(0, path_to_pdf)
wb.Close()

PyPDF2 give me blank pages in merged PDF

I have earlier come up with this question in here:
pypdf2-merging-pdf-pages-issue
Where I have now come a long way and can now create my PDF files from an Excel document via Pandas into PyPDF2.
As well as where I now have the number of pages that must be per. PDF.
However, my problem now is that my merged PDF files are now blank.
If I do a debug, then I can see that in my second loop, which contains the variable "paths" the right paths to my physical PDF files.
But that when they then come in through:
with path.open('rb') as pdf:
pdf_writer.append(pdf)
Then suddenly an extra "" enters the paths so that a path can be named c: \ users \ .... then suddenly it is called c: \ users \ ...
Do not know if this is what prevents the files from being opened and read correctly, and then merged into one PDF file.
Hope some can guide me as python for me is self taught.
Or in some other way can explain to me why I get created some merged PDF files that are suddenly blank on 3 pages.
My code is:
import datetime #Handle date
import pandas as pd #Handle data from Excel Sheet (Data analysis)
import PyPDF2 as pdf2 #Handle PDF read and merging
from pathlib import Path #Handle path
#Skip ERROR-message: Xref table not zero-indexed. ID numbers for objects will be corrected.
#import sys
#if not sys.warnoptions:
# import warnings
# warnings.simplefilter("ignore")
PDF_PATH = Path('C:/Users/TH/PDF/')
EXCEL_FILENAME = 'Resources/liste.xlsx'
def main():
today = datetime.date.today() # The date now
next_week = today.isocalendar()[1] + 1 # 0=Year, 1=week
resources = pd.read_excel(EXCEL_FILENAME, sheet_name='Ark1')
for row in resources.itertuples():
year = row.Aargang
paths = [
(PDF_PATH / row.Oevelse1).with_suffix('.pdf'),
(PDF_PATH / row.Oevelse2).with_suffix('.pdf'),
(PDF_PATH / row.Oevelse3).with_suffix('.pdf'),
]
pdf_writer = pdf2.PdfFileMerger()
for path in paths:
with path.open('rb') as pdf:
pdf_writer.append(pdf)
with open(f'Uge {next_week} - {year} Merged_doc.pdf', 'wb') as output:
pdf_writer.write(output)
if __name__ == '__main__':
main()
#anon01 Thx
And Thx/credit to Sirius3.
It's something about the PyPDF2, how to use it and some bugs with it.
So after edit the code to this it work.
import datetime #Handle date
import pandas as pd #Handle data from Excel Sheet (Data analysis)
from PyPDF2 import PdfFileMerger #Handle PDF read and merging
from pathlib import Path #Handle path
#Skip ERROR-message: Xref table not zero-indexed. ID numbers for objects will be corrected.
#import sys
#if not sys.warnoptions:
# import warnings
# warnings.simplefilter("ignore")
PDF_PATH = Path('C:/Users/TH/PDF')
EXCEL_FILENAME = 'Resources/liste.xlsx'
def main():
today = datetime.date.today() # The date now
next_week = today.isocalendar()[1] + 1 # 0=Year, 1=week
resources = pd.read_excel(EXCEL_FILENAME, sheet_name='Ark1')
for row in resources.itertuples():
year = row.Aargang
paths = [
(PDF_PATH / row.Oevelse1).with_suffix('.pdf'),
(PDF_PATH / row.Oevelse2).with_suffix('.pdf'),
(PDF_PATH / row.Oevelse3).with_suffix('.pdf'),
]
pdf_merger = PdfFileMerger()
for path in paths:
pdf_merger.append(str(path))
with open(f'Uge {next_week} - {year} Merged_doc.pdf', 'wb') as output:
pdf_merger.write(output)
pdf_merger.close()
if __name__ == '__main__':
main()

How do i add path in python docx?

I am trying to add a path in this code, how do i do it? I dont understand it.
# folder path
data_folder = Path("Desktop\biologi")
if y == ('biologi'):
document = Document() #create blank document
document.save(y+(x.strftime(" %Y-%m-%d"))+".docx") #save blank document, lägg in path här
document = Document(y+(x.strftime(" %Y-%m-%d"))+".docx") #open document
p = document.add_paragraph()
p.add_run(str(y+(x.strftime(" %Y-%m-%d"))))#edit words
document.save(y+(x.strftime(" %Y-%m-%d"))+".docx")#save edited document
´´´
Use os library.
import os
DESKTOP_PATH = os.path.expanduser("~\Desktop")
data_folder = os.path.join(DESKTOP_PATH, 'biologi')
print(data_folder)
# prints
# C:\Users\<Username>\Desktop\biologi

Gdata Export Document----Conflict Error

So this is rather worrying--I hope that someone can give me a hand with this one.
I am using a python script to download google doc spreadsheets and then back them up to our servers. MOST of the time, it works well, but every so often I get an error that looks like this:
gdata.service.RequestError: {'status': 409, 'body': '', 'reason': 'Conflict'}
Here is all of the code that I am using. Does somebody know if the Export function has some strange behavior that could be causing this?
QC_GoogleDoc_Spreadsheet_AutoLog
Author: Christopher James Johnson
Date: May 22, 2012
try:
from xml.etree import ElementTree
except ImportError:
from elementtree import ElementTree
import gdata.spreadsheet.service
import gdata.service
import atom.service
import gdata.spreadsheet
import gdata.docs.service
import atom
import getopt
import sys
import string
import time
import shutil
import os
import getpass
import tempfile
import csv
import time
import datetime
import glob
def main():
archiver = backUpper()
class backUpper():
def __init__(self):
gd = gdata.docs.service.DocsService()
self.gd_client = gdata.docs.service.DocsService()
self.gd_client.email = 'xxxx.xxxx'
self.gd_client.password = 'xxxxxxxx'
self.gd_client.source = 'Spreadsheets GData Sample'
self.gd_client.ProgrammaticLogin()
self.curr_key = ''
self.curr_wksht_id = ''
self.list_feed = None
self.autoLogPath = ""
spreadsheets_client = gdata.spreadsheet.service.SpreadsheetsService()
spreadsheets_client.email = self.gd_client.email
spreadsheets_client.password = self.gd_client.password
spreadsheets_client.source = "My Fancy Spreadsheet Downloader"
spreadsheets_client.ProgrammaticLogin()
feed = spreadsheets_client.GetSpreadsheetsFeed()
for i, entry in enumerate(feed.entry):
if isinstance(feed, gdata.spreadsheet.SpreadsheetsSpreadsheetsFeed):
if isinstance(entry, gdata.spreadsheet.SpreadsheetsSpreadsheet):
print entry.title.text
x = entry.id.text
print x
self.Download(entry)
self.DeleteTemporaryFiles()
def Download(self, entry):
line = entry.id.text
title = entry.title.text
splitLine = line.split('/')
key = splitLine[-1]
backUpDir = R'\\cob-hds-1\compression\QC\QCing\otherFiles\GoogleDocBackUp' + '\\'
now = datetime.datetime.now()
hour = now.hour
today = datetime.date.today()
if not os.path.exists(backUpDir + str(today)):
os.mkdir(backUpDir + str(today))
if not os.path.exists(backUpDir + str(today) + '\\' + str(hour)):
os.mkdir(backUpDir + str(today) + '\\' + str(hour))
backupDir = backUpDir + str(today) + '\\' + str(hour)
tempfile.tempdir = backupDir
file_path = tempfile.mkstemp(suffix='.xls')
uri = 'http://docs.google.com/feeds/documents/private/full/%s' % key
spreadsheets_client = gdata.spreadsheet.service.SpreadsheetsService()
spreadsheets_client.email = self.gd_client.email
spreadsheets_client.password = self.gd_client.password
spreadsheets_client.source = "My Fancy Spreadsheet Downloader"
spreadsheets_client.ProgrammaticLogin()
# ...
docEntry = self.gd_client.GetDocumentListEntry(uri)
docs_auth_token = self.gd_client.GetClientLoginToken()
self.gd_client.SetClientLoginToken(spreadsheets_client.GetClientLoginToken())
self.gd_client.Export(docEntry, file_path[1])
shutil.copy(file_path[1], backupDir + '//' + title + '.xls')
os.close(file_path[0])
self.gd_client.SetClientLoginToken(docs_auth_token)
if __name__ == '__main__':
main()
So the scary part--This just started happening THIS MORNING! Everything was great before...and this morning...something has started happening with this and other Gdata using python scripts! Please help!
Thanks!
EDIT: So a co-worker of mine was working on one of these spreadsheets at the time and both of our programs crashed. (Mine backs up the google docs and his writes to it. If we are both working on the same spreadsheet at the same time, could this create a problem?)

Categories