python docx can not find docx file - python

I actually do have the file in my folder.
My code is:
#-*-coding:utf-8-*-
import re
import time
import datetime
import sys
import os
import csv
import docx
from docx import Document
from docx import *
CURRENT_DIR = os.path.dirname(os.path.abspath(sys.argv[0]))
docxFilePath = os.path.join(CURRENT_DIR,'111.docx')
doc=Document(docxFilePath)
When I run it, it returns this error:
Traceback (most recent call last): File
"C:\Users\Windows\Desktop\test\fp\makereport.py", line 20, in <module>
doc=Document(docxFilePath) File "C:\Python27\lib\site-packages\docx\api.py", line 25, in Document
document_part = Package.open(docx).main_document_part File "C:\Python27\lib\site-packages\docx\opc\package.py", line 116, in open
pkg_reader = PackageReader.from_file(pkg_file) File "C:\Python27\lib\site-packages\docx\opc\pkgreader.py", line 32, in
from_file
phys_reader = PhysPkgReader(pkg_file) File "C:\Python27\lib\site-packages\docx\opc\phys_pkg.py", line 31, in
__new__
"Package not found at '%s'" % pkg_file docx.opc.exceptions.PackageNotFoundError: Package not found at
'C:\Users\Windows\Desktop\test\fp\111.docx'
please help

It seems that for file formats such as docx, xlsx, and PDF, the file should be in the current working directory. So you can do:
import os
os.chdir('C://Users/Windows/Desktop/test/fp')
Then see if it works.

I encountered the same problem, and Scanny answered it correctly: the file was found, but it was not a real .docx file.
Don't create the file in another application and then rename it to .docx; create a real .docx file instead.
You can use the code below to create one.
doc = docx.Document()
doc.save("/path/to/file/where/it/needs/to/save/.docx")

Related

Python : Add watermark/background in all pages PDF

I just want to add/merge a background into all pages of a PDF, but calling getPage(i) on the input file gives me an error. Only getPage(0) runs without error, but it duplicates the first page throughout the document while keeping the original number of pages.
Here is my code:
from typing import BinaryIO
import os
import PyPDF2
from PyPDF2 import PdfFileReader, PdfFileWriter, PdfFileMerger
from tkinter.filedialog import askopenfilename
from fpdf import FPDF
input_file = askopenfilename()
pdf = PdfFileReader(input_file)
watermark = PyPDF2.PdfFileReader(open('F:\abc\abc\PDF
Templates\Report First - Potrait.pdf', 'rb'))
output = PdfFileWriter()
num_numbers = pdf.numPages
for i in range(pdf.getNumPages()):
watermarks = watermark.getPage(0)
page = pdf.getPage(i)
page.mergePage(watermarks)
output.addPage(page)
with open(input_file.rsplit(".", 1)[0] + '_FP.pdf', "wb") as merged_file:
output.write(merged_file)
I get this error:
Traceback (most recent call last): File "C:\Users\Gaurav\Desktop\PFD
python\abc\PFD python\test2.py", line 23, in
page.mergePage(watermarks) File "C:\Users\Gaurav\AppData\Local\Programs\Python\Python310\lib\site-packages\PyPDF2\pdf.py",
line 2417, in mergePage
self._mergePage(page2) File "C:\Users\Gaurav\AppData\Local\Programs\Python\Python310\lib\site-packages\PyPDF2\pdf.py",
line 2426, in _mergePage
originalResources = self[PG.RESOURCES].getObject() File "C:\Users\Gaurav\AppData\Local\Programs\Python\Python310\lib\site-packages\PyPDF2\generic.py",
line 539, in getitem
return dict.getitem(self, key).getObject() KeyError: '/Resources'

FileNotFound error / reading PDF Files with PyPDF2 and os.listdir()

I have the following script to merge a couple of PDFs together:
import PyPDF2
import sys
import os
inputs = sys.argv[1]
list = os.listdir(inputs)
merger = PyPDF2.PdfFileMerger()
for pdf in list:
merger.append(pdf)
merger.write('merged.pdf')
print('All done')
The folder with the files is in a different directory than the running script, thus I inserted the full path.
Upon running like so from the terminal, python3 pdf-merger.py /Users/user/Documents/pdf_list, I get the following error:
Traceback (most recent call last):
File "pdf-merger.py", line 11, in <module>
merger.append(pdf)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/PyPDF2/merger.py", line 203, in append
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/PyPDF2/merger.py", line 114, in merge
fileobj = file(fileobj, 'rb')
FileNotFoundError: [Errno 2] No such file or directory: 'card.pdf'
I also tried with an alternative method:
import PyPDF2
import sys
import os
inputs = sys.argv[1]
list = [os.path.join(inputs,a) for a in os.listdir(inputs)]
merger = PyPDF2.PdfFileMerger()
for pdf in list:
merger.append(pdf)
merger.write('merged.pdf')
print('All done')
This time I get a PyPDF2.utils.PdfReadError: Could not read malformed PDF file, no matter what file it is.
Any ideas?
Found the problem. There was a hidden .DS_Store file in the directory which corrupted the script.
Ignoring it with if pdf.endswith('.pdf') resolved the issue!

Reading TDMS File with python nptdms, cannot open tdms file

I am having issues with getting basic function of the nptdms module working.
First, I am just trying to open a TDMS file and print the contents of specific channels within specific groups.
Using python 2.7 and the nptdms quick start here
Following this, I will be writing these specific pieces of data into a new TDMS file. Then, my ultimate goal is to be able to take a set of source files, open each, and write (append) to a new file. The source data files contain far more information than is needed, so I am breaking out the specifics into their own file.
The problem I have is that I cannot get past a basic error.
When running this code, I get:
Traceback (most recent call last):
File "PullTDMSdataIntoNewFile.py", line 27, in <module>
tdms_file = TdmsFile(r"C:\\Users\daniel.worts\Desktop\this_is_my_tdms_file.tdms","r")
File "C:\Anaconda2\lib\site-packages\nptdms\tdms.py", line 94, in __init__
self._read_segments(f)
File "C:\Anaconda2\lib\site-packages\nptdms\tdms.py", line 119, in _read_segments
object._initialise_data(memmap_dir=self.memmap_dir)
File "C:\Anaconda2\lib\site-packages\nptdms\tdms.py", line 709, in _initialise_data
mode='w+b', prefix="nptdms_", dir=memmap_dir)
File "C:\Anaconda2\lib\tempfile.py", line 475, in NamedTemporaryFile
(fd, name) = _mkstemp_inner(dir, prefix, suffix, flags)
File "C:\Anaconda2\lib\tempfile.py", line 244, in _mkstemp_inner
fd = _os.open(file, flags, 0600)
OSError: [Errno 2] No such file or directory: 'r\\nptdms_yjfyam'
Here is my code:
from nptdms import TdmsFile
import numpy as np
import pandas as pd
#set Tdms file path
tdms_file = TdmsFile(r"C:\\Users\daniel.worts\Desktop\this_is_my_tdms_file.tdms","r")
# set variable for TDMS groups
group_nameone = '101'
group_nametwo = '752'
# set objects for TDMS channels
channel_dataone = tdms_file.object(group_nameone 'Payload_1')
channel_datatwo = tdms_file.object(group_nametwo, 'Payload_2')
# set data from channels
data_dataone = channel_dataone.data
data_datatwo = channel_datatwo.data
print data_dataone
print data_datatwo
Big thanks to anyone who may have encountered this before and can help point to what I am missing.
Best,
- Dan
edit:
Solved the read-data issue by removing the "r" argument from the TdmsFile call.
Now I am having another error I can't trace when trying to write.
from nptdms import TdmsFile, TdmsWriter, RootObject, GroupObject, ChannelObject
import numpy as np
import pandas as pd
newfilepath = r"C:\\Users\daniel.worts\Desktop\Mined.tdms"
datetimegroup101_channel_object = ChannelObject('101', DateTime, data_datetimegroup101)
with TdmsWriter(newfilepath) as tdms_writer:
tdms_writer.write_segment([datetimegroup101_channel_object])
Returns error:
Traceback (most recent call last):
File "PullTDMSdataIntoNewFile.py", line 82, in <module>
tdms_writer.write_segment([datetimegroup101_channel_object])
File "C:\Anaconda2\lib\site-packages\nptdms\writer.py", line 68, in write_segment
segment = TdmsSegment(objects)
File "C:\Anaconda2\lib\site-packages\nptdms\writer.py", line 88, in __init__
paths = set(obj.path for obj in objects)
File "C:\Anaconda2\lib\site-packages\nptdms\writer.py", line 88, in <genexpr>
paths = set(obj.path for obj in objects)
File "C:\Anaconda2\lib\site-packages\nptdms\writer.py", line 254, in path
self.channel.replace("'", "''"))
AttributeError: 'TdmsObject' object has no attribute 'replace'

How to generate PDF file from an HTML file using Reportlab and Pisa in Python?

I have the following code set up to generate a PDF document using Reportlab and Pisa in Python.
import cStringIO
import ho.pisa as pisa
def html_to_pdf(data, filename, open=False):
pdf = pisa.CreatePDF(
cStringIO.StringIO(data),
file(filename, "wb"))
My HTML file contains standard HTML content.
Its fully qualified path, including the .html extension, is assigned to the output_file variable.
Call it like this:
with open(output_file, "r") as my_file:
contents = my_file.read()
html_to_pdf(contents, dest_pdf_file, open=True)
I get this error:
No handlers could be found for logger "sx.pisa3"
Traceback (most recent call last):
File "/home/devuser/myapp/app.py", line 8, in <module>
from utils.fileutils import FileUtil
File "/home/devuser/myapp/utils/fileutils.py", line 5, in <module>
import ho.pisa as pisa
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/ho/pisa/__init__.py", line 26, in <module>
from sx.pisa3.pisa import *
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/__init__.py", line 41, in <module>
from pisa import *
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/pisa.py", line 32, in <module>
from pisa_document import *
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/pisa_document.py", line 22, in <module>
from pisa_context import pisaContext
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/pisa_context.py", line 21, in <module>
from pisa_util import *
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/pisa_util.py", line 55, in <module>
raise ImportError("Reportlab Version 2.1+ is needed!")
ImportError: Reportlab Version 2.1+ is needed!
This is a "partial list" of what pip freeze yields.
Pillow==2.3.0
PyPDF2==1.25.1
html5lib==0.999
oneconf==0.3.7.14.04.1
pdfkit==0.5.0
pisa==3.0.33
reportlab==3.0
Seems like a broken installation issue...
Does anyone know how to fix this, or know of any alternative methods (approaches and/or different libraries) for converting HTML files into PDFs?
Got it working... Uninstalled and reinstalled pisa and it worked! :)
sudo easy_install pisa
My code:
import cStringIO
import ho.pisa as pisa
class FileUtil:
#staticmethod
def html_to_pdf(html, output_file):
pdfFile = file(output_file, "wb")
pdf = pisa.CreatePDF(
cStringIO.StringIO(html.encode("ISO-8859-1")), pdfFile)
pdfFile.close()

tokenize error in python

I have this sample program, example-extracttext.py, from the python-docx library, which extracts text from a docx file.
#!/usr/bin/env python
"""
This file opens a docx (Office 2007) file and dumps the text.
If you need to extract text from documents, use this file as a basis for your
work.
Part of Python's docx module - http://github.com/mikemaccana/python-docx
See LICENSE for licensing information.
"""
import sys
from docx import opendocx, getdocumenttext
if __name__ == '__main__':
try:
document = opendocx(sys.argv[1])
newfile = open(sys.argv[2], 'w')
except:
print(
"Please supply an input and output file. For example:\n"
" example-extracttext.py 'My Office 2007 document.docx' 'outp"
"utfile.txt'"
)
exit()
# Fetch all the text out of the document we just created
paratextlist = getdocumenttext(document)
# Make explicit unicode version
newparatextlist = []
for paratext in paratextlist:
newparatextlist.append(paratext.encode("utf-8"))
# Print out text of document with two newlines under each paragraph
newfile.write('\n\n'.join(newparatextlist))
It runs fine on its own, but it fails once I put another program called tokenize.py (shown just below) in the same directory.
import nltk.data
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
fo = open(sys.argv[1], "r")
data = fo.read()
print '\n-----\n'.join(tokenizer.tokenize(data))
It gives the following error:
Traceback (most recent call last):
File "./example-extracttext.py", line 14, in <module>
from docx import opendocx, getdocumenttext
File "/usr/local/lib/python2.7/dist-packages/docx-0.2.1-py2.7.egg/docx.py", line 12, in <module>
from lxml import etree
File "parsertarget.pxi", line 4, in init lxml.etree (src/lxml/lxml.etree.c:178742)
File "/usr/lib/python2.7/inspect.py", line 39, in <module>
import tokenize
File "/home/sriram/NLP_TOOLS/EDITING_TOOL/NLP/sriram_work/tokenize.py", line 3, in <module>
import nltk.data
File "/usr/local/lib/python2.7/dist-packages/nltk/__init__.py", line 106, in <module>
from decorators import decorator, memoize
File "/usr/local/lib/python2.7/dist-packages/nltk/decorators.py", line 176, in <module>
#decorator
File "/usr/local/lib/python2.7/dist-packages/nltk/decorators.py", line 154, in decorator
if inspect.isclass(caller):
AttributeError: 'module' object has no attribute 'isclass'
Please tell me how to resolve this. I want to use both the programs in a single shell script.

Categories