How do I add a page background image in pylatex? - python

I have written the following code, and have given the latex commands for drawing background, by using the NoEscape module.
I have an image reportbg.png in the same directory as the program. Now, I want this image to appear as a background in all pages of the report.
# Gather the fault pictures: all PNGs first, then all JPGs, each group in
# natural sort order.
patterns = ('../Faults/*.png', '../Faults/*.jpg')
imgnames = []
for pattern in patterns:
    imgnames += natsort.natsorted(glob.glob(pattern))

geometry_options = {
    "head": "30pt",
    "margin": "0.3in",
    "top": "0.2in",
    "bottom": "0.4in",
    "includeheadfoot": True,
}
doc = Document(geometry_options=geometry_options)

# Register an (empty) page style in the preamble and switch the document to it.
first_page = PageStyle("firstpage")
doc.preamble.append(first_page)
doc.change_document_style("firstpage")

# NOTE(review): both commands are appended to the document *body*, not to
# doc.preamble. This is the placement that produces the
# "Can be used only in preamble" error quoted after this snippet.
for raw_command in (r'\usepackage{wallpaper}', r'\CenterWallPaper{reportbg.png}'):
    doc.append(NoEscape(raw_command))

# NOTE(review): the nesting below was reconstructed from a snippet whose
# indentation was lost; the position of the final append is a best guess.
with doc.create(Section('Faults identified')):
    doc.append("Report")
    with doc.create(Subsection('Fault pictures')):
        for imgname in imgnames:
            with doc.create(Figure(position='h!')) as f_pic:
                f_pic.add_image(imgname, width='220px')
                f_pic.add_caption('Height: '+str(56)+', Angle: '+str(20))
    doc.append('Some regular text')
However, I got the following error:
! LaTeX Error: Can be used only in preamble.
See the LaTeX manual or LaTeX Companion for explanation.
Type H <return> for immediate help.
...
l.23 \usepackage
{wallpaper}%
! Undefined control sequence.
l.24 \CenterWallPaper
{reportbg.png}%
<../Faults/1.jpg, id=1, 1927.2pt x 1084.05pt> <use ../Faults/1.jpg>
<../Faults/2.jpg, id=2, 1927.2pt x 1084.05pt> <use ../Faults/2.jpg>
<../Faults/3.jpg, id=3, 1927.2pt x 1084.05pt> <use ../Faults/3.jpg>
<../Faults/4.jpg, id=4, 1003.75pt x 1003.75pt> <use ../Faults/4.jpg>
LaTeX Warning: '!h' float specifier changed to '!ht'.

To put a background image on every page of the document, you can first generate the PDF with pylatex and then add the image as a watermark with PyPDF2. To do so, you need to convert your 'reportbg.png' image to PDF format (reportbg.pdf); the example below uses a watermark file named 'watermark3.pdf'.
Here's a modified example based on the pylatex documentation (https://jeltef.github.io/PyLaTeX/current/examples/basic.html):
CODE
from pylatex import Document, Section, Subsection, Command
from pylatex.utils import italic, NoEscape
import PyPDF2
class Document_Watermark():
    """Fill a pylatex document, compile it to PDF and watermark every page.

    Constructing an instance runs the whole pipeline: the document is
    filled, compiled to ``basic_maketitle2.pdf`` and finally merged with the
    first page of ``watermark3.pdf`` into ``PDF_Watermark.pdf``.

    :param doc: the document to fill and compile
    :type doc: :class:`pylatex.document.Document` instance
    """

    def __init__(self, doc):
        self.doc = doc
        self.fill_document()
        self.create_document()
        self.Watermark()

    def fill_document(self):
        """Add a section, a subsection and some text to the document."""
        with self.doc.create(Section('A section')):
            self.doc.append('Some regular text and some ')
            self.doc.append(italic('italic text. '))
            with self.doc.create(Subsection('A subsection')):
                self.doc.append('Also some crazy characters: $&#{}')

    def create_document(self):
        """Append a second section and compile the document with pdflatex."""
        with self.doc.create(Section('A second section')):
            self.doc.append('Some text.')
        self.doc.generate_pdf('basic_maketitle2', clean_tex=False, compiler='pdflatex')

    def Watermark(self):
        """Merge page 0 of ``watermark3.pdf`` onto every generated page.

        The result is written to ``PDF_Watermark.pdf``. Both input files
        stay open until the output has been written, and all three files
        are closed even if reading, merging or writing raises.
        """
        with open('basic_maketitle2.pdf', 'rb') as source_file, \
                open('watermark3.pdf', 'rb') as watermark_file:
            pdfReader = PyPDF2.PdfFileReader(source_file)
            pdfWatermark = PyPDF2.PdfFileReader(watermark_file)
            pdfWriter = PyPDF2.PdfFileWriter()
            watermark_page = pdfWatermark.getPage(0)
            for pageNum in range(pdfReader.numPages):
                pageObj = pdfReader.getPage(pageNum)
                pageObj.mergePage(watermark_page)
                pdfWriter.addPage(pageObj)
            # Open the output only once every page has been merged, so a
            # failure above does not leave an empty result file behind.
            with open('PDF_Watermark.pdf', 'wb') as result_file:
                pdfWriter.write(result_file)
# Document with `\maketitle` command activated.
# (The earlier `Document('basic')` instance was dropped: it was rebound
# before ever being used.)
doc = Document()
doc.preamble.append(Command('title', 'Awesome Title'))
doc.preamble.append(Command('author', 'Anonymous author'))
doc.preamble.append(Command('date', NoEscape(r'\today')))
doc.append(NoEscape(r'\maketitle'))

# Constructing the helper fills the document, compiles it and writes the
# watermarked PDF.
Document_Watermark(doc)
The example watermark is this one: watermark3.pdf
The initial PDF document: basic_maketitle2.pdf
The final document: PDF_Watermark.pdf
PS: The watermark, the initially generated PDF and the .py file must be in the same directory. I couldn't upload the PDF files, because this is my first answer and I'm not sure how to, but I have shared some images instead. I hope this is helpful.
For more information, I suggest to read the following book: "Automate the boring stuff with Python", chapter 13, by Al Sweigart.

Related

Issue with pdf reader returning empty pages in the output - pdfrw.pdfreader.PdfReader()

I want to
read a pdf of 70 pages
add a different footer in each page
save the new file
To achieve this, I started using pdfrw and reportlab. Although the results are mostly good, some pages of the 70-page document come back empty/blank from pdfrw.pdfreader.PdfReader, and I don't know why (these pages look fine before the footers are added, and the document isn't encrypted or password-protected).
Is there any approach to make the reading of pdfrw.pdfreader.PdfReader more robust? or an additional reading process that allows user to perform the override of footers? Thanks
Note: footer_array is a length-70 array of tags that shows the last update status of each slide of the 70-pages pdf.
from reportlab.pdfgen.canvas import Canvas
from reportlab.lib.colors import HexColor
from pdfrw import PdfReader
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
def add_footer(footer_array, doc_path, new_path):
    """Copy the PDF at ``doc_path`` to ``new_path`` with a footer on each page.

    ``footer_array[i]`` is the text drawn on page ``i + 1``. The text
    "updated" is drawn in green, an empty string in black, and anything
    else in red slightly further to the left.
    """
    reader = PdfReader(doc_path)
    pages = [pagexobj(p) for p in reader.pages]
    canvas = Canvas(new_path)

    for page_num, page in enumerate(pages, start=1):
        print(page_num, page.BBox)
        label = footer_array[page_num - 1]

        # Choose (fill colour, x position as a fraction of the page width).
        if label == "updated":
            colour, x_fraction = '#339966', 0.90
        elif label == "":
            colour, x_fraction = '#000000', 0.90
        else:
            colour, x_fraction = '#FF0000', 0.85

        # Re-draw the source page, then stamp the footer on top of it.
        canvas.setPageSize((page.BBox[2], page.BBox[3]))
        canvas.doForm(makerl(canvas, page))
        canvas.setFont("Helvetica", 8)
        canvas.setFillColor(HexColor(colour))
        canvas.drawString(page.BBox[2]*x_fraction, 10, label)
        canvas.showPage()

    canvas.save()

docxtpl - error when opening document in Word after adding more than five images

I'm trying to automate some reports in Word, and I'm getting the following error when I open the created document in Word:
"Word found unreadable content in test. Do you want to recover the contents of this document? If you trust the source of this document, click Yes."
After clicking yes it says the file cannot be opened. When I open in Libre Office there's no issue (I'm running the script on Ubuntu/Python 3.8.5)
Here's a simplified version of my code:
from docxtpl import DocxTemplate, InlineImage
from docx.shared import Mm
# Load the Word template that the images and month names are rendered into.
doc = DocxTemplate("template_test.docx")
""" load up images """
# Charts: the two pie charts get an explicit size, the others keep their own.
mps_chart = InlineImage(doc, image_descriptor='test/mps_line_chart.png')
server_pie = InlineImage(doc, image_descriptor='test/server_availability_pie.png', width=Mm(76), height=Mm(58))
agent_pie = InlineImage(doc, image_descriptor='test/agent_availability_pie.png', width=Mm(76), height=Mm(58))
cases_chart = InlineImage(doc, image_descriptor='test/cases_bar_chart.png')
alarms_chart = InlineImage(doc, image_descriptor='test/alarms_line_chart.png')
# Intro graphics: all four are sized to 38 mm x 38 mm.
intro_alarms_graphic = InlineImage(doc, image_descriptor='test/alarms_intro_graphic.png', width=Mm(38), height=Mm(38))
intro_cases_graphic = InlineImage(doc, image_descriptor='test/open cases_intro_graphic.png', width=Mm(38), height=Mm(38))
intro_mps_graphic = InlineImage(doc, image_descriptor='test/mps_intro_graphic.png', width=Mm(38), height=Mm(38))
intro_doc_graphic = InlineImage(doc, image_descriptor='test/doc_intro_graphic.png', width=Mm(38), height=Mm(38))
# Context pieces; presumably each key matches a placeholder in the template - confirm.
months = {"MONTH_1": "June", "MONTH_2": "May", "MONTH_3": "April"}
intro_images = {"intro_alarms": intro_alarms_graphic, "intro_cases": intro_cases_graphic, "intro_mps": intro_mps_graphic, "intro_doc": intro_doc_graphic}
images = {"mps_line_chart": mps_chart, "agent_pie_chart": agent_pie, "server_pie_chart": server_pie , "alarms_line_chart": alarms_chart, "cases_bar_chart": cases_chart}
# Merge the three dicts into one context (nine images plus three month names).
context = {**images, **months, **intro_images}
doc.render(context)
doc.save("test.docx")
The following will work fine, I only get the error when more than 5 images are added:
intro_images = {"intro_alarms": intro_alarms_graphic}
images = {"mps_line_chart": mps_chart, "agent_pie_chart": agent_pie, "server_pie_chart": server_pie , "alarms_line_chart": alarms_chart}
I also still have the same issue when I include all the images in a single dict, or if I do this:
context = {"mps_line_chart": mps_chart, "agent_pie_chart": agent_pie, "server_pie_chart": server_pie , "alarms_line_chart": alarms_chart, "cases_bar_chart": cases_chart, "intro_alarms": intro_alarms_graphic, "intro_cases": intro_cases_graphic, "intro_mps": intro_mps_graphic, "intro_doc": intro_doc_graphic}
Seemed to be an issue with the Word document once it had been opened and saved again in Libre Office. I opened the template back in Word and saved which seems to have resolved the issue.

underline text with odfpy

I'd like to generate an odf file with odfpy, and am stuck on underlining text.
Here is a minimal example inspired by the official documentation, in which I can't find any information about which attributes can be used, and where.
Any suggestion?
from odf.opendocument import OpenDocumentText
from odf.style import Style, TextProperties
from odf.text import H, P, Span
# Minimal reproduction: build a text document with one partly underlined paragraph.
textdoc = OpenDocumentText()
# Text-family style that is meant to carry the underline property.
ustyle = Style(name="Underline", family="text")
#uprop = TextProperties(fontweight="bold") #uncommented, this works well
#uprop = TextProperties(attributes={"fontsize":"26pt"}) #this either
# The attribute name below is the author's guess and is the line that fails.
uprop = TextProperties(attributes={"underline":"solid"}) # bad guess, wont work !!
ustyle.addElement(uprop)
# Register the style with the document's automatic styles.
textdoc.automaticstyles.addElement(ustyle)
# Paragraph: plain text, a span using the style, then plain text again.
p = P(text="Hello world. ")
underlinedpart = Span(stylename=ustyle, text="This part would like to be underlined. ")
p.addElement(underlinedpart)
p.addText("This is after the style test.")
textdoc.text.addElement(p)
textdoc.save("myfirstdocument.odt")
Here is how I finally got it:
I created a sample document with underlining using libreoffice, and unzipped it. Looking in styles.xml part of the extracted files, I got the part that makes underlining in the document:
<style:style style:name="Internet_20_link" style:display-name="Internet link" style:family="text">
<style:text-properties fo:color="#000080" fo:language="zxx" fo:country="none" style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="font-color" style:language-asian="zxx" style:country-asian="none" style:language-complex="zxx" style:country-complex="none"/>
</style:style>
The interesting style attributes are named: text-underline-style,
text-underline-width and text-underline-color.
To use them in odfpy, '-' characters must be removed, and attributes keys must be used as str (with quotes) like in the following code. A correct style family (text in our case) must be specified in the Style constructor call.
from odf.opendocument import OpenDocumentText
from odf.style import Style, TextProperties
from odf.text import H, P, Span
# Working version: build a text document with one partly underlined paragraph.
textdoc = OpenDocumentText()
#underline style
ustyle = Style(name="Underline", family="text") #here style family
# Attribute keys are the ODF attribute names from styles.xml with the '-'
# characters removed (text-underline-style -> textunderlinestyle, ...).
uprop = TextProperties(attributes={
"textunderlinestyle":"solid",
"textunderlinewidth":"auto",
"textunderlinecolor":"font-color"
})
ustyle.addElement(uprop)
# Register the style with the document's automatic styles.
textdoc.automaticstyles.addElement(ustyle)
# Paragraph: plain text, a span using the underline style, then plain text again.
p = P(text="Hello world. ")
underlinedpart = Span(stylename=ustyle, text="This part would like to be underlined. ")
p.addElement(underlinedpart)
p.addText("This is after the style test.")
textdoc.text.addElement(p)
textdoc.save("myfirstdocument.odt")

Why does python-docx ignore rtl = true?

I need to write into a docx file from Python (which I'm a bit new at), but the text has to be written right-to-left (RTL). After days of googling, the best I could do is this:
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT as WD_STYLE_TYPE
from docx.shared import Pt
from docx.shared import Inches, Pt
# create docx file
document = Document()
# create paragraph
para = document.add_paragraph()
# create run
run = para.add_run("Hello World")
# create style
# NOTE(review): the style type is passed as the bare integer 2; presumably
# this is the character-style value of WD_STYLE_TYPE - confirm.
mystyle = document.styles.add_style("mystyle", 2)
run.style = mystyle
# Font object of the run; the rtl flag is set on it below.
font = run.font
font.rtl = True # SET RIGHT TO LEFT
# Save to an absolute Windows path (raw string keeps the backslashes literal).
document.save(r"C:\Users\USER\Desktop\Code\TofesEfes\WordTes.docx")
the problem is that for some reason the code just ignores this line:
font.rtl = True # SET RIGHT TO LEFT
If I try to change it to:
font.bold = True # SET FONT TO BOLD
the font will come out bold.
I also tried changing the text to an RTL language, and nothing changed.
Does anyone here have any idea why it's doing this?

Use PyPDF2 to detect Embedded Subset fonts in PDF

I have modified the following script using PyPDF2 to traverse through a PDF and determine whether the PDF contains unembedded fonts. It works for figuring out the list of all fonts in the PDF, and which of those are embedded. However, some PDFs have fonts in which only the subset of the font used is embedded (see https://blogs.mtu.edu/gradschool/2010/04/27/how-to-determine-if-fonts-are-embedded/) - How do you determine in a PDF whether a subset of a font is embedded? Thank you!
from PyPDF2 import PdfFileReader
import sys
# Keys whose presence in a font descriptor means a font program is embedded.
fontkeys = set(['/FontFile', '/FontFile2', '/FontFile3'])


def walk(obj, fnt, emb, _seen=None):
    """Recursively collect font names from a PDF object tree.

    Every dictionary that has a '/BaseFont' entry contributes that name to
    ``fnt``. Every dictionary that has a '/FontName' entry together with
    one of the ``fontkeys`` contributes that name to ``emb``. Nested
    dictionaries and arrays (lists/tuples) are both descended into.

    :param obj: dictionary-like or list-like object to inspect; anything
        else is ignored
    :param fnt: set that receives the '/BaseFont' names (mutated in place)
    :param emb: set that receives the embedded '/FontName' names (mutated
        in place)
    :param _seen: internal bookkeeping for cycle detection; callers should
        leave it unset
    :returns: the same ``(fnt, emb)`` objects that were passed in
    """
    if _seen is None:
        _seen = {}

    # Array items may be unresolved indirect references; objects that offer
    # getObject() are resolved before being inspected.
    resolve = getattr(obj, 'getObject', None)
    if callable(resolve):
        obj = resolve()

    is_mapping = hasattr(obj, 'keys')
    if not is_mapping and not isinstance(obj, (list, tuple)):
        return fnt, emb

    # Containers can reference each other; visit each one only once. The
    # object itself is stored so that its id cannot be reused while walking.
    if id(obj) in _seen:
        return fnt, emb
    _seen[id(obj)] = obj

    if is_mapping:
        if '/BaseFont' in obj:
            fnt.add(obj['/BaseFont'])
        elif '/FontName' in obj and fontkeys.intersection(obj):
            emb.add(obj['/FontName'])
        children = [obj[k] for k in obj]
    else:
        children = obj

    for child in children:
        walk(child, fnt, emb, _seen)
    return fnt, emb
if __name__ == '__main__':
    # Report every font used by the PDF named on the command line, plus the
    # subset of those fonts that has no embedded font program.
    # pprint is used below but is not among the file's imports, so it is
    # imported locally here.
    from pprint import pprint

    if len(sys.argv) != 2:
        sys.exit('usage: %s FILE.pdf' % sys.argv[0])
    fname = sys.argv[1]
    pdf = PdfFileReader(fname)
    fonts = set()
    embedded = set()
    for page in pdf.pages:
        obj = page.getObject()
        if '/Resources' not in obj:
            # Nothing to inspect on this page object.
            continue
        # walk() adds to both sets in place.
        walk(obj['/Resources'], fonts, embedded)
    unembedded = fonts - embedded
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print('Font List')
    pprint(sorted(fonts))
    if unembedded:
        print('\nUnembedded Fonts')
        pprint(unembedded)
By convention the PostScript name of a subset font in a PDF file has a name which begins with XXXXXX+ where 'X' is any upper case ASCII character.
See Section 5.3 of the PDF Reference Manual (version 1.7)
Additionally the presence of a CharSet or CIDSet in the font descriptor can be used to indicate a subset font (both of these are optional).
However, all of these are 'conventions', there is no actual guaranteed way to be sure that a font which does not have any of these conventions is not actually a subset font.

Categories