I'm trying to generate a PDF from a given HTML file, but I suspect the get_template function is not working.
from io import BytesIO
from django.template.loader import get_template
from xhtml2pdf import pisa
def render_to_pdf(context_dict=None):
    """Render the ``invoice.html`` template to PDF bytes.

    Args:
        context_dict: Template context. Defaults to an empty context.

    Returns:
        The PDF as ``bytes``, or ``None`` when pisa reports an error or an
        exception is raised while loading/rendering the template.
    """
    # Build a fresh dict per call instead of sharing one mutable default.
    context = {} if context_dict is None else context_dict
    try:
        template = get_template('invoice.html')
        html = template.render(context)
        result = BytesIO()
        # UTF-8 can represent every character; ISO-8859-1 raises
        # UnicodeEncodeError for anything outside Latin-1.
        pdf = pisa.pisaDocument(
            BytesIO(html.encode("utf-8")), result, encoding="utf-8"
        )
        if not pdf.err:
            return result.getvalue()
        return None
    except Exception as e:
        # Best-effort: report the failure and make the None result explicit.
        print('ERROR', e)
        return None
The Except block returns None.
Change line to:
pdf = pisa.pisaDocument(BytesIO(html.encode("utf-8")), result)
It was a logic error on my side: I hadn't added the 'templates' folder to DIRS in the settings.py file.
Related
I am creating a Django API that converts any URL or HTML file into pdf and Docx. The implemented code below already renders in pdf format using pdfkit package. I'm using python-docx to generate in Docx, but I don't know how to handle it. I would like to have any support, please. I don't have deep knowledge and any help will be appreciated.
Here is my convert.py file:
import io
from pydoc import doc
from tempfile import NamedTemporaryFile
from typing import IO
from urllib.parse import urlparse
import pdfkit
from docx import Document
class ConvertingError(Exception):
    """Signal that a conversion failed.

    A typical cause is an unreachable host in the requested URL. The
    exception acts as a wrapper around errors raised by wkhtmltopdf.
    """
def url_to_pdf(url: str) -> IO:
    """Convert the page at ``url`` to PDF and return it as an in-memory stream.

    Raises:
        ConvertingError: if pdfkit raises ``OSError`` during conversion.
    """
    with NamedTemporaryFile('w+b') as scratch:
        try:
            # pdfkit is given the temporary file's path as its output target.
            pdfkit.from_url(url, scratch.name)
        except OSError as exc:
            raise ConvertingError from exc
        rendered = scratch.read()
    return io.BytesIO(rendered)
def html_to_pdf(html: str) -> IO:
    """Convert an HTML string to PDF and return it as an in-memory stream.

    Raises:
        ConvertingError: if pdfkit raises ``OSError`` during conversion.
    """
    with NamedTemporaryFile('w+b') as scratch:
        try:
            # pdfkit is given the temporary file's path as its output target.
            pdfkit.from_string(html, scratch.name)
        except OSError as exc:
            raise ConvertingError from exc
        rendered = scratch.read()
    return io.BytesIO(rendered)
def filename_from_url(url: str) -> str:
    """Build a PDF filename from the hostname of ``url``.

    Args:
        url: The URL whose hostname names the file.

    Returns:
        ``"<hostname>.pdf"``, or ``"default.pdf"`` when the URL has no
        hostname.
    """
    # The parameter is ``url``; the previous ``URL`` was an undefined name.
    parsed = urlparse(url)
    return (parsed.hostname or 'default') + '.pdf'
def url_to_docx(url: str) -> IO:
    """Placeholder for URL-to-DOCX conversion; currently does nothing."""
    return None
def html_to_docx(html: str) -> IO:
    """Placeholder for HTML-to-DOCX conversion; currently does nothing."""
    return None
And my views.py file
from fileinput import filename
from typing import IO
from django.http import FileResponse
from rest_framework.exceptions import ValidationError
from rest_framework.parsers import MultiPartParser
from rest_framework.viewsets import ViewSet
from .converter import filename_from_url, html_to_pdf, url_to_pdf, ConvertingError,
url_to_docx, html_to_docx
from .serializers import HtmlFileInputSerializer, UrlInputSerializer
def generate_from_html(self, request):
    """Convert an uploaded HTML file to a PDF response.

    Validates ``request.data`` with ``HtmlFileInputSerializer``, converts
    the uploaded file's content with ``html_to_pdf`` and returns it as a
    ``FileResponse`` with content type ``application/pdf``.

    Raises:
        ValidationError: if the input is invalid or the conversion fails.
    """
    serializer = HtmlFileInputSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    file: IO = serializer.validated_data['file']
    raw = file.read()
    # str() on bytes yields the "b'...'" repr rather than the markup, so
    # binary content is decoded explicitly; text content is used as is.
    if isinstance(raw, (bytes, bytearray)):
        content = bytes(raw).decode('utf-8', errors='replace')
    else:
        content = str(raw)
    try:
        pdf = html_to_pdf(content)
    except ConvertingError:
        raise ValidationError('The file is of inappropriate type or corrupted.')
    response = FileResponse(pdf)
    response["Content-Type"] = 'application/pdf'
    return response
def generate_docx_from_html(self, request):
    """Placeholder for the HTML-to-DOCX view; currently does nothing."""
    return None
# class UrlConverterViewSet(ViewSet):
def generate_from_url(self, request):
    """Convert the page at a submitted URL to a PDF response.

    Validates ``request.data`` with ``UrlInputSerializer``, converts the
    page with ``url_to_pdf`` and returns it as a ``FileResponse`` with
    content type ``application/pdf``. The response filename is the
    validated ``filename`` value when present, otherwise one derived from
    the URL via ``filename_from_url``.

    Raises:
        ValidationError: if the input is invalid or the conversion fails.
    """
    serializer = UrlInputSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    url: str = serializer.validated_data['url']
    try:
        # Use the validated local ``url``; ``URL`` was an undefined name.
        pdf = url_to_pdf(url)
    except ConvertingError:
        raise ValidationError('The url is invalid or unreachable.')
    filename = serializer.validated_data.get('filename') or filename_from_url(url)
    response = FileResponse(pdf, filename=filename)
    response["Content-Type"] = 'application/pdf'
    return response
def generate_docx_from_url(self, request):
    """Placeholder for the URL-to-DOCX view; currently does nothing."""
    return None
class GeneratePdf(ViewSet):
    """View set that produces a PDF from an uploaded HTML file or a URL."""

    parser_classes = (MultiPartParser,)

    def create(self, request):
        """Dispatch to the HTML or URL converter based on the submitted data.

        A truthy ``file`` entry takes precedence over ``url``; when neither
        is present a ``ValidationError`` is raised.
        """
        payload = request.data
        if payload.get('file'):
            return generate_from_html(self, request)
        if payload.get('url'):
            return generate_from_url(self, request)
        raise ValidationError('The file or url is invalid or unreachable.')
I tried to generate Pdf file from html using xhtml2pdf, after that I want to upload to S3. I had no idea how to do it, after trying a few ways but still stuck. Thank you so much for your help in advance.
def upload_pdf_S3(pdf):
    """Upload a file-like object to the ``test`` bucket as ``test111.pdf``.

    Args:
        pdf: The object handed to ``upload_fileobj``; it needs to be a
            readable binary file-like object (for example ``BytesIO``).

    Returns:
        ``True`` on success, ``False`` if the upload raises ``ClientError``.
    """
    # Credentials must be passed by keyword: the positional slots after the
    # service name are region_name and api_version, not the access keys.
    client = boto3.client(
        's3',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    try:
        client.upload_fileobj(pdf, 'test', 'test111.pdf')
    except ClientError as e:
        print(e)
        return False
    return True
def render_to_pdf(template_src, context_dict=None):
    """Render a template to PDF, upload it to S3 and return it as a response.

    Args:
        template_src: Name of the template to render.
        context_dict: Template context. Defaults to an empty context.

    Returns:
        An ``HttpResponse`` with content type ``application/pdf``, or
        ``None`` when pisa reports an error.
    """
    # Build a fresh dict per call instead of sharing one mutable default.
    context = {} if context_dict is None else context_dict
    template = get_template(template_src)
    html = template.render(context)
    result = BytesIO()
    # UTF-8 can represent every character; ISO-8859-1 raises
    # UnicodeEncodeError for anything outside Latin-1.
    pdf = pisa.pisaDocument(
        BytesIO(html.encode("utf-8")), result, encoding="utf-8"
    )
    if pdf.err:
        return None
    pdf_bytes = result.getvalue()
    # Upload the generated bytes (not the pisa status object), and only once
    # rendering succeeded. A separate buffer is handed over so the uploader
    # may consume or close it without affecting the response body.
    upload_pdf_S3(BytesIO(pdf_bytes))
    return HttpResponse(pdf_bytes, content_type='application/pdf')
I found a way to upload the file directly to the S3 bucket without having to write it locally. I use a different upload function but the idea is the same:
def upload_pdf_S3(pdf):
    """Store ``pdf`` in the configured bucket as ``myfolder/myfile.pdf``.

    Args:
        pdf: The object body passed to ``put_object`` as ``Body``.

    Returns:
        ``True`` on success, ``False`` if the upload raises an exception.
    """
    s3 = boto3.resource('s3')
    try:
        s3.Bucket(AWS_STORAGE_BUCKET_NAME).put_object(
            Key="myfolder/myfile.pdf",
            Body=pdf,
        )
    except Exception:
        # Narrower than a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        return False
    return True
The trick was that the Body parameter in put_object has to be in bytes format.
# Excerpt from inside a view function: render `html` with pisa, passing
# `response` as the destination and `link_callback` as the link resolver.
pisa_status = pisa.CreatePDF(html, dest=response, link_callback=link_callback)
# Upload the bytes held by `result`.
# NOTE(review): `result` is not defined in this excerpt, while the PDF is
# written to `response` above - confirm which buffer actually holds the PDF.
upload_success = upload_pdf_S3(result.getvalue())
# Report a failed upload to the client.
if not upload_success:
return HttpResponse('File upload to S3 has failed.')
It didn't work for me using pisa.pisaDocument(), but it did with pisa.CreatePDF() and result.getvalue(), as the latter returns the file contents as bytes.
In case you don't have a link_callback function, I used the same the documentation has.
In case someone runs into the same issue, you can try this: it creates a local file, which you can then upload to S3, for example.
# Write the PDF to a local file. The context manager closes the handle so
# the data is flushed to disk before the file is used elsewhere.
with open('test.pdf', "w+b") as file:
    pdf = pisa.pisaDocument(BytesIO(html.encode('utf-8')), dest=file)
I'm trying to convert django template to pdf using pdfkit, wkhtmltopdf was installed but I'm getting error like
OSError: wkhtmltopdf exited with non-zero code 1.
error:QXcbConnection: Could not connect to display
How to solve this issue or suggest me any other better way to export django template to pdf?
from django.http import HttpResponse
from django.template.loader import get_template
import pdfkit
def generatepdf(request):
    """Render ``template_name.html`` with an empty context and serve it as a PDF.

    The response is marked for inline display under the name
    ``sample_pdf.pdf``.
    """
    markup = get_template('template_name.html').render({})
    # False is passed as the output path; the return value of from_string
    # is used directly as the response body.
    document = pdfkit.from_string(markup, False)
    filename = "sample_pdf.pdf"
    response = HttpResponse(document, content_type='application/pdf')
    response['Content-Disposition'] = f'inline; filename="{filename}"'
    return response
You can try https://pypi.org/project/django-wkhtmltopdf/. It works without issue and the pdf has no issue in terms of layout. It is very easy to implement.
In my Django views, I create a PDF file and I want to download it.
The file exists (path: /app/data/4.pdf) and I call this view:
def download_line(request):
    """Serve ``/app/data/4.pdf`` as a file download.

    Raises:
        Http404: if the request is neither AJAX nor a GET, or if the file
            cannot be read.
    """
    if not request.is_ajax() and not request.method == 'GET':
        raise Http404
    try:
        fs = FileSystemStorage('/app/data')
        with fs.open('4.pdf') as pdf:
            response = HttpResponse(pdf, content_type='application/pdf')
        response['Content-Disposition'] = 'attachment; filename="4.pdf"'
    except Exception as e:
        # Exceptions have no ``.message`` attribute on Python 3; log the
        # exception itself. Raising here also avoids falling through to
        # ``return response`` with ``response`` never assigned.
        logger.warning("Download Line | Erreur : %s", e)
        raise Http404 from e
    return response
But the download doesn't start and no error. Have you got a solution?
Thanks.
You can download existing file in your app by a link and static, like this
<a href="{% static 'questions/import_files/import_questions.xlsx' %}" download>Excel Format File </a>
I use FileResponse to serve file download, when the file already exists. FileResponse has been around since Django 1.7.4.
from django.core.files.storage import FileSystemStorage
from django.http import FileResponse
def download_line(request):
    """Serve ``filename.pdf`` from the storage folder as an attachment."""
    fs = FileSystemStorage('/absolute/folder/name')
    # Keep the response object: it was previously created and discarded,
    # leaving ``response`` undefined on the following lines.
    response = FileResponse(
        fs.open('filename.pdf', 'rb'),
        content_type='application/force-download',
    )
    response['Content-Disposition'] = 'attachment; filename="filename.pdf"'
    return response
Try this; I use these lines to download files:
from django.http import HttpResponse
from wsgiref.util import FileWrapper
import os
def download(request, file_path):
    """Serve the file at ``file_path`` (e.g. ``'/tmp/file.pdf'``).

    Returns:
        An ``HttpResponse`` wrapping the file, with an inline
        Content-Disposition naming the file's basename, or ``None`` if
        anything goes wrong while building it.
    """
    try:
        handle = open(file_path, 'rb')
        response = HttpResponse(
            FileWrapper(handle),
            content_type='application/force-download',
        )
        disposition = 'inline; filename=' + os.path.basename(file_path)
        response['Content-Disposition'] = disposition
    except Exception:
        return None
    return response
def sample_download_client_excel(request):
    """Serve the file referenced by the first ``SampleFile`` record.

    The path is taken from the record's ``file_name.url`` with leading and
    trailing slashes stripped.

    Returns:
        An ``HttpResponse`` wrapping the file, with an inline
        Content-Disposition naming the file's basename, or ``None`` if
        anything goes wrong while building it.
    """
    try:
        record = SampleFile.objects.all().first()
        file_path = record.file_name.url.strip('/')
        handle = open(file_path, 'rb')
        response = HttpResponse(
            FileWrapper(handle),
            content_type='application/force-download',
        )
        disposition = 'inline; filename=' + os.path.basename(file_path)
        response['Content-Disposition'] = disposition
    except Exception:
        return None
    return response
When I render the HTML it looks normal; however, when I convert it to PDF using pisa:
def render_html2pdf_stream_pisa(template_src, context_dict):
    """Render a template with pisa and return the resulting PDF content.

    Returns:
        The contents of the output buffer when pisa reports no error,
        otherwise ``None`` (also when an exception is raised and printed).
    """
    html = get_template(template_src).render(Context(context_dict))
    try:
        result = StringIO.StringIO()
        source = StringIO.StringIO(html.encode("UTF-8"))
        pdf = pisa.pisaDocument(source, dest=result, encoding="UTF-8")
        if pdf.err:
            return None
        return result.getvalue()
    except Exception as e:
        print(Exception(e))
    return None
As a result, the PDF is half-baked: it is not a complete PDF that matches the HTML's styling.
I don't know where the issue is.