Why is my code to download files producing a 404 error? - python

I have created an app, generated client credentials, and trusted the app for my SharePoint online site.
I have created a file test.txt, and it is placed at https://company.sharepoint.com/sites/testsite/Shared%20Documents/General/test.txt
Additionally, I have installed the latest version of the module
pip freeze | grep Office
Office365-REST-Python-Client==2.3.11
class SharePoint:
    """Thin wrapper around an app-only-authenticated SharePoint ClientContext."""

    def __init__(self):
        # Configs.SITE_URL is the site root, e.g.
        # 'https://company.sharepoint.com/sites/testsite/'
        context_auth = AuthenticationContext(Configs.SITE_URL)
        context_auth.acquire_token_for_app(
            client_id=Configs.OAUTH_CLIENT_ID,
            client_secret=Configs.OAUTH_CLIENT_SECRET,
        )
        self.ctx = ClientContext(Configs.SITE_URL, context_auth)

    def download_files(self):
        """Download test.txt into a fresh temporary directory.

        BUG FIX: the server-relative URL must NOT be percent-encoded.
        Passing 'Shared%20Documents' makes the API encode it a second time
        ('Shared%2520Documents' in the failing request), so SharePoint looks
        for a folder that does not exist and returns 404. Use the decoded
        path with a literal space instead.
        """
        file_url = "/sites/testsite/Shared Documents/General/test.txt"
        download_path = os.path.join(tempfile.mkdtemp(), os.path.basename(file_url))
        print(download_path)
        with open(download_path, "wb") as local_file:
            self.ctx.web.get_file_by_server_relative_url(file_url) \
                .download(local_file).execute_query()
        print("[Ok] file has been downloaded into: {0}".format(download_path))


if __name__ == '__main__':
    s = SharePoint()
    s.download_files()
However, it throws an error, not able to get my head around this.
office365.runtime.client_request_exception.ClientRequestException: ('-2130575338, Microsoft.SharePoint.SPException', 'The file /sites/testsite/Shared%20Documents/General/test.txt does not exist.', "404 Client Error: Not Found for url: https://company.sharepoint.com/sites/testsite/_api/Web/getFileByServerRelativeUrl('%2Fsites%2Ftestsite%2FShared%2520Documents%2FGeneral%2Ftest.txt')?$select=ServerRelativePath")

You seem to be basing this off of the example shown here.
I was having similar issues at first, until I made all function inputs be absolute paths, inclusive of url scheme and site. This just removes a lot of room for error.
My current script is similar to this:
from urllib.parse import urlparse
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
def download_file(local_absolute_path: str, global_absolute_path: str, client_context: ClientContext) -> None:
    """Download a SharePoint file identified by its absolute URL.

    :param local_absolute_path: destination path on the local filesystem
    :param global_absolute_path: full 'https://host/sites/...' URL of the file
    :param client_context: an authenticated ClientContext for the site
    """
    print(f"The file {global_absolute_path} is being prepared for download.")
    # BUG FIX: get_file_by_server_relative_url() expects the server-relative
    # *path string*; the original passed the whole ParseResult object returned
    # by urlparse(), which does not stringify to a usable path.
    server_relative_path = urlparse(global_absolute_path).path
    file_to_download = client_context.web.get_file_by_server_relative_url(server_relative_path)
    with open(local_absolute_path, "wb") as local_file:
        file_to_download.download_session(local_file).execute_query()
    print(f"──► Download successful. The file has been saved as {local_absolute_path}\n")
Note that self.ctx in your code is equivalent to client_context in mine.
I recommend writing a bunch of helper functions to convert the paths back and forth between absolute, relative and the file name. The ones I currently use can be found below:
import os
from urllib.parse import urlparse
class PathHandler(object):
    """Convert a file's absolute URL between absolute, relative and local forms."""

    def __init__(self, absolute_path: str) -> None:
        # The full 'https://host/sites/.../file' URL this handler works on.
        self.absolute_path = absolute_path

    def get_filename_from_absolute(self) -> str:
        """Return just the file-name component of the URL."""
        parsed_url = urlparse(self.absolute_path)
        return os.path.basename(parsed_url.path)

    def get_relative_from_absolute(self) -> str:
        """Return the server-relative path (everything after the host)."""
        parsed_url = urlparse(self.absolute_path)
        return parsed_url.path

    def get_parent_folder_from_absolute(self) -> str:
        """Return the server-relative path of the containing folder."""
        parsed_url = urlparse(self.absolute_path)
        return os.path.dirname(parsed_url.path)

    def get_scheme_and_root_from_absolute(self) -> str:
        """Return '<scheme>://<host>' for the URL.

        BUG FIX: the original returned f"{scheme}//{netloc}" (missing the
        colon), producing e.g. 'https//company.sharepoint.com'.
        """
        parsed_url = urlparse(self.absolute_path)
        return f"{parsed_url.scheme}://{parsed_url.netloc}"

    def convert_to_absolute_local(self, local_root: str, global_root: str) -> str:
        """Map a URL under global_root to a local path under local_root."""
        return local_root + os.sep + self.absolute_path[len(global_root):].replace("/", os.sep)

    def convert_to_absolute_global(self, local_root: str, global_root: str) -> str:
        """Map a local path under local_root to a URL under global_root."""
        return global_root + "/" + self.absolute_path[len(local_root):].replace(os.sep, "/")

Related

Django REST framework HTML/URL to DOCX

I am creating a Django API that converts any URL or HTML file into pdf and Docx. The implemented code below already renders in pdf format using pdfkit package. I'm using python-docx to generate in Docx, but I don't know how to handle it. I would like to have any support, please. I don't have deep knowledge and any help will be appreciated.
Here is my convert.py file:
import io
from pydoc import doc
from tempfile import NamedTemporaryFile
from typing import IO
from urllib.parse import urlparse
import pdfkit
from docx import Document
class ConvertingError(Exception):
    """Raised when a document conversion fails.

    For example, when the host of a URL is unreachable. In practice this is
    a wrapper for errors reported by wkhtmltopdf.
    """
def url_to_pdf(url: str) -> IO:
    """Fetch the page at *url* and render it into an in-memory PDF stream."""
    with NamedTemporaryFile('w+b') as tmpf:
        try:
            # wkhtmltopdf (via pdfkit) writes the result to the temp file path.
            pdfkit.from_url(url, tmpf.name)
        except OSError as exc:
            raise ConvertingError from exc
        return io.BytesIO(tmpf.read())
def html_to_pdf(html: str) -> IO:
    """Render an HTML string into an in-memory PDF stream."""
    with NamedTemporaryFile('w+b') as tmpf:
        try:
            # wkhtmltopdf (via pdfkit) writes the result to the temp file path.
            pdfkit.from_string(html, tmpf.name)
        except OSError as exc:
            raise ConvertingError from exc
        return io.BytesIO(tmpf.read())
def filename_from_url(url: str) -> str:
    """
    Generate a pdf filename from the hostname of *url*.

    Falls back to 'default.pdf' when the URL has no hostname.
    """
    # BUG FIX: the original called urlparse(URL) with the undefined name
    # 'URL' instead of the 'url' parameter, raising NameError on every call.
    parsed = urlparse(url)
    return (parsed.hostname or 'default') + '.pdf'
def url_to_docx(url: str) -> IO:
    """Fetch HTML from *url* and convert it to DOCX. Not implemented yet."""
    pass
def html_to_docx(html: str) -> IO:
    """Convert an HTML string to DOCX. Not implemented yet."""
    pass
And my views.py file
from fileinput import filename
from typing import IO
from django.http import FileResponse
from rest_framework.exceptions import ValidationError
from rest_framework.parsers import MultiPartParser
from rest_framework.viewsets import ViewSet
# BUG FIX: the original split this import across two lines with a trailing
# comma but no parentheses or backslash, which is a SyntaxError.
from .converter import (
    filename_from_url,
    html_to_pdf,
    url_to_pdf,
    ConvertingError,
    url_to_docx,
    html_to_docx,
)
from .serializers import HtmlFileInputSerializer, UrlInputSerializer
def generate_from_html(self, request):
    """Validate the uploaded HTML file and return it rendered as a PDF.

    Raises ValidationError when the upload is malformed or conversion fails.
    """
    serializer = HtmlFileInputSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    file: IO = serializer.validated_data['file']
    # BUG FIX: file.read() returns bytes; str(bytes) produces the repr
    # ("b'...'") with the prefix and quotes baked into the HTML. Decode the
    # bytes instead.
    content = file.read().decode()
    try:
        pdf = html_to_pdf(content)
    except ConvertingError:
        raise ValidationError('The file is of inappropriate type or corrupted.')
    response = FileResponse(pdf)
    response["Content-Type"] = 'application/pdf'
    return response
def generate_docx_from_html(self, request):
    """Return the uploaded HTML file converted to DOCX. Not implemented yet."""
    pass
# class UrlConverterViewSet(ViewSet):
def generate_from_url(self, request):
    """Fetch the submitted URL, render it to PDF and return it as a file.

    Raises ValidationError when the URL is invalid or unreachable.
    """
    serializer = UrlInputSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    url: str = serializer.validated_data['url']
    try:
        # BUG FIX: the original passed the undefined name 'URL' (NameError);
        # the validated value is bound to 'url'.
        pdf = url_to_pdf(url)
    except ConvertingError:
        raise ValidationError('The url is invalid or unreachable.')
    # Same fix here: filename_from_url(URL) -> filename_from_url(url).
    filename = serializer.validated_data.get('filename') or filename_from_url(url)
    response = FileResponse(pdf, filename=filename)
    response["Content-Type"] = 'application/pdf'
    return response
def generate_docx_from_url(self, request):
    """Return the page at the submitted URL converted to DOCX. Not implemented yet."""
    pass
class GeneratePdf(ViewSet):
    """Single endpoint that converts an uploaded HTML file or a URL to PDF."""
    parser_classes = (MultiPartParser,)

    def create(self, request):
        # Dispatch on whichever input the client supplied.
        if request.data.get('file'):
            return generate_from_html(self, request)
        if request.data.get('url'):
            return generate_from_url(self, request)
        raise ValidationError('The file or url is invalid or unreachable.')

Azure Durable Functions : Http Trigger error

As a newbie in Azure,
I am following Microsoft Azure Function tutorial page
https://learn.microsoft.com/en-us/azure/azure-functions/durable/durable-functions-cloud-backup?tabs=python
and github page
https://github.com/Azure/azure-functions-durable-python/tree/master/samples/fan_in_fan_out .
**HttpStart code**
import logging
import json
import azure.functions as func
import azure.durable_functions as df
async def main(req: func.HttpRequest, starter: str) -> func.HttpResponse:
    """HTTP-triggered starter: launch the orchestration named in the route."""
    client = df.DurableOrchestrationClient(starter)
    # The POST body is expected to carry JSON input for the orchestrator.
    payload: str = json.loads(req.get_body().decode())
    function_name = req.route_params["functionName"]
    instance_id = await client.start_new(function_name, client_input=payload)
    logging.info(f"Started orchestration with ID = '{instance_id}'.")
    return client.create_check_status_response(req, instance_id)
**E2_BackupSiteContent**
import azure.functions as func
import azure.durable_functions as df
def orchestrator_function(context: df.DurableOrchestrationContext):
    """Fan-out/fan-in: copy every file under the input directory to blob storage."""
    root_directory: str = context.get_input()
    if not root_directory:
        raise Exception("A directory path is required as input")
    files = yield context.call_activity("E2_GetFileList", root_directory)
    # Schedule one copy activity per file, then await them all together.
    copy_tasks = [context.call_activity("E2_CopyFileToBlob", f) for f in files]
    results = yield context.task_all(copy_tasks)
    # Each activity returns the byte count it copied.
    return sum(results)


main = df.Orchestrator.create(orchestrator_function)
**E2_CopyFileToBlob**
import os
import pathlib
from azure.storage.blob import BlobServiceClient
from azure.core.exceptions import ResourceExistsError
# Storage connection string comes from the function-app settings.
connect_str = os.getenv('AzureWebJobsStorage')


def main(filePath: str) -> str:
    """Upload the file at *filePath* to the 'backups' container; return its size."""
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    container_name = "backups"
    # Idempotently ensure the container exists.
    try:
        blob_service_client.create_container(container_name)
    except ResourceExistsError:
        pass
    # Blob name = "<parent-dir>_<file-name>", built from the last two path parts.
    parent_dir, fname = pathlib.Path(filePath).parts[-2:]
    blob_client = blob_service_client.get_blob_client(
        container=container_name, blob=parent_dir + "_" + fname
    )
    byte_count = os.path.getsize(filePath)
    with open(filePath, "rb") as data:
        blob_client.upload_blob(data)
    return byte_count
**E2_GetFileList**
import os
from os.path import dirname
from typing import List
def main(rootDirectory: str) -> List[str]:
    """Walk *rootDirectory* and collect the full paths of every
    __init__.py / function.json inside directories whose path contains 'E2_'.
    """
    all_file_paths = []
    for path, _, files in os.walk(rootDirectory):
        # Only the activity/orchestrator folders are of interest.
        if "E2_" not in path:
            continue
        all_file_paths.extend(
            os.path.join(path, name)
            for name in files
            if name in ("__init__.py", "function.json")
        )
    return all_file_paths
When I http trigger with postman app,
POST http://localhost:7071/api/orchestrators/E2_BackupSiteContent?req="D:\Tmp"
I got the following error messages.
[2021-11-12T02:13:42.432Z] Worker process started and initialized.
[2021-11-12T02:13:46.489Z] Host lock lease acquired by instance ID '000000000000000000000000AE48769C'.
[2021-11-12T02:13:52.529Z] Executing 'Functions.HttpStart' (Reason='This function was programmatically called via the host APIs.', Id=748996d0-1f84-4597-86ea-768467eb36e3)
[2021-11-12T02:13:52.560Z] Executed 'Functions.HttpStart' (Failed, Id=748996d0-1f84-4597-86ea-768467eb36e3, Duration=5433ms)
[2021-11-12T02:13:52.562Z] System.Private.CoreLib: Exception while executing function: Functions.HttpStart. Microsoft.Azure.WebJobs.Host: Exception binding parameter 'req'. Microsoft.AspNetCore.Server.Kestrel.Core: Reading the request body timed out due to data arriving too slowly. See MinRequestBodyDataRate.
What should I do to solve this problem?
(I tested with Linux and Windows.)
--Added--
Postman capture
Instead of passing the directory in the query string of the URL, you should pass the path in the HTTP request body in the postman tool. The Microsoft doc page itself shows how to do it, see Run the sample section.

Python ZipFile - ValueError: I/O operation on closed file

When I use the zipfile object in multiple functions, it works fine. However, when I try to run one of the functions in a thread, it gives the error "I/O operation on closed file".
Below code works fine which validates and extracts the zipfile
from zipfile import ZipFile
from threading import Thread
def extract_data(file):
    """Validate the uploaded zip and, if valid, extract it (synchronous version)."""
    # BUG FIX: the imported class is ZipFile, not Zipfile — the original
    # raised NameError before anything else could run.
    zip_file = ZipFile(file)
    validate = validate_function(zip_file)
    if validate.status_code == 200:
        data = extract_function(zip_file)
However, If I run the extract_function in thread, It gives me "ValueError: I/O operation on closed file"
def extract_data(file):
    """Validate the uploaded zip and, if valid, extract it on a background thread."""
    # BUG FIX: the imported class is ZipFile, not Zipfile — the original
    # raised NameError before anything else could run.
    zip_file = ZipFile(file)
    validate = validate_function(zip_file)
    if validate.status_code == 200:
        extract = Thread(target=extract_function, args=[zip_file])
        extract.start()
Please guide me for understanding the root cause of this issue.
Update:
Here is the sample code to reproduce the issue:
from zipfile import ZipFile
from threading import Thread
import pandas as pd
from flask import Flask, request, Response
from werkzeug.middleware.proxy_fix import ProxyFix
from werkzeug.datastructures import FileStorage
from flask_restplus import Api, Resource, reqparse, cors
from flask_cors import cross_origin
app = Flask(__name__)
# ProxyFix keeps client address/scheme info correct behind a reverse proxy.
app.wsgi_app = ProxyFix(app.wsgi_app)
# flask-restplus API wrapper; the Swagger UI is served at the root path.
api = Api(app,
          version='1.0.0',
          doc='/',
          )
def validate_function(zip_file):
    """Test the archive's integrity and report the outcome as a Response."""
    try:
        # testzip() returns the first corrupt member name, or None when OK.
        first_bad = zip_file.testzip()
        if first_bad is not None:
            return Response('Invalid Zip file', 601)
        return Response('Zip file is validated', 200)
    except Exception as e:
        return Response('Error :' + str(e), 601)
def extract_function(zip_file):
    """Load every member of *zip_file* into pandas DataFrames.

    CSV members go through read_csv; anything else is assumed to be an Excel
    workbook (read_excel with sheet_name=None yields a dict of DataFrames,
    one per sheet). Errors are printed and swallowed, preserving the
    original best-effort behaviour.
    """
    df_list = []
    try:
        for member in zip_file.namelist():
            if member.endswith('.csv'):
                df_list.append(pd.read_csv(zip_file.open(member)))
            else:
                # BUG FIX: the original called zipfile.open(...) — 'zipfile'
                # is not a bound name here (only ZipFile was imported), so
                # this branch always raised NameError. Use the zip_file
                # argument, as the CSV branch does.
                excel_df = pd.read_excel(zip_file.open(member), None)
                if type(excel_df) == dict:
                    df_list.extend(list(excel_df.values()))
                else:
                    df_list.append(excel_df)
        print(len(df_list))
    except Exception as e:
        print('Error in converting to dataframe', str(e))
def extract_data(file):
    """Validate the upload, then extract it on a background thread."""
    zip_file = ZipFile(file)
    resp = validate_function(zip_file)
    if resp.status_code == 200:
        # Hand the open ZipFile to a worker thread for extraction.
        worker = Thread(target=extract_function, args=[zip_file])
        worker.start()
    return resp
process_data = reqparse.RequestParser()
process_data.add_argument('file', location='files', type=FileStorage, required=True, help='Input file in Zip format')


# BUG FIX: the decorators below were written with '#' instead of '@'
# (a Markdown-escaping casualty), so the resource was never routed and
# CORS was never applied. Restored as real decorators.
@api.route('/process-data')
@api.expect(process_data)
class DataExtract(Resource):
    @cors.crossdomain(origin='*')
    @cross_origin()
    def post(self):
        """Accept a zip upload and run validation + threaded extraction."""
        file = request.files['file']
        resp = extract_data(file)
        return resp


app.run()
Use with to open files, so they are closed correctly after you are done. You also have to open the file separately in the new thread:
def extract_data(file):
    """Validate the archive, then let a worker thread re-open it from *file*."""
    with ZipFile(file) as zip_file:
        validate = validate_function(zip_file)
        if validate.status_code == 200:
            # Pass the raw file, not the ZipFile: the thread opens its own
            # handle, so nothing is shared after this block closes the archive.
            worker = Thread(target=extract_function, args=[file])
            worker.start()
def extract_function(file):
    """Worker-thread entry point: open the archive independently, then extract.

    BUG FIX: the original body contained only a comment under the `with`,
    which is a SyntaxError — a placeholder statement is required.
    """
    with ZipFile(file) as zip_file:
        # extract ...
        pass

Download file using fastapi

I see the functions for uploading in an API, but I don't see how to download. Am I missing something? I want to create an API for a file download site. Is there a different API I should be using?
from typing import List
from fastapi import FastAPI, Query
app = FastAPI()
# BUG FIX: the original read `PATH "some/path"` (missing '='), a SyntaxError.
PATH = "some/path"


# BUG FIX: restored the '@' decorator (pasted as '#', so the route was
# never registered with the app).
@app.get("/shows/")
def get_items(q: List[str] = Query(None)):
    '''
    Pass path to function.
    Returns folders and files.
    '''
    results = {}
    query_items = {"q": q}
    entry = PATH + "/".join(query_items["q"]) + "/"
    # NOTE(review): 'os' is used here but not imported in this snippet —
    # confirm the real module imports it.
    dirs = os.listdir(entry)
    results["folders"] = [val for val in dirs if os.path.isdir(entry + val)]
    results["files"] = [val for val in dirs if os.path.isfile(entry + val)]
    results["path_vars"] = query_items["q"]
    return results
Here is a sample bit of Python code to fetch files and directories for a path; you can return the path as a list with a new entry in a loop to go deeper into a file tree. Passing a file name should trigger a download function, but I can't seem to get a download function going.
This worked For me
from starlette.responses import FileResponse
return FileResponse(file_location, media_type='application/octet-stream',filename=file_name)
This will download the file with filename
Since we're talking about FastAPI, the proper way to return a file response is covered in their documentation, code snippet below:
from fastapi import FastAPI
from fastapi.responses import FileResponse
file_path = "large-video-file.mp4"

app = FastAPI()


# BUG FIX: restored the '@' decorator (pasted as '#', so the route was
# never registered).
@app.get("/")
def main():
    # NOTE(review): 'text/mp4' is an unusual media type; the registered one
    # is 'video/mp4' — confirm intent before changing.
    return FileResponse(path=file_path, filename=file_path, media_type='text/mp4')
FastAPI uses Starlette's FileResponse class so there are two ways to import FileResponse on your API code. But of course importing from FastAPI would be a better choice. You can follow the approach below to enable your API endpoints support file download.
Do not forget to add aiofiles to your dependency list. A basic requirements.txt file should look like (versions of modules might change in time, version 0.63.0 of fastapi strictly use starlette 0.13.6)
uvicorn==0.13.4
fastapi==0.63.0
starlette==0.13.6
aiofiles==0.6.0
And the API code
import os
from fastapi import FastAPI
from fastapi.responses import FileResponse
app = FastAPI()


# BUG FIX: restored the '@' decorator (pasted as '#', so the route was
# never registered).
@app.get("/")
async def main():
    file_name = "FILE NAME"
    # DEPENDS ON WHERE YOUR FILE LOCATES
    file_path = os.getcwd() + "/" + file_name
    return FileResponse(path=file_path, media_type='application/octet-stream', filename=file_name)
I figured it out,
from starlette.responses import FileResponse
# BUG FIX: restored the '@' decorator (pasted as '#', so the route was
# never registered).
@app.get("/shows/")
def get_items(q: List[str] = Query(None)):
    '''
    Pass path to function.
    Returns folders and files.
    '''
    results = {}
    query_items = {"q": q}
    if query_items["q"]:
        entry = PATH + "/".join(query_items["q"])
    else:
        entry = PATH
    # A file path means the client asked for a download rather than a listing.
    if os.path.isfile(entry):
        return download(entry)
    dirs = os.listdir(entry + "/")
    results["folders"] = [
        val for val in dirs if os.path.isdir(entry + "/" + val)]
    results["files"] = [val for val in dirs if os.path.isfile(entry + "/" + val)]
    results["path_vars"] = query_items["q"]
    return results
def download(file_path):
    """
    Return a FileResponse for *file_path*, or None when it is not a file.
    """
    # Guard clause: nothing to serve for a missing / non-file path.
    if not os.path.isfile(file_path):
        return None
    return FileResponse(file_path)
I added this part
from starlette.responses import FileResponse
if os.path.isfile(entry):
return download(entry)
This allows you to host static files. But for some reason all files download under the generic name "download" plus the extension. If you know how to preserve the original file name, let me know.
from fastapi import FastAPI
from fastapi.responses import FileResponse
import uvicorn
import os
app = FastAPI()


# BUG FIX: restored the '@' decorator (pasted as '#'), without which the
# endpoint is never registered.
@app.get("/download-file")
def download_file(file_name: str):
    """Serve <folder_path>\\<file_name>.xlsx as an attachment download."""
    folder_path = r"C:\Users\HP\Desktop\excel files"
    file_location = f'{folder_path}{os.sep}{file_name}.xlsx'  # os.sep is used to separate with a \
    return FileResponse(file_location, media_type='application/octet-stream', filename=file_name)


uvicorn.run(app, port=9105)

Change twitter banner from url

How would I go by changing the twitter banner using an image from url using tweepy library: https://github.com/tweepy/tweepy/blob/v2.3.0/tweepy/api.py#L392
So far I got this and it returns:
def banner(self):
    # Download a banner image and attempt to set it as the profile banner.
    url = 'https://blog.snappa.com/wp-content/uploads/2019/01/Twitter-Header-Size.png'
    file = requests.get(url)
    # NOTE(review): update_profile_banner() treats 'filename' as a *path* to
    # stat on disk; passing the raw image bytes here is what triggers
    # "ValueError: stat: embedded null character in path". Pass a name plus
    # the bytes via the 'file' kwarg instead (see answer below).
    self.api.update_profile_banner(filename=file.content)
ValueError: stat: embedded null character in path
It seems like the filename argument requires an image that has been downloaded to disk. Is there any way to process this without downloading the image and then removing it?
Looking at library's code you can do what you want.
def update_profile_banner(self, filename, *args, **kargs):
    # Excerpt from tweepy's source: the method pops an optional 'file'
    # keyword argument, which lets callers supply file contents directly
    # instead of having the library read from 'filename'.
    f = kargs.pop('file', None)
So what you need to do is supply the filename and the file kwarg:
# Derive a plausible file name from the URL, then pass the downloaded bytes
# via the 'file' kwarg so nothing is written to the filesystem.
# NOTE(review): fragment — 'url', 'file' and 'self' are bound by the
# surrounding method shown in the question.
filename = url.split('/')[-1]
self.api.update_profile_banner(filename, file=file.content)
import tempfile
def banner():
    # NOTE(review): written like a method (it uses self.api) but declared
    # without a 'self' parameter — as shown this raises NameError; confirm
    # it belongs inside the questioner's class.
    url = 'file_url'
    file = requests.get(url)
    # Buffer the downloaded bytes in a named temp file so the API receives a
    # real on-disk path.
    temp = tempfile.NamedTemporaryFile(suffix=".png")
    try:
        temp.write(file.content)
        # NOTE(review): a temp.flush() before handing over the name may be
        # needed, and on Windows the file cannot be re-opened while open here.
        self.api.update_profile_banner(filename=temp.name)
    finally:
        temp.close()

Categories