I am trying to read the excel file that I stored in Sharepoint. But I wasn't able to. I have referred to most of the existing articles but that didn't solve my issue.
Code I referenced From: How to read SharePoint Online (Office365) Excel files in Python with Work or School Account?
Read xlsx stored on sharepoint location with openpyxl in python?
But no luck.
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
username = '##########################'
password = '##########################'
site_url = "#######################################################"
ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
request = RequestOptions("{0}/_api/web/".format(site_url))
response = ctx.execute_request_direct(request)
json_data = json.loads(response.content)
Error I am getting :
I hope this works for you. Once you download the file you can read it as per its respective file format.
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
ctx_auth = AuthenticationContext(url)
ctx_auth.acquire_token_for_user(username, password)
ctx = ClientContext(url, ctx_auth)
response = File.open_binary(ctx, "/Shared Documents/User Guide.docx")
with open("./User Guide.docx", "wb") as local_file:
local_file.write(response.content)
Related
I am trying to read a pdf file which I have uploaded on an Azure storage account. I am trying to do this using python.
I have tried using the SAS token/URL of the file and pass it thorugh PDFMiner but I am not able get the path of the file which will be accepted by PDFMiner. I am using something like the below code:
from azure.storage.filedatalake import DataLakeServiceClient
from azure.storage.filedatalake import generate_file_sas
import os
storage_account_name = "mystorageaccount"
storage_account_key = "mystoragekey"
container_name = "mycontainer"
directory_name = 'mydirectory'
service_client = DataLakeServiceClient(account_url="{}://{}.dfs.core.windows.net".format(
"https", storage_account_name), credential=storage_account_key)
file_system_client = service_client.get_file_system_client(file_system=container_name)
directory_client = file_system_client.get_directory_client(directory_name)
file_client = directory_client.get_file_client('XXX.pdf')
download = file_client.download_file()
downloaded_bytes = download.readall()
file_sas = generate_file_sas(account_name= storage_account_name,file_system_name= container_name,directory_name= directory_name,file_name= dir_name,credential= storage_account_key)
from pdfminer.pdfpage import PDFPage
with open(downloaded_bytes, 'rb') as infile:
PDFPage.get_pages(infile, check_extractable=False)
from pdfminer.pdfpage import PDFPage
with open(file_sas, 'rb') as infile:
PDFPage.get_pages(infile, check_extractable=False)
Neither of the options are working.
Initially the input_dir was setup locally, so the code was able to fetch the pdf file and read it.
Is there a different way to pass the URL/path of the file from the storage account to the pdf's read function?
Any help is appreciated.
I tried in my environment and got below results:
Initially, I tried with same process without downloading the Pdf files from azure Datalake storage account and got no results. But AFAIK, to read the pdf file with downloading is possible way.
I tried with below code to read pdf file with Module PyPDF2, and it executed with content successfully.
Code:
from azure.storage.filedatalake import DataLakeFileClient
import PyPDF2
service_client = DataLakeFileClient.from_connection_string("<your storage connection string>",file_system_name="test",file_path="dem.pdf")
with open("dem.pdf", 'wb') as file:
data = service_client.download_file()
data.readinto(file)
object=open("dem.pdf",'rb')
pdfread=PyPDF2.PdfFileReader(object)
print("Number of pages:",pdfread.numPages)
pageObj = pdfread.getPage(0)
print(pageObj.extractText())
Console:
You can also read the pdf file through browser using file URL:
https://<storage account name >.dfs.core.windows.net/test/dem.pdf+? sas-token
Browser:
I am using SharePoint Office365 python libraries to read the pdf files in a folder and copy them to s3 but I am getting error as:
b'The length of the URL for this request exceeds the configured maxUrlLength value.'
Here is my code
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from office365.runtime.auth.user_credential import UserCredential
def sharepoint_connection(username, password, site_url, relative_url):
ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
web = ctx.web.get().execute_query()
return ctx
def sharepoint_files(relative_url):
file_names = []
file_details = {}
ctx = sharepoint_connection(username,password,site_url,relative_url)
files = ctx.web.get_folder_by_server_relative_url(relative_url).files
ctx.load(files)
ctx.execute_query()
for file in files:
file_names.append(file.properties['ServerRelativeUrl'])
file_url = file.properties['ServerRelativeUrl']
file_name = file_url[file_url.rfind("/")+1:]
file_details[file_name] = file_url
# print(file_details)
return file_details
site_url = "https://account.sharepoint.com/sites/ExternalSharing"
relative_url = "/sites/ExternalSharing/Shared Documents/OCE********"
ctx = sharepoint_connection(username,password,site_url,relative_url)
file_url = file_details[file]
response = File.open_binary(ctx, file_url)
print(response. Content)
I understand that the URL is too long. So I tried to map the sharepoint folder to one drive and then upload it but its the same issue.
Is there a way to handle this scenario.
Thanks in advance. Please let me know if any more information is needed.
Thanks,
Ashish
I use this code to read an Excel file in SharePoint, to read it, no problem the code works very well, I would also like to modify it live
I read that it is possible with the O365 module
But I don't know how to use Office365-REST-Python-Client connection with O35
here is my test code
client_id = "client_id"
client_secret = "client_secret"
site_url = "https://site.sharepoint.com/sites/test"
path = 'test/General/test.xlsx'
from office365.runtime.auth.client_credential import ClientCredential
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from xlrd import open_workbook # test for open excel
credentials = ClientCredential(client_id, client_secret)
ctx = ClientContext(site_url).with_credentials(credentials)
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("Web title: {0}".format(web.properties['Title']))
print(web.properties)
from O365.excel import WorkBook
# given a File instance that is a xlsx file ...
excel_file = WorkBook(my_file_instance) # my_file_instance should be an instance of File.
ws = excel_file.get_worksheet('my_worksheet')
cella1 = ws.get_range('A1')
cella1.values = 35
cella1.update()
If anyone has a solution
Thanks for your help.
I am trying to save the data that pulled out from PostgreSQL db onto designated MS SharePoint folder. To do so, first I retrieved data from local db, then I need to store/save this data onto SharePoint folder. I tried of using office365 api to do this, but no data saved on SharePoint folder. Does anyone has similar experiences of doing this in python? Any workaround to do this in python? any thoughts?
My current attempt:
first, I did pull up data from local postgresql db as follow:
from sqlalchemy import create_engine
import pandas as pd
import os.path
hostname = 'localhost'
database_name = 'postgres'
user = 'kim'
pw = 'password123'
engine = create_engine('postgresql+psycopg2://'+user+':'+pw+'#'+hostname+'/'+database_name)
sql = """ select * from mytable """
with engine.connect() as conn:
df = pd.read_sql_query(sql,con=engine)
then, I tried to store/save the data to designated sharepoint folder as follow:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
url_shrpt = 'https://xyzcompany.sharepoint.com/_layouts/15/sharepoint.aspx?'
username_shrpt = 'kim#xyzcompany.com'
password_shrpt = 'password123'
folder_url_shrpt = 'https://xyzcompany.sharepoint.com/:f:/g/EnIh9jxkDVpOsTnAUbo-LvIBdsN0X_pJifX4_9Rx3rchnQ'
ctx_auth = AuthenticationContext(url_shrpt)
if ctx_auth.acquire_token_for_user(username_shrpt, password_shrpt):
ctx = ClientContext(url_shrpt, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
else:
print(ctx_auth.get_last_error())
response = File.open_binary(ctx, df)
with open("Your_Offline_File_Path", 'wb') as output_file:
output_file.write(response.content)
but file was not saved on SharePoint folder. How should we save the data from PostgreSQL onto SharePoint folder using python? Is there any workaround to do this? any thoughts?
objective:
I want to write down the data that pulled out from PostgreSQL db onto SharePoint folder. From my current attempt, above attempt didn't save data onto sharepoint folder. Can anyone suggest possible way of doing this?
I think you should write csv files locally, then try following in order to upload them onto SharePoint folder:
from shareplum import Site
from shareplum import Office365
from requests_ntlm import HttpNtlmAuth
from shareplum.site import Version
UN = "myself#xyzcompany.com"
PW = "hello#"
cred = HttpNtlmAuth(UN,PW)
authcookie = Office365('https://xyzcompany.sharepoint.com',username=UN,password=PW).GetCookies()
site = Site('https://xyzcompany.sharepoint.com/sites/sample_data/',version=Version.v365,authcookie=authcookie)
folder = site.Folder('Shared Documents/New Folder')
files = Path(os.getcwd()).glob('*.csv')
for file in files:
with open(file, mode='rb') as rowFile:
fileContent = rowFile.read()
folder.upload_file(fileContent, os.path.basename(file))
this is error-free and working solution, this should work for uploading files to SharePoint folder.
A slight variation to what #Jared had up there for example if one wants to create a folder based on a date and upload files to it from a location other than the root folder on the user's computer. This will be handy to people interested in a such a solution, a problem I had.
from shareplum import Site
from shareplum import Office365
from shareplum.site import Version
import pendulum #Install it for manipulation of dates
todaysdate = pendulum.now() Get todays date
foldername1 = todaysdate.strftime('%d-%m-%Y') #Folder name in a format such as 19-06-2021
UN = "myself#xyzcompany.com"
PW = "hello#"
path = r"C:\xxxx\xxx\xxx" #Path where the files to be uploaded are stored.
doc_library = "xxxxx/yyyy" #Folder where the new folder (foldername1) will be stored
authcookie = Office365('https://xyzcompany.sharepoint.com',username=UN,password=PW).GetCookies()
site = Site('https://xyzcompany.sharepoint.com/sites/sample_data/',version=Version.v365,authcookie=authcookie)
folder = site.Folder(doc_library+'/'+foldername1) #Creates the new folder matching todays date.
files = glob.glob(path+"\\*.csv")
for file in files:
with open(file, mode='rb') as rowFile:
fileContent = rowFile.read()
folder.upload_file(fileContent, os.path.basename(file))
That's a solution that will work well for anyone looking around for such code.
I need to read a JSON file from a blob container in Azure for doing some transformation on top of the JSON Files. I have seen few documentation and StackOverflow answers and developed a python code that will read the files from the blob.
I have tried the below script from one of the Stackoverflow answers to read JSON file but I get the below error
"TypeError: the JSON object must be str, bytes or byte array, not BytesIO"
I am new to python programming so not sure of the issue in the code. I tried with download_stream.content_as_text() but the file doesnt read the file without any error.
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from io import BytesIO
import requests
from pandas import json_normalize
import json
filename = "sample.json"
container_name="test"
constr = ""
blob_service_client = BlobServiceClient.from_connection_string(constr)
container_client=blob_service_client.get_container_client(container_name)
blob_client = container_client.get_blob_client(filename)
streamdownloader=blob_client.download_blob()
stream = BytesIO()
streamdownloader.download_to_stream(stream)
# with open(stream) as j:
# contents = json.loads(j)
fileReader = json.loads(stream)
print(filereader)
You can use readallfunction. Please try this code:
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import json
filename = "sample.json"
container_name="test"
constr = ""
blob_service_client = BlobServiceClient.from_connection_string(constr)
container_client = blob_service_client.get_container_client(container_name)
blob_client = container_client.get_blob_client(filename)
streamdownloader = blob_client.download_blob()
fileReader = json.loads(streamdownloader.readall())
print(fileReader)
Result: