I am working on a project where I may need to open and read a text file either on a local server or remotely (via URL). Is there a Python function that works like PHP's
file_get_contents()
and can do this? Right now I have:
def get_data_from_file(path):
    for i, line in enumerate(open(path)):
        .....
I would like to be able to pass in either a local path or a remote URL.
You could try:
import os
import urllib

def file_get_contents(path):
    try:
        # Try to fetch path as a URL, saving a local copy.
        urllib.urlretrieve(path, filename=path)
    except IOError:
        print 'not a page'
    if os.path.exists(path):
        with open(path, 'r') as f:
            data = f.read()
        print data
    else:
        print 'no such file'
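For comparison, here is a minimal Python 3 sketch of the same idea; it assumes the URL scheme prefix is enough to tell a remote path from a local one, and that the remote content is UTF-8 text:

from urllib.request import urlopen

def file_get_contents(path):
    # Crude check: treat anything with a URL scheme as remote.
    if path.startswith(('http://', 'https://', 'ftp://')):
        with urlopen(path) as remote:
            return remote.read().decode('utf-8')
    # Otherwise assume it is a local file.
    with open(path, 'r') as local:
        return local.read()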
I am attempting to make a program update itself to the newest version I have made, e.g. when I add new functionality. It would be useful to be able to upload the updated file to a central location like my Raspberry Pi and have the program update itself across all of my computers without updating each one individually.
I have written the code below, but it does not work. It correctly recognizes when the file is up to date, and it successfully downloads the new version and deletes itself, but the new program is never run, and no error messages are shown.
Update test.py:
# updaterV1.py
import time
import requests
import os
import hashlib

time.sleep(5)
cwd = os.getcwd()

# Fetch the hash of the latest version from the server.
URL = r"http://[raspberry pi's ip]/update%20files/dev/hash.txt"
hash_path = os.path.join(cwd, "remote hash.txt")
with open(hash_path, "wb") as f:
    f.write(requests.get(URL).content)
with open(hash_path, "r") as hash_file:
    remotehash = hash_file.readline().strip()
os.remove(hash_path)

# Hash this file and compare it to the remote hash.
hasher = hashlib.sha256()
with open(__file__, 'rb') as self_file:
    selfunhashed = self_file.read()
    hasher.update(selfunhashed)
selfhash = hasher.hexdigest()
print(selfhash)
print(remotehash)

if selfhash == remotehash:
    print("program is up to date")
    input()
else:
    # Download the new version under a temporary name.
    update_path = os.path.join(cwd, "temp name update.py")
    URL = r"http://[raspberry pi's ip]/update%20files/dev/update.py"
    with open(update_path, "wb") as f:
        f.write(requests.get(URL).content)

    # The first line of the new file is a comment of the form "#name.py";
    # strip the leading "#" to recover the real file name.
    with open(update_path, "r") as f:
        name = f.readline().strip()
    name = name[1:]
    update_path = os.path.join(cwd, name)

    # Replace any old copy, rename the download, and run it.
    try:
        os.remove(update_path)
    except OSError:
        pass
    os.rename(os.path.join(cwd, "temp name update.py"), update_path)
    os.system("python \"" + update_path + "\"")
    print("removing self file now")
    os.remove(__file__)
It uses a separate TXT file, stored in the same server folder, containing the hash of the program, so the client can check the remote file's hash without downloading the actual file and hashing it locally.
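For completeness, the hash.txt the updater expects could be generated on the server with a few lines like the following; this is a hypothetical sketch that assumes update.py sits next to the script and mirrors the hashing done above:

import hashlib

# Hash update.py exactly the way the updater hashes itself.
hasher = hashlib.sha256()
with open("update.py", "rb") as f:
    hasher.update(f.read())

# Write the hex digest as the first line of hash.txt.
with open("hash.txt", "w") as f:
    f.write(hasher.hexdigest() + "\n")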
I'm using the code below to connect to an Azure File Share and upload a file. I would like to choose what extension the file will have, but I can't: I get the error shown below. If I remove .txt, everything works fine. Is there a way to specify the file extension while uploading?
Error:
Exception: ResourceNotFoundError: The specified parent path does not exist.
Code:
import azure.functions as func
from azure.storage.fileshare import ShareFileClient

def main(blobin: func.InputStream):
    file_client = ShareFileClient.from_connection_string(conn_str="<con_string>",
                                                         share_name="data-storage",
                                                         file_path="outgoing/file.txt")
    f = open('/home/temp.txt', 'w+')
    f.write(blobin.read().decode('utf-8'))
    f.close()
    # Operation on file here
    f = open('/home/temp.txt', 'rb')
    string_to_upload = f.read()
    f.close()
    file_client.upload_file(string_to_upload)
I believe the reason you're getting this error is that the outgoing folder doesn't exist in your file share. I took your code and ran it both with and without the extension, and in both situations I got the same error.
Then I created a folder and tried to upload the file and I was able to successfully do so.
Here's the final code I used:
from azure.storage.fileshare import ShareFileClient, ShareDirectoryClient

conn_string = "DefaultEndpointsProtocol=https;AccountName=myaccountname;AccountKey=myaccountkey;EndpointSuffix=core.windows.net"

share_directory_client = ShareDirectoryClient.from_connection_string(conn_str=conn_string,
                                                                     share_name="data-storage",
                                                                     directory_path="outgoing")
file_client = ShareFileClient.from_connection_string(conn_str=conn_string,
                                                     share_name="data-storage",
                                                     file_path="outgoing/file.txt")

# Create the folder first.
# This operation will fail if the directory already exists.
print("creating directory...")
share_directory_client.create_directory()
print("directory created successfully...")

# Operation on file here
f = open('D:\\temp\\test.txt', 'rb')
string_to_upload = f.read()
f.close()

# Upload file
print("uploading file...")
file_client.upload_file(string_to_upload)
print("file uploaded successfully...")
I created a document site on Plone to which files can be uploaded. I saw that Plone saves them on the filesystem in the form of a blob; now I need to fetch them with a Python script that will run the downloaded PDFs through OCR. Does anyone have any idea how to do this? Thank you.
Not sure how to extract PDFs directly from the blob storage, or whether that's possible at all, but you can extract them from a running Plone site (e.g. by executing the script via a browser view):
import os
from Products.CMFCore.utils import getToolByName


def isPdf(search_result):
    """Check mime_type for Plone >= 5.1, otherwise check the file extension."""
    if mimeTypeIsPdf(search_result) or search_result.id.endswith('.pdf'):
        return True
    return False


def mimeTypeIsPdf(search_result):
    """
    Plone 5.1 introduced the mime_type attribute on files.
    Try to get it; if it doesn't exist, fail silently.
    Return True if mime_type exists and is PDF, otherwise False.
    """
    try:
        mime_type = search_result.mime_type
        if mime_type == 'application/pdf':
            return True
    except AttributeError:
        pass
    return False


def exportPdfFiles(context, export_path):
    """
    Get all PDF files of the site and write them to export_path on the filesystem.
    Preserve the folder structure of the site.
    """
    catalog = getToolByName(context, 'portal_catalog')
    search_results = catalog(portal_type='File', Language='all')
    for search_result in search_results:
        # For each PDF file:
        if isPdf(search_result):
            file_path = export_path + search_result.getPath()
            file_content = search_result.getObject().data
            parent_path = '/'.join(file_path.split('/')[:-1])
            # Create missing directories on the fly:
            if not os.path.exists(parent_path):
                os.makedirs(parent_path)
            # Write the PDF in binary mode:
            with open(file_path, 'wb') as fil:
                fil.write(file_content)
            print 'Wrote ' + file_path
    print 'Finished exporting PDF-files to ' + export_path
The example keeps the folder structure of the Plone site in the export directory. If you want the files flat in one directory, a handler for duplicate file names is needed; a sketch of one follows.
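A minimal, hypothetical helper for that flat-export case, de-duplicating by appending a counter to the base name (unique_path is my own name, not part of the script above):

import os

def unique_path(directory, file_name):
    """Return a free path in directory, appending -1, -2, ... if the name is taken."""
    base, ext = os.path.splitext(file_name)
    candidate = os.path.join(directory, file_name)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, '%s-%d%s' % (base, counter, ext))
        counter += 1
    return candidate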
I have a peculiar issue that I can't seem to fix on my own.
I'm attempting to FTP a list of files in a directory over to an iSeries IFS using Python's ftplib library.
Note that the files are in a subdirectory one level down from the Python script.
Below is an excerpt of the code that is giving me trouble:
from ftplib import FTP
import os

localpath = os.getcwd() + '/Files/'

def putFiles():
    hostname = 'host.name.com'
    username = 'myuser'
    password = 'mypassword'
    myftp = FTP(hostname)
    myftp.login(username, password)
    myftp.cwd('/STUFF/HERE/')
    for file in os.listdir(localpath):
        if file.endswith('.csv'):
            try:
                file = localpath + file
                print 'Attempting to move ' + file
                myftp.storbinary("STOR " + file, open(file, 'rb'))
            except Exception as e:
                print(e)
The specific error that is being thrown is:
Attempting to move /home/doug/Files/FILE.csv
426-Unable to open or create file /home/doug/Files to receive data.
426 Data transfer ended.
What I've done so far to troubleshoot:
Initially I thought this was a permissions issue on the directory containing my files. I used chmod 777 /home/doug/Files and re-ran my script, but the same exception occurred.
Next I assumed there was an issue between my machine and the iSeries. I verified that I could indeed put files: using the shell ftp client, I was able to successfully put a file on the iSeries IFS.
Thanks!
Solution
from ftplib import FTP
import os

localpath = os.getcwd() + '/Files/'

def putFiles():
    hostname = 'host.name.com'
    username = 'myuser'
    password = 'mypassword'
    myftp = FTP(hostname)
    myftp.login(username, password)
    myftp.cwd('/STUFF/HERE/')
    for csv in os.listdir(localpath):
        if csv.endswith('.csv'):
            try:
                # STOR with just the file name; open the local file by full path.
                myftp.storbinary("STOR " + csv, open(localpath + csv, 'rb'))
            except Exception as e:
                print(e)
As written, your code is trying to execute the following FTP command:
STOR /home/doug/Files/FILE.csv
Meaning it is trying to create /home/doug/Files/FILE.csv on the IFS. Is this what you want? I suspect that it isn't, given that you bothered to change the remote directory to /STUFF/HERE/.
If you are trying to issue the command
STOR FILE.csv
then you have to be careful how you deal with the Python variable that you've named file. In general, it's not recommended that you reassign a variable that is the target of a for loop, precisely because this type of confusion can occur. Choose a different variable name for localpath + file, and use that in your open(..., 'rb').
Incidentally, it looks like you're using Python 2, since there is a bare print statement with no parentheses. I'm sure you're aware that Python 3 is recommended by now, but if you do stick to Python 2, it's recommended that you avoid using file as a variable name, because it actually means something in Python 2 (it's the name of a type; specifically, the return type of the open function).
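For reference, a rough Python 3 sketch of the corrected loop might look like this; it keeps the question's placeholder host, credentials, and remote directory, and uses pathlib for the local side:

from ftplib import FTP
from pathlib import Path

localpath = Path.cwd() / 'Files'

def put_files():
    with FTP('host.name.com') as myftp:
        myftp.login('myuser', 'mypassword')
        myftp.cwd('/STUFF/HERE/')
        for csv_path in localpath.glob('*.csv'):
            # STOR takes only the remote file name; the local path stays separate.
            with csv_path.open('rb') as local_file:
                myftp.storbinary('STOR ' + csv_path.name, local_file)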
I am using Bottle to create an upload API. The script below is able to upload a file to a directory, but I have two issues I need to address: how can I avoid loading the whole file into memory, and how can I set a maximum size for the uploaded file?
Is it possible to continuously read the file and dump what has been read to disk until the upload is complete? The upload.save(file_path, overwrite=False, chunk_size=1024) function seems to load the whole file into memory. In the tutorial, they point out that using .read() is dangerous.
import os
import sys
from bottle import Bottle, request, run, response, route, default_app, static_file

# logger and time_now are defined elsewhere in the full script.

app = Bottle()

@route('/upload', method='POST')
def upload_file():
    function_name = sys._getframe().f_code.co_name
    try:
        upload = request.files.get("upload_file")
        if not upload:
            return "Nothing to upload"
        else:
            # Get file_name and the extension
            file_name, ext = os.path.splitext(upload.filename)
            if ext in ('.exe', '.msi', '.py'):
                return "File extension not allowed."
            # Determine folder to save the upload
            save_folder = "/tmp/{folder}".format(folder='external_files')
            if not os.path.exists(save_folder):
                os.makedirs(save_folder)
            # Determine file_path
            file_path = "{path}/{time_now}_{file}".\
                format(path=save_folder, file=upload.filename, time_now=time_now)
            # Save the upload to file in chunks
            upload.save(file_path, overwrite=False, chunk_size=1024)
            return "File successfully saved {0}{1} to '{2}'.".format(file_name, ext, save_folder)
    except KeyboardInterrupt:
        logger.info('%s: Someone pressed CTRL + C' % function_name)
    except:
        logger.error('%s: ' % function_name, exc_info=True)
        print("Exception occurred. Location: %s" % function_name)
    finally:
        pass

if __name__ == '__main__':
    run(host="localhost", port=8080, reloader=True, debug=True)
else:
    application = default_app()
I also tried writing the file manually, but it's the same story: the file gets read into memory and it hangs the machine.
file_to_write = open("%s" % (output_file_path), "wb")
while True:
    datachunk = upload.file.read(1024)
    if not datachunk:
        break
    file_to_write.write(datachunk)
Related to this, I've seen the MEMFILE_MAX property, which several SO posts claim can be used to set the maximum upload size. I've tried setting it, but it seems to have no effect: all files, no matter the size, go through.
Note that I want to be able to receive office documents, which could be plain with their usual extensions or zipped with a password.
Using Python 3.4 and Bottle 0.12.7.
Basically, you want to call upload.file.read(1024) in a loop. Something like this (untested):
with open(file_path, 'wb') as dest:
    chunk = upload.file.read(1024)
    while chunk:
        dest.write(chunk)
        chunk = upload.file.read(1024)
(Do not call open on upload; it's already open for you.)
This SO answer includes more examples of how to read a large file without "slurping" it.
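As for the maximum-size question: as far as I can tell, MEMFILE_MAX only limits how much of the request body Bottle keeps in memory (larger bodies are spooled to a temporary file), so it does not reject big multipart uploads, which matches what you observed. One option is to enforce the limit yourself in the same read loop; a sketch, where MAX_SIZE and save_with_limit are illustrative names rather than Bottle APIs:

import os

MAX_SIZE = 10 * 1024 * 1024  # illustrative 10 MB cap

def save_with_limit(upload, file_path, max_size=MAX_SIZE):
    """Stream a Bottle FileUpload to disk, aborting past max_size."""
    written = 0
    try:
        with open(file_path, 'wb') as dest:
            while True:
                chunk = upload.file.read(1024)
                if not chunk:
                    break
                written += len(chunk)
                if written > max_size:
                    raise ValueError("upload exceeds maximum allowed size")
                dest.write(chunk)
    except ValueError:
        os.remove(file_path)  # clean up the partial file
        raise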