Hi, I have a small Python script which untars a list of files present in a folder. Below is the script.
app = Flask(__name__)


@app.route('/untarJson')
def untarJson():
    """Extract autodiscovery/report.json from every .tar.gz in the input folder.

    Writes each extracted report to <outdir>/<archive-basename>.json.
    NOTE(review): jsonfiles is collected but never returned here — the answer
    below adds the aggregation and the return value.
    """
    outdir = "C:\\Users\\esrilka\\Documents\\Tar Files\\Untar"
    inputfilefolder = "C:\\Users\\esrilka\\Documents\\Tar Files\\New tar files\\"
    jsonfiles = []
    for filenames in os.listdir(inputfilefolder):
        if filenames.endswith(".tar.gz"):
            head, tail = os.path.split(filenames)
            # strip both extensions: "x.tar.gz" -> "x"
            basename = os.path.splitext(os.path.splitext(tail)[0])[0]
            t = tarfile.open(os.path.join(inputfilefolder, filenames), 'r')
            for member in t.getmembers():
                if "autodiscovery/report.json" in member.name:
                    with open(os.path.join(outdir, basename + '.json'), 'wb') as f:
                        f.write(t.extractfile('autodiscovery/report.json').read())


if __name__ == '__main__':
    app.run(debug=True)
It works fine without Flask: the folder contains four tar files and all 4 files are untarred.
But when I use Flask, only one file is untarred and only that one file name is displayed.
How can I untar all the files inside a folder and also return the names of the files (i.e., only the short names, not the full paths)?
See if the code below works for you. I have changed your original code only a little, and it works without any issues. All the available tar.gz files are untarred, and the file names get displayed after the request completes:
from flask import Flask, jsonify
import tarfile
import os

app = Flask(__name__)


@app.route('/untarJson')
def untarJson():
    """Extract autodiscovery/report.json from every .tar.gz in inputfilefolder.

    Returns a JSON array of the paths of the extracted .json files, with
    HTTP status 200.
    """
    outdir = "C:\\tests\\untared"
    inputfilefolder = "C:\\tests"
    jsonfiles = []
    for filenames in os.listdir(inputfilefolder):
        if filenames.endswith(".tar.gz"):
            head, tail = os.path.split(filenames)
            # strip both extensions: "x.tar.gz" -> "x"
            basename = os.path.splitext(os.path.splitext(tail)[0])[0]
            t = tarfile.open(os.path.join(inputfilefolder, filenames), 'r')
            for member in t.getmembers():
                if "autodiscovery/report.json" in member.name:
                    with open(os.path.join(outdir, basename + '.json'), 'wb') as f:
                        f.write(t.extractfile('autodiscovery/report.json').read())
                    jsonfiles.append(os.path.join(outdir, basename + '.json'))
    return jsonify(jsonfiles), 200


if __name__ == '__main__':
    app.run(debug=True)
After request completed, something like below will be returned (output will be different in your case),
[
"C:\tests\untared\autodiscovery1.json",
"C:\tests\untared\autodiscovery2.json",
"C:\tests\untared\autodiscovery3.json"
]
Related
Edit: here's a git repo for easy testing:
https://gitlab.com/qualisign/ugit-bdd/
I want to refactor some repeated code from a step_def file to a conftest.py file. Here's what the step_def looks like:
@scenario('../features/CLI.feature',
          'store file in object database')
def test_file_stored_by_content_address():
    """Scenario: store file in object database."""
    pass


@given("a file exists at some full path within a ugit dir", target_fixture="file_exists_at_path")
def file_exists_at_path(file_within_ugit_dir):
    # Re-export the conftest fixture under the step's target fixture name.
    return file_within_ugit_dir


@when("I enter ugit hash-object followed by that path")
def file_gets_hashed(file_exists_at_path):
    dir_name = os.path.dirname(file_exists_at_path)
    base_name = os.path.basename(file_exists_at_path)
    os.chdir(dir_name)
    os.system(f'ugit hash-object {base_name}')


@then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
    with open(file_hashed, "rb") as f:
        contents = f.read()
        # NOTE(review): file_path is undefined here, and f.read() after a full
        # read returns b"" — both issues are addressed in the answers below.
        with open(file_path, "rb") as hf:
            assert hf.read() == f.read()
And here's the conftest.py:
import os
import subprocess
import hashlib
import pytest
from pytest_bdd import scenario, given, when, then, parsers

WISE_WORDS = "Don\\'t be a fool! I\\'ll call you later."


@pytest.fixture(scope="session")
def is_ugit_dir(tmp_path_factory):
    """Create a temp dir, chdir into it, and initialise a ugit repo there."""
    path = tmp_path_factory.mktemp('data')
    os.chdir(path)
    subprocess.run(['ugit', 'init'])
    return path


@pytest.fixture
def file_within_ugit_dir(is_ugit_dir):
    """Write wise_words.txt into the ugit dir and return its full path."""
    path = is_ugit_dir
    full_path = f'{path}/wise_words.txt'
    os.system(f'echo {WISE_WORDS} > wise_words.txt')
    return full_path


@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    """
    Returns the full path to a hash-object within the objects database
    """
    subprocess.run(['ugit', 'hash-object', file_within_ugit_dir])
    # there should now be a file with a sha1 content-address in the following directory
    # NOTE(review): os.path.dirname(is_ugit_dir) points one level ABOVE the
    # repo dir — this is the bug diagnosed in the accepted answer.
    objects_dir = os.path.dirname(is_ugit_dir) + '/.ugit/objects/'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        sha_hash = hashlib.sha1(f.read()).hexdigest()
    return objects_dir + sha_hash
When I run the test, it seems that the temporary directory is not being kept open between steps:
t-74/.ugit/objects/7b5ee3d8d42c66048125a3937a0170ffdaf7b272'
#then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_hashed):
> with open(file_hashed, "rb") as f:
E FileNotFoundError: [Errno 2] No such file or directory: '/private/var/folders/m2/99x5jvw95ll6sbtgvj5md9700000gp/T/pytest-of-davidjoseph/pytest-74/.ugit/objects/7b5ee3d8d42c66048125a3937a0170ffdaf7b272'
/Users/davidjoseph/projects/ugit-bdd/tests/step_defs/test_cli.py:43: FileNotFoundError
-------------------------------------- Captured stdout call ---------------------------------------
Initialized empty ugit repository in /private/var/folders/m2/99x5jvw95ll6sbtgvj5md9700000gp/T/pytest-of-davidjoseph/pytest-74/data1/.ugit
7b5ee3d8d42c66048125a3937a0170ffdaf7b272
Is there any way to keep this temp directory open so it can be reused between fixtures in the conftest.py file, and eventually in the step_def file?
Changing the scope of the is_ugit_dir fixture to "session" as suggested in the comment is sufficient; all the rest are the errors in your own code:
path = tmp_path_factory.mktemp('data')
os.chdir(path)
subprocess.run(['ugit', 'init'])
You change the current working directory to /tmp/pytest-smth/data and invoke ugit init in there - I assume the tool creates repository metadata at /tmp/pytest-smth/data/.ugit then. Later, you use
objects_dir = os.path.dirname(is_ugit_dir)+'/.ugit/objects/'
to create the objects dir - this will get you /tmp/pytest-smth/.ugit/objects. No wonder this directory doesn't exist. Changing it to e.g. objects_dir = is_ugit_dir / '.ugit' / 'objects' fixes the first error. As a follow-up, the return of file_hashed fixture has to be changed to objects_dir / sha_hash to work with pathlib paths.
contents = f.read()
with open(file_path, "rb") as hf:
assert hf.read() == f.read()
Aside that file_path is not defined (I guess this should be file_within_ugit_dir), you are reading the file into contents and then again. Why that? Either rewind the file via f.seek(0) before invoking f.read() again or use contents for comparison.
Here's the full working code, with minimal necessary changes:
conftest.py
import os
import subprocess
import hashlib
import pytest
from pytest_bdd import scenario, given, when, then, parsers

WISE_WORDS = "Don\\'t be a fool! I\\'ll call you later."


@pytest.fixture(scope="session")
def is_ugit_dir(tmp_path_factory):
    """Create a session-scoped temp dir, chdir into it, and run `ugit init`."""
    path = tmp_path_factory.mktemp('data')
    os.chdir(path)
    subprocess.run(['ugit', 'init'])
    return path


@pytest.fixture
def file_within_ugit_dir(is_ugit_dir):
    """Write wise_words.txt into the repo dir and return its pathlib path."""
    path = is_ugit_dir
    full_path = path / 'wise_words.txt'
    os.system(f'echo {WISE_WORDS} > wise_words.txt')
    return full_path


@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    """
    Returns the full path to a hash-object within the objects database
    """
    subprocess.run(['ugit', 'hash-object', file_within_ugit_dir])
    # there should now be a file with a sha1 content-address in the following directory
    objects_dir = is_ugit_dir / '.ugit' / 'objects'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        data = b'blob\x00' + f.read()  # prepend the object type
        sha_hash = hashlib.sha1(data).hexdigest()
    return objects_dir / sha_hash
step_def.py
import os
from pytest_bdd import scenario, given, when, then, parsers


@scenario('features/CLI.feature', 'store file in object database')
def test_file_stored_by_content_address():
    """Scenario: store file in object database."""
    pass


@given("a file exists at some full path within a ugit dir", target_fixture="file_exists_at_path")
def file_exists_at_path(file_within_ugit_dir):
    # Re-export the conftest fixture under the step's target fixture name.
    return file_within_ugit_dir


@when("I enter ugit hash-object followed by that path")
def file_gets_hashed(file_exists_at_path):
    dir_name = os.path.dirname(file_exists_at_path)
    base_name = os.path.basename(file_exists_at_path)
    os.chdir(dir_name)
    os.system(f'ugit hash-object {base_name}')


@then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
    with open(file_hashed, "rb") as f:
        # drop the "blob\x00" object-type prefix before comparing
        contents = f.read().strip(b"blob\x00")
    with open(file_within_ugit_dir, "rb") as hf:
        assert hf.read() == contents
I would say that you have logic issue in your code.
According to the test scenario the fixture file_hashed must return a path to the existing file containing hash. One can see it here:
@then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
    with open(file_hashed, "rb") as f:
        contents = f.read()
        # file_path is undefined — this is the quoted defect under discussion
        with open(file_path, "rb") as hf:
            assert hf.read() == f.read()
In the conftest.py you are not creating the file containing hash. You are creating a dummy link instead, and because there is nothing on that link, you get FileNotFoundError. Error is here (you code does not create a hash file):
@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    # Builds the expected hash-object path without ever creating the file —
    # the quoted defect: nothing exists at the returned path.
    objects_dir = os.path.dirname(is_ugit_dir) + '/.ugit/objects/'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        sha_hash = hashlib.sha1(f.read()).hexdigest()
    return objects_dir + sha_hash
I'm getting an error when I make a Postman request to my API while trying to read files from a directory.
cwd = os.getcwd()
print(cwd)
cwd = cwd.replace('\\','/')
print(cwd)
path = cwd + "/training_data/"
print(path)
try:
for files in os.listdir(path):
data = open(path + files,'r').readlines()
bot.train(data)
except Exception as e:
return jsonify("Error while opening file",path,cwd,os.listdir(path))
I'm getting the following exception:
[
"Error while opening file",
"C:/Users/RakeshS/Desktop/app/training_data/",
"C:/Users/RakeshS/Desktop/app",
[
"code.txt",
"deputation1.txt",
"football.txt",
"Greeting.txt",
"internetaccess.txt",
"intravels.txt",
"sentiment.txt",
"system.txt"
]]
Why is it not able to open the file and read data when i'm getting all the list of files inside the directory?
Here is a complete solution to your problem:
from flask import Flask, jsonify
import os

app = Flask(__name__)


@app.route('/')
def hello_world():
    """Read every file under ./training_data and report success/failure as JSON."""
    cwd = os.getcwd()
    # build the path portably instead of string concatenation
    path = os.path.join(os.getcwd(), 'training_data')
    try:
        for file in os.listdir(path):
            path_and_file = os.path.join(path, file)
            data = open(path_and_file, 'r').readlines()
            print(data)  # To print everything from a file
        return jsonify("Files successfully opened", path, cwd, os.listdir(path))
    except:
        return jsonify("There was error opening files", path, cwd, os.listdir(path))


if __name__ == '__main__':
    app.run()
Here is the output:
Explanation:
In my example, I put it on / route, but you can put it where ever you want.
Whenever I go to / route, I get JSON response. os.getcwd() gets me current directory, but I join two paths using os.path.join() function. From python documentation:
Join one or more path components intelligently.
You can read more on python documentation. Next, since I get the path to training_data, I need to join again path to training_data and file. And I return JSON data. If anything goes wrong, you can print traceback in except clause and also return data, so that flask doesn't raise error for returning no response to the user.
P.S.
training_data folder is in a same level as a your flask application.
I upload a file to the Dropbox API, but it recreates on Dropbox the whole directory structure from my computer, starting at the root folder — home, then the user folder, and so on down to the file's source folder. If I cut that structure out of the path, the library can no longer treat it as a file path and gives an error message.
My code is:
def upload_file(project_id, filename, dropbox_token):
    """Upload *filename* to Dropbox and return a temporary download link.

    NOTE(review): the absolute local path is used as the Dropbox destination,
    which is why the whole local directory tree appears in Dropbox — the
    problem described in the question.
    """
    dbx = dropbox.Dropbox(dropbox_token)
    file_path = os.path.abspath(filename)
    with open(filename, "rb") as f:
        dbx.files_upload(f.read(), file_path, mute=True)
    link = dbx.files_get_temporary_link(path=file_path).link
    return link
It works, but I need something like:
file_path = os.path.abspath(filename)
chunks = file_path.split("/")
name, dir = chunks[-1], chunks[-2]
which gives me mistake like:
dropbox.exceptions.ApiError: ApiError('433249b1617c031b29c3a7f4f3bf3847', GetTemporaryLinkError('path', LookupError('not_found', None)))
How could I make only parent folder and filename in the path?
For example if I have
/home/user/project/file.txt
I need
/project/file.txt
you have /home/user/project/file.txt and you need /project/file.txt
I would split according to os default separator (so it would work with windows paths as well), then reformat only the 2 last parts with the proper format (sep+path) and join that.
import os

# os.sep = "/"  # if you want to test that on Windows

s = "/home/user/project/file.txt"
# Keep only the last two path components (parent dir + file name),
# each re-prefixed with the platform separator.
path_end = os.sep + os.sep.join(s.split(os.sep)[-2:])
result:
/project/file.txt
I assume the following code should works:
def upload_file(project_id, filename, dropbox_token):
    """Upload *filename* to Dropbox under /<parent-dir>/<name> and return a
    temporary download link."""
    dbx = dropbox.Dropbox(dropbox_token)
    abs_path = os.path.abspath(filename)
    # keep only the immediate parent directory and the file name
    directory, file = os.path.split(abs_path)
    _, directory = os.path.split(directory)
    dropbox_path = os.path.join(directory, file)
    with open(abs_path, "rb") as f:
        dbx.files_upload(f.read(), dropbox_path, mute=True)
    link = dbx.files_get_temporary_link(path=dropbox_path).link
    return link
I'm writing a small web page whose task is to let a user upload his input file and with uploading I want to execute my calculation program in python which will give me output for the user.
My code looks like this:
import os
import os.path
import datetime
import subprocess
from flask import Flask, render_template, request, redirect, url_for
from werkzeug import secure_filename

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['ALLOWED_EXTENSIONS'] = set(['txt', 'gro', 'doc', 'docx'])

# Timestamp computed once at import time, used as the stored file name.
current_time = datetime.datetime.now()
file_time = current_time.isoformat()
proper_filename = file_time


def allowed_file(filename):
    """Return True if filename has one of the allowed extensions."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1] in app.config['ALLOWED_EXTENSIONS']


def run_script():
    # NOTE(review): passes the literal string '/uploads/proper_filename'
    # rather than the actual saved path — the bug fixed in the answer below.
    subprocess.call(['/home/martyna/Dropbox/programowanie/project_firefox/topologia.py', '/uploads/proper_filename'])


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload():
    """Save the uploaded file and run the calculation script on it."""
    file = request.files['file']
    if file and allowed_file(file.filename):
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], proper_filename))
        # NOTE(review): run_script() returns None, so .start() raises
        # AttributeError — see the answer below.
        run_script().start()
        return "Thank you for uploading"


if __name__ == '__main__':
    app.debug = True
    app.run(host='0.0.0.0')
Uploading goes well, but the problem is that when I hit upload I get message OSError: [Errno 13] Permission denied and the line causing the problem is:
subprocess.call(['/home/martyna/Dropbox/programowanie/project_firefox/topologia.py', '/uploads/2014-05-16T22:08:19.522441'])
program topologia.py runs from command python topologia.py input_file
I have no idea how to solve that problem.
You have two problems:
Your script is probably not marked as executable. You can work around that by using the current Python executable path; use sys.executable to get the path to that.
You are telling the script to process /uploads/proper_filename, but the filename you actually upload your file to is not the same at all; you should use the contents of the string referenced by proper_filename instead.
Put these two together:
import sys
from flask import current_app


def run_script():
    """Run topologia.py (via the current interpreter) on the uploaded file."""
    filename = os.path.join(current_app.config['UPLOAD_FOLDER'], proper_filename)
    subprocess.call([
        sys.executable,  # avoids needing the script to be marked executable
        '/home/martyna/Dropbox/programowanie/project_firefox/topologia.py',
        filename])
You do not need to call .start() on the result of run_script(); you'll get an attribute error on NoneType. Just call run_script() and be done with it:
run_script()
Executing a script from a command line and from a server will not be done with the same permissions.
user#mycomputer:~$ ./script
In this example, ./script is launched by user, so if it does any input/output, the access rights will depend on user's rights.
When a server runs the script — in your case Flask — it is probably www-data that launches it, so the access rights are not the same.
So to create a file into a folder the user executing the script should have the permissions on the folder.
I have created a Google App Engine project in Python it runs on my localhost but when I upload it onto geo-event-maps.appspot.com the markers are not displaying.
I have a cron which runs to call on /place.
I have no log errors
My datastore is empty!
The txt files are being uploaded with:
file_path = os.path.dirname(__file__)
path = os.path.join(file_path, 'storing', 'txtFiles')
Is there a way of checking the files have been uploaded?!
I am at an absolute loss. Has anyone had these problems before?
Below is my main.py:
'''
Created on Mar 30, 2011
#author: kimmasterson
'''
#!/usr/bin/env python
from google.appengine.ext import webapp
from google.appengine.ext import db
from placemaker import placemaker
import logging
import wsgiref.handlers
import os, glob
from google.appengine.dist import use_library
use_library('django', '1.2')
from google.appengine.ext.webapp import template
class Story(db.Model):
id = db.StringProperty()
loc_name = db.StringProperty()
title = db.StringProperty()
long = db.FloatProperty()
lat = db.FloatProperty()
link = db.StringProperty()
date = db.StringProperty()
class MyStories(webapp.RequestHandler):
def get(self):
temp = db.Query(Story)
temp = temp.count()
story_set = Story.all()
template_values = {
'storyTemp': story_set
}
path = os.path.join(os.path.dirname(__file__), 'index.html')
self.response.out.write(template.render(path, template_values))
class place(webapp.RequestHandler):
def get(self):
#path = '/storing/txtFiles'
file_path = os.path.dirname(__file__)
path = os.path.join(file_path, 'storing', 'txtFiles')
try:
for infile in glob.glob(os.path.join(path, '*.txt')):
#print infile
f = open(infile, 'r')
data = f.read()
newfile = infile.replace('.txt', '')
newfile = newfile.replace('/storing/txtFiles/', '')
#print newfile
storyname = 'http://www.independent.ie/national-news/' + newfile
#print storyname
#print newfile
#logging.info(data)
p = placemaker('HSnG9pPV34EUBcexz.tDYuSrZ8Hnp.LowswI7TxreF8sXrdpVyVIKB4uPGXBYOA9VjjF1Ca42ipd_KhdJsKYjI5cXRo0eJM-')
print p.find_places(data)
for place in p.places:
splitted = place.name.split()
for word in splitted:
temp = db.Query(Story)
temp = temp.filter("link = ", storyname)
results = temp.fetch(limit=1)
if len(results) > 0:
break
elif 'IE' in word:
print temp
print 'success'
print 'name of the file is:' + newfile
story = Story(name=newfile, long=place.centroid.longitude, lat=place.centroid.latitude, link=storyname, loc_name=place.name, title=newfile).put()
#logging.info(type(place.centroid.latitude))
except:
print 'error'
def main():
application = webapp.WSGIApplication([('/', MyStories), ('/place', place)],
debug=True)
wsgiref.handlers.CGIHandler().run(application)
if __name__ == '__main__':
main()
Here is my cron.yaml
# Schedules a daily GET of /place so stories are re-ingested.
cron:
- description: running place
  url: /place
  schedule: every day 11:05
App.yaml is as follows:
application: geo-event-maps
version: 2
runtime: python
api_version: 1
handlers:
- url: .*
script: main.py
builtins:
- datastore_admin: on
You need to be sure your files are being uploaded with your application code, they can not be marked as static files or they won't be accessible to your code. Run appcfg.py with the --verbose flag and make sure they get uploaded.
Second issue, in your place class you define path as path = '/storing/txtFiles'. That is wrong. Your path will probably be something more like:
file_path = os.path.dirname(__file__)
path = os.path.join(file_path, 'storing', 'txtFiles')
Also, I suggest you don't use print, instead use self.response.out.write(stuff_to_write).
You might also want to see about using key_names. You'll be able to make your code quite a bit more efficient then by running a batch db.get instead of a db.Query inside a nested for-loop. Use Appstats and try to minimize the number of RPCs.
First make sure that you are accessing your files using a relative path.
Next ensure you have not marked the files as static within your app.yaml as static files are not uploaded to the same place as your application (they are sent somewhere that the Google Frontend servers can serve them more directly).