Get the extension of uploaded file in flask - python

I'm writing a flask API to extract text from the document. I want to check the extension and if it is pdf I'll give it to the pdf miner else docx2txt.
#app.route('/text-extraction', methods = ['POST'])
def text_extractions():
f = request.files['files']
split_tup = os.path.splitext(f)
file_extension = split_tup[1]
if file_extension == '.pdf':
return extract_text(f)
else:
text = docx2txt.process(f)
if extract_text:
return text.replace('\t', ' ')
return None

Related

Using string matching from csv to find patterns in Python using results from OCR

I am new to python and I want to use the results from the OCR (string) to be able to match the first column of my csv file and then only if the condition is true (the string from ocr matches to the one in the csv then it should use the pic. I get an error as soon as I try to integrate the code together.
For the OCR, I am using pytesseract and I am using Flask to render the web app.
The error I get is : AttributeError: '_io.TextIOWrapper' object has no attribute 'filename'
New error: The view function for 'upload_image' did not return a valid response. The function either returned None or ended without a return statement.
This error only persists when I try to add this code:
match = extracted_text
matched_row = None
with open("/Users/ri/Desktop/DPL/DPL.csv", "r") as file:
# Read file as a CSV delimited by tabs.
reader = csv.reader(file, delimiter='\t')
for row in reader:
if row[0] == match:
matched_row = row
print(matched_row)
app.py
#app.route('/', methods=['POST'])
def upload_image():
if request.method == 'POST':
# checks whether or not the post request has the file part
if 'file' not in request.files:
flash('No file part')
return redirect(request.url)
file = request.files['file']
# if user does not select file, browser also
# submit a empty part without filename
if file.filename == '':
flash('No file selected for uploading')
return redirect(request.url)
if file and allowed_file(file.filename):
filename = secure_filename(file.filename)
file.save(os.path.join(os.getcwd() +
UPLOAD_INPUT_IMAGES_FOLDER, file.filename))
flash('File successfully uploaded')
# calls the ocr_processing function to perform text extraction
extracted_text = ocr_processing(file)
print(extracted_text)
match = extracted_text
matched_row = None
with open("/Users/ri/Desktop/DPL/DPL.csv", "r") as f:
# Read file as a CSV delimited by tabs.
reader = csv.reader(f, delimiter='\t')
for row in reader:
if row[0] == match:
matched_row = row
print(matched_row)
loaded_vec = CountVectorizer(
vocabulary=pickle.load(open("./tfidf_vector.pkl", "rb")))
loaded_tfidf = pickle.load(open("./tfidf_transformer.pkl", "rb"))
model_pattern_type = pickle.load(
open("./clf_svm_Pattern_Category.pkl", "rb"))
model_pattern_category = pickle.load(
open("./clf_svm_Pattern_Type.pkl", "rb"))
match = [match]
X_new_counts = loaded_vec.transform(
match)
# .values.astype('U')
X_new_tfidf = loaded_tfidf.transform(X_new_counts)
predicted_pattern_type = model_pattern_type.predict(X_new_tfidf)
your_predicted_pattern_type = predicted_pattern_type[0]
predicted_pattern_category = model_pattern_category.predict(
X_new_tfidf)
your_predicted_pattern_category = predicted_pattern_category[0]
return render_template('uploads/results.html',
msg='Processed successfully!',
match=match,
your_predicted_pattern_category=your_predicted_pattern_category,
your_predicted_pattern_type=your_predicted_pattern_type,
img_src=UPLOAD_INPUT_IMAGES_FOLDER + file.filename)
# break
else:
print("no mattern found")
else:
flash('Allowed file types are txt, pdf, png, jpg, jpeg, gif')
return redirect(request.url)

How to save images in dynamic folder?

first python script generating folder based on request. second script giving similar image and saving in generated folder. While in multiple request, images is not saving as per request.Only images saving in first folder generated. for example if three concurrent request came it generated 3 folders.but all result is saving in first folder other 2 folders is empty.so second and third user not getting result.
1st Script
#app.route('/imageUploads', methods=['GET', 'POST'])
def upload_img():
import secrets
import string
N = 9
res = ''.join(secrets.choice(string.ascii_uppercase + string.digits)
for i in range(N))
print("The generated random string : " + str(res))
timesearch = str(res)
randomvariable = timesearch
result = 'static/' + randomvariable
globe = result;
if not gfile.Exists(result):
result = 'static/' + randomvariable
os.mkdir(result)
#shutil.rmtree(result)
else:
for root, dirs, files in os.walk(result):
for file in files:
os.remove(os.path.join(root, file))
if request.method == 'POST' or request.method == 'GET':
print(request.method)
# check if the post request has the file part
if 'file' not in request.files:
print('No file part')
return redirect(request.url)
file = request.files['file']
#print(file.filename)
# if user does not select file, browser also
# submit a empty part without filename
if file.filename == '':
print('No selected file')
return redirect(request.url)
if file and allowed_file(file.filename):
filename = secure_filename(file.filename)
#filename = file.filename
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
inputloc = os.path.join(app.config['UPLOAD_FOLDER'], filename)
recommend(inputloc, extracted_features,randomvariable)
#os.remove(inputloc)
#label = label1.replace("\n", "")
# name = os.path.basename(pred_final[neighbor])
image_path = randomvariable
image_list =[os.path.join(image_path,file) for file in os.listdir(result)
if not file.startswith('.')]
image_list = [k.replace("\\","/") for k in image_list]
)
images = []
resultImages = []
for i, image in enumerate(image_list):
v = len(image_list)
print(v,"number of images are")
images.append({'path': 'https://localhost:5000/'+image, 'fileName': os.path.basename(image)})
resultImages.append(os.path.basename(image))
with open('data.json', "w") as f:
json.dump({"resultImages": resultImages}, f)
2nd python script
def get_top_k_similar(image_data, pred, pred_final, k=10):
result = 'static/' + recommend.randomString
# cosine calculates the cosine distance, not similiarity. Hence no need to reverse list
top_k_ind = np.argsort([cosine(image_data, pred_row) \
for ith_row, pred_row in enumerate(pred)])[:k]
print(top_k_ind)
for i, neighbor in enumerate(top_k_ind):
image = ndimage.imread(pred_final[neighbor])
name = os.path.basename(pred_final[neighbor])
name = os.path.join('static', recommend.randomString, name)
imsave(name, image)

Extrating text with textract from S3 bucket

I am trying to retrieve a .doc file from a s3 bucket and use textract to read its text. In order to do so, I created these two functions:
def process_files(filepath):
s3 = s3fs.S3FileSystem()
filename = 's3://' + bucket_name + '/' + filepath
_, ext = os.path.splitext(filename)
if ext == '.pdf':
extract_string = pdf_to_string(s3, filename)
return extract_string
elif ext == '.doc':
extract_string = doc_to_string(s3, filename)
return extract_string
def doc_to_string(s3_file, filename):
"""
convert an .doc or .docs file into string
"""
print(filename)
print(s3_file.ls('/myname/test_files/*'))
text = textract.process(filename)
return text
However, I am getting the error:
Is this the right path/to/file/you/want/to/extract.doc
Therefore I changed my code in order to change the path:
def doc_to_string(s3_file, filename):
"""
convert an .doc or .docs file into string
"""
text = textract.process(s3_file.ls('/myname/test_files/*'))
return text
But I get:
Path should be string bytes or os.pathlike

How to write form data to a text document

I'm trying to get my form data to be captured and written to a word document. I have the following function...
def index():
FILE_TYPES = set(['txt', 'doc', 'docx', 'odt', 'pdf', 'rtf', 'text', 'wks', 'wps', 'wpd'])
mail = Mail(app)
errors = ''
form = ApplicationForm(CombinedMultiDict((request.files, request.form)))
capture = [form.department.data, form.name.data, form.address.data, str(form.telephone.data), form.email.data, form.explain.data]
department_data = form.department.data
name_data = form.name.data
if department_data == 'cpoms':
flash(capture)
if form.validate_on_submit():
flash('Thanks %s, we will try to get back to your regarding you application as soon as possible.' % form.name.data)
print "Form successfully submitted"
submit_name = form.file_upload.data.filename
if '.' in submit_name and submit_name.rsplit('.', 1)[1] in FILE_TYPES:
filename = secure_filename(submit_name)
form.file_upload.data.save('uploads/' + filename)
return redirect('home')
Does anyone know how I can move the data from the capture variable and write this to a text document?
How about:
with open("/path/to/file.txt", "a") as write_file:
for cap in capture:
write_file.write(capt)
I hope this will work.

outputting .zip file in django

I want to upload zip file with .csv files and output zip file with .vm files.
I use this code:
def csv_archive_to_vm(request):
response = HttpResponse(content_type='application/force-download')
work_string = ''
if request.method == "POST":
##reading input zip file
input_file = request.FILES.get('file')
zf = zipfile.ZipFile(input_file)
for info in zf.infolist():
##reading files in archive
path = re.search('(.*\.csv)', info.filename)
path_name = re.search('(.*/)(.*\.csv)', info.filename)
for string in zf.open(info.filename):
quotes_search = re.search('"(.*)",\s*"(.*)",\s*"(.*)"', string)
if quotes_search:
descr = quotes_search.group(1)
macro_name = quotes_search.group(2)
say = quotes_search.group(3)
new_lines_search = re.search('/n', say)
if new_lines_search:
say = re.sub('/n', '\n\t\t', say)
##making content for new files for new archive
work_string = work_string + '##' + descr + '\n#macro(' + macro_name + ')\n\t#random()\n\t\t' + say + '\n\t#end\n#end\n\n'
##outputting new archive
zipdata = StringIO()
zf_create = zipfile.ZipFile(zipdata, mode='a')
try:
if path_name:
zf_create.writestr(str(path_name.group(1)) + str(path_name.group(2))[0:-4] + '.vm', work_string)
finally:
zf_create.close()
work_string = ''
response = HttpResponse(zipdata.read())
response['Content-Disposition'] = 'attachment; filename=assistant-linguistics_vm.zip'
response['Content-Type'] = 'application/x-zip'
return response
but i get empty zip archive, with 0kb weight. What am i doing wrong? Thanks.

Categories