Error serving large files when using Flask and Tornado - python

I am serving large static files (~70 MB). I can download the files when running Flask alone, but I get the error below when serving Flask through Tornado:
Exception ignored in: <bound method Future.__del__ of <tornado.concurrent.Future object at 0x32c61acc>>
Traceback (most recent call last):
File "/home/user/virtual/lib/python3.4/site-packages/tornado/concurrent.py", line 333, in __del__
File "/usr/local/lib/python3.4/traceback.py", line 181, in format_exception
File "/usr/local/lib/python3.4/traceback.py", line 153, in _format_exception_iter
File "/usr/local/lib/python3.4/traceback.py", line 18, in _format_list_iter
File "/usr/local/lib/python3.4/traceback.py", line 65, in _extract_tb_or_stack_iter
File "/usr/local/lib/python3.4/linecache.py", line 15, in getline
File "/usr/local/lib/python3.4/linecache.py", line 41, in getlines
File "/usr/local/lib/python3.4/linecache.py", line 126, in updatecache
File "/usr/local/lib/python3.4/tokenize.py", line 437, in open
AttributeError: 'module' object has no attribute 'open'
Here is the code I am using to serve files:

import os

from flask import Response
from tornado.httpserver import HTTPServer
from tornado.ioloop import IOLoop
from tornado.wsgi import WSGIContainer

def download(path):
    def generate():
        with open(path, 'rb') as file_handler:
            while True:
                chunk = file_handler.read(1024)
                if not chunk:
                    break
                yield chunk
    return Response(generate(), direct_passthrough=True, mimetype='application/octet-stream',
                    headers={'Content-Disposition': 'attachment;filename={}'.format(os.path.basename(path))})

http_server = HTTPServer(WSGIContainer(APP))
http_server.listen(PORT, address='0.0.0.0')
IOLoop.instance().start()
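A likely culprit: Tornado's WSGIContainer buffers the entire WSGI response in memory before sending it, so the chunked generator above never actually streams. A minimal sketch of one workaround, assuming the files live in a single directory (DOWNLOAD_DIR is a placeholder): let Tornado serve the large files natively and fall back to the Flask app for everything else.

from tornado.httpserver import HTTPServer
from tornado.ioloop import IOLoop
from tornado.web import Application, FallbackHandler, StaticFileHandler
from tornado.wsgi import WSGIContainer

wsgi_app = WSGIContainer(APP)  # APP is the Flask application from above
tornado_app = Application([
    # Tornado streams static files in chunks itself, bypassing the WSGI buffer.
    (r'/downloads/(.*)', StaticFileHandler, {'path': DOWNLOAD_DIR}),
    # Every other URL is handled by the Flask app.
    (r'.*', FallbackHandler, {'fallback': wsgi_app}),
])

http_server = HTTPServer(tornado_app)
http_server.listen(PORT, address='0.0.0.0')
IOLoop.instance().start()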

Related

Traceback error while uploading file to dropbox in python

I am building a test upload API to try Dropbox out before building the final project, but I get this error when I run the Python file in cmd:
Traceback (most recent call last):
File "C:\Users\sufiy\Desktop\test.py", line 7, in <module>
dbx.files_upload(file_contents, '/testdropbox.txt', mode=dropbox.files.WriteMode.overwrite)
File "C:\Users\sufiy\AppData\Local\Programs\Python\Python311\Lib\site-packages\dropbox\base.py", line 3210, in files_upload
r = self.request(
^^^^^^^^^^^^^
File "C:\Users\sufiy\AppData\Local\Programs\Python\Python311\Lib\site-packages\dropbox\dropbox_client.py", line 326, in request
res = self.request_json_string_with_retry(host,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\sufiy\AppData\Local\Programs\Python\Python311\Lib\site-packages\dropbox\dropbox_client.py", line 476, in request_json_string_with_retry
return self.request_json_string(host,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\sufiy\AppData\Local\Programs\Python\Python311\Lib\site-packages\dropbox\dropbox_client.py", line 538, in request_json_string
raise TypeError('expected request_binary as binary type, got %s' %
TypeError: expected request_binary as binary type, got <class 'str'>
Here is my code:

import dropbox

dbx = dropbox.Dropbox('my api key')

with open('testdropbox.txt', 'r') as f:
    file_contents = f.read()

dbx.files_upload(file_contents, '/testdropbox.txt', mode=dropbox.files.WriteMode('overwrite'))
I am trying to build a program that overwrites the txt file every minute, and I want it to work so I can schedule it with Windows Task Scheduler.
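The traceback points at the payload type: files_upload wants bytes, but read() on a file opened in text mode ('r') returns str. A minimal sketch of the fix, keeping everything else from the question as-is: open the file in binary mode.

import dropbox

dbx = dropbox.Dropbox('my api key')

# Binary mode makes read() return bytes, which files_upload expects.
with open('testdropbox.txt', 'rb') as f:
    file_contents = f.read()

dbx.files_upload(file_contents, '/testdropbox.txt',
                 mode=dropbox.files.WriteMode('overwrite'))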

BS4 MemoryError: stack overflow and EOFError: Ran out of input when using multiprocessing in python

I have a simple Python script that uses the BS4 library and multiprocessing to do some web scraping. Initially the script would not complete because I exceeded the recursion limit; I then found out here that BeautifulSoup trees cannot be pickled and therefore cause issues with multiprocessing, so I followed a recommendation in the top answer, which was to do the following: sys.setrecursionlimit(25000)
This worked fine for a couple of weeks with no issues (as far as I could tell), but today I restarted the script and some of the processes now fail with the errors below:
Traceback (most recent call last):
File "C:/Users/user/PycharmProjects/foo/single_items/single_item.py", line 243, in <module>
Process(target=instance.constant_thread).start()
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\popen_spawn_win32.py", line 89, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\site-packages\bs4\element.py", line 1449, in __getattr__
"'%s' object has no attribute '%s'" % (self.__class__, tag))
MemoryError: stack overflow
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\spawn.py", line 115, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
I am not sure what it means, but here is a pseudocode example of the script I have running:
import sys
from multiprocessing import Process

from bs4 import BeautifulSoup

class foo:
    def __init__(self, url):
        self.url = url

    def constant_scrape(self):
        while True:
            rq = make_get_request(self.url)  # placeholder for the real request
            soup = BeautifulSoup(rq)

if __name__ == '__main__':
    sys.setrecursionlimit(25000)
    url_list = [...]
    for url in url_list:
        instance = foo(url)
        Process(target=instance.constant_scrape).start()
Update 1:
It seems that the same URLs crash every time, even though each URL has (seemingly) the same HTML format as the URLs that do not crash.
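On Windows, multiprocessing spawns children by pickling the Process target (the traceback goes through popen_spawn_win32 and reduction.dump); a bound method like instance.constant_scrape drags its whole instance along, so if the instance has accumulated a soup or any BS4 element as an attribute, the pickling step blows the stack. A sketch of one way around it, assuming the scraped results can be reduced to built-in types (requests and html.parser are stand-ins for whatever the real script uses):

from multiprocessing import Process

import requests
from bs4 import BeautifulSoup

def constant_scrape(url):
    # Module-level function: only the url string is pickled to the child,
    # so no BeautifulSoup tree ever crosses the process boundary.
    while True:
        html = requests.get(url).text
        soup = BeautifulSoup(html, 'html.parser')
        data = [str(s) for s in soup.stripped_strings]  # keep plain built-ins

if __name__ == '__main__':
    url_list = [...]
    for url in url_list:
        Process(target=constant_scrape, args=(url,)).start()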

Parsing GFF files throws TypeError

I am trying to parse a GFF file with Biopython, using the sample code from their website. This is the code:
from BCBio import GFF

in_file = "infile.gff"
in_handle = open(in_file)

for rec in GFF.parse(in_handle):
    print(rec)

in_handle.close()
When I run the code I get the following error:
Traceback (most recent call last):
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/Bio/SeqIO/Interfaces.py", line 47, in __init__
self.stream = open(source, "r" + mode)
TypeError: expected str, bytes or os.PathLike object, not FakeHandle
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "get_genes_dpt.py", line 37, in <module>
for rec in GFF.parse(in_handle):
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/BCBio/GFF/GFFParser.py", line 746, in parse
target_lines):
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/BCBio/GFF/GFFParser.py", line 322, in parse_in_parts
for results in self.parse_simple(gff_files, limit_info, target_lines):
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/BCBio/GFF/GFFParser.py", line 343, in parse_simple
for results in self._gff_process(gff_files, limit_info, target_lines):
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/BCBio/GFF/GFFParser.py", line 637, in _gff_process
for out in self._lines_to_out_info(line_gen, limit_info, target_lines):
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/BCBio/GFF/GFFParser.py", line 699, in _lines_to_out_info
fasta_recs = self._parse_fasta(FakeHandle(line_iter))
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/BCBio/GFF/GFFParser.py", line 560, in _parse_fasta
return list(SeqIO.parse(in_handle, "fasta"))
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/Bio/SeqIO/__init__.py", line 607, in parse
return iterator_generator(handle)
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/Bio/SeqIO/FastaIO.py", line 183, in __init__
super().__init__(source, mode="t", fmt="Fasta")
File "/Users/juliofdiaz/anaconda2/envs/python37/lib/python3.7/site-packages/Bio/SeqIO/Interfaces.py", line 51, in __init__
if source.read(0) != "":
TypeError: read() takes 1 positional argument but 2 were given
I am not sure how to fix the error: judging by the traceback, a FakeHandle is being passed where a str is expected. I am running Biopython 1.78 with conda.
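The traceback bottoms out in bcbio-gff's _parse_fasta, which only runs when the GFF file embeds a ##FASTA section, and that code path (FakeHandle) fails the handle checks in newer Biopython releases. A sketch of a workaround under that assumption: split the FASTA block off and feed only the annotation lines to the parser.

from io import StringIO

from BCBio import GFF

with open("infile.gff") as in_handle:
    text = in_handle.read()

# Keep only the annotation lines; everything after the ##FASTA directive
# is the embedded sequence block that trips up FakeHandle.
gff_part = text.split("##FASTA")[0]

for rec in GFF.parse(StringIO(gff_part)):
    print(rec)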

How to debug vague error lxml.etree.SerialisationError: unknown error -2029930774 in python

I am using some legacy code from Python 2 that has to work with Python 3. So far so good; most things work as they should. However, I get a very vague error from a library called lxml.
As I understand it, lxml is a Python binding to C libraries (libxml2/libxslt).
The problem comes from this piece of code:
with etree.xmlfile(self.temp_file, encoding='utf-8') as xf:
    with xf.element('{http://www.opengis.net/citygml/2.0}CityModel', nsmap=nsmap):
        with open(input_gml, mode='rb') as f:
            context = etree.iterparse(f)
            for action, elem in context:
                if action == 'end' and elem.tag == '{http://www.opengis.net/citygml/2.0}cityObjectMember':
                    # Duplicate feature and subfeatures
                    self.duplicateFeature(xf, elem)
                    # Clean up the original element and the node of its previous sibling
                    # (https://www.ibm.com/developerworks/xml/library/x-hiperfparse/)
                    elem.clear()
                    while elem.getprevious() is not None:
                        del elem.getparent()[0]
            del context
    xf.flush()
It processes this XML file and produces the following error:
Traceback (most recent call last):
File "/usr/local/bin/stetl", line 4, in <module>
__import__('pkg_resources').run_script('Stetl==2.0', 'stetl')
File "/usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py", line 666, in run_script
self.require(requires)[0].run_script(script_name, ns)
File "/usr/local/lib/python3.6/site-packages/pkg_resources/__init__.py", line 1446, in run_script
exec(code, namespace, namespace)
File "/usr/local/lib/python3.6/site-packages/Stetl-2.0-py3.6.egg/EGG-INFO/scripts/stetl", line 43, in <module>
main()
File "/usr/local/lib/python3.6/site-packages/Stetl-2.0-py3.6.egg/EGG-INFO/scripts/stetl", line 36, in main
etl.run()
File "/usr/local/lib/python3.6/site-packages/Stetl-2.0-py3.6.egg/stetl/etl.py", line 157, in run
chain.run()
File "/usr/local/lib/python3.6/site-packages/Stetl-2.0-py3.6.egg/stetl/chain.py", line 172, in run
packet = self.first_comp.process(packet)
File "/usr/local/lib/python3.6/site-packages/Stetl-2.0-py3.6.egg/stetl/component.py", line 213, in process
packet = self.next.process(packet)
File "/usr/local/lib/python3.6/site-packages/Stetl-2.0-py3.6.egg/stetl/component.py", line 213, in process
packet = self.next.process(packet)
File "/usr/local/lib/python3.6/site-packages/Stetl-2.0-py3.6.egg/stetl/component.py", line 213, in process
packet = self.next.process(packet)
File "/usr/local/lib/python3.6/site-packages/Stetl-2.0-py3.6.egg/stetl/component.py", line 199, in process
packet = self.invoke(packet)
File "/app/bgt/etl/stetlbgt/subfeaturehandler.py", line 144, in invoke
del context
File "src/lxml/serializer.pxi", line 925, in lxml.etree.xmlfile.__exit__
File "src/lxml/serializer.pxi", line 1263, in lxml.etree._IncrementalFileWriter._close
File "src/lxml/serializer.pxi", line 1269, in lxml.etree._IncrementalFileWriter._handle_error
File "src/lxml/serializer.pxi", line 199, in lxml.etree._raiseSerialisationError
lxml.etree.SerialisationError: unknown error -2029930774
I'm not sure what's going wrong here. It seems that something is wrong with some oddly encoded character.
How can I debug this?
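One way to narrow this down, since the opaque error only surfaces when xmlfile's __exit__ flushes the writer: serialize each cityObjectMember on its own first. If a single element carries content the UTF-8 serializer cannot handle, the standalone etree.tostring() call fails right at that element, with its source line, instead of at the very end of the run. A debugging sketch, not a fix:

from lxml import etree

with open(input_gml, mode='rb') as f:
    for action, elem in etree.iterparse(f):
        if action == 'end' and elem.tag.endswith('cityObjectMember'):
            try:
                # Probe: serialize this element in isolation.
                etree.tostring(elem, encoding='utf-8')
            except Exception as exc:
                print('offending element near line', elem.sourceline, exc)
            elem.clear()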

How to upload binary file with ftplib in Python?

My Python 2 script uploads files nicely using this method, but Python 3 is presenting problems and I'm stuck as to where to go next (googling hasn't helped).
from ftplib import FTP
ftp = FTP(ftp_host, ftp_user, ftp_pass)
ftp.storbinary('STOR myfile.txt', open('myfile.txt'))
The error I get is:
Traceback (most recent call last):
File "/Library/WebServer/CGI-Executables/rob3/functions/cli_f.py", line 12, in upload
ftp.storlines('STOR myfile.txt', open('myfile.txt'))
File "/Library/Frameworks/Python.framework/Versions/3.1/lib/python3.1/ftplib.py", line 454, in storbinary
conn.sendall(buf)
TypeError: must be bytes or buffer, not str
I tried altering the code to:
from ftplib import FTP
ftp = FTP(ftp_host, ftp_user, ftp_pass)
ftp.storbinary('STOR myfile.txt'.encode('utf-8'), open('myfile.txt'))
But instead I got this
Traceback (most recent call last):
File "/Library/WebServer/CGI-Executables/rob3/functions/cli_f.py", line 12, in upload
ftp.storbinary('STOR myfile.txt'.encode('utf-8'), open('myfile.txt'))
File "/Library/Frameworks/Python.framework/Versions/3.1/lib/python3.1/ftplib.py", line 450, in storbinary
conn = self.transfercmd(cmd)
File "/Library/Frameworks/Python.framework/Versions/3.1/lib/python3.1/ftplib.py", line 358, in transfercmd
return self.ntransfercmd(cmd, rest)[0]
File "/Library/Frameworks/Python.framework/Versions/3.1/lib/python3.1/ftplib.py", line 329, in ntransfercmd
resp = self.sendcmd(cmd)
File "/Library/Frameworks/Python.framework/Versions/3.1/lib/python3.1/ftplib.py", line 244, in sendcmd
self.putcmd(cmd)
File "/Library/Frameworks/Python.framework/Versions/3.1/lib/python3.1/ftplib.py", line 179, in putcmd
self.putline(line)
File "/Library/Frameworks/Python.framework/Versions/3.1/lib/python3.1/ftplib.py", line 172, in putline
line = line + CRLF
TypeError: can't concat bytes to str
Can anybody point me in the right direction?
The issue is not with the command argument but with the file object. Since you're storing binary, you need to open the file with the 'rb' flag:
>>> ftp.storbinary('STOR myfile.txt', open('myfile.txt', 'rb'))
'226 File receive OK.'
To APPEND to a file over FTP (note: plain FTP, not SFTP):
import ftplib

ftp = ftplib.FTP('localhost')
ftp.login('user', 'password')
# Open in binary mode: storbinary sends raw bytes.
fin = open('foo.txt', 'rb')
ftp.storbinary('APPE foo2.txt', fin, 1)
Ref: Thanks to Noah
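For completeness, a Python 3 version of the original upload, using the question's placeholders (ftp_host, ftp_user, ftp_pass): the command stays a str, and only the file data needs to be bytes, which binary mode provides.

from ftplib import FTP

ftp = FTP(ftp_host, ftp_user, ftp_pass)
with open('myfile.txt', 'rb') as f:
    ftp.storbinary('STOR myfile.txt', f)  # str command, bytes payload
ftp.quit()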
