I need to read a CSV feed from a URL, and this is what I do:
class Command(BaseCommand):
    help = 'Admin command to import feed'

    def _download_flow(self, url):
        req = requests.get(url, stream=True)
        if req.status_code == 200:
            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
            for line in req.iter_lines():
                tmp.write(line)
            return tmp
        raise Exception('error:{}'.format(req.status_code))

    def handle(self, *args, **options):
        catalog = self._download_flow(options['url'])
        with open(catalog.name, 'rU') as csvfile:
            reader = csv.DictReader(
                csvfile,
                delimiter=';',
                quotechar='"')
            for row in reader:
                raise Exception(row)
        catalog.close()
Basically, from a URL, I create a temporary CSV file. Now I want to parse this file and work with its lines, but I don't understand why my exception is not raised. (The file has content; I've checked.)
Do you have any clue to help me?
Thanks
The problem came from the _download_flow() method: iter_lines() strips the line terminators, so the whole feed was written to the temp file as one long line, and the DictReader saw no data rows after the header, which is why the loop body never ran. The correct way to construct the file is:
def _download_flow(self, url):
    req = requests.get(url, stream=True)
    if req.status_code == 200:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        for chunk in req.iter_content():
            tmp.write(chunk)
        return tmp
    raise Exception('error:{}'.format(req.status_code))
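Incidentally, a minimal sketch (parse_flow is a hypothetical name) that skips the temp file entirely and feeds the streamed lines straight into csv.DictReader, which accepts any iterable of strings:

import csv
import requests

def parse_flow(url):
    resp = requests.get(url, stream=True)
    resp.raise_for_status()
    # iter_lines(decode_unicode=True) yields str lines suitable for csv
    reader = csv.DictReader(
        resp.iter_lines(decode_unicode=True),
        delimiter=';',
        quotechar='"')
    for row in reader:
        print(row)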
from requests import *
import json
import base64
import urllib
from cmd import Cmd

url = "http://api.response.htb/"
url_digest = "cab532f75001ed2cc94ada92183d2160319a328e67001a9215956a5dbf10c545"

def get(url, url_digest):
    data = {
        "url": url,
        "url_digest": url_digest,
        "method": "GET",
        "session": "5f7bf45b02c832cf5b40c15ab6d365af",
        "session_digest": "a2b9ac69ab85795d13d12857a709a024cd729dcdf2c3fd3bb21ed514bc9990ac"
    }
    headers = {'Content-Type': 'application/json'}
    url_proxy = "http://proxy.response.htb/fetch"
    s = Session()
    res = s.post(url_proxy, json=data, headers=headers)
    body = json.loads(res.text)['body']
    body = base64.b64decode(body)
    if "zip" in url:
        f = open("file.zip", "wb")
    f.write(body)
    f.close()
    print("Done saving file :-");
    else: print body

def url_de(url):
    s = Session()
    res = s.get('http://www.response.htb/status/main.js.php',
                cookies={'PHPSESSID': url})
    x = res.text.find("session_digest':'")
    y = res.text.find("'};")
return res.text[x+17:y]

class pr(Cmd):
    prompt = "==> "
    def default(self, url):
        url_digest = url_de(url)
        get(url, url_digest)
    def do_exit(self, a):
        exit()

pr().cmdloop()
At line 32, VS Code gives me a Pylance error, "expected expression", and I can't proceed further. I am getting two errors: one on the else and another on the return statement at line 43. If anyone can identify these errors and help me solve them, please help.
Indentation is significant in Python.
You have one line after your if indented, then lines which are not indented. This means the conditional is finished. You then have an else by itself, which is not permitted.
You likely meant:
if "zip" in url:
f = open("file.zip", "wb")
f.write(body)
f.close()
print("Done saving file :-");
else:
print(body)
But this would be improved by using a context manager:
if "zip" in url:
with open("file.zip", "wb") as f:
f.write(body)
print("Done saving file :-");
else:
print(body)
This is your code's scope. Just indent lines 28-30 and 38-43; then parts 1 and 2 will fall into the if scope, and parts 3 and 4 into the function scope.
I am trying to take a file from an input field, save it temporarily to disk, and reply with a response so the user can re-download that same file.
In order to do this, I've read that I need to reply to the browser with content-type: application/octet-stream and content-disposition: attachment; "filename=myfile.extension".
I can store and listen to my music file in the /tmp folder, so I know the input part of it works.
This is my code in Pyramid:
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    input_file.seek(0)
    file_path = os.path.join('/tmp', '%s.mp3' % uuid.uuid4())
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    print(f"Wrote: {file_path}")
    filename = file_path.split('/')[-1]
    print(filename)
    f = open(file_path, 'rb')
    return Response(body_file=f, charset='UTF-8', content_type='application/octet-stream', content_disposition=f'attachment; "filename={filename}"')
These are my response headers and this is my response body (screenshots omitted). However, Chrome/Firefox do not start the download of my binary file. What am I doing wrong?
UPDATE
I also tried with FileResponse from Pyramid without success, I still do not get the download popup.
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    input_file.seek(0)
    file_path = os.path.join('/tmp', '%s.mp3' % uuid.uuid4())
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    print(f"Wrote: {file_path}")
    return FileResponse(file_path, request=request)
Apparently I was approaching this the wrong way. I need to return a plain Response when I upload the file through /process, then make another request to a separate /download endpoint that returns the FileResponse object. Building that second endpoint fixed the issue.
Example:
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    db = request.POST['volume']
    input_file.seek(0)
    filename = '%s.mp3' % uuid.uuid4()
    file_path = os.path.join('/tmp', filename)
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    if boost_track(file_path, filename, db):
        return Response(json_body={'filename': filename})

@view_config(route_name='download')
def download_file(request):
    filename = request.GET['filename']
    file_path = os.path.join('/tmp', filename)
    f = open(file_path, 'rb')
    return Response(body_file=f, charset='UTF-8', content_type='application/download', content_disposition=f'attachment; filename="{filename}"')
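A variant sketch (not from the original answer) that leans on Pyramid's FileResponse for the download view, so the file handle isn't opened manually; the Content-Disposition header is set on the response afterwards:

import os
from pyramid.response import FileResponse
from pyramid.view import view_config

@view_config(route_name='download')
def download_file(request):
    filename = request.GET['filename']
    file_path = os.path.join('/tmp', filename)
    # FileResponse streams the file and manages the handle itself
    response = FileResponse(file_path, request=request,
                            content_type='application/octet-stream')
    response.headers['Content-Disposition'] = f'attachment; filename="{filename}"'
    return response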
Sorry if this has been asked, but is it possible to skip a column when writing to a csv file?
Here is the code I have:
with open("list.csv","r") as f:
reader2 = csv.reader(f)
for row in reader2:
url = 'http://peopleus.intelius.com/results.php?ReportType=33&qi=0&qk=10&qp='+row
req = urllib.request.Request(url)
response = urllib.request.urlopen(req)
html = response.read()
retrieved_name = b'class="singleName">(.*?)<\/h1'
retrieved_number = b'<div\sclass="phone">(.*?)<\/div'
retrieved_nothing = b"(Sorry\swe\scouldn\\'t\sfind\sany\sresults)"
if re.search(retrieved_nothing,html):
noth = re.search(retrieved_nothing.decode('utf-8'),html.decode('utf-8')).group(1)
add_list(phone_data, noth)
else:
if re.search(retrieved_name,html):
name_found = re.search(retrieved_name.decode('utf-8'),html.decode('utf-8')).group(1)
else:
name_found = "No name found on peopleus.intelius.com"
if re.search(retrieved_number,html):
number_found = re.search(retrieved_number.decode('utf-8'),html.decode('utf-8')).group(1)
else:
number_found = "No number found on peopleus.intelius.com"
add_list(phone_data, name_found, number_found)
with open('column_skip.csv','a+', newline='') as mess:
writ = csv.writer(mess, dialect='excel')
writ.writerow(phone_data[-1])
time.sleep(10)
Assuming that there is data in the first three rows of column_skip.csv, can I have my program start writing its info in column 4?
Yeah, don't use the csv.writer method; write it as a simple file write operation:
file_path = 'your_csv_file.csv'
with open(file_path, 'w') as fp:
    # following are the data you want to write to csv
    fp.write("%s, %s, %s" % ('Name of col1', 'col2', 'col4'))
    fp.write("\n")
I hope this helps...
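If the goal is simply to leave the first three columns blank so the data lands in column 4, a small sketch that keeps csv.writer (write_from_column_four is a hypothetical helper; the sample row stands in for the question's phone_data entries):

import csv

def write_from_column_four(path, row):
    # pad three empty cells so the row's data starts in column 4
    with open(path, 'a+', newline='') as mess:
        writ = csv.writer(mess, dialect='excel')
        writ.writerow(['', '', ''] + list(row))

write_from_column_four('column_skip.csv', ['Jane Doe', '555-0100'])

Note that this only pads new rows; writing beside data that already exists in columns 1-3 would mean reading the file and rewriting each row.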
I am trying to access pages by incrementing the page counter using the opencorporates API. But the problem is that there are times when the response contains useless data. For example, the below URL with jurisdiction_code = ae_az returns a page showing just this:
{"api_version":"0.2","results":{"companies":[],"page":1,"per_page":26,"total_pages":0,"total_count":0}}
which is technically empty. How do I check for such data and skip over it, to move on to the next jurisdiction?
This is my code
import urllib2
import json, os

f = open('codes', 'r')
for line in f.readlines():
    id = line.strip('\n')
    url = 'http://api.opencorporates.com/v0.2/companies/search?q=&jurisdiction_code={0}&per_page=26&current_status=Active&page={1}?api_token=ab123cd45'
    i = 0
    directory = id
    os.makedirs(directory)
    while True:
        i += 1
        req = urllib2.Request(url.format(id, i))
        print url.format(id, i)
        try:
            response = urllib2.urlopen(url.format(id, i))
        except urllib2.HTTPError, e:
            break
        content = response.read()
        fo = str(i) + '.json'
        OUTFILE = os.path.join(directory, fo)
        with open(OUTFILE, 'w') as f:
            f.write(content)
Interpret the response you get back (you already know it's json) and check if the data you want is there.
...
content = response.read()
data = json.loads(content)
if not data.get('results', {}).get('companies'):
    break
...
Here's your code written with Requests and using the answer here. It is nowhere near as robust or clean as it should be, but demonstrates the path you might want to take. The rate limit is a guess, and doesn't seem to work. Remember to put your actual API key in.
import json
import os
from time import sleep

import requests

url = 'http://api.opencorporates.com/v0.2/companies/search'
token = 'ab123cd45'
rate = 20  # seconds to wait after rate limited

with open('codes') as f:
    codes = [l.strip('\n') for l in f]

def get_page(code, page, **kwargs):
    params = {
        # 'api_token': token,
        'jurisdiction_code': code,
        'page': page,
    }
    params.update(kwargs)
    while True:
        r = requests.get(url, params=params)
        try:
            data = r.json()
        except ValueError:
            return None
        if 'error' in data:
            print data['error']['message']
            sleep(rate)
            continue
        return data['results']

def dump_page(code, page, data):
    with open(os.path.join(code, str(page) + '.json'), 'w') as f:
        json.dump(data, f)

for code in codes:
    try:
        os.makedirs(code)
    except os.error:
        pass
    data = get_page(code, 1)
    if data is None:
        continue
    dump_page(code, 1, data['companies'])
    # start at page 2: page 1 was already fetched and dumped above
    for page in xrange(2, int(data.get('total_pages', 1)) + 1):
        data = get_page(code, page)
        if data is None:
            break
        dump_page(code, page, data['companies'])
I think that actually this example is not "technically empty." It contains data and is therefore technically not empty. The data just does not include any fields that are useful to you. :-)
If you want your code to skip over responses that have uninteresting data, then just check whether the JSON has the necessary fields before writing any data:
content = response.read()
try:
    json_content = json.loads(content)
    if json_content['results']['total_count'] > 0:
        fo = str(i) + '.json'
        OUTFILE = os.path.join(directory, fo)
        with open(OUTFILE, 'w') as f:
            f.write(content)
except KeyError:
    break
except ValueError:
    break
etc. You might want to report the ValueError or the KeyError, but that's up to you.
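If you do want that report, a minimal variation (a sketch, assuming the same loop variables i and directory as above) catches both exceptions together and prints them before bailing out:

content = response.read()
try:
    json_content = json.loads(content)
    if json_content['results']['total_count'] > 0:
        with open(os.path.join(directory, str(i) + '.json'), 'w') as f:
            f.write(content)
except (KeyError, ValueError) as e:
    # report the malformed or unexpected payload, then move on
    print 'skipping page {} of {}: {}'.format(i, directory, e)
    break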
I am attempting to stream a csv file as an attachment download. The CSV files are getting to be 4MB in size or more, and I need a way for the user to actively download the files without waiting for all of the data to be created and committed to memory first.
I first used my own file wrapper based on Django's FileWrapper class. That failed. Then I saw a method here for using a generator to stream the response:
How to stream an HttpResponse with Django
When I raise an error within the generator, I can see that I am creating the proper data with the get_row_data() function, but when I try to return the response it comes back empty. I've also disabled the Django GZipMiddleware. Does anyone know what I'm doing wrong?
Edit: The issue I was having was with the ConditionalGetMiddleware. I had to replace it, the code is in an answer below.
Here is the view:
from django.views.decorators.http import condition

@condition(etag_func=None)
def csv_view(request, app_label, model_name):
    """ Based on the filters in the query, return a csv file for the given model """
    #Get the model
    model = models.get_model(app_label, model_name)
    #if there are filters in the query
    if request.method == 'GET':
        #if the query is not empty
        if request.META['QUERY_STRING'] != None:
            keyword_arg_dict = {}
            for key, value in request.GET.items():
                #get the query filters
                keyword_arg_dict[str(key)] = str(value)
            #generate a list of row objects, based on the filters
            objects_list = model.objects.filter(**keyword_arg_dict)
        else:
            #get all the model's objects
            objects_list = model.objects.all()
    else:
        #get all the model's objects
        objects_list = model.objects.all()
    #create the response object with a csv mimetype
    response = HttpResponse(
        stream_response_generator(model, objects_list),
        mimetype='text/plain',
    )
    response['Content-Disposition'] = "attachment; filename=foo.csv"
    return response
Here is the generator I use to stream the response:
def stream_response_generator(model, objects_list):
    """Streaming function to return data iteratively """
    for row_item in objects_list:
        yield get_row_data(model, row_item)
        time.sleep(1)
And here is how I create the csv row data:
def get_row_data(model, row):
    """Get a row of csv data from an object"""
    #Create a temporary csv handle
    csv_handle = cStringIO.StringIO()
    #create the csv output object
    csv_output = csv.writer(csv_handle)
    value_list = []
    for field in model._meta.fields:
        #if the field is a related field (ForeignKey, ManyToMany, OneToOne)
        if isinstance(field, RelatedField):
            #get the related model from the field object
            related_model = field.rel.to
            for key in row.__dict__.keys():
                #find the field in the row that matches the related field
                if key.startswith(field.name):
                    #Get the unicode version of the row in the related model, based on the id
                    try:
                        entry = related_model.objects.get(
                            id__exact=int(row.__dict__[key]),
                        )
                    except:
                        pass
                    else:
                        value = entry.__unicode__().encode("utf-8")
                        break
        #if it isn't a related field
        else:
            #get the value of the field
            if isinstance(row.__dict__[field.name], basestring):
                value = row.__dict__[field.name].encode("utf-8")
            else:
                value = row.__dict__[field.name]
        value_list.append(value)
    #add the row of csv values to the csv file
    csv_output.writerow(value_list)
    #Return the string value of the csv output
    return csv_handle.getvalue()
Here's some simple code that'll stream a CSV; you can probably go from this to whatever you need to do:
import cStringIO as StringIO
import csv

# named csv_view so the view doesn't shadow the csv module
def csv_view(request):
    def data():
        for i in xrange(10):
            csvfile = StringIO.StringIO()
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow([i, "a", "b", "c"])
            yield csvfile.getvalue()

    response = HttpResponse(data(), mimetype="text/csv")
    response["Content-Disposition"] = "attachment; filename=test.csv"
    return response
This simply writes each row to an in-memory file, reads the row and yields it.
This version is more efficient for generating bulk data, but be sure to understand the above before using it:
import cStringIO as StringIO
import csv

# named csv_view so the view doesn't shadow the csv module
def csv_view(request):
    csvfile = StringIO.StringIO()
    csvwriter = csv.writer(csvfile)

    def read_and_flush():
        csvfile.seek(0)
        data = csvfile.read()
        csvfile.seek(0)
        csvfile.truncate()
        return data

    def data():
        for i in xrange(10):
            csvwriter.writerow([i, "a", "b", "c"])
            data = read_and_flush()
            yield data

    response = HttpResponse(data(), mimetype="text/csv")
    response["Content-Disposition"] = "attachment; filename=test.csv"
    return response
The middleware issue has been solved as of Django 1.5, which introduced StreamingHttpResponse. The following should do:
import cStringIO as StringIO
import csv

from django.http import StreamingHttpResponse

def csv_view(request):
    ...
    # Assume `rows` is an iterator of lists
    def stream():
        buffer_ = StringIO.StringIO()
        writer = csv.writer(buffer_)
        for row in rows:
            writer.writerow(row)
            buffer_.seek(0)
            data = buffer_.read()
            buffer_.seek(0)
            buffer_.truncate()
            yield data

    response = StreamingHttpResponse(
        stream(), content_type='text/csv'
    )
    disposition = "attachment; filename=file.csv"
    response['Content-Disposition'] = disposition
    return response
There's some documentation on how to output CSV from Django, but it doesn't take advantage of StreamingHttpResponse, so I went ahead and opened a ticket to track it.
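For comparison, the pseudo-buffer pattern the Django documentation later adopted for streaming large CSV files avoids the seek/truncate bookkeeping: write() just hands each value back to be streamed. A Python 3 sketch, with the same ten sample rows as above standing in for real data:

import csv
from django.http import StreamingHttpResponse

class Echo:
    """Pseudo-buffer: write() returns the value instead of storing it."""
    def write(self, value):
        return value

def csv_view(request):
    rows = ([i, "a", "b", "c"] for i in range(10))
    writer = csv.writer(Echo())
    # writer.writerow() returns whatever Echo.write() returns,
    # so this generator yields one CSV line per row
    response = StreamingHttpResponse(
        (writer.writerow(row) for row in rows),
        content_type="text/csv",
    )
    response["Content-Disposition"] = 'attachment; filename="file.csv"'
    return response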
The problem I was having was with the ConditionalGetMiddleware. I saw django-piston come up with a replacement middleware for the ConditionalGetMiddleware that allows streaming:
from django.middleware.http import ConditionalGetMiddleware

def compat_middleware_factory(klass):
    """
    Class wrapper that only executes `process_response`
    if `streaming` is not set on the `HttpResponse` object.
    Django has a bad habit of looking at the content,
    which will prematurely exhaust the data source if we're
    using generators or buffers.
    """
    class compatwrapper(klass):
        def process_response(self, req, resp):
            if not hasattr(resp, 'streaming'):
                return klass.process_response(self, req, resp)
            return resp
    return compatwrapper

ConditionalMiddlewareCompatProxy = compat_middleware_factory(ConditionalGetMiddleware)
So then you will replace ConditionalGetMiddleware with your ConditionalMiddlewareCompatProxy middleware, and in your view (borrowed code from a clever answer to this question):
def csv_view(request):
    def data():
        for i in xrange(10):
            csvfile = StringIO.StringIO()
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow([i, "a", "b", "c"])
            yield csvfile.getvalue()

    #create the response object with a csv mimetype
    response = HttpResponse(
        data(),
        mimetype='text/csv',
    )
    #Set the response as an attachment with a filename
    response['Content-Disposition'] = "attachment; filename=test.csv"
    response.streaming = True
    return response