with open("student.csv", "r") as csv_ledger:
    r = csv.DictReader(csv_ledger)
    data = [dict(d) for d in r]

groups = {}
for k, g in groupby(data, lambda r: (r['name'])):
    items = []
    for i in g:
        # data processing
        pass

try:
    post_api = requests.post(ENDPOINT_URL, json=groups, headers=headers)
except requests.ConnectionError:
    print("Something went wrong")
finally:
    print("resume post request")
Currently, my code cannot resume the POST request when the internet connection drops. Using try/except as shown above does not make it work.
I'm not sure you can resume the way you want without a server-side implementation as well, but you can retry from the client. Here is a simple blocking example, though you may want to run it in a thread.
import csv
import socket
from itertools import groupby
from time import sleep

import requests


def is_internet_on():
    try:
        socket.setdefaulttimeout(3)
        # Cloudflare's public DNS resolver answers on TCP port 53
        socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect(('1.1.1.1', 53))
        return True
    except OSError:
        return False


def do_call_later(url, headers, body):
    while not is_internet_on():
        sleep(5)
    requests.post(url, headers=headers, json=body)


with open("student.csv", "r") as csv_ledger:
    r = csv.DictReader(csv_ledger)
    data = [dict(d) for d in r]

groups = {}
for k, g in groupby(data, lambda r: (r['name'])):
    items = []
    for i in g:
        # data processing
        pass

try:
    timeout_arg = (
        # first tuple value is the connection timeout:
        # how long to wait before the initial connection is established
        1.0,
        # second tuple value is the read timeout: how long the client
        # will wait after the initial connection before dropping the
        # connection because no response was sent
        1.0
    )
    post_api = requests.post(ENDPOINT_URL, json=groups, headers=headers, timeout=timeout_arg)
except requests.ConnectionError:
    do_call_later(ENDPOINT_URL, headers, groups)
finally:
    print("resume post request")
Edit: docs for the timeout call: https://github.com/kennethreitz/requests/blob/master/requests/api.py#L34
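A related option, rather than polling for connectivity by hand, is to let the session retry failed connection attempts automatically via urllib3's Retry. A minimal sketch, reusing ENDPOINT_URL, headers and groups from above:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
# retry up to 5 failed connection attempts, with exponential backoff between tries
retries = Retry(connect=5, backoff_factor=1)
session.mount("http://", HTTPAdapter(max_retries=retries))
session.mount("https://", HTTPAdapter(max_retries=retries))

post_api = session.post(ENDPOINT_URL, json=groups, headers=headers, timeout=(1.0, 1.0))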
I'm not sure it can work like this.
The idea behind a resumable request is to send the data in chunks and then assemble them at the backend. That way, if a request fails partway through, the upload can resume later by sending the remaining chunks.
The backend also has to be able to accept the chunks and reassemble them.
Take a look at the Python library resumable.
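For illustration, here is a rough client-side sketch of that idea, assuming a hypothetical endpoint that understands Content-Range headers and lets the client resume from a byte offset (the URL, chunk size and resume convention are placeholders, not a real protocol):

import os
import requests

CHUNK_SIZE = 1024 * 1024  # 1 MiB per chunk (arbitrary)
UPLOAD_URL = "https://example.com/upload"  # hypothetical endpoint


def upload_in_chunks(path, start_offset=0):
    total = os.path.getsize(path)
    with open(path, "rb") as f:
        f.seek(start_offset)
        offset = start_offset
        while offset < total:
            chunk = f.read(CHUNK_SIZE)
            headers = {
                # tell the server which byte range this chunk covers
                "Content-Range": f"bytes {offset}-{offset + len(chunk) - 1}/{total}",
            }
            resp = requests.post(UPLOAD_URL, data=chunk, headers=headers, timeout=(3, 10))
            resp.raise_for_status()
            offset += len(chunk)
    return offset

On failure, the caller can catch the exception, remember the last successfully acknowledged offset, and call upload_in_chunks again with start_offset set to that value.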
I'm new to Python, and I don't see much information on Stack Overflow about paginating with the links method. The loop works in that it pulls all the data I want, but it only breaks when there's a timeout error after my Mac falls asleep. Sometimes it runs for 2 hours until my Mac sleeps. I'm wondering if there's a faster way to retrieve this data? Here is my Python script:
import requests
import pandas as pd

res = []
url = "https://horizon.stellar.org/accounts/GCQJVAXWHB23WBNIG7TWEWHWUGGB6HWBC2ASPF5HMSADO5R5UKI4T7SD/trades"
querystring = {"limit": "200"}

try:
    while True:
        response = requests.request("GET", url, params=querystring)
        data = response.json()
        res += data['_embedded']['records']
        if "href" not in data['_links']['next']:
            break
        url = data['_links']['next']['href']
except Exception as ex:
    print("Exception:", ex)

df = pd.json_normalize(res)
df.to_csv('stellar_t7sd_trades.csv')
It returns with the following:
Exception: ('Connection aborted.', TimeoutError(60, 'Operation timed out'))
But it still writes the desired data to the CSV file.
Is there a problem with my loop, in that it doesn't properly break when it's done returning the data? I'm just trying to figure out a way so it doesn't run for 2 hours; other than that, I get the desired data.
I solved this by adding a break after n loop iterations. This only works because I know exactly how many iterations of the loop are needed to pull the data I want.
res = []
url = "https://horizon.stellar.org/accounts/GCQJVAXWHB23WBNIG7TWEWHWUGGB6HWBC2ASPF5HMSADO5R5UKI4T7SD/trades"
querystring = {"limit": "200"}
n = 32

try:
    while n > 0:  # stop after a fixed number of pages instead of while True
        response = requests.request("GET", url, params=querystring)
        n -= 1
        data = response.json()
        res += data['_embedded']['records']
        if "href" not in data['_links']['next']:
            break
        url = data['_links']['next']['href']
except Exception as ex:
    print("Exception:", ex)

df = pd.json_normalize(res)
df.to_csv('stellar_t7sd_tradestest.csv')
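If the API keeps returning a next link even on the last page (which would explain why the original break is never hit; I haven't verified this against Horizon), a stop condition that doesn't require knowing the page count in advance is to break as soon as a page comes back empty. A sketch of that variation:

res = []
url = "https://horizon.stellar.org/accounts/GCQJVAXWHB23WBNIG7TWEWHWUGGB6HWBC2ASPF5HMSADO5R5UKI4T7SD/trades"
querystring = {"limit": "200"}

while True:
    response = requests.get(url, params=querystring, timeout=10)
    data = response.json()
    records = data['_embedded']['records']
    if not records:  # empty page: nothing left to fetch
        break
    res += records
    next_link = data['_links'].get('next', {})
    if "href" not in next_link:
        break
    url = next_link['href']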
import requests
import json
import threading

data = {
    "amount": 2
}

def foo(data):
    try:
        r = requests.post(url="www.mysite.com", data=data)
        j = json.loads(r.text)
        print(j)
    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

threading.Timer(1, foo, [data]).start()
I want to run this HTTP request every second using a thread in my program. However, the program only runs the HTTP request once and exits. How do I fix this?
You need to restart the timer after each request:
def foo(data):
    try:
        r = requests.post(url="www.mysite.com", data=data)
        j = json.loads(r.text)
        print(j)
        threading.Timer(1, foo, [data]).start()  # new line: schedule the next call
    except requests.exceptions.RequestException as e:
        raise SystemExit(e)
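As a design note: threading.Timer starts a new thread for every scheduled call. If that ever becomes a concern, a single long-lived thread with a sleep loop is an alternative. A rough sketch, reusing the same placeholder URL as the question:

import threading
import time
import requests

def poll_forever(data, interval=1.0):
    # one long-lived loop instead of a new Timer thread per request
    while True:
        try:
            r = requests.post(url="www.mysite.com", data=data, timeout=5)
            print(r.text)
        except requests.exceptions.RequestException as e:
            print("request failed:", e)
        time.sleep(interval)

# daemon=True means the thread stops when the main program exits
threading.Thread(target=poll_forever, args=({"amount": 2},), daemon=True).start()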
I am trying to create a data endpoint that streams either the entirety of a file or responds appropriately to range requests. Streaming the whole file seems straightforward, but it's not clear to me how to deal with range requests. In particular, I can't see how aiohttp.MultipartWriter can write to a StreamResponse.
Here's an abstracted form of my code so far:
from aiohttp.web import Request, StreamResponse
from aiohttp.multipart import MultipartWriter

async def data_handler(req: Request) -> StreamResponse:
    is_range_request = "Range" in req.headers

    with open("my_big_file", "rb") as f:
        if is_range_request:
            status_code = 202
            content_type = "multipart/bytes"
        else:
            status_code = 200
            content_type = "application/octet-stream"

        resp = StreamResponse(status=status_code, headers={"Content-Type": content_type})
        resp.enable_chunked_encoding()
        resp.enable_compression()
        await resp.prepare(req)

        if is_range_request:
            # _parse_range_header :: str -> List[ByteRange]
            # ByteRange = Tuple[int, int] i.e., "from" and "to", inclusive
            ranges = _parse_range_header(req.headers["Range"])

            mpwriter = MultipartWriter("bytes")
            for r in ranges:
                range_from, range_to = r
                range_size = (range_to - range_from) + 1
                range_header = {"Content-Type": "application/octet-stream"}

                # FIXME Won't this block?
                f.seek(range_from)
                mpwriter.append(f.read(range_size), range_header)

            # TODO Write to response. How?...

        else:
            while True:
                data = f.read(8192)
                if not data:
                    await resp.drain()
                    break
                resp.write(data)

    return resp
This also doesn't return the response until it gets to the end, which doesn't seem correct to me: how does an upstream caller know what's going on before the response is returned, or is the asyncio machinery doing this for me automagically?
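For the single-range case, here is a hedged sketch of the streaming side, assuming a recent aiohttp where StreamResponse.write is a coroutine; 206 and Content-Range are the standard HTTP pieces for a range response, and _parse_range_header is the same assumed helper as in the question. The multi-range (multipart) case would still need the MultipartWriter machinery, which this sketch leaves aside:

import os
from aiohttp.web import Request, StreamResponse

CHUNK = 8192

async def single_range_handler(req: Request) -> StreamResponse:
    # assumes exactly one range was requested
    range_from, range_to = _parse_range_header(req.headers["Range"])[0]
    size = os.path.getsize("my_big_file")

    resp = StreamResponse(status=206, headers={
        "Content-Type": "application/octet-stream",
        "Content-Range": f"bytes {range_from}-{range_to}/{size}",
    })
    await resp.prepare(req)

    with open("my_big_file", "rb") as f:
        f.seek(range_from)
        remaining = (range_to - range_from) + 1
        while remaining > 0:
            data = f.read(min(CHUNK, remaining))
            if not data:
                break
            await resp.write(data)  # coroutine in aiohttp 3.x
            remaining -= len(data)

    await resp.write_eof()
    return resp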
I tried to get the Icecast metadata of an MP3 stream with this script:
import requests

url = 'http://stream.jam.fm/jamfm-nmr/mp3-128/konsole/'

try:
    response = requests.get(url, headers={'Icy-MetaData': 1}, stream=True)
    response.raise_for_status()
except requests.RequestException, e:
    print 'Error:', e
else:
    headers, stream = response.headers, response.raw
    meta_int = headers.get('icy-metaint')
    if meta_int is not None:
        audio_length = int(meta_int)
        while True:
            try:
                audio_data = stream.read(audio_length)
                meta_byte = stream.read(1)
                if (meta_byte):
                    meta_length = ord(meta_byte) * 16
                    meta_data = stream.read(meta_length)
                    print meta_data
            except KeyboardInterrupt:
                break
    response.close()
This works, but only for the first package. I never receive an update of the title information when the track changes. My question is: is this intended behavior and the track info is only sent once, or did I do something wrong? I would like to be able to notice a track change without polling the stream from time to time.
while True:
    try:
        # new request on every iteration, reading one metadata block each time
        response = requests.get(url, headers={'Icy-MetaData': 1}, stream=True)
        response.raise_for_status()
        headers, stream = response.headers, response.raw
        meta_int = headers.get('icy-metaint')
        audio_length = int(meta_int)
        audio_data = stream.read(audio_length)
        meta_byte = stream.read(1)
        if (meta_byte):
            meta_length = ord(meta_byte) * 16
            meta_data = stream.read(meta_length)
            print(meta_data)
    except KeyboardInterrupt:
        break
Right now I'm using Flask, and I'm having trouble trying to do more than one GET request using the Python requests module.
If I try to send a series of requests, the first one completes successfully, but the other ones throw a timeout exception.
Here is part of the view's code:
import requests

sess = requests.Session()

site_url = 'http://www.example.com/api/'
steps = ['first_step', 'second_step', 'third_step']
step_responses = dict()

for s in steps:
    try:
        req = sess.get(site_url + s, timeout=5)
    except requests.exceptions.Timeout:
        return jsonify({'result': False, 'error': 'timeout'})
    except requests.exceptions.ConnectionError:
        return jsonify({'result': False, 'error': 'connection_error'})
    else:
        step_responses[s] = True
If I extract this part into a standalone .py file, it completes successfully.
import requests

sess = requests.Session()

site_url = 'http://www.example.com/api/'
steps = ['first_step', 'second_step', 'third_step']
step_responses = dict()

for s in steps:
    try:
        req = sess.get(site_url + s, timeout=5)
    except requests.exceptions.Timeout:
        step_responses[s] = 'timeout'
    except requests.exceptions.ConnectionError:
        step_responses[s] = 'conn_error'
    else:
        step_responses[s] = 'ok'

print(step_responses)
Works for me. You may want to check the second and third steps:
import requests

sess = requests.Session()

def module():
    site_url = 'http://stackoverflow.com/'
    steps = ['users', 'questions', 'tags']
    step_responses = dict()

    for s in steps:
        try:
            req = sess.get(site_url + s, timeout=5)
        except requests.exceptions.Timeout:
            return jsonify({'result': False, 'error': 'timeout'})
        except requests.exceptions.ConnectionError:
            return jsonify({'result': False, 'error': 'connection_error'})
        else:
            step_responses[s] = True
You might want to make sure that you read all the values from the req object.
I think you might need req.text and req.status_code or req.content.
Check halfway down the page here: http://docs.python-requests.org/en/latest/api/#request-sessions where they discuss session parameters:
"class requests.adapters.HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=0, pool_block=False)"
I'm not at all sure how to use connection pools and so forth, but the docs do say (http://docs.python-requests.org/en/latest/user/advanced/, look for Keep-Alive):
"Note that connections are only released back to the pool for reuse once all body data has been read; be sure to either set stream to False or read the content property of the Response object."