So I have this code that is creating an output in Excel.
What I want to do now is get the parameters (lid) in payload to loop through a list of other ID's
This list is stored in a txt file.
can anyone modify my code to show me how to do that please?
The text file has values
1654,
3457,
4327,
1234
(can also hard code these somewhere in the script if it is easier)
from __future__ import print_function
import sys
import csv
import collections
import itertools
try:
import requests
from requests import exceptions
import base64
import json
except ImportError as e:
import requests
from requests import exceptions
import base64
import json
print ("Import Error: %s" % e)
API_TOKEN = u''
b64token = base64.b64encode(bytes(API_TOKEN))
REST_BASE_URL = u'https://visdasa.dsds.com/rest/'
# API URL request examples (choose one)
REST_URL = u'rawdata/'
FULL_URL = REST_BASE_URL + REST_URL
def retrieve_data(api_url):
try:
#connect to the API and retrieve data
bauth_header = {'Authorization': 'Basic '+b64token.decode('UTF-8')}
payload = {'start': '2014-08-01T00:00:01', 'stop': '2014- 8-01T23:59:59','category': 'ots','lid': '9263'}
response = requests.get(api_url, headers=bauth_header, params=payload)
# check the api response
if response.status_code == requests.codes.ok:
# Convert from json data
json_data = json.loads(response.text)
Header_String = "ID", "Site Name", "Network ID", "Network Lablel", "Company Branch ID", "Comapany Label","Count", "timestamp", "ots_duration", "notsure1", "notsure2"
for location_row in json_data["data"]["locations"]:
Location_string = (location_row["id"], location_row["label"], location_row["site"]["network"]["id"],location_row["site"]["network"]["label"],
location_row["site"]["id"], location_row["site"]["label"])
try:
with open('C:\\Users\\teddy\\Desktop\\party\\test.csv', 'w') as wFile:
writer = csv.writer(wFile, delimiter=',')
writer.write(Header_string)
for row in json_data["data"]["raw_data"]:
writer.writerow(row)
except IOError as e:
logger.error("I/O error({0}): {1}".format(e.errno, e.strerror))
print( "I/O error({0}): {1}".format(e.errno, e.strerror))
else:
json_data = json.loads(response.text)
# If not successful api call the throw an error
raise requests.RequestException("Error with the api. Status code : %i \n Json response: %s"
% (response.status_code, json_data))
except (requests.exceptions.ProxyError, requests.RequestException) as e:
print (e)
def main():
#retrieve_data(FULL_URL, PROXY_SETTINGS)
retrieve_data(FULL_URL)
sys.exit()
if __name__ == '__main__':
main()
Why not just pass all the lid values as a parameter to your retrieve_data function.
def retrieve_data(api_url):
would become
def retrieve_data(api_url, lid_value):
You would remove the hardcoded lid section of your payload so the payload would look like this
payload = {'start': '2014-08-01T00:00:01', 'stop': '2014- 8-01T23:59:59','category': 'ots'}
Then on the next line you can add
payload['lid'] = lid_value
In your main function you could then loop through the values in the text file. Here is a simple loop with a list.
def main():
lid_values = ['1654', '3457', '4327', '1234']
for lid in lid_values:
retrieve_data(FULL_URL, lid)
sys.exit()
Related
import requests
import json
import threading
data = {
"amount": 2
}
def foo(data):
try:
r = requests.post(url = "www.mysite.com", data = data)
j = json.loads(r.text)
print(j)
except requests.exceptions.RequestException as e:
raise SystemExist(e)
threading.Timer(1, foo, [data]).start()
I want to run this http request every second using a thread in my program. However, the program only runs the http request once and exit. How do I fix this?
You need to restart the timer after each request :
def foo(data):
try:
r = requests.post(url = "www.mysite.com", data = data)
j = json.loads(r.text)
print(j)
threading.Timer(1, foo, [data]).start() # New Line Added
except requests.exceptions.RequestException as e:
raise SystemExist(e)
I am able to use haveibeenpwned to search for 1 account compromise. However, I could not find an option to use the API key to search for compromise of all the email accounts on a domain. (For example. if the domain is xyz.com, I want to search for the compromise of abc#xyz.com, peter.charlie#xyz.com and so on). I am aware of the notification email that I can sign up for. But, that is a lengthy process and I prefer using the API.
So, I wrote a script to search against haveibeenpwned for all the email address of my domain, but it takes very long. I searched through a couple of Github projects, but I did not find any such implementation. Has anyone tried this before?
I have added the code below. I am using Multi threading approach, but still it takes very long, is there any other Optimization strategy I can use? Please help. Thank you.
import requests, json
import threading
from time import sleep
import datetime
import splunklib.client as client
import splunklib.results as results
date = datetime.datetime.now()
from itertools import islice
import linecache
import sys
def PrintException():
exc_type, exc_obj, tb = sys.exc_info()
f = tb.tb_frame
lineno = tb.tb_lineno
filename = f.f_code.co_filename
linecache.checkcache(filename)
line = linecache.getline(filename, lineno, f.f_globals)
print 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)
class myThread (threading.Thread):
def __init__(self, threadID, name, list_emails):
threading.Thread.__init__(self)
self.threadID = threadID
self.name = name
self.list_emails = list_emails
def run(self):
i=0
print "Starting " + self.name
for email in self.list_emails:
print i
i=i+1
result = check_pasteaccount(email)
print email
print result
print result
print "Exiting " + self.name
def check_pasteaccount(account):
account = str(account)
result = ""
URL = "https://haveibeenpwned.com/api/v3/pasteaccount/%s?truncateResponse=false" % (account)
# print(URL)
headers= {'hibp-api-key':api_key}
result = ""
try:
r = requests.get(url=URL,headers=headers)
# sleep(2)
status_code = r.status_code
if status_code == 200:
data = r.text
result = []
for entry in json.loads(data.decode('utf8')):
if int((date - datetime.datetime.strptime(entry['Date'], '%Y-%m-%dT%H:%M:%SZ')).days) > 120:
pass
else:
result.append(['Title: {0}'.format(entry['Title']), \
'Source: {0}'.format(['Source']), \
'Paste ID: {0}'.format(entry['Id'])])
if len(result) == 0:
result = "No paste reported for given account and time frame."
else:
paste_result = ""
for entry in result:
for item in entry:
paste_result += str(item) + "\r\n"
paste_result += "\r\n"
result = paste_result
elif status_code == 404:
result = "No paste for the account"
else:
if status_code == 429:
sleep(5)
# print "Limit exceeded, sleeping"
result = check_pasteaccount(account)
else:
result = "Exception"
print status_code
except Exception as e:
result = "Exception"
PrintException()
pass
return result
def split_every(n, iterable):
iterable = iter(iterable)
for chunk in iter(lambda: list(islice(iterable, n)), []):
yield chunk
def main():
print datetime.datetime.now()
# Fetching the list of email addresses from Splunk
list_emails = connect_splunk()
print datetime.datetime.now()
i=0
list_split = split_every(1000,list_emails)
threads=[]
for list in list_split:
i=i+1
thread_name = "Thread" + str(i)
thread = myThread(1, thread_name, list)
thread.start()
threads.append(thread)
# Wait for all the threads to complete
for t in threads:
t.join()
print "Completed Search"
Here's a shorter and maybe more efficient version of your script using the standard multiprocessing library instead of a hand-rolled thread system.
You'll need Python 3.6+ since we're using f-strings.
You'll need to install the tqdm module for fancy progress bars.
You can adjust the number of concurrent requests with the pool size parameter.
Output is written in machine-readable JSON Lines format into a timestamped file.
A single requests session is shared (per-worker), which means less time spent connecting to HIBP.
import datetime
import json
import multiprocessing
import random
import time
import requests
import tqdm
HIBP_PARAMS = {
"truncateResponse": "false",
}
HIBP_HEADERS = {
"hibp-api-key": "xxx",
}
sess = requests.Session()
def check_pasteaccount(account):
while True:
resp = sess.get(
url=f"https://haveibeenpwned.com/api/v3/pasteaccount/{account}",
params=HIBP_PARAMS,
headers=HIBP_HEADERS,
)
if resp.status_code == 429:
print("Quota exceeded, waiting for a while")
time.sleep(random.uniform(3, 7))
continue
if resp.status_code >= 400:
return {
"account": account,
"status": resp.status_code,
"result": resp.text,
}
return {
"account": account,
"status": resp.status_code,
"result": resp.json(),
}
def connect_splunk():
# TODO: return emails
return []
def main():
list_emails = [str(account) for account in connect_splunk()]
datestamp = datetime.datetime.now().isoformat().replace(":", "-")
output_filename = f"accounts-log-{datestamp}.jsonl"
print(f"Accounts to look up: {len(list_emails)}")
print(f"Output filename: {output_filename}")
with multiprocessing.Pool(processes=16) as p:
with open(output_filename, "a") as f:
results_iterable = p.imap_unordered(
check_pasteaccount, list_emails, chunksize=20
)
for result in tqdm.tqdm(
results_iterable,
total=len(list_emails),
unit="acc",
unit_scale=True,
):
print(json.dumps(result, sort_keys=True), file=f)
if __name__ == "__main__":
main()
I'm new to python and I want this code to run only once and stops, not every 30 seconds
because I want to run multiple codes like this with different access tokens every 5 seconds using the command line.
and when I tried this code it never jumps to the second one because it's a while true:
import requests
import time
api_url = "https://graph.facebook.com/v2.9/"
access_token = "access token"
graph_url = "site url"
post_data = { 'id':graph_url, 'scrape':True, 'access_token':access_token }
# Beware of rate limiting if trying to increase frequency.
refresh_rate = 30 # refresh rate in second
while True:
try:
resp = requests.post(api_url, data = post_data)
if resp.status_code == 200:
contents = resp.json()
print(contents['title'])
else:
error = "Warning: Status Code {}\n{}\n".format(
resp.status_code, resp.content)
print(error)
raise RuntimeWarning(error)
except Exception as e:
f = open ("open_graph_refresher.log", "a")
f.write("{} : {}".format(type(e), e))
f.close()
print(e)
time.sleep(refresh_rate)
From what I understood you're trying to execute the piece of code for multiple access tokens. To make your job simple, have all your access_tokens as lists and use the following code. It assumes that you know all your access_tokens in advance.
import requests
import time
def scrape_facebook(api_url, access_token, graph_url):
""" Scrapes the given access token"""
post_data = { 'id':graph_url, 'scrape':True, 'access_token':access_token }
try:
resp = requests.post(api_url, data = post_data)
if resp.status_code == 200:
contents = resp.json()
print(contents['title'])
else:
error = "Warning: Status Code {}\n{}\n".format(
resp.status_code, resp.content)
print(error)
raise RuntimeWarning(error)
except Exception as e:
f = open (access_token+"_"+"open_graph_refresher.log", "a")
f.write("{} : {}".format(type(e), e))
f.close()
print(e)
access_token = ['a','b','c']
graph_url = ['sss','xxx','ppp']
api_url = "https://graph.facebook.com/v2.9/"
for n in range(len(graph_url)):
scrape_facebook(api_url, access_token[n], graph_url[n])
time.sleep(5)
I'm trying to retrieve response from ip-api.com for most IP ranges. But I want to store that data in Aerospike but I'm having some errors.
Here is the Python script
# import the module
from __future__ import print_function
import aerospike
import urllib2
config = {
'hosts': [ ('127.0.0.1', 3000) ]
}
try:
client = aerospike.client(config).connect()
except:
import sys
print("failed to connect to the cluster with", config['hosts'])
sys.exit(1)
key = ('ip', 'hit', 'trial')
try:
for i in range(0,255):
for j in range(0,255):
for k in range(0,255):
for l in range(0,255):
if not((i == 198 and j == 168) or (i == 172 and j > 15 and j < 32) or (i == 10)):
response = urllib2.urlopen('http://ip-api.com/json/'+str(i)+'.'+str(j)+'.'+str(k)+'.'+str(l))
html = response.read()
client.put(key, html)
except Exception as e:
import sys
print("error: {0}".format(e), file=sys.stderr)
client.close()
I'm new to Python as well as Aerospike, infact any no-SQL databases. Any help would be appreciated.
All code from aerospike perspective it right, except you would want to change
html = response.read()
client.put(key, html)
to
import json
client.put(key, json.load(response))
The response is a json string which needs to be converted to json object
Here I have a rate stream that outputs the following and i'm looking to only print the "bid" price. Could someone help explain how I can parse the output correctly? It's driving me crazy!
example = 1.05653
I need the output without quotes or any other markup as well..
JSON
{
"tick": {
"instrument": "EUR_USD",
"time": "2015-04-13T14:28:26.123314Z",
"bid": 1.05653,
"ask": 1.05669
}
}
My code:
import requests
import json
from optparse import OptionParser
def connect_to_stream():
"""
Environment <Domain>
fxTrade stream-fxtrade.oanda.com
fxTrade Practice stream-fxpractice.oanda.com
sandbox stream-sandbox.oanda.com
"""
# Replace the following variables with your personal ones
domain = 'stream-fxpractice.oanda.com'
access_token = 'xxxxx'
account_id = 'xxxxxxxxx'
instruments = "EUR_USD"
try:
s = requests.Session()
url = "https://" + domain + "/v1/prices"
headers = {'Authorization' : 'Bearer ' + access_token,
# 'X-Accept-Datetime-Format' : 'unix'
}
params = {'instruments' : instruments, 'accountId' : account_id}
req = requests.Request('GET', url, headers = headers, params = params)
pre = req.prepare()
resp = s.send(pre, stream = True, verify = False)
return resp
except Exception as e:
s.close()
print "Caught exception when connecting to stream\n" + str(e)
def demo(displayHeartbeat):
response = connect_to_stream()
if response.status_code != 200:
print response.text
return
for line in response.iter_lines(1):
if line:
try:
msg = json.loads(line)
except Exception as e:
print "Caught exception when converting message into json\n" + str(e)
return
if msg.has_key("instrument") or msg.has_key("tick"):
print line
if displayHeartbeat:
print line
else:
if msg.has_key("instrument") or msg.has_key("tick"):
print line
def main():
usage = "usage: %prog [options]"
parser = OptionParser(usage)
parser.add_option("-b", "--displayHeartBeat", dest = "verbose", action = "store_true",
help = "Display HeartBeat in streaming data")
displayHeartbeat = False
(options, args) = parser.parse_args()
if len(args) > 1:
parser.error("incorrect number of arguments")
if options.verbose:
displayHeartbeat = True
demo(displayHeartbeat)
if __name__ == "__main__":
main()
Sorry if this is an extremely basic question but I'm not that familiar with python..
Thanks in advance!
You are iterating over the stream line by line attempting to parse each line as JSON. Each line alone is not proper JSON so that's one problem.
I would just regex over each hline you bring in looking for the text "bid: " followed by a decimal number, and return that number as a float. For example:
import re
for line in response.iter_lines(1):
matches = re.findall(r'\"bid\"\:\s(\d*\.\d*)', line)
if len(matches) > 0:
print float(matches[0])
Try something along the lines of this:
def demo(displayHeartbeat):
response = connect_to_stream()
for line in response.iter_lines():
if line.startswith(" \"bid\"")
print "bid:"+line.split(":")[1]
This actually turned out to be pretty easy, I fixed it by replacing the "demo" function with this:
def demo(displayHeartbeat):
response = connect_to_stream()
if response.status_code != 200:
print response.text
return
for line in response.iter_lines(1):
if line:
try:
msg = json.loads(line)
except Exception as e:
print "Caught exception when converting message into json\n" + str(e)
return
if displayHeartbeat:
print line
else:
if msg.has_key("instrument") or msg.has_key("tick"):
print msg["tick"]["ask"] - .001
instrument = msg["tick"]["instrument"]
time = msg["tick"]["time"]
bid = msg["tick"]["bid"]
ask = msg["tick"]["ask"]