I have written some Python code to help me pull data from an API. The first version of my program works quite well.
Now I am trying to develop a more DRY version of the code by introducing functions and loops. I am still new to Python.
Your professional advice will be really appreciated.
import requests
import json
# Bika Lims Authentication details
username = 'musendamea'
password = '!Am#2010#bgl;'
# API url Calls for patients, analysis and cases
patient_url = "adress1"
analysis_url = "adress2"
cases_url = "adress3"
# perform API calls and parse json data
patient_data = requests.get(patient_url, auth=(username, password ))
analysis_data = requests.get(analysis_url, auth=(username, password ))
cases_data = requests.get(cases_url, auth=(username, password ))
patients = json.loads(patient_data.text)
analysis = json.loads(analysis_data.text)
cases = json.loads(cases_data.text)
# checks for errors if any
print ("Patients")
print (patients['error'])
print (patients['success'])
print (patients['last_object_nr'])
print (patients['total_objects'])
print ("\n Analysis")
print (analysis['error'])
print (analysis['success'])
print (analysis['last_object_nr'])
print (analysis['total_objects'])
print ("\n Cases")
print (cases['error'])
print (cases['success'])
print (cases['last_object_nr'])
print (cases['total_objects'])
# create and save json files for patients, analysis and cases
with open('patients.json', 'w') as outfile:
    json.dump(patients['objects'], outfile)
with open('analysis.json', 'w') as outfile1:
    json.dump(analysis['objects'], outfile1)
with open('cases.json', 'w') as outfile2:
    json.dump(cases['objects'], outfile2)
The above code works pretty well, but my challenge is making it DRY. Somehow the loop breaks when I change it to the following:
your_domain = "10.0.0.191"
data_types = ['patients', 'analysis', 'cases']
checkers = ['error', 'success', 'total_objects']
urls = []
data_from_api = []
# API url Call
base_url = "http://" + your_domain + "/##API/read?"
page_size = "1000000000000000000"
patient_url = base_url + "catalog_name=bika_patient_catalog&page_size=" + page_size
analysis_url = base_url + "portal_type=AnalysisRequest&review_state=published&page_size=" + page_size
cases_url = base_url + "portal_type=Batch&page_size=" + page_size
urls.append(patient_url)
urls.append(analysis_url)
urls.append(cases_url)
# perform API calls and parse json data
def BikaApiCalls(urls, username, password):
    for i in len(urls):
        data_ = requests.get(urls[i - 1], auth = (username, password))
        print (data_types[i] + " ~ status_code: ")
        print (data_.status_code + "\n")
        data_from_api.append(json.loads(data_.text))
        for val in len(checkers):
            print (data_from_api[i][val])

BikaApiCalls(urls, username, password)
# Write JSON files
def WriteJson(data_types, data_from_api):
    for i in len(data_from_api):
        with open(data_types[i] + '.json', 'w') as outfile:
            json.dump(data_from_api[i]['objects'], outfile)

WriteJson(data_types, data_from_api)
Where am I getting it wrong? I tried some debugging but I can't seem to get through it. I'd really appreciate your help.
Thanks in advance :)
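For reference, here is a minimal sketch (untested against a live Bika LIMS server) of how the two functions could be written so the loop does not break, assuming the same imports, credentials, and the urls, data_types and checkers lists defined above. The core issue is that len() returns an integer, so you cannot iterate over it directly; range(len(...)) or, more idiomatically, zip() is needed.

# Sketch only: iterate over the names and URLs together instead of calling
# len() in the for statement, and return the collected data instead of
# appending to a global list.
def bika_api_calls(urls, data_types, username, password):
    data_from_api = []
    for name, url in zip(data_types, urls):
        response = requests.get(url, auth=(username, password))
        print(name + " ~ status_code: " + str(response.status_code))
        payload = response.json()  # equivalent to json.loads(response.text)
        data_from_api.append(payload)
        for key in checkers:
            print(payload[key])
    return data_from_api

def write_json(data_types, data_from_api):
    for name, payload in zip(data_types, data_from_api):
        with open(name + '.json', 'w') as outfile:
            json.dump(payload['objects'], outfile)

data_from_api = bika_api_calls(urls, data_types, username, password)
write_json(data_types, data_from_api)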
I would like to ask what I am doing wrong in my code, since I am not getting all the data from the website's API.
For example, my JSON is missing the entry from 2022-01-03 8:00, Vikings Exchange.
Website: https://www.nasdaqomxnordic.com/news/companynews.
import requests
import json
import time
import csv
import pandas
start=250
with open('C:/Users/apskaita3/Desktop/number2.txt', "r") as f:
    start= f.readlines()
start=int(start[0])
start=start + 70
results = {"item": {}}
# Todo load json
for i in range(0,9800): #<----- Just change range here to increase number of requests
    URL = f"https://api.news.eu.nasdaq.com/news/query.action?type=handleResponse&showAttachments=true&showCnsSpecific=true&showCompany=true&countResults=false&freeText=&company=&market=Main%20Market%2C+Helsinki&cnscategory=&fromDate=&toDate=&globalGroup=exchangeNotice&globalName=NordicMainMarkets&displayLanguage=en&language=en&timeZone=CET&dateMask=yyyy-MM-dd+HH%3Amm%3Ass&limit=19&start={i}&dir=ASC"
    r = requests.get(url = URL)
    #time.sleep(1)
    res = r.text.replace("handleResponse(", "")
    #print(res)
    #print(f'r is {r}')
    res_json = json.loads(res)
    #print(res_json)
    data = res_json
    a=i+1
    #print(data)
    print("Doing: " + str(i + 1) + "th")
    #data = r.json()
    downloaded_entries = data["results"]["item"]
    new_entries = [d for d in downloaded_entries if d["headline"] not in results["item"]]
    start=str(start)
    for entry in new_entries:
        if entry["market"] == 'Main Market, Helsinki' and entry["published"]>="2021-10-20 06:30:00":
            headline = entry["headline"].strip()
            published = entry["published"]
            market=entry["market"]
            market="Main Market, Helsinki"
            results["item"][headline] = {"company": entry["company"], "messageUrl": entry["messageUrl"], "published": entry["published"], "headline": headline}
            print(entry['market'])
            #time.sleep(5)
            print(f"Market: {market}/nDate: {published}/n")
            #print( results["item"][headline] )
    #print(results)
    #print(json.dumps({"item": list(results["item"].values())}, indent = 4))
with open("C:/Users/apskaita3/Finansų analizės ir valdymo sprendimai, UAB/Rokas Toomsalu - Power BI analitika/Integracijos/1_Public comapnies analytics/Databasesets/Others/market_news_helsinki.json", "w") as outfile:
    json_object = json.dumps({"item": list(results["item"].values())}, indent = 4)
    outfile.write(json_object)
#print(json_object)
with open("C:/Users/apskaita3/Desktop/number2.txt", "w") as outfile1:
    outfile1.write(start) # type: ignore
I am expecting to get all the data for the Helsinki market from the website. I have tried fetching the data, and I am getting a large part of it, but not all.
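One thing worth checking (an assumption on my part, since I don't know this API's exact paging semantics): the query above asks for limit=19 items per call but advances start by only 1 per iteration, and the loop runs a fixed 9800 times. If start is a result offset, stepping it by the page size and stopping when a page comes back empty would walk the whole result set. A rough sketch of that idea, reusing the query URL above with start={start} left as a placeholder:

import requests, json

PAGE_SIZE = 19  # matches limit=19 in the query URL above
URL_TEMPLATE = (
    "https://api.news.eu.nasdaq.com/news/query.action?type=handleResponse"
    "&showAttachments=true&showCnsSpecific=true&showCompany=true"
    "&countResults=false&freeText=&company=&market=Main%20Market%2C+Helsinki"
    "&cnscategory=&fromDate=&toDate=&globalGroup=exchangeNotice"
    "&globalName=NordicMainMarkets&displayLanguage=en&language=en&timeZone=CET"
    "&dateMask=yyyy-MM-dd+HH%3Amm%3Ass&limit=19&start={start}&dir=ASC"
)

all_items = []
offset = 0
while True:
    r = requests.get(URL_TEMPLATE.format(start=offset))
    payload = json.loads(r.text.replace("handleResponse(", ""))
    items = payload["results"]["item"]
    if not items:           # no more results: stop paging
        break
    all_items.extend(items)
    offset += PAGE_SIZE     # advance by a full page, not by 1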
I have a simple script that logs goals for Yandex.Metrika counters. It writes to a text file whenever a goal is created or deleted. Code for writing to the text file:
if cID == 18662179:
    with open('toyota_goalss_log.txt','a') as log2:
        print(str(datetime.date.today()) +str(res2), file = log2)
        print(str(datetime.date.today()) +str(res2),cID)
        log2.close()
The script runs correctly if I click the "Run" button in the editor on PythonAnywhere: no errors, and the data is appended to the text files. But if I create a task to run this script every hour, the data does not get appended to the text files, and there are no errors in the task log or the error log either. What did I do wrong?
More code:
#!/usr/bin/python3.6
import requests
import datetime
from pprint import pprint
import time
def goalsS():
    token = 'AQAAAAAFKNk4AAPquxxxxxxxxx'
    headers = {'Authorization': 'OAuth ' + token}
    countersDict = {18662179:'site.ru', 901167:'site.ru'}
    counterIds = [18662179, 901167]
    for cID in counterIds:
        names = []
        ng=[]
        url = "https://api-metrika.yandex.net/management/v1/counter/"+str(cID)+"/goals"
        r = requests.get(url, headers=headers)
        res = r.json()['goals']
        for i in res:
            ng.append(str(i['id'])+": "+ i['name']+'|')
            names.append(i['name'])
        goalsDict = dict(zip(ng,names))
        clear = str(ng).replace('[','').replace(']','').replace("'",'').replace(',','')
        with open(str(cID)+'goals_log.log','a') as log:
            print(clear, file = log)
            log.close()
        li = []
        f = open(str(cID)+'goals_log.log', 'r')
        for line in f:
            line = set(line.rstrip("\n").split('|'))
            li.append(line)
        res2 = li[-1] - li[-2]
        if res2 == set():
            res2 = li[-2]-li[-1]
            print(res2,'set')
            if res2 == set():
                pass
            else:
                if cID == 18662179:
                    # 'Удалили цель(и)' means 'goal(s) deleted'
                    with open('toyota_goalss_log.txt','a') as log2:
                        print(str(datetime.date.today()) + ' ' + 'Удалили цель(и)'+' '+str(res2).replace(',','').replace('{','').replace('}',''),file = log2)
                        print(str(datetime.date.today()) + ' ' + 'Удалили цель\цели'+' '+str(res2),cID)
                        log2.close()
        else:
            if cID == 18662179:
                # 'Создали цель(и)' means 'goal(s) created'
                with open('toyota_goalss_log.txt','a') as log2:
                    print(str(datetime.date.today()) + ' ' + 'Создали цель(и)'+' '+str(res2).replace(',','').replace('{','').replace('}',''),file = log2)
                    print(str(datetime.date.today()) + ' ' + 'Создали цель\цели'+' '+str(res2),cID)
                    log2.close()

if __name__ == '__main__':
    goalsS()
Use an absolute path to the file. A scheduled task does not necessarily start in the same working directory as the editor's "Run" button, so a relative filename can end up pointing somewhere other than where you are looking.
with open('/path/to/file','a') as log2:
    ...
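If you would rather not hard-code the absolute path, a minimal sketch (assuming the log file should live next to the script, and reusing datetime and res2 from the question) is to build the path from the script's own location:

from pathlib import Path
import datetime

# The scheduled task may run from a different working directory, so anchor the
# log file to the directory containing this script.
LOG_PATH = Path(__file__).resolve().parent / 'toyota_goalss_log.txt'

with open(LOG_PATH, 'a') as log2:
    print(str(datetime.date.today()) + ' ' + str(res2), file=log2)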
I'm getting JSON data from the Facebook Graph API about:
my relationship with my friends
my friends' relationships with each other
Right now my program looks like this (in Python pseudo-code; please note some variables have been changed for privacy):
import json
import requests
# protected
_accessCode = "someAccessToken"
_accessStr = "?access_token=" + _accessCode
_myID = "myIDNumber"
r = requests.get("https://graph.facebook.com/" + _myID + "/friends/" + _accessStr)
raw = json.loads(r.text)
terminate = len(raw["data"])
# list used to store the friend/friend relationships
a = list()
for j in range(0, terminate + 1):
    # calculate terminating displacement:
    term_displacement = terminate - (j + 1)
    print("Currently processing: " + str(j) + " of " + str(terminate))
    for dj in range(1, term_displacement + 1):
        # construct urls based on the raw data:
        url = "https://graph.facebook.com/" + raw["data"][j]["id"] + "/friends/" + raw["data"][j + dj]["id"] + "/" + _accessStr
        # visit site *THIS IS THE BOTTLENECK*:
        reqTemp = requests.get(url)
        rawTemp = json.loads(reqTemp.text)
        if len(rawTemp["data"]) != 0:
            # data dumps to list which dumps to file
            a.append(str(raw["data"][j]["id"]) + "," + str(rawTemp["data"][0]["id"]))

outputFile = "C:/Users/franklin/Documents/gen/friendsRaw.csv"
output = open(outputFile, "w")

# write all me/friend relationship to file
for k in range(0, terminate):
    output.write(_myID + "," + raw["data"][k]["id"] + "\n")

# write all friend/friend relationships to file
for i in range(0, len(a)):
    output.write(a[i])

output.close()
So what it's doing is: first it calls my page and gets my friend list (this is allowed through the Facebook API using an access_token). Calling a friend's friend list is NOT allowed, but I can work around that by requesting the relationship between a friend on my list and another friend on my list. So in part two (indicated by the double for loops) I'm making another request to see if some friend, a, is also a friend of b (both of whom are on my list); if so, there will be a JSON object of length one with friend a's name.
But with about 357 friends there are literally thousands of page requests that need to be made; in other words, the program spends most of its time just waiting around for the JSON requests.
My question, then, is: can this be rewritten to be more efficient? Currently, due to security restrictions, calling a friend's friend-list attribute is disallowed, and it doesn't look like the API will allow it. Are there any Python tricks that can make this run faster? Maybe parallelism?
Update: the modified code is pasted below in the answers section.
Update: this is the solution I came up with. Thanks @DMCS for the FQL suggestion, but I just decided to use what I had. I will post the FQL solution when I get a chance to study the implementation. As you can see, this method just makes use of more condensed API calls.
Incidentally, for future reference, the API call limit is 600 calls per 600 seconds, per token and per IP, so for every unique IP address with a unique access token, the number of calls is limited to 1 call per second. I'm not sure what that means for asynchronous calling, @Gerrat, but there is that.
import json
import requests
# protected
_accessCode = "someaccesscode"
_accessStr = "?access_token=" + _accessCode
_myID = "someidnumber"
r = requests.get("https://graph.facebook.com/"
                 + _myID + "/friends/" + _accessStr)
raw = json.loads(r.text)
terminate = len(raw["data"])

a = list()
for k in range(0, terminate - 1):
    friendID = raw["data"][k]["id"]
    friendName = raw["data"][k]["name"]
    url = ("https://graph.facebook.com/me/mutualfriends/"
           + friendID + _accessStr)
    req = requests.get(url)
    temp = json.loads(req.text)
    print("Processing: " + str(k + 1) + " of " + str(terminate))
    for j in range(0, len(temp["data"])):
        a.append(friendID + "," + temp["data"][j]["id"] + ","
                 + friendName + "," + temp["data"][j]["name"])

# dump contents to file:
outputFile = "C:/Users/franklin/Documents/gen/friendsRaw.csv"
output = open(outputFile, "w")
print("Dumping to file...")

# write all me/friend relationships to file
for k in range(0, terminate):
    output.write(_myID + "," + raw["data"][k]["id"]
                 + ",me," + str(raw["data"][k]["name"].encode("utf-8", "ignore")) + "\n")

# write all friend/friend relationships to file
for i in range(0, len(a)):
    output.write(str(a[i].encode("utf-8", "ignore")) + "\n")

output.close()
This isn't likely optimal, but I tweaked your code a bit to use Requests async method (untested):
import json
import requests
from requests import async
# protected
_accessCode = "someAccessToken"
_accessStr = "?access_token=" + _accessCode
_myID = "myIDNumber"
r = requests.get("https://graph.facebook.com/" + _myID + "/friends/" + _accessStr)
raw = json.loads(r.text)
terminate = len(raw["data"])
# list used to store the friend/friend relationships
a = list()
def add_to_list(reqTemp):
    rawTemp = json.loads(reqTemp.text)
    if len(rawTemp["data"]) != 0:
        # data dumps to list which dumps to file
        a.append(str(raw["data"][j]["id"]) + "," + str(rawTemp["data"][0]["id"]))

async_list = []
for j in range(0, terminate + 1):
    # calculate terminating displacement:
    term_displacement = terminate - (j + 1)
    print("Currently processing: " + str(j) + " of " + str(terminate))
    for dj in range(1, term_displacement + 1):
        # construct urls based on the raw data:
        url = "https://graph.facebook.com/" + raw["data"][j]["id"] + "/friends/" + raw["data"][j + dj]["id"] + "/" + _accessStr
        req = async.get(url, hooks = {'response': add_to_list})
        async_list.append(req)

# gather up all the results
async.map(async_list)
outputFile = "C:/Users/franklin/Documents/gen/friendsRaw.csv"
output = open(outputFile, "w")
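As a side note, the async module shown above was later removed from requests (it lives on as the separate grequests package). A rough sketch of the same idea using only the standard library's concurrent.futures, reusing raw, terminate and _accessStr from the code above, and just as untested:

import itertools
import requests
from concurrent.futures import ThreadPoolExecutor

# Build every friend-pair URL up front, then fetch them on a small thread pool
# so the HTTP requests overlap instead of running strictly one after another.
pairs = list(itertools.combinations(range(terminate), 2))
urls = ["https://graph.facebook.com/" + raw["data"][j]["id"] + "/friends/"
        + raw["data"][k]["id"] + "/" + _accessStr
        for j, k in pairs]

def fetch(url):
    return requests.get(url).json()

a = []
with ThreadPoolExecutor(max_workers=10) as pool:
    for (j, k), result in zip(pairs, pool.map(fetch, urls)):
        if result.get("data"):
            a.append(str(raw["data"][j]["id"]) + "," + str(result["data"][0]["id"]))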
I'm using the following code on ScraperWiki to search Twitter for a specific hashtag.
It's working great and is picking out any postcode provided in the tweet (or returning false if none is available). This is achieved with the line data['location'] = scraperwiki.geo.extract_gb_postcode(result['text']).
But I'm only interested in tweets which include postcode information (this is because they're going to be added to a Google Map at a later stage).
What would be the easiest way to do this? I'm relatively au fait with PHP, but Python's a completely new area for me.
Thanks in advance for your help.
Best wishes,
Martin
import scraperwiki
import simplejson
import urllib2
QUERY = 'enter_hashtag_here'
RESULTS_PER_PAGE = '100'
NUM_PAGES = 10
for page in range(1, NUM_PAGES+1):
    base_url = 'http://search.twitter.com/search.json?q=%s&rpp=%s&page=%s' \
        % (urllib2.quote(QUERY), RESULTS_PER_PAGE, page)
    try:
        results_json = simplejson.loads(scraperwiki.scrape(base_url))
        for result in results_json['results']:
            #print result
            data = {}
            data['id'] = result['id']
            data['text'] = result['text']
            data['location'] = scraperwiki.geo.extract_gb_postcode(result['text'])
            data['from_user'] = result['from_user']
            data['created_at'] = result['created_at']
            print data['from_user'], data['text']
            scraperwiki.sqlite.save(["id"], data)
    except:
        print 'Oh dear, failed to scrape %s' % base_url
        break
Do you just want this? I tried it on the free ScraperWiki test page and it seems to do what you want. If you're looking for something more complicated, let me know.
import scraperwiki
import simplejson
import urllib2
QUERY = 'meetup'
RESULTS_PER_PAGE = '100'
NUM_PAGES = 10
for page in range(1, NUM_PAGES+1):
    base_url = 'http://search.twitter.com/search.json?q=%s&rpp=%s&page=%s' \
        % (urllib2.quote(QUERY), RESULTS_PER_PAGE, page)
    try:
        results_json = simplejson.loads(scraperwiki.scrape(base_url))
        for result in results_json['results']:
            #print result
            data = {}
            data['id'] = result['id']
            data['text'] = result['text']
            data['location'] = scraperwiki.geo.extract_gb_postcode(result['text'])
            data['from_user'] = result['from_user']
            data['created_at'] = result['created_at']
            if data['location']:
                print data['location'], data['from_user']
                scraperwiki.sqlite.save(["id"], data)
    except:
        print 'Oh dear, failed to scrape %s' % base_url
        break
Outputs:
P93JX VSDC
FV36RL Bootstrappers
Ci76fP Eli_Regalado
UN56fn JasonPalmer1971
iQ3H6zR GNOTP
Qr04eB fcnewtech
sE79dW melindaveee
ud08GT MariaPanlilio
c9B8EE akibantech
ay26th Thepinkleash
I've refined it a bit so it's a bit pickier than the ScraperWiki check for extracting GB postcodes, which lets through quite a few false positives. Basically I took the accepted answer from here and added some negative lookbehind/lookahead to filter out a few more. It looks like the ScraperWiki check does the regex without the negative lookbehind/lookahead. Hope that helps a bit.
import scraperwiki
import simplejson
import urllib2
import re
QUERY = 'sw4'
RESULTS_PER_PAGE = '100'
NUM_PAGES = 10
postcode_match = re.compile('(?<![0-9A-Z])([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {0,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)(?![0-9A-Z])', re.I)
for page in range(1, NUM_PAGES+1):
    base_url = 'http://search.twitter.com/search.json?q=%s&rpp=%s&page=%s' \
        % (urllib2.quote(QUERY), RESULTS_PER_PAGE, page)
    try:
        results_json = simplejson.loads(scraperwiki.scrape(base_url))
        for result in results_json['results']:
            #print result
            data = {}
            data['id'] = result['id']
            data['text'] = result['text']
            data['location'] = scraperwiki.geo.extract_gb_postcode(result['text'])
            data['from_user'] = result['from_user']
            data['created_at'] = result['created_at']
            if data['location'] and postcode_match.search(data['text']):
                print data['location'], data['text']
                scraperwiki.sqlite.save(["id"], data)
    except:
        print 'Oh dear, failed to scrape %s' % base_url
        break
I have searched and searched, but I have only found solutions involving PHP and not Python/Django. My goal is to make a website (backend coded in Python) that will allow a user to input a string. The backend script would then run and output a dictionary with some info. What I want is to use the info from the dictionary to draw onto an image I have on the server and give the new image to the user. How can I do this offline for now? What libraries can I use? Any suggestions on the route I should take would be lovely.
I am still a novice, so please forgive me if my code needs work. So far I have no errors with what I have, but like I said I have no clue where to go next to achieve my goal. Any tips would be greatly appreciated.
This is sort of what I want the end goal to be: http://combatarmshq.com/dynamic-signatures.html
This is what I have so far (I used Beautiful Soup as a parser, from here. If this is excessive, or if I did it in a not-so-good way, please let me know if there is a better alternative. Thanks):
The url where I'm getting the numbers I want (These are dynamic) is this: http://combatarms.nexon.net/ClansRankings/PlayerProfile.aspx?user=
The name of the player will go after user so an example is http://combatarms.nexon.net/ClansRankings/PlayerProfile.aspx?user=-aonbyte
This is the code with the basic functions to scrape the website:
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
def get_avatar(player_name):
    '''Return the players avatar as a binary string.'''
    player_name = str(player_name)
    url = 'http://combat.nexon.net/Avatar/MyAvatar.srf?'
    url += 'GameName=CombatArms&CharacterID=' + player_name
    sock = urlopen(url)
    data = sock.read()
    sock.close()
    return data

def save_avatar(data, file_name):
    '''Saves the avatar data from get_avatar() in png format.'''
    local_file = open(file_name + '.png', 'w' + 'b')
    local_file.write(data)
    local_file.close()

def get_basic_info(player_name):
    '''Returns basic player statistics as a dictionary'''
    url = 'http://combatarms.nexon.net/ClansRankings'
    url += '/PlayerProfile.aspx?user=' + player_name
    sock = urlopen(url)
    html_raw = sock.read()
    sock.close()
    html_original_parse = BeautifulSoup(''.join(html_raw))
    player_info = html_original_parse.find('div', 'info').find('ul')
    basic_info_list = range(6)
    for i in basic_info_list:
        basic_info_list[i] = str(player_info('li', limit = 7)[i+1].contents[1])
    basic_info = dict(date = basic_info_list[0], rank = basic_info_list[1], kdr = basic_info_list[2], exp = basic_info_list[3], gp_earned = basic_info_list[4], gp_current = basic_info_list[5])
    return basic_info
And here is the code that tests out those functions:
from grabber import get_avatar, save_avatar, get_basic_info
player = raw_input('Player name: ')
print 'Downloading avatar...'
avatar_data = get_avatar(player)
file_name = raw_input('Save as? ')
print 'Saving avatar as ' + file_name + '.png...'
save_avatar(avatar_data, file_name)
print 'Retrieving ' + player + '\'s basic character info...'
player_info = get_basic_info(player)
print ''
print ''
print 'Info for character named ' + player + ':'
print 'Character creation date: ' + player_info['date']
print 'Rank: ' + player_info['rank']
print 'Experience: ' + player_info['exp']
print 'KDR: ' + player_info['kdr']
print 'Current GP: ' + player_info['gp_current']
print ''
raw_input('Press enter to close...')
If I understand you correctly, you want to get an image from one place, get some textual information from another place, draw text on top of the image, and then return the marked-up image. Do I have that right?
If so, get PIL, the Python Imaging Library. Both PIL and BeautifulSoup are capable of reading directly from an opened URL, so you can forget that socket nonsense. Get the player name from the HTTP request, open the image, use BeautifulSoup to get the data, use PIL's text functions to write on the image, save the image back into the HTTP response, and you're done.
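A minimal sketch of that last part with PIL (or its modern fork, Pillow); the avatar bytes and the stats dict are assumed to come from the question's get_avatar() and get_basic_info() helpers, and the exact text layout here is made up:

from io import BytesIO
from PIL import Image, ImageDraw, ImageFont

def draw_signature(avatar_bytes, stats, output_path='signature.png'):
    '''Write a few stats from the dict onto the avatar image and save it.'''
    image = Image.open(BytesIO(avatar_bytes)).convert('RGB')
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    # stats is assumed to be the dict returned by get_basic_info()
    lines = ['Rank: ' + stats['rank'], 'KDR: ' + stats['kdr'], 'EXP: ' + stats['exp']]
    y = 5
    for line in lines:
        draw.text((5, y), line, fill='white', font=font)
        y += 12
    image.save(output_path)
    return output_path

# Example usage with the question's helpers (hypothetical wiring):
# sig = draw_signature(get_avatar('-aonbyte'), get_basic_info('-aonbyte'))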