How can I get JSON data and sort it with Python - python

I have been searching the net for a long time, but to no avail. Please help, or try to give me some ideas on how to achieve this.
When I run print(html.text), I get all the JSON data:
[{"Site":"屏東(琉球)","county":"屏東縣","PM25":"6","DataCreationDate":"2020-04-19 03:00","ItemUnit":"μg/m3"},
{"Site":"臺南(北門)","county":"臺南市","PM25":"25","DataCreationDate":"2020-04-19 03:00","ItemUnit":"μg/m3"}, ...
If I use the loop for Site in jsondata: ..., I only get data like this:
SITE:基隆CYTY:基隆市P25:21DATE:2020-04-19 14:00UNIT:μg/m3
Why? Thank you sincerely for your answers.
import json
import requests
url1 = 'https://opendata.epa.gov.tw/ws/Data/ATM00625/?$format=json'
html = requests.get(url1)
# html.encoding = "BIG5"
html.encoding = html.apparent_encoding
# print(html.text)
jsondata = eval(html.text)
# jsondata = json.loads(html.text)
for Site in jsondata:
    Sitename = Site["Site"]
    countyname = Site["county"]
    PM25name = Site["PM25"]
    DataCreationDatename = Site["DataCreationDate"]
    ItemUnitname = Site["ItemUnit"]
    print("SITE:" + Sitename + "CYTY:" + countyname + "P25:" + PM25name + "DATE:" + DataCreationDatename + "UNIT:" + ItemUnitname)

Always remember not to use eval() when you have json.loads():
import json
import requests
url1 = 'https://opendata.epa.gov.tw/ws/Data/ATM00625/?$format=json'
html = requests.get(url1)
html = html.content.decode('utf8')  # Response objects have no .decode(); decode the raw bytes
html = json.loads(html)
for Site in html:
    Sitename = Site["Site"]
    countyname = Site["county"]
    PM25name = Site["PM25"]
    DataCreationDatename = Site["DataCreationDate"]
    ItemUnitname = Site["ItemUnit"]
    print("SITE:" + Sitename + "CYTY:" + countyname + "P25:" + PM25name + "DATE:" + DataCreationDatename + "UNIT:" + ItemUnitname)

Related

Replace multiple patterns one at a time with Python

What I want to do, basically: I have a list of URLs with multiple parameters, such as:
https://www.somesite.com/path/path2/path3?param1=value1&param2=value2
and what I would want to get is something like this:
https://www.somesite.com/path/path2/path3?param1=PAYLOAD&param2=value2
https://www.somesite.com/path/path2/path3?param1=value1&param2=PAYLOAD
I want to iterate through every parameter (basically every match of "=" and "&") and replace each value, one per URL variant. Thank you in advance.
from urllib.parse import urlparse
import re

urls = ["https://www.somesite.com/path/path2/path3?param1=value1&param2=value2&param3=value3",
        "https://www.anothersite.com/path/path2/path3?param1=value1&param2=value2&param3=value3"]
parseds = [urlparse(url) for url in urls]
newurls = []
for parsed in parseds:
    params = parsed[4].split("&")
    for i, param in enumerate(params):
        newparam = re.sub("=.+", "=PAYLOAD", param)
        newurls.append(
            parsed[0] +
            "://" +
            parsed[1] +
            parsed[2] +
            "?" +
            parsed[4].replace(param, newparam)
        )
newurls is
['https://www.somesite.com/path/path2/path3?param1=PAYLOAD&param2=value2&param3=value3',
'https://www.somesite.com/path/path2/path3?param1=value1&param2=PAYLOAD&param3=value3',
'https://www.somesite.com/path/path2/path3?param1=value1&param2=value2&param3=PAYLOAD',
'https://www.anothersite.com/path/path2/path3?param1=PAYLOAD&param2=value2&param3=value3',
'https://www.anothersite.com/path/path2/path3?param1=value1&param2=PAYLOAD&param3=value3',
'https://www.anothersite.com/path/path2/path3?param1=value1&param2=value2&param3=PAYLOAD']
I've solved it:
from urllib.parse import urlparse

url = "https://github.com/search?p=2&q=user&type=Code&name=djalel"
parsed = urlparse(url)
query = parsed.query
params = query.split("&")
new_query = []
for param in params:
    l = params.index(param)
    param = str(param.split("=")[0]) + "=" + "PAYLOAD"
    params[l] = param
    new_query.append("&".join(params))
    params = query.split("&")
for query in new_query:
    print(str(parsed.scheme) + '://' + str(parsed.netloc) + str(parsed.path) + '?' + query)
Output:
https://github.com/search?p=PAYLOAD&q=user&type=Code&name=djalel
https://github.com/search?p=2&q=PAYLOAD&type=Code&name=djalel
https://github.com/search?p=2&q=user&type=PAYLOAD&name=djalel
https://github.com/search?p=2&q=user&type=Code&name=PAYLOAD
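For what it's worth, the standard library can also do the splitting and reassembly. A minimal alternative sketch using parse_qsl/urlencode/urlunparse (not the approach above; note that urlencode will percent-encode any special characters in the values):
from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

def payload_variants(url, payload="PAYLOAD"):
    parsed = urlparse(url)
    params = parse_qsl(parsed.query, keep_blank_values=True)
    for i in range(len(params)):
        # replace the i-th value, keep the rest intact
        mutated = [(k, payload if j == i else v) for j, (k, v) in enumerate(params)]
        yield urlunparse(parsed._replace(query=urlencode(mutated)))

for u in payload_variants("https://github.com/search?p=2&q=user&type=Code&name=djalel"):
    print(u)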

Return multiple items

I am new to Python; as a matter of fact, this is my first Python project. I am using ebaysdk to search for electronics on eBay, and I want it to return multiple results because my app is for comparing prices, but it returns only one result.
Someone please help me to make the code return multiple results.
Here is my code snippet.
@app.route('/ebay_page_post', methods=['GET', 'POST'])
def ebay_page_post():
    if request.method == 'POST':
        # Get json format of the text sent by Ajax
        search = request.json['search']
        try:
            # ebaysdk code starts here
            api = finding(appid='JohnOkek-hybridse-PRD-5c2330105-9bbb62f2', config_file=None)
            api_request = {'keywords': search, 'outputSelector': 'SellerInfo', 'categoryId': '293'}
            response = api.execute('findItemsAdvanced', api_request)
            soup = BeautifulSoup(response.content, 'lxml')
            totalentries = int(soup.find('totalentries').text)
            items = soup.find_all('item')
            for item in items:
                cat = item.categoryname.string.lower()
                title = item.title.string.lower().strip()
                price = int(round(float(item.currentprice.string)))
                url = item.viewitemurl.string.lower()
                seller = item.sellerusername.text.lower()
                listingtype = item.listingtype.string.lower()
                condition = item.conditiondisplayname.string.lower()
                print('____________________________________________________________')
                # return json format of the result for Ajax processing
                return jsonify(cat + '|' + title + '|' + str(price) + '|' + url + '|' + seller + '|' + listingtype + '|' + condition)
        except ConnectionError as e:
            return jsonify(e)
Based on the code you provided, here is an example using a key/value pair collection that you could use:
@app.route('/ebay_page_post', methods=['GET', 'POST'])
def ebay_page_post():
    if request.method == 'POST':
        # Get json format of the text sent by Ajax
        search = request.json['search']
        try:
            # ebaysdk code starts here
            api = finding(appid='JohnOkek-hybridse-PRD-5c2330105-9bbb62f2', config_file=None)
            api_request = {'keywords': search, 'outputSelector': 'SellerInfo', 'categoryId': '293'}
            response = api.execute('findItemsAdvanced', api_request)
            soup = BeautifulSoup(response.content, 'lxml')
            totalentries = int(soup.find('totalentries').text)
            items = soup.find_all('item')
            # This will be returned
            itemsFound = {}
            # This index will be incremented
            # each time an item is added
            index = 0
            for item in items:
                cat = item.categoryname.string.lower()
                title = item.title.string.lower().strip()
                price = int(round(float(item.currentprice.string)))
                url = item.viewitemurl.string.lower()
                seller = item.sellerusername.text.lower()
                listingtype = item.listingtype.string.lower()
                condition = item.conditiondisplayname.string.lower()
                # Adding the item found to the collection:
                # index is the key and the item string is the value
                itemsFound[index] = cat + '|' + title + '|' + str(price) + '|' + url + '|' + seller + '|' + listingtype + '|' + condition
                # Increment the index for the next item's key
                index += 1
            for key in itemsFound:
                print(key, ':', itemsFound[key])
            return jsonify(itemsFound)
        except ConnectionError as e:
            return jsonify(e)
Once an item is found, add it to the collection. After your for loop finishes, return the collection.
Right now you are returning (breaking the iteration) once you have found the first item.
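A slimmer sketch of the same idea for the loop body, on the assumption that a recent Flask version will serialize a top-level list (older versions may need it wrapped in a dict):
results = []
for item in items:
    # collect one plain dict per listing instead of returning immediately
    results.append({
        'category': item.categoryname.string.lower(),
        'title': item.title.string.lower().strip(),
        'price': int(round(float(item.currentprice.string))),
        'url': item.viewitemurl.string.lower(),
    })
# one jsonify call over the whole list, after the loop finishes
return jsonify(results)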
I was able to solve the problem.
Click here to see how I did it.
Thanks to every contributor; I am most grateful to you all.

String Indices must be integers, not str - API

I have an interesting behavior happening with my program.
I have the following methods:
def getMarket(self, Currency):
    return self.public_api('GetMarket/' + Currency + '_BTC')

def getBalance(self, Currency):
    self.api_params.clear()
    self.api_params['Currency'] = Currency
    return self.private_api('GetBalance')
my_api = buyBot(API_KEY, API_SECRET)
pumpCoin = my_api.getMarket('OSC')
pumpRawRate = pumpCoin['Data']['High']
pumpRawQty = .02
pumpBuyRate = my_api.calculateBuy(pumpRawRate)
pumpQty = float(pumpRawQty)/float(pumpBuyRate)
pumpSellRate = pumpCoin['Data']['Low']
pumpSellCoin = my_api.getBalance('OSC')
pumpSellAmount = pumpSellCoin["Data"]["Total"]
print str(pumpRawRate) + '\n' + str(pumpBuyRate) + '\n' + str(pumpSellRate) + '\n' + str(pumpQty) + '\n' + str(pumpSellAmount)
From pumpCoin = my_api.getMarket('OSC') through pumpSellRate = pumpCoin['Data']['Low'], I have no problems getting the information and working with it.
The problem seems to start with the line: pumpSellCoin = my_api.getBalance('OSC')
I get the following Error message:
Traceback (most recent call last):
File "C:\XXXXXX.py", line 92, in <module>
pumpSellAmount = pumpSellCoin["Data"]["Total"]
TypeError: string indices must be integers, not str
If I run print(my_api.getBalance('OSC')), I am able to see all the private API information retrieved by that call; however, I am not sure why it gives me a problem when I try to access one specific item in the result.
Let me know if you need any more information on this.
Any help will be greatly appreciated.
I have looked at the other posts and so far I can't seem to figure out the exact cause.
This is the private_api code
def private_api(self, meth):
    time.sleep(1)
    params = self.api_params
    url = self.apisite + meth
    nonce = str(int(time.time()))
    post_data = json.dumps(params)
    hash = hashlib.md5()
    hash.update(post_data)
    base64hash = base64.b64encode(hash.digest())
    sig = self.apikey + "POST" + urllib.quote_plus(url).lower() + nonce + base64hash
    hmacsig = base64.b64encode(hmac.new(base64.b64decode(self.apisecret), sig, hashlib.sha256).digest())
    hdr = "amx " + self.apikey + ":" + hmacsig + ":" + nonce
    headers = {'Authorization': hdr, 'Content-Type': 'application/json; charset=utf-8'}
    request = urllib2.Request(url, data=post_data, headers=headers)
    return urllib2.urlopen(request).read()
Please add this to your code:
print('pumpSellCoin', type(pumpSellCoin["Data"]), type(pumpSellCoin["Data"]["Total"]))
pumpSellAmount = pumpSellCoin["Data"]["Total"]
This will show you that one of your variables is a list or a string, not a dictionary, and that you need to access it using a number rather than a name like "Data" or "Total".
Try this example:
test = 'abcde'
print(type(test))
print(test[0])
print(test[2:4])
print(test['whatever']) # this results in TypeError: string indices must be integers
If I run the program as follows:
my_api = buyBot(API_KEY, API_SECRET)
pumpCoin = my_api.getMarket('OSC')
pumpRawRate = pumpCoin['Data']['High']
pumpRawQty = .02
pumpBuyRate = my_api.calculateBuy(pumpRawRate)
pumpQty = float(pumpRawQty)/float(pumpBuyRate)
pumpSellRate = pumpCoin['Data']['Low']
pumpSellBal = my_api.getBalance('OSC')
print pumpSellBal
#print('pumpSellBal', type(pumpSellBal["Data"]), type(pumpSellBal["Data"]["Total"]))
#pumpSellAmount = pumpSellBal['Data']['Total']
print str(pumpRawRate) + '\n' + str(pumpBuyRate) + '\n' + str(pumpSellRate) + '\n' + str(pumpQty) #+ '\n' + str(pumpSellAmount)
I get the following results:
{"Success":true,"Error":null,"Data":[{"CurrencyId":235,"Symbol":"OSC","Total":8561.03652012,"Available":0.00000000,"Unconfirmed":0.00000000,"HeldForTrades":8561.03652012,"PendingWithdraw":0.00000000,"Address":null,"Status":"OK","StatusMessage":null,"BaseAddress":null}]}
1.61e-06
2.415e-06
1.25e-06
8281.57349896
So I am definitely able to communicate back and forth; however, the issue only appears when I try to work with a single piece of information from pumpSellBal = my_api.getBalance('OSC')
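Putting the pieces together: private_api returns the raw response string (urllib2.urlopen(request).read()), and the printed output above shows that "Data" is a list, not a dict. A minimal sketch of the fix, assuming the API always answers with JSON:
import json

# Parse the raw JSON text returned by private_api into a dict
balance = json.loads(my_api.getBalance('OSC'))
# "Data" is a list of balance records, so index it before the field lookup
pumpSellAmount = balance["Data"][0]["Total"]
print(pumpSellAmount)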

Function invoking issue

I have this Python code, but when I run it, it prints out just the first target. Here is my code:
def get_next_target(S):
    start_link = S.find('<a href=')
    start_quote = S.find('"', start_link)
    end_quote = S.find('"', start_quote + 1)
    url = S[start_quote + 1:end_quote]
    print url
    return url, end_quote
get_next_target(S)
where variable S = '<susuds><a href="www.target1.com"/><ahsahsh><saudahsd><a href="www.target2.com"/><p>sa</h1><a href="www.target3.com"/>'
What I want is to print out the three targets, but instead it's just printing the first one, why is that?
I think you should use BeautifulSoup to extract info from html/xml.
In [1]: from bs4 import BeautifulSoup
In [2]: html = '''<susuds><a href="www.target1.com"/><ahsahsh><saudahsd><a href=
...: "www.target2.com"/><p>sa</h1><a href="www.target3.com"/>'''
In [3]: soup = BeautifulSoup(html, 'lxml')
In [4]: for a in soup.find_all('a'):
...: print(a['href'])
...:
www.target1.com
www.target2.com
www.target3.com
If you want to achieve this without using any special module, then the following code will do it.
import re
import sys

S = '<susuds><a href="www.target1.com"/><ahsahsh><saudahsd><a href="www.target2.com"/><p>sa</h1><a href="www.target3.com"/>'
abc = []

def get_next_target(S):
    search_index = [i.start() for i in re.finditer('<a href=', S)]
    for j in range(len(search_index)):
        if (j == len(search_index) - 1):
            A = S[search_index[j]:len(S)]
            search_start_index = A.find('"')
            search_end_index = A.rfind('"')
            start_final = search_index[j] + search_start_index + 1
            start_end = search_index[j] + search_end_index
            final_result = S[start_final:start_end]
            abc.append(final_result)
            print abc
        else:
            A = S[search_index[j]:search_index[j+1]]
            search_start_index = A.find('"')
            search_end_index = A.rfind('"')
            start_final = search_index[j] + search_start_index + 1
            start_end = search_index[j] + search_end_index
            final_result = S[start_final:start_end]
            abc.append(final_result)

get_next_target(S)
Note: if you don't want to append the results to a list, replace the last two lines of the if and else branches with print final_result.
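As for the original "why": get_next_target only ever finds the first match, and it is called just once; the returned end_quote exists precisely so a caller can loop over the rest of the string. A minimal sketch reusing the question's original function (print_all_targets is a hypothetical wrapper name):
def print_all_targets(page):
    # keep scanning the remainder of the string until no link is left
    while page.find('<a href=') != -1:
        url, end_quote = get_next_target(page)  # prints one url per call
        page = page[end_quote:]

print_all_targets(S)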

Extract data from web page

I have a script to extract data from here: http://espn.go.com/nba/statistics/player/_/stat/scoring-per-48-minutes/
Part of obtaining the data in the script looks like this:
pts_start = data.find('">',mpg_end) + 2
pts_end = data.find('<',pts_start)
store.append(data[pts_start:pts_end])
mf_start = data.find(' >',pts_end) + 2
mf_end = data.find('<',mf_start)
store.append(data[mf_start:mf_end])
fg_start = data.find(' >',mf_end) + 2
fg_end = data.find('<',fg_start)
store.append(data[fg_start:fg_end])
I see that the names like fg and pts correspond to the table headlines, but I don't understand why certain ones are abbreviated in the script.
I want to modify the script to obtain the headlines on this table: http://espn.go.com/nba/statistics/player/_/stat/rebounds. I tried doing this by just plugging in the names as they appear at the top of the table but the resulting CSV file had missing information.
Full code:
import os
import csv
import time
import urllib2

uri = 'http://espn.go.com/nba/statistics/player/_/stat/scoring-per-48-minutes'

def get_data():
    try:
        req = urllib2.Request(uri)
        response = urllib2.urlopen(req, timeout=600)
        content = response.read()
        return content
    except Exception, e:
        print "\n[!] Error: " + str(e)
        print ''
        return False

def extract(data, rk):
    print '\n[+] Extracting data.'
    start = 0
    while True:
        store = [rk]
        if data.find('nba/player/', start) == -1:
            break
        with open("data.csv", "ab") as fcsv:
            main = data.find('nba/player/', start)
            name_start = data.find('>', main) + 1
            name_end = data.find('<', name_start)
            store.append(data[name_start:name_end])
            team_start = data.find('">', name_end) + 2
            team_end = data.find('<', team_start)
            store.append(data[team_start:team_end])
            gp_start = data.find(' >', team_end) + 2
            gp_end = data.find('<', gp_start)
            store.append(data[gp_start:gp_end])
            mpg_start = data.find(' >', gp_end) + 2
            mpg_end = data.find('<', mpg_start)
            store.append(data[mpg_start:mpg_end])
            pts_start = data.find('">', mpg_end) + 2
            pts_end = data.find('<', pts_start)
            store.append(data[pts_start:pts_end])
            mf_start = data.find(' >', pts_end) + 2
            mf_end = data.find('<', mf_start)
            store.append(data[mf_start:mf_end])
            fg_start = data.find(' >', mf_end) + 2
            fg_end = data.find('<', fg_start)
            store.append(data[fg_start:fg_end])
            m3_start = data.find(' >', fg_end) + 2
            m3_end = data.find('<', m3_start)
            store.append(data[m3_start:m3_end])
            p3_start = data.find(' >', m3_end) + 2
            p3_end = data.find('<', p3_start)
            store.append(data[p3_start:p3_end])
            ft_start = data.find(' >', p3_end) + 2
            ft_end = data.find('<', ft_start)
            store.append(data[ft_start:ft_end])
            ftp_start = data.find(' >', ft_end) + 2
            ftp_end = data.find('<', ftp_start)
            store.append(data[ftp_start:ftp_end])
            start = name_end
            rk = rk + 1
            csv.writer(fcsv).writerow(store)
    fcsv.close()

def main():
    print "\n[+] Initializing..."
    if not os.path.exists("data.csv"):
        with open("data.csv", "ab") as fcsv:
            csv.writer(fcsv).writerow(["RK", "PLAYER", "TEAM", "GP", "MPG", "PTS", "FGM-FGA", "FG%", "3PM-3PA", "3P%", "FTM-FTA", "FT%"])
        fcsv.close()
    rk = 1
    global uri
    while True:
        time.sleep(1)
        start = 0
        print "\n[+] Getting data, please wait."
        data = get_data()
        if not data:
            break
        extract(data, rk)
        print "\n[+] Preparing for next page."
        time.sleep(1.5)
        rk = rk + 40
        if rk > 300:
            print "\n[+] All Done !\n"
            break
        uri = 'http://espn.go.com/nba/statistics/player/_/stat/scoring-per-48-minutes/sort/avg48Points/count/' + str(rk)

if __name__ == '__main__':
    main()
I specifically want to know how to grab info based on the headlines, like TEAM GP MPG PTS FGM-FGA FG% 3PM-3PA 3P% FTM-FTA FT%, so that the script doesn't need to be changed beyond things like pts or mpg in pts_start = data.find('">',mpg_end) + 2.
I don't understand why I can't just input the name of the headline as shown in the table for certain ones. For example, instead of FTM-FTA, the script uses ft.
Extracting HTML data is rather easy with BeautifulSoup. The following example is meant to give you the idea rather than be a complete solution to your problem; however, you can easily extend it.
from bs4 import BeautifulSoup
import urllib2

def get_html_page_dom(url):
    response = urllib2.urlopen(url)
    html_doc = response.read()
    return BeautifulSoup(html_doc, 'html5lib')

def extract_rows(dom):
    table_rows = dom.select('.mod-content tbody tr')
    for tr in table_rows:
        # skip headers
        klass = tr.get('class')
        if klass is not None and 'colhead' in klass:
            continue
        tds = tr.select('td')
        yield {'RK': tds[0].string,
               'PLAYER': tds[1].select('a')[0].string,
               'TEAM': tds[2].string,
               'GP': tds[3].string
               # you can fetch the rest of the indexes for the corresponding headers
               }

if __name__ == '__main__':
    dom = get_html_page_dom('http://espn.go.com/nba/statistics/player/_/stat/scoring-per-48-minutes/')
    for data in extract_rows(dom):
        print(data)
You can simply run and see the result ;).
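Since the question is specifically about keying cells by the table's own headlines: here is a sketch of that idea, building on the answer above (it assumes the header rows carry the colhead class, as the skip logic in extract_rows suggests):
def extract_rows_by_header(dom):
    headers = None
    for tr in dom.select('.mod-content tbody tr'):
        cells = [td.get_text() for td in tr.select('td')]
        klass = tr.get('class')
        if klass is not None and 'colhead' in klass:
            headers = cells  # e.g. RK, PLAYER, TEAM, GP, MPG, ...
            continue
        if headers:
            # key every cell by the headline above it
            yield dict(zip(headers, cells))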
