I started two days ago with the Ethereum blockchain, so my knowledge is still a little bit all over the place. Nevertheless, I managed to connect to a node, pull some general block data and so on. As the next level of difficulty, I tried to start building event filters in order to look at more specific types of historical data (to be clear, I don't want to fetch live data; I would rather query through the entire chain and get historical sample extracts for various types of data).
Here is my first attempt at building an event filter for the USDC Uniswap V2 contract, in order to collect Swap events (it's not about speed or efficiency right now, just about making it work):
w3 = Web3(Web3.HTTPProvider(NODE_ADDRESS))

# uniswap v2 USDC
address = w3.toChecksumAddress('0xb4e16d0168e52d35cacd2c6185b44281ec28c9dc')

# get the ABI for uniswap v2 pair events
resp = requests.get("https://unpkg.com/@uniswap/v2-core@1.0.0/build/IUniswapV2Pair.json")
if resp.status_code == 200:
    abi = json.loads(resp.content)['abi']

# create contract object
contract = w3.eth.contract(address=address, abi=abi)

# get topics by hashing abi event signatures
res = contract.events.Swap.build_filter()

# put this into a filter input dictionary
filter_params = {'fromBlock': int_to_hex(12000000), 'toBlock': int_to_hex(12010000), **res.filter_params}
# res.filter_params contains: 'topics' and 'address'

# create a filter id (i.e. a hashed version of the filter data, representing the filter)
method = 'eth_newFilter'
params = [filter_params]
resp = self.block_manager.general_sample_request(method, params)
if 'error' in resp:
    print(resp)
else:
    filter_id = resp['result']

# pass on the filter id, in order to query the respective logs
params = [filter_id]
method = 'eth_getFilterLogs'
resp = self.block_manager.general_sample_request(method, params)
# takes about 10-12s for about 12000 events
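Side note: the same filter_params dictionary can apparently also be passed to a single eth_getLogs call, which skips the separate eth_newFilter step; a minimal sketch, reusing the block_manager helper from above:

# one-shot alternative to eth_newFilter + eth_getFilterLogs (assumes the node allows this block range)
method = 'eth_getLogs'
params = [filter_params]
resp = self.block_manager.general_sample_request(method, params)
# resp['result'] is the same list of raw log entries as below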
The resulting array contains event logs of this structure:
resp['result'][0]
>>>
{'address': '0xb4e16d0168e52d35cacd2c6185b44281ec28c9dc',
'topics': ['0xd78ad95fa46c994b6551d0da85fc275fe613ce37657fb8d5e3d130840159d822',
'0x0000000000000000000000007a250d5630b4cf539739df2c5dacb4c659f2488d',
'0x0000000000000000000000000ffd670749d4179558b6b367e30e72ce2efea28f'],
'data': '0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000\
00000000000000000000000000034f0f8a0c7663264000000000000000000000000000000000000000000000\
000000000019002d5b60000000000000000000000000000000000000000000000000000000000000000',
'blockNumber': '0xb71b01',
'transactionHash': '0x76403053ee0300411b68fc223b327b51fb4f1a26e1f6cb8667e05ec370e8176e',
'transactionIndex': '0x22',
'blockHash': '0x4bd35cb48395e77fd317a0309342c95d6687dbc4fcb85ada2d635fe266d1e769',
'logIndex': '0x16',
'removed': False}
As far as I understand now, I can somehow apply the ABI to decode the 'data' field.
I tried with this function:
contract.decode_function_input(resp['result'][0]['data'])
but it gives me this error:
>>> ValueError: Could not find any function with matching selector
It seems like there is some problem with decoding the data. However, I am so close now to getting the real data that I don't want to give up xD. Any help will be appreciated!
Thanks!
import json
import traceback
from pprint import pprint
from eth_utils import event_abi_to_log_topic, to_hex
from hexbytes import HexBytes
from web3._utils.events import get_event_data
from web3.auto import w3

def decode_tuple(t, target_field):
    output = dict()
    for i in range(len(t)):
        if isinstance(t[i], (bytes, bytearray)):
            output[target_field[i]['name']] = to_hex(t[i])
        elif isinstance(t[i], (tuple)):
            output[target_field[i]['name']] = decode_tuple(t[i], target_field[i]['components'])
        else:
            output[target_field[i]['name']] = t[i]
    return output

def decode_list_tuple(l, target_field):
    output = l
    for i in range(len(l)):
        output[i] = decode_tuple(l[i], target_field)
    return output

def decode_list(l):
    output = l
    for i in range(len(l)):
        if isinstance(l[i], (bytes, bytearray)):
            output[i] = to_hex(l[i])
        else:
            output[i] = l[i]
    return output

def convert_to_hex(arg, target_schema):
    """
    utility function to convert byte codes into human readable and json serializable data structures
    """
    output = dict()
    for k in arg:
        if isinstance(arg[k], (bytes, bytearray)):
            output[k] = to_hex(arg[k])
        elif isinstance(arg[k], (list)) and len(arg[k]) > 0:
            target = [a for a in target_schema if 'name' in a and a['name'] == k][0]
            if target['type'] == 'tuple[]':
                target_field = target['components']
                output[k] = decode_list_tuple(arg[k], target_field)
            else:
                output[k] = decode_list(arg[k])
        elif isinstance(arg[k], (tuple)):
            target_field = [a['components'] for a in target_schema if 'name' in a and a['name'] == k][0]
            output[k] = decode_tuple(arg[k], target_field)
        else:
            output[k] = arg[k]
    return output

def _get_topic2abi(abi):
    if isinstance(abi, (str)):
        abi = json.loads(abi)
    event_abi = [a for a in abi if a['type'] == 'event']
    topic2abi = {event_abi_to_log_topic(_): _ for _ in event_abi}
    return topic2abi

def _sanitize_log(log):
    for i, topic in enumerate(log['topics']):
        if not isinstance(topic, HexBytes):
            log['topics'][i] = HexBytes(topic)
    if 'address' not in log:
        log['address'] = None
    if 'blockHash' not in log:
        log['blockHash'] = None
    if 'blockNumber' not in log:
        log['blockNumber'] = None
    if 'logIndex' not in log:
        log['logIndex'] = None
    if 'transactionHash' not in log:
        log['transactionHash'] = None
    if 'transactionIndex' not in log:
        log['transactionIndex'] = None

def decode_log(log, abi):
    if abi is not None:
        try:
            # get a dict with all available events from the ABI
            topic2abi = _get_topic2abi(abi)
            # ensure the log contains all necessary keys
            _sanitize_log(log)
            # get the ABI of the event in question (stored as the first topic)
            event_abi = topic2abi[log['topics'][0]]
            # get the event name
            evt_name = event_abi['name']
            # get the event data
            data = get_event_data(w3.codec, event_abi, log)['args']
            target_schema = event_abi['inputs']
            decoded_data = convert_to_hex(data, target_schema)
            return (evt_name, decoded_data, target_schema)
        except Exception:
            return ('decode error', traceback.format_exc(), None)
    else:
        return ('no matching abi', None, None)
Example usage:
output = decode_log(
{'data': '0x000000000000000000000000000000000000000000000000000000009502f90000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000093f8f932b016b1c',
'topics': [
'0xd78ad95fa46c994b6551d0da85fc275fe613ce37657fb8d5e3d130840159d822',
'0x0000000000000000000000007a250d5630b4cf539739df2c5dacb4c659f2488d',
'0x000000000000000000000000242301fa62f0de9e3842a5fb4c0cdca67e3a2fab'],
},
pair_abi
)
print(output[0])
pprint(output[1])
# Swap
# {'amount0In': 2500000000,
# 'amount0Out': 0,
# 'amount1In': 0,
# 'amount1Out': 666409132118600476,
# 'sender': '0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D',
# 'to': '0x242301FA62f0De9e3842A5Fb4c0CdCa67e3A2Fab'}
Or in your case:
output = decode_log(resp['result'][0], pair_abi)
print(output[0])
pprint(output[1])
# Swap
# {'amount0In': 0,
# 'amount0Out': 6711072182,
# 'amount1In': 3814822253806629476,
# 'amount1Out': 0,
# 'sender': '0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D',
# 'to': '0x0Ffd670749D4179558b6B367E30e72ce2efea28F'}
Now, note that you need to provide the pair_abi variable. It depends on the type of smart contract you're using. On Uniswap V3, I've found that the UniswapV2Pair ABI worked for some events, while the UniswapV3Pool ABI worked for others, in particular for the Swap event, which I've found the most useful.
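For the Uniswap V2 Swap example above, pair_abi can simply be the ABI the question already fetches from unpkg; a minimal sketch (same source assumed to still be available):

import json
import requests

# same ABI source as in the question; any local copy of the IUniswapV2Pair ABI works too
resp = requests.get("https://unpkg.com/@uniswap/v2-core@1.0.0/build/IUniswapV2Pair.json")
pair_abi = json.loads(resp.content)['abi']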
After a few hours of digging I managed to find this solution, which is a slightly modified version of the one proposed in https://towardsdatascience.com/decoding-ethereum-smart-contract-data-eed513a65f76. Big thumbs up to its author. You can read more there on parsing the transaction input too.
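As a side note on the decode_function_input attempt from the question: that method decodes the input of a transaction (a 4-byte function selector plus arguments), not the data field of an event log, which is why it reported "Could not find any function with matching selector". A rough sketch of the transaction-input route, assuming a contract object built with the ABI of the contract the transaction was actually sent to (for Swap logs that is usually the Uniswap router, so router_contract here is a hypothetical contract object with the router ABI, and get_transaction may be getTransaction on older web3.py versions):

# take any raw log from eth_getFilterLogs
log = resp['result'][0]
# fetch the transaction that emitted the log and decode its calldata
tx = w3.eth.get_transaction(log['transactionHash'])
func, func_args = router_contract.decode_function_input(tx['input'])
print(func.fn_name, func_args)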
I am running a dask.distributed Client that gets data from an API with several parameters, parses the results and joins/aggregates on each result. This is done with client.map().
Sometimes the API call gives an empty string because the specific combination of input parameters doesn't exist. It doesn't make sense to continue with the computations, and I would like to just kill that worker (without passing on e.g. a None).
How do I tell Dask to kill a worker if its result is None/error and exclude that future from the following operations?
Please let me know if you need more details.
Thanks.
EDIT:
Added a minimal working example to show the logic: the first map produces a lot of "useless" workers that I would like to kill.
Please note that this is not my actual use case; I am querying an Influx database via HTTP requests, but the general structure of the code is the same. I am open to any comments on how to do this faster/more efficiently.
import requests
import numpy as np
import pandas as pd
from dask.distributed import Client, LocalCluster, as_completed
import dask.dataframe as dd
def fetch_html(pair):
    req_string = 'https://www.bitstamp.net/api/v2/order_book/{currency_pair}/'
    response = requests.get(req_string.format(currency_pair=pair))
    try:
        result = response.json()
        return result
    except Exception as e:
        print('Error: {}\nMessage: {}'.format(e, response.reason))
        return None

def parse_result(result):
    if result:
        data = {}
        data['prices'] = [e[0] for e in result['bids']]
        data['vols'] = [e[1] for e in result['bids']]
        data['index'] = [result['timestamp'] for i in data['prices']]
        df = pd.DataFrame.from_dict(data).set_index('index')
        return df
    else:
        return pd.DataFrame()

def other_calcs(result):
    if not result.empty:
        # something
        return result
    else:
        return pd.DataFrame()

def aggregator(res1, res2):
    if (not res1.empty) and (not res2.empty):
        # something
        return res1
    elif not res2.empty:
        # something
        return res2
    elif not res1.empty:
        return res1
    else:
        return pd.DataFrame()
if __name__=='__main__':

    pairs = [
        # legit params (100s of these):
        'btcusd',
        'btceur',
        'btcgbp',
        'bateur',
        'batbtc',
        'umausd',
        'xrpusdt',
        'eurteur',
        'eurtusd',
        'manausd',
        'sandeur',
        'storjusd',
        'storjeur',
        'adausd',
        'adaeur',
        # bad params resulting in error / empty result (100s of these)
        'foobar',
        'foobaz',
        'foousd',
        'barbaz',
        'bazbar',
    ]

    cluster = LocalCluster(n_workers=16, threads_per_worker=1)
    client = Client(cluster)

    futures_list = client.map(fetch_html, pairs)
    futures_list = client.map(parse_result, futures_list)
    futures_list = client.map(other_calcs, futures_list)

    seq = as_completed(futures_list)
    while seq.count() > 1:
        f1 = next(seq)
        f2 = next(seq)
        new = client.submit(aggregator, f1, f2, priority=1)
        seq.add(new)

    final = next(seq)
    final = final.result()
    print(final.head())
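One direction I have been considering (just a sketch on top of the example above, not tested): instead of killing anything, drop the "useless" futures as soon as their results turn out empty, and only aggregate the survivors:

from dask.distributed import as_completed

# keep only futures whose parsed/processed result is non-empty
useful = []
for future, result in as_completed(futures_list, with_results=True):
    if result is not None and not result.empty:
        useful.append(future)

# pairwise aggregation over the surviving futures only
while len(useful) > 1:
    f1, f2 = useful.pop(), useful.pop()
    useful.append(client.submit(aggregator, f1, f2, priority=1))

final = useful[0].result() if useful else pd.DataFrame()
print(final.head())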
I am currently having an issue where I am trying to store data in a list (using dataclasses). When I print the data inside the list from within the function (PullIncursionData()), it shows a set of numbers (never the same ones, which is expected given the nature of the data). But when I print the list after calling the function and storing its return value in a variable, it somehow prints only the same number.
I cannot share the numbers, as they update with EVE Online's API, so the only way to see them is to run it locally and read the first list yourself.
The repository is Here: https://github.com/AtherActive/EVEAPI-Demo
Heads up! Inside main.py (the file with the issue; a snippet of code is down below) there are more functions. All functions from line 90 onward are important; the rest can be ignored for this question, as they do not interact with the other functions.
def PullIncursionData():
    # Pulls data from URL and converts it into JSON
    url = 'https://esi.evetech.net/latest/incursions/?datasource=tranquility'
    data = rq.get(url)
    jsData = data.json()
    # Init var to store incursions
    incursions = []
    # Set length for loop. yay
    length = len(jsData)
    # Every loop incursion data will be read by __parseIncursionData(). It then gets added to var Incursions.
    for i in range(length):
        # Add data to var Incursion.
        incursions.append(__parseIncursionData(jsData, i))
        # If Dev mode, print some debug. Can be toggled in settings.py
        if settings.developerMode == 1:
            print(incursions[i].constellation_id)
    return incursions

# Basically parses the input data in a decent manner. No comments needed really.
def __parseIncursionData(jsData, i):
    icstruct = stru.Incursion
    icstruct.constellation_id = jsData[i]['constellation_id']
    icstruct.constellation_name = 'none'
    icstruct.staging = jsData[i]['staging_solar_system_id']
    icstruct.region_name = ResolveSystemNames(icstruct.constellation_id, 'con-reg')
    icstruct.status = jsData[i]['state']
    icstruct.systems_id = jsData[i]['infested_solar_systems']
    icstruct.systems_names = ResolveSystemNames(jsData[i]['infested_solar_systems'], 'system')
    return icstruct

# Resolves names for systems, regions and constellations. Still WIP.
def ResolveSystemNames(id, mode='constellation'):
    # init value
    output_name = 'none'
    # If constellation, pull data and find region name.
    if mode == 'con-reg':
        url = 'https://www.fuzzwork.co.uk/api/mapdata.php?constellationid={}&format=json'.format(id)
        data = rq.get(url)
        jsData = data.json()
        output_name = jsData[0]['regionname']
    # Pulls system name from Fuzzwork.co.uk.
    elif mode == 'system':
        # Convert output to a list.
        output_name = []
        length = len(id)
        # Pulls system name from Fuzzwork. Not that hard.
        for i in range(length):
            url = 'https://www.fuzzwork.co.uk/api/mapdata.php?solarsystemid={}&format=json'.format(id[i])
            data = rq.get(url)
            jsData = data.json()
            output_name.append(jsData[i]['solarsystemname'])
    return output_name

icdata = PullIncursionData()
print('external data check:')
length = len(icdata)
for i in range(length):
    print(icdata[i].constellation_id)
structures.py (custom file)
@dataclass
class Incursion:
    constellation_id = int
    constellation_name = str
    staging = int
    staging_name = str
    systems_id = list
    systems_names = list
    region_name = str
    status = str

    def ___init___(self):
        self.constellation_id = -1
        self.constellation_name = 'undefined'
        self.staging = -1
        self.staging_name = 'undefined'
        self.systems_id = []
        self.systems_names = []
        self.region_name = 'undefined'
        self.status = 'unknown'
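For reference, the idiomatic @dataclass pattern declares fields with type annotations and default values and lets the decorator generate __init__, so every instantiation yields an independent object; a minimal, purely illustrative sketch (names are not from the repository):

from dataclasses import dataclass, field

@dataclass
class IncursionExample:
    constellation_id: int = -1
    constellation_name: str = 'undefined'
    systems_id: list = field(default_factory=list)

a = IncursionExample(constellation_id=1)
b = IncursionExample(constellation_id=2)
print(a.constellation_id, b.constellation_id)  # 1 2 - independent instances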
I am getting different results when I use Bio Entrez to search. For example, when I search in the browser using the query "covid side effect" I get 344 results, whereas I get only 92 when I use Bio Entrez. This is the code I was using:
from Bio import Entrez
Entrez.email = "Your.Name.Here@example.org"
handle = Entrez.esearch(db="pubmed", retmax=40, term="covid side effect", idtype="acc")
record = Entrez.read(handle)
handle.close()
print(record['Count'])
I was hoping someone could help me with this discrepancy.
For some reason everyone seems to have the same issue, whether with the R API or the Python API. I have found a workaround to get the same results. It is slow, but it gets the job done. If your result count is less than 10k you could probably use Selenium to get the PubMed IDs. Otherwise, we can scrape the data using the code below. I hope this will help someone in the future.
import requests
# # Custom Date Range
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=dates.2009/01/01-2020/03/01&format=pmid&sort=pubdate&size=200&page={}".format(i))
# # Custom Year Range
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=years.2010-2019&format=pmid&sort=pubdate&size=200&page={}".format(i))
# #Relative Date
# req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term=covid&filter=datesearch.y_1&format=pmid&sort=pubdate&size=200&page={}".format(i))
# # filter language
# # &filter=lang.english
# # filter human
# #&filter=hum_ani.humans
# Systematic Review
#&filter=pubt.systematicreview
# Case Reports
# &filter=pubt.casereports
# Age
# &filter=age.newborn
search = "covid lungs"
# search_list = "+".join(search.split(' '))
def id_retriever(search_string):
    string = "+".join(search_string.split(' '))
    result = []
    old_result = len(result)
    for page in range(1, 10000000):
        req = requests.get("https://pubmed.ncbi.nlm.nih.gov/?term={string}&format=pmid&sort=pubdate&size=200&page={page}".format(page=page, string=string))
        for j in req.iter_lines():
            decoded = j.decode("utf-8").strip(" ")
            length = len(decoded)
            if "log_displayeduids" in decoded and length > 46:
                data = (str(j).split('"')[-2].split(","))
                result = result + data
                data = []
        new_result = len(result)
        if new_result != old_result:
            old_result = new_result
        else:
            break
    return result

ids = id_retriever(search)
len(ids)
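As an aside, if the Entrez count itself is acceptable and you only need the complete ID list, esearch can also be paged with its standard retstart/retmax parameters instead of scraping; a minimal sketch:

from Bio import Entrez

Entrez.email = "Your.Name.Here@example.org"

def esearch_all_ids(term, db="pubmed", page_size=200):
    ids = []
    retstart = 0
    while True:
        # note: for PubMed, E-utilities typically caps retstart + retmax at 10k records
        handle = Entrez.esearch(db=db, term=term, retstart=retstart, retmax=page_size)
        record = Entrez.read(handle)
        handle.close()
        batch = record["IdList"]
        if not batch:
            break
        ids.extend(batch)
        retstart += len(batch)
    return ids

pmids = esearch_all_ids("covid side effect")
print(len(pmids))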
I'm working on a small project retrieving information about books from the Google Books API using Python 3. For this I make a call to the API, read out the variables, and store them in a list. For a search like "linkedin" this works perfectly. However, when I enter "Google", it reads the second title from the JSON input. How can this happen?
Please find my code below (Google_Results is the class I use to initialize the variables):
import requests

def Book_Search(search_term):
    parms = {"q": search_term, "maxResults": 3}
    r = requests.get(url="https://www.googleapis.com/books/v1/volumes", params=parms)
    print(r.url)
    results = r.json()
    i = 0
    for result in results["items"]:
        try:
            isbn13 = str(result["volumeInfo"]["industryIdentifiers"][0]["identifier"])
            isbn10 = str(result["volumeInfo"]["industryIdentifiers"][1]["identifier"])
            title = str(result["volumeInfo"]["title"])
            author = str(result["volumeInfo"]["authors"])[2:-2]
            publisher = str(result["volumeInfo"]["publisher"])
            published_date = str(result["volumeInfo"]["publishedDate"])
            description = str(result["volumeInfo"]["description"])
            pages = str(result["volumeInfo"]["pageCount"])
            genre = str(result["volumeInfo"]["categories"])[2:-2]
            language = str(result["volumeInfo"]["language"])
            image_link = str(result["volumeInfo"]["imageLinks"]["thumbnail"])
            dict = Google_Results(isbn13, isbn10, title, author, publisher, published_date, description, pages, genre,
                                  language, image_link)
            gr.append(dict)
            print(gr[i].title)
            i += 1
        except:
            pass
    return

gr = []
Book_Search("Linkedin")
I am a beginner to Python, so any help would be appreciated!
It does so because there is no publisher entry in volumeInfo of the first entry, thus it raises a KeyError and your except captures it. If you're going to work with fuzzy data you have to account for the fact that it will not always have the expected structure. For simple cases you can rely on dict.get() and its default argument to return a 'valid' default entry if an entry is missing.
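For instance, a tiny self-contained illustration of that pattern:

volume_info = {"title": "Some Book"}  # "publisher" is missing here
publisher = volume_info.get("publisher", "Unknown")  # no KeyError, falls back to the default
print(publisher)  # Unknown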
Also, there are a few conceptual problems with your function: it relies on a global gr, which is bad design; it shadows the built-in dict type; and it captures all exceptions, guaranteeing that you cannot exit your code even with a SIGINT... I'd suggest converting it to something a bit more sane:
def book_search(search_term, max_results=3):
    results = []  # a list to store the results
    parms = {"q": search_term, "maxResults": max_results}
    r = requests.get(url="https://www.googleapis.com/books/v1/volumes", params=parms)
    try:  # just in case the server doesn't return valid JSON
        for result in r.json().get("items", []):
            if "volumeInfo" not in result:  # invalid entry - missing volumeInfo
                continue
            result_dict = {}  # a dictionary to store our discovered fields
            result = result["volumeInfo"]  # all the data we're interested in is in volumeInfo
            isbns = result.get("industryIdentifiers", None)  # capture ISBNs
            if isinstance(isbns, list) and isbns:
                for i, t in enumerate(("isbn10", "isbn13")):
                    if len(isbns) > i and isinstance(isbns[i], dict):
                        result_dict[t] = isbns[i].get("identifier", None)
            result_dict["title"] = result.get("title", None)
            authors = result.get("authors", None)  # capture authors
            if isinstance(authors, list) and len(authors) > 2:  # you're slicing from 2
                result_dict["author"] = str(authors[2:-2])
            result_dict["publisher"] = result.get("publisher", None)
            result_dict["published_date"] = result.get("publishedDate", None)
            result_dict["description"] = result.get("description", None)
            result_dict["pages"] = result.get("pageCount", None)
            genres = result.get("categories", None)  # capture genres
            if isinstance(genres, list) and len(genres) > 2:  # since you're slicing from 2
                result_dict["genre"] = str(genres[2:-2])
            result_dict["language"] = result.get("language", None)
            result_dict["image_link"] = result.get("imageLinks", {}).get("thumbnail", None)
            # make sure Google_Results accepts keyword arguments like title, author...
            # and make them optional as they might not be in the returned result
            gr = Google_Results(**result_dict)
            results.append(gr)  # add it to the results list
    except ValueError:
        return None  # invalid response returned, you may raise an error instead
    return results  # return the results
Then you can easily retrieve as much info as possible for a term:
gr = book_search("Google")
And it will be far more tolerant of data omissions, provided that your Google_Results type makes most of the entries optional.
Following @Coldspeed's recommendation it became clear that missing information in the JSON caused the exception to be raised. Since I only had a "pass" statement there, it skipped the entire result. Therefore I will have to adapt the "try and except" statements so errors do get handled properly.
Thanks for the help guys!
I am getting JIRA data using the following Python code.
How do I store the response for more than one key (my example shows only one key, but in general I get a lot of data) and print only the values corresponding to total, key, customfield_12830, and summary?
import requests
import json
import logging
import datetime
import base64
import urllib
serverURL = 'https://jira-stability-tools.company.com/jira'
user = 'username'
password = 'password'
query = 'project = PROJECTNAME AND "Build Info" ~ BUILDNAME AND assignee=ASSIGNEENAME'
jql = '/rest/api/2/search?jql=%s' % urllib.quote(query)
response = requests.get(serverURL + jql,verify=False,auth=(user, password))
print response.json()
response.json() OUTPUT:
http://pastebin.com/h8R4QMgB
From the link you pasted to pastebin and from the JSON that I saw, the response has your issues as a list, with each issue containing key, fields (which holds the custom fields), self, id, and expand.
You can simply iterate through this response and extract the values for the keys you want. You can go like this:
data = response.json()
issues = data.get('issues', list())
x = list()
for issue in issues:
    temp = {
        'key': issue['key'],
        'customfield': issue['fields']['customfield_12830'],
        'total': issue['fields']['progress']['total']
    }
    x.append(temp)
print(x)
x is a list of dictionaries containing the data for the fields you mentioned. Let me know if I have been unclear somewhere or if what I have given is not what you are looking for.
PS: It is always advisable to use dict.get('keyname', None) to get values, as you can always put a default value if the key is not found. For this solution I didn't do it, as I just wanted to show the approach.
Update: In the comments you (OP) mentioned that it gives an AttributeError. Try this code:
data = response.json()
issues = data.get('issues', list())
x = list()
for issue in issues:
    temp = dict()
    key = issue.get('key', None)
    if key:
        temp['key'] = key
    fields = issue.get('fields', None)
    if fields:
        customfield = fields.get('customfield_12830', None)
        temp['customfield'] = customfield
        progress = fields.get('progress', None)
        if progress:
            total = progress.get('total', None)
            temp['total'] = total
    x.append(temp)
print(x)
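If you also need the summary and the overall total from your question, both are available in the same search response (summary is a standard field under fields, and the response carries a top-level total); a small sketch extending the above:

data = response.json()

# total number of issues matching the JQL query
total_issues = data.get('total', None)

rows = []
for issue in data.get('issues', []):
    fields = issue.get('fields', {}) or {}
    rows.append({
        'key': issue.get('key', None),
        'summary': fields.get('summary', None),
        'customfield_12830': fields.get('customfield_12830', None),
    })

print(total_issues)
print(rows)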