I'm fetching financial information from an api endpoint and when I get a 200 response through
r = requests.get(url)
data = r.json()
It'll return None for all null values. How do I convert all null/None values to 0? Since it's financial data, the JSON is usually quite massive (300k-400k lines, some with deep nested nulls) so I can't do a try/except block on each TypeError.
An extract of the json response looks something like this:
{'0':
'Highlights': {'QuarterlyRevenueGrowthYOY': 0.671, 'GrossProfitTTM': 3750684, 'DilutedEpsTTM': 0.2, 'QuarterlyEarningsGrowthYOY': 0.95
5}, 'Valuation': {'TrailingPE': 60.75, 'ForwardPE': 0, 'PriceSalesTTM': 2.0817, 'PriceBookMRQ': 4.207, 'EnterpriseValueRevenue': 1.
806, 'EnterpriseValueEbitda': 0.0952}, 'Technicals': {'Beta': None, '52WeekHigh': 12.35, '52WeekLow': 7.84, '50DayMA': 11.0197, '20
0DayMA': 10.2209, 'SharesShort': 0, 'SharesShortPriorMonth': 0, 'ShortRatio': 0, 'ShortPercent': 0}, 'SplitsDividends': {'ForwardAn
nualDividendRate': 0.18, 'ForwardAnnualDividendYield': 0.0151, 'PayoutRatio': 0.9, 'DividendDate': '0000-00-00', 'ExDividendDate':
'2020-06-11', 'LastSplitFactor': '', 'LastSplitDate': '0000-00-00'}, 'Earnings': {'Last_0': {'date': '2020-06-30', 'epsActual': 0.1
9, 'epsEstimate': None, 'epsDifference': None, 'surprisePercent': None}, 'Last_1': {'date': '2019-12-31', 'epsActual': 1.86, 'epsEs
timate': None, 'epsDifference': None, 'surprisePercent': None}, 'Last_2': {'date': '2019-06-30', 'epsActual': -0.82, 'epsEstimate':
None, 'epsDifference': None, 'surprisePercent': None}, 'Last_3': {'date': '0000-00-00', 'epsActual': 0, 'epsEstimate': 0, 'epsDiff
erence': 0, 'surprisePercent': 0}}, 'Financials': {'Balance_Sheet': {'currency_symbol': 'EUR', 'quarterly_last_0': {'date': '2020-0
6-30', 'filing_date': None, 'totalAssets': '12810000.00', 'intangibleAssets': '281000.00', 'otherCurrentAssets': '60000.00', 'total
Liab': '4225000.00', 'totalStockholderEquity': '8585000.00', 'deferredLongTermLiab': '74000.00', 'otherCurrentLiab': '1274000.00',
'commonStock': '80000.00', 'retainedEarnings': '311000.00', 'otherLiab': '200000.00', 'goodWill': '3381000.00', 'otherAssets': '730
00.00', 'cash': '4983000.00', 'totalCurrentLiabilities': '4025000.00', 'shortLongTermDebt': None,
...
}
Yeah you get the point.. a ton of None all over the place. Any quick fixes for this?
def recursive_replace(obj, findVal, replaceVal):
    """Replace every value equal to ``findVal`` with ``replaceVal``, in place.

    Walks nested dicts and lists (the two container types ``json.loads``
    produces), so matches buried inside JSON arrays are replaced as well.

    Args:
        obj: Decoded JSON document (dict, list, or scalar).
        findVal: Value to look for; matched with ``==``.
        replaceVal: Value substituted for each match.

    Returns:
        ``obj`` itself (mutated) when it is a dict or list. For a scalar
        root, ``replaceVal`` if it equals ``findVal``, otherwise the scalar.
    """
    if isinstance(obj, dict):
        entries = obj.items()
    elif isinstance(obj, list):
        entries = enumerate(obj)
    else:
        # Scalar root (e.g. the whole document is ``null``).
        return replaceVal if obj == findVal else obj

    # Assigning to an existing key/index never resizes the container, so
    # mutating while iterating is safe here.
    for k, v in entries:
        if v == findVal:
            obj[k] = replaceVal
        elif isinstance(v, (dict, list)):
            recursive_replace(v, findVal, replaceVal)
    return obj
result = recursive_replace(json.loads(yourdata), None, 0)
Found a way to do it. @Charles Duffy, thanks for the inspiration - I borrowed some of your approach but couldn't get it to work quite as written. The final code looks like this, in case anyone needs it in the future:
from collections.abc import Mapping, Iterable
def replace_none_values(noneVal, replaceVal='0.00'):
    """Return a copy of ``noneVal`` with every ``None`` replaced by ``replaceVal``.

    Mappings are rebuilt as plain dicts and every other non-text iterable
    (list, tuple, set, ...) is rebuilt as a list; all remaining values are
    returned unchanged.

    Args:
        noneVal: Arbitrarily nested structure, e.g. a decoded JSON document.
        replaceVal: Substitute for ``None``. Defaults to the string ``'0.00'``.

    Returns:
        The converted structure; the input is not modified.
    """
    if noneVal is None:
        return replaceVal
    if isinstance(noneVal, Mapping):
        return {k: replace_none_values(v, replaceVal) for k, v in noneVal.items()}
    # str, bytes and bytearray are iterable but are leaf values: iterating
    # bytes would otherwise turn b'ab' into [97, 98].
    if isinstance(noneVal, Iterable) and not isinstance(noneVal, (str, bytes, bytearray)):
        return [replace_none_values(v, replaceVal) for v in noneVal]
    return noneVal
Related
Below is an example of sports betting app I'm working on.
games.json()['data'] - contains the game id for each sport event for that day. The API then returns the odds for that specific game.
What's the fastest way to take JSON and turn it into a pandas DataFrame? I'm currently looking into msgspec.
Some games can have over 5K total bets
# Collect one plain dict per odds entry and build the DataFrame once at the
# end. Creating a one-row DataFrame and calling pd.concat inside the loop
# re-copies every accumulated row on each iteration (quadratic time).
records = []
for game in games.json()['data']:
    odds_params = {'key': api_key, 'game_id': game['id'], 'sportsbook': sportsbooks}
    odds = requests.get(api_url, params=odds_params)

    # Game-level columns shared by every odds row of this game: 'id' first,
    # then every other game field except 'is_live'.
    game_fields = {'id': game['id']}
    game_fields.update(
        (k, v) for k, v in game.items() if k != 'id' and k != 'is_live'
    )

    for o in odds.json()['data'][0]['odds']:
        record = dict(game_fields)
        for k, v in o.items():
            # The odds entry's own 'id' must not clobber the game id.
            record['odds_id' if k == 'id' else k] = v
        records.append(record)

# NOTE: rows now get a 0..n-1 RangeIndex; the concat-based version left every
# row with index label 0.
master_df = pd.DataFrame(records)
odds.json response snippet -
{'data': [{'id': '35142-30886-2023-02-08',
'sport': 'basketball',
'league': 'NBA',
'start_date': '2023-02-08T19:10:00-05:00',
'home_team': 'Washington Wizards',
'away_team': 'Charlotte Hornets',
'is_live': False,
'tournament': None,
'status': 'unplayed',
'odds': [{'id': '4BB426518ECF',
'sports_book_name': 'Betfred',
'name': 'Charlotte Hornets',
'price': 135.0,
'checked_date': '2023-02-08T11:46:12-05:00',
'bet_points': None,
'is_main': True,
'is_live': False,
'market_name': '1st Half Moneyline',
'home_rotation_number': None,
'away_rotation_number': None,
'deep_link_url': None,
'player_id': None},
....
By the end of this process, I usually have about 30K records in the dataframe
Here is what I would do.
def _create_record_(game: dict, odds: dict) -> dict:
    """Merge one game with one of its odds entries into a single flat record.

    The odds entry's ``'id'`` is stored under ``'odds_id'`` so that it does
    not overwrite the game's own ``'id'``. On any other shared key the odds
    value wins. ``'is_live'`` is dropped from the result.

    Neither input dict is modified.

    Args:
        game: Game-level fields (expected to contain ``'id'``).
        odds: Fields of a single odds entry for that game.

    Returns:
        A new dict combining both inputs.
    """
    # {**a, **b} tolerates duplicate keys (unlike dict(**a, **b), which
    # raises TypeError) and does not need the 3.9+ ``|`` operator.
    result = {**game, **{k: v for k, v in odds.items() if k != "id"}}
    if "id" in odds:
        result["odds_id"] = odds["id"]
    # Default of None: do not fail when neither input carried 'is_live'.
    result.pop("is_live", None)
    return result
def _get_odds(game: dict) -> list:
    """Request the odds for one game and return its list of odds entries.

    Sends a GET to ``api_url`` with the API key, the game's ``'id'`` and the
    configured sportsbooks, then returns ``['data'][0]['odds']`` from the
    decoded JSON body.
    """
    query = {'key': api_key, 'game_id': game['id'], 'sportsbook': sportsbooks}
    response = requests.get(api_url, params=query)
    payload = response.json()
    return payload['data'][0]['odds']
# Build every row as a plain dict first, then construct the DataFrame in a
# single call.
all_games = games.json()['data']
records = [
    _create_record_(game, odds)
    for game in all_games
    for odds in _get_odds(game)
]
df = pd.DataFrame(records)
The fact that it is in a list comprehension isn't relevant; an equivalent for-loop would work just as well. The point is that you create a list of dicts first and then create your DataFrame. This avoids the quadratic-time behavior of incrementally building a DataFrame with pd.concat.
I have a list of dictionaries but I want to store 3 values from a dictionary named 'price'
My code is
response = yf.Ticker("FB").stats()["price"]
output:
{'averageDailyVolume10Day': 19621971,
'averageDailyVolume3Month': 16023089,
'circulatingSupply': None,
'currency': 'USD',
'currencySymbol': '$',
'exchange': 'NMS',
'exchangeDataDelayedBy': 0,
'exchangeName': 'NasdaqGS',
'fromCurrency': None,
'lastMarket': None,
'longName': 'Facebook, Inc.',
'marketCap': 960766541824,
'marketState': 'REGULAR',
'maxAge': 1,
'openInterest': None,
'postMarketChange': None,
'postMarketPrice': None,
'preMarketChange': 3.51001,
'preMarketChangePercent': 0.0103239,
'preMarketPrice': 343.5,
'preMarketSource': 'FREE_REALTIME',
'preMarketTime': 1634736599,
'priceHint': 2,
'quoteSourceName': 'Nasdaq Real Time Price',
'quoteType': 'EQUITY',
'regularMarketChange': 0.7750244,
'regularMarketChangePercent': 0.0022795508,
'regularMarketDayHigh': 343.94,
'regularMarketDayLow': 339.7,
'regularMarketOpen': 343.445,
'regularMarketPreviousClose': 339.99,
'regularMarketPrice': 340.765,
'regularMarketSource': 'FREE_REALTIME',
'regularMarketTime': 1634749118,
'regularMarketVolume': 8538416,
'shortName': 'Facebook, Inc.',
'strikePrice': None,
'symbol': 'FB',
'toCurrency': None,
'underlyingSymbol': None,
'volume24Hr': None,
'volumeAllCurrencies': None}
I would like to get only shortName, regularMarketPrice and symbol
I know that if I want to extract one value I should run
response = yf.Ticker("FB").stats()["price"]["shortName"]
but is there a way to store all 3 values in response?
Assuming the output dictionary you show is stored in response variable, you can try this -
keys = ['shortName', 'regularMarketPrice', 'symbol']
filtered_response = {k:response.get(k) for k in keys}
{'shortName': 'Facebook, Inc.',
'regularMarketPrice': 340.765,
'symbol': 'FB'}
@RJ has it right in the comments, but here is some explanation for you:
In this case, yf.Ticker("FB").stats()["price"] is returning you the entire dictionary, so all of the values are being returned and stored in response.
So you can just do:
response = yf.Ticker("FB").stats()["price"]
shortName = response["shortName"]
regularMarketPrice = response["regularMarketPrice"]
symbol = response["symbol"]
d = {...}
# Look each key up explicitly, in the same order as the names on the left.
# Filtering while iterating over ``d`` would bind the values in the dict's
# own key order and would fail to unpack if any of the keys were absent.
market_price, name, symbol = (
    d.get(k) for k in ("regularMarketPrice", "shortName", "symbol")
)
print(f'MarketPrice: {market_price}')
print(f'shortName : {name}')
print(f'symbol : {symbol}')
I have a dictionary -
{
'buy': {'trade_transaction_amount__sum': None, 'tax__sum': None, 'trade_fee__sum': None},
'sell': {'trade_transaction_amount__sum': None, 'tax__sum': None, 'trade_fee__sum': None}
}
What would be the best approach to replace the None values with 0.
Note - Not every time the values for these keys are None.
You can use a dictionary comprehension and change the value to 0 whenever it is falsy (such as None, False, '' or 0). Note that this replaces every falsy value, not only None.
d = {'a': None, 'b': 1}
d1 = {k: v or 0 for (k, v) in d.items()}
# {'a': 0, 'b': 1}
You can recursively replace all None with 0 in the dict by checking the type of the values to see it's a nested dict or not.
test_dict = {
'buy': {'trade_transaction_amount__sum': None, 'tax__sum': None, 'trade_fee__sum': None},
'sell': {'trade_transaction_amount__sum': None, 'tax__sum': None, 'trade_fee__sum': None}
}
def replace_none_with(d, replacement=0):
    """Return a new dict in which every ``None`` value is ``replacement``.

    Nested dicts are converted recursively; any other value (including
    lists) is carried over unchanged. The input dict is left untouched.
    """
    def _convert(value):
        if value is None:
            return replacement
        if isinstance(value, dict):
            return replace_none_with(value, replacement)
        return value

    return {key: _convert(value) for key, value in d.items()}
print(replace_none_with(test_dict))
Output:
{'buy': {'trade_transaction_amount__sum': 0, 'tax__sum': 0, 'trade_fee__sum': 0}, 'sell': {'trade_transaction_amount__sum': 0, 'tax__sum': 0, 'trade_fee__sum': 0}}
Here is the recursive approach which works with any levels of nesting dictionaries :
d = {
'buy': {'trade_transaction_amount__sum': None, 'tax__sum': None,
'trade_fee__sum': None},
'sell': {'trade_transaction_amount__sum': None, 'tax__sum': None,
'trade_fee__sum': None}
}
def replacer(dictionary):
    """Set every ``None`` value to 0 in place, descending into nested dicts.

    Returns nothing; the dict passed in is modified directly.
    """
    for key in dictionary:
        value = dictionary[key]
        if value is None:
            dictionary[key] = 0
            continue
        if isinstance(value, dict):
            replacer(value)
replacer(d)
print(d)
I am trying to recursively compare below two python dictionaries:
expectededr = {'uid': 'e579b8cb-7d9f-4c0b-97de-a03bb52a1ec3', 'attempted': {'smpp': {'registeredDelivery': 0}, 'status': 'success', 'OATON': 1, 'OANPI': 1, 'DATON': 1, 'DANPI': 1, 'OA': '12149921220', 'DA': '1514525404'}, 'customerID': 'customer01', 'productID': 'product'}
edr = {'Category': 'NO', 'Type': 'mt', 'uid': 'e579b8cb-7d9f-4c0b-97de-a03bb52a1ec3', 'protocolID': 'smpp', 'direction': 'attempted', 'attempted': {'status': 'success', 'OANPI': 1, 'DATON': 1, 't2': 1512549691602, 'DANPI': 1, 'OA': '12149921220', 'DA': '1514525404', 'smpp': {'fragmented': False, 'sequenceID': 1, 'registeredDelivery': 0, 'messageID': '4e7b48ad-b39e-4e91-a7bb-2de463e4a6ee', 'srcPort': 39417, 'messageType': 4, 'Status': 0, 'ESMClass': 0, 'dstPort': 0, 'size': 0}, 'OATON': 1, 'PID': 0, 't1': 1512549691602}, 'customerID': 'customer01', 'productID': 'product'}
I am trying to compare them so that each key and value of the first dictionary is looked up in the second; if they match, print PASS, otherwise print FAIL.
for key in expectededr:
if expectededr[key] == edr[key]:
print("PASS")
else:
print("FAIL")
Output:
FAIL
PASS
PASS
PASS
Above code is not able to compare all the keys and values as these are nested dictionaries.
As you can see below, if i print key and values above i see that its not going in sub dictionary and missing their keys:
for key in expectededr:
if expectededr[key] == edr[key]:
print(expectededr[key])
print(edr[key])
Output:
customer01
customer01
e579b8cb-7d9f-4c0b-97de-a03bb52a1ec3
e579b8cb-7d9f-4c0b-97de-a03bb52a1ec3
product
product
Could someone help me update this code so that I can do the comparison on these nested dictionaries?
One way is to flatten the dictionaries and then compare if the keys match.
So let's initialize your dicts first:
In [23]: expectededr = {'uid': 'e579b8cb-7d9f-4c0b-97de-a03bb52a1ec3', 'attempted': {'smpp': {'registeredDelivery': 0}, 'status': 'success', 'OATON': 1, 'OANP
...: I': 1, 'DATON': 1, 'DANPI': 1, 'OA': '12149921220', 'DA': '1514525404'}, 'customerID': 'customer01', 'productID': 'product'}
...:
...: edr = {'Category': 'NO', 'Type': 'mt', 'uid': 'e579b8cb-7d9f-4c0b-97de-a03bb52a1ec3', 'protocolID': 'smpp', 'direction': 'attempted', 'attempted': {'
...: status': 'success', 'OANPI': 1, 'DATON': 1, 't2': 1512549691602, 'DANPI': 1, 'OA': '12149921220', 'DA': '1514525404', 'smpp': {'fragmented': False, '
...: sequenceID': 1, 'registeredDelivery': 0, 'messageID': '4e7b48ad-b39e-4e91-a7bb-2de463e4a6ee', 'srcPort': 39417, 'messageType': 4, 'Status': 0, 'ESMCl
...: ass': 0, 'dstPort': 0, 'size': 0}, 'OATON': 1, 'PID': 0, 't1': 1512549691602}, 'customerID': 'customer01', 'productID': 'product'}
...:
For flattening your dictionaries, we can use the approach suggested in Flatten nested Python dictionaries, compressing keys:
import collections.abc


def flatten(d, parent_key='', sep='_'):
    """Flatten a nested mapping into a single-level dict with joined keys.

    Args:
        d: Mapping to flatten; nested mutable mappings are expanded.
        parent_key: Prefix already accumulated for ``d`` (empty at the root).
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new dict such as ``{'a_b': 1}`` for ``{'a': {'b': 1}}``.
    """
    items = []
    for k, v in d.items():
        # f-string instead of ``+`` so non-string keys don't raise TypeError.
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        # The ABCs live in collections.abc; the ``collections.MutableMapping``
        # alias was removed in Python 3.10.
        if isinstance(v, collections.abc.MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)
And generated flattened dicts
In [25]: flat_expectededr = flatten(expectededr)
In [26]: flat_edr = flatten(edr)
Now its a simple comparison:
# Every flattened expected key must exist in the flattened EDR with an equal
# value. The explicit membership test keeps a missing key from "matching" an
# expected value of None, which a bare .get() would allow.
for key in flat_expectededr:
    if key in flat_edr and flat_edr[key] == flat_expectededr[key]:
        print("PASS")
    else:
        print("FAIL")
PASS
PASS
PASS
PASS
PASS
PASS
PASS
PASS
PASS
PASS
PASS
Simple way :
# Reports, for each top-level key of edr, whether expectededr has that key
# too. Only key presence is checked; values are not compared.
for i in edr:
    if i in expectededr:
        print('true : key' + i)
    else:
        print('fail : key' + i)
How do I filter a nested dictionary in python based on key values:
d = {'data': {'country': 'US', 'city': 'New York', 'state': None},
'tags': ['US', 'New York'],
'type': 'country_info',
'growth_rate': None
}
I want to filter this dictionary to eliminate NoneType values so the resulting dict should be:
d = {'data': {'country': 'US', 'city': 'New York'},
'tags': ['US', 'New York'],
'type': 'country_info',
}
Also, the dict can have multiple levels of nesting. I want to remove all NoneType values from the dict.
You can define this recursively pretty easily with a dict comprehension.
def remove_keys_with_none_values(item):
    """Return ``item`` without any dict entries whose value is ``None``.

    Anything exposing ``.items()`` is rebuilt as a plain dict, with the
    surviving values cleaned recursively; every other object (lists
    included) is returned as-is.
    """
    if hasattr(item, 'items'):
        cleaned = {}
        for key, value in item.items():
            if value is None:
                continue
            cleaned[key] = remove_keys_with_none_values(value)
        return cleaned
    return item
Recursion isn't too optimised in Python, but given the relatively small number of nestings that are likely, I wouldn't worry.
Although looking before we leap isn't very Pythonic, I think it is a better option here than catching the exception, as it's likely that the value will not be a dict most of the time (we are likely to have more leaves than branches).
Also note that in Python 2.x, you probably want to swap in iteritems() for items().
I really appreciate the answer by @Lattyware. It helped me filter a nested object and remove empty values regardless of whether the type is dict, list, or str.
Here is what I came up with:
remove-keys-with-empty-values.py
# remove-keys-with-empty-values.py
from pprint import pprint
def remove_keys_with_empty_values(item):
    """Return ``item`` with empty values stripped from nested dicts and lists.

    A value is kept when it is truthy or compares equal to 0, so ``0`` and
    ``0.0`` survive (as does ``False``, since ``False == 0``). ``None``,
    ``''``, ``[]`` and ``{}`` are dropped.

    Children are cleaned *before* the emptiness check, so a container that
    only held empty values is removed too instead of surviving as ``{}`` or
    ``[]``.

    Args:
        item: Any object. Objects with ``.items()`` are rebuilt as dicts,
            lists are rebuilt as lists, everything else is returned as-is.

    Returns:
        The cleaned structure; the input is not modified.
    """
    def _keep(value):
        return value == 0 or bool(value)

    if hasattr(item, 'items'):
        cleaned_pairs = (
            (key, remove_keys_with_empty_values(value))
            for key, value in item.items()
        )
        return {key: value for key, value in cleaned_pairs if _keep(value)}
    if isinstance(item, list):
        cleaned_values = (remove_keys_with_empty_values(value) for value in item)
        return [value for value in cleaned_values if _keep(value)]
    return item
# Sample input covering every kind of value the filter has to deal with.
d = {
    'string': 'value',
    'integer': 10,
    'float': 0.5,
    'zero': 0,
    'empty_list': [],
    'empty_dict': {},
    'empty_string': '',
    'none': None,
}
d['nested_dict'] = d.copy()
# Snapshot the values into a real list. On Python 3, d.values() is a live
# view: it is not a ``list`` (so the list branch of the filter would never
# run) and storing it back into ``d`` would make ``d`` contain itself.
l = list(d.values())
d['nested_list'] = l
pprint({
    "DICT FILTERED": remove_keys_with_empty_values(d),
    "DICT ORIGINAL": d,
    "LIST FILTERED": remove_keys_with_empty_values(l),
    "LIST ORIGINAL": l,
})
execution
python remove-keys-with-empty-values.py
{'DICT FILTERED': {'float': 0.5,
'integer': 10,
'nested_dict': {'float': 0.5,
'integer': 10,
'string': 'value',
'zero': 0},
'nested_list': [0,
'value',
10,
0.5,
{'float': 0.5,
'integer': 10,
'string': 'value',
'zero': 0}],
'string': 'value',
'zero': 0},
'DICT ORIGINAL': {'empty_dict': {},
'empty_list': [],
'empty_string': '',
'float': 0.5,
'integer': 10,
'nested_dict': {'empty_dict': {},
'empty_list': [],
'empty_string': '',
'float': 0.5,
'integer': 10,
'none': None,
'string': 'value',
'zero': 0},
'nested_list': [{},
0,
'value',
None,
[],
10,
0.5,
'',
{'empty_dict': {},
'empty_list': [],
'empty_string': '',
'float': 0.5,
'integer': 10,
'none': None,
'string': 'value',
'zero': 0}],
'none': None,
'string': 'value',
'zero': 0},
'LIST FILTERED': [0,
'value',
10,
0.5,
{'float': 0.5,
'integer': 10,
'string': 'value',
'zero': 0}],
'LIST ORIGINAL': [{},
0,
'value',
None,
[],
10,
0.5,
'',
{'empty_dict': {},
'empty_list': [],
'empty_string': '',
'float': 0.5,
'integer': 10,
'none': None,
'string': 'value',
'zero': 0}]}