How would I slice a value of a key in a dictionary - python

Hello, I'm new to coding and I have to define a function named filterByMonth with two parameters. The first argument passed to the function should be a list of dictionaries (the data); the second should be a number representing a month (1 through 12). The function must return a list of all the dictionaries from the input list whose 'issued' date falls in the indicated month. The hint is to 'slice' a string to more easily extract the relevant portion of a date from a string. Sample function call: filterByMonth(data,9)
With a list of dictionaries I need to slice the month which is a part of the value for the key 'issued' which is the third key in the dictionaries.
data = [
{'apno': 'FEND19-9487084', 'aptype': 'FENDRIVE', 'issued': '2019-09-05T00:00:00.000', 'stname': '129 PARKSIDE CT', 'city': 'BUFFALO', 'state': 'NY', 'zip': '14214', 'applicant': 'PETER CICERO', 'fees': '150', 'value': '3500', 'plans': '0', 'sbl': '0795300004001000', 'landuse': '411', 'inspector': 'ANDREW BLERSCH', 'expdate': '2020-03-05T00:00:00.000', 'descofwork': 'REMOVE EXISTING DRIVEWAY AND REPLACE IN KIND WITH CONCRETE TO CODE ON SOUTH / RIGHT SIDE OF STRUCTURE TO CODE - SURVEY SCANNED', 'location_1': {'latitude': '42.95116080935555', 'longitude': '-78.83406536395538', 'human_address': '{"address": "", "city": "", "state": "", "zip": ""}'}, 'latitude': '42.95116080935555', 'longitude': '-78.83406536395538', 'council_district': 'UNIVERSITY', 'police_district': 'District D', 'census': '45', 'census_block_group': '1', 'census_block': '1010', 'neighborhood': 'UNKNOWN', ':#computed_region_fk4y_hpmh': '5', ':#computed_region_eziv_p4ck': '64', ':#computed_region_tmcg_v66k': '8', ':#computed_region_kwzn_pe6v': '18', ':#computed_region_uh5x_q5mi': '88', ':#computed_region_dwzh_dtk5': '1573', ':#computed_region_b3rm_3c8a': '37', ':#computed_region_xbxg_7ifr': '24', ':#computed_region_urdz_b6n8': '7'},
{'apno': 'SWIM19-9485898', 'aptype': 'SWIM POOL', 'issued': '2019-08-19T00:00:00.000', 'stname': '341 NORWALK', 'city': 'BUFFALO', 'state': 'NY', 'zip': '14216', 'applicant': 'MS CHRISTINE SALAMONE', 'fees': '75', 'value': '500', 'plans': '0', 'sbl': '0785000006033000', 'landuse': '210', 'inspector': 'ANDREW BLERSCH', 'expdate': '2020-02-19T00:00:00.000', 'descofwork': 'INSTALLATION OF AN ABOVE GROUND SWIMMING POOL SUBMITTED THROUGH IDT', 'location_1': {'latitude': '42.95333872723409', 'longitude': '-78.85429233887896', 'human_address': '{"address": "", "city": "", "state": "", "zip": ""}'}, 'latitude': '42.95333872723409', 'longitude': '-78.85429233887896', 'council_district': 'DELAWARE', 'police_district': 'District D', 'census': '49', 'census_block_group': '1', 'census_block': '1000', 'neighborhood': 'UNKNOWN', ':#computed_region_fk4y_hpmh': '5', ':#computed_region_eziv_p4ck': '51', ':#computed_region_tmcg_v66k': '7', ':#computed_region_kwzn_pe6v': '5', ':#computed_region_uh5x_q5mi': '190', ':#computed_region_dwzh_dtk5': '944', ':#computed_region_b3rm_3c8a': '28', ':#computed_region_xbxg_7ifr': '25', ':#computed_region_urdz_b6n8': '2'},
]
def filterByMonth(dta, month):
    """Return the dictionaries in ``dta`` whose 'issued' date is in ``month``.

    ``dta`` is a list of dictionaries whose 'issued' value is a timestamp
    string shaped like '2019-09-05T00:00:00.000'; ``month`` is an integer
    from 1 to 12.  Matching dictionaries are returned in their input order.
    """
    result = []
    for x in dta:
        # Characters 5-6 of 'YYYY-MM-DD...' hold the two-digit month.  Convert
        # the slice to int so that the text '09' compares equal to the number 9.
        if int(x['issued'][5:7]) == month:
            result.append(x)
    return result
print(filterByMonth(data,9))

You could do this more easily with the datetime module like so
from datetime import datetime
data = [
{'apno': 'FEND19-9487084', 'aptype': 'FENDRIVE', 'issued': '2019-09-05T00:00:00.000', 'stname': '129 PARKSIDE CT', 'city': 'BUFFALO', 'state': 'NY', 'zip': '14214', 'applicant': 'PETER CICERO', 'fees': '150', 'value': '3500', 'plans': '0', 'sbl': '0795300004001000', 'landuse': '411', 'inspector': 'ANDREW BLERSCH', 'expdate': '2020-03-05T00:00:00.000', 'descofwork': 'REMOVE EXISTING DRIVEWAY AND REPLACE IN KIND WITH CONCRETE TO CODE ON SOUTH / RIGHT SIDE OF STRUCTURE TO CODE - SURVEY SCANNED', 'location_1': {'latitude': '42.95116080935555', 'longitude': '-78.83406536395538', 'human_address': '{"address": "", "city": "", "state": "", "zip": ""}'}, 'latitude': '42.95116080935555', 'longitude': '-78.83406536395538', 'council_district': 'UNIVERSITY', 'police_district': 'District D', 'census': '45', 'census_block_group': '1', 'census_block': '1010', 'neighborhood': 'UNKNOWN', ':#computed_region_fk4y_hpmh': '5', ':#computed_region_eziv_p4ck': '64', ':#computed_region_tmcg_v66k': '8', ':#computed_region_kwzn_pe6v': '18', ':#computed_region_uh5x_q5mi': '88', ':#computed_region_dwzh_dtk5': '1573', ':#computed_region_b3rm_3c8a': '37', ':#computed_region_xbxg_7ifr': '24', ':#computed_region_urdz_b6n8': '7'},
{'apno': 'SWIM19-9485898', 'aptype': 'SWIM POOL', 'issued': '2019-08-19T00:00:00.000', 'stname': '341 NORWALK', 'city': 'BUFFALO', 'state': 'NY', 'zip': '14216', 'applicant': 'MS CHRISTINE SALAMONE', 'fees': '75', 'value': '500', 'plans': '0', 'sbl': '0785000006033000', 'landuse': '210', 'inspector': 'ANDREW BLERSCH', 'expdate': '2020-02-19T00:00:00.000', 'descofwork': 'INSTALLATION OF AN ABOVE GROUND SWIMMING POOL SUBMITTED THROUGH IDT', 'location_1': {'latitude': '42.95333872723409', 'longitude': '-78.85429233887896', 'human_address': '{"address": "", "city": "", "state": "", "zip": ""}'}, 'latitude': '42.95333872723409', 'longitude': '-78.85429233887896', 'council_district': 'DELAWARE', 'police_district': 'District D', 'census': '49', 'census_block_group': '1', 'census_block': '1000', 'neighborhood': 'UNKNOWN', ':#computed_region_fk4y_hpmh': '5', ':#computed_region_eziv_p4ck': '51', ':#computed_region_tmcg_v66k': '7', ':#computed_region_kwzn_pe6v': '5', ':#computed_region_uh5x_q5mi': '190', ':#computed_region_dwzh_dtk5': '944', ':#computed_region_b3rm_3c8a': '28', ':#computed_region_xbxg_7ifr': '25', ':#computed_region_urdz_b6n8': '2'}
]
def filterByMonth(data, month):
    """Return the items of ``data`` whose 'issued' timestamp falls in ``month``.

    Each item's 'issued' value is parsed with the format
    '%Y-%m-%dT%H:%M:%S.%f' (e.g. '2019-09-05T00:00:00.000'), so a value in
    any other shape raises ValueError.  ``month`` is an integer 1-12.
    """
    result = []
    for item in data:
        datestring = item['issued']
        dt = datetime.strptime(datestring, '%Y-%m-%dT%H:%M:%S.%f')
        # dt.month is already an int, so it compares directly with ``month``.
        if dt.month == month:
            result.append(item)
    return result
print(filterByMonth(data, 9))
and a more pythonic way would be this
def filterByMonth(data, month):
    """Return the items of ``data`` whose 'issued' timestamp falls in ``month``.

    Same contract as the loop version: 'issued' is parsed with
    '%Y-%m-%dT%H:%M:%S.%f' and ``month`` is an integer 1-12.
    """
    return [
        item
        for item in data
        if datetime.strptime(item['issued'], '%Y-%m-%dT%H:%M:%S.%f').month == month
    ]

Related

remove repeated values in dictionary

I want to remove the repeated entries from a list of dictionaries after extracting the data I need, which is 'rate' and 'genre'.
a=[{'movie': 'abc', 'rate': '9', 'origin': 'AU', 'genre': 'horror'},
{'movie': 'xyz', 'rate': '7', 'origin': 'NY', 'genre': 'romance'},
{'movie': 'jkl', 'rate': '9', 'origin': 'HK', 'genre': 'horror'},
{'movie': 'qwe', 'rate': '6', 'origin': 'HK', 'genre': 'comedy'},
{'movie': 'vbn', 'rate': '9', 'origin': 'BKK', 'genre': 'romance'}]
needed_data=[]
for test in a:
x={}
word=['rate','genre']
for key,value in test.items():
for words in word:
if key == words:
x[key] = value
needed_data.append(x)
results = {}
filters=[]
for yy in needed_data:
for key,value in yy.items():
if value not in results.values():
results[key] = value
filters.append(results)
print(filters)
the output from above code is
[{'rate': '9', 'genre': 'romance'},
{'rate': '9', 'genre': 'romance'},
{'rate': '9', 'genre': 'romance'},
{'rate': '9', 'genre': 'romance'},
{'rate': '9', 'genre': 'romance'}]
my desired output would be
[{'rate': '9', 'genre': 'horror'},
{'rate': '7', 'genre': 'romance'},
{'rate': '6', 'genre': 'comedy'},
{'rate': '9', 'genre': 'romance'}]
I would recommend to use pandas for data processing
import pandas as pd
df = pd.DataFrame(a)
df_dd= df[["genre", "rate"]].drop_duplicates()
new_a = df_dd.to_dict(orient="records")
print(new_a)
Output
[{'genre': 'horror', 'rate': '9.'},
{'genre': 'romance', 'rate': '7'},
{'genre': 'horror', 'rate': '9'},
{'genre': 'comedy', 'rate': '6'},
{'genre': 'romance', 'rate': '9'}]
Your data has both the strings '9.' and '9'. Do you want it that way? If not, you can normalize the rate while building a key for each record:
z = {f"{float(x['rate']):.2f}-{x['genre']}": x for x in needed_data}
list(z.values())
Output
[{'rate': '9', 'genre': 'horror'},
{'rate': '7', 'genre': 'romance'},
{'rate': '6', 'genre': 'comedy'},
{'rate': '9', 'genre': 'romance'}]
This is the easy way to do your task:
a = [{'movie': 'abc', 'rate': '9.', 'origin': 'AU', 'genre': 'horror'},
     {'movie': 'xyz', 'rate': '7', 'origin': 'NY', 'genre': 'romance'},
     {'movie': 'jkl', 'rate': '9', 'origin': 'HK', 'genre': 'horror'},
     {'movie': 'qwe', 'rate': '6', 'origin': 'HK', 'genre': 'comedy'},
     {'movie': 'vbn', 'rate': '9', 'origin': 'BKK', 'genre': 'romance'}]


def unique_rate_genre(rows):
    """Return one {'rate', 'genre'} dict per distinct pair, in first-seen order.

    Pairs are compared as the exact strings stored in each row, so '9.' and
    '9' count as different rates.
    """
    seen = set()
    unique = []
    for row in rows:
        pair = (row['rate'], row['genre'])
        if pair in seen:
            # Same rate/genre combination already emitted: skip the repeat.
            continue
        seen.add(pair)
        unique.append({'rate': pair[0], 'genre': pair[1]})
    return unique


c = unique_rate_genre(a)
print(c)
So the Output will be:
[{'rate': '9.', 'genre': 'horror'}, {'rate': '7', 'genre': 'romance'}, {'rate': '9', 'genre': 'horror'}, {'rate': '6', 'genre': 'comedy'}, {'rate': '9', 'genre': 'romance'}]

Finding missing value in JSON using python

I am facing this problem: I want to separate the records in my dataset that are complete from those that are not.
So, I want to add a flag such as 'completed' to each record in the JSON, as shown in the desired output below.
This is the data that i have
data=[{'id': 'abc001',
'demo':{'gender':'1',
'job':'6',
'area':'3',
'study':'3'},
'ex_data':{'fam':'small',
'scholar':'2'}},
{'id': 'abc002',
'demo':{'gender':'1',
'edu':'6',
'qual':'3',
'living':'3'},
'ex_data':{'fam':'',
'scholar':''}},
{'id': 'abc003',
'demo':{'gender':'1',
'edu':'6',
'area':'3',
'sal':'3'}
'ex_data':{'fam':'big',
'scholar':NaN}}]
Output
How can I put the flag and also detect NaN and NULL in JSON?
Output=[{'id': 'abc001',
'completed':'yes',
'demo':{'gender':'1',
'job':'6',
'area':'3',
'study':'3'},
'ex_data':{'fam':'small',
'scholar':'2'}},
{'id': 'abc002',
'completed':'no',
'demo':{'gender':'1',
'edu':'6',
'qual':'3',
'living':'3'},
'ex_data':{'fam':'',
'scholar':''}},
{'id': 'abc003',
'completed':'no',
'demo':{'gender':'1',
'edu':'6',
'area':'3',
'sal':'3'}
'ex_data':{'fam':'big',
'scholar':NaN}}]
Something like this should work for you:
data = [
{
'id': 'abc001',
'demo': {
'gender': '1',
'job': '6',
'area': '3',
'study': '3'},
'ex_data': {'fam': 'small',
'scholar': '2'}
},
{
'id': 'abc002',
'demo': {
'gender': '1',
'edu': '6',
'qual': '3',
'living': '3'},
'ex_data': {'fam': '',
'scholar': ''}},
{
'id': 'abc003',
'demo': {
'gender': '1',
'edu': '6',
'area': '3',
'sal': '3'},
'ex_data': {'fam': 'big',
'scholar': None}
}
]
def browse_dict(dico):
    """Flag ``dico`` in place with a 'completed' key set to 'yes' or 'no'.

    The record is marked 'no' when any value, at any depth of nested
    dictionaries, is None, an empty string, or a float NaN; otherwise it is
    marked 'yes'.  Nothing is returned: the flag is written into ``dico``.
    """
    import math

    def is_missing(value):
        # Recurse so dictionaries nested more than one level deep are checked.
        if isinstance(value, dict):
            return any(is_missing(inner) for inner in value.values())
        if isinstance(value, float):
            # NaN never compares equal to anything, so it needs its own test.
            return math.isnan(value)
        return value is None or value == ""

    dico["completed"] = "no" if is_missing(dico) else "yes"
for d in data:
    # browse_dict() writes the 'completed' flag into each record in place,
    # so the record is printed after the call to show the added key.
    browse_dict(d)
    print(d)
Output :
{'id': 'abc001', 'demo': {'gender': '1', 'job': '6', 'area': '3', 'study': '3'}, 'ex_data': {'fam': 'small', 'scholar': '2'}, 'completed': 'yes'}
{'id': 'abc002', 'demo': {'gender': '1', 'edu': '6', 'qual': '3', 'living': '3'}, 'ex_data': {'fam': '', 'scholar': ''}, 'completed': 'no'}
{'id': 'abc003', 'demo': {'gender': '1', 'edu': '6', 'area': '3', 'sal': '3'}, 'ex_data': {'fam': 'big', 'scholar': None}, 'completed': 'no'}
Note that I changed NaN to None, because what you are showing is most likely a Python dictionary rather than a JSON file (you are assigning it with data =).
A bare NaN is not a defined name in Python, so in a dictionary literal a missing value is normally written as None.
If you have to convert your JSON to a dictionary, refer to the JSON module documentation.
Also please check your dictionary syntax. You missed several commas to separate data.
You should try
The Input is
data = [{'demo': {'gender': '1', 'job': '6', 'study': '3', 'area': '3'}, 'id': 'abc001', 'ex_data': {'scholar': '2', 'fam': 'small'}}, {'demo': {'living': '3', 'gender': '1', 'qual': '3', 'edu': '6'}, 'id': 'abc002', 'ex_data': {'scholar': '', 'fam': ''}}, {'demo': {'gender': '1', 'area': '3', 'sal': '3', 'edu': '6'}, 'id': 'abc003', 'ex_data': {'scholar': None, 'fam': 'big'}}]
Also, NaN is not a defined name in Python, so we have used None instead of NaN.
for item in data:
    # Assume the record is complete until an empty value proves otherwise.
    item["completed"] = "yes"
    for value in item.values():
        # A nested dict is checked one level deep; any other value is
        # checked directly.  "Empty" here means falsy (None, '', 0, ...).
        candidates = value.values() if isinstance(value, dict) else (value,)
        if not all(candidates):
            item["completed"] = "no"
            # One empty value settles the flag; stop scanning this record.
            break
The Output will be
data = [{'demo': {'gender': '1', 'job': '6', 'study': '3', 'area': '3'}, 'completed': 'yes', 'id': 'abc001', 'ex_data': {'scholar': '2', 'fam': 'small'}}, {'demo': {'living': '3', 'edu': '6', 'qual': '3', 'gender': '1'}, 'completed': 'no', 'id': 'abc002', 'ex_data': {'scholar': '', 'fam': ''}}, {'demo': {'edu': '6', 'gender': '1', 'sal': '3', 'area': '3'}, 'completed': 'no', 'id': 'abc003', 'ex_data': {'scholar': None, 'fam': 'big'}}]

How do I create a function that will return a value in a dictionary for each row within a data sheet using Python?

I need to create a new column in my table for a state region that populates a region for every row of data (each having a state). How do I write a function to call upon a dictionary for each row item?
I have about 30,000 row items, and I believe a loop would take too long. I am certain there is some way to do this with dictionaries. I've tried using different methods to call this but cannot seem to get it to populate the correct data.
states = {
'AK': 'Alaska',
'AL': 'Alabama',
'AR': 'Arkansas',
'AZ': 'Arizona',
'CA': 'California',
'CO': 'Colorado',
'CT': 'Connecticut',
'DC': 'District of Columbia',
'DE': 'Delaware',
'FL': 'Florida',
'GA': 'Georgia',
'HI': 'Hawaii',
'IA': 'Iowa',
'ID': 'Idaho',
'IL': 'Illinois',
'IN': 'Indiana',
'KS': 'Kansas',
'KY': 'Kentucky',
'LA': 'Louisiana',
'MA': 'Massachusetts',
'MD': 'Maryland',
'ME': 'Maine',
'MI': 'Michigan',
'MN': 'Minnesota',
'MO': 'Missouri',
'MS': 'Mississippi',
'MT': 'Montana',
'NC': 'North Carolina',
'ND': 'North Dakota',
'NE': 'Nebraska',
'NH': 'New Hampshire',
'NJ': 'New Jersey',
'NM': 'New Mexico',
'NV': 'Nevada',
'NY': 'New York',
'OH': 'Ohio',
'OK': 'Oklahoma',
'OR': 'Oregon',
'PA': 'Pennsylvania',
'RI': 'Rhode Island',
'SC': 'South Carolina',
'SD': 'South Dakota',
'TN': 'Tennessee',
'TX': 'Texas',
'UT': 'Utah',
'VA': 'Virginia',
'VT': 'Vermont',
'WA': 'Washington',
'WI': 'Wisconsin',
'WV': 'West Virginia',
'WY': 'Wyoming'
}
state_abbrev = {v: k for k, v in states.items()}
state_code = {
'AK': '10','AL': '4', 'AR': '9', 'AR': '6', 'CA': '9', 'CO': '8', 'CT': '1', 'DC': '3', 'DE': '3', 'FL': '4',
'GA': '4', 'HI': '9', 'IA': '7', 'ID': '10', 'IL': '5', 'IN': '5', 'KS': '7', 'KY': '4', 'LA': '6',
'MA': '1', 'MD': '3', 'ME': '1', 'MI': '5', 'MN': '5','MO': '7', 'MS': '4', 'MT': '8', 'NC': '4',
'ND': '8', 'NE': '7', 'NH': '1', 'NJ': '2', 'NM': '6','NV': '9', 'NY': '2', 'OH': '5', 'OK': '6',
'OR': '10', 'PA': '3', 'PR': '2', 'RI': '1', 'SC': '4', 'SD': '8', 'TN': '4', 'TX': '6', 'UT': '8',
'VA': '3', 'VI': '2', 'VT': '1', 'WA': '10', 'WI': '5', 'WV': '3', 'WY': '8', 'PI': '9'
}
state_region = {v: k for k, v in state_code.items()}
def get_region():
return [state_region[i] for i in fulldf['state']]
fulldf["Region"] = get_region()
fulldf.tail()
Returns key error 'MA', expected to return a new column named "Region" that populates the region for each "state" listed.
KeyError Traceback (most recent call last)
<ipython-input-338-6afc1e48556a> in <module>
33 return [state_region[i] for i in fulldf['state']]
34
---> 35 fulldf["Region"] = get_region()
36 fulldf.tail()
37
<ipython-input-338-6afc1e48556a> in get_region()
31
32 def get_region():
---> 33 return [state_region[i] for i in fulldf['state']]
34
35 fulldf["Region"] = get_region()
<ipython-input-338-6afc1e48556a> in <listcomp>(.0)
31
32 def get_region():
---> 33 return [state_region[i] for i in fulldf['state']]
34
35 fulldf["Region"] = get_region()
KeyError: 'MA'
Your get_region function is flawed. It should be:
def get_region():
    """Return the region code for every entry of ``fulldf['state']``, in row order.

    The lookup uses ``state_code`` (state abbreviation -> region code).
    ``state_region`` is that mapping inverted (region code -> abbreviation),
    so indexing it with an abbreviation such as 'MA' raises KeyError.
    States that are not in ``state_code`` yield None instead of raising.
    """
    return [state_code.get(i) for i in fulldf['state']]
Python comprehensions are optimized enough for that function to be fine for a 30k length dataframe.

How to fix KeyError when assigning regions to states in a dictionary

I am assigning regions to each individual state. My code reads from an excel file, and there are about 30k rows. I set up a dictionary assigning each state to a region as well as state abbreviations to each state name. I am trying to create a column that will populate each row item's region, but keep getting a KeyError at 'MA' (because there are no line items for this state in my Excel file).
I have tried writing a phrase using 'except' as well as 'if missing' and neither seems to clear the error and produce the desired results. I also tried removing MA from the dictionary, but the same error appears. I'm new to Python-- I am sure there is an easy fix here but do not know what it is.
states = {
'AK': 'Alaska',
'AL': 'Alabama',
'AR': 'Arkansas',
'AZ': 'Arizona',
'CA': 'California',
'CO': 'Colorado',
'CT': 'Connecticut',
'DC': 'District of Columbia',
'DE': 'Delaware',
'FL': 'Florida',
'GA': 'Georgia',
'HI': 'Hawaii',
'IA': 'Iowa',
'ID': 'Idaho',
'IL': 'Illinois',
'IN': 'Indiana',
'KS': 'Kansas',
'KY': 'Kentucky',
'LA': 'Louisiana',
'MA': 'Massachusetts',
'MD': 'Maryland',
'ME': 'Maine',
'MI': 'Michigan',
'MN': 'Minnesota',
'MO': 'Missouri',
'MS': 'Mississippi',
'MT': 'Montana',
'NC': 'North Carolina',
'ND': 'North Dakota',
'NE': 'Nebraska',
'NH': 'New Hampshire',
'NJ': 'New Jersey',
'NM': 'New Mexico',
'NV': 'Nevada',
'NY': 'New York',
'OH': 'Ohio',
'OK': 'Oklahoma',
'OR': 'Oregon',
'PA': 'Pennsylvania',
'RI': 'Rhode Island',
'SC': 'South Carolina',
'SD': 'South Dakota',
'TN': 'Tennessee',
'TX': 'Texas',
'UT': 'Utah',
'VA': 'Virginia',
'VT': 'Vermont',
'WA': 'Washington',
'WI': 'Wisconsin',
'WV': 'West Virginia',
'WY': 'Wyoming'
}
stateplusdc = states.keys()
state_abbrev = {v: k for k, v in states.items()}
state_code = {
'AK': '10','AL': '4', 'AR': '9', 'AR': '6', 'CA': '9', 'CO': '8',
'CT': '1', 'DC': '3', 'DE': '3', 'FL': '4', 'GA': '4', 'HI': '9', 'IA': '7', 'ID': '10', 'IL': '5', 'IN': '5', 'KS': '7', 'KY': '4', 'LA': '6', 'MA': '1', 'MD': '3', 'ME': '1', 'MI': '5', 'MN': '5','MO': '7', 'MS': '4', 'MT': '8', 'NC': '4', 'ND': '8', 'NE': '7', 'NH': '1', 'NJ': '2', 'NM': '6','NV': '9', 'NY': '2', 'OH': '5', 'OK': '6','OR': '10', 'PA': '3', 'PR': '2', 'RI': '1', 'SC': '4', 'SD': '8', 'TN': '4', 'TX': '6', 'UT': '8', 'VA': '3', 'VI': '2', 'VT': '1', 'WA': '10', 'WI': '5', 'WV': '3', 'WY': '8', 'PI': '9'
}
state_region = {v: k for k, v in state_code.items()}
excel_file = r'/Users/amandawhiting/Desktop/PA_spending_excel.xlsx'
df = pd.read_excel(excel_file)
df = df.rename(columns={'DAMAGE_CATEGORY_CODE': 'damageCode', 'FEDERAL_SHARE_OBLIGATED': 'FedShareObligated', 'PROJECT_AMOUNT': 'ProjectAmount'})
# Keep only rows whose dollar amounts are non-negative.
df = df[df['FedShareObligated'] >= 0]
df = df[df['ProjectAmount'] >= 0]
# Drop the damage categories that are excluded from the analysis.
excluded_damage_codes = [
    'A - Debris Removal',
    'B - Protective Measures',
    'Z - State Management',
    'H - Fire Management',
]
df = df[~df['damageCode'].isin(excluded_damage_codes)]
df = df.drop_duplicates()
df = df.reset_index(drop=True)

df2 = pd.read_csv("/Users/amandawhiting/Desktop/DisasterDeclarationsSummaries.csv", usecols=['disasterNumber', 'fyDeclared', 'state'])
# Keep declarations with fyDeclared strictly between 1991 and 2017.
df2 = df2[(df2['fyDeclared'] > 1991) & (df2['fyDeclared'] < 2017)]
df2 = df2.reset_index(drop=True)
df2['disasterNumber'] = df2['disasterNumber'].astype(int)

# Attach each project's declaration (and therefore its state) by disaster number.
fulldf = pd.merge(df, df2, left_on='DISASTER_NUMBER', right_on='disasterNumber', how='inner')
fulldf = fulldf.drop_duplicates()
fulldf = fulldf.reset_index(drop=True)
def get_region():
return [state_region[i] for i in fulldf['state']]
fulldf["Region"] = get_region()
fulldf.head()
Expected results: New column in existing table labeled "Regions" that populates each cell with the corresponding region for that state in that line.
Actual results: Key Error 'MA':
KeyError Traceback (most recent call last)
<ipython-input-403-13becd272809> in <module>
31 return [state_region[i] for i in fulldf['state']]
32
---> 33 fulldf["Region"] = get_region()
34
35 fulldf.head()
<ipython-input-403-13becd272809> in get_region()
29
30 def get_region():
---> 31 return [state_region[i] for i in fulldf['state']]
32
33 fulldf["Region"] = get_region()
<ipython-input-403-13becd272809> in <listcomp>(.0)
29
30 def get_region():
---> 31 return [state_region[i] for i in fulldf['state']]
32
33 fulldf["Region"] = get_region()
KeyError: 'MA'
IIUC, you're looking for either:
# replace non-available keys with NA
# (state_code maps state abbreviation -> region; state_region is the inverse
# mapping, so mapping abbreviations through it would give NA for every row)
fulldf["Region"] = fulldf['state'].map(state_code)
or
# keep the non-available keys intact
# (state_code maps state abbreviation -> region; state_region is the inverse
# mapping, so it has no abbreviation keys to replace)
fulldf["Region"] = fulldf['state'].replace(state_code)
Try it:
state_region = {v: k for k, v in state_code.items()}
def get_region():
    """Return the region code for every entry of ``fulldf['state']``, in row order.

    The lookup uses ``state_code`` (state abbreviation -> region code);
    ``state_region`` is keyed by region code, so testing abbreviations
    against it would report "NA" for every row.  States that are not in
    ``state_code`` are reported as the string "NA".
    """
    return [state_code.get(i, "NA") for i in fulldf['state']]

How to read a JSON retrieved from an API and save it into a CSV file?

I am using a weather API that responds with a JSON file. Here is a sample of the returned readings:
{
'data': {
'request': [{
'type': 'City',
'query': 'Karachi, Pakistan'
}],
'weather': [{
'date': '2019-03-10',
'astronomy': [{
'sunrise': '06:46 AM',
'sunset': '06:38 PM',
'moonrise': '09:04 AM',
'moonset': '09:53 PM',
'moon_phase': 'Waxing Crescent',
'moon_illumination': '24'
}],
'maxtempC': '27',
'maxtempF': '80',
'mintempC': '22',
'mintempF': '72',
'totalSnow_cm': '0.0',
'sunHour': '11.6',
'uvIndex': '7',
'hourly': [{
'time': '24',
'tempC': '27',
'tempF': '80',
'windspeedMiles': '10',
'windspeedKmph': '16',
'winddirDegree': '234',
'winddir16Point': 'SW',
'weatherCode': '116',
'weatherIconUrl': [{
'value': 'http://cdn.worldweatheronline.net/images/wsymbols01_png_64/wsymbol_0002_sunny_intervals.png'
}],
'weatherDesc': [{
'value': 'Partly cloudy'
}],
'precipMM': '0.0',
'humidity': '57',
'visibility': '10',
'pressure': '1012',
'cloudcover': '13',
'HeatIndexC': '25',
'HeatIndexF': '78',
'DewPointC': '15',
'DewPointF': '59',
'WindChillC': '24',
'WindChillF': '75',
'WindGustMiles': '12',
'WindGustKmph': '19',
'FeelsLikeC': '25',
'FeelsLikeF': '78',
'uvIndex': '0'
}]
}]
}
}
I used the following Python code in my attempt to read the data stored in the JSON file:
import simplejson as json
data_file = open("new.json", "r")
values = json.load(data_file)
But this outputs with an error as follows:
JSONDecodeError: Expecting value: line 1 column 1 (char 0) error
I am also wondering how I can save the result in a structured format in a CSV file using Python.
As stated below by Rami, the simplest way to do this would be to use pandas, either with a) .read_json() or b) pd.DataFrame.from_dict(). However, the issue in this particular case is that you have a nested dictionary/JSON. What do I mean by nested? Well, if you were to simply put this into a dataframe, you'd have this:
print (df)
request weather
0 {'type': 'City', 'query': 'Karachi, Pakistan'} {'date': '2019-03-10', 'astronomy': [{'sunrise...
Which is fine if that's what you want. However, I am assuming you'd like all the data for an instance flattened into a single row.
So you'll need to either use json_normalize to unravel it (which is possible, but you'd need to be certain the JSON file follows the same format/keys throughout, and you'd still need to pull out each of the dictionaries within the lists within the dictionaries), or use some function to flatten out the nested JSON. From there you can simply write to file.
I chose to flatten it using a function, then construct the dataframe:
import pandas as pd
import json
import re
from pandas.io.json import json_normalize
data = {'data': {'request': [{'type': 'City', 'query': 'Karachi, Pakistan'}], 'weather': [{'date': '2019-03-10', 'astronomy': [{'sunrise': '06:46 AM', 'sunset': '06:38 PM', 'moonrise': '09:04 AM', 'moonset': '09:53 PM', 'moon_phase': 'Waxing Crescent', 'moon_illumination': '24'}], 'maxtempC': '27', 'maxtempF': '80', 'mintempC': '22', 'mintempF': '72', 'totalSnow_cm': '0.0', 'sunHour': '11.6', 'uvIndex': '7', 'hourly': [{'time': '24', 'tempC': '27', 'tempF': '80', 'windspeedMiles': '10', 'windspeedKmph': '16', 'winddirDegree': '234', 'winddir16Point': 'SW', 'weatherCode': '116', 'weatherIconUrl': [{'value': 'http://cdn.worldweatheronline.net/images/wsymbols01_png_64/wsymbol_0002_sunny_intervals.png'}], 'weatherDesc': [{'value': 'Partly cloudy'}], 'precipMM': '0.0', 'humidity': '57', 'visibility': '10', 'pressure': '1012', 'cloudcover': '13', 'HeatIndexC': '25', 'HeatIndexF': '78', 'DewPointC': '15', 'DewPointF': '59', 'WindChillC': '24', 'WindChillF': '75', 'WindGustMiles': '12', 'WindGustKmph': '19', 'FeelsLikeC': '25', 'FeelsLikeF': '78', 'uvIndex': '0'}]}]}}
def flatten_json(y):
    """Flatten nested dicts and lists into a single-level dict.

    Each key of the result is the path to a leaf joined with '_': dict keys
    contribute their name and list items their position, so
    ``{'a': [{'b': 1}]}`` becomes ``{'a_0_b': 1}``.  Empty dicts and lists
    contribute nothing; a non-container ``y`` is stored under the key ''.
    """
    out = {}

    def flatten(x, name=''):
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, name + key + '_')
        elif isinstance(x, list):
            for index, value in enumerate(x):
                flatten(value, name + str(index) + '_')
        else:
            # Leaf: drop the trailing '_' appended by the parent level.
            out[name[:-1]] = x

    flatten(y)
    return out
flat = flatten_json(data['data'])
results = pd.DataFrame()
special_cols = []
columns_list = list(flat.keys())
# A flattened key looks like '<prefix>_<row>_<rest>': the first list index in
# the key selects the row, and everything after it names the column.
indexed_key = re.compile(r'_(\d+)_(.*)')
for item in columns_list:
    match = indexed_key.search(item)
    if match is None:
        # No '_<digits>_' segment means no row index; add these columns afterwards.
        special_cols.append(item)
        continue
    row_idx = int(match.group(1))
    # Column names are written without underscores (e.g. 'astronomy0sunrise').
    column = match.group(2).replace('_', '')
    value = flat[item]
    results.loc[row_idx, column] = value
for item in special_cols:
    results[item] = flat[item]
results.to_csv('path/filename.csv', index=False)
Output:
print (results.to_string())
type query date astronomy0sunrise astronomy0sunset astronomy0moonrise astronomy0moonset astronomy0moonphase astronomy0moonillumination maxtempC maxtempF mintempC mintempF totalSnowcm sunHour uvIndex hourly0time hourly0tempC hourly0tempF hourly0windspeedMiles hourly0windspeedKmph hourly0winddirDegree hourly0winddir16Point hourly0weatherCode hourly0weatherIconUrl0value hourly0weatherDesc0value hourly0precipMM hourly0humidity hourly0visibility hourly0pressure hourly0cloudcover hourly0HeatIndexC hourly0HeatIndexF hourly0DewPointC hourly0DewPointF hourly0WindChillC hourly0WindChillF hourly0WindGustMiles hourly0WindGustKmph hourly0FeelsLikeC hourly0FeelsLikeF hourly0uvIndex
0 City Karachi, Pakistan 2019-03-10 06:46 AM 06:38 PM 09:04 AM 09:53 PM Waxing Crescent 24 27 80 22 72 0.0 11.6 7 24 27 80 10 16 234 SW 116 http://cdn.worldweatheronline.net/images/wsymb... Partly cloudy 0.0 57 10 1012 13 25 78 15 59 24 75 12 19 25 78 0

Categories