Saving python dictionary (or JSON?) as CSV - python

I have been trying to save the output from Google Search Console API as a CSV File. Initially, I was using sys.stdout to save what was print from the sample code they had provided. However, on the third or so attempt, I started receiving this error:
File "C:\python39\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\uff1a' in position 13: character maps to <undefined>
After that I tried switching to using Pandas to csv funtion. The result is not what I had hoped for but is at least closer:
> ,rows,responseAggregationType
0,"{'keys': ['amp pwa'], 'clicks': 1, 'impressions': 4, 'ctr': 0.25, 'position': 7.25}",byProperty
1,"{'keys': ['convert desktop site to mobile'], 'clicks': 1, 'impressions': 2, 'ctr': 0.5, 'position': 1.5}",byProperty
I'm very new to python but I figure it has something to do with the output from the API pull not being quite the standard dict object format.
I also tried using the csv.write function (I deleted that code before coming here so I don't have an example) but the result was the same unable to encode issues as from sys.stdout.
Here is the code that prints the output exactly as I need it, I just need to be able to save it somewhere where I can use it in a spreadsheet.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import argparse
import sys
from googleapiclient import sample_tools
# Declare command-line flags.
argparser = argparse.ArgumentParser(add_help=False)
argparser.add_argument('property_uri', type=str,
help=('Site or app URI to query data for (including '
'trailing slash).'))
argparser.add_argument('start_date', type=str,
help=('Start date of the requested date range in '
'YYYY-MM-DD format.'))
argparser.add_argument('end_date', type=str,
help=('End date of the requested date range in '
'YYYY-MM-DD format.'))
def main(argv):
service, flags = sample_tools.init(
argv, 'searchconsole', 'v1', __doc__, __file__, parents=[argparser],
scope='https://www.googleapis.com/auth/webmasters.readonly')
# Get top 10 queries for the date range, sorted by click count, descending.
request = {
'startDate': flags.start_date,
'endDate': flags.end_date,
'dimensions': ['query'],
'rowLimit': 10
}
response = execute_request(service, flags.property_uri, request)
print_table(response, 'Top Queries')
def execute_request(service, property_uri, request):
"""Executes a searchAnalytics.query request.
Args:
service: The searchconsole service to use when executing the query.
property_uri: The site or app URI to request data for.
request: The request to be executed.
Returns:
An array of response rows.
"""
return service.searchanalytics().query(
siteUrl=property_uri, body=request).execute()
def print_table(response, title):
"""Prints out a response table.
Each row contains key(s), clicks, impressions, CTR, and average position.
Args:
response: The server response to be printed as a table.
title: The title of the table.
"""
print('\n --' + title + ':')
if 'rows' not in response:
print('Empty response')
return
rows = response['rows']
row_format = '{:<20}' + '{:>20}' * 4
print(row_format.format('Keys', 'Clicks', 'Impressions', 'CTR', 'Position'))
for row in rows:
keys = ''
# Keys are returned only if one or more dimensions are requested.
if 'keys' in row:
keys = u','.join(row['keys']).encode('utf-8').decode()
print(row_format.format(
keys, row['clicks'], row['impressions'], row['ctr'], row['position']))
if __name__ == '__main__':
main(sys.argv)
Here's the output as I want it, but comma separated:
Keys Clicks Impressions CTR Position
amp pwa 1 4 0.25 7.25
convert desktop site to mobile 1 2 0.5 1.5
And here is what printing just the result object results in:
{'rows': [{'keys': ['amp pwa'], 'clicks': 1, 'impressions': 4, 'ctr': 0.25, 'position': 7.25}, {'keys': ['convert desktop site to mobile'], 'clicks': 1, 'impressions': 2, 'ctr': 0.5, 'position': 1.5}], 'responseAggregationType': 'byProperty'}
I hope I have included enough info, I tried every solution recommended here and on other sites before asking a question. It just seems like an oddly formatted json/dictionary object.
Any help is extremely appreciated.
Update, Solution:
Adjused output code to be:
import csv
with open("out.csv", "w", encoding="utf8", newline='') as f:
rows = response['rows']
writer = csv.writer(f)
headers = ["Keys", "Clicks", "Impressions", "CTR", "Position"]
writer.writerow(headers)
for row in rows:
keys = ''
# Keys are returned only if one or more dimensions are requested.
if 'keys' in row:
keys = u','.join(row['keys']).encode('utf-8').decode()
# Looks like your data has the keys in lowercase
writer.writerow([keys, row['clicks'], row['impressions'], row['ctr'], row['position']])

It may just be the encoding of the output file that's the problem.
It looks like the rows you get from the response are a series of dict-like objects, so this should work:
import csv
with open("out.csv", "w", encoding="utf8") as f:
writer = csv.writer(f)
headers = ["Keys", "Clicks", "Impressions", "CTR", "Position"]
writer.writerow(headers)
for row in rows:
writer.writerow(
[
", ".join(row.get("keys", [])),
row["clicks"],
row["impressions"],
row["ctr"],
row["postition"],
]
)
The writer object accepts a number of arguments to control line separators and quoting in the output csv. Check the module docs for details.

Related

Python 'list' object has no attribute 'keys' when trying to write a row in CSV file

I am trying to write a new row into a CSV file and I can't because I get an error in Python Shell.
Below is the code I am using (I am reading JSON from API and want to put data into CSV file)
# import urllib library
from urllib.request import Request, urlopen
c=1
# import json
import json
# store the URL in url as
# parameter for urlopen
import pandas as pd
import csv
headerList = ['name','id','order','height','weight','speed','special_defense','special_attack','defense','attack','hp']
# open CSV file and assign header
with open("pokemon_stats.csv", 'w') as file:
dw = csv.DictWriter(file, delimiter=',',
fieldnames=headerList)
dw.writeheader()
# display csv file
fileContent = pd.read_csv("pokemon_stats.csv")
for r in range(1,3):
req = Request('https://pokeapi.co/api/v2/pokemon/'+str(r)+'/', headers={'User-Agent': 'Chrome/32.0.1667.0'})
# store the response of URL
response = urlopen(req)
# storing the JSON response
# from url in data
data_json = json.loads(response.read())
#print(data_json)
for key, value in data_json.items():
if key=='name':
name=value
elif key=='id':
id=value
elif key=='order':
order=value
elif key=='height':
height=value
elif key=='weight':
weight=value
elif key == 'stats':
for sub in data_json['stats']:
for i in sub:
if i=='base_stat':
base_stat=sub[i]
if i=='stat':
for j in sub[i]:
if j=='name':
stat_name=sub[i][j]
if stat_name=='hp':
hp=base_stat
elif stat_name=='attack':
attack=base_stat
elif stat_name=='defense':
defense=base_stat
elif stat_name=='special-attack':
special_attack=base_stat
elif stat_name=='special-defense':
special_defense=base_stat
elif stat_name=='speed':
speed=base_stat
data = [name,id,order,height,weight,speed,special_defense,special_attack,defense,attack,hp]
dw.writerow(data)
After I try the execution of this code I get an error as it follows:
Traceback (most recent call last):
File "C:/Users/sbelcic/Desktop/NANOBIT_API.py", line 117, in <module>
dw.writerow(data)
File "C:\Users\sbelcic\AppData\Local\Programs\Python\Python37\lib\csv.py", line 155, in writerow
return self.writer.writerow(self._dict_to_list(rowdict))
File "C:\Users\sbelcic\AppData\Local\Programs\Python\Python37\lib\csv.py", line 148, in _dict_to_list
wrong_fields = rowdict.keys() - self.fieldnames
AttributeError: 'list' object has no attribute 'keys'*
Can somebody pls help and tell me what I am doing wrong.
I don't have working experience of manipulating JSON response with Python so any comments are welcome. If someone sees a better way to do this he is welcome to share.
Since dw is a DictionaryWriter, data needs to be a dictionary (currently it's a list) as seen in the documentation.
Convert data to a dictionary with your headers
data = [name,id,order,height,weight,speed,special_defense,special_attack,defense,attack,hp]
data = dict(zip(headerList, data))
dw.writerow(data)
Check the example for using the DictWriter. You need to pass a dictionary to writerow instead of a list, so your last line should be
data =['name':name,'id': id,'order':order,'height': height,'weight':weight,'speed':speed,'special_defense':special_defense,'special_attack':special_attack,'defense':defense,'attack':attack,'hp':hp]
dw.writerow(data)
Note that your whole code can also be simplified if you populate the data dictionary instead of all your if/else:
data={} #empty dictionary
#First extract everything that is on the main level of your dict
for key in ("name", "id", "order", "height", "weight":
if key in data_json:
data[key]=data_json[key]
#Check if the "stats" dict exists in your JSON data
if 'stats' in data_json:
if 'base_stat' in data_json['stats']:
data['base_stat']=data_json['stats']['base_stat']
if 'stat' in data_json['stats']:
statDict = data_json['stats']['stat']
for key in ['hp', 'attack', 'defense', 'special-attack', 'special-defense', 'speed']:
if key in statDict:
data[key]=statDict[key]
Notes:
I did not test this code, check it carefully, but I hope you get the idea
You could add else to all if key in checks to include an error message if a stat is missing
If you are sure that all keys will always be present, then you can skip a few of the if checks
I'm going to ignore the actual error that got you here, and instead propose a radical restructure: I think your code will be simpler and easier to reason about.
I've looked at the JSON returned from that Pokemon API and I can see why you started down the path you did: there's a lot of data, and you only need a small subset of it. So, you're going through a lot of effort to pick out exactly what you want.
The DictWriter interface can really help you here. Consider this really small example:
header = ['name', 'id', 'order']
with open('output.csv', 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=header)
writer.writeheader()
writer.writerow({'name': 'bulbasaur', 'id': 1, 'order': 1, 'species': {}})
Maybe you've run something like this before and got this error:
ValueError: dict contains fields not in fieldnames: 'species'
because the JSON you loaded has keys you didn't include when you created your writer... because you don't want them. And then, maybe you figured, "well, that means I've got to be very selective about what I put in the dict before passing to writerow()?
Since you've already defined which keys you care about for the header, use those keys to pull out what you want from the JSON:
header = ['name', 'id', 'order', 'height', 'weight',
'speed', 'special-defense', 'special-attack',
'defense', 'attack', 'hp']
all_data = json.load(open('1.json')) # bulbasaur, I downloaded this from the API URL
my_data = {}
for key in header:
my_data[key] = all_data.get(key) # will return None for sub-stats keys, which is okay for now
writer = csv.DictWriter(sys.stdout, fieldnames=header)
writer.writeheader()
writer.writerow(my_data)
The get(key_name) method on a dict (the JSON data) will try to find that key in the dict and return that key's value. If the key isn't found, None is returned. Running that I get the following CSV (the sub-stat columns are empty, as expected):
name,id,order,height,weight,speed,special_defense,special_attack,defense,attack,hp
bulbasaur,1,1,7,69,,,,,,
This has the same effect as your "if this key, then this value" statements, but it's driven by the data (header names) you already defined.
On to the sub-stats...
I think it's safe to assume that if there is a stats key in the JSON, each "stat object" in the list of stats will have the data you want. It's important to make sure you're only copying the stats you've specified in header; and again, you can use your data to drive the process:
for stat in all_data['stats']:
stat_name = stat['stat']['name']
if stat_name not in header:
continue # skip this sub-stat, no column for it in the CSV
base_stat = stat['base_stat']
my_data[stat_name] = base_stat
When I insert that loop, I now get this for my CSV output:
name,id,order,height,weight,speed,special_defense,special_attack,defense,attack,hp
bulbasaur,1,1,7,69,45,,,49,49,45
Some stats are populated, but some, the "special" stats are blank? That's because in your header you've named them like special_attack (with underscore) but in reality they're like special-attack (with hyphen). I fixed your header, and now I get:
name,id,order,height,weight,speed,special-defense,special-attack,defense,attack,hp
bulbasaur,1,1,7,69,45,65,65,49,49,45
Those are all the pieces you need. To put it together, I recommend the following structure... I'm a big fan of breaking up a process like this into distinct tasks: get all the data, then process all the data, then write all the processed data. It makes debugging easier, and less indentation of code:
# Make all API calls and record their JSON
all_datas = []
# loop over your API calls:
# make the request
# get the JSON data
# append JSON data to all_datas
# Process/transform the API JSON into what you want
my_data_rows = []
for all_data in all_datas:
my_data_row = {}
for key in header:
my_data_row[key] = all_data.get(key)
for stat in all_data['stats']:
stat_name = stat['stat']['name']
if stat_name not in header:
continue # skip this sub-stat
base_stat = stat['base_stat']
my_data[stat_name] = base_stat
# Write your transformed data to CSV
writer = csv.DictWriter(sys.stdout, fieldnames=header)
writer.writeheader()
writer.writerows(my_data_rows)

How to insert a key value pair on a list and write on a csv file

Im new to Python and I have been given a task for the company's HRIS. So I have written a code from a raw .csv file that will be re-written by filtering all other data and make sure that the first instance of IN and OUT of a person will be listed. How can I insert or append the time-in of a person on a list on the very same row?
employeeInfo2 = {'Name': employeeName, 'Date': employeeDate, 'Status': employeeStatus}
if employeeInfo2 not in employeeProfile:
employeeProfile.append(employeeInfo2)
when i tried putting this line of code just below the above code, the time is displayed as a new line in the csv file or is written on a new row.
employeeProfile.append({'Time': employeeTime})
import csv
import string
import datetime
from dateutil.parser import parse
from collections import OrderedDict
employeeProfile = []
with open('newDTR.csv', 'r') as csv_file:
employee = csv.DictReader(csv_file, delimiter=",")
for employeeList in employee:
stringDate = employeeList['Date/Time']
employeeName = employeeList['Name']
employeeStatus = employeeList['Status']
dateTimeObject = datetime.datetime.strptime(stringDate, '%d/%m/%Y %I:%M:%S %p')
employeeDate = dateTimeObject.strftime("%d/%m/%Y")
employeeTime = dateTimeObject.strftime("%H:%M:%S")
parsedTimeOut = parse(employeeTime)
expected = parsedTimeOut + datetime.timedelta(hours=9)
timeOut = expected.time()
employeeInfo2 = {'Name': employeeName, 'Date': employeeDate, 'Status': employeeStatus}
if employeeInfo2 not in employeeProfile:
employeeProfile.append(employeeInfo2)
with open('fixedDTR.csv', mode='w', newline='', encoding='utf8') as new_csv_file:
fieldnames = ['Name', 'Date', 'Status', 'Time', 'Expected Time Out']
csv_writer = csv.DictWriter(new_csv_file, fieldnames=fieldnames)
csv_writer.writeheader()
for b in employeeProfile:
print(b)
csv_writer.writerow(b)
I was expecting that employeeTime will be aligned to each line of data but is not. Probably because the employeeProfile.append({'Time': employeeTime}) is on a new line. What should be the best approach?
Well, looking at your code, there isnt and insert for Time as you are writing b from employeeProfile.
Simply put when you use csv_write.writerow(b), this will automatically go down 1 row in the csv file. You could append your time key to the dict stored in employeeprofile.
employeeInfo2 = {'Name': employeeName, 'Date': employeeDate, 'Status': employeeStatus}
if employeeInfo2 not in employeeProfile:
employeeInfo2["Time"] = employeeTime # or whatever you wanted in the field
employeeProfile.append(employeeInfo2)
This would add the Time column to your dict, which would then be written nicely by csv_writer.writerow.
Based on your output I am guessing you are writing the time after like this:
csv_writer.writerow(b)
csv_writer.writerow(times)
where times is your dict for the Times. which causes the offset since writerow adds a newline to each line of your csv.

csv upload function works, but error if I want to fill it with content

I wrote a function that creates a report for me and uploads it to S3. However, I have problems filling the CSV file with content. Here you can see the code:
import boto3
import re
import csv
def lambda_handler(event,context):
client = boto3.client('ce')
response = client.get_cost_and_usage(
TimePeriod={
'Start': "2019-05-15",
'End': "2019-07-05"
},
Granularity='MONTHLY',
Metrics=['BlendedCost'],
GroupBy=[
{
'Type': 'TAG',
'Key': 'Project'
},
]
)
csv_testerinho = csv.writer(open("/tmp/csv_testerinho.csv", "w+"))
csv_testerinho.writerow(["Account Name", "Month", "Cost"])
#writing rows in csv
for detail in response:
csv_testerinho.writerow([response['Start'],
response['End'],
response['BlendedCost']
])
client = boto3.client('s3')
client.upload_file('/tmp/csv_testerinho.csv', 'bucket_name','final_testerinho.csv')
When I execute the code I get the following error:
Response:
{
"errorMessage": "'Start'",
"errorType": "KeyError",
"stackTrace":
}
What would I have to do to fill the CSV with the information I get through the API?
You should retrieve the start and end times using the following where n is an index into the ResultsByTime list:
response['ResultsByTime'][n]['TimePeriod']['Start']
response['ResultsByTime'][n]['TimePeriod'['End']
Or you could write:
for result in response['ResultsByTime']:
start = result['TimePeriod']['Start']
end = result['TimePeriod']['End']
So, applying this to your code:
with open("/tmp/csv_testerinho.csv", "w+") as fd:
csv_testerinho = csv.writer(fd)
csv_testerinho.writerow(["Start", "End", "Cost"])
for result in response['ResultsByTime']:
start = result['TimePeriod']['Start']
end = result['TimePeriod']['End']
total_cost = 0.0
for group in result['Groups']:
cost = group['Metrics']['BlendedCost']['Amount']
total_cost += float(cost)
csv_testerinho.writerow([
start,
end,
total_cost
])
You will need to double-check how I have retrieved and aggregated the blended cost because it's not completely trivial to work out how to do this. If you print out the response dict object you get back, you will see what it contains.
BTW you seem to be writing column headers Account Name, Month and Cost but writing rows containing start time, end time, and cost which looks like a problem.
See the get_cost_and_usage reference for more details of the response.

Code fails to run when there is no data

When I run my below query and there is no data in the values such as ["VT","NCR","N","DT","RD"], the query fails.
With the error message of
ValueError: dict contains fields not in fieldnames: ‘VT’
Is there a way to say if there is no data in any of the values still carry on running the query to grab data for the values that have data in python?
For example: the ‘TRY’, ‘CATCH’, or ’PASS’ method?
I have been struggling on this for days, could someone show me how to do this?
My Code:
from datetime import datetime
from elasticsearch import Elasticsearch
import csv
es = Elasticsearch(["9200"])
res = es.search(index="search", body=
{
"_source": ["VT","NCR","N","DT","RD"],
"query": {
"bool": {
"must": [{"range": {"VT": {
"gte": "now/d",
"lte": "now+1d/d"}}},
{"wildcard": {"user": "mike*"}}]}}},size=10)
csv_file = 'File_' + str(datetime.now().strftime('%Y_%m_%d - %H.%M.%S')) + '.csv'
header_names = { 'VT': 'Date', 'NCR': 'ExTime', 'N': 'Name', 'DT': 'Party', ' RD ': 'Period'}
with open(csv_file, 'w', newline='') as f:
header_present = False
for doc in res['hits']['hits']:
my_dict = doc['_source']
if not header_present:
w = csv.DictWriter(f, my_dict.keys())
w.writerow(header_names,)
header_present = True
w.writerow(my_dict)
I'd like to point out a flaw in your comment
# will write DATE, TIME, ... in correct place
w.writerow(header_names,)
Actually, it writes out the values of the the dictionary under the headers of the keys... Therefore you're writing two headers, basically.
Regarding the error, according to the documentation , you can ignore missing fields and set default values when they don't exist
The optional restval parameter specifies the value to be written if the dictionary is missing a key in fieldnames. If the dictionary passed to the writerow() method contains a key not found in fieldnames, the optional extrasaction parameter indicates what action to take. If it is set to 'raise', the default value, a ValueError is raised. If it is set to 'ignore', extra values in the dictionary are ignored.
For example
with open(csv_file, 'w', newline='') as f:
# Open one csv for all the results
w = csv.DictWriter(f, fieldnames=header_names.keys(), restval='', extrasaction='ignore')
# There's only one header, don't need a boolean flag
w.writeheader()
# proceed to write results
for doc in res['hits']['hits']:
my_dict = doc['_source']
# Parse this dictionary however you need to write a valid CSV row
w.writerow(my_dict)
Otherwise, don't use a DictWriter and form the CSV row yourself. You can use dict.get() to extract values, but set default values that don't exist in the data

Python Dictionaries & CSV Values | Check CSV

The csv file works fine. So does the dictionary but I can't seem to check the values in the csv file to make sure I'm not adding duplicate entries. How can I check this? The code I tried is below:
def write_csv():
csvfile = csv.writer(open("address.csv", "a"))
check = csv.reader(open("address.csv"))
for item in address2:
csvfile.writerow([address2[items]['address']['value'],address2[items]['address']['count'],items, datetime.datetime.now()])
def check_csv():
check = csv.reader(open("address.csv"))
csvfile = csv.writer(open("address.csv", "a"))
for stuff in address2:
address = address2[str(stuff)]['address']['value']
for sub in check:
if sub[0] == address:
print "equals"
try:
address2[stuff]['delete'] = True
except:
address2[stuff]['delete'] = True
else:
csvfile.writerow([address2[stuff]['address']['value'], address2[stuff]['address']['count'], stuff, datetime.datetime.now()])
Any ideas?
Your CSV and dict structures are a little wonky - I'd love to know if that is set or if you can change them to be more useful. Here is an example that does basically what you want -- you'll have to change some things to fit your format. The most important change is probably not writing to a file that you are reading - that is going to lead to headaches.
This does what you asked with the delete flag -- is there an external need for this? If not there is almost certainly a better way (removing the bad rows, saving the good rows somewhere else, etc - depends on what you are doing).
Anyway, here is the example. I used just the commented block to create the csv file in the first place, then added the new address to the list and ran the rest. Instead of looping through the file over and over it makes a lookup dict by address and stores the row number, which it then uses to update the delete flag if it is found when it reads the csv file. You'll want to take the prints out and uncomment the last line to actually write the new rows.
import csv, datetime
addresses = [
{'address': {'value': '123 road', 'count': 1}, 'delete': False},
{'address': {'value': '456 road', 'count': 1}, 'delete': False},
{'address': {'value': '789 road', 'count': 1}, 'delete': False},
{'address': {'value': '1 new road', 'count': 1}, 'delete': False},
]
now = datetime.datetime.now()
### create the csv
##with open('address.csv', 'wb') as csv_file:
## writer = csv.writer(csv_file)
## for row in addresses:
## writer.writerow([ row['address']['value'], row['address']['count'], now.strftime('%Y-%m-%d %H:%M:%S') ])
# make lookup keys for the dict
address_lookup = {}
for i in range(len(addresses)):
address_row = addresses[i]
address_lookup[address_row['address']['value']] = i
# read csv once
with open('address.csv', 'rb') as csv_file:
reader = csv.reader(csv_file)
for row in reader:
print row
# if address is found in the dict, set delete flag to true
if row[0] in address_lookup:
print 'flagging address as old: %s' % row[0]
addresses[ address_lookup[row[0]] ]['delete'] = True
with open('address.csv', 'ab') as csv_file:
# go back through addresses and add any that shouldnt be deleted to the csv
writer = csv.writer(csv_file)
for address_row in addresses:
if address_row['delete'] is False:
print 'adding row: '
print address_row
#writer.writerow([ row['address']['value'], row['address']['count'], now.strftime('%Y-%m-%d %H:%M:%S') ])

Categories