Write Python Dictionary List Value into single CSV Cell? - python

This is probably easy, but I'm not sure how to do it. This is purely an example, but for Algeria - how would I write both "DZ" and "DB" to a single row? The cell value is bracketed like a list, ['DZ', 'DB'] instead of something like DZ DB
import csv

# CSV header: the column order used by DictWriter.
fieldnames = ['name', 'area', 'country_code2', 'country_code3']

# CSV data: one dict per output row.
rows = [
    {'name': 'Albania',
     'area': 28748,
     'country_code2': 'AL',
     'country_code3': 'ALB'},
    {'name': 'Algeria',
     'area': 2381741,
     # A list value is written as its repr, so this cell ends up as
     # ['DZ', 'DB'] -- the behaviour this question is asking about.
     'country_code2': ['DZ', 'DB'],
     'country_code3': 'DZA'},
    {'name': 'American Samoa',
     'area': 199,
     'country_code2': 'AS',
     'country_code3': 'ASM'},
]

# newline='' lets the csv module control the line endings itself.
with open('countries.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

Convert any list value in a row to a delimited string before writing the rows.
# Flatten list-valued codes so that each CSV cell holds plain text.
for row in rows:
    if isinstance(row['country_code2'], list):
        # e.g. ['DZ', 'DB'] -> 'DZ DB'
        row['country_code2'] = ' '.join(row['country_code2'])

You need to preprocess the rows before saving them to the CSV file.
So we use isinstance to check whether country_code2 is a list or a string, and join it into a single string when it is a list.
import csv

# CSV header: the column order used by DictWriter.
fieldnames = ['name', 'area', 'country_code2', 'country_code3']

# CSV data: one dict per output row.
rows = [
    {'name': 'Albania',
     'area': 28748,
     'country_code2': 'AL',
     'country_code3': 'ALB'},
    {'name': 'Algeria',
     'area': 2381741,
     'country_code2': ['DZ', 'DB'],
     'country_code3': 'DZA'},
    {'name': 'American Samoa',
     'area': 199,
     'country_code2': 'AS',
     'country_code3': 'ASM'},
]

for row in rows:
    # Check whether the country_code2 of this row is a list.
    if isinstance(row['country_code2'], list):
        # Join the list into a single string, separating the values with ' '.
        row['country_code2'] = ' '.join(row['country_code2'])

# newline='' lets the csv module control the line endings itself.
with open('countries.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

Related

Write csv file (including header) via dataset

I want to create a function that takes a file name and a dataset, and in turn, creates a .csv file with the dictionary keys as a header and the values in the rows beneath the header. I would like this to be independent of the number of columns in the dataset. Also, if any of the columns shows no data I still want the rest of the data to be written in the .csv file.
My current code looks like this:
def write_data_to_csv(csv_name, dataset):
    """Write a list of dicts to ``<csv_name>.csv`` with ';' as the delimiter.

    The header is built from the keys of all records, in first-seen order,
    so the function does not depend on a fixed set of columns. A record that
    lacks one of the columns gets an empty cell there instead of being
    dropped.

    Args:
        csv_name: Output path without the ``.csv`` extension.
        dataset: Iterable of dicts, one per output row.
    """
    import csv

    # Materialise once: the records are read twice (header, then rows).
    records = list(dataset)
    # dict.fromkeys removes duplicate keys while keeping first-seen order.
    fieldnames = list(dict.fromkeys(key for record in records for key in record))

    # newline='' lets the csv module control the line endings itself.
    with open(f"{csv_name}.csv", "w", newline="") as csv_file:
        writer = csv.DictWriter(
            csv_file, fieldnames=fieldnames, delimiter=";", restval=""
        )
        if fieldnames:
            writer.writeheader()
        writer.writerows(records)
My data looks like this:
[{'iso_country': 'ALB',
'continent': 'Europe',
'location': 'Albania',
'population': 2877800.0,
'population_density': 104.871,
'date': '2021-03-05',
'total_cases': 111301.0},
{'iso_country': 'ALB',
'continent': 'Europe',
'location': 'Albania',
'population': 2877800.0,
'population_density': 104.871,
'date': '2021-03-05',
'total_cases': 111301.0}]
Thanks in advance!
You can use this simple write_csv() function to write any list of dicts to a csv:
import csv
from typing import List, Dict
def write_csv(data: List[Dict], path: str) -> None:
with open(path, 'w', newline='') as file:
fields = set().union(*data)
writer = csv.DictWriter(file, fieldnames=fields)
writer.writeheader()
writer.writerows(data)
# Sample input: two records with the same set of keys.
csv_data = [
    {
        'iso_country': 'ALB',
        'continent': 'Europe',
        'location': 'Albania',
        'population': 2877800.0,
        'population_density': 104.871,
        'date': '2021-03-05',
        'total_cases': 111301.0,
    },
    {
        'iso_country': 'ALB',
        'continent': 'Europe',
        'location': 'Albania',
        'population': 2877800.0,
        'population_density': 104.871,
        'date': '2021-03-05',
        'total_cases': 111301.0,
    },
]

write_csv(csv_data, 'data.csv')
The typing import is just to make the documentation automatic for most IDEs

remove the duplicate rows in CSV file

I have the following Python function that exports JSON data to a CSV file. It works fine - the keys (CSV headers) and values (CSV rows) are populated in the CSV - but I'm trying to remove the duplicate rows in the CSV file.
Instead of manually removing them in Excel, how do I remove the duplicate rows in Python?
def toCSV(res):
    """Write selected fields of each event in *res* to EnrichedEvents.csv.

    Only the three columns listed in ``fieldnames`` are written; any other
    keys of an event are ignored. Events are written in input order and
    exactly as given, so repeated events produce repeated rows.

    Args:
        res: Iterable of dicts, one per event.
    """
    fieldnames = ['process_hash', 'process_name', "process_effective_reputation"]
    with open('EnrichedEvents.csv', 'w', newline='', encoding='utf-8') as csvfile:
        # extrasaction='ignore' skips keys that are not in fieldnames
        # instead of raising ValueError.
        dict_writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
        dict_writer.writeheader()
        dict_writer.writerows(res)
Thank you
for example in the csv, the duplicate row on apmsgfwd.exe information.
duplicate data below:
process_hash process_name process_effective_reputation
['f810a809e9cdf70c3189008e07c83619', '58d44528b60d36b515359fe234c9332ccef6937f5c950472230ce15dca8812e2'] c:\windows\system32\delltpad\apmsgfwd.exe ADAPTIVE_WHITE_LIST
['73ca11f2acf1adb7802c2914e1026db899a3c851cd9500378c0045e0'] c:\users\zdr3dds01\documents\sap\sap gui\export.mhtml NOT_LISTED
['f810a809e9cdf70c3189008e07c83619', '58d44528b60d36b515359fe234c9332ccef6937f5c950472230ce15dca8812e2'] c:\windows\system32\delltpad\apmsgfwd.exe ADAPTIVE_WHITE_LIST
['f810a809e9cdf70c3189008e07c83619', '58d44528b60d36b515359fe234c9332ccef6937f5c950472230ce15dca8812e2'] c:\windows\system32\delltpad\apmsgfwd.exe ADAPTIVE_WHITE_LIST
['582f018bc7a732d63f624d6f92b3d143', '66505bcb9975d61af14dd09cddd9ac0d11a3e2b5ae41845c65117e7e2b046d37'] c:\users\jij09\appdata\local\kingsoft\power word 2016\2016.3.3.0368\powerword.exe ADAPTIVE_WHITE_LIST
json data:
[{'device_name': 'fk6sdc2', 'device_timestamp': '2020-10-27T00:50:46.176Z', 'event_id': '9b1bvf6e17ee11eb81b', 'process_effective_reputation': 'LIST', 'process_hash': ['bfc7dcf5935830f3a9df8e9b6425c37a', 'ca9f3a24506cc518fc939a33c100b2d557f96e040f712f6dd4641ad1734e2f19'], 'process_name': 'c:\\program files (x86)\\toh122soft\\thcasdf3\\toho34rce.exe', 'process_username': ['JOHN\\user1']}, {'device_name': 'fk6sdc2', 'device_timestamp': '2020-10-27T00:50:46.176Z', 'event_id': '9b151f6e17ee11eb81b', 'process_effective_reputation': 'LIST', 'process_hash': ['bfc7dcf5935f3a9df8e9b6830425c37a', 'ca9f3a24506cc518fc939a33c100b2d557f96e040f712f6dd4641ad1734e2f19'], 'process_name': 'c:\\program files (x86)\\oft\\tf3\\tootsice.exe', 'process_username': ['JOHN\\user2']}, {'device_name': '6asdsdc2', 'device_timestamp': '2020-10-27T00:50:46.176Z', 'event_id': '9b151f698e11eb81b', 'process_effective_reputation': 'LIST', 'process_hash': ['9df8ebfc7dcf5935830f3a9b6425c37a', 'ca9f3a24506cc518ff6ddc939a33c100b2d557f96e040f7124641ad1734e2f19'], 'process_name': 'c:\\program files (x86)\\toht\\th3\\tohce.exe', 'process_username': ['JOHN\\user3']}]
Is it necessary to use the approach above? If not, I usually use the pandas library for reading CSV files and dropping duplicate rows:
import pandas as pd

# Load the exported events, drop the rows that exactly repeat an earlier
# row, and save the result without the index column.
data = pd.read_csv('EnrichedEvents.csv')
data = data.drop_duplicates()
data.to_csv('output.csv', index=False)
Below is standalone example that shows how to filter duplicates. The idea is to get the values of each dict and convert them into tuple. Using a set we can filter out the duplicates.
import csv

csv_columns = ['No', 'Name', 'Country']
dict_data = [
    {'No': 1, 'Name': 'Alex', 'Country': ['India']},
    {'No': 1, 'Name': 'Alex', 'Country': ['India']},
    {'No': 1, 'Name': 'Alex', 'Country': ['India']},
    {'No': 1, 'Name': 'Alex', 'Country': ['India']},
    {'No': 2, 'Name': 'Ben', 'Country': ['USA']},
]
csv_file = "Names.csv"

with open(csv_file, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
    writer.writeheader()
    entries = set()  # keys of the rows that have already been written
    for data in dict_data:
        # Lists are not hashable, so turn them into tuples to build a key
        # that can be stored in a set. Unlike ','.join(v), a tuple cannot
        # be confused with a plain string value and accepts non-string items.
        val = tuple(tuple(v) if isinstance(v, list) else v for v in data.values())
        if val not in entries:
            writer.writerow(data)
            entries.add(val)
print('done')
Names.csv
No,Name,Country
1,Alex,['India']
2,Ben,['USA']

Trouble editing a CSV file using python csv module dictwriter

I am making a program that reads data from a form, stores it in a dictionary, and then uses csv.DictWriter to append the data to a csv file. I run the program but nothing happens to my data.csv file. The main program and the data file are in the same working directory, and the csv module is available as well.
Here's the code,
def response_to_csv(data):
    """Append one form response to data.csv.

    The header row is written only when the file is new or empty, so
    repeated calls do not scatter header lines between the data rows.

    Args:
        data: Dict with a value for every name in ``fieldnames``; extra keys
            are ignored.

    Raises:
        KeyError: If *data* lacks one of the expected fields.
    """
    # Append with a dict writer so values are matched to columns by name.
    fieldnames = ['date', 'first', 'last', 'age', 'email', 'country',
                  'city/town', 'Uni Student', 'Instagram', 'Followers', 'Affiliate']
    with open('data.csv', 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # In append mode the stream starts at the end of the file, so a
        # position of 0 means there is no header yet.
        if csvfile.tell() == 0:
            writer.writeheader()
        writer.writerow({name: data[name] for name in fieldnames})
Here's the data dictionary
# One form response. The values (date, fname, lname, ...) are assumed to be
# defined earlier by the form-handling code, which is not shown here.
data = {
    'date': date,
    'first': fname,
    'last': lname,
    'age': age,
    'email': email,
    'country': country,
    'city/town': city_town,
    'Uni Student': is_Uni_Student,
    'Instagram': insta,
    'Followers': ig_followers,
    'Affiliate': affiliation,
}

response_to_csv(data)
import csv
# Sample form response used to exercise response_to_csv().
data = {
    'date': '202001',
    'first': 'Bob',
    'last': 'Smith',
    'age': 45,
    'email': 'bsmith#gmail.com',
    'country': 'USA',
    'city/town': 'New York',
    'Uni Student': 1,
    'Instagram': '#bsmith',
    'Followers': 45678,
    'Affiliate': 'Red Bull',
}
def response_to_csv(data):
    """Append one form response to data.csv.

    The header row is written only when the file is new or empty, so
    repeated calls do not scatter header lines between the data rows.

    Args:
        data: Dict keyed by the names in ``fieldnames``. Missing keys become
            empty cells.

    Raises:
        ValueError: If *data* has a key that is not in ``fieldnames``.
    """
    fieldnames = ['date', 'first', 'last', 'age', 'email', 'country',
                  'city/town', 'Uni Student', 'Instagram', 'Followers', 'Affiliate']
    with open('data.csv', 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # In append mode the stream starts at the end of the file, so a
        # position of 0 means there is no header yet.
        if csvfile.tell() == 0:
            writer.writeheader()
        writer.writerow(data)
response_to_csv(data)
Your code worked for me, although I had to fix the indentation of the body of your function, with open(...) should not be at the same indent as def response_to_csv(data):
import csv
def response_to_csv(data):
    """Append one form response to data.csv.

    The header row is written only when the file is new or empty, so
    repeated calls do not scatter header lines between the data rows.

    Args:
        data: Dict with a value for every name in ``fieldnames``; extra keys
            are ignored.

    Raises:
        KeyError: If *data* lacks one of the expected fields.
    """
    # Append with a dict writer so values are matched to columns by name.
    fieldnames = ['date', 'first', 'last', 'age', 'email', 'country',
                  'city/town', 'Uni Student', 'Instagram', 'Followers', 'Affiliate']
    with open('data.csv', 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # In append mode the stream starts at the end of the file, so a
        # position of 0 means there is no header yet.
        if csvfile.tell() == 0:
            writer.writeheader()
        # Pick exactly the expected fields out of the response.
        writer.writerow({name: data[name] for name in fieldnames})
# Sample form response used to exercise response_to_csv().
data = {
    'date': '2019_01_01',
    'first': 'firstname',
    'last': 'lname',
    'age': '99',
    'email': 'email#address.com',
    'country': 'USA',
    'city/town': 'MyTown',
    'Uni Student': True,
    'Instagram': 'MyInsta',
    'Followers': 24,
    'Affiliate': 'affiliation',
}

response_to_csv(data)
$ cat data.csv
date,first,last,age,email,country,city/town,Uni Student,Instagram,Followers,Affiliate
2019_01_01,firstname,lname,99,email#address.com,USA,MyTown,True,MyInsta,24,affiliation

Python 2.7 create new line for record in csv(unicodecsv)

I currently have a list of data containing these properties:
# One scraped listing. The values (address, city, ...) are assumed to be
# set earlier by the scraping code, which is not shown here.
properties = {
    'address': address,
    'city': city,
    'state': state,
    'postal_code': postal_code,
    'price': price,
    'facts and features': info,
    'real estate provider': broker,
    'url': property_url,
    'title': title,
}
These are populated with about 25 rows.
I am attempting to write them to a csv file using this:
# Column order of the output file.
fieldnames = ['title', 'address', 'city', 'state', 'postal_code', 'price',
              'facts and features', 'real estate provider', 'url']

# listValue, zipCodes and scraped_data come from the scraping code (not shown).
with open("ogtest-%s-%s.csv" % (listValue, zipCodes), 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # Each item of scraped_data is expected to be one dict per output row.
    for row in scraped_data:
        writer.writerow(row)
My resulting csv is a list with all the data in one row. Each column, such as address, has its title and, below it, ALL the values in a single cell.
['/profile/Colin-Welman/', '/profile/andreagressinger/', '/profile/Regina-Vannicola/', '/profile/Kathryn-Perkins/', etc
The scraped_data appears like this:
{'city': '(844) 292-5128(310) 505-7493(310) 562-8483(310) 422-9001(310) 439-5303(323) 736-4891(310) 383-8111(310) 482-2033(646) 872-4990(310) 963-1648', 'state': None, 'postal_code': None, 'facts and features': u'', 'address': ['/profile/Jonathan-Pearson/', '/profile/margret2/', '/profile/user89580694/', '/profile/Rinde-Philippe/', '/profile/RogerPerryLA/', '/profile/tamaramattox/', '/profile/JFKdogtownrealty/', '/profile/The-Cunningham-Group/', '/profile/TheHeatherGroup/', '/profile/EitanSeanConstine1/'], 'url': None, 'title': None, 'price': None, 'real estate provider': 'Jonathan PearsonMargret EricksonSusan & Rachael RosalesRinde PhilippeRoger PerryTamara Mattoxjeff koneckeThe Cunningham GroupHeather Shawver & Heather RogersEitan Sean Constine'}
My goal is for each item to be on its own row.
I've tried this: adding 'newline' in the with open (it gives an error)
adding delimiter to csv.DictWriter (this added individual columns for each record, not rows)
Any help would be much appreciated.
Your scraped_data should be a list of dictionaries for csv.DictWriter to work. For instance:
import csv

# Mock data: scraped_data must be a list with one dict per listing.
scraped_data = []
row = {
    'title': 'My Sweet Home',
    'address': '1000, Abc St.',
    'city': 'San Diego',
    'facts and features': 'Miniramp in the backyard',
    'postal_code': '000000',
    'price': 1000000,
    'real estate provider': 'Abc Real Estate',
    'state': 'CA',
    'url': 'www.mysweethome.com',
}
scraped_data.append(row)

# Column order of the output file.
fieldnames = [
    'title', 'address', 'city', 'state', 'postal_code', 'price',
    'facts and features', 'real estate provider', 'url',
]

# Writing the CSV file: one header row, then one row per listing.
# NOTE: on Python 3, also pass newline='' to open(); it is left out here
# because the question targets Python 2.7, where open() has no such argument.
with open('ogtest.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for row in scraped_data:
        writer.writerow(row)
Hope it helps.
Cheers

how to convert dictionary into csv file

I got some data and I need to convert it into a csv file. I'm not a coder but I think that python can help me with these dict/tuples.
# Response payload: paging information plus the records under 'rows'.
# Every record has an 'id' string and a 'cell' list of column values.
a = {
    'page': '1',
    'records': 984,
    'rows': [
        {
            'cell': ['2012000000885',
                     'A. B. OLIVEIRA - ME',
                     '10\\/08\\/2012',
                     '2.200,00',
                     '0,00',
                     '0,00'],
            'id': '2012000000885;02070712000102;2012;4',
        },
        {
            'cell': ['2012000000440',
                     'A. C SILVA E CIA LTDA',
                     '26\\/04\\/2012',
                     '600,00',
                     '600,00',
                     '600,00'],
            'id': '2012000000440;02070712000102;2012;4',
        },
        {
            'cell': ['2012000000104',
                     'ADAILTON PEREIRA DE OLIVEIRA',
                     '27\\/01\\/2012',
                     '100,00',
                     '100,00',
                     '100,00'],
            'id': '2012000000104;02070712000102;2012;4',
        },
        {
            'cell': ['2012000000261',
                     'ADAILTON PEREIRA DE OLIVEIRA',
                     '01\\/03\\/2012',
                     '200,00',
                     '200,00',
                     '200,00'],
            'id': '2012000000261;02070712000102;2012;4',
        },
        {
            'cell': ['2012000000360',
                     'ADAILTON PEREIRA DE OLIVEIRA',
                     '28\\/03\\/2012',
                     '200,00',
                     '200,00',
                     '200,00'],
            'id': '2012000000360;02070712000102;2012;4',
        },
    ],
    'total': 1,
}
I'm interested only in 'rows'; I need to separate 'id' and 'cell' so that I can import them into a csv file.
Thank you in advance for any help.
Rogerio Marinho
import csv

# On Python 3 the csv module needs a text-mode file opened with newline='';
# the binary 'wb' mode only works on Python 2.
with open('output.csv', 'w', newline='') as fout:
    csvout = csv.writer(fout)
    for row in a['rows']:
        # First column is the id, followed by the values of 'cell'.
        csvout.writerow([row['id']] + row['cell'])
Creates a CSV file with first column as ID and subsequent columns as whatever cell is.
This is much cleaner:
import csv

# newline='' lets the csv module control the line endings itself
# (otherwise blank lines can appear between rows on Windows).
with open('names.csv', 'w', newline='') as csvfile:
    fieldnames = ['first_name', 'last_name']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({'first_name': 'Baked', 'last_name': 'Beans'})

Categories