How can my function take CSV files as an input? (Python)

I need to take a CSV file as a function argument, and as example code I have:
import openpyxl
from openpyxl import load_workbook
import csv
import xlrd

def supermarket(filename):
    dictionary = {}
    with open(filename, newline='') as filename:
        read_csv = csv.reader(filename, delimiter=',')
        for row in read_csv:
            dictionary += row[0]
            dictionary[row[0]] = row[1]
    return dictionary

print(supermarket(prices1.csv))
However, when I run the code, the traceback shows a NameError saying that prices1 is not defined. My professor tells me that my function should be able to take prices1.csv as an argument, but I don't see how that's possible.

Your code is actually referring to a member csv of a variable prices1, which obviously doesn't exist: filename should actually be a string. (Note also that dictionary += row[0] would raise a TypeError, since += is not defined between a dict and a string; the assignment dictionary[row[0]] = row[1] on the next line is all you need.) So your code can be changed to this (assuming the CSV file lies in the same directory as your working directory):
import csv

def supermarket(filename):
    dictionary = {}
    with open(filename, newline='') as f:
        read_csv = csv.reader(f, delimiter=',')
        for row in read_csv:
            # map the first column (item) to the second (price)
            dictionary[row[0]] = row[1]
    return dictionary

print(supermarket("prices1.csv"))

For convenience, you can use the pandas library to handle CSV files in a much more concise way.
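For instance, here is a minimal sketch of the same function using pandas, assuming the same two-column item,price layout and no header row in the file:

import pandas as pd

def supermarket(filename):
    # read a headerless two-column CSV; the column names are our own labels
    df = pd.read_csv(filename, header=None, names=["item", "price"])
    return dict(zip(df["item"], df["price"]))

print(supermarket("prices1.csv"))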

Related

Python: Trying to print fields of config.ini to csv file

I have a config.ini file in which the user supplies the fields they want to show as headers in a CSV file. I have tried a few different methods but cannot seem to get what I need printed.
This is what the INI file looks like:
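[FIELDINFO]
field1 = Issue Key
field2 = Status
field3 = Created
field4 = Updated
field5 = Priority
field6 = Summary
field7 = Type

And this is my code: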
import csv
from dataclasses import fields
from distutils.command.config import config
import pandas as pd
from jira import JIRA
from atlassian import Confluence
from configparser import ConfigParser

csvFilename = "csvHeaders.csv"  # file name

## Config file
config_file = ConfigParser()  # create/get ConfigParser object
config_file.read("config.ini")

# Get the fields from the config file
#fieldlist = []
fieldlist = config_file["FIELDINFO"]
#print(fieldlist['fields'])
#print(fieldlist)  # 1ST PRINT STATEMENT -- this prints <Section: FIELDINFO>
#with open('config.ini', 'w') as conf:
#    config_object.write(conf)

for i in fieldlist:
    #print(fieldlist[i])
    #csvHeaders = fieldlist['fields']
    csvHeaders = fieldlist[i]
    #print(csvHeaders)  # 2ND PRINT STATEMENT -- prints list of fields in config.ini

with open(csvFilename, 'w') as f:
    csvwriter = csv.writer(f)
    #csvwriter.writerow(fieldlist[i])
    #csvwriter.writerow(csvHeaders)
    #csvwriter.writerow(fieldlist['fields'])
    csvwriter.writerow(fieldlist)
    #for i in fieldlist:
    #    csvwriter.writerow(fieldlist[i])
Currently this writes field1,field2,field3, etc. rather than the field names. When I change the writerow call to fieldlist[i], it writes the field names, but with commas between each character. What am I doing wrong? Thank you.
You can try this code.
import csv
from configparser import ConfigParser

csvFilename = "csvHeaders.csv"  # file name

# Config file
config_file = ConfigParser()  # create/get ConfigParser object
config_file.read("config.ini")

# Get the section
fieldlist = config_file["FIELDINFO"]

# Copy the field names (the section's values) into a list of headers
csvHeaders = []
for i in fieldlist:
    csvHeaders.append(fieldlist[i])

with open(csvFilename, 'w', newline='') as f:
    csvwriter = csv.DictWriter(f, fieldnames=csvHeaders)
    # write the header
    csvwriter.writeheader()
    # write a sample row
    csvwriter.writerow({csvHeaders[0]: '1',
                        csvHeaders[1]: '2',
                        csvHeaders[2]: '3',
                        csvHeaders[3]: '4',
                        csvHeaders[4]: '5',
                        csvHeaders[5]: '6',
                        csvHeaders[6]: '7'})
Refer to the csv module documentation.
To create a CSV header from the field values in config.ini, you can do the following:
import csv
from configparser import ConfigParser

csvFilename = "csvHeaders.csv"  # file name

## Config file
config_file = ConfigParser()  # create/get ConfigParser object
config_file.read("config.ini")

fieldlist = config_file["FIELDINFO"]

with open(csvFilename, 'w', newline='') as f:
    csvwriter = csv.writer(f)
    # the section's values are the desired header names
    csvwriter.writerow(fieldlist.values())
This would create a normal CSV file header as:
Issue Key,Status,Created,Updated,Priority,Summary,Type
Note: A CSV file by default uses commas to separate columns (which is why they are called Comma-Separated Values files). You can change the delimiter being used, but the default should normally be kept.
If one of your fields in the INI file happens to contain a comma, the csv library will automatically wrap the entry in quotes for you, which is needed so that a program reading the file back in works correctly.
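As a quick illustration of that quoting behaviour (the value containing a comma here is hypothetical):

import csv
import io

buf = io.StringIO()
# the second value contains a comma, so the writer quotes it
csv.writer(buf).writerow(["Issue Key", "Created, Updated"])
print(buf.getvalue())  # prints: Issue Key,"Created, Updated"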

Python converts multiple JSON files in a folder directory to CSV

I have a lot of JSON files in a folder, and I want to convert them to CSV format. Should I use glob? I am a novice; how can I modify my code?
# -*- coding: utf-8 -*-
import csv
import json
import sys
import codecs

def trans(path):
    jsonData = codecs.open('C:/Users/jeri/Desktop/1', '*.json', 'r', 'utf-8')
    # csvfile = open(path + '.csv', 'w')
    # csvfile = open(path + '.csv', 'wb')
    csvfile = open('C:/Users/jeri/Desktop/1.csv', 'w', encoding='utf-8', newline='')
    writer = csv.writer(csvfile, delimiter=',')
    flag = True
    for line in jsonData:
        dic = json.loads(line)
        if flag:
            keys = list(dic.keys())
            print(keys)
            flag = False
        writer.writerow(list(dic.values()))
    jsonData.close()
    csvfile.close()

if __name__ == '__main__':
    path = str(sys.argv[0])
    print(path)
    trans(path)
Yes, using glob would be a good way to iterate through the .json files in your folder! But glob doesn't have anything to do with the reading/writing of files. After importing glob, you can use it like this:
for curr_file in glob.glob("*.json"):
    # process each file here
I see that you've used the json module in your code snippet. I'd say the better way to go about it is to use pandas:
df = pd.read_json()
I say this because with the pandas library you can simply convert from .json to .csv using:
df.to_csv('file_name.csv')
Combining the three together (and deriving each output name from its input, so the files don't overwrite each other), it would look like this:
import glob
import pandas as pd

for curr_file in glob.glob("*.json"):
    # read each JSON file, then write a CSV alongside it
    df = pd.read_json(curr_file)
    df.to_csv(curr_file.replace(".json", ".csv"), index=False)
Also, note that if your JSON has nested objects, it can't be directly converted to CSV; you'll have to decide how to flatten the data prior to the conversion.
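One way to handle that is a sketch with pandas.json_normalize, which flattens one level of nesting into dotted column names (the sample record here is made up):

import pandas as pd

# one nested record; json_normalize flattens the inner dict
records = [{"name": "a", "address": {"city": "x", "zip": "1"}}]
df = pd.json_normalize(records)  # columns: name, address.city, address.zip
df.to_csv("flattened.csv", index=False)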

Read a file from a folder and extract a specific key from the file and save as in CSV file

I'm new to Python, and the task I am performing is to extract specific key values from a list of .iris files (each containing data in a nested-dictionary format) in a specific directory.
I want to extract the specific values, save them to a new .csv file, and repeat this for all the other files.
Below is a sample .iris file, from which I should extract only these keys: 'uid', 'enabled', 'login', 'name'.
{"streamType":"user",
"uid":17182,
"enabled":true,
"login":"xyz",
"name":"abcdef",
"comment":"",
"authSms":"",
"email":"",
"phone":"",
"location":"",
"extraLdapOu":"",
"mand":997,
"global":{
"userAccount":"View",
"uid":"",
"retention":"No",
"enabled":"",
"messages":"Change"},
"grants":[{"mand":997,"role":1051,"passOnToSubMand":true}],
I am trying to convert the .iris files to .json and read the files one by one, but unfortunately I am not getting the exact output desired.
Please, could anyone help me?
My code (added from comments):
import os
import csv

path = ''
os.chdir(path)

# read an iris file
def read_iris_file(file_path):
    with open(file_path, 'r') as f:
        print(f.read())

# iterate through all files
for file in os.listdir():
    # check whether the file is in iris format or not
    if file.endswith(".iris"):
        file_path = f"{path}\{file}"
        # call the read-iris-file function
        print(read_iris_file(file_path))
Your files contain data in JSON format, so we can use the built-in json module to parse them. To iterate over files with a certain extension, you can use Path.glob() from pathlib with the pattern "*.iris". Then we can use csv.DictWriter() and pass "ignore" to the extrasaction argument, which makes DictWriter ignore the keys we don't need and write only those passed to the fieldnames argument.
Code:
import csv
import json
from pathlib import Path

path = Path(r"path/to/folder")
keys = "uid", "enabled", "login", "name"

with open(path / "result.csv", "w", newline="") as out_f:
    writer = csv.DictWriter(out_f, fieldnames=keys, extrasaction='ignore')
    writer.writeheader()
    for file in path.glob("*.iris"):
        with open(file) as inp_f:
            data = json.load(inp_f)
        writer.writerow(data)
Try the below (the key point here is loading the whole .iris file in one go; since the content is JSON, json.loads is used rather than ast.literal_eval, which would fail on JSON's lowercase true):
import json

fields = ('uid', 'enabled', 'login', 'name')

with open('my.iris') as f1:
    data = json.loads(f1.read())

with open('my.csv', 'w') as f2:
    f2.write(','.join(fields) + '\n')
    # str() is needed because uid is an int and enabled is a bool
    f2.write(','.join(str(data[f]) for f in fields) + '\n')
my.csv:
uid,enabled,login,name
17182,True,xyz,abcdef

Merge columns of numbers of different lengths from separate text files into a single csv file

Hi, I am fairly new to Python programming and cannot seem to overcome this issue.
I have a directory with 100 subfolders, each containing a single text file (with no file extension), all named exactly the same. Each file contains a single column of numbers, and the columns have different lengths.
I want to merge the numbers from all the files into a single CSV file, with the numbers from each file in a separate column.
So I should end up with a matrix of 100 columns of numbers of differing lengths, where each column corresponds to a single file.
Example of files:
file1
1
15
23
22
10
file 2
3
55
22
I have this script:
# import modules
import glob
import csv
import sys
import itertools

inf = glob.glob("*/*-ambig")

for f in inf:
    with open(f) as fin:
        with open(sys.argv[1], 'w') as fout:
            writer = csv.writer(fout, delimiter=',', quotechar='', quoting=csv.QUOTE_NONE)
            headers = ('coverage', )
            writer.writerow(headers)
            for line in fin:
                columns = line.split("\n")  # split each column on newline
                writer.writerow(itertools.izip_longest(*columns, fillvalue=['']))
However, I am getting this error:
Traceback (most recent call last):
  File "coverage_per_strain.py", line 21, in <module>
    writer.writerow(itertools.izip_longest(*columns, fillvalue=['']))
_csv.Error: sequence expected
Does anyone have any idea what is wrong with my code? And can you see any other errors?
Thanks!
writer.writerow() expects a sequence as its argument, while itertools.izip_longest returns an iterator. Hence the error message.
You should be able to fix the problem with:
writer.writerow(list(itertools.izip_longest(*columns, fillvalue=[''])))
Here's a solution that I wrote up before I realized you were using Python 2.7. As written, it only works on Python 3.3+, as it uses the very nifty contextlib.ExitStack context manager, which was only added in that version (I also use Python 3's map):
import glob
import csv
import sys
import contextlib
from itertools import zip_longest

in_filenames = glob.glob("*/*-ambig")

with contextlib.ExitStack() as stack:
    in_files = [stack.enter_context(open(filename)) for filename in in_filenames]
    out_file = stack.enter_context(open(sys.argv[1], "w", newline=""))
    writer = csv.writer(out_file, delimiter=',', quoting=csv.QUOTE_NONE)
    writer.writerow(('coverage',))  # do you want the header repeated for each column?
    writer.writerows(zip_longest(*(map(str.strip, f) for f in in_files), fillvalue=""))
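Assuming the script is saved as coverage_per_strain.py (the name in your traceback), you would run it as python coverage_per_strain.py output.csv: the output filename comes from sys.argv[1], while the */*-ambig input pattern is hard-coded.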
Here's my attempt to port this back to Python 2. I've not tested this version. I use a try/finally pair to handle the closing of the files (and imap to handle the stripping without reading all of each file into memory up front):
import glob
import csv
import sys
from itertools import izip_longest, imap

in_filenames = glob.glob("*/*-ambig")

with open(sys.argv[1], "wb") as out_file:
    in_files = []
    try:
        for filename in in_filenames:
            in_files.append(open(filename))
        writer = csv.writer(out_file, delimiter=',', quoting=csv.QUOTE_NONE)
        writer.writerow(('coverage',))
        writer.writerows(izip_longest(*[imap(str.strip, f) for f in in_files],
                                      fillvalue=""))
    finally:
        for f in in_files:
            try:
                f.close()
            except:  # ignore exceptions here, or the later files might not get closed!
                pass

Add file name as last column of CSV file

I have a Python script which modifies a CSV file to add the filename as the last column:
import sys
import glob

for filename in glob.glob(sys.argv[1]):
    file = open(filename)
    data = [line.rstrip() + "," + filename for line in file]
    file.close()
    file = open(filename, "w")
    file.write("\n".join(data))
    file.close()
Unfortunately, it also adds the filename to the header (first) row of the file. I would like the string "ID" added to the header instead. Can anybody suggest how I could do this?
Have a look at the official csv module.
Here are a few minor notes on your current code:
It's a bad idea to use file as a variable name, since that shadows the built-in type.
You can close the file objects automatically by using the with syntax.
Don't you want to add an extra column in the header line, called something like Filename, rather than just omitting a column in the first row?
If your filenames have commas (or, less probably, newlines) in them, you'll need to make sure that the filename is quoted - just appending it won't do.
That last consideration would incline me to use the csv module instead, which will deal with the quoting and unquoting for you. For example, you could try something like the following code:
import glob
import csv
import sys

for filename in glob.glob(sys.argv[1]):
    data = []
    with open(filename) as finput:
        for i, row in enumerate(csv.reader(finput)):
            to_append = "Filename" if i == 0 else filename
            data.append(row + [to_append])
    with open(filename, 'wb') as foutput:
        writer = csv.writer(foutput)
        for row in data:
            writer.writerow(row)
That may quote the data slightly differently from your input file, so you might want to play with the quoting options for csv.reader and csv.writer described in the documentation for the csv module.
As a further point, you might have good reasons for taking a glob as a parameter rather than just the files on the command line, but it's a bit surprising - you'll have to call your script as ./whatever.py '*.csv' rather than just ./whatever.py *.csv. Instead, you could just do:
for filename in sys.argv[1:]:
... and let the shell expand your glob before the script knows anything about it.
One last thing - the current approach you're taking is slightly dangerous, in that if anything fails when writing back to the same filename, you'll lose data. The standard way of avoiding this is to instead write to a temporary file, and, if that was successful, rename the temporary file over the original. So, you might rewrite the whole thing as:
import csv
import sys
import tempfile
import shutil

for filename in sys.argv[1:]:
    tmp = tempfile.NamedTemporaryFile(delete=False)
    with open(filename) as finput:
        with open(tmp.name, 'wb') as ftmp:
            writer = csv.writer(ftmp)
            for i, row in enumerate(csv.reader(finput)):
                to_append = "Filename" if i == 0 else filename
                writer.writerow(row + [to_append])
    shutil.move(tmp.name, filename)
You can try:
data = [file.readline().rstrip() + ",ID"]
data += [line.rstrip() + "," + filename for line in file]
You can try changing your code, but using the csv module is recommended. This should give you the result you want:
import sys
import glob
import csv

filename = glob.glob(sys.argv[1])[0]

with open(filename, 'r') as f:
    csv_output = []
    for i, row in enumerate(csv.reader(f)):
        # append "ID" to the header row and the filename to every data row
        row.append('ID' if i == 0 else filename)
        csv_output.append(row)

with open(filename, 'w') as f:
    csv.writer(f, delimiter=',').writerows(csv_output)
Use the CSV module that comes with Python.
import csv
import sys

def process_file(filename):
    # Read the contents of the file into a list of lines.
    f = open(filename, 'r')
    contents = f.readlines()
    f.close()
    # Use a CSV reader to parse the contents.
    reader = csv.reader(contents)
    # Open the output and create a CSV writer for it.
    f = open(filename, 'wb')
    writer = csv.writer(f)
    # Process the header.
    header = reader.next()
    header.append('ID')
    writer.writerow(header)
    # Process each row of the body.
    for row in reader:
        row.append(filename)
        writer.writerow(row)
    # Close the file and we're done.
    f.close()

# Run the function on all command-line arguments. Note that this does no
# checking for things such as file existence or permissions.
map(process_file, sys.argv[1:])
You can run this as follows:
blair#blair-eeepc:~$ python csv_add_filename.py file1.csv file2.csv
You can use fileinput to do in-place editing:
import sys
import glob
import fileinput

for filename in glob.glob(sys.argv[1]):
    for line in fileinput.FileInput(filename, inplace=1):
        if fileinput.lineno() == 1:
            print line.rstrip() + ",ID"
        else:
            print line.rstrip() + "," + filename
