Writing csv file in Python: unwanted commas appear - python

I'm trying to extract a list of dictionary values into a file with the following code:
import csv
def function(file, output, deli = ','):
dictionary = dict()
with open(file, 'r') as source, open(output, 'w') as outp:
data = csv.reader(source)
line0 = next(data)
i = 0
for element in line0:
dictionary[i] = element
i += 1
my_writer = csv.writer(outp)
for element in dictionary.values():
print(element)
my_writer.writerow(element)
if __name__ == '__main__':
from sys import argv
if len(argv) == 2:
function(argv[1])
elif len(argv) == 3:
function(argv[1], argv[2])
elif len(argv) == 4:
function(argv[1], argv[2], argv[3])
print("ok")
To run this code on the shell, I use the command:
python function.py input output
However, trying on a csv file like:
alpha, beta, gamma, delta
I get the following result:
a,l,p,h,a
,b,e,t,a
,g,a,m,m,a
,d,e,l,t,a
I tried to change the delimiter to ' ' and I got the same result with spaces instead of commas.
Am I missing something?

The problem is in the lines
for element in dictionary.values():
print(element)
my_writer.writerow(element)
According to the docs, the argument to writerow should be a sequence of objects (one per field). In your case the argument is a single string (which is itself an iterable of characters).
So what you are actually doing is my_writer.writerow("alpha") which is written as a,l,p,h,a.
You should simply do
# Replaces the whole for-loop: each element is a plain string (it has no
# .values()), so pass the complete sequence of fields as ONE row instead.
my_writer.writerow(dictionary.values())
Also, you are getting leading commas because your CSV line is alpha, beta, gamma, delta. So when the split happens the elements are ['alpha', ' beta', ' gamma', ' delta'] (note the leading spaces). You could use strip() to remove them.

You are looping over the characters in the line instead of the actual elements
with open(file_, 'r') as source, open(output, 'w') as outp:
data = csv.reader(source, delimiter=deli)
#line0 = next(data)
i = 0
for element in data:
dictionary[i] = element
i += 1
Also, it's considered good practice not to override built-ins; in this case you are overriding file. Instead, PEP 8 recommends adding an underscore to the end of your variable name (file_).

Related

How to match value in enumeration to a keyword?

I want to write a keyword-in-context script in which I first read a text file as an enumerated list and then return a given keyword and the five next words.
I saw that similar questions were asked for C# and I found solutions for the enum module in Python, but I hope there is a solution for just using the enumerate() function.
This is what I have got so far:
# Find keywords in context
import string
# open input txt file from local path
with open('C:\\Users\\somefile.txt', 'r', encoding='utf-8', errors='ignore') as f: # open file
data1=f.read() # read content of file as string
data2=data1.translate(str.maketrans('', '', string.punctuation)).lower() # remove punctuation
data3=" ".join(data2.split()) # remove additional whitespace from text
indata=list(data3.split()) # convert string to list
print(indata[:4])
searchterms=["text", "book", "history"]
def wordsafter(keyword, source):
for i, val in enumerate(source):
if val == keyword: # cannot access the enumeration value here
return str(source[i+5]) # intend to show searchterm and subsequent five words
else:
continue
for s in searchterms: # iterate through searchterms
print(s)
wordsafter(s, indata)
print("done")
I was hoping I could simply access the value of the enumeration like I did here, but that does not seem to be the case.
With credits to @jasonharper, your improved code:
import string
def wordsafter(keyword, source):
    """Return the first occurrence of *keyword* with the five words after it.

    *source* is a list of words. The result is a single space-joined string
    holding the keyword plus up to five following words (fewer when the
    keyword sits near the end of *source*). Returns None when *keyword*
    does not occur in *source*.
    """
    for i, val in enumerate(source):
        if val == keyword:
            # i + 6, not i + 5: the slice has to hold the keyword itself
            # plus the five subsequent words.
            return ' '.join(source[i:i + 6])
    return None
# wordsafter() for all instances
def wordsafter(keyword, source):
    """Return every occurrence of *keyword* with the five words after it.

    *source* is a list of words. Each hit becomes one space-joined string
    holding the keyword plus up to five following words. Returns an empty
    list when *keyword* does not occur in *source*.
    """
    # i + 6, not i + 5: keyword itself plus the five subsequent words.
    return [
        ' '.join(source[i:i + 6])
        for i, val in enumerate(source)
        if val == keyword
    ]
# open input txt file from local path
with open('README.md', 'r', encoding='utf-8', errors='ignore') as f:
    data1 = f.read()  # read content of file as string
# remove punctuation and lowercase everything
data2 = data1.translate(str.maketrans('', '', string.punctuation)).lower()
data3 = " ".join(data2.split())  # collapse runs of whitespace
indata = data3.split()  # split() already returns a list
searchterms = ["this", "book", "history"]
# The loop variable is deliberately NOT called `string`: that would rebind
# the imported `string` module and break any later string.punctuation use.
for term in searchterms:
    result = wordsafter(term, indata)
    if result:
        print(result)

Reading and writing variables from CSV file in Python (Selenium)

I'm having some difficulties with my code - wondering if anyone could help me as to where I'm going wrong.
The general syntax of the goal I'm trying to achieve is:
Get user input
Split input into individual variables
Write variables (amend) to 'data.csv'
Read variables from newly amended 'data.csv'
Add variables to list
If variable 1 <= length of list, #run some code
If variable 2 <= length of list, #run some code
Here is my python code:
from selenium import webdriver
import time
import csv
x = raw_input("Enter numbers separated by a space")
integers = [[int(i)] for i in x.split()]
with open("data.csv", "a") as f:
writer = csv.writer(f)
writer.writerows(integers)
with open('data.csv', 'r') as f:
file_contents = f.read()
previous_FONs = file_contents.split(' ')
if list.count(integers[i]) == 1:
#run some code
elif list.count(integers[i]) == 2:
#run some code
The error message I'm receiving is TypeError: count() takes exactly one argument (0 given)
Because of the following line
integers = [[int(i)] for i in x.split()]
you're creating a list of lists. Therefore you're passing lists to the count method. Try this one:
integers = [int(i) for i in x.split()]
Edit: Based on your explanation what you want to achieve, this code should do it:
import csv
x = raw_input('Enter numbers separated by a space: ')
new_FONs = [[int(i)] for i in x.split()]
with open('data.csv', 'a', newline='') as f:
writer = csv.writer(f)
writer.writerows(new_FONs)
with open('data.csv', 'r') as f:
all_FONs_str = [line.split() for line in f]
all_FONs = [[int(FON[0])] for FON in all_FONs_str]
# For each of the user-input numbers
for FON in new_FONs:
# Count the occurance of this number in the CSV file
FON_count = all_FONs.count(FON)
if FON_count == 1:
print(f'{FON[0]} occurs once')
# do stuff
elif FON_count == 2:
print(f'{FON[0]} occurs twice')
# do stuff
I have changed the name of the list read from the CSV to all_FONs as a reminder that it contains the old entries and also the new ones (since we wrote them into the file before reading it).
In addition, you need to convert the entries, because reading from CSV gives you strings, not integers, which would make the comparison difficult. Maybe the whole conversion to int is not necessary and you can just work on strings, but that depends on what you need.
Edit2: Sorry forgot to change from input to raw_input for Python 2.7 :)

String of lists (probably) to a csv file python

Following up my previous question, because I couldn't get a satisfactory answer. Now I have data like this, don't know what it exactly is
["'A','B','C'"]["'a1,a2','b1','c1'"]["'a2,a4','b3','ct'"]
I'd like my final output to be written to a csv file like below. How can I achieve this?
A ,B ,C
a1,a2 ,b1 ,c1
a2,a4 ,b3 ,ct
Assuming that ["'A','B','C'"]["'a1,a2','b1','c1'"]["'a2,a4','b3','ct'"] is one long string as the original post seems to imply, ie:
"""["'A','B','C'"]["'a1,a2','b1','c1'"]["'a2,a4','b3','ct'"]"""
then the following code should work:
# ORIGINAL STRING
s = """["'A','B','C'"]["'a1,a2','b1','c1'"]["'a2,a4','b3','ct'"]"""
# GET RID OF UNNECESSARY CHARACTERS FOR OUR CSV
s = s.replace("][", "--") # temporary chars to help split into lines later on
s = s.replace("[", "")
s = s.replace("]", "")
s = s.replace("\'", "")
s = s.replace("\"", "")
# SPLIT UP INTO A LIST OF LINES OF TEXT
lines = s.split("--")
# WRITE EACH LINE IN TURN TO A CSV FILE
with open("myFile.csv", mode = "w") as textFile:
# mode = w to override any other contents of an existing file, or
# create a new one.
# mode = a To append to an exising file
for line in lines:
textFile.write(line + str("\n"))
An alternative way, again assuming that the data is encoded as one long string:
import ast
import csv

# ORIGINAL STRING
s = """["'A','B','C'"]["'a1,a2','b1','c1'"]["'a2,a4','b3','ct'"]"""
# PARSE INTO A LIST OF ROWS WITH STRING ELEMENTS
# "][" -> "],[" turns the text into a valid literal: a tuple of one-element lists.
rows = ast.literal_eval(s.replace("][", "],["))
# Each one-element list wraps a string such as "'a1,a2','b1','c1'",
# which is itself a tuple literal of the row's fields.
s2 = [ast.literal_eval(row[0]) for row in rows]
# WRITE EACH ROW AS A LINE IN THE CSV FILE
# mode = w to override any other contents of an existing file, or
# create a new one.
# mode = a To append to an existing file
# newline="" lets the csv module control line endings itself.
with open("myFile.csv", mode="w", newline="") as textFile:
    # csv.writer quotes fields that contain the delimiter, so 'a1,a2'
    # stays ONE column instead of silently becoming two.
    writer = csv.writer(textFile, lineterminator="\n")
    writer.writerows(s2)
Since the given input won't be accepted by any inbuilt data structure, you need to convert it either into a string or a list of lists. Assuming your input as a string in the following. Also, you can modify the formatting as per your requirement.
#!/usr/bin/python
from ast import literal_eval
def csv(li):
    """Write the rows in *li* to ``test.csv`` as padded, comma-separated text.

    *li* is a list of one-element lists; each element is a string such as
    ``"'A','B','C'"``. Every field is left-aligned in a 10-character column
    and followed by a comma right-aligned in another 10-character column;
    the trailing comma and padding of each line are stripped before writing.
    """
    # NOTE(review): the function name shadows the standard-library csv module;
    # it is kept unchanged so existing callers keep working.
    # The context manager closes the file even if a row fails to format.
    with open("test.csv", "w") as file_handle:
        for outer in li:
            # Strip the outer quotes, then split on the "'," field boundaries.
            fields = outer[0].strip("'").split("',")
            # Build a formatted string (change this as per your requirement).
            value = "".join(
                '{0: <10}'.format(inner.strip("'")) + '{0: >10}'.format(",")
                for inner in fields
            )
            file_handle.write(value.strip(", ") + "\n")
#assuming your input as string
def main():
    """Split the hard-coded input string into its bracketed lists and hand them to csv()."""
    # assuming your input as string
    li_str = """["'A','B','C'"]["'a1,a2','b1','c1'"]["'a2,a4','b3','ct'"]"""
    parsed = []
    cursor = 0
    # Walk the string one "[...]" group at a time and evaluate each group
    # into a real list.
    while True:
        opening = li_str.find("[", cursor)
        if opening == -1:
            break
        closing = li_str.find("]", opening + 1)
        parsed.append(literal_eval(li_str[opening:closing + 1]))
        cursor = closing + 1
    # parsed now contains a list of lists, i.e. the same shape as the input
    csv(parsed)
if __name__=="__main__":
main()

Write individual array elements to unique files

I have two arrays, infile and outfile:
infile = ['Apple', 'Orange', 'Banana']
outfile = ['Applefile', 'Orangefile', 'Bananafile']
I search readin.txt for each element of the infile array, and for any line containing said element, I do a couple of things. This is what readin.txt looks like:
Apple = 13
Celery = 2
Orange = 5
Banana =
Grape = 4
The outfile array contains the names of files I would like to create; each corresponding to an element in infile. The first element in infile corresponds to the first element (file name) in outfile, and so on.
The problem I'm having is with this bit of code:
for line in open("readin.txt", "r"):
for i in infile:
if i in line:
sp = line.split('=')
sp1 = str(sp[1])
def parseline(l):
return sp1.strip() if len(sp) > 1 and sp[1].strip() != '' else None
for s in outfile:
out = parseline(line)
outw = open(s, "w")
outw.write(str(out))
outw.close()
In the first part of the code, I want to search readin.txt for any one of the words from infile (i.e. Apple, Orange, and Banana). I then want the code to select out the entire line in which that word occurs. I know that any such line in readin.txt will contain an equal sign, so I then want the code to split the line around the equal sign, and produce only that which follows the equal sign.
While the last part of the code indeed creates separate files for each element in outfile, the actual output always corresponds to the last element of infile. It's as though each subsequent step in the loop overwrites the previous steps. I feel like I need to be looking at the i-th elements of line, but I'm not sure how to do that in Python. Any help would be great.
Editing for clarity and hopes of having the question re-opened:
In fact, the following code seems to do exactly what I want:
for line in open("parameters.txt", "r"):
for i in infile:
if i in line:
sp = line.split('=')
sp1 = str(sp[1]).strip() if len(sp) > 1 and sp[1].strip() != '' else None
print sp1
On the command line, I get:
13
5
None
So this tells me that the first portion of the code is doing essentially what I want it to (although perhaps not in the most efficient way, so any other suggestions would be appreciated).
At this point, I'd like all of the information that was printed out to be written to individual files based on the outfile array. ie. 13 should be entered into a file called Applefile, None should be written into a file called Bananafile, etc. This is the point at which I'm having trouble. I know that 'outfile' should be indexed similarly, so that the first element of outfile corresponds to the first element of infile, but my attempts so far have not worked.
This is my most recent attempt:
for line in open("parameters.txt", "r"):
for i in infile:
if i in line:
def parseline(l):
sp = l.split('=')
sp1 = str(sp[1]).strip() if len(sp) > 1 and sp[1].strip() != '' else None
if sp1:
out = parseline(line)
outw = open(outfile[i], "w")
outw.write(line)
outw.close()
where defining parseline any sooner in the code negates the whole beginning part of the code for some reason.
I'm not looking for just the answer. I would like to understand what is going on and be able to figure out how to fix it.
the actual output within every single file created corresponds to the last element of infile
because for every element of infile, you are looping over every element of outfile and writing the latest line, so it makes sense that you end up with all files containing the last line. Since your infile/outfile lines correspond you can use the i index from the main infile loop to grab the label you want from outfile.. something like:
with open("readin.txt", "r") as source:
    for line in source:
        # infile and outfile are parallel lists: zip pairs each search word
        # with its own output file name (a word cannot be used as a list index).
        for word, filename in zip(infile, outfile):
            if word in line:
                sp = line.split('=')
                sp1 = sp[1].strip() if len(sp) > 1 and sp[1].strip() != '' else None
                if sp1:
                    # Only the file that belongs to this word is written.
                    with open(filename, "w") as outw:
                        outw.write(sp1)
I would break this down into two steps:
def parse_params(filename):
    """Convert the parameter file into a map from parameter name to value.

    Each line of the form ``name = value`` becomes one entry; both sides are
    stripped of surrounding whitespace and kept as strings. Only the first
    ``=`` separates name from value. Lines without ``=`` (for example blank
    lines) are skipped. A line such as ``Banana =`` maps to the empty string.
    """
    out = {}
    with open(filename) as f:
        for line in f:
            # partition() always yields three parts, so a line with zero or
            # several "=" cannot break the unpacking the way split("=") does.
            word, sep, num = line.partition("=")
            if not sep:
                continue
            out[word.strip()] = num.strip()
    return out  # e.g. {'Celery': '2', 'Apple': '13', 'Orange': '5'}
def process(in_, out, paramfile):
    """Write the values defined in *paramfile* to the out files based on in_.

    *in_* and *out* are parallel sequences: the value found for ``in_[k]``
    is written to the file named ``out[k]``. Words with no entry in the
    parameter file are reported on stdout and produce no file.
    """
    value_map = parse_params(paramfile)
    # Use the arguments, not the module-level infile/outfile lists, so the
    # function works with whatever the caller passes in.
    for word, filename in zip(in_, out):
        if word in value_map:
            with open(filename, 'w') as f:  # or '"{0}.txt".format(filename)'
                f.write(value_map[word])
        else:
            print("No value found for '{0}'.".format(word))
process(infile, outfile, "parameters.txt")
Your current code really doesn't make much sense:
for line in open("parameters.txt", "r"): # iterate over lines in file
for i in infile: # iterate over words in infile list
if i in line: # iterate over characters in the file line (why?)
def parseline(l): # define a function
sp = l.split('=')
sp1 = str(sp[1]).strip() if len(sp) > 1 and sp[1].strip() != '' else None
if sp1:
out = parseline(line)
outw = open(outfile[i], "w")
outw.write(line)
outw.close()
# but apparently never call it (why?)
Using the same loop variable name in two loops is a bad idea, you only ever see the inner value:
>>> for x in range(2):
for x in "ab":
print x
a
b
a
b
If you find that a function "needs" to be defined in a particular place, it suggests that you are relying on scoping to access variables. It is much better to define specific arguments and return values for the parameters you need; it makes development and testing significantly easier.

Check whether string is in CSV

I want to search a CSV file and print either True or False, depending on whether or not I found the string. However, I'm running into the problem whereby it will return a false positive if it finds the string embedded in a larger string of text. E.g.: It will return True if string is foo and the term foobar is in the CSV file. I need to be able to return exact matches.
username = input()
if username in open('Users.csv').read():
print("True")
else:
print("False")
I've looked at using mmap, re and csv module functions, but I haven't got anywhere with them.
EDIT: Here is an alternative method:
import re
import csv
username = input()
with open('Users.csv', 'rt') as f:
reader = csv.reader(f)
for row in reader:
re.search(r'\bNOTSUREHERE\b', username)
when you look inside a csv file using the csv module, it will return each row as a list of columns. So if you want to lookup your string, you should modify your code as such:
import csv

username = input()
with open('Users.csv', 'rt') as f:
    reader = csv.reader(f, delimiter=',')  # good point by @paco
    # Each row is a list of column strings; compare whole fields so that
    # "foo" does not match "foobar". any() stops at the first exact match.
    if any(field == username for row in reader for field in row):
        print("is in file")
but as it is a csv file, you might expect the username to be at a given column:
with open('Users.csv', 'rt') as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        # if the username shall be on column 3 (-> index 2);
        # rows with fewer than three columns are skipped instead of raising IndexError
        if len(row) > 2 and username == row[2]:
            print("is in file")
You should have a look at the csv module in python.
is_in_file = False
# Text mode with newline='' is what csv.reader expects on Python 3;
# a file opened in 'rb' mode yields bytes and makes the reader fail.
with open('my_file.csv', 'r', newline='') as csvfile:
    my_content = csv.reader(csvfile, delimiter=',')
    for row in my_content:
        if username in row:
            is_in_file = True
            break  # one exact match is enough
print(is_in_file)
It assumes that your delimiter is a comma (replace it with your own delimiter). Note that username must be defined beforehand. Also change the name of the file.
The code loops through all the lines in the CSV file. row is a list of strings containing each element of the current line. For example, if you have this in your CSV file: Joe,Peter,Michel then row will be ['Joe', 'Peter', 'Michel']. Then you can check whether your username is in that list.
I have used the top comment, it works and looks OK, but it was too slow for me.
I had an array of many strings that I wanted to check if they were in a large csv-file. No other requirements.
For this purpose I used (simplified; in reality I iterated through an array of strings and did other work than print):
with open('my_csv.csv', 'rt') as c:
str_arr_csv = c.readlines()
Together with:
if str(my_str) in str(str_arr_csv):
print("True")
The reduction in time was about 90% for me. The code looks ugly, but I'm all about speed. Sometimes. Note that this is a plain substring test, so it can again match foo inside foobar.
import csv
scoresList=[]
with open ("playerScores_v2.txt") as csvfile:
scores=csv.reader(csvfile, delimiter= ",")
for row in scores:
scoresList.append(row)
playername=input("Enter the player name you would like the score for:")
print("{0:40} {1:10} {2:10}".format("Name","Level","Score"))
for i in range(0,len(scoresList)):
print("{0:40} {1:10} {2:10}".format(scoresList[i] [0],scoresList[i] [1], scoresList[i] [2]))
EXTENDED ALGO:
As i can have in my csv some values with space:
", atleft,atright , both " ,
I patched zmo's code as follows:
if field.strip() == username:
and it's ok, thanks.
OLD FASHION ALGO
i had previously coded an 'old fashion' algorithm that takes care of any allowed separators ( here comma, space and newline),so i was curious to compare performances.
With 10000 rounds on a very simple csv file, i got:
------------------ algo 1 old fashion ---------------
done in 1.931804895401001 s.
------------------ algo 2 with csv ---------------
done in 1.926626205444336 s.
As this is not too bad, 0.25% longer, i think that this good old hand made algo can help somebody (and will be useful if more parasitic chars as strip is only for spaces)
This algo uses bytes and can be used for anything else than strings.
It search for a name not embedded in another by checking left and right bytes that must be in the allowed separators.
It mainly uses loops with ejection asap through break or continue.
def separatorsNok(x):
    """Return True when byte value *x* is not one of the allowed separators."""
    # 44 = comma, 32 = space, 10 = line feed, 13 = carriage return
    return x not in (44, 32, 10, 13)
# set as a function to be able to run several chained tests
def searchUserName(userName, fileName):
    """Report whether *userName* occurs in *fileName* as a stand-alone token.

    The file is read as raw bytes and searched for the UTF-8 encoding of
    *userName*. A hit only counts when it is bordered on both sides by the
    start/end of the file or by a separator byte (comma, space, LF, CR), so
    ``foo`` does not match inside ``foobar``.

    Prints ``<userName> is in file`` for the first bordered match and
    returns True. Prints nothing and returns False otherwise, including for
    an empty *userName* or an empty file.
    """
    separators = b", \n\r"
    # read file as binary (supposed to be utf-8 as userName); the context
    # manager closes the handle even if reading fails
    with open(fileName, 'rb') as f:
        contents = f.read()
    # set username in bytes
    userBytes = userName.encode('utf-8')
    lenOfUser = len(userBytes)
    if lenOfUser == 0:
        # An empty name would "match" everywhere; treat it as not found.
        return False
    # find() looks for the contiguous byte sequence, so a mismatch in the
    # middle of a candidate can never be carried over as a partial match.
    start = contents.find(userBytes)
    while start != -1:
        end = start + lenOfUser
        # One-byte slices are compared against the separator bytes; plain
        # indexing would yield ints on Python 3 and str on Python 2.
        leftOk = start == 0 or contents[start - 1:start] in separators
        rightOk = end == len(contents) or contents[end:end + 1] in separators
        if leftOk and rightOk:
            print(userName, "is in file")
            return True
        # Candidates may overlap, so resume just one byte further on.
        start = contents.find(userBytes, start + 1)
    return False
to check: searchUserName('pirla','test.csv')
As in the other answers, the code exits at the first match, but it can easily be extended to find all of them.
HTH
#!/usr/bin/python
import csv
with open('my.csv', 'r') as f:
lines = f.readlines()
cnt = 0
for entry in lines:
if 'foo' in entry:
cnt += 1
print"No of foo entry Count :".ljust(20, '.'), cnt

Categories