How to write a unique value to a csv with python - python

I am trying to writer unique values to a csv that already has a list of ints inside it.
Currently I have tried to loop through a range of possible numbers then check if those numbers are in the csv. It appears that the checking is not working properly.
def generateUserCode():
with open ('/MyLocation/user_codes.csv') as csvDataFile:
userCodes = csv.reader(csvDataFile)
for x in range(0, 201):
if x not in userCodes:
return x
def writeUserCode(userCode):
with open ('/MyLocation/user_codes.csv', 'a') as csvDataFile:
csvDataFile.write('\n' + str(userCode))
userCode = generateUserCode()
writeUserCode(userCode)
So it should print the first number not in the csv and add the number to the csv. However all it is doing is printing 0 and adding 0 to my csv every time it is run even if 0 is in the csv.
Update:
The csv looks something like this:
3
4
5
35
56
100
There are more values but it is generally the same with no repeats and values between 0 and 200

The problem is with the following line:
if x not in userCodes:
userCodes is not a list it is a csvReader object. Also, you should use
if str(x) not in line:
#use str(x) instead of x
This is the code that works for me:
import csv
def generateUserCode():
with open ('file.csv') as csvDataFile:
csvread = csv.reader(csvDataFile)
userCodes = []
#print(userCodes)
for line in csvread:
try:
userCodes.append(line[0]) # As long as the code is the first
# element in that line, it should work
except:
IndexError # Avoid blank lines
print(userCodes)
for x in range(0, 201):
if str(x) not in userCodes:
return x
def writeUserCode(userCode):
with open ('file.csv', 'a') as csvDataFile:
csvDataFile.write('\n' + str(userCode))
userCode = generateUserCode()
writeUserCode(userCode)

Iterating userCodes shows each item is a list of strings:
for x in userCodes:
print(x)
returns:
['3']
['4']
['5']
['35']
['56']
['100']
So there are a lot of possible fixes, one would be:
def generateUserCode():
with open ('/MyLocation/user_codes.csv') as csvDataFile:
userCodes = csv.reader(csvDataFile)
userCodes = [int(item[0]) for item in userCodes]
for x in range(0, 201):
if x not in userCodes:
return x

It’s tricky to answer without seeing the CSV, but when you read the CSV, all fields are strings. Therefor you need to convert either the userCodes to int or x to string for the comparison to work.
For example:
userCodes = [int(d[0]) for d in csv.reader(csvDataFile)]
for x in range(0, 201):
if x not in userCodes:
return x

You are checking if a str is in an instance of csv.reader. This syntax doesn't work even with a normal file handle:
with open('somefile.txt') as fh:
x = fh.read()
x
'Spatial Reference: 43006\nName: Jones Tract\n424564.620666, 4396443.55267\n425988.30892, 4395630.01652\n426169.09473, 4395426.63249\n426214.291182, 4395268.4449\n\nName: Lewis Tract\n427909.158152, 4393935.14955\n428587.104939, 4393731.76552\n428700.096071, 4393528.38148\n428745.292523, 4393347.59567\n\nName: Adams Tract\n424180.450819, 4393957.74778\n424361.236629, 4393709.16729\n424655.013571, 4393641.37261\n424858.397607, 4393776.96197\n'
# now check if 'e' is in fh
with open('somefile.txt') as fh:
'e' in fh
False
'e' in x
True
Also, your csv file isn't really a csv file, so I'd just use a normal file handle and ignore the csv entirely.
The better approach may be to aggregate your codes in a set and check from there:
def get_codes():
with open('user_codes.csv') as fh:
# return a set to test membership quickly
return {line.strip() for line in fh}
codes = get_codes()
def add_code(code):
if code not in codes:
codes.add(code)
with open('user_codes.csv', 'a') as fh:
fh.write(code)
else:
raise ValueError("Code already exists")
# or do something else
add_code(88)
add_code(88)
# ValueError
To generate a user code automatically, since you are using a range, this becomes relatively easy:
def generate_user_code():
try:
# this returns the first number not in codes
return next(i for i in range(201) if i not in codes)
except StopIteration:
# you've exhausted your range, nothing is left
raise ValueError("No unique codes available")
# and your write method can be changed to
def add_code(code):
with open('user_codes.csv', 'a') as fh:
codes.add(code)
fh.write(code)
codes = get_codes()
user_code = generate_user_code()
add_code(user_code)

You may try to do this:
....
userCodes = csv.reader(csvDataFile)
uc = []
for y in userCodes:
uc += y
for x in range(0, 201):
if str(x) not in uc:
return x
....

Related

How to correctly append the output to an empty list

I am looking for words in my chosen text files.
def cash_sum(self):
with open(self.infile.name, "r") as myfile:
lines = myfile.readlines()
for line in lines:
if re.search("AMOUNT", line):
x = []
x.append(line[26:29])
print(x)
I have this output:
['100']
['100']
['100']
And I want to add them so I can have sum of all amounts from this file.
Any advices?
Initialize x outside the line-iterating loop.
Also, you don't need to readlines() separately...
def cash_sum(self):
x = []
with open(self.infile.name, "r") as myfile:
for line in myfile:
if re.search("AMOUNT", line):
x.append(line[26:29])
# or cast to int before appending:
# x.append(int(line[26:29]))
return x
This will give you the sum...
def cash_sum(self):
with open(self.infile.name, 'r') as myfile:
lines = myfile.readlines()
totalAmount = 0
for line in lines:
if re.search("AMOUNT", line):
totalAmount += float(line[26:29])
First thing I'd recommend is casting that value to a float. I say float instead of int just in case you have a decimal point. For example, float('100') == 100.0 . Once you have that, you should be able to then add them up as you would normal numbers.
You have redefined x inside your loop, so every time your loop iterates, it resets x to and empty list. Try the following:
def cash_sum(self):
with open(self.infile.name, "r") as myfile:
lines = myfile.readlines()
x = [] # That way x is defined outside the loop
for line in lines:
if re.search("AMOUNT", line):
x.append(line[26:29])
print(x)
# And if you want to add each item in the list...
num = float(0)
for number in x:
num = num + float(number)
return num

CSV Python Outputting: Outputting non-matching field once rather than once for every item in list

I've been trying to figure this out for about a year now and I'm really burnt out on it so please excuse me if this explanation is a bit rough.
I cannot include job data, but it would be accurate to imagine 2 csv files both with the first column populated with values (Serial numbers/phone numbers/names, doesn't matter - just values). Between both csv files, some values would match while other values would only be contained in one or the other (Timmy is in both files and is a match, Robert is only in file 1 and does not match any name in file 2).
I can successfully output a csv value ONCE that exists in the both csv files (I.e. both files contain "Value78", output file will contain "Value78" only once).
When I try to tack on an else statement to my if condition, to handle non-matching items, the program will output 1 entry for every item it does not match with (makes 100% sense, matches happen once but every other comparison result besides the match is a non-match).
I cannot envision a structure or method to hold the fields that don't match back so that they can be output once and not overrun my terminal or output file.
My goal is to output two csv files, matches and non-matches, with the non-matches having only one entry per value.
Anyways, onto the code:
import csv
MYUNITS = 'MyUnits.csv'
VENDORUNITS = 'VendorUnits.csv'
MATCHES = 'Matches.csv'
NONMATCHES = 'NonMatches.csv'
with open(MYUNITS,mode='r') as MFile,
open(VENDORUNITS,mode='r') as VFile,
open(MATCHES,mode='w') as OFile,
open(NONMATCHES,mode'w') as NFile:
MyReader = csv.reader(MFile,delimiter=',',quotechar='"')
MyList = list(MyReader)
VendorReader = csv.reader(VFile,delimiter=',',quotechar='"')
VList = list(VendorReader)
for x in range(len(MyList)):
for y in range(len(VList)):
if str(MyList[x][0]) == str(VList[y][0]):
OFile.write(MyList[x][0] + '\n')
else:
pass
The "else: pass" is where the logic of filtering out non-matches is escaping me. Outputting from this else statement will write the non-matching value (len(VList) - 1) times for an iteration that DOES produce 1 match, the entire len(VList) for an iteration with no match. I've tried using a counter and only outputting if the counter equals the len(VList), (incrementing in the else statement, writing output under the scope of the second for loop), but received the same output as if I tried outputting non-matches.
Below is one way you might go about deduplicating and then writing to a file:
import csv
MYUNITS = 'MyUnits.csv'
VENDORUNITS = 'VendorUnits.csv'
MATCHES = 'Matches.csv'
NONMATCHES = 'NonMatches.csv'
list_of_non_matches = []
with open(MYUNITS,mode='r') as MFile,
open(VENDORUNITS,mode='r') as VFile,
open(MATCHES,mode='w') as OFile,
open(NONMATCHES,mode'w') as NFile:
MyReader = csv.reader(MFile,delimiter=',',quotechar='"')
MyList = list(MyReader)
VendorReader = csv.reader(VFile,delimiter=',',quotechar='"')
VList = list(VendorReader)
for x in range(len(MyList)):
for y in range(len(VList)):
if str(MyList[x][0]) == str(VList[y][0]):
OFile.write(MyList[x][0] + '\n')
else:
list_of_non_matches.append(MyList[x][0])
# Remove duplicates from the non matches
new_list = []
[new_list.append(x) for x in list_of_non_matches if x not in new_list]
# Write the new list to a file
for i in new_list:
NFile.write(i + '\n')
Does this work?
import csv
MYUNITS = 'MyUnits.csv'
VENDORUNITS = 'VendorUnits.csv'
MATCHES = 'Matches.csv'
NONMATCHES = 'NonMatches.csv'
with open(MYUNITS,'r') as MFile,
(VENDORUNITS,'r') as VFile,
(MATCHES,'w') as OFile,
(NONMATCHES,mode,'w') as NFile:
MyReader = csv.reader(MFile,delimiter=',',quotechar='"')
MyList = list(MyReader)
MyVals = [x for x in MyList]
MyVals = [x[0] for x in MyVals]
VendorReader = csv.reader(VFile,delimiter=',',quotechar='"')
VList = list(VendorReader)
vVals = [x for x in VList]
vVals = [x[0] for x in vVals]
for val in MyVals:
if val in vVals:
OFile.write(Val + '\n')
else:
NFile.write(Val + '\n')
#for x in range(len(MyList)):
# for y in range(len(VList)):
# if str(MyList[x][0]) == str(VList[y][0]):
# OFile.write(MyList[x][0] + '\n')
# else:
# pass
Sorry, I had some issues with my PC. I was able to solve my own question the night I posted. The solution I used is so simple I'm kicking myself for not figuring it out way sooner:
import csv
MYUNITS = 'MyUnits.csv'
VENDORUNITS = 'VendorUnits.csv'
MATCHES = 'Matches.csv'
NONMATCHES = 'NonMatches.csv'
with open(MYUNITS,mode='r') as MFile,
open(VENDORUNITS,mode='r') as VFile,
open(MATCHES,mode='w') as OFile,
open(NONMATCHES,mode'w') as NFile:
MyReader = csv.reader(MFile,delimiter=',',quotechar='"')
MyList = list(MyReader)
VendorReader = csv.reader(VFile,delimiter=',',quotechar='"')
VList = list(VendorReader)
for x in range(len(MyList)):
tmpStr = ''
for y in range(len(VList)):
if str(MyList[x][0]) == str(VList[y][0]):
tmpStr = '' #Sets to blank so comparison fails, works because break
OFile.write(MyList[x][0] + '\n')
break
else:
tmp = str(MyList[x][0])
if tmp != '':
NFile.write(tmp + '\n')

Extract text in string until certain new line ("\n") [duplicate]

We have a large raw data file that we would like to trim to a specified size.
How would I go about getting the first N lines of a text file in python? Will the OS being used have any effect on the implementation?
Python 3:
with open(path_to_file) as input_file:
head = [next(input_file) for _ in range(lines_number)]
print(head)
Python 2:
with open(path_to_file) as input_file:
head = [next(input_file) for _ in xrange(lines_number)]
print head
Here's another way (both Python 2 & 3):
from itertools import islice
with open(path_to_file) as input_file:
head = list(islice(path_to_file, lines_number))
print(head)
N = 10
with open("file.txt", "a") as file: # the a opens it in append mode
for i in range(N):
line = next(file).strip()
print(line)
If you want to read the first lines quickly and you don't care about performance you can use .readlines() which returns list object and then slice the list.
E.g. for the first 5 lines:
with open("pathofmyfileandfileandname") as myfile:
firstNlines=myfile.readlines()[0:5] #put here the interval you want
Note: the whole file is read so is not the best from the performance point of view but it
is easy to use, fast to write and easy to remember so if you want just perform
some one-time calculation is very convenient
print firstNlines
One advantage compared to the other answers is the possibility to select easily the range of lines e.g. skipping the first 10 lines [10:30] or the lasts 10 [:-10] or taking only even lines [::2].
What I do is to call the N lines using pandas. I think the performance is not the best, but for example if N=1000:
import pandas as pd
yourfile = pd.read_csv('path/to/your/file.csv',nrows=1000)
There is no specific method to read number of lines exposed by file object.
I guess the easiest way would be following:
lines =[]
with open(file_name) as f:
lines.extend(f.readline() for i in xrange(N))
The two most intuitive ways of doing this would be:
Iterate on the file line-by-line, and break after N lines.
Iterate on the file line-by-line using the next() method N times. (This is essentially just a different syntax for what the top answer does.)
Here is the code:
# Method 1:
with open("fileName", "r") as f:
counter = 0
for line in f:
print line
counter += 1
if counter == N: break
# Method 2:
with open("fileName", "r") as f:
for i in xrange(N):
line = f.next()
print line
The bottom line is, as long as you don't use readlines() or enumerateing the whole file into memory, you have plenty of options.
Based on gnibbler top voted answer (Nov 20 '09 at 0:27): this class add head() and tail() method to file object.
class File(file):
def head(self, lines_2find=1):
self.seek(0) #Rewind file
return [self.next() for x in xrange(lines_2find)]
def tail(self, lines_2find=1):
self.seek(0, 2) #go to end of file
bytes_in_file = self.tell()
lines_found, total_bytes_scanned = 0, 0
while (lines_2find+1 > lines_found and
bytes_in_file > total_bytes_scanned):
byte_block = min(1024, bytes_in_file-total_bytes_scanned)
self.seek(-(byte_block+total_bytes_scanned), 2)
total_bytes_scanned += byte_block
lines_found += self.read(1024).count('\n')
self.seek(-total_bytes_scanned, 2)
line_list = list(self.readlines())
return line_list[-lines_2find:]
Usage:
f = File('path/to/file', 'r')
f.head(3)
f.tail(3)
most convinient way on my own:
LINE_COUNT = 3
print [s for (i, s) in enumerate(open('test.txt')) if i < LINE_COUNT]
Solution based on List Comprehension
The function open() supports an iteration interface. The enumerate() covers open() and return tuples (index, item), then we check that we're inside an accepted range (if i < LINE_COUNT) and then simply print the result.
Enjoy the Python. ;)
For first 5 lines, simply do:
N=5
with open("data_file", "r") as file:
for i in range(N):
print file.next()
If you want something that obviously (without looking up esoteric stuff in manuals) works without imports and try/except and works on a fair range of Python 2.x versions (2.2 to 2.6):
def headn(file_name, n):
"""Like *x head -N command"""
result = []
nlines = 0
assert n >= 1
for line in open(file_name):
result.append(line)
nlines += 1
if nlines >= n:
break
return result
if __name__ == "__main__":
import sys
rval = headn(sys.argv[1], int(sys.argv[2]))
print rval
print len(rval)
If you have a really big file, and assuming you want the output to be a numpy array, using np.genfromtxt will freeze your computer. This is so much better in my experience:
def load_big_file(fname,maxrows):
'''only works for well-formed text file of space-separated doubles'''
rows = [] # unknown number of lines, so use list
with open(fname) as f:
j=0
for line in f:
if j==maxrows:
break
else:
line = [float(s) for s in line.split()]
rows.append(np.array(line, dtype = np.double))
j+=1
return np.vstack(rows) # convert list of vectors to array
This worked for me
f = open("history_export.csv", "r")
line= 5
for x in range(line):
a = f.readline()
print(a)
I would like to handle the file with less than n-lines by reading the whole file
def head(filename: str, n: int):
try:
with open(filename) as f:
head_lines = [next(f).rstrip() for x in range(n)]
except StopIteration:
with open(filename) as f:
head_lines = f.read().splitlines()
return head_lines
Credit go to John La Rooy and Ilian Iliev. Use the function for the best performance with exception handle
Revise 1: Thanks FrankM for the feedback, to handle file existence and read permission we can futher add
import errno
import os
def head(filename: str, n: int):
if not os.path.isfile(filename):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename)
if not os.access(filename, os.R_OK):
raise PermissionError(errno.EACCES, os.strerror(errno.EACCES), filename)
try:
with open(filename) as f:
head_lines = [next(f).rstrip() for x in range(n)]
except StopIteration:
with open(filename) as f:
head_lines = f.read().splitlines()
return head_lines
You can either go with second version or go with the first one and handle the file exception later. The check is quick and mostly free from performance standpoint
Starting at Python 2.6, you can take advantage of more sophisticated functions in the IO base clase. So the top rated answer above can be rewritten as:
with open("datafile") as myfile:
head = myfile.readlines(N)
print head
(You don't have to worry about your file having less than N lines since no StopIteration exception is thrown.)
This works for Python 2 & 3:
from itertools import islice
with open('/tmp/filename.txt') as inf:
for line in islice(inf, N, N+M):
print(line)
fname = input("Enter file name: ")
num_lines = 0
with open(fname, 'r') as f: #lines count
for line in f:
num_lines += 1
num_lines_input = int (input("Enter line numbers: "))
if num_lines_input <= num_lines:
f = open(fname, "r")
for x in range(num_lines_input):
a = f.readline()
print(a)
else:
f = open(fname, "r")
for x in range(num_lines_input):
a = f.readline()
print(a)
print("Don't have", num_lines_input, " lines print as much as you can")
print("Total lines in the text",num_lines)
Here's another decent solution with a list comprehension:
file = open('file.txt', 'r')
lines = [next(file) for x in range(3)] # first 3 lines will be in this list
file.close()
An easy way to get first 10 lines:
with open('fileName.txt', mode = 'r') as file:
list = [line.rstrip('\n') for line in file][:10]
print(list)
#!/usr/bin/python
import subprocess
p = subprocess.Popen(["tail", "-n 3", "passlist"], stdout=subprocess.PIPE)
output, err = p.communicate()
print output
This Method Worked for me
Simply Convert your CSV file object to a list using list(file_data)
import csv;
with open('your_csv_file.csv') as file_obj:
file_data = csv.reader(file_obj);
file_list = list(file_data)
for row in file_list[:4]:
print(row)

Python3: read from a file and sort the values

I have a txt file that contains data in the following fashion:
13
56
9
32
99
74
2
each value in a different file. I created three function:
the first one is to swap the values
def swap(lst,x,y):
temp = lst[x]
lst[x] = lst[y]
lst[y] = temp
and the second function is to sort the values:
def selection_sort(lst):
for x in range(0,len(lst)-1):
print(lst)
swap(lst,x,findMinFrom(lst[x:])+ x)
the third function is to find the minimum value from the list:
def findMinFrom(lst):
minIndex = -1
for m in range(0,len(lst)):
if minIndex == -1:
minIndex = m
elif lst[m] < lst[minIndex]:
minIndex = m
return minIndex
Now, how can I read from the file that contains the numbers and print them sorted?
Thanks in advance!
I used:
def main():
f = []
filename = input("Enter the file name: ")
for line in open(filename):
for eachElement in line:
f += eachElement
print(f)
selectionSort(f)
print(f)
main()
but still not working! any help?
Good programmers don't reinvent the wheel and use sorting routines that are standard in most modern languages. You can do:
with open('input.txt') as fp:
for line in sorted(fp):
print(line, end='')
to print the lines sorted alphabetically (as strings). And
with open('input.txt') as fp:
for val in sorted(map(int, fp)):
print(val)
to sort numerically.
To read all the lines in a file:
f = open('test.txt')
your_listname = list(f)
To sort and print
selection_sort(output_listname)
print(output_listname)
You may need to strip newline characters before sorting/printing
stripped_listname=[]
for i in your_listname:
i = i.strip('\n')
stripped_listname.append(i)
You probably also want to take the print statement out of your sort function so it doesn't print the list many times while sorting it.

"IndexError: list index out of range" when reading file

Just started learning Python and I'm struggling with this a little.
I'm opening a txt file that will be variable in length and I need to iterate over a user definable amount of lines at a time. When I get to the end of the file I receive the error in the subject field. I've also tried the readlines() function and a couple of variations on the "if" statement that causes the problem. I just can't seem to get the code to find EOF.
Hmm, as I write this, I'm thinking ... do I need to addlist "EOF" to the array and just look for that? Is that the best solution, to find a custom EOF?
My code snippet goes something like:
### variables defined outside of scapy PacketHandler ##
x = 0
B = 0
##########
with open('dict.txt') as f:
lines = list(f)
global x
global B
B = B + int(sys.argv[3])
while x <= B:
while y <= int(sys.argv[2]):
if lines[x] != "":
#...do stuff...
# Scapy send packet Dot11Elt(ID="SSID",info"%s" % (lines[x].strip())
# ....more code...
x = x 1
Let’s say you need to read X lines at a time, put it in a list and process it:
with open('dict.txt') as f:
enoughLines = True
while enoughLines:
lines = []
for i in range(X):
l = f.readline()
if l != '':
lines.append( l )
else:
enoughLines = False
break
if enoughLines:
#Do what has to be done with the list “lines”
else:
break
#Do what needs to be done with the list “lines” that has less than X lines in it
Try a for in loop. You have created your list, now iterate through it.
with open('dict.txt') as f:
lines = list(f)
for item in lines: #each item here is an item in the list you created
print(item)
this way you go through each line of your text file and don't have to worry about where it ends.
edit:
you can do this as well!
with open('dict.txt') as f:
for row in f:
print(row)
The following function will return a generator that returns the next n lines in a file:
def iter_n(obj, n):
iterator = iter(obj)
while True:
result = []
try:
while len(result) < n:
result.append(next(iterator))
except StopIteration:
if len(result) == 0:
raise
yield result
Here is how you can use it:
>>> with open('test.txt') as f:
... for three_lines in iter_n(f, 3):
... print three_lines
...
['first line\n', 'second line\n', 'third line\n']
['fourth line\n', 'fifth line\n', 'sixth line\n']
['seventh line\n']
Contents of test.txt:
first line
second line
third line
fourth line
fifth line
sixth line
seventh line
Note that, because the file does not have a multiple of 3 lines, the last value returned is not 3 lines, but just the rest of the file.
Because this solution uses a generator, it doesn't require that the full file be read into memory (into a list), but iterates over it as needed.
In fact, the above function can iterate over any iterable object, like lists, strings, etc:
>>> for three_numbers in iter_n([1, 2, 3, 4, 5, 6, 7], 3):
... print three_numbers
...
[1, 2, 3]
[4, 5, 6]
[7]
>>> for three_chars in iter_n("1234567", 3):
... print three_chars
...
['1', '2', '3']
['4', '5', '6']
['7']
If you want to get n lines in a list use itertools.islice yielding each list:
from itertools import islice
def yield_lists(f,n):
with open(f) as f:
for sli in iter(lambda : list(islice(f,n)),[]):
yield sli
If you want to use loops, you don't need a while loop at all, you can use an inner loop in range n-1 calling next on the file object with a default value of an empty string, if we get an empty string break the loop if not just append and again yield each list:
def yield_lists(f,n):
with open(f) as f:
for line in f:
temp = [line]
for i in range(n-1):
line = next(f,"")
if not line:
break
temp.append(line)
yield temp

Categories