Counting to 100,000 and writing that to a file - python

I haven't used Python for a while but I decided to create a program today to help me with some work I am trying to do. I am trying to create a program that writes the numbers 1-100,000 with the symbol | after each but can't seem to strip the file after I create it so it shows like this: 1|2|3|4.
My Code:
a = 0
b = "|"
while a < 100000:
a += 1 # Same as a = a + 1
new = (a,b)
f = open("export.txt","a") #opens file with name of "export.txt"
f.write(str(new))
f.close()
infile = "export.txt"
outfile = "newfile.txt"
delete_list = ["(","," "'"]
fin = open(infile)
fout = open(outfile, "w+")
for line in fin:
for word in delete_list:
line = line.replace(word, "")
fout.write(line)
fin.close()
fout.close()
export.txt:
newfile.txt:

It looks like you're doing a lot of work unnecessarily.
If all you want is a file that has the numbers 0-99999 with | after each, you could do:
delim = "|"
with open('export.txt', 'w') as f:
for a in xrange(100):
f.write("%d%s" % (a, delim))
I'm not sure what the purpose of the second file is, but, in general, to open one file to read from and a second to write to, you could do:
with open('export.txt', 'r') as fi:
with open('newfile.txt', 'w') as fo:
for line in fi:
for word in line.split('|'):
print(word)
fo.write(word)
Note that there are no newlines in the original file, so for line in fi is actually reading the entire contents of "export.txt" -- this could cause issues.

Try this for writing your file:
numbers = []
for x in range(1,100001):
numbers.append(str(x))
f = open('export.txt', 'w')
f.write('|'.join(numbers))
f.close()

Related

Not all duplicates are deleted from a text file in Python

I am new to Python. I am trying to delete duplicates from my text file by doing the following:
line_seen = set()
f = open('a.txt', 'r')
w = open('out.txt', 'w')
for i in f:
if i not in line_seen:
w.write(i)
line_seen.add(i)
f.close()
w.close()
In the initial file I had
hello
world
python
world
hello
And in output file I got
hello
world
python
hello
So it did not remove the last duplicate. Can anyone help me to understand why it happened and how could I fix it?
The first line probably contains 'hello\n' - the last line contains only 'hello' - they are not the same.
Use
line_seen = set()
with open('a.txt', 'r') as f, open('out.txt', 'w') as w:
for i in f:
i = i.strip() # remove the \n from line
if i not in line_seen:
w.write(i + "\n")
line_seen.add(i)
The main problem is with the break line characters ("\n") which appears at the end of each line but the last line. You can use a combination of set, map and join function such as what follows:
f = open('a.txt', 'r')
w = open('out.txt', 'w')
w.write("\n".join(list(set(map(str.strip,f.readlines())))))
out.txt
python
world
hello
If you want to stick to your previous approach you can use:
line_seen = set()
f = open('a.txt', 'r')
w = open('out.txt', 'w')
for i in f:
i = i.strip()
if i not in line_seen:
w.write(i)
line_seen.add(i)
f.close()
w.close()
Most likely you didn't end the last line with a newline. The known line is `hello\n'. The last just 'hello'
Fix the input or strip() the read i
# Since we check if the line exists in lines, we can use a list instead of
# a set to preserve order
lines = []
infile = open('a.txt', 'r')
outfile = open('out.txt', 'w')
# Use the readlines method
for line in infile.readlines():
if line not in lines:
# Strip whitespace
line = line.strip()
lines.append(line)
for line in lines:
# Add the whitespace back
outfile.write("{}\n".format(line))
infile.close()
outfile.close()

How to output ONLY new additions between files with Python Difflib?

I am comparing two text files using Difflib like so:
import difflib
new_file = open(file_name, "r")
old_file = open(old_file_name, "r")
file_difference = difflib.ndiff(old_file.readlines(), new_file.readlines())
My goal is to ONLY output additions. I do not want to know about changes to existing lines. However, I've run into a problem where all changes/additions are marked with "+ ", and all subtractions are marked with "- ". I've done a lot of searching, and it appears there's no way to differentiate a line that has been changed, and a line that is brand new. I am confused on how to proceed.
import csv
f1 = open(old_file_name, "r")
contents1 = f1.read()
f2 = open(file_name, "r",)
contents2 = f2.read()
for data in contents2:
if data not in contents1:
file = open(output_path, 'a', newline='')
# writing the data into the file
with file:
write = csv.writer(file)
write.writerows(data)
A great friend of mine provided a code snippet that answered my question:
# Open the files for comparison
with open(file_name, "r") as new_file:
with open(old_file_name, "r") as old_file:
# Find the differences between the two files
file_difference = difflib.ndiff(old_file.readlines(), new_file.readlines())
new_lines = []
file_difference = tuple(x for x in file_difference)
idx = 0
fdiff_size = len(file_difference)
while idx < fdiff_size:
line = file_difference[idx]
if line.startswith("- "):
if idx + 1 < fdiff_size and file_difference[idx + 1].startswith("? "):
# this chunk is a change, so ignore this and the next 3 lines
idx += 4
continue
elif line.startswith("+ "):
new_lines.append(line)
# always iterate after new item or no change
idx += 1

Adding a new string to the end of a specific line in a text file

I'm new to python hence I am unable to implement the solutions I've found online in order to fix my problem.
I am trying to add a specific string to the end of a specific line to a textfile. As I understand text commands, I must overwrite the file if I don't want to append to the end of it. So, my solution is as follows:
ans = 'test'
numdef = ['H',2]
f = open(textfile, 'r')
lines = f.readlines()
f.close()
f = open(textfile, 'w')
f.write('')
f.close()
f = open(textfile, 'a')
for line in lines:
if int(line[0]) == numdef[1]:
if str(line[2]) == numdef[0]:
k = ans+ line
f.write(k)
else:
f.write(line)
Basically, I am trying to add variable ans to the end of a specific line, the line which appears in my list numdef. So, for example, for
2 H: 4,0 : Where to search for information : google
I want
2 H: 4,0 : Where to search for information : google test
I have also tried using line.insert() but to no avail.
I understand using the 'a' function of the open command is not so relevant and helpful here, but I am out of ideas. Would love tips with this code, or if maybe I should scrap it and rethink the whole thing.
Thank you for your time and advice!
When you use the method
lines = f.readlines()
Python automatically adds "\n" to the end of each line.
Try instead of :
k = line+ans
The following:
k = line.rstrip('\n') + ans
Good luck!
Try this. You don't have an else case if it meets the first requirement but not the other.
ans = 'test'
numdef = ['H',2]
f = open(textfile, 'r')
lines = f.readlines()
f.close()
f = open(textfile, 'w')
f.write('')
f.close()
f = open(textfile, 'a')
for line in lines:
if int(line[0]) == numdef[1] and str(line[2]) == numdef[0]:
k = line.replace('\n','')+ans
f.write(k)
else:
f.write(line)
f.close()
Better way:
#initialize variables
ans = 'test'
numdef = ['H',2]
#open file in read mode, add lines into lines
with open(textfile, 'r') as f:
lines=f.readlines()
#open file in write mode, override everything
with open(textfile, 'w') as f:
#in the list comprehension, loop through each line in lines, if both of the conditions are true, then take the line, remove all newlines, and add ans. Otherwise, remove all the newlines and don't add anything. Then combine the list into a string with newlines as separators ('\n'.join), and write this string to the file.
f.write('\n'.join([line.replace('\n','')+ans if int(line[0]) == numdef[1] and str(line[2]) == numdef[0] else line.replace('\n','') for line in lines]))

How do append characters on either side of certain words on every line in a file in python?

Say I have a file my_file, and I want to search for a certain word x on every line of the file, and if the word exists, attach my variable y to the left and right side of the word. Then I want replace the old line with the new, modified line in my_new_file. How do I do this? So far I have:
output = open(omy_new_file, "w")
for line in open(my_file):
if (" " + x + "") in line:
You can try this:
y = "someword"
x = "target_string"
lines = [i.strip('\n') for i in open('filename.txt')]
final_lines = ["{}{}{}".format(y, i, y) if x in i else i for i in lines]
f = open(omy_new_file, "w")
for i in final_lines:
f.write("{}\n".format(i))
f.close()
with open('inputfile.txt', 'r') as infile:
with open('outfile.txt', 'w') as outfile:
for line in infile.readlines():
outfile.write(line.replace('string', y + 'string' + y)
Try This:
with open("my_file", "r") as my_file:
raw_data = my_file.read()
# READ YOUR FILE
new_data = raw_data.split("\n")
for line in new_data:
if "sd" in line:
my_new_line = "y" + line + "y"
raw_data = raw_data.replace(line, my_new_line)
print(raw_data)
It's tough to replace a line in a file while reading it, for the same reason that it's tough to safely modify a list as you iterate over it.
It's much better to read through the file, collect a list of lines, then overwrite the original. If the file is particularly large (such that it would be infeasible to hold it all in memory at once), you can write to disk twice.
import tempfile
y = "***"
your_word = "Whatever you're filtering by"
with tempfile.TemporaryFile(mode="w+") as tmpf:
with open(my_file, 'r') as f:
for line in f:
if your_word in line:
line = f"{y}{line.strip()}{y}\n"
tmpf.write(line) # write to the temp file
tmpf.seek(0) # move back to the beginning of the tempfile
with open(my_file, 'w') as f:
for line in tmpf: # reading from tempfile now
my_file.write(line)

Read lines from a text file, reverse and save in a new text file

So far I have this code:
f = open("text.txt", "rb")
s = f.read()
f.close()
f = open("newtext.txt", "wb")
f.write(s[::-1])
f.close()
The text in the original file is:
This is Line 1
This is Line 2
This is Line 3
This is Line 4
And when it reverses it and saves it the new file looks like this:
4 eniL si sihT 3 eniL si sihT 2 eniL si sihT 1 eniL si sihT
When I want it to look like this:
This is line 4
This is line 3
This is line 2
This is line 1
How can I do this?
You can do something like:
with open('test.txt') as f, open('output.txt', 'w') as fout:
fout.writelines(reversed(f.readlines()))
read() returns the whole file in a single string. That's why when you reverse it, it reverses the lines themselves too, not just their order. You want to reverse only the order of lines, you need to use readlines() to get a list of them (as a first approximation, it is equivalent to s = f.read().split('\n')):
s = f.readlines()
...
f.writelines(s[::-1])
# or f.writelines(reversed(s))
f = open("text.txt", "rb")
s = f.readlines()
f.close()
f = open("newtext.txt", "wb")
s.reverse()
for item in s:
print>>f, item
f.close()
The method file.read() returns a string of the whole file, not the lines.
And since s is a string of the whole file, you're reversing the letters, not the lines!
First, you'll have to split it to lines:
s = f.read()
lines = s.split('\n')
Or:
lines = f.readlines()
And your method, it is already correct:
f.write(lines[::-1])
Hope this helps!
There are a couple of steps here. First we want to get all the lines from the first file, and then we want to write them in reversed order to the new file. The code for doing this is as follows
lines = []
with open('text.txt') as f:
lines = f.readlines()
with open('newtext.txt', 'w') as f:
for line in reversed(lines):
f.write(line)
Firstly, we initialize a variable to hold our lines. Then we read all the lines from the 'test.txt' file.
Secondly, we open our output file. Here we loop through the lines in reversed order, writing them to the output file as we go.
A sample using list so it will be much easier:
I'm sure there answer that are more elegant but this way is clear to understand.
f = open(r"c:\test.txt", "rb")
s = f.read()
f.close()
rowList = []
for value in s:
rowList.append(value + "\n")
rowList.reverse()
f = open(r"c:\test.txt", "wb")
for value in rowList:
f.write(value)
f.close()
You have to work line by line.
f = open("text.txt", "rb")
s = f.read()
f.close()
f = open("newtext.txt", "wb")
lines = s.split('\n')
f.write('\n'.join(lines[::-1]))
f.close()
Use it like this if your OS uses \n to break lines
f = open("text.txt", "rb")
s = f.read()
f.close()
f = open("newtext.txt", "wb")
f.write(reversed(s.split("\n")).join("\n"))
f.close()
Main thing here is reversed(s.split("\n")).join("\n").
It does the following:
Split your string by line breaks - \n,
resulting an array
reverses the array
merges the array back with linebreaks \n to a string
Here the states:
string: line1 \n line2 \n line3
array: ["line1", "line2", "line3"]
array: ["line3", "line2", "line1"]
string: line3 \n line2 \n line1 \n
If your input file is too big to fit in memory, here is an efficient way to reverse it:
Split input file into partial files (still in original order).
Read each partial file from last to first, reverse it and append to output file.
Implementation:
import os
from itertools import islice
input_path = "mylog.txt"
output_path = input_path + ".rev"
with open(input_path) as fi:
for i, sli in enumerate(iter(lambda: list(islice(fi, 100000)), []), 1):
with open(f"{output_path}.{i:05}", "w") as fo:
fo.writelines(sli)
with open(output_path, "w") as fo:
for file_index in range(i, 0, -1):
path = f"{output_path}.{file_index:05}"
with open(path) as fi:
lines = fi.readlines()
os.remove(path)
for line in reversed(lines):
fo.write(line)

Categories