Essentially what I am attempting to do is read 'n' number of lines from a file and then write them to a separate file. This program essentially should take a file that has 100 lines and separate that file into 50 separate files.
def main():
from itertools import islice
userfile = raw_input("Please enter the file you wish to open\n(must be in this directory): ")
file1 = open(userfile, "r+")
#print "Name: ", file1.name
#print "Closed or not", file1.closed
#print "Opening mode: ", file1.mode
#print "Softspace flag: ", file1.softspace
jcardtop = file1.read(221);
#print jcardtop
n = 2
count = 0
while True:
next_n_lines = list(islice(file1,n))
print next_n_lines
count = count + 1
fileout = open(str(count)+ ".txt", "w+")
fileout.write(str(jcardtop))
fileout.write(str(next_n_lines))
fileout.close()
break
if not next_n_lines:
break
I do have the file printing as well to show what is in the variable next_n_lines.
*['\n', "randomtext' more junk here\n"]
I would like it instead to look like
randomtext' more junk here
Is this a limitatoin of the islice function? Or am I missing a portion of the syntax?
Thanks for your time!
Where you call str() or print, you want to ''.join(next_n_lines) instead:
print ''.join(next_n_lines)
and
fileout.write(''.join(next_n_lines))
You can store the flattened string in a variable if you don't want to call join twice.
Did you mean something like this?
f = open(userfile,"r")
start = 4
n_lines = 100
for line in f.readlines()[start:(start + n_lines)]:
print line
#do stuff with line
or maybe this rough, yet effective code:
f = open(userfile,"r")
start = 4
end = start + 100
count = start
while count != end:
for line in f.readlines()[count:(count + 2)]:
fileout = open(str(count)+ ".txt", "w+")
fileout.write(str(line))
fileout.close()
count = count + 2
Related
I am writing a code in python where I am removing all the text after a specific word but in output lines are missing. I have a text file in unicode which have 3 lines:
my name is test1
my name is
my name is test 2
What I want is to remove text after word "test" so I could get the output as below
my name is test
my name is
my name is test
I have written a code but it does the task but also removes the second line "my name is"
My code is below
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index > 0:
txt += line[:index + len(splitStr)] + "\n"
with open(r"test.txt", "w") as fp:
fp.write(txt)
It looks like if there is no keyword found the index become -1.
So you are avoiding the lines w/o keyword.
I would modify your if by adding the condition as follows:
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index > 0:
txt += line[:index + len(splitStr)] + "\n"
elif index < 0:
txt += line
with open(r"test.txt", "w") as fp:
fp.write(txt)
No need to add \n because the line already contains it.
Your code does not append the line if the splitStr is not defined.
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index != -1:
txt += line[:index + len(splitStr)] + "\n"
else:
txt += line
with open(r"test.txt", "w") as fp:
fp.write(txt)
In my solution I simulate the input file via io.StringIO. Compared to your code my solution remove the else branch and only use one += operater. Also splitStr is set only one time and not on each iteration. This makes the code more clear and reduces possible errore sources.
import io
# simulates a file for this example
the_file = io.StringIO("""my name is test1
my name is
my name is test 2""")
txt = ""
splitStr = "test"
with the_file as fp:
# each line
for line in fp.readlines():
# cut somoething?
if splitStr in line:
# find index
index = line.find(splitStr)
# cut after 'splitStr' and add newline
line = line[:index + len(splitStr)] + "\n"
# append line to output
txt += line
print(txt)
When handling with files in Python 3 it is recommended to use pathlib for that like this.
import pathlib
file_path = pathlib.Path("test.txt")
# read from wile
with file_path.open('r') as fp:
# do something
# write back to the file
with file_path.open('w') as fp:
# do something
Suggestion:
for line in fp.readlines():
i = line.find('test')
if i != -1:
line = line[:i]
Im writing a program to output a table that calculates pay roll.
it is reading from an imported file.
The file I'm using to test contains:
Barb,Moran,51,12.85
Joy,Rinehimer,30,9.35
Joe,Bellucci,45,9.55
Dave,Flaim,37,17.70
The idea here was to take each line and split it by the , and then go through position by position to print what I need to.
Here is the portion of my code that I think contains the issue:
def getFile():
filename = input("Please enter the employee file name: ")
print()
infile = open(filename, "r")
return infile
def outputs(infile):
paylist = []
count = 0
for lines in infile:
individual = lines.split(",")
for pos in individual:
first = ("{0:7}".format (individual[0]))
print(first, end = "")
last = ("{0:12}".format(individual[1]))
print(last, end = "")
hours = ("{0:6}".format(individual[2]))
print(hours, end = "")
wage = ("{0:6}".format(individual[3]))
print ("$",wage, end = "")
When I run it, it outputs all of the correct values but 4 times over
The inner loop of outputs() is the culprit. Remove it.
def outputs(infile):
paylist = []
count = 0
for lines in infile:
individual = lines.split(",")
first = ("{0:7}".format (individual[0]))
print(first, end = "")
last = ("{0:12}".format(individual[1]))
print(last, end = "")
hours = ("{0:6}".format(individual[2]))
print(hours, end = "")
wage = ("{0:6}".format(individual[3]))
print ("$",wage, end = "")
I am having an issue getting the train function to work correctly in python. I can not modify the def function. I am at the point where I need to get the second file to read lines one at a time for PosList and i need to match the value of movieWordCount[z] in OpenPos. If the file is there, then I am good to incrment column 2 by one of t hat line (segmented by a space). If it is not, then I need the else to append it to the file end. It does not work. It does not append the values if it is missing and I am not sure if it will find the value if it is there. I have been stuck getting thsi to work for two days.
Here is my code segment I am working with:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
#Now use tokenize to split it apart by space and set to new array for me to call column2
else:
print "not found"
lines.append(movieWordCount[z] + " 1" + "\n")
Here is my full code:
#!/usr/bin/python
#Import Counter
import collections
from collections import Counter
#Was already here but pickle is used for data input and export
import math, os, pickle, re
class Bayes_Classifier:
def __init__(self, trainDirectory = "movie_reviews/"):
#If file listing exists skip to train
if os.path.isfile('iFileList'):
print "file found"
self.train()
#self.classify()
#If file listing does not exist skip to train
if not os.path.isfile('iFileList'):
print "no file"
newfile = 'iFileList'
tempList = set()
subDir = './movie_reviews'
for filenames in os.listdir(subDir):
my_sub_path = os.path.join(os.sep,subDir,filenames)
tempList.add(filenames)
self.save("filenames", "try3")
f = []
for fFileObj in os.walk("movie_reviews/"):
f.extend(fFileObj)
break
pickle.dump(f, open( "save.p", "wb" ))
self.save(f, "try4")
with open(newfile, 'wb') as fi:
pickle.dump(tempList, fi)
#print tempList
self.train()
#self.classify()
def train(self):
'''Trains the Naive Bayes Sentiment Classifier.'''
print "File ready for training"
#Open iFileList to use as input for opening movie files
x = 0
OpenIFileList = open('iFileList','r')
print "iFileList now Open"
#Loop through the file
for line in OpenIFileList:
#print "Ready to read lines"
#print "reading line " + line
if x > 4:
if x % 2 == 0:
#print line
s = line
if '-' in s:
comp = s.split("'")
#print comp[2]
print comp[1] #This is What you need for t he movie file
compValue1 = comp[1]
#Determine Positive/Negative.
#compType is the variable I am storing it to.
compType = compValue1.split("-",2)[1]
#print compType #Prints that middle value like 5 or 1
# This will do the work based on the value.
if compType == '5':
# print "you have a five" #Confirms the loop I am in.
#If file does not exists create it
if not os.path.exists('PosList'):
print "no file"
file('PosList', 'w').close()
#Open file that needs to be reviewed for word count
compValue2 = "movie_reviews/" + compValue1
print compValue2 #Prints the directory and file path
OpenMovieList = open(compValue2,'r')
for commentLine in OpenMovieList:
commentPositive = commentLine.split(" ")
commentPositiveCounter = Counter(commentPositive)
#print commentPositiveCounter # " Comment Pos goes here"
#if commentLine != '' or commentLine != ' ':
#Get first word, second word, ....
if commentLine and (not commentLine.isspace()):
movieWordCount = self.tokenize(commentLine)
y = len(movieWordCount) #determines length of string
print y
z = 0
#print movieWordCount[0] # Shows the zero position in the file.
while z < y:
print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
z = z + 1
#Close the files
OpenMovieList.close()
OpenPos.close()
x += 1
#for line2 in OpenIFileList.readlines():
#for line in open('myfile','r').readlines():
#do_something(line)
#Save results
#Close the File List
OpenIFileList.close()
def loadFile(self, sFilename):
'''Given a file name, return the contents of the file as a string.'''
f = open(sFilename, "r")
sTxt = f.read()
f.close()
return sTxt
def save(self, dObj, sFilename):
'''Given an object and a file name, write the object to the file using pickle.'''
f = open(sFilename, "w")
p = pickle.Pickler(f)
p.dump(dObj)
f.close()
def load(self, sFilename):
'''Given a file name, load and return the object stored in the file.'''
f = open(sFilename, "r")
u = pickle.Unpickler(f)
dObj = u.load()
f.close()
return dObj
def tokenize(self, sText):
'''Given a string of text sText, returns a list of the individual tokens that
occur in that string (in order).'''
lTokens = []
sToken = ""
for c in sText:
if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-':
sToken += c
else:
if sToken != "":
lTokens.append(sToken)
sToken = ""
if c.strip() != "":
lTokens.append(str(c.strip()))
if sToken != "":
lTokens.append(sToken)
return lTokens
To open a file for writing, you can use
with open('PosList', 'w') as Open_Pos
As you are using the with form, you do not need to close the file; Python will do that for you at the end of the with-block.
So assuming that the way you add data to the lines variable is correct, you could remove the superfluous code OpenMovieList.close() and OpenPos.close(), and append 2 lines to your code:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
with open("PosList", "w") as OpenPos:
OpenPos.write(lines)
First, I have my txt file data here as:
51
2
2
49
15
2
1
14
I would like to convert them into a list to do further calculation, however I couldn't do it and I try to print it one by one by using for loop. Then the error message "Attributes error" kept showing up.
def main():
file = str(input("Please enter the full name of the desired file(with extension) at the prompt below: \n"))
print (get_value(file))
def get_value(file):
file_open = open(file,"r")
print (file_open.read())
a = len(file)
print ("Length =",a)
for line in range file_open:
print (line)
main()
def main():
file = str(input("Please enter the full name of the desired file(with extension) at the prompt below: \n"))
print (get_value(file))
def get_value(file):
file_open = open(file,"r")
lsLines = file_open.readlines()
lsLines = [int(x) for x in lsLines if len(x.strip()) > 0]
file_open.close()
return lsLines
main()
EDIT: wrong language.
with open("file.txt") as f:
ints = [int(line) in f.readlines()]
I'm in trouble here. I need to read a file. Txt file that contains a sequence of records, check the records that I want to copy them to a new file.
The file content is like this (this is just an example, the original file has more than 30 000 lines):
AAAAA|12|120 #begin file
00000|46|150 #begin register
03000|TO|460
99999|35|436 #end register
00000|46|316 #begin register
03000|SP|467
99999|33|130 #end register
00000|46|778 #begin register
03000|TO|478
99999|33|457 #end register
ZZZZZ|15|111 #end file
The records that begin with 03000 and have the characters 'TO' must be written to a new file. Based on the example, the file should look like this:
AAAAA|12|120 #begin file
00000|46|150 #begin register
03000|TO|460
99999|35|436 #end register
00000|46|778 #begin register
03000|TO|478
99999|33|457 #end register
ZZZZZ|15|111 #end file
Code:
file = open("file.txt",'r')
newFile = open("newFile.txt","w")
content = file.read()
file.close()
# here I need to check if the record exists 03000 characters 'TO', if it exists, copy the recordset 00000-99999 for the new file.
I did multiple searches and found nothing to help me.
Thank you!
with open("file.txt",'r') as inFile, open("newFile.txt","w") as outFile:
outFile.writelines(line for line in inFile
if line.startswith("03000") and "TO" in line)
If you need the previous and the next line, then you have to iterate inFile in triads. First define:
def gen_triad(lines, prev=None):
after = current = next(lines)
for after in lines:
yield prev, current, after
prev, current = current, after
And then do like before:
outFile.writelines(''.join(triad) for triad in gen_triad(inFile)
if triad[1].startswith("03000") and "TO" in triad[1])
import re
pat = ('^00000\|\d+\|\d+.*\n'
'^03000\|TO\|\d+.*\n'
'^99999\|\d+\|\d+.*\n'
'|'
'^AAAAA\|\d+\|\d+.*\n'
'|'
'^ZZZZZ\|\d+\|\d+.*')
rag = re.compile(pat,re.MULTILINE)
with open('fifi.txt','r') as f,\
open('newfifi.txt','w') as g:
g.write(''.join(rag.findall(f.read())))
For files with additional lines between lines beginning with 00000, 03000 and 99999, I didn't find simpler code than this one:
import re
pat = ('(^00000\|\d+\|\d+.*\n'
'(?:.*\n)+?'
'^99999\|\d+\|\d+.*\n)'
'|'
'(^AAAAA\|\d+\|\d+.*\n'
'|'
'^ZZZZZ\|\d+\|\d+.*)')
rag = re.compile(pat,re.MULTILINE)
pit = ('^00000\|.+?^03000\|TO\|\d+.+?^99999\|')
rig = re.compile(pit,re.DOTALL|re.MULTILINE)
def yi(text):
for g1,g2 in rag.findall(text):
if g2:
yield g2
elif rig.match(g1):
yield g1
with open('fifi.txt','r') as f,\
open('newfifi.txt','w') as g:
g.write(''.join(yi(f.read())))
file = open("file.txt",'r')
newFile = open("newFile.txt","w")
content = file.readlines()
file.close()
newFile.writelines(filter(lambda x:x.startswith("03000") and "TO" in x,content))
This seems to work. The other answers seem to only be writing out records that contain '03000|TO|' but you have to write out the record before and after that as well.
import sys
# ---------------------------------------------------------------
# ---------------------------------------------------------------
# import file
file_name = sys.argv[1]
file_path = 'C:\\DATA_SAVE\\pick_parts\\' + file_name
file = open(file_path,"r")
# ---------------------------------------------------------------
# create output files
output_file_path = 'C:\\DATA_SAVE\\pick_parts\\' + file_name + '.out'
output_file = open(output_file_path,"w")
# create output files
# ---------------------------------------------------------------
# process file
temp = ''
temp_out = ''
good_write = False
bad_write = False
for line in file:
if line[:5] == 'AAAAA':
temp_out += line
elif line[:5] == 'ZZZZZ':
temp_out += line
elif good_write:
temp += line
temp_out += temp
temp = ''
good_write = False
elif bad_write:
bad_write = False
temp = ''
elif line[:5] == '03000':
if line[6:8] != 'TO':
temp = ''
bad_write = True
else:
good_write = True
temp += line
temp_out += temp
temp = ''
else:
temp += line
output_file.write(temp_out)
output_file.close()
file.close()
Output:
AAAAA|12|120 #begin file
00000|46|150 #begin register
03000|TO|460
99999|35|436 #end register
00000|46|778 #begin register
03000|TO|478
99999|33|457 #end register
ZZZZZ|15|111 #end file
Does it have to be python? These shell commands would do the same thing in a pinch.
head -1 inputfile.txt > outputfile.txt
grep -C 1 "03000|TO" inputfile.txt >> outputfile.txt
tail -1 inputfile.txt >> outputfile.txt
# Whenever I have to parse text files I prefer to use regular expressions
# You can also customize the matching criteria if you want to
import re
what_is_being_searched = re.compile("^03000.*TO")
# don't use "file" as a variable name since it is (was?) a builtin
# function
with open("file.txt", "r") as source_file, open("newFile.txt", "w") as destination_file:
for this_line in source_file:
if what_is_being_searched.match(this_line):
destination_file.write(this_line)
and for those who prefer a more compact representation:
import re
with open("file.txt", "r") as source_file, open("newFile.txt", "w") as destination_file:
destination_file.writelines(this_line for this_line in source_file
if re.match("^03000.*TO", this_line))
code:
fileName = '1'
fil = open(fileName,'r')
import string
##step 1: parse the file.
parsedFile = []
for i in fil:
##tuple1 = (1,2,3)
firstPipe = i.find('|')
secondPipe = i.find('|',firstPipe+1)
tuple1 = (i[:firstPipe],\
i[firstPipe+1:secondPipe],\
i[secondPipe+1:i.find('\n')])
parsedFile.append(tuple1)
fil.close()
##search criterias:
searchFirst = '03000'
searchString = 'TO' ##can be changed if and when required
##step 2: used the parsed contents to write the new file
filout = open('newFile','w')
stringToWrite = parsedFile[0][0] + '|' + parsedFile[0][1] + '|' + parsedFile[0][2] + '\n'
filout.write(stringToWrite) ##to write the first entry
for i in range(1,len(parsedFile)):
if parsedFile[i][1] == searchString and parsedFile[i][0] == searchFirst:
for j in range(-1,2,1):
stringToWrite = parsedFile[i+j][0] + '|' + parsedFile[i+j][1] + '|' + parsedFile[i+j][2] + '\n'
filout.write(stringToWrite)
stringToWrite = parsedFile[-1][0] + '|' + parsedFile[-1][1] + '|' + parsedFile[-1][2] + '\n'
filout.write(stringToWrite) ##to write the first entry
filout.close()
I know that this solution may be a bit long. But it is quite easy to understand. And it seems an intuitive way to do it. And I have already checked this with the Data that you have provided and it works perfectly.
Please tell me if you need some more explanation on the code. I will definitely add the same.
I tip (Beasley and Joran elyase) very interesting, but it only allows to get the contents of the line 03000. I would like to get the contents of the lines 00000 to line 99999.
I even managed to do here, but I am not satisfied, I wanted to make a more cleaner.
See how I did:
file = open(url,'r')
newFile = open("newFile.txt",'w')
lines = file.readlines()
file.close()
i = 0
lineTemp = []
for line in lines:
lineTemp.append(line)
if line[0:5] == '03000':
state = line[21:23]
if line[0:5] == '99999':
if state == 'TO':
newFile.writelines(lineTemp)
else:
linhaTemp = []
i = i+1
newFile.close()
Suggestions...
Thanks to all!