python read text file to array and edit the array - python

hello all im trying to read text file line by line and then store all the data into an array and i want to add text in the value of array such as
admin
administrator
adm
log
login
after got this lines i want to add (.php)
in the end of it
and this is my code
current_folder= os.path.dirname(os.path.realpath(__file__))
current_list=str(current_folder)+"\pages.txt"
ins = open( current_list, "r" )
array = []
for line in ins:
array.append(line.rstrip())
for fahad in array:
array+".php"

This code:
try:
with open('test.txt', 'r') as ins: #Opens the file and closes it when Python is done with it
array = []
for line in ins:
array.append(line.rstrip()) # appends each line of the file with trailing white space stripped
for fahad in array:
fahad += ".php" # for each item in the list 'array' it concatenates '.php' on to the end. The += operator is the same as fahad = fahad + '.php'
print(fahad)
except FileNotFoundError: # this is part of a try/except block. If the file isn't found instead of throwing an error this will trigger. Right now nothing happens because of the pass statement but you can change that to print something if you like.
pass
produces:
>>> fahad
'admin administrator adm log login.php'

I guess this should work.
current_folder= os.path.dirname(os.path.realpath(__file__))
current_list=str(current_folder)+"\pages.txt"
ins = open( current_list, "r" ).read().split()
array = []
for line in ins:
array.append(line + ".php")

You could try this code:
ins = open( "hello.txt", "r" )
array = []
rows = ins.read().split('\n') #or \r\n - it depends from your txt
for row in rows:
array.append(row+".php")
ins.close()

Related

Copy last line in python file and modify it before adding a new line

I would like to check for a string "rele" in a python text file and if string is not present then copy the last line of the file and then modify it as below to add as a new entry.
Example:
Actual File: Where "rele" is not present
"123456",1,0,"mher",0,"N",01Jan1986 00:00,130:00,
"123456",1,1,"ermt",0,"N",01Jan1986 00:00,100:00,
"123456",1,2,"irbt",0,"N",01Jan1986 00:00,120:00,
Expected Output:
"123456",1,0,"mher",0,"N",01Jan1986 00:00,130:00,
"123456",1,1,"ermt",0,"N",01Jan1986 00:00,0:00,
"123456",1,2,"irbt",0,"N",01Jan1986 00:00,0:00,
"123456",1,3,"rele",0,"0000",01Jan1986 00:00,0:00,
Last entry of the file is similar to its previous except few changes to it's 3,4 and 6 columns.
My code:
fp = open(srcEtab.txt, 'w')
for line in lines:
if 'rele' in line:
foundRelOrPickup = True
if not foundRelOrPickup:
fp1 = open ( 'srcEtab.txt',"w" )
lineList = fp1.readlines()
new_line = lineList[len(lineList)-1]
fp1.write(new_line)
fp.close()
fp1.close()
with open(yourFile) as f:
lines = f.readlines()
if not any(map(lambda line: "rele" in line,lines)):
last_line_words = lines[-1].split(',')
last_line_words[2] = len(lines)
last_line_words[3] = '"rele"'
last_line_words[5] = '"0000"'
lines.append(",".join([str(i) for i in last_line_words]))
with open(otherFile, "w") as f1:
for line in lines:
f1.write(line)

replace line if found or append - python

I have text that is key-value pairs separated by '='. I would like to replace the line if the key matches. if not, i would like to append it at the bottom. I've tried several ways, including:
def split_command_key_and_value(command):
if '=' in command:
command2 = command.split('=')
return command2
def test(command, path):
command2 = split_command_key_and_value(command)
pattern = command2[0]
myfile = open(path,'r') # open file handle for read
# use r'', you don't need to replace '\' with '/'
result = open(path, 'w') # open file handle for write
for line in myfile:
line = line.strip() # it's always a good behave to strip what you read from files
if pattern in line:
line = command # if match, replace line
result.write(line) # write every line
myfile.close() # don't forget to close file handle
result.close()
I know the above is just to replace text, but it deletes the text in the file, and I can't see why. Could someone point me in the right direction?
Thanks
Update:
I'm almost there, but some of my lines have similar keys, so mutiple lines are matching when only 1 should. I've tried to incorporate a regex boundary in my loop with no luck. My code is below. Does anyone have a suggestion?
There is some text in the file that isn't key-value, so I would like to skip that.
def modify(self, name, value):
comb = name + ' ' + '=' + ' ' + value + '\n'
with open('/file/', 'w') as tmpstream:
with open('/file/', 'r') as stream:
for line in stream:
if setting_name in line:
tmpstream.write(comb)
else:
tmpstream.write(line)
I think I got it. See code below.
def modify(self, name, value):
comb = name + ' ' + '=' + ' ' + value + '\n'
mylist = []
with open('/file/', 'w') as tmpstream:
with open('/file/', 'r') as stream:
for line in stream:
a = line.split()
b = re.compile('\\b'+name+'\\b')
if len(a) > 0:
if b.search(a[0]):
tmpstream.write(comb)
else:
tmpstream.write(line)
I spoke too soon. It stops at the key-value I provide. So, it only writes one line, and doesn't write the lines that don't match.
def modify(name, value):
comb = name + ' ' + '=' + ' ' + value + '\n'
mylist = []
with open('/file1', 'w') as tmpstream:
with open('/file2', 'r') as stream:
for line in stream:
a = line.split()
b = re.compile('\\b'+name+'\\b')
if len(a) > 0:
if b.search(a[0]):
tmpstream.write(comb)
else:
tmpstream.write(line)
Can anyone see the issue?
Because when you open file for writing
result = open(path, 'w') # open file handle for write
you just erase it content. Try to write in different file and after all work done replace old file with new one. Or read all data into memory and then process it and write to file.
with open(path) as f:
data = f.read()
with open(path, 'w') as f:
for l in data:
# make job here
first of all you are reading an writing the same file ...
you could first read it all and the write line by line
with open(path,'r') as f:
myfile = f.read() # read everything in the variable "myfile"
result = open(path, 'w') # open file handle for write
for line in myfile.splitlines(): # process the original file content 1 line at a time
# as before
I strongly recommend reading python's documentation on how to read and write files.
If you open an existing file in write-mode open(path, 'w'), its content will be erased:
mode can be (...) 'w' for only writing (an existing file with the same name will be erased)
To replace a line in python you can have a look at this: Search and replace a line in a file in Python
Here is one the solutions provided there adapted to your context (tested for python3):
from tempfile import mkstemp
from shutil import move
from os import close
def test(filepath, command):
# Split command into key/value
key, _ = command.split('=')
matched_key = False
# Create a temporary file
fh, tmp_absolute_path = mkstemp()
with open(tmp_absolute_path, 'w') as tmp_stream:
with open(filepath, 'r') as stream:
for line in stream:
if key in line:
matched_key = True
tmp_stream.write(command + '\n')
else:
tmp_stream.write(line)
if not matched_key:
tmp_stream.write(command + '\n')
close(fh)
move(tmp_absolute_path, filepath)
Note that with the code above every line that matches key (key=blob or blob=key) will be replaced.

Python -- How to split headers/chapters into separate files automatically

I'm converting text directly to epub and I'm having a problem automatically splitting the HTML book file into separate header/chapter files. At the moment, the code below partially works but only creates every other chapter file. So half the header/chapter files are missing from the output. Here is the code:
def splitHeaderstoFiles(fpath):
infp = open(fpath, 'rt', encoding=('utf-8'))
for line in infp:
# format and split headers to files
if '<h1' in line:
#-----------format header file names and other stuff ------------#
# create a new file for the header/chapter section
path = os.getcwd() + os.sep + header
with open(path, 'wt', encoding=('utf-8')) as outfp:
# write html top meta headers
outfp = addMetaHeaders(outfp)
# add the header
outfp = outfp.write(line)
# add the chapter/header bodytext
for line in infp:
if '<h1' not in line:
outfp.write(line)
else:
outfp.write('</body>\n</html>')
break
else:
continue
infp.close()
The problem occurs in the second 'for loop' at the bottom of the code, when I look for the next h1 tag to stop the split. I cannot use seek() or tell() to rewind or move back one line so the program can find the next header/chapter on the next iteration. Apparently you cannot use these in python in a for loop containing an implicit iter or next object in operation. Just gives a 'can't do non-zero cur-relative seeks' error.
I've also tried the while line != ' ' + readline() combination in the code which also gives the same error as above.
Does anyone know an easy way to split HTML headers/chapters of varying lengths into separate files in python? Are there any special python modules(such as pickles) that could help make this task easier?
I'm using Python 3.4
My grateful thanks in advance for any solutions to this problem...
I ran into similar problem a while ago, here is a simplified solution:
from itertools import count
chapter_number = count(1)
output_file = open('000-intro.html', 'wb')
with open('index.html', 'rt') as input_file:
for line in input_file:
if '<h1' in line:
output_file.close()
output_file = open('{:03}-chapter'.format(next(chapter_number)), 'wb')
output_file.write(line)
output_file.close()
In this approach, the first block of text leading to the first h1 block is written into 000-intro.html, the first chapter will be written into 001-chapter.html and so on. Please modify it to taste.
The solution is a simple one: Upon encountering the h1 tag, close the last output file and open a new one.
You are looping over your input file twice, which is likely causing your problems:
for line in infp:
...
with open(path, 'wt', encoding=('utf-8')) as outfp:
...
for line in infp:
...
Each for is going to have it's own iterator, so you are going to loop over the file many times.
You might try transforming your for loop into a while so you're not using two different iterators:
while infp:
line = infp.readline()
if '<h1' in line:
with open(...) as outfp:
while infp:
line = infp.readline()
if '<h1' in line:
break
outfp.writeline(...)
Alternatively, you may wish to use an HTML parser (i.e., BeautifulSoup). Then you can do something like what is described here: https://stackoverflow.com/a/8735688/65295.
Update from comment - essentially, read the entire file all at once so you can freely move back or forward as necessary. This probably won't be a performance issue unless you have a really really big file (or very little memory).
lines = infp.readlines() # read the entire file
i = 0
while i < len(lines):
if '<h1' in lines[i]:
with open(...) as outfp:
j = i + 1
while j < len(lines):
if '<h1' in lines[j]:
break
outfp.writeline(lines[j])
# line j has an <h1>, set i to j so we detect the it at the
# top of the next loop iteration.
i = j
else:
i += 1
I eventually found the answer to the above problem. The code below does alot more that just get the file header. It also simultaneously loads two parallel list arrays with formatted file name data(with extension) and pure header name data respectively so I can use these lists to fill in the and formatted filename extension in these html files within a while loop in one hit. The code now works well and is shown below.
def splitHeaderstoFiles(dir, inpath):
count = 1
t_count = 0
out_path = ''
header = ''
write_bodytext = False
file_path_names = []
pure_header_names = []
inpath = dir + os.sep + inpath
with open(inpath, 'rt', encoding=('utf-8')) as infp:
for line in infp:
if '<h1' in line:
#strip html tags, convert to start caps
p = re.compile(r'<.*?>')
header = p.sub('', line)
header = capwords(header)
line_save = header
# Add 0 for count below 10
if count < 10:
header = '0' + str(count) + '_' + header
else:
header = str(count) + '_' + header
# remove all spaces + add extension in header
header = header.replace(' ', '_')
header = header + '.xhtml'
count = count + 1
#create two parallel lists used later
out_path = dir + os.sep + header
outfp = open(out_path, 'wt', encoding=('utf-8'))
file_path_names.insert(t_count, out_path)
pure_header_names.insert(t_count, line_save)
t_count = t_count + 1
# Add html meta headers and write it
outfp = addMainHeaders(outfp)
outfp.write(line)
write_bodytext = True
# add header bodytext
elif write_bodytext == True:
outfp.write(line)
# now add html titles and close the html tails on all files
max_num_files = len(file_path_names)
tmp = dir + os.sep + 'temp1.tmp'
i = 0
while i < max_num_files:
outfp = open(tmp, 'wt', encoding=('utf-8'))
infp = open(file_path_names[i], 'rt', encoding=('utf-8'))
for line in infp:
if '<title>' in line:
line = line.strip(' ')
line = line.replace('<title></title>', '<title>' + pure_header_names[i] + '</title>')
outfp.write(line)
else:
outfp.write(line)
# add the html tail
if '</body>' in line or '</html>' in line:
pass
else:
outfp.write(' </body>' + '\n</html>')
# clean up
infp.close()
outfp.close()
shutil.copy2(tmp, file_path_names[i])
os.remove(tmp)
i = i + 1
# now rename just the title page
if os.path.isfile(file_path_names[0]):
title_page_name = file_path_names[0]
new_title_page_name = dir + os.sep + '01_Title.xhtml'
os.rename(title_page_name, new_title_page_name)
file_path_names[0] = '01_Title.xhtml'
else:
logmsg27(DEBUG_FLAG)
os._exit(0)
# xhtml file is no longer needed
if os.path.isfile(inpath):
os.remove(inpath)
# returned list values are also used
# later to create epub opf and ncx files
return(file_path_names, pure_header_names)
#Hai Vu and #Seth -- Thanks for all your help.

MD5 decrypt script

__author__ = 'Zane'
import hashlib
import sys
if (len(sys.argv)!=2 ) or (len(sys.argv[1])!= 32):
print("[---] md5cracker.py & hash")
sys.exit(1)
crackedmd5 = sys.argv[1]
# open a file and read its contents
f = open('file.txt')
lines = f.readline()
f.close()
for line in lines:
cleanline = line.rstrip()
hashobject = hashlib.md5(cleanline)
if (hashobject==crackedmd5):
print('Plain text password for ' + crackedmd5 + "is " + hashobject + '\n')
I get no error with exit code 1 and i do not know where i get it wrong
Your program exits with status code one because you told it so (roughly on line 8):
sys.exit(1)
Pythons code structure is based on indent of lines. For now your whole code is part of the if (len(sys.argv)!=2 ) or (len(sys.argv[1])!= 32): condition.
You need to unindent all lines with one tab starting from crackedmd5 = sys.argv[1]
EDIT
You also used lines = f.readline() which will read only one line and so for line in lines will iterate over every single char in that line and not over multiple lines. You need to use lines = f.readlines() instead.

Using python to read txt files and answer questions

a01:01-24-2011:s1
a03:01-24-2011:s2
a02:01-24-2011:s2
a03:02-02-2011:s2
a03:03-02-2011:s1
a02:04-19-2011:s2
a01:05-14-2011:s2
a02:06-11-2011:s2
a03:07-12-2011:s1
a01:08-19-2011:s1
a03:09-19-2011:s1
a03:10-19-2011:s2
a03:11-19-2011:s1
a03:12-19-2011:s2
So I have this list of data as a txt file, where animal name : date : location
So I have to read this txt file to answer questions.
So so far I have
text_file=open("animal data.txt", "r") #open the text file and reads it.
I know how to read one line, but here since there are multiple lines im not sure how i can read every line in the txt.
Use a for loop.
text_file = open("animal data.txt","r")
for line in text_file:
line = line.split(":")
#Code for what you want to do with each element in the line
text_file.close()
Since you know the format of this file, you can shorten it even more over the other answers:
with open('animal data.txt', 'r') as f:
for line in f:
animal_name, date, location = line.strip().split(':')
# You now have three variables (animal_name, date, and location)
# This loop will happen once for each line of the file
# For example, the first time through will have data like:
# animal_name == 'a01'
# date == '01-24-2011'
# location == 's1'
Or, if you want to keep a database of the information you get from the file to answer your questions, you can do something like this:
animal_names, dates, locations = [], [], []
with open('animal data.txt', 'r') as f:
for line in f:
animal_name, date, location = line.strip().split(':')
animal_names.append(animal_name)
dates.append(date)
locations.append(location)
# Here, you have access to the three lists of data from the file
# For example:
# animal_names[0] == 'a01'
# dates[0] == '01-24-2011'
# locations[0] == 's1'
You can use a with statement to open the file, in case of the open was failed.
>>> with open('data.txt', 'r') as f_in:
>>> for line in f_in:
>>> line = line.strip() # remove all whitespaces at start and end
>>> field = line.split(':')
>>> # field[0] = animal name
>>> # field[1] = date
>>> # field[2] = location
You are missing the closing the file. You better use the with statement to ensure the file gets closed.
with open("animal data.txt","r") as file:
for line in file:
line = line.split(":")
# Code for what you want to do with each element in the line

Categories