python string split resulting in list with wrong indexes - python

file = open(fullname, 'r')
for line in file:
    if line.endswith('\n'): line = line[:-1]
    line = line.split(',')
    for tile in line:
        index = line.index(tile)
        tile = tile.split('>')
        print(tile)
        copies = int(tile[1])
        tile = tile * copies
        line[index:index+1] = tile
The text file format:
block>20, otherblock>10
The output:
['block', '20']
['20']
Traceback (most recent call last):
  File "C:/Users/CAIO/Documents/Pycharm/vitoria/main.py", line 92, in <module>
    main()
  File "C:/Users/CAIO/Documents/Pycharm/vitoria/main.py", line 77, in main
    test_map = MapClass("map.txt")
  File "C:/Users/CAIO/Documents/Pycharm/vitoria/main.py", line 23, in __init__
    self.load_map(name)
  File "C:/Users/CAIO/Documents/Pycharm/vitoria/main.py", line 39, in load_map
    copies = int(tile[1])
IndexError: list index out of range
Process finished with exit code 1
When I reference tile[1] it says the index is invalid, and when I reference tile[0] it's just 'block', not ['block', '10'].
Printing tile before splitting results in:
'block>20'
'20'
I'm too tired for this at this point; it's probably something dumb I'm skipping.

Based on your code, it looks like you want to expand the 'text>count' format so that the text is repeated 'count' times.
Try this code. Note that it removes the leading space before the text.
ss = '''
block>20, otherblock>10
b2>21, ob2>12
b3>22, ob3>13
'''.strip()
with open('test.csv','w') as f: f.write(ss)  # write test file
##############
fullname = 'test.csv'
alllines = []
file = open(fullname, 'r')
for line in file:
    lineout = line
    if line.endswith('\n'): line = line[:-1]
    line = line.split(',')
    for idx,tile in enumerate(line):
        #index = line.index(tile)
        tilex = tile.strip().split('>')
        copies = int(tilex[1])
        tilex2 = tilex[0] * copies
        lineout = lineout.replace(tile, tilex2)
    alllines.append(lineout)
print(''.join(alllines))
Output
blockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblock,otherblockotherblockotherblockotherblockotherblockotherblockotherblockotherblockotherblockotherblock
b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2,ob2ob2ob2ob2ob2ob2ob2ob2ob2ob2ob2ob2
b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3,ob3ob3ob3ob3ob3ob3ob3ob3ob3ob3ob3ob3ob3
If you want minimal code, you can use a list comprehension.
ss = '''
block>20, otherblock>10
b2>21, ob2>12
b3>22, ob3>13
'''.strip()
with open('test.csv','w') as f: f.write(ss)  # write test file
#######################
with open(fullname, 'r') as f:
    lines = f.readlines()
xx = '\n'.join([','.join([e.split('>')[0]*int(e.split('>')[1]) for e in ln.split(', ')]) for ln in lines])
print(xx)
Output is the same
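For what it's worth, the IndexError in the question is most likely a side effect of modifying line while iterating over it: the slice assignment line[index:index+1] = tile inserts the expanded copies into the very list the for loop is walking, so the next item the loop visits is one of the inserted strings ('20'), which has nothing after a '>' to split on. A minimal sketch of the effect:
# minimal sketch: inserting items into the list being iterated shifts what
# the loop visits next, which is why '20' shows up as its own "tile"
line = ['block>20', ' otherblock>10']
for tile in line:
    print(tile)  # the second iteration already prints '20'
    line[line.index(tile):line.index(tile) + 1] = tile.split('>')
Iterating over a copy (for tile in line[:]) or building a new list, as the code above does, avoids this.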

Related

Lines missing in python

I am writing code in Python to remove all the text after a specific word, but some lines are missing from the output. I have a text file in Unicode which has 3 lines:
my name is test1
my name is
my name is test 2
What I want is to remove the text after the word "test" so I get the output below:
my name is test
my name is
my name is test
I have written code that does the task, but it also removes the second line "my name is".
My code is below:
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index > 0:
txt += line[:index + len(splitStr)] + "\n"
with open(r"test.txt", "w") as fp:
fp.write(txt)
It looks like when the keyword is not found, the index becomes -1, so you are skipping the lines without the keyword.
I would modify your if by adding a branch for that case, as follows:
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index > 0:
txt += line[:index + len(splitStr)] + "\n"
elif index < 0:
txt += line
with open(r"test.txt", "w") as fp:
fp.write(txt)
No need to add \n because the line already contains it.
Your code does not append the line if splitStr is not found in it.
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index != -1:
txt += line[:index + len(splitStr)] + "\n"
else:
txt += line
with open(r"test.txt", "w") as fp:
fp.write(txt)
In my solution I simulate the input file via io.StringIO. Compared to your code, my solution removes the else branch and only uses one += operator. Also, splitStr is set only once rather than on each iteration. This makes the code clearer and reduces possible sources of error.
import io

# simulates a file for this example
the_file = io.StringIO("""my name is test1
my name is
my name is test 2""")

txt = ""
splitStr = "test"
with the_file as fp:
    # each line
    for line in fp.readlines():
        # cut something?
        if splitStr in line:
            # find index
            index = line.find(splitStr)
            # cut after 'splitStr' and add newline
            line = line[:index + len(splitStr)] + "\n"
        # append line to output
        txt += line
print(txt)
When working with files in Python 3 it is recommended to use pathlib, like this:
import pathlib

file_path = pathlib.Path("test.txt")

# read from the file
with file_path.open('r') as fp:
    # do something
    ...

# write back to the file
with file_path.open('w') as fp:
    # do something
    ...
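Applied to this question's task, a minimal sketch along those lines (assuming the same test.txt and keyword as above):
import pathlib

file_path = pathlib.Path("test.txt")
split_str = "test"

out_lines = []
for line in file_path.read_text().splitlines():
    idx = line.find(split_str)
    # keep everything up to and including the keyword, otherwise keep the line as-is
    out_lines.append(line[:idx + len(split_str)] if idx != -1 else line)
file_path.write_text("\n".join(out_lines) + "\n")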
Suggestion:
for line in fp.readlines():
    i = line.find('test')
    if i != -1:
        line = line[:i + len('test')]

Reading a Python File to EOF while performing if statements

I am working on a program to concatenate rows within a file. Each file has a header, data rows labeled DAT001 to DAT113, and a trailer. Each concatenated line will contain DAT001 to DAT100, and DAT102-113 are optional. I need to print the header, concatenate DAT001-113, and when the file reaches a new DAT001 row, start a new line and concatenate DAT001-113 again. After that is all done, I will print the trailer. I have an if statement started, but it only writes the header and skips all other logic. I apologize that this is very basic, but I am struggling with reading rows over and over again without knowing how long the file might be.
I have tried the code below, but it won't read or print anything after the header.
import pandas as pd

destinationFile = "./destination-file.csv"
sourceFile = "./TEST.txt"
header = "RHR"
data = "DPSPOS"
beg_data = "DAT001"
data2 = "DAT002"
data3 = "DAT003"
data4 = "DAT004"
data5 = "DAT005"
data6 = "DAT006"
data7 = "DAT007"
data8 = "DAT008"
data100 = "DAT100"
data101 = "DAT101"
data102 = "DAT102"
data103 = "DAT103"
data104 = "DAT104"
data105 = "DAT105"
data106 = "DAT106"
data107 = "DAT107"
data108 = "DAT108"
data109 = "DAT109"
data110 = "DAT110"
data111 = "DAT111"
data112 = "DAT112"
data113 = "DAT113"
req_data = ''
opt101 = ''
opt102 = ''
with open(sourceFile) as Tst:
    for line in Tst.read().split("\n"):
        if header in line:
            with open(destinationFile, "w+") as dst:
                dst.write(line)
        elif data in line:
            if beg_data in line:
                req_data = line+line+line+line+line+line+line+line+line
            if data101 in line:
                opt101 = line
            if data102 in line:
                opt102 = line
            new_line = pd.concat(req_data,opt101,opt102)
            with open(destinationFile, "w+") as dst:
                dst.write(new_line)
        else:
            if trailer in line:
                with open(destinationFile, "w+") as dst:
                    dst.write(line)
Just open the output file once for the whole loop, not every time through the loop.
Check whether the line begins with DAT001. If it does, write the trailer to end the current line and start a new one by writing the header.
Then, for every line that begins with DAT, write it to the current line of the file.
first_line = True
with open(sourceFile) as Tst, open(destinationFile, "w+") as dst:
    for line in Tst.read().split("\n"):
        # start a new line when reading DAT001
        if line.startswith(beg_data):
            if not first_line:  # need to end the current line
                dst.write(trailer + '\n')
            first_line = False
            dst.write(header)
        # copy all the lines that begin with `DAT`
        if line.startswith('DAT'):
            dst.write(line)
    # end the last line
    dst.write(trailer + '\n')
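One caveat that applies to the question's code and the snippet above: trailer is used but never assigned, so it has to be defined first. The exact marker string is an assumption here:
trailer = "TRL"  # placeholder value; use whatever trailer marker the file format actually has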
See if the following code helps you make progress. It was not tested because no minimal runnable example was provided.
with open(destinationFile, "a") as dst:
    # The above will keep the file open until after all the indented code runs
    with open(sourceFile) as Tst:
        # The above will keep the file open until after all the indented code runs
        for line in Tst.read().split("\n"):
            if header in line:
                dst.write(line)
            elif data in line:
                if beg_data in line:
                    req_data = line + line + line + line + line + line + line + line + line
                if data101 in line:
                    opt101 = line
                if data102 in line:
                    opt102 = line
                new_line = pd.concat(req_data, opt101, opt102)
                dst.write(new_line)
            else:
                if trailer in line:
                    dst.write(line)
# with is a context manager which will automatically close the files.

Appending two text lines if the second line starts with a particular word

Consider a .txt file with the following content:
Pinus ponderosa P. & C. Lawson
var. scopulorum Engelm.
[5,800] - [7,800] 9,200 ft. [May] - [Jun]. Needleleaf
evergreen tree, mesophanerophyte; nanophyll, sclerophyll.
I would like to append any line starting with var. to the previous line.
Here's my code:
with open('myfile.txt', 'r') as f:
    txt = ''
    for line in f:
        line = line.replace('\n', '')
        if next(f)[:4] == 'var.':
            txt = '{}\n{} {}'.format(txt, line, next(f))
This throws the following error:
Traceback (most recent call last):
  File "<stdin>", line 5, in <module>
StopIteration
The expected output is:
Pinus ponderosa P. & C. Lawson var. scopulorum Engelm.
[5,800] - [7,800] 9,200 ft. [May] - [Jun]. Needleleaf
evergreen tree, mesophanerophyte; nanophyll, sclerophyll.
You can do it in one shot instead of iterating over the lines. Also, if you want to edit the file:
with open('myfile.txt', 'r') as f:
    txt = f.read()
txt = txt.replace('\nvar.', ' var.')
with open('myfile.txt', 'w') as f:
    f.write(txt)
This is one approach.
Ex:
with open(filename, 'r') as f:
    txt = ''
    for line in f:
        line = line.strip()
        if line.startswith('var.'):  # use str.startswith
            txt += " " + line
        else:
            txt += "\n" + line
print(txt.strip())
Output:
Pinus ponderosa P. & C. Lawson var. scopulorum Engelm.
[5,800] - [7,800] 9,200 ft. [May] - [Jun]. Needleleaf
evergreen tree, mesophanerophyte; nanophyll, sclerophyll.
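As for the traceback in the question: calling next(f) inside for line in f advances the same file iterator, so every check silently consumes an extra line, and once no line is left the explicit next() call raises StopIteration. A minimal sketch of that interaction:
# next(f) and the for loop share one iterator: each explicit next() skips a
# line the loop never sees; without the default argument it would raise
# StopIteration once the file is exhausted
with open('myfile.txt') as f:
    for line in f:
        peek = next(f, None)  # None instead of StopIteration, for the demo
        print(repr(line), '->', repr(peek))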

Take a value from a line in Python, keep the value for processing on the next line on many iterations.

I have a python script that is checking data from a text file and writing it if it meets the right parameters. So far I have:
# -*- coding: utf-8 -*-
import math

f = open("COLLISON.txt", 'r')
linelist = f.readlines()
f.close
f2 = open("All_Collisions_Raw_Data.txt", "w")
for line in linelist:
    if 'Û' in line[0]:
        f2.write(line)
f2.close()
f3 = open("Primary_Recoils_Raw_Data.txt", "w")
for line in linelist:
    if 'Prime Recoil' in line:
        f3.write(line)
f3.close()
S = raw_input('Are you analysing a sphere?\n Y/n \n')
if S == 'Y' or S == 'y':
    rad = input('What is the radius of the sphere in Angstroms? \n')
    f14 = open('All_Collisions_in_sphere', 'w')
    for line in linelist:
        if len(line) > 55:
            if 'Û' in line[0]:
                Xa = float(''.join(line[25:29]))
                Xs = float((Xa - rad))
                Ya = float(''.join(line[36:40]))
                Za = float(''.join(line[47:51]))
                Xf = float(''.join(line[31:34]))
                Yf = float(''.join(line[42:45]))
                Zf = float(''.join(line[53:56]))
                Xf1 = float(10**Xf)
                Yf1 = float(10**Yf)
                Zf1 = float(10**Zf)
                Xd = float((Xs*Xf1))
                Yd = float((Ya*Yf1))
                Zd = float((Za*Zf1))
                Xb = float((Xd*Xd))
                Yb = float((Yd*Yd))
                Zb = float((Zd*Zd))
                ra = float(Xb + Yb + Zb)
                r = float(math.sqrt(ra))
                I = (line[6])
                if r < rad:
                    f14.write(line)
    f14.close()
I only want to write the line if I equals 1 or equals the previous line's I + 1. However, I'm unsure how to refer to the previous line, or how to keep the current line for future recall. Does anyone know how I can achieve this?
One way is to just store the previous line (we initialise it to None and check whether it is None):
prev = None
for line in file:
    if prev is not None:
        if line == prev:
            # do stuff
            pass
    prev = line
Another way is to use iterators:
itr = iter(file)
prev = next(itr)
for line in itr:
    if line == prev:
        # do stuff
        pass
    prev = line
Edit
If you want to get each line number as well, use the enumerate function:
for line_number, line in enumerate(file, start=1):
    ...
Just as an FYI don't do
file = open(path)
linelist = file.readlines()
file.close()
for line in linelist:
    ...
but instead do this:
with open(path) as file:
    for line in file:
        ...
The reason is that the first method reads the entire file into memory and will not close the file if an exception happens during the read, which could corrupt the file. The with statement handles all of that for you, and you can iterate over the file directly.
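Putting both suggestions together for the question's loop, a minimal sketch (filename taken from the question, the actual comparison left as a placeholder):
# iterate the file directly inside a with block while remembering the
# previous line for the next iteration's check
with open("COLLISON.txt") as f:
    prev = None
    for line in f:
        if prev is not None:
            pass  # compare line against prev here (e.g. the I / previous I + 1 check)
        prev = line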

reading log files in a loop and printing desired results (python)

I am trying to read 3 log files and use parsing to extract the required information; I need this code to run in a loop and pick up new lines if they meet the required parameters.
I wrote the following code:
import os

x_list = []
y_list = []
z_list = []

x_log = open('x.txt')
for line in x_log:
    line = line.rstrip()
    if 'error' in line:
        x = line
        for x in x_log:
            if not x in x_log:
                x_list.append(x)
                print('ERROR1',x)

y_log = open('y.txt')
for line in y_log:
    line = line.rstrip()
    if 'error' in line:
        x = line
        for x in y_list:
            if not x in y_list:
                y_list.append(x)
                print('ERROR2',x)

z_log = open('z.txt')
for line in z_log:
    line = line.rstrip()
    if 'error' in line:
        x = line
        for x in z_log:
            if not x in z_list:
                z_list.append(x)
                print('ERROR3',x)
What I am trying to accomplish:
1. Read the file.
2. Search for the relevant line.
3. If the information does not exist in the list, append it to the list.
4. Print the line.
I need help setting up a while loop, and I am apparently doing something wrong when comparing the line to the contents of the list.
UPDATE1:
OK, so I managed to get my code to work by adding:
and line not in x_list
to my original line:
if 'error' in line:
so now I have:
if 'error' in line and line not in x_list:
Full code:
x_list = []
y_list = []
z_list = []

x_log = open('x.txt')
for line in x_log:
    line = line.rstrip()
    if 'error' in line and line not in x_list:
        x_list.append(line)
        print('ERROR-X',line)

y_log = open('y.txt')
for line in y_log:
    line = line.rstrip()
    if 'error' in line and line not in y_list:
        y_list.append(line)
        print('ERROR-Y',line)

z_log = open('z.txt')
for line in z_log:
    line = line.rstrip()
    if 'error' in line and line not in z_list:
        z_list.append(line)
        print('ERROR-Z',line)
It does what I need, but I still need to run it in a loop. Can anyone help me?
UPDATE2:
I managed to get it to work in a loop; if a new line is added and it meets the parsing parameters, it will be printed.
Code:
x_list = []
y_list = []
z_list = []
t = 1
while t == 1:
    x_log = open('x.txt','r')
    for line in x_log:
        line = line.rstrip()
        if 'error' in line and line not in x_list:
            x_list.append(line)
            print('ERROR-X',line)
    y_log = open('y.txt','r')
    for line in y_log:
        line = line.rstrip()
        if 'error' in line and line not in y_list:
            y_list.append(line)
            print('ERROR-Y',line)
    z_log = open('z.txt','r')
    for line in z_log:
        line = line.rstrip()
        if 'error' in line and line not in z_list:
            z_list.append(line)
            print('ERROR-Z',line)
The optimized approach:
def get_error_lines(fp, lines_set, suffix=''):
    ''' fp - file pointer;
        lines_set - a set of unique error lines;
        suffix - ERROR number (suffix) '''
    for line in fp:
        line = line.rstrip()
        if 'error' in line and line not in lines_set:
            lines_set.add(line)
            print('ERROR' + suffix, line)
# using set objects to hold unique items
x_set = set()
y_set = set()
z_set = set()

with open('x.txt', 'r') as x_log, open('y.txt', 'r') as y_log, open('z.txt', 'r') as z_log:
    get_error_lines(x_log, x_set, '1')
    get_error_lines(y_log, y_set, '2')
    get_error_lines(z_log, z_set, '3')
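If the logs need to be re-checked continuously, as in the question's while loop, the helper can simply be called inside a polling loop; a minimal sketch (the sleep interval is an assumption):
import time

x_set, y_set, z_set = set(), set(), set()
while True:
    with open('x.txt', 'r') as x_log, open('y.txt', 'r') as y_log, open('z.txt', 'r') as z_log:
        get_error_lines(x_log, x_set, '1')
        get_error_lines(y_log, y_set, '2')
        get_error_lines(z_log, z_set, '3')
    time.sleep(1)  # polling interval is an assumption; add a stop condition as needed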
