I am trying to read 3 log files and parse them to extract the required information; I need this code to run in a loop and pick up new lines if they meet the required parameters.
I wrote the following code:
# First attempt: scan x.txt, y.txt and z.txt for lines containing 'error' and
# try to collect each such line once per file.
# NOTE(review): indentation was lost in this paste, so the nesting of the inner
# loops below is presumed (inside the `if 'error' in line:` test), not certain.
import os
x_list = []
y_list = []
z_list = []
x_log = open('x.txt')
for line in x_log:
line = line.rstrip()
if 'error' in line:
x = line
# NOTE(review): this iterates the same file object the outer loop is reading,
# and rebinds x, so the matched line stored just above is discarded.
for x in x_log:
# NOTE(review): membership is tested against the file object x_log, not x_list.
if not x in x_log:
x_list.append(x)
print('ERROR1',x)
y_log = open('y.txt')
for line in y_log:
line = line.rstrip()
if 'error' in line:
x = line
# NOTE(review): y_list starts empty and is only appended to inside this loop,
# so the loop body never runs and nothing is collected or printed for y.txt.
for x in y_list:
if not x in y_list:
y_list.append(x)
print('ERROR2',x)
z_log = open('z.txt')
for line in z_log:
line = line.rstrip()
if 'error' in line:
x = line
# NOTE(review): as for x.txt, this re-iterates the open file object; the x values
# compared with z_list here come straight from the file, not from the stripped `line`.
for x in z_log:
if not x in z_list:
z_list.append(x)
print('ERROR3',x)
what I am trying to accomplish:
1. read the file.
2. search for relevant line.
3. if the information does not exist in the list, append to list.
4. print line.
I need help setting up a while loop, and I am definitely doing something wrong when comparing the line to the contents of the list.
UPDATE1:
Ok so I managed to get my code to work by adding:
and line not in x_list:
to my original line:
if 'error' in line:
so now I got:
if 'error' in line and line not in x_list:
full code:
# Collect each distinct 'error' line from the three logs and print a line the
# first time it is seen.
x_list = []
y_list = []
z_list = []

# (log path, lines already reported for that log, label used in the printout)
for path, seen, label in (
    ('x.txt', x_list, 'ERROR-X'),
    ('y.txt', y_list, 'ERROR-Y'),
    ('z.txt', z_list, 'ERROR-Z'),
):
    # `with` closes the file even if reading fails; it was never closed before.
    with open(path) as log:
        for line in log:
            line = line.rstrip()
            if 'error' in line and line not in seen:
                seen.append(line)
                print(label, line)
it does what i need but i still need to run it in a loop, can anyone help me?
UPDATE2:
managed to get it to work in a loop, if a new line is added and it meets the parsing parameters it will be printed.
code:
import time

# Poll the three logs forever and print every 'error' line the first time it
# appears in its file.
x_list = []
y_list = []
z_list = []

# (log path, lines already reported for that log, label used in the printout)
LOGS = (
    ('x.txt', x_list, 'ERROR-X'),
    ('y.txt', y_list, 'ERROR-Y'),
    ('z.txt', z_list, 'ERROR-Z'),
)
POLL_SECONDS = 1  # pause between passes so the loop does not spin the CPU

while True:
    for path, seen, label in LOGS:
        # Re-open on every pass to pick up new lines; `with` closes the handle
        # each time instead of leaking one file object per iteration.
        with open(path, 'r') as log:
            for line in log:
                line = line.rstrip()
                if 'error' in line and line not in seen:
                    seen.append(line)
                    print(label, line)
    time.sleep(POLL_SECONDS)
The optimized approach:
def get_error_lines(fp, lines_set, suffix=''):
    """Print every not-yet-seen line of *fp* that contains 'error'.

    fp        - iterable of text lines (e.g. an open file);
    lines_set - set of error lines already reported; updated in place;
    suffix    - text appended to the 'ERROR' label in the printout.
    """
    for line in fp:
        line = line.rstrip()
        if 'error' in line and line not in lines_set:
            lines_set.add(line)
            print('ERROR' + suffix, line)
# using set objects to hold unique items, one set per log file
x_set = set()
y_set = set()
z_set = set()

# all three files are closed when the block exits
with open('x.txt', 'r') as x_log, open('y.txt', 'r') as y_log, open('z.txt', 'r') as z_log:
    get_error_lines(x_log, x_set, '1')
    get_error_lines(y_log, y_set, '2')
    get_error_lines(z_log, z_set, '3')
Related
Hi all, can someone take a look at this code? I have a problem, but I don't know what is causing it.
I'm working on rendering text of various lengths and shapes onto images. When the segmented area is big enough, the text is placed, but when the text is a little longer, the error below appears. For example, when the text has 1-8 words the output is fine, but when it is longer I get this error; on some images it still works because they have a bigger area to render the text in. So I don't know what to do.
Terminal shows me these errors:
File "/..../text_utils.py", line 679, in sample
return self.fdict[kind](nline_max,nchar_max)
File "/..../text_utils.py", line 725, in sample_para
lines = self.get_lines(nline, nword, nchar_max, f=0.35)
File "/..../text_utils.py", line 657, in get_lines
lines = h_lines(niter=100)
File "/..../text_utils.py", line 649, in h_lines
line_start = np.random.choice(len(self.txt)-nline)
File "mtrand.pyx", line 902, in numpy.random.mtrand.RandomState.choice
ValueError: a must be greater than 0 unless no samples are taken
I looked at the source at https://github.com/numpy/numpy/blob/main/numpy/random/mtrand.pyx; there is a check around line 902, but I don't understand it.
And this is my code:
def get_lines(self, nline, nword, nchar_max, f=0.35, niter=100):
    """Sample `nline` consecutive lines of self.txt and trim them to fit.

    nline     - number of consecutive lines to draw from self.txt;
    nword     - per-line word limits, indexed by line;
    nchar_max - maximum number of characters allowed in a line;
    f         - value forwarded to self.is_good;
    niter     - maximum number of sampling attempts.

    Returns the list of lines, or None when no acceptable sample was found
    (including when self.txt has too few lines to draw `nline` from).
    """
    # np.random.choice(a) requires a > 0. When self.txt has no more than
    # `nline` lines the call below raised "ValueError: a must be greater than 0
    # unless no samples are taken". Report "no sample" instead; sample_para
    # already handles a None result.
    if len(self.txt) - nline <= 0:
        return None

    def h_lines(niter=100):
        lines = ['']
        attempt = 0
        while not np.all(self.is_good(lines, f)) and attempt < niter:
            attempt += 1
            line_start = np.random.choice(len(self.txt) - nline)  # line 649 in the traceback
            lines = [self.txt[line_start + i] for i in range(nline)]
        return lines

    lines = ['']
    attempt = 0
    while not np.all(self.is_good(lines, f)) and attempt < niter:
        attempt += 1
        lines = h_lines(niter=100)  # line 657 in the traceback
        # get words per line:
        nline = len(lines)
        for i in range(nline):
            words = lines[i].split()
            dw = len(words) - nword[i]
            if dw > 0:
                # keep a random window of nword[i] consecutive words
                first_word_index = random.choice(range(dw + 1))
                lines[i] = ' '.join(words[first_word_index:first_word_index + nword[i]])

            while len(lines[i]) > nchar_max:  # chop-off characters from end:
                if not np.any([ch.isspace() for ch in lines[i]]):
                    # a single over-long word cannot be shortened: drop the line
                    lines[i] = ''
                else:
                    # cut everything after the last space
                    lines[i] = lines[i][:len(lines[i]) - lines[i][::-1].find(' ')].strip()

    if not np.all(self.is_good(lines, f)):
        return None
    return lines
def sample(self, nline_max, nchar_max, kind='WORD'):
    """Call the sampler stored in self.fdict under `kind` and return its result."""
    return self.fdict[kind](nline_max, nchar_max)  # line 679 in the traceback
def sample_para(self, nline_max, nchar_max):
    """Sample a multi-line paragraph.

    nline_max - upper bound on the number of lines;
    nchar_max - maximum number of characters per line (passed to get_lines).

    Returns the lines joined with newline characters, or an empty list when
    get_lines returned None.
    """
    # get number of lines in the paragraph: a Beta draw scaled by nline_max,
    # rounded up, and at least 1
    nline = nline_max * sstat.beta.rvs(a=self.p_para_nline[0], b=self.p_para_nline[1])
    nline = max(1, int(np.ceil(nline)))

    # get number of words for each line: a Beta draw scaled by
    # self.p_para_nword[2], rounded up, and at least 1
    nword = [
        self.p_para_nword[2] * sstat.beta.rvs(a=self.p_para_nword[0], b=self.p_para_nword[1])
        for _ in range(nline)
    ]
    nword = [max(1, int(np.ceil(n))) for n in nword]

    lines = self.get_lines(nline, nword, nchar_max, f=0.35)  # line 725 in the traceback
    if lines is None:
        return []

    # center align the paragraph-text with probability self.center_para:
    if np.random.rand() < self.center_para:
        lines = self.center_align(lines)
    return '\n'.join(lines)
# Read a map file whose lines look like "block>20, otherblock>10" and try to
# expand every "name>count" entry in place.
# NOTE(review): indentation was lost in this paste; the nesting is presumed.
file = open(fullname, 'r')
for line in file:
# drop the trailing newline, then split the row into "name>count" entries
if line.endswith('\n'): line = line[:-1]
line = line.split(',')
# NOTE(review): `line` is the list being iterated AND the list modified by the
# slice assignment at the end of the loop body.
for tile in line:
index = line.index(tile)
# 'block>20' -> ['block', '20']
tile = tile.split('>')
print(tile)
copies = int(tile[1])
# NOTE(review): this repeats the whole two-element list (name and count),
# not just the name.
tile = tile * copies
# NOTE(review): the inserted pieces are visited by the same loop on later
# iterations; a piece such as '20' contains no '>', so split('>') yields a
# one-element list and tile[1] raises the IndexError shown in the traceback.
line[index:index+1] = tile
the text file format:
block>20, otherblock>10
the output:
['block', '20']
['20']
Traceback (most recent call last):
File "C:/Users/CAIO/Documents/Pycharm/vitoria/main.py", line 92, in <module>
main()
File "C:/Users/CAIO/Documents/Pycharm/vitoria/main.py", line 77, in main
test_map = MapClass("map.txt")
File "C:/Users/CAIO/Documents/Pycharm/vitoria/main.py", line 23, in __init__
self.load_map(name)
File "C:/Users/CAIO/Documents/Pycharm/vitoria/main.py", line 39, in load_map
copies = int(tile[1])
IndexError: list index out of range
Process finished with exit code 1
when i reference tile[1] it states the index is invalid, and when i reference tile[0] it's just 'block', not ['block','10'].
printing tile before spliting results in :
'block>20'
'20'
i'm too tired for this at this point, it's probably something dumb i'm skipping
Based on your code, it looks like you want to expand the 'text>count' format with the text repeated 'count' times.
Try this code. Note that this code removes the leading space before the text.
ss = '''
block>20, otherblock>10
b2>21, ob2>12
b3>22, ob3>13
'''.strip()
with open('test.csv', 'w') as f:
    f.write(ss)  # write test file

##############

fullname = 'test.csv'
alllines = []
with open(fullname, 'r') as file:
    for line in file:
        body = line.rstrip('\n')
        newline = line[len(body):]  # keep the original line ending, if any
        expanded = []
        for tile in body.split(','):
            tilex = tile.strip().split('>')
            copies = int(tilex[1])
            expanded.append(tilex[0] * copies)
        # Rebuild the row from its pieces. Substituting with str.replace on
        # the whole row would also hit entries that merely contain another
        # entry, e.g. 'block>10' inside ' otherblock>10'.
        alllines.append(','.join(expanded) + newline)
print(''.join(alllines))
Output
blockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblockblock,otherblockotherblockotherblockotherblockotherblockotherblockotherblockotherblockotherblockotherblock
b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2,ob2ob2ob2ob2ob2ob2ob2ob2ob2ob2ob2ob2
b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3b3,ob3ob3ob3ob3ob3ob3ob3ob3ob3ob3ob3ob3ob3
If you want minimal code, you can use list comprehension.
ss = '''
block>20, otherblock>10
b2>21, ob2>12
b3>22, ob3>13
'''.strip()
with open('test.csv', 'w') as f:
    f.write(ss)  # write test file

#######################

fullname = 'test.csv'  # this snippet used the name without defining it


def expand(entry):
    """Turn one 'name>count' entry into the name repeated count times."""
    name, count = entry.strip().split('>')
    return name * int(count)


with open(fullname, 'r') as f:
    lines = f.read().splitlines()
# split on ',' and strip each entry, so the spacing after commas does not matter;
# blank lines are skipped
xx = '\n'.join(
    ','.join(expand(e) for e in ln.split(','))
    for ln in lines if ln.strip()
)
print(xx)
Output is the same
>gene1
ATGATGATGGCG
>gene2
GGCATATC
CGGATACC
>gene3
TAGCTAGCCCGC
This is the text file which I am trying to read.
I want to read every gene into a separate string and then add it to a list.
Header lines start with the '>' character, which marks where one gene ends and the next one begins.
# Attempt to collect the sequence lines of sequences1.txt.
# NOTE(review): indentation was lost in this paste; the nesting is presumed.
with open('sequences1.txt') as input_data:
for line in input_data:
# NOTE(review): `line` is not changed inside this loop, so once the condition
# is true it never becomes false. The header lines in the sample data look
# like '>gene1', which is not equal to ">" either.
while line != ">":
# NOTE(review): no variable named `list` is assigned in this snippet, so
# `list` here is the builtin type, not a list object.
list.append(line)
print(list)
When printed, the list should be:
list =["ATGATGATGGCG","GGCATATCCGGATACC","TAGCTAGCCCGC"]
sequences = []
gene = []  # pieces of the gene currently being read
with open('sequences1.txt') as input_data:
    for line in input_data:
        line = line.strip()
        if line.startswith('>'):
            # a header line closes the previous gene, if there was one
            if gene:
                sequences.append(''.join(gene))
                gene = []
        elif line:
            gene.append(line)
if gene:
    sequences.append(''.join(gene))  # append last gene
print(sequences)
output:
['ATGATGATGGCG', 'GGCATATCCGGATACC', 'TAGCTAGCCCGC']
You have multiple mistakes in your code, look here:
with open('sequences1.txt', 'r') as file:
    lines = []  # not named `list`, which would shadow the builtin
    for line in file.read().split('\n'):
        # keep non-empty lines that are not '>' headers
        # (the posted condition was cut off after `len(line`; reconstructed)
        if not line.startswith(">") and len(line) > 0:
            lines.append(line)
print(lines)
Try this:
$ cat genes.txt
>gene1
ATGATGATGGCG
>gene2
GGCATATC
CGGATACC
>gene3
TAGCTAGCCCGC
$ python
>>> genes = []
>>> with open('genes.txt') as file_:
... for line in file_:
... if not line.startswith('>'):
... genes.append(line.strip())
...
>>> print(genes)
['ATGATGATGGCG', 'GGCATATC', 'CGGATACC', 'TAGCTAGCCCGC']
sequences1.txt:
>gene1
ATGATGATGGCG
>gene2
GGCATATC
CGGATACC
>gene3
TAGCTAGCCCGC
and then:
desired_text = []
with open('sequences1.txt') as input_data:
    for line in input_data:
        line = line.strip()
        # skip blank lines and '>' header lines
        if line and not line.startswith('>'):
            desired_text.append(line)
print(desired_text)
OUTPUT:
['ATGATGATGGCG', 'GGCATATC', 'CGGATACC', 'TAGCTAGCCCGC']
EDIT:
Sped-read it, fixed it with the desired output
# Revised answer: collect the sequences, joining lines that belong to one gene.
# NOTE(review): indentation was lost in this paste, so how the `else` branches
# below pair up with the `if`s is presumed rather than certain.
with open('sequences1.txt') as input_data:
content = input_data.readlines()
# strip every line and drop the empty ones
content = [l.strip() for l in content if l.strip()]
# flag flipped each time a line with 'gene' at an index > 0 is seen
nextLine = False
# list to save the lines
textList = []
# NOTE(review): never reset to '' after it has been appended to textList
concatenated = ''
for line in content:
# index of 'gene' in the line, -1 when absent; a '>geneN' header gives 1
find_TC = line.find('gene')
if find_TC > 0:
nextLine = not nextLine
else:
# presumably: while the flag is set, a sequence line is stored as-is
if nextLine:
textList.append(line)
else:
if find_TC < 0:
# presumably: with the flag clear, the first sequence line is held in
# `concatenated` and the following one is joined to it and stored
if concatenated != '':
concatenated = concatenated + line
textList.append(concatenated)
else:
concatenated = line
print(textList)
OUTPUT:
['ATGATGATGGCG', 'GGCATATCCGGATACC', 'TAGCTAGCCCGC']
I have a python script that is checking data from a text file and writing it if it meets the right parameters. So far I have:
# -*- coding: utf-8 -*-
import math

# Read the whole collision log once; every pass below filters these lines.
# (`f.close` without parentheses never closed the file; `with` does.)
with open("COLLISON.txt", 'r') as f:
    linelist = f.readlines()

# Lines whose first character is 'Û'.
with open("All_Collisions_Raw_Data.txt", "w") as f2:
    for line in linelist:
        if 'Û' in line[0]:
            f2.write(line)

# Lines mentioning 'Prime Recoil'.
with open("Primary_Recoils_Raw_Data.txt", "w") as f3:
    for line in linelist:
        if 'Prime Recoil' in line:
            f3.write(line)

S = raw_input('Are you analysing a sphere?\n Y/n \n')
if S == 'Y' or S == 'y':
    # Python 2: input() evaluates what is typed, so rad is a number here.
    rad = input('What is the radius of the sphere in Angstroms? \n')
    with open('All_Collisions_in_sphere', 'w') as f14:
        for line in linelist:
            if len(line) > 55 and 'Û' in line[0]:
                # Fixed-width columns: a value and a base-10 exponent per axis.
                Xa = float(line[25:29])
                Ya = float(line[36:40])
                Za = float(line[47:51])
                Xf = float(line[31:34])
                Yf = float(line[42:45])
                Zf = float(line[53:56])
                # NOTE(review): rad is subtracted from the X value before the
                # exponent is applied - confirm this is the intended shift.
                Xs = Xa - rad
                Xd = Xs * 10 ** Xf
                Yd = Ya * 10 ** Yf
                Zd = Za * 10 ** Zf
                # distance from the (shifted) origin
                r = math.sqrt(Xd * Xd + Yd * Yd + Zd * Zd)
                I = line[6]
                if r < rad:
                    f14.write(line)
I only want to write the line if I = 1 or if I is equal to the previous line's I + 1. However, I'm unsure how to refer to the previous line, or how to keep the current line for later use. Does anyone know how I can achieve this?
One way is to just store the previous (we initialise to None and check if it is None):
prev = None
for line in file:
if prev is not None:
if line == prev:
# do stuff
prev = line
Another way is to use iterators:
itr = iter(file)
# The default makes an empty file yield None instead of raising StopIteration.
prev = next(itr, None)
for line in itr:
    if line == prev:
        pass  # do stuff
    prev = line
Edit
If you want to get each line number as well, use the enumerate function:
for line_number, line in enumerate(file, start=1):
...
Just as an FYI don't do
file = open(path)
linelist = file.readlines()
file.close()
for line in linelist:
...
but instead do this:
with open(path) as file:
for line in file:
...
The reason is that the first method reads the entire file into memory and will not close the file if an exception is raised while reading, which leaves the file handle open. The `with` statement handles all of that for you, and you can then iterate over the file directly.
Why is this function not printing in the shell? I keep getting these things:
<function terms at 0x025BD3D8>
def gdp_sections():
    """Print the (year, GDP change) pairs read from both GDP history files.

    Columns 22-42 are taken from GDP_Section1All_Hist1.csv and columns 4-47
    from GDP_Section1All_Hist2.csv. The result is printed as one list of
    tuples of strings; nothing is returned.
    """
    differences = []
    differences.extend(_read_year_gdp_pairs("GDP_Section1All_Hist1.csv", slice(22, 43)))
    differences.extend(_read_year_gdp_pairs("GDP_Section1All_Hist2.csv", slice(4, 48)))
    print(differences)


def _read_year_gdp_pairs(path, columns):
    """Return (year, GDP change) string pairs from the given columns of one file.

    The first 7 lines are skipped; line 8 holds the years and line 9 the GDP
    changes. If the two rows differ in length, the extra cells are ignored.
    """
    with open(path, 'r') as csv_file:  # closed on exit, even on error
        for _ in range(7):
            csv_file.readline()
        year = csv_file.readline().split(',')[columns]
        gdp_change = csv_file.readline().split(',')[columns]
    return list(zip(year, gdp_change))
# Read 'the_correct_presidents.txt' and split each record into name, term
# years and party.
# NOTE(review): indentation was lost in this paste, so the extent of the loop
# body - in particular whether print (line) runs once per record or once after
# the loop - cannot be determined from here. The file is never closed.
def terms():
file2 = open('the_correct_presidents.txt','r')
# discard the first line of the file
file2.readline()
# range(1,12) only makes the body run 11 times; the loop variable is
# immediately overwritten by the text read from the file
for line in range(1,12):
line = file2.readline()
line = line.strip().split(',')
# with more than 3 comma-separated fields, drop the second one
if len(line)>3:
del line[1]
name_of_president = line[0]
# presumably the field looks like 'start-end' - TODO confirm against the data
term = line[1].split('-')
year_started_term = int(term[0])
year_ended_term = int(term[1])
party = line[2]
print (line)
How are you invoking terms? Make sure you include the parentheses; otherwise you are asking for the function object named terms, which appears to be what you are seeing. Functions are first-class objects in Python and thus can be used as values and printed.
In other words, are you saying this at the shell?
>>> print(terms)
When you really intend to mean this?
>>> print(terms())