How to fix output file not breaking output line by line? - python

I am writing a Python script to read in a file, read through that file line by line and parse out data from that file to another text file via user command line argument. Right now, I am able to read the input file line by line and parse out the data via command line argument. However, the output file that I am trying to write to print all in one line rather than break the output line by line.
temp.log:
06 May 19 03:40:35 3 abCodeClearTrap Error Clear Trap (agent: 12367a12,
chassis:12367a12, ErrIdText: ERROR ID TEXT, csssi: EXTIFG, clearedID:
0x089088394)
06 May 19 03:44:35 3 abCodeErrorTrap Error Trap (agent: 12368a15, chassis:
12368a15, ErrIdText: Skip this item, csssi: SSRSSR, clearedID:
0x089088394)
My code:
import re, sys
with open('temp.log') as f:
lines = f.readlines()
with open('output.txt') as o:
data = []
for line in lines:
if 'date' in sys.argv:
try:
date = re.match(r'\date{2} \w+ \date{2}', line).group()
row.append(date)
except:
date = 'date'
if 'agent' in sys.argv:
try:
agent = re.search(r'agent:\s(.*?),', line).group()
row.append(agent)
except:
agent = 'agent:'
if 'err' in sys.argv:
try:
errID = re.search(r'ErrIdText:\s(.*?),', line).group()
row.append(errID)
except:
errID = 'ErrIdText:'
if 'clear' in sys.argv:
try:
clear = re.search(r'clearedID:\s(.*?)\)', line).group()
row.append(clear)
except:
clear = 'clearedID:'
row = []
data.append(row)
for row in data:
lines = o.writelines(row)
print(row)
o.close()
There is no error message but I want my output.txt file to break down line by line.
For example:
If the user run:
python export.py agent chassis
I expect the output.txt to print
['agent: 12367a12,', 'chassis:12367a12,']
['agent: 12368a15,', 'chassis:12368a15,']
But the output in the output.txt is:
agent:12367a12, chassis:12367a12, agent:12368a15, chassis:12368a15,

Here you go :)
for row in data:
lines = o.writelines(row)
lines = o.writelines("\n")
print(row)
or
for row in data:
row.append("\n")
lines = o.writelines(row)
print(row)
btw I am surprised that this code works ... because of you have defined row after you are using it
// ( * )
if something
try:
date = re.match(r'\date{2} \w+ \date{2}', line).group()
row.append(date) // should crash
except:
date = 'date'
if 'agent' in sys.argv:
try:
agent = re.search(r'agent:\s(.*?),', line).group()
row.append(agent) // should crash
except:
agent = 'agent:'
if 'err' in sys.argv:
try:
errID = re.search(r'ErrIdText:\s(.*?),', line).group()
row.append(errID) // should crash
except:
errID = 'ErrIdText:'
if 'clear' in sys.argv:
try:
clear = re.search(r'clearedID:\s(.*?)\)', line).group()
row.append(clear) // should crash
except:
clear = 'clearedID:'
row = [] // this should be defined where I put the star ( * )
data.append(row) // always appends empty row ( [] )

Related

Reading a Python File to EOF while performing if statments

I am working on creating a program to concatenate rows within a file. Each file has a header, datarows labeled DAT001 to DAT113 and a trailer. Each line of concatenated rows will have DAT001 to DAT100 and 102-113 is optional. I need to print the header, concatenating DAT001-113 and when the file finds a row with DAT001 I need to start a new line concatenating DAT001-113 again. After that is all done, I will print the trailer. I have an IF statement started but it only writes the header and skips all other logic. I apologize that this is very basic - but I am struggling with reading rows over and over again without knowing how long the file might be.
I have tried the below code but it won't read or print after the header.
import pandas as pd
destinationFile = "./destination-file.csv"
sourceFile = "./TEST.txt"
header = "RHR"
data = "DPSPOS"
beg_data = "DAT001"
data2 = "DAT002"
data3 = "DAT003"
data4 = "DAT004"
data5 = "DAT005"
data6 = "DAT006"
data7 = "DAT007"
data8 = "DAT008"
data100 = "DAT100"
data101 = "DAT101"
data102 = "DAT102"
data103 = "DAT103"
data104 = "DAT104"
data105 = "DAT105"
data106 = "DAT106"
data107 = "DAT107"
data108 = "DAT108"
data109 = "DAT109"
data110 = "DAT110"
data111 = "DAT111"
data112 = "DAT112"
data113 = "DAT113"
req_data = ''
opt101 = ''
opt102 = ''
with open(sourceFile) as Tst:
for line in Tst.read().split("\n"):
if header in line:
with open(destinationFile, "w+") as dst:
dst.write(line)
elif data in line:
if beg_data in line:
req_data = line+line+line+line+line+line+line+line+line
if data101 in line:
opt101 = line
if data102 in line:
opt102 = line
new_line = pd.concat(req_data,opt101,opt102)
with open(destinationFile, "w+") as dst:
dst.write(new_line)
else:
if trailer in line:
with open(destinationFile, "w+") as dst:
dst.write(line)
Just open the output file once for the whole loop, not every time through the loop.
Check whether the line begins with DAT101. If it does, write the trailer to the current line and start a new line by printing the header.
Then for every line that begins with DAT, write it to the file in the current line.
first_line = True
with open(sourceFile) as Tst, open(destinationFile, "w+") as dst:
for line in Tst.read().split("\n"):
# start a new line when reading DAT101
if line.startswith(beg_data):
if not first_line: # need to end the current line
dst.write(trailer + '\n')
first_line = False
dst.write(header)
# copy all the lines that begin with `DAT`
if line.startswith('DAT'):
dst.write(line)
# end the last line
dst.write(trailer + '\n')
See if the following code helps make progress. It was not tested because no
Minimum Runnable Example is provided.
with open(destinationFile, "a") as dst:
# The above will keep the file open until after all the indented code runs
with open(sourceFile) as Tst:
# The above will keep the file open until after all the indented code runs
for line in Tst.read().split("\n"):
if header in line:
dst.write(line)
elif data in line:
if beg_data in line:
req_data = line + line + line + line + line + line + line + line + line
if data101 in line:
opt101 = line
if data102 in line:
opt102 = line
new_line = pd.concat(req_data, opt101, opt102)
dst.write(new_line)
else:
if trailer in line:
dst.write(line)
# With is a context manager which will automatically close the files.

How to search for the next keyword in a line of data

I'm trying to iterate through datafile and search for a keyword in the data. If that keyword is found, I want to print the third word in the next line. Example:
for line in data:
nextline = next(data)
if 'REMARK 350 BIOMOLECULE:' == line.strip():
bio = nextline[2]
print("\t".join([bio]), file=datafile)
datafile.close()
Here, I'm searching for the words 'REMARK 350 BIOMOLECULE:', and if found, print out the third column of the next line : "AUTHOR" as output
EDIT: removed extra : on catch
You need a flag.
catch = False
for line in data:
if catch:
magic = line.split()[2]
print(f"\t{magic}", file=datafile)
catch = 'REMARK 350 BIOMOLECULE:' == line.strip()
datafile.close()
Turned into an complete script:
import sys
datafile = sys.stdout
data = """\
REMARK 350 BIOMOLECULE:
REMARK 350 AUTHOR""".splitlines()
catch = False
for line in data:
if catch:
magic = line.split()[2]
print(f"\t{magic}", file=datafile)
catch = 'REMARK 350 BIOMOLECULE:' == line.strip()
datafile.close()
Output:
C:\tmp>python x.py
AUTHOR
C:\tmp>

How to completely delete the first line of a text file?

I have a script that outputs a text file (Mod_From_SCRSTXT.txt). I need to delete the first line of that file.
I have tried changing the last line of the find function shown below. The first line still get printed in the new file created even with the changes.
def find(substr, infile, outfile):
with open(infile) as a, open(outfile, 'a') as b:
for line in a:
if substr in line:
b.write(line[1:])
srcn_path1 = input(" Enter Path. Example: U:\...\...\SRCNx\SCRS.TXT\n" +
" Enter SRCS.TXT's Path: ")
print ()
scrNumber1 = input(' Enter SCR number: ')
print ()
def find(substr, infile, outfile):
with open(infile) as a, open(outfile, 'a') as b:
for line in a:
if substr in line:
b.write(line) # or (line + '\n')
# action station:
find(scrNumber1, srcn_path1, 'Mod_From_SCRSTXT.txt')
Actual result:
VSOAU-0004 16999
VSOAU-0004
VSOAU-0004
VSOAU-0004
VSOAU-0004
Expected result:
VSOAU-0004
VSOAU-0004
VSOAU-0004
VSOAU-0004
You'll want to make a minor adjustment:
You can either count the lines in the file:
numberOfLines = 0
for line in file:
numberOfLines += 1
for line in range(1, linesInFile + 1):
Or you can ignore the first line through many different ways, this being a simple one:
ignoredLine = 0
for line in file:
if not ignoredLine:
ignoredLine = 1
else:
#Do stuff with the other lines
import pathlib
import os
import copy
import io
def delete_first_line(read_path):
try:
read_path = pathlib.Path(str(read_path))
write_path = str(copy.copy(read_path)) + ".temp"
while os.path.exists(write_path):
write_path = write_path + ".temp"
with open(read_path , mode = "r") as inf:
with open(write_path, mode="w") as outf:
it_inf = iter(inf)
next(it_inf) # discard first line
for line in it_inf:
print(line, file = outf)
os.remove(read_path)
os.rename(write_path, read_path)
except StopIteration:
with io.StringIO() as string_stream:
print(
"Cannot remove first line from an empty file",
read_path,
file = string_stream,
sep = "\n"
)
msg = string_stream.getvalue()
raise ValueError(msg)
except FileNotFoundError:
with io.StringIO() as string_stream:
print(
"Cannot remove first line from non-existant file",
read_path,
file = string_stream,
sep = "\n"
)
msg = string_stream.getvalue()
raise ValueError(msg)
finally:
pass
return

Parsing a dictionary in Python to my current table

I have a table that contains a few categories and two of them are: mac address and device name. I had a the list of my mac address written in my code (hardcoded) with their corresponding device names (ie deviceDict['00:00:00:00:00:00']= name)
Now, I passed those mac addresses and device names to a text file to be read from that same Python code and parse it onto my table. The code currently recognizes the text file but it is not parsing that information onto the table.
Here is the code:
# File: WapLogParser.py
# Desc: Parses a WAP log file and pulls out information relating to connected clients
# Usage: python WapLogParser.py [file glob]
import re
import sys
import glob
import os
deviceDict = dict()
# Base table for storing client info
# All names must match what is in the Wap Log file
# Exceptions: Date, Wap Name, Device Name - which are provided outside of the result parsing
table = [["Ssid", "Vlan", "Mac Address", "Connected Time", "Ip Address", "Rssi", "Date", "Wap Name", "Device Name"]]
def ParseResult(result, date, wapName):
lines = result.split('\n')
lines = list(filter(None, lines))
# Any useful info will be at least 2 lines long
if len(lines) == 1:
return
# create empty row
data = [""] * len(table[0])
# for each item in the result place it in the correct spot in the row
for line in lines:
if line != "":
# Parse the key/value pair
m = re.match(r"(.*):\s\.*\s?(.*)", line)
if m is not None:
for idx in range(len(table[0])):
if table[0][idx].lower() == m[1].lower():
data[idx] = m[2]
else:
break
# Remove the '(dBm)' from the RSSI value
data[5] = data[5].split()[0]
# Append WAP specific items to row
data[6] = date
data[7] = wapName
data[8] = GetDeviceName(data[2].upper())
# Add row to table
table.append(data)
def ParseFile(path):
with open(path) as f:
lines = f.readlines()
result = ""
command = ""
date = ""
# WAP name is always on the first line 16 characters in with 4
# unnecessary characters trailing
wapName = lines[0].strip()[16:-4]
for line in lines:
line = line.strip()
# Is an issued command?
if line.startswith("/#"):
if command != "":
ParseResult(result, date, wapName)
command = ""
# reset the result for the new command
result = ""
m = re.match(r"^/#.*show\sclient.*stats$", line)
if m is not None:
command = line
# Anything that is not a command add to the result
else:
result += line + "\n"
# Do we have the date?
if line.startswith("Current date:"):
date = line.replace("Current date: ", "")
# Print output to stderr
def eprint(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)
# Print a 2d array in a csv format
def PrintAsCsv(table):
for row in table:
print(",".join(row))
def Main():
InitDeviceDict()
numArgs = len(sys.argv)
for filename in glob.iglob(sys.argv[numArgs - 1], recursive=True):
# Globs get directories too
if os.path.isfile(filename):
eprint("Parsing " + filename)
try:
ParseFile(filename)
except Exception as e: # Mainly for if we see a binary file
eprint("Bad file: " + e)
# Print in a format we can use
PrintAsCsv(table)
def GetDeviceName(macAddress):
if macAddress in deviceDict:
return deviceDict[macAddress]
manufacturerPart = macAddress[:8]
if manufacturerPart in deviceDict:
return deviceDict[manufacturerPart]
return 'Unknown Device'
def InitDeviceDict():
with open('try.txt','r') as fo:
for line in fo:
deviceDict = {}
line = line.split(',')
macAddress = line[0].strip()
manufacturerPart = line[1].strip()
if macAddress in deviceDict:
deviceDict[macAddress].append(manufacturerPart)
else:
deviceDict[macAddress]=(manufacturerPart)
print(deviceDict)
# entry point
# script arguments:
# WapLogParser.py [file glob]
if __name__ == "__main__":
Main()
The issue is on the functions GetDeviceName and InitDeviceDict. When I run the code and then a batch file to display my info on excel, I keep getting "unknown device" (as if it is not recognizing the mac address I entered to produce the device name)
Any way I can correct this? Thank you
The deviceDict that is populated in InitDeviceDict is not the global deviceDict. You are only modifying a function-local dictionary (and resetting it every line as well). Remove deviceDict = {} from that function and, at the top of the function use global deviceDict to declare that you are modifying the global.
def InitDeviceDict():
global deviceDict
with open('try.txt','r') as fo:
for line in fo:
line = line.split(',')
macAddress = line[0].strip()
manufacturerPart = line[1].strip()
if macAddress in deviceDict:
deviceDict[macAddress].append(manufacturerPart)
else:
deviceDict[macAddress]=[manufacturerPart]

Print text between two separators?

I have config file:
$ cat ../secure/test.property
#<TITLE>Connection setting
#MAIN DEV
jdbc.main.url=
jdbc.main.username=
jdbc.main.password=
#<TITLE>Mail settings
mail.smtp.host=127.0.0.1
mail.smtp.port=25
mail.smtp.on=false
email.subject.prefix=[DEV]
#<TITLE>Batch size for package processing
exposureImportService.batchSize=10
exposureImportService.waitTimeInSecs=10
ImportService.batchSize=400
ImportService.waitTimeInSecs=10
#<TITLE>Other settings
usePrecalculatedAggregation=true
###################### Datasource wrappers, which allow to log additional information
bean.datasource.query_log_wrapper=mainDataSourceWrapper
bean.gpc_datasource.query_log_wrapper=gpcDataSourceWrapper
time.to.keep.domain=7*12
time.to.keep.uncompress=1
#oracle max batch size
dao.batch.size.max=30
And function, which return line "#<TITLE>Other settings" (for example), to select "config section".
Next, need to print all lines between selected "section", and next line, startwith #<TITLE>.
How it can be realized?
P.S.
def select_section(property_file):
while True:
with open(os.path.join(CONF_DIR, property_file), 'r+') as file:
text = file.readlines()
list = []
print()
for i in text:
if '<TITLE>' in i:
line = i.lstrip('#<TITLE>').rstrip('\n')
list.append(line)
print((list.index(line)), line)
res_section = int(raw_input('\nPlease, select section to edit: '))
print('You selected: %s' % list[res_section])
if answer('Is it OK? '):
return(list[res_section])
break
And it's work like:
...
0 Connection setting
1 Mail settings
2 Batch size for package processing
3 Other settings
Please, select section to edit:
...
And expected output, if select Connection setting:
...
0 jdbc.main.url
1 jdbc.main.username
2 jdbc.main.password
Please, select line to edit:
...
If I understand the problem correctly, here's a solution that assembles the requested section as it reads the file:
def get_section(section):
marker_line = '#<TITLE>{}'.format(section)
in_section = False
section_lines = []
with open('test.property') as f:
while True:
line = f.readline()
if not line:
break
line = line.rstrip()
if line == marker_line:
in_section = True
elif in_section and line.startswith('#<TITLE>'):
break
if in_section:
if not line or line.startswith('#'):
continue
section_lines.append(line)
return '\n'.join(['{} {}'.format(i, line)
for i, line in enumerate(section_lines)])
print get_section('Connection setting')
Output:
0 jdbc.main.url=
1 jdbc.main.username=
2 jdbc.main.password=
Perhaps this will get you started.
Here's a quick solution:
def get_section(section):
results = ''
with open('../secure/test.property') as f:
lines = [l.strip() for l in f.readlines()]
indices = [i for i in range(len(lines)) if lines[i].startswith('#<TITLE>')]
for i in xrange(len(indices)):
if lines[indices[i]] == '#<TITLE>' + section:
for j in xrange(indices[i], indices[i+1] if i < len(indices)-1 else len(lines) - 1):
results += lines[j] + '\n'
break
return results
You can use it like:
print get_section('Connection setting')
Not very elegant but it works!

Categories