Parsing a dictionary in Python to my current table - python

I have a table that contains a few categories and two of them are: mac address and device name. I had a the list of my mac address written in my code (hardcoded) with their corresponding device names (ie deviceDict['00:00:00:00:00:00']= name)
Now, I passed those mac addresses and device names to a text file to be read from that same Python code and parse it onto my table. The code currently recognizes the text file but it is not parsing that information onto the table.
Here is the code:
# File: WapLogParser.py
# Desc: Parses a WAP log file and pulls out information relating to connected clients
# Usage: python WapLogParser.py [file glob]
import re
import sys
import glob
import os
deviceDict = dict()
# Base table for storing client info
# All names must match what is in the Wap Log file
# Exceptions: Date, Wap Name, Device Name - which are provided outside of the result parsing
table = [["Ssid", "Vlan", "Mac Address", "Connected Time", "Ip Address", "Rssi", "Date", "Wap Name", "Device Name"]]
def ParseResult(result, date, wapName):
lines = result.split('\n')
lines = list(filter(None, lines))
# Any useful info will be at least 2 lines long
if len(lines) == 1:
return
# create empty row
data = [""] * len(table[0])
# for each item in the result place it in the correct spot in the row
for line in lines:
if line != "":
# Parse the key/value pair
m = re.match(r"(.*):\s\.*\s?(.*)", line)
if m is not None:
for idx in range(len(table[0])):
if table[0][idx].lower() == m[1].lower():
data[idx] = m[2]
else:
break
# Remove the '(dBm)' from the RSSI value
data[5] = data[5].split()[0]
# Append WAP specific items to row
data[6] = date
data[7] = wapName
data[8] = GetDeviceName(data[2].upper())
# Add row to table
table.append(data)
def ParseFile(path):
with open(path) as f:
lines = f.readlines()
result = ""
command = ""
date = ""
# WAP name is always on the first line 16 characters in with 4
# unnecessary characters trailing
wapName = lines[0].strip()[16:-4]
for line in lines:
line = line.strip()
# Is an issued command?
if line.startswith("/#"):
if command != "":
ParseResult(result, date, wapName)
command = ""
# reset the result for the new command
result = ""
m = re.match(r"^/#.*show\sclient.*stats$", line)
if m is not None:
command = line
# Anything that is not a command add to the result
else:
result += line + "\n"
# Do we have the date?
if line.startswith("Current date:"):
date = line.replace("Current date: ", "")
# Print output to stderr
def eprint(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)
# Print a 2d array in a csv format
def PrintAsCsv(table):
for row in table:
print(",".join(row))
def Main():
InitDeviceDict()
numArgs = len(sys.argv)
for filename in glob.iglob(sys.argv[numArgs - 1], recursive=True):
# Globs get directories too
if os.path.isfile(filename):
eprint("Parsing " + filename)
try:
ParseFile(filename)
except Exception as e: # Mainly for if we see a binary file
eprint("Bad file: " + e)
# Print in a format we can use
PrintAsCsv(table)
def GetDeviceName(macAddress):
if macAddress in deviceDict:
return deviceDict[macAddress]
manufacturerPart = macAddress[:8]
if manufacturerPart in deviceDict:
return deviceDict[manufacturerPart]
return 'Unknown Device'
def InitDeviceDict():
with open('try.txt','r') as fo:
for line in fo:
deviceDict = {}
line = line.split(',')
macAddress = line[0].strip()
manufacturerPart = line[1].strip()
if macAddress in deviceDict:
deviceDict[macAddress].append(manufacturerPart)
else:
deviceDict[macAddress]=(manufacturerPart)
print(deviceDict)
# entry point
# script arguments:
# WapLogParser.py [file glob]
if __name__ == "__main__":
Main()
The issue is on the functions GetDeviceName and InitDeviceDict. When I run the code and then a batch file to display my info on excel, I keep getting "unknown device" (as if it is not recognizing the mac address I entered to produce the device name)
Any way I can correct this? Thank you

The deviceDict that is populated in InitDeviceDict is not the global deviceDict. You are only modifying a function-local dictionary (and resetting it every line as well). Remove deviceDict = {} from that function and, at the top of the function use global deviceDict to declare that you are modifying the global.
def InitDeviceDict():
global deviceDict
with open('try.txt','r') as fo:
for line in fo:
line = line.split(',')
macAddress = line[0].strip()
manufacturerPart = line[1].strip()
if macAddress in deviceDict:
deviceDict[macAddress].append(manufacturerPart)
else:
deviceDict[macAddress]=[manufacturerPart]

Related

Text parsing via python

I have 100+ files with the extension *.log. Each file contains the results from an ansible-playbook run. I would like to parse the data using python, so that I can import it into an excel spreadsheet. I need some help to automate the process.
File content are:
ok: [wrt02.test1] => {
"msg": "nxos"
}
TASK [checklist : OUTPUT IOS_XR] *******************************************************************************************************************************************************************************************************
skipping: [leaf1J0101.test2]
skipping: [leaf1J0102.test2]
ok: [spine01.test1] => {
"msg": [
"Bundle-Ether1.100 192.168.245.65 Up Up default ",
"Bundle-Ether10.151 192.168.203.3 Up Up default ",
"Loopback0 192.168.255.7 Up Up default ",
"MgmtEth0/RSP0/CPU0/0 192.168.224.15 Up Up MANAGEMENT",
"TenGigE0/0/0/2 192.168.114.114 Up Up default ",
"TenGigE0/0/0/3 192.168.82.170 Up Up default"
]
}
RESULTS:
spine01.test1,Bundle-Ether1.100,192.168.245.65,
spine01.test1,Bundle-Ether10.151,192.168.203.3,
spine01.test1,Loopback0,192.168.255.7,
spine01.test1,MgmtEth0/RSP0/CPU0/0,192.168.224.15,
spine01.test1,TenGigE0/0/0/2,192.168.114.114,
spine01.test1,TenGigE0/0/0/3,192.168.82.170
CODE:
def findIOS(output):
# String we're looking for
OUTIOS_string = "TASK [checklist : OUTPUTIOS] ***************************************************************************************************************************************************************************************************************"
end_string = "TASK"
# Find the start of our string
start_index = output.find(OUTIOS_string) + len(OUTIOS_string) + 2
# Find the end of our string
end_index = output.find(end_string, start_index + 1)
lines = output[start_index:end_index].split('\n')
# Create a list to store our resulting dictionaries
#print lines
d = []
for line in lines:
#print line
if line != "":
# If line is not empty, find our starting and closing brackets
# Find the host:
hstart = line.find('[')
hend = line.rfind(']') + 1
start = line.find('{')
end = line.rfind('}') + 1
hostname = line[hstart:hend]
# Store content between brackets
obj = line[start:end]
hostname = hostname.replace("[", "").replace("]","")
print hostname
print obj
# Convert string to dictionary, and store the results
d.append(eval(obj))
print d
return d
def main():
output = None
with open("../showint.log", "rb") as f:
output = f.read()
if __name__ == '__main__':
main()
How to fetch the into above format? Thanks for the help
I let you the pleasure of creating the code around that.
import re
# This matches the msg pattern
find_start = r"^\s+\"msg\":\s\["
# This matches the line with the information you want that you can access with match.groups()
find_line = r"^\s+\"([^\s]+)\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"

Python coding for opening and saving data to a file

I am having an issue getting the train function to work correctly in python. I can not modify the def function. I am at the point where I need to get the second file to read lines one at a time for PosList and i need to match the value of movieWordCount[z] in OpenPos. If the file is there, then I am good to incrment column 2 by one of t hat line (segmented by a space). If it is not, then I need the else to append it to the file end. It does not work. It does not append the values if it is missing and I am not sure if it will find the value if it is there. I have been stuck getting thsi to work for two days.
Here is my code segment I am working with:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
#Now use tokenize to split it apart by space and set to new array for me to call column2
else:
print "not found"
lines.append(movieWordCount[z] + " 1" + "\n")
Here is my full code:
#!/usr/bin/python
#Import Counter
import collections
from collections import Counter
#Was already here but pickle is used for data input and export
import math, os, pickle, re
class Bayes_Classifier:
def __init__(self, trainDirectory = "movie_reviews/"):
#If file listing exists skip to train
if os.path.isfile('iFileList'):
print "file found"
self.train()
#self.classify()
#If file listing does not exist skip to train
if not os.path.isfile('iFileList'):
print "no file"
newfile = 'iFileList'
tempList = set()
subDir = './movie_reviews'
for filenames in os.listdir(subDir):
my_sub_path = os.path.join(os.sep,subDir,filenames)
tempList.add(filenames)
self.save("filenames", "try3")
f = []
for fFileObj in os.walk("movie_reviews/"):
f.extend(fFileObj)
break
pickle.dump(f, open( "save.p", "wb" ))
self.save(f, "try4")
with open(newfile, 'wb') as fi:
pickle.dump(tempList, fi)
#print tempList
self.train()
#self.classify()
def train(self):
'''Trains the Naive Bayes Sentiment Classifier.'''
print "File ready for training"
#Open iFileList to use as input for opening movie files
x = 0
OpenIFileList = open('iFileList','r')
print "iFileList now Open"
#Loop through the file
for line in OpenIFileList:
#print "Ready to read lines"
#print "reading line " + line
if x > 4:
if x % 2 == 0:
#print line
s = line
if '-' in s:
comp = s.split("'")
#print comp[2]
print comp[1] #This is What you need for t he movie file
compValue1 = comp[1]
#Determine Positive/Negative.
#compType is the variable I am storing it to.
compType = compValue1.split("-",2)[1]
#print compType #Prints that middle value like 5 or 1
# This will do the work based on the value.
if compType == '5':
# print "you have a five" #Confirms the loop I am in.
#If file does not exists create it
if not os.path.exists('PosList'):
print "no file"
file('PosList', 'w').close()
#Open file that needs to be reviewed for word count
compValue2 = "movie_reviews/" + compValue1
print compValue2 #Prints the directory and file path
OpenMovieList = open(compValue2,'r')
for commentLine in OpenMovieList:
commentPositive = commentLine.split(" ")
commentPositiveCounter = Counter(commentPositive)
#print commentPositiveCounter # " Comment Pos goes here"
#if commentLine != '' or commentLine != ' ':
#Get first word, second word, ....
if commentLine and (not commentLine.isspace()):
movieWordCount = self.tokenize(commentLine)
y = len(movieWordCount) #determines length of string
print y
z = 0
#print movieWordCount[0] # Shows the zero position in the file.
while z < y:
print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
z = z + 1
#Close the files
OpenMovieList.close()
OpenPos.close()
x += 1
#for line2 in OpenIFileList.readlines():
#for line in open('myfile','r').readlines():
#do_something(line)
#Save results
#Close the File List
OpenIFileList.close()
def loadFile(self, sFilename):
'''Given a file name, return the contents of the file as a string.'''
f = open(sFilename, "r")
sTxt = f.read()
f.close()
return sTxt
def save(self, dObj, sFilename):
'''Given an object and a file name, write the object to the file using pickle.'''
f = open(sFilename, "w")
p = pickle.Pickler(f)
p.dump(dObj)
f.close()
def load(self, sFilename):
'''Given a file name, load and return the object stored in the file.'''
f = open(sFilename, "r")
u = pickle.Unpickler(f)
dObj = u.load()
f.close()
return dObj
def tokenize(self, sText):
'''Given a string of text sText, returns a list of the individual tokens that
occur in that string (in order).'''
lTokens = []
sToken = ""
for c in sText:
if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-':
sToken += c
else:
if sToken != "":
lTokens.append(sToken)
sToken = ""
if c.strip() != "":
lTokens.append(str(c.strip()))
if sToken != "":
lTokens.append(sToken)
return lTokens
To open a file for writing, you can use
with open('PosList', 'w') as Open_Pos
As you are using the with form, you do not need to close the file; Python will do that for you at the end of the with-block.
So assuming that the way you add data to the lines variable is correct, you could remove the superfluous code OpenMovieList.close() and OpenPos.close(), and append 2 lines to your code:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
with open("PosList", "w") as OpenPos:
OpenPos.write(lines)

python: read file and split the data

I have a file config and the contents are separated by space " "
cat config
/home/user1 *.log,*.txt 30
/home/user2 *.trm,*.doc,*.jpeg 10
I want to read this file,parse each line and print each field from the each line.
Ex:-
Dir = /home/user1
Fileext = *.log,*.txt
days=30
I couldn't go further than the below..
def dir():
file = open('config','r+')
cont = file.readlines()
print "file contents are %s" % cont
for i in range(len(cont)):
j = cont[i].split(' ')
dir()
Any pointers how to move further?
Your code is fine, you are just missing the last step processing each element of the splitted string, try this:
def dir():
file = open('config','r+')
cont = file.readlines()
print "file contents are %s" % cont + '\n'
elements = []
for i in range(len(cont)):
rowElems = cont[i].split(' ')
elements.append({ 'dir' : rowElems[0], 'ext' : rowElems[1], 'days' : rowElems[2] })
for e in elements:
print "Dir = " + e['dir']
print "Fileext = " + e['ext']
print "days = " + e['days']
dir()
At the end of this code, you will have all the rows processed and stored in an array of dictionaries you can easily access later.
You can write a custom function to parse each line, and then use the map function to apply that function against each line in file.readlines():
def parseLine(line):
# function to split and parse each line,
# and return the formatted string
Dir, FileExt, Days = line.split(' ')[:3]
return 'Dir = {}\nFileext = {}\nDays = {}'.format(Dir, FileExt, Days)
def dir():
with open('config','r+') as file:
print 'file contents are\n' + '\n'.join(map(parseLine, file.readlines()))
Results:
>>> dir()
file contents are
Dir = /home/user1
Fileext = *.log,*.txt
Days = 30
Dir = /home/user2
Fileext = *.trm,*.doc,*.jpeg
Days = 10

Pick parts from a txt file and copy to another file with python

I'm in trouble here. I need to read a file. Txt file that contains a sequence of records, check the records that I want to copy them to a new file.
The file content is like this (this is just an example, the original file has more than 30 000 lines):
AAAAA|12|120 #begin file
00000|46|150 #begin register
03000|TO|460
99999|35|436 #end register
00000|46|316 #begin register
03000|SP|467
99999|33|130 #end register
00000|46|778 #begin register
03000|TO|478
99999|33|457 #end register
ZZZZZ|15|111 #end file
The records that begin with 03000 and have the characters 'TO' must be written to a new file. Based on the example, the file should look like this:
AAAAA|12|120 #begin file
00000|46|150 #begin register
03000|TO|460
99999|35|436 #end register
00000|46|778 #begin register
03000|TO|478
99999|33|457 #end register
ZZZZZ|15|111 #end file
Code:
file = open("file.txt",'r')
newFile = open("newFile.txt","w")
content = file.read()
file.close()
# here I need to check if the record exists 03000 characters 'TO', if it exists, copy the recordset 00000-99999 for the new file.
I did multiple searches and found nothing to help me.
Thank you!
with open("file.txt",'r') as inFile, open("newFile.txt","w") as outFile:
outFile.writelines(line for line in inFile
if line.startswith("03000") and "TO" in line)
If you need the previous and the next line, then you have to iterate inFile in triads. First define:
def gen_triad(lines, prev=None):
after = current = next(lines)
for after in lines:
yield prev, current, after
prev, current = current, after
And then do like before:
outFile.writelines(''.join(triad) for triad in gen_triad(inFile)
if triad[1].startswith("03000") and "TO" in triad[1])
import re
pat = ('^00000\|\d+\|\d+.*\n'
'^03000\|TO\|\d+.*\n'
'^99999\|\d+\|\d+.*\n'
'|'
'^AAAAA\|\d+\|\d+.*\n'
'|'
'^ZZZZZ\|\d+\|\d+.*')
rag = re.compile(pat,re.MULTILINE)
with open('fifi.txt','r') as f,\
open('newfifi.txt','w') as g:
g.write(''.join(rag.findall(f.read())))
For files with additional lines between lines beginning with 00000, 03000 and 99999, I didn't find simpler code than this one:
import re
pat = ('(^00000\|\d+\|\d+.*\n'
'(?:.*\n)+?'
'^99999\|\d+\|\d+.*\n)'
'|'
'(^AAAAA\|\d+\|\d+.*\n'
'|'
'^ZZZZZ\|\d+\|\d+.*)')
rag = re.compile(pat,re.MULTILINE)
pit = ('^00000\|.+?^03000\|TO\|\d+.+?^99999\|')
rig = re.compile(pit,re.DOTALL|re.MULTILINE)
def yi(text):
for g1,g2 in rag.findall(text):
if g2:
yield g2
elif rig.match(g1):
yield g1
with open('fifi.txt','r') as f,\
open('newfifi.txt','w') as g:
g.write(''.join(yi(f.read())))
file = open("file.txt",'r')
newFile = open("newFile.txt","w")
content = file.readlines()
file.close()
newFile.writelines(filter(lambda x:x.startswith("03000") and "TO" in x,content))
This seems to work. The other answers seem to only be writing out records that contain '03000|TO|' but you have to write out the record before and after that as well.
import sys
# ---------------------------------------------------------------
# ---------------------------------------------------------------
# import file
file_name = sys.argv[1]
file_path = 'C:\\DATA_SAVE\\pick_parts\\' + file_name
file = open(file_path,"r")
# ---------------------------------------------------------------
# create output files
output_file_path = 'C:\\DATA_SAVE\\pick_parts\\' + file_name + '.out'
output_file = open(output_file_path,"w")
# create output files
# ---------------------------------------------------------------
# process file
temp = ''
temp_out = ''
good_write = False
bad_write = False
for line in file:
if line[:5] == 'AAAAA':
temp_out += line
elif line[:5] == 'ZZZZZ':
temp_out += line
elif good_write:
temp += line
temp_out += temp
temp = ''
good_write = False
elif bad_write:
bad_write = False
temp = ''
elif line[:5] == '03000':
if line[6:8] != 'TO':
temp = ''
bad_write = True
else:
good_write = True
temp += line
temp_out += temp
temp = ''
else:
temp += line
output_file.write(temp_out)
output_file.close()
file.close()
Output:
AAAAA|12|120 #begin file
00000|46|150 #begin register
03000|TO|460
99999|35|436 #end register
00000|46|778 #begin register
03000|TO|478
99999|33|457 #end register
ZZZZZ|15|111 #end file
Does it have to be python? These shell commands would do the same thing in a pinch.
head -1 inputfile.txt > outputfile.txt
grep -C 1 "03000|TO" inputfile.txt >> outputfile.txt
tail -1 inputfile.txt >> outputfile.txt
# Whenever I have to parse text files I prefer to use regular expressions
# You can also customize the matching criteria if you want to
import re
what_is_being_searched = re.compile("^03000.*TO")
# don't use "file" as a variable name since it is (was?) a builtin
# function
with open("file.txt", "r") as source_file, open("newFile.txt", "w") as destination_file:
for this_line in source_file:
if what_is_being_searched.match(this_line):
destination_file.write(this_line)
and for those who prefer a more compact representation:
import re
with open("file.txt", "r") as source_file, open("newFile.txt", "w") as destination_file:
destination_file.writelines(this_line for this_line in source_file
if re.match("^03000.*TO", this_line))
code:
fileName = '1'
fil = open(fileName,'r')
import string
##step 1: parse the file.
parsedFile = []
for i in fil:
##tuple1 = (1,2,3)
firstPipe = i.find('|')
secondPipe = i.find('|',firstPipe+1)
tuple1 = (i[:firstPipe],\
i[firstPipe+1:secondPipe],\
i[secondPipe+1:i.find('\n')])
parsedFile.append(tuple1)
fil.close()
##search criterias:
searchFirst = '03000'
searchString = 'TO' ##can be changed if and when required
##step 2: used the parsed contents to write the new file
filout = open('newFile','w')
stringToWrite = parsedFile[0][0] + '|' + parsedFile[0][1] + '|' + parsedFile[0][2] + '\n'
filout.write(stringToWrite) ##to write the first entry
for i in range(1,len(parsedFile)):
if parsedFile[i][1] == searchString and parsedFile[i][0] == searchFirst:
for j in range(-1,2,1):
stringToWrite = parsedFile[i+j][0] + '|' + parsedFile[i+j][1] + '|' + parsedFile[i+j][2] + '\n'
filout.write(stringToWrite)
stringToWrite = parsedFile[-1][0] + '|' + parsedFile[-1][1] + '|' + parsedFile[-1][2] + '\n'
filout.write(stringToWrite) ##to write the first entry
filout.close()
I know that this solution may be a bit long. But it is quite easy to understand. And it seems an intuitive way to do it. And I have already checked this with the Data that you have provided and it works perfectly.
Please tell me if you need some more explanation on the code. I will definitely add the same.
I tip (Beasley and Joran elyase) very interesting, but it only allows to get the contents of the line 03000. I would like to get the contents of the lines 00000 to line 99999.
I even managed to do here, but I am not satisfied, I wanted to make a more cleaner.
See how I did:
file = open(url,'r')
newFile = open("newFile.txt",'w')
lines = file.readlines()
file.close()
i = 0
lineTemp = []
for line in lines:
lineTemp.append(line)
if line[0:5] == '03000':
state = line[21:23]
if line[0:5] == '99999':
if state == 'TO':
newFile.writelines(lineTemp)
else:
linhaTemp = []
i = i+1
newFile.close()
Suggestions...
Thanks to all!

Parsing Input File in Python

I have a plain text file with some data in it, that I'm trying to open and read using a Python (ver 3.2) program, and trying to load that data into a data structure within the program.
Here's what my text file looks like (file is called "data.txt")
NAME: Joe Smith
CLASS: Fighter
STR: 14
DEX: 7
Here's what my program looks like:
player_name = None
player_class = None
player_STR = None
player_DEX = None
f = open("data.txt")
data = f.readlines()
for d in data:
# parse input, assign values to variables
print(d)
f.close()
My question is, how do I assign the values to the variables (something like setting player_STR = 14 within the program)?
player = {}
f = open("data.txt")
data = f.readlines()
for line in data:
# parse input, assign values to variables
key, value = line.split(":")
player[key.strip()] = value.strip()
f.close()
now the name of your player will be player['name'], and the same goes for all other properties in your file.
import re
pattern = re.compile(r'([\w]+): ([\w\s]+)')
f = open("data.txt")
v = dict(pattern.findall(f.read()))
player_name = v.get("name")
plater_class = v.get('class')
# ...
f.close()
The most direct way to do it is to assign the variables one at a time:
f = open("data.txt")
for line in f: # loop over the file directly
line = line.rstrip() # remove the trailing newline
if line.startswith('NAME: '):
player_name = line[6:]
elif line.startswith('CLASS: '):
player_class = line[7:]
elif line.startswith('STR: '):
player_strength = int(line[5:])
elif line.startswith('DEX: '):
player_dexterity = int(line[5:])
else:
raise ValueError('Unknown attribute: %r' % line)
f.close()
That said, most Python programmers would stored the values in a dictionary rather than in variables. The fields can be stripped (removing the line endings) and split with: characteristic, value = data.rstrip().split(':'). If the value should be a number instead of a string, convert it with float() or int().

Categories