How to use findall or search to extract data in python? - python

Here is my string,
str = 'A:[{type:"mb",id:9,name:"John",url:"/mb9/",cur:0,num:83498},
{type:"mb",id:92,name:"Mary",url:"/mb92/",cur:0,num:404},
{type:"mb",id:97,name:"Dan",url:"/mb97/",cur:0,num:139},
{type:"mb",id:268,name:"Jennifer",url:"/mb268/",cur:0,num:0},
{type:"mb",id:289,name:"Mike",url:"/mb289/",cur:0,num:0}],B:
[{type:"mb",id:157,name:"Sue",url:"/mb157/",cur:0,num:35200},
{type:"mb",id:3,name:"Rob",url:"/mb3/",cur:0,num:103047},
{type:"mb",id:2,name:"Tracy",url:"/mb2/",cur:0,num:87946},
{type:"mb",id:26,name:"Jenny",url:"/mb26/",cur:0,num:74870},
{type:"mb",id:5,name:"Florence",url:"/mb5/",cur:0,num:37261},
{type:"mb",id:127,name:"Peter",url:"/mb127/",cur:0,num:63711},
{type:"mb",id:15,name:"Grace",url:"/mb15/",cur:0,num:63243},
{type:"mb",id:82,name:"Tony",url:"/mb82/",cur:0,num:6471},
{type:"mb",id:236,name:"Lisa",url:"/mb236/",cur:0,num:4883}]'
I want to use findall or search to extract all the data under "name" and "url" from str. Here is what I did,
pattern = re.comile(r'type:(.*),id:(.*),name:(.*),url:(.*),cur:(.*),num:
(.*)')
for (v1, v2, v3, v4, v5, v6) in re.findall(pattern, str):
print v3
print v4
But unfortunately, this doesn't do what I want. Is there anything wrong? Thanks for your inputs.

You shouldn't call your string "str", because that shadows the built-in type. But here's an option for you:
# Find all of the entries
x = re.findall('(?<![AB]:)(?<=:).*?(?=[,}])', s)
['"mb"', '9', '"John"', '"/mb9/"', '0', '83498', '"mb"', '92', '"Mary"',
'"/mb92/"', '0', '404', '"mb"', '97', '"Dan"', '"/mb97/"', '0', '139',
'"mb"', '268', '"Jennifer"', '"/mb268/"', '0', '0', '"mb"', '289', '"Mike"',
'"/mb289/"', '0', '0', '"mb"', '157', '"Sue"', '"/mb157/"', '0', '35200',
'"mb"', '3', '"Rob"', '"/mb3/"', '0', '103047', '"mb"', '2', '"Tracy"',
'"/mb2/"', '0', '87946', '"mb"', '26', '"Jenny"', '"/mb26/"', '0', '74870',
'"mb"', '5', '"Florence"', '"/mb5/"', '0', '37261', '"mb"', '127', '"Peter"',
'"/mb127/"', '0', '63711', '"mb"', '15', '"Grace"', '"/mb15/"', '0', '63243',
'"mb"', '82', '"Tony"', '"/mb82/"', '0', '6471', '"mb"', '236', '"Lisa"',
'"/mb236/"', '0', '4883']
# Break up into each section
y = []
for i in range(0, len(x), 6):
y.append(x[i:i+6])
[['"mb"', '9', '"John"', '"/mb9/"', '0', '83498']
['"mb"', '92', '"Mary"', '"/mb92/"', '0', '404']
['"mb"', '97', '"Dan"', '"/mb97/"', '0', '139']
['"mb"', '268', '"Jennifer"', '"/mb268/"', '0', '0']
['"mb"', '289', '"Mike"', '"/mb289/"', '0', '0']
['"mb"', '157', '"Sue"', '"/mb157/"', '0', '35200']
['"mb"', '3', '"Rob"', '"/mb3/"', '0', '103047']
['"mb"', '2', '"Tracy"', '"/mb2/"', '0', '87946']
['"mb"', '26', '"Jenny"', '"/mb26/"', '0', '74870']
['"mb"', '5', '"Florence"', '"/mb5/"', '0', '37261']
['"mb"', '127', '"Peter"', '"/mb127/"', '0', '63711']
['"mb"', '15', '"Grace"', '"/mb15/"', '0', '63243']
['"mb"', '82', '"Tony"', '"/mb82/"', '0', '6471']
['"mb"', '236', '"Lisa"', '"/mb236/"', '0', '4883']]
# Name is 3rd value in each list and url is 4th
for i in y:
name = i[2]
url = i[3]

You can try this:
import re
# Sample input: two lists (A and B) of JavaScript-style object literals.
data = """
A:[{type:"mb",id:9,name:"John",url:"/mb9/",cur:0,num:83498},
{type:"mb",id:92,name:"Mary",url:"/mb92/",cur:0,num:404},
{type:"mb",id:97,name:"Dan",url:"/mb97/",cur:0,num:139},
{type:"mb",id:268,name:"Jennifer",url:"/mb268/",cur:0,num:0},
{type:"mb",id:289,name:"Mike",url:"/mb289/",cur:0,num:0}],B:
[{type:"mb",id:157,name:"Sue",url:"/mb157/",cur:0,num:35200},
{type:"mb",id:3,name:"Rob",url:"/mb3/",cur:0,num:103047},
{type:"mb",id:2,name:"Tracy",url:"/mb2/",cur:0,num:87946},
{type:"mb",id:26,name:"Jenny",url:"/mb26/",cur:0,num:74870},
{type:"mb",id:5,name:"Florence",url:"/mb5/",cur:0,num:37261},
{type:"mb",id:127,name:"Peter",url:"/mb127/",cur:0,num:63711},
{type:"mb",id:15,name:"Grace",url:"/mb15/",cur:0,num:63243},
{type:"mb",id:82,name:"Tony",url:"/mb82/",cur:0,num:6471},
{type:"mb",id:236,name:"Lisa",url:"/mb236/",cur:0,num:4883}]
"""
# Match the quoted value after every `name:` or `url:` key, in document order.
# The lookbehind/lookahead keep the key and the trailing comma out of the match;
# [1:-1] then strips the surrounding double quotes.
full_data = [i[1:-1] for i in re.findall(r'(?<=name:)".*?"(?=,)|(?<=url:)".*?"(?=,)', data)]
# Names and urls alternate in full_data, so join each consecutive pair as "name:url".
final_data = [name + ":" + url for name, url in zip(full_data[::2], full_data[1::2])]
# Print the paired list, which is what the output below shows.
print(final_data)
Output
['John:/mb9/', 'Mary:/mb92/', 'Dan:/mb97/', 'Jennifer:/mb268/', 'Mike:/mb289/', 'Sue:/mb157/', 'Rob:/mb3/', 'Tracy:/mb2/', 'Jenny:/mb26/', 'Florence:/mb5/', 'Peter:/mb127/', 'Grace:/mb15/', 'Tony:/mb82/', 'Lisa:/mb236/']

Related

Why am I getting error after looping into file and trying to join the list

I have large text file which has numbers. I want to loop through the file and append the numbers to the list_of_numbers. Problem is that the loop appends to the list but not in the way I want, that's why the list looks like this after iteration
['\n', '+', '1', '6', '1', '0', '8', '5', '0', '7', '7', '6', '4', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '6', '0', '2', '9', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '6', '8', '4', '6', '\n', '+', '1', '6', '1', '0', '8', '5', '0', '5', '9', '3', '4', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '0', '7', '8', '3', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '9', '2', '8', '2', '\n', '+', '1', '6', '1', '0', '7', '6', '4', '0', '0', '4', '9', '\n']
this is just part of the output. I want this to be in this type [123455334,492023232,32322323]
I tried to do this but it does not work and gets errors
print(list([int(x) for x in ''.join(list_of_numbers).split('\n')]))
here is my full code
from tkinter import *
from tkinter import filedialog
import selenium
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium import webdriver
from selenium import webdriver
list_of_numbers=[]
full_list_of_numbers=[]
def openFile():
tf = filedialog.askopenfilename(
initialdir="C:/Users/MainFrame/Desktop/",
title="Open Text file",
filetypes=(("Text Files", "*.txt"),)
)
pathh.insert(END, tf)
tf = open(tf) # or tf = open(tf, 'r')
data = tf.read()
txtarea.insert(END, data)
tf.close()
for i in data:
list_of_numbers.append(i)
print(list_of_numbers)
ws = Tk()
ws.title("PythonGuides")
ws.geometry("400x450")
ws['bg']='#fb0'
txtarea = Text(ws, width=40, height=20)
txtarea.pack(pady=20)
pathh = Entry(ws)
pathh.pack(side=LEFT, expand=True, fill=X, padx=20)
Button(
ws,
text="Open File",
command=openFile
).pack(side=RIGHT, expand=True, fill=X, padx=20)
ws.mainloop()
print(list_of_numbers)
while ' ' in list_of_numbers:
list_of_numbers.remove(' ')
print(list([int(x) for x in ''.join(list_of_numbers).split('\n')]))
Look at that part
tf = open(tf) # or tf = open(tf, 'r')
data = tf.read()
txtarea.insert(END, data)
tf.close()
for i in data:
list_of_numbers.append(i)
data is one big string. Then you iterate over it one char at a time and append that single char (including '+' and '\n') to the list. So you get what you get.
Replace the above snippet with following:
with open(tf) as f: # use context manager
for line in f:
txtarea.insert(END, line)
list_of_numbers.append(int(line))
Note, this assumes there are no empty lines in your file. If there are, then
with open(tf) as f: # use context manager
for line in f:
txtarea.insert(END, line)
line = line.strip()
if line:
list_of_numbers.append(int(line))

Best way to create dict out of string in Python

I have the following string:
mystring = "freq1=440,freq2=425,freq3=410,freq4=0,freq5=0,freq6=0,freq7=0,freq8=0,freq9=0,freq10=0,freq11=0,freq12=0,freq13=0,freq14=0,freq15=0,freq16=0"
I would like to create a dictionary out of it such that I can access the values like mydict.freq1.
I can split the string easily enough:
splitstring = mystring.split(',')
So I get:
freq1=440
freq2=425
freq3=410
freq4=0
Which seems far from what I need. Is there an easy way to do this? I am currently on Python 2.7 but working on a migration to 3.0 eventually.
Code:
dict(x.split('=') for x in my_string.split(','))
Test Code:
my_string = "freq1=440,freq2=425,freq3=410,freq4=0,freq5=0,freq6=0,freq7=0,freq8=0,freq9=0,freq10=0,freq11=0,freq12=0,freq13=0,freq14=0,freq15=0,freq16=0"
my_dict = dict(x.split('=') for x in my_string.split(','))
print(my_dict)
Results:
{'freq1': '440', 'freq2': '425', 'freq3': '410', 'freq4': '0', 'freq5': '0', 'freq6': '0', 'freq7': '0', 'freq8': '0', 'freq9': '0', 'freq10': '0', 'freq11': '0', 'freq12': '0', 'freq13': '0', 'freq14': '0', 'freq15': '0', 'freq16': '0'}
Code:
# Comma-separated "key=value" pairs to turn into a dictionary.
mystring = "freq1=440,freq2=425,freq3=410,freq4=0,freq5=0,freq6=0,freq7=0,freq8=0,freq9=0,freq10=0,freq11=0,freq12=0,freq13=0,freq14=0,freq15=0,freq16=0"
splitdic = {}
for value in mystring.split(','):
    # Split each item once, on the first '=' only, into its key and its value.
    key, setting = value.split("=", 1)
    splitdic[key] = setting
print(splitdic)
Result:
{'freq9': '0', 'freq8': '0', 'freq2': '425', 'freq5': '0', 'freq4': '0', 'freq6': '0', 'freq15': '0', 'freq1': '440', 'freq3': '410', 'freq11': '0', 'freq12': '0', 'freq16': '0', 'freq10': '0', 'freq13': '0', 'freq14': '0', 'freq7': '0'}
mystring = "freq1=440,freq2=425,freq3=410,freq4=0,freq5=0,freq6=0,freq7=0,freq8=0,freq9=0,freq10=0,freq11=0,freq12=0,freq13=0,freq14=0,freq15=0,freq16=0"
res= {i.split('=')[0]:i.split('=')[1] for i in mystring.split(',') }
print(res)
output
{'freq1': '440', 'freq2': '425', 'freq3': '410', 'freq4': '0',
'freq5': '0', 'freq6': '0', 'freq7': '0', 'freq8': '0', 'freq9': '0',
'freq10': '0', 'freq11': '0', 'freq12': '0',
'freq13': '0', 'freq14': '0', 'freq15': '0', 'freq16': '0'}

How to save dictionary value into MySQL database tkinter python?

I want to save the values stored in a dictionary into a MySQL database from a tkinter GUI. The dictionary of values is as follows. Currently
I am using an if/else statement for each length, which is very lengthy.
db=storage.connect()
cursor=db.cursor()
sd1={'AWB Fees': ('122', 'Rupees', '1', '34', '4', '136.00', '8', '10.88'),
'Agency Charges': ('122', 'Rupees', '1', '78', '4', '312.00', '8',
'24.96'),'AWB': ('122', 'Rupees', '1', '34', '4', '136.00', '8',
'10.88')}
length=len(sd1)
y = (sd1.keys())
if (length==1):
za = ((tuple(y))[0])
xa = ((sd1[za])[0])
xb = ((sd1[za])[1])
xc = ((sd1[za])[2])
xd = ((sd1[za])[3])
xf = ((sd1[za])[4])
xg = ((sd1[za])[5])
xh = ((sd1[za])[6])
xi = ((sd1[za])[7])
cursor.execute(
"INSERT INTO sea_exp_tra_raiselocal_inv_fright_est
(rlcn_id,billing_head, sac, currency, ex_rate, rate, value,
amount, gst, gst_amnt) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
(sepmId,za,xa,xb,xc,xd,xf,xg,xh,xi))
else:
pass
db.commit()
db.close()
You're really overcomplicating things. First, let's clean up the code:
sd1 = {
'AWB Fees': ('122', 'Rupees', '1', '34', '4', '136.00', '8', '10.88'),
'Agency Charges': ('122', 'Rupees', '1', '78', '4', '312.00', '8', '24.96'),
'AWB': ('122', 'Rupees', '1', '34', '4', '136.00', '8', '10.88')
}
sql = """
INSERT INTO sea_exp_tra_raiselocal_inv_fright_est
(rlcn_id,billing_head, sac, currency, ex_rate, rate, value, amount, gst, gst_amnt)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
# Take the first billing head in sd1; its tuple supplies the remaining
# eight column values after rlcn_id and billing_head.
key = list(sd1.keys())[0]
values = (sepmId, key,) + sd1[key]
# Parameterised execute: the driver substitutes the ten %s placeholders.
cursor.execute(sql, values)
Now all you have to do is iterate over sd1.items() (which yields key, value pairs):
sd1 = {
'AWB Fees': ('122', 'Rupees', '1', '34', '4', '136.00', '8', '10.88'),
'Agency Charges': ('122', 'Rupees', '1', '78', '4', '312.00', '8', '24.96'),
'AWB': ('122', 'Rupees', '1', '34', '4', '136.00', '8', '10.88')
}
sql = """
INSERT INTO sea_exp_tra_raiselocal_inv_fright_est
(rlcn_id,billing_head, sac, currency, ex_rate, rate, value, amount, gst, gst_amnt)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
for key, val in sd1.items():
values = (sepmId, key,) + val
cursor.execute(sql, values)

Add numbers of a column within an array based of value from another column

I asked this question once but was very inconsistent in my wording. Here is my full code. I have a dataArray and wish to add numbers within the 5th column but only if within the same row, column 7 has a 0.
#!/usr/bin/python
#Date: 4.24.18
#importing necessary modules
import csv
import collections
import sys
from array import array
#variables for ease of use in script
fileName = 'medicaldata.tsv'
filePath = '/home/pjvaglic/Desktop/scripts/pythonScripts/final/data/'
dataURL = 'http://pages.mtu.edu/~toarney/sat3310/final/'
dataArray = []
sumBeds = 0
count = 0
countFac = 0
sumNSal = 0
sumNSalR = 0
#download file from MTU
downloadFile = urllib2.urlopen(dataURL + fileName)
#opening the file
with open(filePath + fileName, 'w') as output:
output.write(downloadFile.read())
output.close()
#count number of lines in the data file, take off the header, print results to screen
count = open(filePath + fileName).readlines()
print "There are", len(count)-1, "facilities accounted for in", filePath + fileName
#keep track of number of facilities
countFac = len(count)-1
#open data file, put everything in an array, cut everything at the tab delimiter
with open(filePath + fileName, 'rt') as inputfile:
next(inputfile)
dataArray = csv.reader(inputfile, delimiter='\t')
#sum the amount of beds are in the first column
for row in dataArray:
sumBeds += int(row[0])
print "There are ", sumBeds, "in the medical file."
print "There are about", sumBeds/countFac, "beds per facility."
#this line does not work for my purposes.
#list = [[row[4] for row in dataArray if row[6] == '1']]
#print list
Here is the dataArray. The last column has 0's and 1's. I believe they are strings. For example, in the first row it has a 0, so I want to take 5230 and add that to 6304 and then 6590, so forth and so on. Just rows that include a 0 in the last column.
['244', '128', '385', '23521', '5230', '5334', '0']
['59', '155', '203', '9160', '2459', '493', '1']
['120', '281', '392', '21900', '6304', '6115', '0']
['120', '291', '419', '22354', '6590', '6346', '0']
['120', '238', '363', '17421', '5362', '6225', '0']
['65', '180', '234', '10531', '3622', '449', '1']
['120', '306', '372', '22147', '4406', '4998', '1']
['90', '214', '305', '14025', '4173', '966', '1']
['96', '155', '169', '8812', '1955', '1260', '0']
['120', '133', '188', '11729', '3224', '6442', '1']
['62', '148', '192', '8896', '2409', '1236', '0']
['120', '274', '426', '20987', '2066', '3360', '1']
['116', '154', '321', '17655', '5946', '4231', '0']
['59', '120', '164', '7085', '1925', '1280', '1']
['80', '261', '284', '13089', '4166', '1123', '1']
['120', '338', '375', '21453', '5257', '5206', '1']
['80', '77', '133', '7790', '1988', '4443', '1']
['100', '204', '318', '18309', '4156', '4585', '1']
['60', '97', '213', '8872', '1914', '1675', '1']
['110', '178', '280', '17881', '5173', '5686', '1']
['120', '232', '336', '17004', '4630', '907', '0']
['135', '316', '442', '23829', '7489', '3351', '0']
['59', '163', '191', '9424', '2051', '1756', '1']
['60', '96', '202', '12474', '3803', '2123', '0']
['25', '74', '83', '4078', '2008', '4531', '1']
['221', '514', '776', '36029', '1288', '2543', '1']
['64', '91', '214', '8782', '4729', '4446', '1']
['62', '146', '204', '8951', '2367', '1064', '0']
['108', '255', '366', '17446', '5933', '2987', '1']
['62', '144', '220', '6164', '2782', '411', '1']
['90', '151', '286', '2853', '4651', '4197', '0']
['146', '100', '375', '21334', '6857', '1198', '0']
['62', '174', '189', '8082', '2143', '1209', '1']
['30', '54', '88', '3948', '3025', '137', '1']
['79', '213', '278', '11649', '2905', '1279', '0']
['44', '127', '158', '7850', '1498', '1273', '1']
['120', '208', '423', '29035', '6236', '3524', '0']
['100', '255', '300', '17532', '3547', '2561', '1']
['49', '110', '177', '8197', '2810', '3874', '1']
['123', '208', '336', '22555', '6059', '6402', '1']
['82', '114', '136', '8459', '1995', '1911', '1']
['58', '166', '205', '10412', '2245', '1122', '1']
['110', '228', '323', '16661', '4029', '3893', '1']
['62', '183', '222', '12406', '2784', '2212', '1']
['86', '62', '200', '11312', '3720', '2959', '1']
['102', '326', '355', '14499', '3866', '3006', '1']
['135', '157', '471', '24274', '7485', '1344', '0']
['78', '154', '203', '9327', '3672', '1242', '1']
['83', '224', '390', '12362', '3995', '1484', '1']
['60', '48', '213', '10644', '2820', '1154', '0']
['54', '119', '144', '7556', '2088', '245', '1']
['120', '217', '327', '20182', '4432', '6274', '0']
I know there is a short hand way of placing all those numbers within a list and use a sum function to add them up. I'm just not sure of how to go about it.
There are 2 ways. Below I use only an extract of your data.
Setup
We assume you begin with a list of lists of strings.
lst = [['244', '128', '385', '23521', '5230', '5334', '0'],
['59', '155', '203', '9160', '2459', '493', '1'],
['120', '281', '392', '21900', '6304', '6115', '0'],
['120', '291', '419', '22354', '6590', '6346', '0'],
['120', '238', '363', '17421', '5362', '6225', '0'],
['65', '180', '234', '10531', '3622', '449', '1'],
['120', '306', '372', '22147', '4406', '4998', '1'],
['90', '214', '305', '14025', '4173', '966', '1'],
['96', '155', '169', '8812', '1955', '1260', '0']]
Pure Python
A = [[int(i) for i in row] for row in lst]
res = sum(row[4] for row in A if row[6] == 0)
# 25441
Vectorised solution
You can use a 3rd party library such as numpy:
import numpy as np
A = np.array(lst, dtype=int)
res = A[np.where(A[:, 6] == 0), 4].sum()
# 25441
Turn your data file into an array of arrays.
['244', '128', '385', '23521', '5230', '5334', '0']
['59', '155', '203', '9160', '2459', '493', '1']
['120', '281', '392', '21900', '6304', '6115', '0']
Instead:
[['244', '128', '385', '23521', '5230', '5334', '0'],
['59', '155', '203', '9160', '2459', '493', '1'],
['120', '281', '392', '21900', '6304', '6115', '0']]
Then iterate over the elements in the array of arrays looking for the string '0' then adding the element [i][4] to your total sum. You'll need to convert the strings to a number value to add them though, otherwise you'll get one long string of numbers instead of a sum.
// Running total of column 5 (index 4) over the rows whose last column is '0'.
var sum = 0;
for (var i = 0; i < dataArray.length; i++) {
  // Each row has seven columns, so the 0/1 flag is at index 6.
  if (dataArray[i][6] === '0') {
    // Cells are strings; convert before adding so this sums instead of concatenating.
    sum += Number(dataArray[i][4]);
  }
}
At the end of the loop you'll have your total in var sum and can do with it as you please.
Just realized you're working in Python; my answer is in JavaScript. Whoops. It might not be the best answer, but if you find the Python version of the above solution it should get you on the right track. Cheers

Turn a text file into a dictionary with Python

I have a text file with a pattern:
[Badges_373382]
Deleted=0
Button2=0 1497592154
Button1=0 1497592154
ProxReader=0
StartProc=100 1509194246 ""
NextStart=0
LastSeen=1509194246
Enabled=1
Driver=Access Control
Program=AccessProxBadge
LocChg=1509120279
Name=asd
Neuron=7F0027BF2D
Owner=373381
LostSince=1509120774
Index1=218
Photo=unknown.jpg
LastProxReader=0
Temp=0
LastTemp=0
LastMotionless=0
LastMotion=1497592154
BatteryLow=0
PrevReader=10703
Reader=357862
SuspendTill=0
SuspendSince=0
Status=1001
ConvertUponDownload=0
AXSFlags=0
Params=10106
Motion=1
USER_DATA_CreationDate=6/15/2017 4:48:15 PM
OwnerOldName=asd
[Badges_373384]
Deleted=0
Button2=0 1497538610
Button1=0 1497538610
ProxReader=0
StartProc=100 1509194246 ""
NextStart=0
LastSeen=1513872678
Enabled=1
Driver=Access Control
Program=AccessProxBadge
LocChg=1513872684
Name=dsa
Neuron=7F0027CC1C
Owner=373383
LostSince=1513872723
Index1=219
Photo=unknown.jpg
LastProxReader=0
Temp=0
LastTemp=0
LastMotionless=0
LastMotion=1497538610
BatteryLow=0
PrevReader=357874
Reader=357873
SuspendTill=0
SuspendSince=0
Status=1001
ConvertUponDownload=0
AXSFlags=0
Params=10106
Motion=1
USER_DATA_CreationDate=6/15/2017 4:48:51 PM
OwnerOldName=dsa
[Badges_373386]
Deleted=0
Button2=0 1497780768
Button1=0 1497780768
ProxReader=0
StartProc=100 1509194246 ""
NextStart=0
LastSeen=1514124910
Enabled=1
Driver=Access Control
Program=AccessProxBadge
LocChg=1514124915
Name=ss
Neuron=7F0027B5FD
Owner=373385
LostSince=1514124950
Index1=220
Photo=unknown.jpg
LastProxReader=0
Temp=0
LastTemp=0
LastMotionless=0
LastMotion=1497780768
BatteryLow=0
PrevReader=357872
Reader=357871
SuspendTill=0
SuspendSince=0
Status=1001
ConvertUponDownload=0
AXSFlags=0
Params=10106
Motion=1
USER_DATA_CreationDate=6/15/2017 4:49:24 PM
OwnerOldName=ss
Every new "Badge" info starts with [Badges_number] and end with blank line.
Using Python 3.6, I would like to turn this file into a dictionary so that I could easily access that information.
It should look like this:
content = {"Badges_373382": {"Deleted": 0, ...}, "Badges_371231": {"Deleted": 0, ...}}
I'm pretty confused on how to do that, I'd love to get some help.
Thanks!
This is basically an INI file, and Python provides the configparser module to parse such files.
import configparser
config = configparser.ConfigParser()
# By default ConfigParser lowercases option names; keep them exactly as
# written in the file (e.g. "Deleted", "LastSeen").
config.optionxform = str
# read_file() replaces readfp(), which was removed in Python 3.12; the
# context manager closes the file once parsing is done.
with open('badges.ini') as f:
    config.read_file(f)
# One plain dict per [section], keyed by the section name.
r = {section: dict(config[section]) for section in config.sections()}
You can loop through each line and keep track if you have seen a header in the format [Badges_373382]:
import re
import itertools
with open('filename.txt') as f:
f = filter(lambda x:x, [i.strip('\n') for i in f])
new_data = [(a, list(b)) for a, b in itertools.groupby(f, key=lambda x:bool(re.findall('\[[a-zA-Z]+_+\d+\]', x)))]
final_data = {new_data[i][-1][-1]:dict(c.split('=') for c in new_data[i+1][-1]) for i in range(0, len(new_data), 2)}
Output:
{'[Badges_373384]': {'OwnerOldName': 'dsa', 'LastMotionless': '0', 'NextStart': '0', 'Driver': 'Access Control', 'LastTemp': '0', 'USER_DATA_CreationDate': '6/15/2017 4:48:51 PM', 'Program': 'AccessProxBadge', 'LocChg': '1513872684', 'Reader': '357873', 'LostSince': '1513872723', 'LastMotion': '1497538610', 'Status': '1001', 'Deleted': '0', 'SuspendTill': '0', 'ProxReader': '0', 'LastSeen': '1513872678', 'BatteryLow': '0', 'Index1': '219', 'Name': 'dsa', 'Temp': '0', 'Enabled': '1', 'StartProc': '100 1509194246 ""', 'Motion': '1', 'Button2': '0 1497538610', 'Button1': '0 1497538610', 'SuspendSince': '0', 'ConvertUponDownload': '0', 'PrevReader': '357874', 'AXSFlags': '0', 'LastProxReader': '0', 'Photo': 'unknown.jpg', 'Neuron': '7F0027CC1C', 'Owner': '373383', 'Params': '10106'}, '[Badges_373382]': {'OwnerOldName': 'asd', 'LastMotionless': '0', 'NextStart': '0', 'Driver': 'Access Control', 'LastTemp': '0', 'USER_DATA_CreationDate': '6/15/2017 4:48:15 PM', 'Program': 'AccessProxBadge', 'LocChg': '1509120279', 'Reader': '357862', 'LostSince': '1509120774', 'LastMotion': '1497592154', 'Status': '1001', 'Deleted': '0', 'SuspendTill': '0', 'ProxReader': '0', 'LastSeen': '1509194246', 'BatteryLow': '0', 'Index1': '218', 'Name': 'asd', 'Temp': '0', 'Enabled': '1', 'StartProc': '100 1509194246 ""', 'Motion': '1', 'Button2': '0 1497592154', 'Button1': '0 1497592154', 'SuspendSince': '0', 'ConvertUponDownload': '0', 'PrevReader': '10703', 'AXSFlags': '0', 'LastProxReader': '0', 'Photo': 'unknown.jpg', 'Neuron': '7F0027BF2D', 'Owner': '373381', 'Params': '10106'}, '[Badges_373386]': {'OwnerOldName': 'ss', 'LastMotionless': '0', 'NextStart': '0', 'Driver': 'Access Control', 'LastTemp': '0', 'USER_DATA_CreationDate': '6/15/2017 4:49:24 PM', 'Program': 'AccessProxBadge', 'LocChg': '1514124915', 'Reader': '357871', 'LostSince': '1514124950', 'LastMotion': '1497780768', 'Status': '1001', 'Deleted': '0', 'SuspendTill': '0', 'ProxReader': '0', 'LastSeen': '1514124910', 'BatteryLow': 
'0', 'Index1': '220', 'Name': 'ss', 'Temp': '0', 'Enabled': '1', 'StartProc': '100 1509194246 ""', 'Motion': '1', 'Button2': '0 1497780768', 'Button1': '0 1497780768', 'SuspendSince': '0', 'ConvertUponDownload': '0', 'PrevReader': '357872', 'AXSFlags': '0', 'LastProxReader': '0', 'Photo': 'unknown.jpg', 'Neuron': '7F0027B5FD', 'Owner': '373385', 'Params': '10106'}}
You can just go through each line of the file and add what you need. There are three cases of lines you can come across:
1. The line is a header; it will be a key in the final dictionary. You can just check if a line starts with "[Badges" here, and store the current header in a temporary variable while reading the file.
2. The line is a blank line, marking the end of the current badge data being read. All you need to do here is add the information collected from the current badge and add it to the dictionary, with the correct corresponding key. Depending on your implementation, you can delete these beforehand, or keep them when reading the lines.
3. Otherwise, the line has some info that needs to be stored. You first need to split this info on "=", and store it in your dictionary.
With these suggestions, you can write something like this to accomplish this task:
from collections import defaultdict
# dictionary of dictionary values, keyed by badge header
data = defaultdict(dict)
with open('pattern.txt') as file:
    lines = [line.strip('\n') for line in file]
# keeps track of current header
header = None
# case 2, deletes empty lines beforehand
valid_lines = [line for line in lines if line]
for line in valid_lines:
    # case 1, for headers
    if line.startswith('[Badges'):
        # updates current header, and deletes square brackets
        header = line.replace('[', '').replace(']', '')
    # case 3, data has been found
    else:
        # split on the first '=' only, so a value that itself contains '='
        # is kept whole; a line without '=' yields an empty value
        key, _, value = line.partition('=')
        data[header][key] = value
print(dict(data))
Which outputs:
{'Badges_373382': {'Deleted': '0', 'Button2': '0 1497592154', 'Button1': '0 1497592154', 'ProxReader': '0', 'StartProc': '100 1509194246 ""', 'NextStart': '0', 'LastSeen': '1509194246', 'Enabled': '1', 'Driver': 'Access Control', 'Program': 'AccessProxBadge', 'LocChg': '1509120279', 'Name': 'asd', 'Neuron': '7F0027BF2D', 'Owner': '373381', 'LostSince': '1509120774', 'Index1': '218', 'Photo': 'unknown.jpg', 'LastProxReader': '0', 'Temp': '0', 'LastTemp': '0', 'LastMotionless': '0', 'LastMotion': '1497592154', 'BatteryLow': '0', 'PrevReader': '10703', 'Reader': '357862', 'SuspendTill': '0', 'SuspendSince': '0', 'Status': '1001', 'ConvertUponDownload': '0', 'AXSFlags': '0', 'Params': '10106', 'Motion': '1', 'USER_DATA_CreationDate': '6/15/2017 4:48:15 PM', 'OwnerOldName': 'asd'}, 'Badges_373384': {'Deleted': '0', 'Button2': '0 1497538610', 'Button1': '0 1497538610', 'ProxReader': '0', 'StartProc': '100 1509194246 ""', 'NextStart': '0', 'LastSeen': '1513872678', 'Enabled': '1', 'Driver': 'Access Control', 'Program': 'AccessProxBadge', 'LocChg': '1513872684', 'Name': 'dsa', 'Neuron': '7F0027CC1C', 'Owner': '373383', 'LostSince': '1513872723', 'Index1': '219', 'Photo': 'unknown.jpg', 'LastProxReader': '0', 'Temp': '0', 'LastTemp': '0', 'LastMotionless': '0', 'LastMotion': '1497538610', 'BatteryLow': '0', 'PrevReader': '357874', 'Reader': '357873', 'SuspendTill': '0', 'SuspendSince': '0', 'Status': '1001', 'ConvertUponDownload': '0', 'AXSFlags': '0', 'Params': '10106', 'Motion': '1', 'USER_DATA_CreationDate': '6/15/2017 4:48:51 PM', 'OwnerOldName': 'dsa'}, 'Badges_373386': {'Deleted': '0', 'Button2': '0 1497780768', 'Button1': '0 1497780768', 'ProxReader': '0', 'StartProc': '100 1509194246 ""', 'NextStart': '0', 'LastSeen': '1514124910', 'Enabled': '1', 'Driver': 'Access Control', 'Program': 'AccessProxBadge', 'LocChg': '1514124915', 'Name': 'ss', 'Neuron': '7F0027B5FD', 'Owner': '373385', 'LostSince': '1514124950', 'Index1': '220', 'Photo': 'unknown.jpg', 
'LastProxReader': '0', 'Temp': '0', 'LastTemp': '0', 'LastMotionless': '0', 'LastMotion': '1497780768', 'BatteryLow': '0', 'PrevReader': '357872', 'Reader': '357871', 'SuspendTill': '0', 'SuspendSince': '0', 'Status': '1001', 'ConvertUponDownload': '0', 'AXSFlags': '0', 'Params': '10106', 'Motion': '1', 'USER_DATA_CreationDate': '6/15/2017 4:49:24 PM', 'OwnerOldName': 'ss'}}
Note: The above code is just a possibility, feel free to adapt it to your needs, or even improve it.
I also used collections.defaultdict to add the data, since it's easier to use. You can also wrap it in dict() at the end to convert it to a normal dictionary, which is optional.
You can try regex and split the result of output:
pattern='^\[Badges.+?OwnerOldName=\w+'
import re
with open('file.txt','r') as f:
match=re.finditer(pattern,f.read(),re.DOTALL | re.MULTILINE)
new=[]
for kk in match:
if kk.group()!='\n':
new.append(kk.group())
print({i.split()[0]:i.split()[1:] for i in new})
output:
{'[Badges_373384]': ['Deleted=0', 'Button2=0', '1497538610', 'Button1=0', '1497538610', 'ProxReader=0', 'StartProc=100', '1509194246', '""', 'NextStart=0', 'LastSeen=1513872678', 'Enabled=1', 'Driver=Access', 'Control', 'Program=AccessProxBadge', 'LocChg=1513872684', 'Name=dsa', 'Neuron=7F0027CC1C', 'Owner=373383', 'LostSince=1513872723', 'Index1=219', 'Photo=unknown.jpg', 'LastProxReader=0', 'Temp=0', 'LastTemp=0', 'LastMotionless=0', 'LastMotion=1497538610', 'BatteryLow=0', 'PrevReader=357874', 'Reader=357873', 'SuspendTill=0', 'SuspendSince=0', 'Status=1001', 'ConvertUponDownload=0', 'AXSFlags=0', 'Params=10106', 'Motion=1', 'USER_DATA_CreationDate=6/15/2017', '4:48:51', 'PM', 'OwnerOldName=dsa'], '[Badges_373382]': ['Deleted=0', 'Button2=0', '1497592154', 'Button1=0', '1497592154', 'ProxReader=0', 'StartProc=100', '1509194246', '""', 'NextStart=0', 'LastSeen=1509194246', 'Enabled=1', 'Driver=Access', 'Control', 'Program=AccessProxBadge', 'LocChg=1509120279', 'Name=asd', 'Neuron=7F0027BF2D', 'Owner=373381', 'LostSince=1509120774', 'Index1=218', 'Photo=unknown.jpg', 'LastProxReader=0', 'Temp=0', 'LastTemp=0', 'LastMotionless=0', 'LastMotion=1497592154', 'BatteryLow=0', 'PrevReader=10703', 'Reader=357862', 'SuspendTill=0', 'SuspendSince=0', 'Status=1001', 'ConvertUponDownload=0', 'AXSFlags=0', 'Params=10106', 'Motion=1', 'USER_DATA_CreationDate=6/15/2017', '4:48:15', 'PM', 'OwnerOldName=asd'], '[Badges_373386]': ['Deleted=0', 'Button2=0', '1497780768', 'Button1=0', '1497780768', 'ProxReader=0', 'StartProc=100', '1509194246', '""', 'NextStart=0', 'LastSeen=1514124910', 'Enabled=1', 'Driver=Access', 'Control', 'Program=AccessProxBadge', 'LocChg=1514124915', 'Name=ss', 'Neuron=7F0027B5FD', 'Owner=373385', 'LostSince=1514124950', 'Index1=220', 'Photo=unknown.jpg', 'LastProxReader=0', 'Temp=0', 'LastTemp=0', 'LastMotionless=0', 'LastMotion=1497780768', 'BatteryLow=0', 'PrevReader=357872', 'Reader=357871', 'SuspendTill=0', 'SuspendSince=0', 'Status=1001', 
'ConvertUponDownload=0', 'AXSFlags=0', 'Params=10106', 'Motion=1', 'USER_DATA_CreationDate=6/15/2017', '4:49:24', 'PM', 'OwnerOldName=ss']}

Categories