# Count one more occurrence of `error_type` for this file / transaction pair.
# setdefault() creates the nested dicts the first time a file or transaction is
# seen, and get(..., 0) starts the tally at zero, so the first occurrence of an
# error type is recorded as well (previously only keys that already existed
# were incremented).
errors = dict1.setdefault(filename, {}).setdefault(transId, {})
errors[error_type] = errors.get(error_type, 0) + 1
dict data is :
{'abc': {'ACE12345678': {'ERR-2': 2}, {'ERR-3': 4}}}
where 'abc' is a filename, 'ACE12345678' a TransId, and 'ERR-2' an Error Type.
I would also like to add the log lines for each TransId (e.g. 'ACE12345678') so that the dict looks like this:
{'abc': {'ACE12345678': {'ERR-2': 2, data1\n data2\n data3\n}, {'ERR-3': 4, data1\n data2\n data3\n}}}.
Can someone help me get this output?
you can add a new key loglines that holds all the lines in a list:
# Sample state: one file, one transaction, one error type already seen twice.
dict1 = {'abc': {'ACE12345678': {'ERR-2': 2}}}
filename, transID, error_type = 'abc', 'ACE12345678', 'ERR-2'
logline = 'data1\n'

# Walk down to the per-transaction dict, creating each level when it is missing.
per_file = dict1.setdefault(filename, {})
my_error = per_file.setdefault(transID, {})

# Bump the tally for this error type and keep the raw line next to it.
my_error[error_type] = my_error.get(error_type, 0) + 1
my_error.setdefault('loglines', []).append(logline)

print(dict1)
output:
{'abc': {'ACE12345678': {'ERR-2': 3, 'loglines': ['data1\n']}}}
Related
Is there a quick way to combine word dictionaries(MapType) in a list?
word
[[word1 -> 2], [wor2 ->3] .... [word2 -> 4]]
--------------------------------------result-----------------------
word
[[word1 ->2] ,[wor2 -> 7]]
The problem is that it takes a long time when using a UDF.
def dictsum(keywords):
    """Merge a list of word-count maps into one map of summed counts.

    Args:
        keywords: iterable of ``{word: count}`` mappings. ``None`` is treated
            as empty (presumably what a null column value arrives as — verify).

    Returns:
        A one-element list holding the merged ``{word: total}`` dict, i.e. the
        same list-of-maps shape the function returned before.
    """
    sumdict = {}
    for wordcounts in keywords or ():
        for word, count in wordcounts.items():
            # Add the stored count, not 1: [word2 -> 3] and [word2 -> 4]
            # must combine to [word2 -> 7].
            sumdict[word] = sumdict.get(word, 0) + count
    return [sumdict]
# Keep only the creation date and the "nounwords" column of noun_df.
dict_df = noun_df.select("createDate","nounwords")
# Wrap `wordcount` (defined outside this snippet) as a UDF declared to return an
# array of string -> int maps, and store its result in a "wordcount" column.
wordcountUdf = udf(wordcount, ArrayType(MapType(StringType(),IntegerType())))
dict_df = dict_df.withColumn("wordcount",wordcountUdf(dict_df['nounwords']))
#dict_df.show(100,False)
keyword_f = dict_df.select("createDate","wordcount")
# Per createDate: collect every row's "wordcount" array and flatten them into a
# single "keywords" array (presumably pyspark.sql.functions flatten/collect_list — confirm).
keyword_f = keyword_f.groupby("createDate").agg(flatten(collect_list("wordcount")).alias("keywords"))
# Tag every row with the constant statistic type "keyword_f".
keyword_f = keyword_f.withColumn("statistic_type",lit("keyword_f"))
#keyword_f.show(10,False)
# Run dictsum over each date's flattened list through a Python UDF.
# NOTE(review): this looks like the UDF step the question reports as slow — confirm.
dictsumUdf = udf(dictsum, ArrayType(MapType(StringType(),IntegerType())))
keyword_f = keyword_f.withColumn("wordcounts",dictsumUdf(keyword_f['keywords']))
# The intermediate "keywords" column is dropped once "wordcounts" exists.
keyword_f = keyword_f.drop("keywords")
#keyword_f.show(100,False)
I am extracting from the log file and print using the below code
# For every log line, list each (digits-and-dots token, HTTP method) pair found in it.
pattern = re.compile(r'([\d.]+).*?(GET|POST|PUT|DELETE)')
for line in data:
    g = pattern.findall(line)
    print(g)
[('1.1.1.1', 'PUT')]
[('2.2.2.2', 'GET')]
[('1.1.1.1', 'PUT')]
[('2.2.2.2', 'POST')]
How to add to the output
output
1.1.1.1: PUT = 2
2.2.2.2: GET = 1,POST=1
You could use a dictionary to count:
# Tally how many times each (address, method) pair shows up in the log.
count_dict = {}
for line in data:
    for tup in re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line):
        # A pair seen for the first time starts from 0 via the .get default.
        count_dict[tup] = count_dict.get(tup, 0) + 1

# Report every pair together with its tally.
for tup, count in count_dict.items():
    print(tup, count)
OK, I have to admit this doesn't give the exact output you want, but from here you can get it in a similar manner:
# Build one "METHOD = n, METHOD = n" summary string per address.
out_dict = {}
for (comma_string, request_type), count in count_dict.items():
    entry = f'{request_type} = {count}'
    if comma_string in out_dict:
        # Later methods for the same address are appended after a separator.
        out_dict[comma_string] += ', ' + entry
    else:
        out_dict[comma_string] = entry

for tup, out_str in out_dict.items():
    print(tup, out_str)
From your data that outputs:
1.1.1.1 PUT = 2
2.2.2.2 GET = 1, POST = 1
I would look towards Counter.
from collections import Counter

# Keep the first (address, method) pair of every line that has one.
results = []
for line in data:
    g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
    if g:  # a line without a match would make g[0] raise IndexError
        results.append(g[0])

# Fill one Counter per address in a single pass instead of rescanning
# `results` once for every distinct address.
counts = {}
for ip, method in results:
    counts.setdefault(ip, Counter())[method] += 1

# Addresses are printed in the order they were first seen.
for ip, methods in counts.items():
    print(ip, methods)
You can use collections.defaultdict
Ex:
from collections import defaultdict

# Gather every method seen for an address, in log order.
result = defaultdict(list)
pattern = r'([\d.]+).*?(GET|POST|PUT|DELETE)'
for line in data:
    for ip, method in re.findall(pattern, line):
        result[ip].append(method)

# Print each address followed by " METHOD = count" for every distinct method.
for k, v in result.items():
    temp = "".join(" {} = {}".format(i, v.count(i)) for i in set(v))
    print("{}{}".format(k, temp))
from collections import Counter

x = [[('1.1.1.1', 'PUT')], [('2.2.2.2', 'GET')], [('1.1.1.1', 'PUT')], [('2.2.2.2', 'POST')]]

# step 1: group the request methods by address.
# The input list `x` is no longer rebound inside its own loop.
m = {}
for entry in x:
    ip, method = entry[0]
    m.setdefault(ip, []).append(method)
print('new dict is {}'.format(m))

# step 2: count how often each method occurs per address.
# One Counter per address; its items come out in first-seen order.
yy = [
    [method + '=' + str(count) for method, count in Counter(methods).items()]
    for methods in m.values()
]

# step 3: replace each method list with its "METHOD=count" summary.
# list(m) snapshots the keys so the dict is not iterated while being assigned to.
for ip, summary in zip(list(m), yy):
    m[ip] = summary
print("final dict is{}".format(m))
Output is
new dict is {'1.1.1.1': ['PUT', 'PUT'], '2.2.2.2': ['GET', 'POST']}
final dict is{'1.1.1.1': ['PUT=2'], '2.2.2.2': ['GET=1', 'POST=1']}
Without dependencies and using a dict for counting, in a very basic way. Given the data_set:
data_set = [[('1.1.1.1', 'PUT')],
            [('2.2.2.2', 'GET')],
            [('2.2.2.2', 'POST')],
            [('1.1.1.1', 'PUT')]]
# Initialize the variables (manually, just few verbs) then iterate over the data:
counter = {'PUT': 0, 'GET': 0, 'POST': 0, 'DELETE': 0}
res = {}
for data in data_set:
    ip, verb = data[0]
    if ip not in res:
        # Give each address its own copy of the zeroed template; assigning
        # `counter` itself made every address share (and mutate) one dict.
        res[ip] = dict(counter)
    # Count every request, including the first one seen for an address.
    res[ip][verb] += 1
print(res)
#=> {'1.1.1.1': {'PUT': 2, 'GET': 0, 'POST': 0, 'DELETE': 0}, '2.2.2.2': {'PUT': 0, 'GET': 1, 'POST': 1, 'DELETE': 0}}
You will still need to format the output to better fit your needs.
I am trying to merge three dictionaries together.
I am receiving an unsupported operand types error.
Here is my code:
# Collect the student, subject and degree values through the various .get() calls
# (presumably form widgets — confirm), record them in the Sinfo, SSubject and SDegree
# dicts keyed by student number / subject code / degree code, then call popup_add().
def add_student():
# Snumber, iCode and kCode are published as module-level globals.
global Snumber
global iCode
global kCode
Snumber = Student_number.get()
Sname = Student_name.get()
Ssurnname = Student_surname.get()
Sdetail = Student_detail.get()
# NOTE(review): `i` is not used again in the visible code.
i = Students(Snumber,Sname,Ssurnname,Sdetail)
Sinfo[Snumber]=[Sname,Ssurnname,Sdetail]
iName = Student_subject.get()
iCode = Student_code.get()
iMark1 = Student_Mark1.get()
iMark2 = Student_Mark2.get()
iMark3 = Student_Mark3.get()
iProject = Student_project.get()
# NOTE(review): `j` is not used again in the visible code.
j = Subjects(iName,iCode,iMark1,iMark2,iMark3,iProject)
SSubject[iCode]=[iName,iMark1,iMark2,iMark3,iProject]
kCourse = Degree_course.get()
kCode = Degree_code.get()
kYear = Degree_year.get()
# NOTE(review): `v` is not used again in the visible code.
v = Degrees(kCourse,kCode,kYear)
SDegree[kCode]=[kCourse,kYear]
popup_add()
# NOTE(review): on Python 3, dict.items() returns view objects that do not support `+`,
# so the next line raises "TypeError: unsupported operand type(s)". Wrapping each
# .items() in list() or merging with dict.update() avoids it.
student_list = (Sinfo.items() + SSubject.items() + SDegree.items())
print(student_list)
I believe my problem is in:
student_list = (Sinfo.items() + SSubject.items() + SDegree.items())
print(student_list)
you can use dict.update()
>>> a = {1:1,2:2,3:3}
>>> a
{1: 1, 2: 2, 3: 3}
>>> b = {4:4,5:5}
>>> c = {6:6,7:7}
>>> a.update(b)
>>> a.update(c)
>>> a
{1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7}
If you don't want to modify the original, you can copy it into a new variable first:
>>> new_dict = dict(a)
To merge multiple dictionaries, let's say we have the dicts Sinfo, SSubject and SDegree:
student_list = dict(Sinfo.items() + SSubject.items() + SDegree.items())
The code above works with Python 2 only. For Python 3, you need to convert each dict.items() into a list first, as shown below:
student_list = dict(list(Sinfo.items()) + list(SSubject.items()) + list(SDegree.items()))
I am trying to read through a csv file in the following format:
number,alphabet
1,a
2,b
3,c
2,b
1,a
My code to create a dictionary:
# Read the CSV inside a `with` block so the file handle is closed as soon as
# the rows have been consumed; newline='' is what the csv module asks for.
with open('alpha.csv', 'r', newline='') as alpha:
    csv_alpha = csv.reader(alpha)
    # Later rows overwrite earlier ones that share the same first column.
    alpha_file = {row[0]: row[1] for row in csv_alpha}
OUTPUT:
alpha_file = { 1:'a', 2:'b', 3:'c' }
By looking at the file, 1 and 2 have duplicate values.
How can i possibly change my output to :
alpha_file = { 1:'a', 1:'a', 2:'b', 2:'b', 3:'c' }
LNG - PYTHON
use a list to hold key's value
# Group the second column by the first: every key maps to the list of all
# values seen for it, so duplicate rows are kept instead of overwritten.
alpha_file = dict()
# The `with` block closes the file once all rows are read; newline='' is what
# the csv module asks for.
with open('alpha.csv', 'r', newline='') as alpha:
    csv_alpha = csv.reader(alpha)
    for row in csv_alpha:
        alpha_file.setdefault(row[0], []).append(row[1])
the output will be like:
{ 1:['a','a'],2:['b','b'], 3:['c'] }
to output the number of key occurrences, use a for loop
d = {1: ['a', 'a'], 2: ['b', 'b'], 3: ['c']}
# Repeat each key once per value stored under it.
# items() and print() replace the Python 2-only iteritems() / print statement.
amount = []
for key, value in d.items():
    amount += [key] * len(value)
print(amount)
output looks like:
[1, 1, 2, 2, 3]
I am trying to create a nested dictionary from a mysql query but I am getting a key error
# NOTE: `result` starts out empty, so result['data'] does not exist yet and the
# first assignment in the loop raises KeyError: 'data'. Plain dicts do not create
# missing intermediate levels on lookup.
result = {}
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
error
KeyError: 'data'
desired result
result = {
'data': {
0: {'firstName': ''...}
1: {'firstName': ''...}
2: {'firstName': ''...}
}
}
You wanted to create a nested dictionary
result = {} will create an assignment for a flat dictionary, whose items can have any values like "string", "int", "list" or "dict"
For this flat assignment
python knows what to do for result["first"]
If you want "first" to be another dictionary as well, you need to tell Python so with an assignment:
result['first'] = {}.
otherwise, Python raises "KeyError"
I think you are looking for this :)
>>> from collections import defaultdict
>>> mydict = lambda: defaultdict(mydict)
>>> result = mydict()
>>> result['Python']['rules']['the world'] = "Yes I Agree"
>>> result['Python']['rules']['the world']
'Yes I Agree'
result = {}
result['data'] = {}
for i, q in enumerate(query):
    # Key the row dict by the loop index itself. The string 'i' created an
    # unrelated entry, so result['data'][i] was still missing and the next
    # line raised KeyError.
    result['data'][i] = {}
    result['data'][i]['firstName'] = q.first_name
    result['data'][i]['lastName'] = q.last_name
    result['data'][i]['email'] = q.email
Alternatively, you can use you own class which adds the extra dicts automatically
class AutoDict(dict):
    """dict that creates a nested, empty child mapping for any missing key."""

    def __missing__(self, k):
        """Store and return a fresh child mapping under ``k``.

        ``dict`` invokes this hook from ``self[k]`` when ``k`` is absent, so
        chained lookups such as ``d['a']['b']['c'] = 1`` build every level on
        the way. The child is created with ``type(self)`` so that a subclass
        nests instances of itself rather than plain ``AutoDict`` objects.
        """
        child = self[k] = type(self)()
        return child
result = AutoDict()
for i, q in enumerate(query):
    # Looking up result['data'][i] creates both missing levels via AutoDict.
    row = result['data'][i]
    row['firstName'] = q.first_name
    row['lastName'] = q.last_name
    row['email'] = q.email
result['data'] does not exist yet, so you cannot add data to it.
Try this out at the start:
result = {'data': []};
You have to create the key data first:
# Create the 'data' level up front, then store one row dict per query result,
# keyed by its position in the query.
result = {'data': {}}
for i, q in enumerate(query):
    result['data'][i] = {}
    row = result['data'][i]
    row['firstName'] = q.first_name
    row['lastName'] = q.last_name
    row['email'] = q.email