How to add the value from list of tuples - python

I am extracting from the log file and print using the below code
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
print (g)
[('1.1.1.1', 'PUT')]
[('2.2.2.2', 'GET')]
[('1.1.1.1', 'PUT')]
[('2.2.2.2', 'POST')]
How to add to the output
output
1.1.1.1: PUT = 2
2.2.2.2: GET = 1,POST=1

You could use a dictionary to count:
# initialize the count dict
count_dict= dict()
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
for tup in g:
# get the counts for tuple tup if we don't have it yet
# use 0 (second argument to .get)
num= count_dict.get(tup, 0)
# increase the count and write it back
count_dict[tup]= num+1
# now iterate over the key (tuple) - value (counts)-pairs
# and print the result
for tup, count in count_dict.items():
print(tup, count)
Ok, I have to admit this doesn't give the exact output, you want, but from this you can do in a similar manner:
out_dict= dict()
for (comma_string, request_type), count in count_dict.items():
out_str= out_dict.get(comma_string, '')
sep='' if out_str == '' else ', '
out_str= f'{out_str}{sep}{request_type} = {count}'
out_dict[comma_string]= out_str
for tup, out_str in out_dict.items():
print(tup, out_str)
From your data that outputs:
1.1.1.1 PUT = 2
2.2.2.2 GET = 1, POST = 1

I would look towards Counter.
from collections import Counter
results = []
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
results.append(g[0])
ip_list = set(result[0] for result in results)
for ip in ip_list:
print(ip, Counter(result[1] for result in results if result[0] == ip ))

You can use collection.defaultdict
Ex:
from collections import defaultdict
result = defaultdict(list)
for line in data:
for ip, method in re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line):
result[ip].append(method)
for k, v in result.items():
temp = ""
for i in set(v):
temp += " {} = {}".format(i, v.count(i))
print("{}{}".format(k, temp))

from collections import Counter
x = [[('1.1.1.1', 'PUT')],[('2.2.2.2', 'GET')],[('1.1.1.1', 'PUT')],[('2.2.2.2', 'POST')]]
# step 1: convert x into a dict.
m = {}
for i in x:
a, b = i[0]
if a not in m.keys():
m[a] = [b]
else:
x = m[a]
x.append(b)
m[a] = x
print('new dict is {}'.format(m))
# step 2 count frequency
m_values = list(m.values())
yy = []
for i in m_values:
x = []
k = list(Counter(i).keys())
v = list(Counter(i).values())
for i in range(len(k)):
x.append(k[i] + '=' + str(v[i]))
yy.append(x)
# step 3, update the value of the dict
m_keys = list(m.keys())
n = len(m_keys)
for i in range(n):
m[m_keys[i]] = yy[i]
print("final dict is{}".format(m))
Output is
new dict is {'1.1.1.1': ['PUT', 'PUT'], '2.2.2.2': ['GET', 'POST']}
final dict is{'1.1.1.1': ['PUT=2'], '2.2.2.2': ['GET=1', 'POST=1']}

Without dependencies and using a dict for counting, in a very basic way. Given the data_set:
data_set = [[('1.1.1.1', 'PUT')],
[('2.2.2.2', 'GET')],
[('2.2.2.2', 'POST')],
[('1.1.1.1', 'PUT')]]
Initialize the variables (manually, just few verbs) then iterate over the data:
counter = {'PUT': 0, 'GET': 0, 'POST': 0, 'DELETE': 0}
res = {}
for data in data_set:
ip, verb = data[0]
if not ip in res:
res[ip] = counter
else:
res[ip][verb] += 1
print(res)
#=> {'1.1.1.1': {'PUT': 1, 'GET': 0, 'POST': 1, 'DELETE': 0}, '2.2.2.2': {'PUT': 1, 'GET': 0, 'POST': 1, 'DELETE': 0}}
It's required to format the output to better fits your needs.

Related

How can I use my helper functions to get the correct output

So I created a helper function to help my main function in extracting stuff from a dictionary...
and here is my code and function
def rdict(recipes):
recipes_splitted = {}
for r in recipes:
recipe_name, parts = r.split(":")
recipe_parts = {}
for part in parts.split(','):
product, number = part.split('*')
recipe_parts[product] = int(number)
recipes_splitted[recipe_name] = recipe_parts
return recipes_splitted
def extract(recipes, data):
result = []
for r in recipes:
tmp = []
for key in data[r]:
tmp.append(f"{key}:{data[r][key]}")
final_string = ""
for i in range(len(tmp)):
if i < len(tmp) - 1:
final_string += tmp[i] + ", "
else:
final_string += tmp[i]
result.append(final_string)
return result
So what I'm trying to do is make sure data in extract(recipe, data) go through rdict(data) since rdict will convert data into a dictionary, which is what I need.. However, when I tried doing for key in rdict(data[r]): the output returns Error. String is not supscriptable..
what should I do to successfully implement the changes??
Edit
So from my current code, here is a sample input..
print(extract(recipes = ['T-Bone', 'Green Salad1'],data = ["Pork Stew:Cabbage*5,Carrot*1,Fatty Pork*10",
"Green Salad1:Cabbage*10,Carrot*2,Pineapple*5",
"T-Bone:Carrot*2,Steak Meat*1"]
))
and in order for my code to work, it has to be like this
print(extract(recipes = ['T-Bone', 'Green Salad1'], data = {'Pork Stew': {'Cabbage': 5, 'Carrot': 1, 'Fatty Pork': 10}, 'Green Salad1': {'Cabbage': 10, 'Carrot': 2, 'Pineapple': 5},'T-Bone': {'Carrot': 2, 'Steak Meat': 1}}))
So from the input, data should be changed from
data = ["Pork Stew:Cabbage*5,Carrot*1,Fatty Pork*10",
"Green Salad1:Cabbage*10,Carrot*2,Pineapple*5",
"T-Bone:Carrot*2,Steak Meat*1"]
to
data = {'Pork Stew': {'Cabbage': 5, 'Carrot': 1, 'Fatty Pork': 10}, 'Green Salad1': {'Cabbage': 10, 'Carrot': 2, 'Pineapple': 5},'T-Bone': {'Carrot': 2, 'Steak Meat': 1}}
Convert the data to dict in extract().
recipes = ['T-Bone', 'Green Salad1']
data = ["Pork Stew:Cabbage*5,Carrot*1,Fatty Pork*10",
"Green Salad1:Cabbage*10,Carrot*2,Pineapple*5",
"T-Bone:Carrot*2,Steak Meat*1"]
def rdict(recipes):
recipes_splitted = {}
for r in recipes:
recipe_name, parts = r.split(":")
recipe_parts = {}
for part in parts.split(','):
product, number = part.split('*')
recipe_parts[product] = int(number)
recipes_splitted[recipe_name] = recipe_parts
return recipes_splitted
def extract(recipes, data):
data = rdict(data) # convert data to dict first
result = []
for r in recipes:
tmp = []
for key in data[r]:
tmp.append(f"{key}:{data[r][key]}")
final_string = ""
for i in range(len(tmp)):
if i < len(tmp) - 1:
final_string += tmp[i] + ", "
else:
final_string += tmp[i]
result.append(final_string)
return result
print(extract(recipes, data))
Output:
['Carrot:2, Steak Meat:1', 'Cabbage:10, Carrot:2, Pineapple:5']
Renamed rdict to parse_recipe, and modified it to return a tuple that is lighter and easier to process
In extract:
a) Build a dict of recipes: data_recipes
b) Built result by getting the wanted recipes, with a guard against missing recipe (which be an empty dict:{} )
def parse_recipe(s):
recipe, ings_s = s.split(':')
ings_l = ings_s.split(',')
ings_d= {}
for ing in ings_l:
i,q = ing.split('*')
ings_d[i.strip()] = q.strip()
return recipe.strip(), ings_d
def extract(recipes, data):
data_recipes = {}
for s in data:
recipe, ings_d = parse_recipe(s)
data_recipes[recipe] = ings_d
return {r: data_recipes.get(r, dict()) for r in recipes}

Is there a quick way to combine word dictionaries in a list?

Is there a quick way to combine word dictionaries(MapType) in a list?
word
[[word1 -> 2], [wor2 ->3] .... [word2 -> 4]]
--------------------------------------result-----------------------
word
[[word1 ->2] ,[wor2 -> 7]]
There is a problem that it takes a long time using the udf function.
def dictsum(keywords) :
dictlist = []
sumdict = {}
for wordcounts in keywords :
for k, v in wordcounts.items() :
print(wordcounts.items())
if k not in sumdict :
sumdict[k] = 1
else :
sumdict[k] += 1
dictlist.append(sumdict)
return dictlist
dict_df = noun_df.select("createDate","nounwords")
wordcountUdf = udf(wordcount, ArrayType(MapType(StringType(),IntegerType())))
dict_df = dict_df.withColumn("wordcount",wordcountUdf(dict_df['nounwords']))
#dict_df.show(100,False)
keyword_f = dict_df.select("createDate","wordcount")
keyword_f = keyword_f.groupby("createDate").agg(flatten(collect_list("wordcount")).alias("keywords"))
keyword_f = keyword_f.withColumn("statistic_type",lit("keyword_f"))
#keyword_f.show(10,False)
dictsumUdf = udf(dictsum, ArrayType(MapType(StringType(),IntegerType())))
keyword_f = keyword_f.withColumn("wordcounts",dictsumUdf(keyword_f['keywords']))
keyword_f = keyword_f.drop("keywords")
#keyword_f.show(100,False)

Adding to JSON in Python and converting to an object

I have a JSON array shown below.
[
"3D3iAR9M4HDETajfD79gs9BM8qhMSq5izX",
"35xfg4UnpEJeHDo55HNwJbr1V3G1ddCuVA"
]
I would like to add a value in the form of the string (self.tx_amount_5) so I get a JSON OBJECT something like this:
{
"3D3iAR9M4HDETajfD79gs9BM8qhMSq5izX" : 100000
"35xfg4UnpEJeHDo55HNwJbr1V3G1ddCuVA" : 100000
}
The part of code that has generated the first JSON array is:
r = requests.get('http://api.blockcypher.com/v1/btc/main/addrs/A/balance')
balance = r.json()['balance']
with open("Entries#x1.csv") as f,open("winningnumbers.csv") as nums:
nums = set(imap(str.rstrip, nums))
r = csv.reader(f)
results = defaultdict(list)
for row in r:
results[sum(n in nums for n in islice(row, 1, None))].append(row[0])
self.number_matched_0 = results[0]
self.number_matched_1 = results[1]
self.number_matched_2 = results[2]
self.number_matched_3 = results[3]
self.number_matched_4 = results[4]
self.number_matched_5 = results[5]
self.number_matched_5_json = json.dumps(self.number_matched_5, sort_keys = True, indent = 4)
print(self.number_matched_5_json)
if len(self.number_matched_3) == 0:
print('Nobody matched 3 numbers')
else:
self.tx_amount_3 = int((balance*0.001)/ len(self.number_matched_3))
if len(self.number_matched_4) == 0:
print('Nobody matched 4 numbers')
else:
self.tx_amount_4 = int((balance*0.1)/ len(self.number_matched_4))
if len(self.number_matched_5) == 0:
print('Nobody matched 3 numbers')
else:
self.tx_amount_5 = int((balance*0.4)/ len(self.number_matched_5))
If I understand correctly, you can create the dictionary like this:
import json
s="""[
"3D3iAR9M4HDETajfD79gs9BM8qhMSq5izX",
"35xfg4UnpEJeHDo55HNwJbr1V3G1ddCuVA"
]"""
d = {el: self.tx_amount_5 for el in json.loads(s)}
print(d)
which produces
{'3D3iAR9M4HDETajfD79gs9BM8qhMSq5izX': 100000,
'35xfg4UnpEJeHDo55HNwJbr1V3G1ddCuVA': 100000}

how to skip a value while printing from dict, if the value is blank

I have a dict
x4={'c;1': 'c4;;c6', 'b;1': 'a2;b2;c2;d2', 'b;0': 'A1;B1;C1;D1', 'a;1': 'a1;b1;c1;d1', 'a;0': 'A;B;C;D', 'c;0': 'c1;c2;c3'}
i am using this code:
for k,v in x4.iteritems():
a = k.split(";")
b = v.split(";")
if a[1] is not '0':
val = x4[a[0]+';0']
values = val.split(";")
for i in range(len(values)):
if values[i]=='' is True:
b[i]=''
else:
print '<%s>%s<%s>' % (values[i],b[i],values[i])
output its printing is
<c1>c4<c1>
<c2><c2>
<c3>c6<c3>
<A1>a2<A1>
<B1>b2<B1>
<C1>c2<C1>
<D1>d2<D1>
<A>a1<A>
<B>b1<B>
<C>c1<C>
<D>d1<D>
But What i want is
<c1>c4<c1>
<c3>c6<c3>
<A1>a2<A1>
<B1>b2<B1>
<C1>c2<C1>
<D1>d2<D1>
<A>a1<A>
<B>b1<B>
<C>c1<C>
<D>d1<D>
The 'c2' should not be printed.. I am using this code to print grouped data. pls help me
x4={'c;1': 'c4;;c6', 'b;1': 'a2;b2;c2;d2', 'b;0': 'A1;B1;C1;D1', 'a;1': 'a1;b1;c1;d1', 'a;0': 'A;B;C;D', 'c;0': 'c1;c2;c3'}
for k,v in x4.iteritems():
a = k.split(";")
b = v.split(";")
if a[1] is not '0':
val = x4[a[0]+';0']
values = val.split(";")
for i in range(len(values)):
if values[i] is '':
b[i]=''
else:
if b[i]:
print '<%s>%s<%s>' % (values[i],b[i],values[i])
output::
<A1>a2<A1>
<B1>b2<B1>
<C1>c2<C1>
<D1>d2<D1>
<c1>c4<c1>
<c3>c6<c3>
<A>a1<A>
<B>b1<B>
<C>c1<C>
<D>d1<D>

Learning Python: Store values in dict from stdout

How can I do the following in Python:
I have a command output that outputs this:
Datexxxx
Clientxxx
Timexxx
Datexxxx
Client2xxx
Timexxx
Datexxxx
Client3xxx
Timexxx
And I want to work this in a dict like:
Client:(date,time), Client2:(date,time) ...
After reading the data into a string subject, you could do this:
import re
d = {}
for match in re.finditer(
"""(?mx)
^Date(.*)\r?\n
Client\d*(.*)\r?\n
Time(.*)""",
subject):
d[match.group(2)] = (match.group(1), match.group(2))
How about something like:
rows = {}
thisrow = []
for line in output.split('\n'):
if line[:4].lower() == 'date':
thisrow.append(line)
elif line[:6].lower() == 'client':
thisrow.append(line)
elif line[:4].lower() == 'time':
thisrow.append(line)
elif line.strip() == '':
rows[thisrow[1]] = (thisrow[0], thisrow[2])
thisrow = []
print rows
Assumes a trailing newline, no spaces before lines, etc.
What about using a dict with tuples?
Create a dictionary and add the entries:
dict = {}
dict['Client'] = ('date1','time1')
dict['Client2'] = ('date2','time2')
Accessing the entires:
dict['Client']
>>> ('date1','time1')

Categories