How to add digits in output - python

I have a file which I'm reading using Python. In this file, I'm selecting certain numbers which are displayed as a list in the output, and I want to add these numbers. Here is the code I'm using:
from collections import Counter

with open("C:/xampp/htdocs/Final/uploads/file.dist", 'r') as rf:
    g = [line.replace(' ', '') for line in rf]

i = []
for e in g[1::47]:
    r = e[:12]
    s = r[:2]
    i.append(s)

m = Counter(i)
for letter in m:
    t = m[letter]
    print(t)
This gives me output as follows:
80
80
80
80
I want to add these numbers so that the final output will be 320 (80+80+80+80). I've tried the list method and importing the math library, but none of them gives the required output. Any help will be highly appreciated.

Use += instead of = to add the values of m[letter] to t:
from collections import Counter

with open("C:/path/file.dist", 'r') as rf:
    g = [line.replace(' ', '') for line in rf]

i = []
for e in g[1::47]:
    r = e[:12]
    s = r[:2]
    i.append(s)

m = Counter(i)
t = 0
for letter in m:
    t += m[letter]
print(t)
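As a side note, since m is a collections.Counter, the whole loop can be replaced by summing its values directly; a minimal sketch with a hypothetical counter shaped like the question's data (four keys, each counted 80 times):
from collections import Counter

# hypothetical counter: four prefixes, each seen 80 times
m = Counter({'12': 80, '34': 80, '56': 80, '78': 80})
print(sum(m.values()))  # 320, the same total as accumulating m[letter] in the loop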

Related

python get most common words, greater than 3 characters

Hi, I'm quite new to Python.
I'm trying to figure out how to get the most common words listed in the clean.txt file, but only words with length > 3.
import re
from collections import Counter

words = re.findall(r'\w+', open('clean.txt', 'r', encoding='utf-8').read().lower())
count = Counter(words).most_common(100)

# define a sort key
def sort_key(count):
    return count[1]

def read_data():
    f = open('clean.txt', 'r', encoding='utf-8')
    s = f.read()
    x = s.split()
    for i in x:
        if len(i) > 5:
            print(i)

count.sort(key=sort_key, reverse=True)
print(count)
I tried printing read_data(), but I got all the words listed without the number of times each is mentioned.
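One way to combine the length filter with Counter before asking for the most common entries; a minimal sketch, reusing the question's clean.txt and the 100-entry limit:
import re
from collections import Counter

words = re.findall(r'\w+', open('clean.txt', 'r', encoding='utf-8').read().lower())
long_words = [w for w in words if len(w) > 3]            # keep only words longer than 3 characters
for word, count in Counter(long_words).most_common(100):
    print(word, count)                                   # the word and how many times it appears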

How to write itertools.izip to a txt file, one value per line?

My code
import itertools
import os

with open("base.txt") as fv, open("new2.txt", 'r') as fi, open('sortedf.txt', 'w') as fs:
    vel = (line.strip() for line in fv)
    ind = (int(line.strip()) for line in fi)
    z = itertools.izip(ind, vel)  # sort according to ind
    # itertools.izip(vel, ind)  # sort according to vel
    for i, v in sorted(z):
        fs.write(str(v))
I got everything on one line
2.900000e+032.900000e+032.900000e+032.900000e+032.
When I change to
fs.write('\n'.join(str(v)))
Then I got
2
.
9
0
0
0
0
0
e
+
0
32
.
9
0
0
0
0
0
e
+
0
32
.
How do I get a proper one-value-per-line output?
Just Change
for i, v in sorted(z):
    fs.write(str(v))
to
for i, v in sorted(z):
    print(v, file=fs)
\n is added automatically due to the end="\n" default parameter of print. This works for any datatype, so there is no need for str(v).
Please try the following (the newline has to be added explicitly, otherwise everything ends up on one line again):
fs.writelines(map(lambda x: x[1] + '\n', sorted(z)))
Why the statement below failed:
fs.write('\n'.join(str(v)))
Here str(v) is a string, and join treats it as a sequence of characters, inserting '\n' between each of them. Look at the example below for clarity:
>>> sam = 'hello'
>>> '-'.join(sam)
'h-e-l-l-o'
So how to use fs.write? Write the value as is, then add the line break separately:
fs.write(v)
fs.write('\n')
A few suggestions:
import os  # load important modules first
from itertools import izip  # makes processing faster, if you only need izip

with open("base.txt") as fv, open("new2.txt", 'r') as fi, open('sortedf.txt', 'w') as fs:
    vel = [line.strip() for line in fv]       # use square brackets
    ind = [int(line.strip()) for line in fi]  # use square brackets
    z = izip(ind, vel)  # sort according to ind
    for i, v in sorted(z):
        fs.write(v)
        fs.write('\n')  # adding the line break

How to extract numbers from a text file and multiply them together?

I have a text file which contains 800 words with a number in front of each. (Each word and its number are on their own line, so the file has 800 lines.) I have to find the numbers and then multiply them together. Because multiplying a lot of floats underflows to zero, I have to use logarithms to prevent the underflow, but I don't know how.
This is the formula:
c_NB = argmax_c [ log P(c) + Σ_i log P(x_i | c) ]
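Since log(a*b) = log(a) + log(b), the product of many small probabilities can be replaced by a sum of their logarithms, which avoids the underflow. A minimal illustration, using a few of the values from the file snippet below:
import math

# three of the probabilities shown in the file snippet
probs = [0.0019398642095053346, 0.03200775945683802, 0.002909796314258002]
log_sum = sum(math.log10(p) for p in probs)  # log10 of the product of probs
print(log_sum)                               # a sum of logs; no underflow even with 800 factors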
This code doesn't print anything:
output = []
with open('c:/python34/probEjtema.txt', encoding="utf-8") as f:
    w, h = map(int, f.readline().split())
    tmp = []
    for i, line in enumerate(f):
        if i == h:
            break
        tmp.append(map(int, line.split()[:w]))
    output.append(tmp)
print(output)
The file language is Persian.
A snippet of the file:
فعالان 0.0019398642095053346
محترم 0.03200775945683802
اعتباري 0.002909796314258002
مجموع 0.0038797284190106693
حل 0.016488845780795344
مشابه 0.004849660523763337
مشاوران 0.027158098933074686
مواد 0.005819592628516004
معادل 0.002909796314258002
ولي 0.005819592628516004
ميزان 0.026188166828322017
دبير 0.0019398642095053346
دعوت 0.007759456838021339
اميد 0.002909796314258002
You can use regular expressions to find the first number in each line, e.g.
import re

output = []
with open('c:/python34/probEjtema.txt', encoding="utf-8") as f:
    for line in f:
        match = re.search(r'\d+\.?\d*', line)
        if match:
            output.append(float(match.group()))
print(output)
re.search(r'\d+\.?\d*', line) looks for the first number (integer or float with a .) in each line.
Here is a nice online regex tester: https://regex101.com/ (for debugging / testing).
Edit: changed the regex to \d+\.?\d* to catch both integers and floats.
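For example, applied to one of the sample lines from the question (a small sketch):
import re

line = 'محترم 0.03200775945683802'      # one line from the question's file snippet
match = re.search(r'\d+\.?\d*', line)   # first integer or float in the line
print(match.group())                    # '0.03200775945683802'
print(float(match.group()))             # as a float, ready to multiply or take the log of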
If I understood you correctly, you could do something along the lines of:
result = 1
with open('c:/python34/probEjtema.txt', encoding="utf-8") as f:
    for line in f:
        word, number = line.split()  # line.split("\t") if the numbers are separated by a tab
        result = result * float(number)
The following will create an output list with all the numbers, and result will give the final multiplication result:
import math

output = []
result = 1
eres = 0
with open('c:/python34/probEjtema.txt', encoding="utf-8") as f:
    for line in f:
        output.append(line.split()[1])
        result *= float(line.split()[1])
        eres += math.log10(float(line.split()[1]))  # result in log base 10
print(output)
print(result)
print(eres)

How to perform summation for regular expression match results in python

I'm trying to extract certain numbers from multiple files and perform a summation over the extracted numbers. Here is what I have written so far:
import re, os

path = "F:/s"
in_files = os.listdir(path)
for g in in_files:
    file = os.path.join(path, g)
    text = open(file, "r")
    a = text.readlines()
    b = a[6]
    m = re.search('\t(.+?)\n', b)
    if m:
        found = m.group()
        print(found)
Extraction is working; I get results like this:
122
74
97
Now I want to sum all these numbers.
Let's do it using re.findall():
count = 0
for number in re.findall('\t(.+?)\n', b):
    count += int(number.strip())  # add each extracted number to count
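Equivalently, the loop can be collapsed into a single sum over the matches; a small sketch with the same pattern and a hypothetical line b shaped like the question's data:
import re

b = 'something\t122\n'                      # hypothetical line: text, a tab, then the number
count = sum(int(n.strip()) for n in re.findall('\t(.+?)\n', b))
print(count)                                # 122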
You can create an empty list above your loop and instead of printing, just append found to that list. You can then sum the contents of that list (if everything goes well you should end up with a list of 'strings of integers').
import re, os

path = "F:/s"
in_files = os.listdir(path)
l = []
for g in in_files:
    ...
    ...
    if m:
        found = m.group()
        l.append(found)
Your list should look like this now: ['122', '74', '97'],
so you can use map() and sum() to find the total (outside the loop):
print(sum(map(int, l)))  # 293

generating a single outfile after analyzing multiple files in python

I have multiple files, each containing 8 or 9 columns.
For a single file: I have to read the last column, which contains some value, count the number of occurrences of each value, and then generate an outfile.
I have done it like this:
from collections import Counter

inp = open(filename, 'r').read().strip().split('\n')
out = open(filename, 'w')

C = Counter()
for line in inp:
    k = line.split()[-1]  # read the last column
    C[k] += 1

for value, count in C.items():
    x = "%s %d" % (value, count)
    out.write(x)
    out.write('\n')
out.close()
Now the problem is that this works fine when I have to generate one output for one input. But I need to scan a directory using the glob.iglob function so that all files are used as input, then perform the above program on each file to gather the results, and then of course write all of the analyzed results into a single OUTPUT file.
NOTE: While generating the single OUTPUT file, if any value turns out to be repeated then, instead of writing the same entry twice, only the 'count' should be summed up. E.g. analysis of the 1st file generates:
123 6
111 5
0 6
45 5
and the 2nd file generates:
121 9
111 7
0 1
22 2
in this case the OUTPUT file must be written in such a way that it contains:
123 6
111 12 # counts summed up for the repeated value
0 7
45 5
22 2
I have written the program for single-file analysis, but I'm stuck on the mass-analysis part.
Please help.
from collections import Counter
import glob

out = open('OUTPUT.txt', 'w')  # single output file; the name 'OUTPUT.txt' is assumed here
g_iter = glob.iglob('path_to_dir/*')
C = Counter()
for filename in g_iter:
    f = open(filename, 'r')
    inp = f.read().strip().split('\n')
    f.close()
    for line in inp:
        k = line.split()[-1]  # read the last column
        C[k] += 1

for value, count in C.items():
    x = "%s %d" % (value, count)
    out.write(x)
    out.write('\n')
out.close()
After de-uglification:
from collections import Counter
import glob

def main():
    # create Counter
    cnt = Counter()
    # collect data
    for fname in glob.iglob('path_to_dir/*.dat'):
        with open(fname) as inf:
            cnt.update(line.split()[-1] for line in inf)
    # dump results
    with open("summary.dat", "w") as outf:
        outf.writelines("{:5s} {:>5d}\n".format(val, num) for val, num in cnt.items())

if __name__ == "__main__":
    main()
Initialise an empty dictionary at the top of the program,
let's say, dic = dict(),
and for each Counter update dic so that the values of matching keys are summed and the new keys are also added to dic.
To update dic use this:
dic = dict((n, dic.get(n, 0) + C.get(n, 0)) for n in set(dic) | set(C))
where C is the current Counter. After all files are finished, write dic to the output file.
import glob
from collections import Counter

dic = dict()
g_iter = glob.iglob(r'c:\\python32\fol\*')
for x in g_iter:
    lis = []
    with open(x) as f:
        inp = f.readlines()
    for line in inp:
        num = line.split()[-1]
        lis.append(num)
    C = Counter(lis)
    dic = dict((n, dic.get(n, 0) + C.get(n, 0)) for n in set(dic) | set(C))

for x in dic:
    print(x, '\t', dic[x])
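As a side note, since C is a collections.Counter, the merge step can also be done with Counter.update, which adds the counts of keys that already exist; a small sketch using the counts from the question's example:
from collections import Counter

total = Counter()
total.update(Counter({'111': 5, '0': 6}))  # counts from the 1st file
total.update(Counter({'111': 7, '0': 1}))  # counts from the 2nd file
print(total)                               # Counter({'111': 12, '0': 7})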
I did it like this:
import glob
from collections import Counter

out = open("write.txt", 'a')
C = Counter()
for file in glob.iglob('temp*.txt'):
    for line in open(file, 'r').read().strip().split('\n'):
        k = line.split()[-1]  # read the last column
        C[k] += 1

for value, count in C.items():
    x = "%s %d" % (value, count)
    out.write(x)
    out.write('\n')
out.close()
