converting bash/shell line into python 2.6 - python

I am relatively new to programming especially in BASH and python, as well as this site. Sorry for multiple posts!
I am trying to get this line into python. I have tried os.popen.
Are there any other ways you guys can think of how to do it. I am limited to python v2.6 and cannot upgrade to a newer version, otherwise I would know how to do it in 3.whatever.
Thanks!
sample1=($(/bin/cat /proc/meminfo | egrep 'MemTotal|MemFree|Cached|SwapTotal|SwapFree|AnonPages|Dirty|Writeback|PageTables|HugePages_' | awk ' { print $2} ' | pr -t -T --columns=15 --width=240))
This is what I have in python but it isn't working. Any one have any idea how to rearrange it so it would be the same as the line in BASH.
I know these shouldn't all be elif. Honestly, I'm stumped and don't know where to go from here.
# Parse /proc/meminfo and collect the numeric second column (kB values)
# of the selected fields into the sample1 dict, keyed by field name.
#
# Fixes to the original: str.find() returns -1 when the substring is
# absent, which is truthy, so `if line.find('MemTotal'):` matched almost
# every line; `line.find(line, 'Cached')` passed the wrong arguments; and
# readline() was never called inside the loop, so it never advanced.
wanted = ('MemTotal', 'MemFree', 'Cached', 'SwapTotal', 'SwapFree',
          'AnonPages', 'Dirty', 'Writeback', 'PageTables', 'HugePages_')
sample1 = {}
# `with` is available natively on Python 2.6.
with open('/proc/meminfo') as inFile:
    for line in inFile:
        fields = line.split()
        # fields[0] is the tag with a trailing colon, e.g. "MemTotal:".
        tag = fields[0].rstrip(':')
        for prefix in wanted:
            if tag.startswith(prefix):
                # fields[1] is the value; keys keep the full tag so the
                # different HugePages_* counters stay distinct.
                sample1[tag] = fields[1]
                break

This should run the bash command from python by piping the output through subprocess.Popen and work for python2.6:
from subprocess import Popen, PIPE

# Recreate the shell pipeline stage by stage, wiring each process's stdout
# into the next process's stdin without invoking a shell.
cat_proc = Popen(["cat", "/proc/meminfo"], stdout=PIPE)
grep_proc = Popen(["egrep", 'MemTotal|MemFree|Cached|SwapTotal|SwapFree|AnonPages|Dirty|Writeback|PageTables|HugePages_'],
                  stdin=cat_proc.stdout, stdout=PIPE)
# Drop our reference to the upstream pipe so SIGPIPE propagates correctly.
cat_proc.stdout.close()
awk_proc = Popen(["awk", "{ print $2}"], stdin=grep_proc.stdout, stdout=PIPE)
grep_proc.stdout.close()
pr_proc = Popen(["pr", "-t", "-T", "--columns=15", "--width=240"],
                stdin=awk_proc.stdout, stdout=PIPE)
awk_proc.stdout.close()
result = pr_proc.communicate()
print(result[0])
The output from my system is:
16341932 4484840 5105220 0 8388604 8388604 108 0 5106832 78100 0 0 0 0 0
You can also open the file with python and pass the file object to the first process:
from subprocess import Popen, PIPE, STDOUT

# Same idea, but let egrep read /proc/meminfo from a Python file object
# instead of spawning `cat`, then pipe through awk and pr.
with open("/proc/meminfo") as meminfo:
    grep_proc = Popen(["egrep", 'MemTotal|MemFree|Cached|SwapTotal|SwapFree|AnonPages|Dirty|Writeback|PageTables|HugePages_'],
                      stdin=meminfo, stdout=PIPE)
    awk_proc = Popen(["awk", "{ print $2}"], stdin=grep_proc.stdout, stdout=PIPE)
    grep_proc.stdout.close()
    pr_proc = Popen(["pr", "-t", "-T", "--columns=15", "--width=240"],
                    stdin=awk_proc.stdout, stdout=PIPE)
    awk_proc.stdout.close()
    result = pr_proc.communicate()
    print(result[0])
A pure python solution using str.find to mimic egrep finding lines that contain any of the substrings from pre in the file and using str.rsplit to get the second column i.e the digits:
pre = ('MemTotal', 'MemFree', 'Cached', 'SwapTotal', 'SwapFree', 'AnonPages', 'Dirty', 'Writeback', 'PageTables', 'HugePages_')
# Keep the second-from-last column of every line whose text contains any of
# the tags in `pre` (the `in` operator is equivalent to find(...) != -1,
# and short-circuits on the first match).
with open("/proc/meminfo") as f:
    out = [entry.rsplit(None, 2)[1] for entry in f
           if any(tag in entry for tag in pre)]
print(" ".join(out))
Output:
16341932 4507652 5128624 0 8388604 8388604 48 0 5059044 78068 0 0 0 0 0
Using any in the above code will lazily evaluate and short circuit on a match, if there is no match it will evaluate to False so nothing gets added.
Staying truer to egrep we can use re.search compiling the patterns/substrings to check for:
import re

# Compile the egrep alternation once; re.search matches anywhere in the line.
r = re.compile(r"MemTotal|MemFree|Cached|SwapTotal|SwapFree|AnonPages|Dirty|Writeback|PageTables|HugePages_")
out = []
with open("/proc/meminfo") as f:
    for entry in f:
        if r.search(entry) is None:
            continue
        # rsplit twice from the right: [tag, value, unit][1] -> the digits.
        out.append(entry.rsplit(None, 2)[1])
print(" ".join(out))
Output:
16341932 4507596 5128952 0 8388604 8388604 0 16788 5058092 78464 0 0 0 0 0
And python being python we can put all the logic in a single list comp to get the data:
# Single list comprehension pulling the value column of the wanted fields.
# Fix: the original filtered with `r.search`, but `r` is not defined in
# this snippet — it defines `pre` and never uses it. Filter with `pre`
# via plain substring tests instead.
pre = ('MemTotal', 'MemFree', 'Cached', 'SwapTotal', 'SwapFree', 'AnonPages', 'Dirty', 'Writeback', 'PageTables', 'HugePages_')
with open("/proc/meminfo") as f:
    out = [line.rsplit(None, 2)[1] for line in f if any(p in line for p in pre)]
print(" ".join(out))
Output:
16341932 4443796 5133420 0 8388604 8388604 120 0 5118004 78572 0 0 0 0 0

This gives the same output, but using built-in Python features instead of shelling out for everything:
columns = [
'MemTotal', 'MemFree', 'Cached', 'SwapTotal', 'SwapFree', 'AnonPages',
'Dirty', 'Writeback', 'WritebackTmp', 'PageTables', 'HugePages_Free',
'HugePages_Rsvd', 'HugePages_Surp', 'HugePages_Total'
]
stats = {}
with open('/proc/meminfo') as infile:
for line in infile:
line = line.split()
stats[line[0][:-1]] = line[1]
values = [stats[key] for key in columns]
print '\t'.join(values)

Something along this line perhaps:
desiredTags = [ 'MemTotal', 'MemFree', 'Cached', 'SwapCached', 'SwapTotal',
'SwapFree', 'AnonPages', 'Dirty', 'Writeback', 'PageTables',
'HugePages_Total', 'HugePages_Free', 'HugePages_Rsvd',
'HugePages_Surp' ]
stats = []
with open('/proc/meminfo') as fd:
for line in fd:
fields = line.strip().split()
# strip off the colon from the first field
if fields[0][:-1] in desiredTags:
stats.append(fields[1])
print ' '.join(stats)
Not sure I got the list of desired tags exactly right - feel free to amend those as necessary.

Related

Count nr of occurrences of strings in one column based on value in other column Python

Sorry in advance for the really basic question and I know there are posts about this issue everywhere, but I cannot seem to get around it also with all the help on those other web pages.
For starters, I am a beginner with python so sorry for the blurry code. But what I simply want is to count the number of times a certain string occurs in column 2, when the value in column 1 stays the same. If this value changes, the loop should start over. It sounds really simple, but I am confused by python reading my text file as a string (giving me the issues with strip and split and so on). I cannot seem to get this code working. Please someone help out this noob in distress!
Input:
6 ABMV
6 ABMV
6 FOOD
6 FOOD
6 IDLE
10 IDLE
10 ABMV
10 IDLE
Code:
#! /usr/bin/env python
# Count how often each behavior occurs per chimp; write one summary line
# per chimp to counts_outfile.txt.
#
# Fixes to the original: the second `for lines in infile` loop iterated an
# already-exhausted file handle; `lines[0][0]` indexed characters, not
# fields; `last_chimp == ...` compared instead of assigning; and the final
# chimp's counts were never written.
from collections import Counter

outfile = open("counts_outfile.txt", "w")
with open("test_counts.txt", "r") as infile:
    # split() handles both tab- and space-separated rows; skip blanks.
    rows = [entry.split() for entry in infile if entry.strip()]

last_chimp = rows[0][0]
nr_ABMV = nr_FOOD = nr_IDLE = 0
for chimp, behavior in rows:
    if chimp != last_chimp:
        # Chimp changed: flush the previous chimp's tallies and reset.
        outfile.write("chimp_header %s\t%s\t%s\t%s\n" % (last_chimp, nr_ABMV, nr_FOOD, nr_IDLE))
        last_chimp = chimp
        nr_ABMV = nr_FOOD = nr_IDLE = 0
    if behavior == "ABMV":
        nr_ABMV += 1
    elif behavior == "FOOD":
        nr_FOOD += 1
    elif behavior == "IDLE":
        nr_IDLE += 1
# Flush the final chimp's counts (the loop only writes on a change).
outfile.write("chimp_header %s\t%s\t%s\t%s\n" % (last_chimp, nr_ABMV, nr_FOOD, nr_IDLE))
outfile.close()
Thank you in advance, you will help me and obviously a lot of 'chimps' (chimpanzees) a lot!!
regards,
Here is an example, very similar to your code :
# Tally behaviors per chimp in input order; emit a row whenever the chimp
# id changes, plus a final row for the last chimp.
fmt = "chimp_header {:>4} {:4} {:4} {:4}\r\n"
outfile = open("counts_outfile.txt", "w")
outfile.write(fmt.format('chimp', 'ABMV', 'FOOD', 'IDLE'))
with open("test_counts.txt", "r") as infile:
    rows = [entry.strip() for entry in infile if entry.strip()]
current = rows[0].split()[0]
tally = { "ABMV":0, "FOOD":0, "IDLE":0 }
for row in rows:
    fields = row.strip().split()
    if fields[0] != current:
        # New chimp: write the finished tallies, then start fresh.
        outfile.write(fmt.format(current, tally["ABMV"], tally["FOOD"], tally["IDLE"]))
        current = fields[0]
        tally = { "ABMV":0, "FOOD":0, "IDLE":0 }
    tally[fields[1]] += 1
outfile.write(fmt.format(current, tally["ABMV"], tally["FOOD"], tally["IDLE"]))
outfile.close()
Here is another example using Counter and a dictionary :
from collections import Counter

# Count (chimp, behavior) pairs in one pass with Counter, then fold the
# totals into a per-chimp dict before writing the report.
with open("test_counts.txt", "r") as infile:
    observations = [ tuple(row.strip().split()) for row in infile if row.strip() ]
chimps = { obs[0] : { "ABMV":0, "FOOD":0, "IDLE":0 } for obs in observations }
for pair, total in Counter(observations).items() :
    chimps[pair[0]][pair[1]] = total
with open("counts_outfile.txt", "w") as outfile :
    outfile.write("chimp_header {:>4} {:4} {:4} {:4}\r\n".format('chimp', 'ABMV', 'FOOD', 'IDLE'))
    for chimp in chimps :
        row = chimps[chimp]
        outfile.write("chimp_header {:>4} {:4} {:4} {:4}\r\n".format(chimp, row["ABMV"], row["FOOD"], row["IDLE"]))
Both examples produce the same results :
chimp_header chimp ABMV FOOD IDLE
chimp_header 6 2 2 1
chimp_header 10 1 0 2
I hope this gives you some ideas.

How do I count the number of lines that are full-line comments in python?

I'm trying to create a function that accepts a file as input and prints the number of lines that are full-line comments (i.e. the line begins with # followed by a comment).
For example a file that contains say the following lines should print the result 2:
abc
#some random comment
cde
fgh
#another random comment
So far I tried along the lines of but just not picking up the hash symbol:
def countHashedLines(filename):
    """Return the number of full-line comments in *filename*.

    A full-line comment is a line whose very first character is '#'.
    Fixes to the original: the function now actually uses its parameter,
    counts matches instead of splitting on '#', and returns the tally
    (the original looped forever on a module-level `line` and returned
    nothing).
    """
    tally = 0
    with open(filename, "r") as infile:
        for line in infile:
            if line.startswith('#'):
                tally += 1
    return tally

if __name__ == "__main__":
    # Module-level I/O is guarded so importing this file has no side effects.
    print(countHashedLines("code.py"))
Thanks in advance,
Jemma
I re-worded a few statements for ease of use (subjective) but this will give you the desired output.
def countHashedLines(lines):
    """Return how many entries in *lines* are full-line comments ('#' first)."""
    hashed = 0
    for entry in lines:
        if entry.startswith('#'):
            hashed += 1
    return hashed
# Read the target file's lines and count its full-line comments.
with open('code.py', 'r') as infile:
    all_lines = infile.readlines()
num_hash_nums = countHashedLines(all_lines)  # <- 2
...or if you want a compact and clean version of the function...
def countHashedLines(lines):
    """Return the number of entries in *lines* that start with '#'."""
    return sum(1 for entry in lines if entry.startswith('#'))
I would pass the file through standard input
import sys

# Count full-line comments read from standard input, e.g.:
#   python count_comments.py < code.py
# Fix: the original used inline """...""" strings as trailing comments on
# compound-statement headers, which is a syntax error; they are real
# comments now. (Could also open the file and iterate through it.)
count = 0
for line in sys.stdin:
    if line[0] == '#':  # every time a line begins with '#'
        count += 1      # increment
print(count)
Here is another solution that uses regular expressions and will detect comments that have white space in front.
import re

# Pattern for a full-line comment, allowing leading whitespace before '#'.
FULL_LINE_COMMENT = re.compile(r"^\s*#.*$")

def countFullLineComments(infile) :
    """Count (and echo) the full-line comments in the open file *infile*."""
    total = 0
    for line in infile.readlines():
        match = FULL_LINE_COMMENT.match(line)
        if match is not None:
            total += 1
            print(match.group(0))
    return total
# Run the counter over this script's own source file.
with open("code.py", "r") as infile:
    print(countFullLineComments(infile))

Reading repeated information from the file in different order in Python

I tried to search for similar questions, but I couldn't find. Please mark as a duplicate if there is similar questions available.
I'm trying to figure out a way to read and gather multiple information from single file. Here in the file Block-A,B & C are repeated in random order and Block-C has more than one information to capture. Every block end with 'END' text. Here is the input file:
Block-A:
(info1)
END
Block-B:
(info2)
END
Block-C:
(info3)
(info4)
END
Block-C:
(info7)
(info8)
END
Block-A:
(info5)
END
Block-B:
(info6)
END
Here is my code:
import re
# Collected payloads: out1 <- last line seen after a Block-A header,
# out2 <- after Block-B, array <- lines gathered while in Block-C mode.
out1 = out2 = out3 = ""
# Flags recording which block types have been seen since the last print.
a = b = c = False
array=[]
with open('test.txt', 'r') as f:
    for line in f:
        if line.startswith('Block-A'):
            # Consume the line following the header as this block's payload.
            line = next(f)
            out1 = line
            a = True
        if line.startswith('Block-B'):
            line=next(f)
            out2 = line
            b = True
        if line.startswith('Block-C'):
            c = True
        if c:
            # NOTE(review): this branch stays active on every subsequent
            # loop iteration until the flags reset below, consuming one
            # extra line via next(f) per iteration — confirm this is the
            # intended way to gather multi-line Block-C payloads.
            line=next(f)
            if not line.startswith('END\n'):
                out3 = line
                array.append(out3.strip())
        if a == b == c == True:
            # One sample of each block type gathered: emit and reset state.
            print(out1.rstrip() +', ' + out2.rstrip() + ', ' + str(array))
            a = b = c = False
            array=[]
Thank you in advance for your valuable inputs.
Use a dictionary for the datas from each block. When you read the line that starts a block, set a variable to that name, and use it as the key into the dictionary.
# Collect each block's body text into a dict keyed by the block name.
# Fixes to the original: `endswidth` typo, missing ':' after the elif,
# invalid `else if` spelling, and the header test never matched because
# lines from file iteration keep their trailing newline — strip first.
out = {}
blockname = None  # initialized so stray leading lines cannot raise NameError
with open('test.txt', 'r') as f:
    for raw in f:
        line = raw.strip()
        if line.endswith(':'):
            # Start of a block, e.g. "Block-A:" -> key "Block-A".
            blockname = line[:-1]
            if blockname not in out:
                out[blockname] = ''
        elif line == 'END':
            blockname = None
        elif blockname:
            # Accumulate the raw body line (newline preserved).
            out[blockname] += raw
print(out)
If you don't want the Block-X headers to print, uncomment the elif statement
import os
data = r'/home/x/Desktop/test'
txt = open(data, 'r')
for line in txt.readlines():
line = line[:-1]
if line in ('END'):
pass
#elif line.startswith('Block'):
# pass
else:
print line
>>>>
Block-A:
(info1)
Block-B:
(info2)
Block-C:
(info3)
(info4)
Block-C:
(info7)
(info8)
Block-A:
(info5)
Block-B:
(info6)

Terminal returns to the directory without output every time I run my code

I am trying to solve a tuple problem in Python and I am running my code in the terminal, but every time I run it, it just returns to the folder the script is stored in without any output. Here's the code:
name = raw_input("Enter file:")
if len(name) < 1 :
name = "mbox-short.txt"
handle = open(name)
count = dict()
fh = handle.read()
for line in fh :
lines = line.rstrip()
if lines.startswith('From '):
word = lines.split()``
words = word[5]
wordss = words.split()
wordsss = wordss[0]
for letters in wordsss :
count[letters] = wordsss.get(letter, 0) +1
lst = list ()
for k,v in count.items() :
lst.append( (k,v) )
lst.sort(k)
print lst
name = raw_input("Enter file: ")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = {}
# handle = open("mbox-short.txt")
for line in handle:
if line.startswith('From '):
hour = line.split()[-2].split(':')[0]
if hour in hours:
hours[hour] = hours[hour] + 1
else:
hours[hour] = 1
hours = sorted(hours.items())
for hour, count in hours:
print hour, count
Output:
bharat#bhansa:~/Desktop/Stack$ python edit_narang.py
Enter file:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
Please go through this: http://www.pythonlearn.com/html-007/cfbook011.html
fh = handle.read()
for line in fh :
lines = line.rstrip()
In these lines you are reading all the file contents and storing them in fh as a single string. Now when you iterate over it in for line in fh, you are just getting individual characters in line, so line.rstrip() doesn't make much sense and neither does if lines.startswith('From ').

Hadoop python job gives me an error when the code using echo works perfectly

I am trying to find the solution to stanford's CS246 2014 hw solution of the most popular guy on campus.
I keep running into the error "Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 1"
My python code for the mapper is
{#!/usr/bin/python
import sys
ID = None
fcount = 0
count = 0
for line in sys.stdin:
line = line.strip()
words = line.split(" ")
ID,friends = words
fcount = len(friends.split(","))
#count = fcount * -1
#for i in friends:
# if i != ','
# fcount +=1
print "%d\t%s" %(fcount,ID)
}
and my reducer code is
{
#!/usr/bin/python
import sys
ID = None
fcount = 0
count = 0
for line in sys.stdin:
line = line.strip()
words = line.split(" ")
ID,friends = words
fcount = len(friends.split(","))
#count = fcount * -1
#for i in friends:
# if i != ','
# fcount +=1
print "%d\t%s" %(fcount,ID)
}
Any help would be appreciated
Finally figured it out. The error happened because of the two different versions of python. I had python 3 on my system and python 2 on the hadoop system.

Categories