Syntax error in code converted from bash to python

I am very new to Python. I have some Bash code that I need to convert to Python. I tried a converter but I am getting a syntax error. It would be very helpful if someone could help me find the error!
Error:
  File "cp_file.py", line 75
    print(myfilename.val)
SyntaxError: invalid syntax
It would be very helpful if someone could convert the following Bash code to Python, or help me find the error!
Bash Code :
grep " 200 " /var/log/ba/access.log |awk '{print $7}'|sort|uniq > /tmp/read_log.txt
for i in $(cat /tmp/read_log.txt); do
echo $i
myfilename="$(echo ${i##*/})"
echo $myfilename
wget http://mydata.na.xyz/$i
curl -X POST -d #$myfilename http://xyz.xyz/ba/$i
done
Python Code :
#! /usr/bin/env python
from __future__ import print_function
import sys,os,subprocess

class Bash2Py(object):
    __slots__ = ["val"]
    def __init__(self, value=''):
        self.val = value
    def setValue(self, value=None):
        self.val = value
        return value

def GetVariable(name, local=locals()):
    if name in local:
        return local[name]
    if name in globals():
        return globals()[name]
    return None

def Make(name, local=locals()):
    ret = GetVariable(name, local)
    if ret is None:
        ret = Bash2Py(0)
        globals()[name] = ret
    return ret

def Str(value):
    if isinstance(value, list):
        return " ".join(value)
    if isinstance(value, basestring):
        return value
    return str(value)

def Array(value):
    if isinstance(value, list):
        return value
    if isinstance(value, basestring):
        return value.strip().split(' ')
    return [ value ]

_rc0 = _rcr1, _rcw1 = os.pipe()
if os.fork():
    os.close(_rcw1)
    os.dup2(_rcr1, 0)
    _rcr2, _rcw2 = os.pipe()
    if os.fork():
        os.close(_rcw2)
        os.dup2(_rcr2, 0)
        _rcr3, _rcw3 = os.pipe()
        if os.fork():
            os.close(_rcw3)
            os.dup2(_rcr3, 0)
            subprocess.call("uniq",shell=True,stdout=file("/tmp/read_log.txt",'wb'))
        else:
            os.close(_rcr3)
            os.dup2(_rcw3, 1)
            subprocess.call(["sort"],shell=True)
            sys.exit(0)
    else:
        os.close(_rcr2)
        os.dup2(_rcw2, 1)
        subprocess.call(["awk","{print $7}"],shell=True)
        sys.exit(0)
else:
    os.close(_rcr1)
    os.dup2(_rcw1, 1)
    subprocess.call(["grep","200","/var/log/ba/access.log"],shell=True)
    sys.exit(0)

for Make("i").val in Array(os.popen("cat /tmp/read_log.txt").read().rstrip("\n")):
    print(i.val)
    Make("myfilename").setValue(os.popen("echo "+str(i.val##*/)).read().rstrip("\n"))
    print(myfilename.val)
    subprocess.call(["wget","http://xyz.xyz/"+str(i.val)],shell=True)
    subprocess.call(["curl","-X","POST","-D","#"+str(myfilename.val),"http://xyz.xyz/ba/"+str(i.val)],shell=True)

That auto-generated Python code is horrible. You'd be much better off sticking with Bash. But best would be to actually migrate your code to Python using human understanding. For example, take just this part:
grep " 200 " /var/log/ba/access.log | awk '{print $7}'|sort|uniq > /tmp/read_log.txt
In Python that is something like:
with open('/var/log/ba/access.log') as infile, open('/tmp/read_log.txt', 'w') as outfile:
    results = set()
    for line in infile:
        if ' 200 ' in line:
            tokens = line.split()
            results.add(tokens[6])  # 7th token
    for result in sorted(results):
        print >>outfile, result
For the HTTP part, use the Python module requests. It's easy to use. Quite possibly you won't need outfile anymore--you can just directly use for result in sorted(results) to make your HTTP requests.
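For example, a rough sketch of the download-and-POST loop with requests, reusing the results set built above (the URLs are taken from the question; I'm assuming the # in the curl command was meant to be @, i.e. POST the downloaded file's contents):

import os
import requests

for result in sorted(results):
    myfilename = os.path.basename(result)            # same idea as ${i##*/} in the bash loop
    # equivalent of: wget http://mydata.na.xyz/$i
    response = requests.get('http://mydata.na.xyz/' + result)
    with open(myfilename, 'wb') as f:
        f.write(response.content)
    # equivalent of: curl -X POST -d @myfilename http://xyz.xyz/ba/$i
    with open(myfilename, 'rb') as f:
        requests.post('http://xyz.xyz/ba/' + result, data=f)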

First, never read the lines of a file with for; use while instead. See here why.
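A minimal sketch of that while/readline pattern (the file name here is just an example, not from the question):

f = open("/tmp/read_log.txt")
while True:
    line = f.readline()
    if not line:                 # empty string means end of file
        break
    print(line.rstrip("\n"))
f.close()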
It's a very small script, so it is easier to rewrite it in Python than to use your converter.
If you are in a hurry and really need the script in Python, you can call Linux commands from inside Python. It is not the best way, but it is faster and easier for someone who doesn't know Python:
import subprocess

p = subprocess.Popen(["curl", "-X", "POST", "YOUR_URL"],
                     stdout=subprocess.PIPE)
(output, err) = p.communicate()

Custom Popen.communicate method gives wrong output

Let's start by considering this code:
proc_stdin.py
import sys

if __name__ == '__main__':
    for i, line in enumerate(sys.stdin):
        sys.stdout.write(line)
test.py
import subprocess


def run_bad(target, input=None):
    proc = subprocess.Popen(
        target,
        universal_newlines=True,
        shell=True,
        stderr=subprocess.STDOUT,
        stdin=subprocess.PIPE if input else subprocess.DEVNULL,
        stdout=subprocess.PIPE,
    )

    if input:
        proc.stdin.write(input)
        proc.stdin.flush()
        proc.stdin.close()

    lines = []
    for line in iter(proc.stdout.readline, ""):
        line = line.rstrip("\n")
        lines.append(line)

    proc.stdout.close()
    ret_code = proc.wait()

    return "\n".join(lines)


def run_good(target, input):
    return subprocess.Popen(
        target,
        universal_newlines=True,
        shell=True,
        stderr=subprocess.STDOUT,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    ).communicate(input=input)[0]


if __name__ == '__main__':
    lst = [
        "",
        "token1",
        "token1\n",
        "token1\r\n",
        "token1\n\n",
        "token1\r\n\ntoken2",
        "token1 token2",
        "token1\ntoken2",
        "token1\r\ntoken2",
        "token1\n\ntoken2",
        "token1\r\n\ntoken2",
        "token1 \ntoken2\ntoken2\n"
    ]

    cmd = "python proc_stdin.py"
    for inp in lst:
        a, b = run_bad(cmd, inp), run_good(cmd, inp)
        if a != b:
            print("Error: {} vs {}".format(repr(a), repr(b)))
        else:
            print("ok: {}".format(repr(a)))
Output:
ok: ''
ok: 'token1'
Error: 'token1' vs 'token1\n'
Error: 'token1\n' vs 'token1\n\n'
Error: 'token1\n' vs 'token1\n\n'
ok: 'token1\n\n\ntoken2'
ok: 'token1 token2'
ok: 'token1\ntoken2'
ok: 'token1\n\ntoken2'
ok: 'token1\n\ntoken2'
ok: 'token1\n\n\ntoken2'
Error: 'token1 \ntoken2\ntoken2' vs 'token1 \ntoken2\ntoken2\n'
My question is: why is the output of run_bad and run_good not equal in all cases? How would you change the run_bad function so that its output becomes equal to run_good's?
You may also wonder why I am not directly using Popen.communicate for this particular case, or other helpers from the subprocess module. Well, in the real-world case I'm creating a plugin for SublimeText3, which forces me to stick to python3.3 (I can't use many of the modern subprocess goodies), plus I'd like to inject some callbacks while reading the lines from stdout, and that's something I can't do with the Popen.communicate method (as far as I know).
Thanks in advance.
If you strip newlines from every line and then add them back between the lines, what happens to the last newline (if any)? (There’s no final, empty line after a final newline because your iter discards it.) This is why Python’s readline (or line iteration) function includes the newlines: they’re necessary to represent the end of the file accurately.
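In other words, one sketch of a fixed variant (same Popen setup as run_bad in the question, but keeping the newlines that readline() returns instead of stripping and re-joining them):

import subprocess

def run_fixed(target, input=None):
    proc = subprocess.Popen(
        target,
        universal_newlines=True,
        shell=True,
        stderr=subprocess.STDOUT,
        stdin=subprocess.PIPE if input else subprocess.DEVNULL,
        stdout=subprocess.PIPE,
    )
    if input:
        proc.stdin.write(input)
        proc.stdin.close()
    lines = []
    for line in iter(proc.stdout.readline, ""):
        lines.append(line)       # keep the trailing "\n"; a per-line callback could go here
    proc.stdout.close()
    proc.wait()
    return "".join(lines)        # a trailing newline in the child's output now survives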

TypeError: Can't convert 'bytes' object to str implicitly using Python3.5

I am using Python 3.5 on the following code.
def raxml(DIR,cleaned,num_cores,seqtype):
    assert cleaned.endswith(".aln-cln"),\
        "raxml infile "+cleaned+" not ends with .aln-cln"
    assert seqtype == "aa" or seqtype == "dna","Input data type: dna or aa"
    assert len(read_fasta_file(DIR+cleaned)) >= 4,\
        "less than 4 sequences in "+DIR+cleaned
    clusterID = cleaned.split(".")[0]
    tree = DIR+clusterID+".raxml.tre"
    raw_tree = "RAxML_bestTree."+cleaned
    model = "PROTCATWAG" if seqtype == "aa" else "GTRCAT"
    if not os.path.exists(tree) and not os.path.exists(raw_tree):
        # raxml crashes if input file starts with .
        infasta = cleaned if DIR == "./" else DIR+cleaned
        cmd = ["raxml","-T",str(num_cores),"-p","12345","-s",\
            infasta,"-n",cleaned,"-m",model]
        print (" ".join(cmd))
        p = subprocess.Popen(cmd,stdout=subprocess.PIPE)
        out = p.communicate()
        assert p.returncode == 0,"Error raxml"+out[0]
        try:
            os.rename(raw_tree,tree)
            os.remove("RAxML_info."+cleaned)
            os.remove("RAxML_log."+cleaned)
            os.remove("RAxML_parsimonyTree."+cleaned)
            os.remove("RAxML_result."+cleaned)
            os.remove(DIR+cleaned+".reduced")
        except: pass # no need to worry about extra intermediate files
    return tree
It runs and returns the following error:
  File "raxml_wrapper.py", line 30, in raxml
    assert p.returncode == 0,"Error raxml"+out[0]
TypeError: Can't convert 'bytes' object to str implicitly
Initially, I tried the following:
p = subprocess.Popen(cmd,stdout=subprocess.PIPE)
p = p.decode('utf-8')
out = p.communicate()
assert p.returncode == 0,"Error raxml"+out[0]
That didn't fix the issue at all. I have looked at similar questions, but I cannot come up with a solution to this. I would appreciate some help on this.
Thanks!
p, a Popen object, doesn't have a .decode(...) member.
You need to actually decode the output
p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
out, _ = p.communicate()
out = out.decode('utf-8')
assert p.returncode == 0, 'Error raxml' + out
That said, this code can be improved to use subprocess.check_output:
# does roughly the same thing, you'll get `subprocess.CalledProcessError` instead of `AssertionError`
out = subprocess.check_output(cmd).decode('UTF-8')
Or if you happen to be using python3.6+
out = subprocess.check_output(cmd, encoding='UTF-8')
I do not know exactly what your p.communicate() method does, but it seems that it returns a bytes object. This piece of code cannot add that bytes object to the "Error raxml" str object:
assert p.returncode == 0,"Error raxml"+out[0]
Maybe you should try converting it to str like this:
assert p.returncode == 0,"Error raxml"+str(out[0])
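For completeness, a tiny illustration of the failure and the decode-based fix (the byte string here is made up):

out = (b"RAxML output", None)                    # roughly what communicate() returns here
# "Error raxml" + out[0]                         # TypeError: Can't convert 'bytes' object to str implicitly
msg = "Error raxml" + out[0].decode('utf-8')     # works: decode the bytes to str first
print(msg)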

Reading from stdin in Python 2.7.6. Sys.stdout.flush() and python -u doesn't work

It seems like many people have been struggling with getting buffers, stdin, and stdout working across many flavors of Python. I'm writing a script in Python 2.7.6 to read from stdin, do a regex match, and print a list of matching strings.
import re, sys

barcodes=["The barcodes are:"]
curr=barcodes[0]
#iterate through stdin
for line in sys.stdin.readlines():
    #do regex match in line
    match = re.search('(?<=\:)[GATC]{6}', line.rstrip()).group(0)
    matched = 0
    #see if match has been seen before
    if (match == curr):
        matched = 1
        print "matched curr"
    else:
        for a , val in enumerate(barcodes):
            if (match == val):
                print str(a) + " : " + val + " barcodes[a] " + str(barcodes[a])
                curr = barcodes[a]
                print curr
                matched = 1
                print "matched iteration"
    #if match hasn't been seen before
    if (matched == 0):
        sys.stdout.write("NEW match")
        sys.stdout.flush()
        barcodes.append(match)
#print report of barcodes
for i in barcodes:
    print i
Like many before me, I have found that this waits until it reads EOF from stdin before printing anything, and I can't find any documentation on how to have the process run/print as it reads from stdin.
To be clear, this happens regardless of whether or not I call Python with the -u flag.
Thank you for any guidance you can give me.
Here are some examples that read sys.stdin a line at a time. They do not require the use of the python -u option.
#! /usr/bin/env python
import sys

def main():
    count = 1
    while True:
        line = sys.stdin.readline()
        if line == '':
            break  # EOF encountered
        print "%3d: [%s]" % (count, line[:-1])
        count += 1

if __name__ == '__main__':
    main()
If you are using Linux / Unix, this version is better because it gives you line editing.
#! /usr/bin/env python
import sys
import readline

def main():
    count = 1
    while True:
        try:
            line = raw_input()
            print "%3d: [%s]" % (count, line)
            count += 1
        except EOFError:
            break

if __name__ == '__main__':
    main()
sys.stdin is just a file object, so if you use readlines() then reading continues until all lines have been read, which only happens when you hit Ctrl+D (on Linux). Try reading line by line instead, something like this:
#!/usr/bin/env python
import sys,re

while True:
    line = sys.stdin.readline()
    m = re.search("end", line)
    if m:
        break
    else:
        print "I read:" + line
The solution is simply this:
for line in sys.stdin:
    # process line
Since sys.stdin is a file-like object, iterating over it generates lines one at a time as they become available.
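Applied to the question's script, a minimal sketch of that approach (the regex is from the question; the duplicate tracking is reduced to a simple membership check):

import re
import sys

barcodes = ["The barcodes are:"]
for line in sys.stdin:
    m = re.search('(?<=\:)[GATC]{6}', line.rstrip())
    if m and m.group(0) not in barcodes:
        sys.stdout.write("NEW match\n")
        sys.stdout.flush()
        barcodes.append(m.group(0))

for barcode in barcodes:
    print(barcode)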

How can I make a Python script parse command line arguments like a Unix command? [duplicate]

This question already has answers here:
What's the best way to parse command line arguments? [duplicate]
(15 answers)
Closed 9 years ago.
I have a Python utility script that accepts arguments in the commandline and executes tasks against an open source search tool called Elasticsearch.
But simply put, here is how it's currently being used:
Myscript.py create indexname http://localhost:9260
Myscript.py create indexname http://localhost:9260 IndexMap.json
I would like to make it so that the user of the script doesn't have to remember the order of the arguments. How can I enable this in my script? I was thinking along the lines of Unix-style argument passing. Here is my current script:
import os
import sys
import glob
import subprocess

# collect command line arguments
commandline_args = sys.argv

# How to use this simple API:
# create indexname http://localhost:9260 IndexMap.json
command_type = commandline_args[1]
index_name = commandline_args[2]
base_elasticsearch_url = commandline_args[3]
file_to_index = sys.argv[4] if len(sys.argv) > 4 else None

def run_curl(command, url):
    cmd = ['curl', command]
    url = url.split(' ')
    print 'sending command: '
    print cmd+url
    return subprocess.check_output(cmd+url)

if (command_type == 'delete'):
    print 'About to run '+ command_type + ' on Index: ' + index_name
    command = '-XDELETE'
    composed_url = base_elasticsearch_url + '/' + index_name + '/'
    output = run_curl(command, composed_url)
    print 'output:'
    print output

# create Index # works!
# curl -XPOST 'localhost:9260/icrd_client_1 -d #clientmappings.json
if (command_type == 'create'):
    print 'About to run '+command_type+' for Index: '+index_name+' from filename: '+file_to_index
    command = '-XPOST'
    composed_url = base_elasticsearch_url + '/' + index_name +' -d ' + '#'+file_to_index
    output = run_curl(command, composed_url)
    print 'output:'
    print output
If you're using Python 2.7 or newer, try argparse. For older versions, try optparse.
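A rough argparse sketch for this script (the flag names are my own suggestion, not from the question):

import argparse

parser = argparse.ArgumentParser(description='Run commands against Elasticsearch')
parser.add_argument('command', choices=['create', 'delete'], help='operation to perform')
parser.add_argument('--index', required=True, help='index name, e.g. indexname')
parser.add_argument('--url', default='http://localhost:9260', help='base Elasticsearch URL')
parser.add_argument('--mapping', help='optional JSON mapping file, e.g. IndexMap.json')
args = parser.parse_args()

print("command=%s index=%s url=%s mapping=%s"
      % (args.command, args.index, args.url, args.mapping))

Called as Myscript.py create --index indexname --url http://localhost:9260 --mapping IndexMap.json, the flags can then be given in any order, and argparse generates the usage/help text for you.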
I'll suggest a simple, elegant solution using a Python dictionary: you can use dictionary keys instead of if statements. It's not the best option, I'm sure, but it's a bit more elegant.
import sys

def func1():
    print "I'm func1"

def func2():
    print "I'm func2"

def func3():
    print "I'm func3"

def func4():
    print "I'm default!"

def main():
    myCommandDict = {"arg1": func1(), "arg2": func2(), "arg3": func3(), "default": func4()}
    commandline_args = sys.argv
    for argument in commandline_args[1]:
        if argument in myCommandDict:
            myCommandDict[argument]
        else:
            myCommandDict["default"]

if __name__ == "__main__":
    main()
Edit: the body of main can be replaced with this version:
myCommandDict = {"arg1": func1, "arg2": func2, "arg3": func3, "default": func4}
commandline_args = sys.argv[1:]
for argument in commandline_args:
if argument in myCommandDict:
myCommandDict[argument]()
else:
myCommandDict["default"]()
You can also use getopt (it works in a similar way to GNU getopt).
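A small getopt sketch (the option letters and long names are assumptions, not taken from the question):

import getopt
import sys

opts, args = getopt.getopt(sys.argv[1:], "i:u:m:", ["index=", "url=", "mapping="])
options = dict(opts)
index_name = options.get("--index", options.get("-i"))
base_url = options.get("--url", options.get("-u", "http://localhost:9260"))
print("index=%s url=%s positional=%s" % (index_name, base_url, args))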

Sorting Problems when using a list

I have a .txt file that contains a list of IP address:
111.67.74.234:8080
111.67.75.89:8080
12.155.183.18:3128
128.208.04.198:2124
142.169.1.233:80
There's a lot more than that though :)
Anyway, I imported this into a list using Python and I'm trying to get it to sort them, but I'm having trouble. Does anybody have any ideas?
EDIT:
OK, since that was vague, this is what I had so far.
f = open("/Users/jch5324/Python/Proxy/resources/data/list-proxy.txt", 'r+')
lines = [x.split() for x in f]
new_file = (sorted(lines, key=lambda x:x[:18]))
You're probably sorting them by ascii string-comparison ('.' < '5', etc.), when you'd rather that they sort numerically. Try converting them to tuples of ints, then sorting:
def ipPortToTuple(string):
    """
    '12.34.5.678:910' -> (12,34,5,678,910)
    """
    ip,port = string.strip().split(':')
    return tuple(int(i) for i in ip.split('.')) + (int(port),)

with open('myfile.txt') as f:
    nonemptyLines = (line for line in f if line.strip()!='')
    sorted(nonemptyLines, key=ipPortToTuple)
edit: The ValueError you are getting is because your text files are not entirely in the #.#.#.#:# format as you imply. (There may be comments or blank lines, though in this case the error would hint that there is a line with more than one ':'.) You can use debugging techniques to home in on your issue, by catching the exception and emitting useful debugging data:
def tryParseLines(lines):
    for line in lines:
        try:
            yield ipPortToTuple(line.strip())
        except Exception:
            if __debug__:
                print('line {} did not match #.#.#.#:# format'.format(repr(line)))

with open('myfile.txt') as f:
    sorted(tryParseLines(f))
I was a bit sloppy in the above, in that it still lets some invalid IP addresses through (e.g. #.#.#.#.#, or 257.-1.#.#). Below is a more thorough solution, which allows you to do things like compare IP addresses with the < operators, also making sorting work naturally:
#!/usr/bin/python3
import functools
import re

@functools.total_ordering
class Ipv4Port(object):
    regex = re.compile(r'(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3}):(\d{1,5})')

    def __init__(self, ipv4:(int,int,int,int), port:int):
        try:
            assert type(ipv4)==tuple and len(ipv4)==4, 'ipv4 not 4-length tuple'
            assert all(0<=x<256 for x in ipv4), 'ipv4 numbers not in valid range (0<=n<256)'
            assert type(port)==int, 'port must be integer'
        except AssertionError as ex:
            print('Invalid IPv4 input: ipv4={}, port={}'.format(repr(ipv4),repr(port)))
            raise ex
        self.ipv4 = ipv4
        self.port = port
        self._tuple = ipv4+(port,)

    @classmethod
    def fromString(cls, string:'12.34.5.678:910'):
        try:
            a,b,c,d,port = cls.regex.match(string.strip()).groups()
            ip = tuple(int(x) for x in (a,b,c,d))
            return cls(ip, int(port))
        except Exception as ex:
            args = list(ex.args) if ex.args else ['']
            args[0] += "\n...indicating ipv4 string {} doesn't match #.#.#.#:# format\n\n".format(repr(string))
            ex.args = tuple(args)
            raise ex

    def __lt__(self, other):
        return self._tuple < other._tuple

    def __eq__(self, other):
        return self._tuple == other._tuple

    def __repr__(self):
        # return 'Ipv4Port(ipv4={ipv4}, port={port})'.format(**self.__dict__)
        return "Ipv4Port.fromString('{}.{}.{}.{}:{}')".format(*self._tuple)
and then:
def tryParseLines(lines):
    for line in lines:
        line = line.strip()
        if line != '':
            try:
                yield Ipv4Port.fromString(line)
            except AssertionError as ex:
                raise ex
            except Exception as ex:
                if __debug__:
                    print(ex)
                raise ex
Demo:
>>> lines = '222.111.22.44:214 \n222.1.1.1:234\n 23.1.35.6:199'.splitlines()
>>> sorted(tryParseLines(lines))
[Ipv4Port.fromString('23.1.35.6:199'), Ipv4Port.fromString('222.1.1.1:234'), Ipv4Port.fromString('222.111.22.44:214')]
Changing the values to be for example 264... or ...-35... will result in the appropriate errors.
@Ninjagecko's solution is the best, but here is another way of doing it using re:
>>> import re
>>> with open('ips.txt') as f:
print sorted(f, key=lambda line: map(int, re.split(r'\.|:', line.strip())))
['12.155.183.18:3128\n', '111.67.74.234:8080\n', '111.67.75.89:8080\n',
'128.208.04.198:2124\n', '142.169.1.233:80 \n']
You can pre-process the list so it can be sorted using the built-in comparison function, and then process it back to a more normal format.
The padded strings are all the same length, so they can be sorted; afterwards, we simply remove the spaces again.
You can google around and find other examples of this.
for i in range(len(address)):
    address[i] = "%3s.%3s.%3s.%3s" % tuple(address[i].split("."))
address.sort()
for i in range(len(address)):
    address[i] = address[i].replace(" ", "")
If you have a ton of IP addresses, you will get better processing times if you use C++. It will be more work up front, but it pays off for large inputs.
