Translating python raw string / regex to ruby - python

I'm currently trying to translate a python script to ruby. Now I'm stuck on a part that uses a raw string for a regex.
This is the original python code:
# Raw string literal: the \xNN escapes are passed to the regex engine as written.
# NOTE(review): this looks like Python 2 code where `string` is a byte string;
# under Python 3 a bytes pattern (rb'...') would presumably be needed — confirm.
pat = re.compile(r'.{4}\xAA\xEE\xAA\x76\x1B\xEC\xBB\x20\xF1\xE6\x51.{1}\x78\x9C')
match = pat.search(string)
# span() gives the (start, end) offsets of the whole match.
(start_match, end_match) = match.span()
This is my attempt to translate it to ruby:
# In a single-quoted Ruby string each "\\" is one backslash, so the regexp
# source handed to Regexp.compile contains \xAA, \xEE, ... byte escapes.
pat = Regexp.compile('.{4}\\xAA\\xEE\\xAA\\x76\\x1B\\xEC\\xBB\\x20\\xF1\\xE6\\x51.{1}\\x78\\x9C')
# NOTE(review): scan + Regexp.last_match.begin(0) collects the start offset of
# every match, so the two variables receive the starts of the first two matches
# rather than the (start, end) pair that Python's span() returns — confirm intent.
start_match, end_match = string.enum_for(:scan, pat).map { Regexp.last_match.begin(0) }
Unfortunately I must be doing it wrong because I get this error:
invalid multibyte escape: /.{4}\\xAA\\xEE\\xAA\\x76\\x1B\\xEC\\xBB\\x20\\xF1\\xE6\\x51.{1}\\x78\\x9C/ (RegexpError)
I also tried:
# Quoted heredoc (<<'TEXT'): the body is taken literally, backslashes included.
# NOTE(review): the heredoc body ends with a newline, so the compiled pattern
# also contains a trailing "\n" unless it is stripped — confirm.
regex_String = <<'TEXT'
.{4}\xAA\xEE\xAA\x76\x1B\xEC\xBB\x20\xF1\xE6\x51.{1}\x78\x9C
TEXT
pat = Regexp.compile(regex_String)
# Same match-offset extraction as in the first attempt.
start_match, end_match = string.enum_for(:scan, pat).map { Regexp.last_match.begin(0) }
But it results in the same error.
To place it in context, here is the whole script:
# Commented lines are the original python code
# Uncommented lines are the translated ruby code
#
# Overview: searches the file named on the command line for a byte signature,
# reads a length from the bytes at the start of the match, inflates the data
# found 16 bytes past the match start and writes it to <input>_decompressed.hdr.
#import zlib
#import sys
#import re
#import binascii
require "zlib"
require "hex_string"
#if(len(sys.argv) < 2 or sys.argv[1] == "-h"):
# print "usage: python DecompNewDell.py <biosupdate.exe>"
# exit()
if ARGV.length < 1 or ARGV[0] == "-h"
puts "usage: ruby DecompNewDell.rb <biosupdate.exe>";
exit
end
#f = open(sys.argv[1], "rb")
#string = f.read()
f = File.open(ARGV[0], 'rb')
string = f.read
#pat = re.compile(r'.{4}\xAA\xEE\xAA\x76\x1B\xEC\xBB\x20\xF1\xE6\x51.{1}\x78\x9C')
#match = pat.search(string)
#(start_match, end_match) = match.span()
# NOTE(review): this is the Regexp.compile call reported to fail with
# "invalid multibyte escape"; the pattern source contains \xNN byte escapes.
pat = Regexp.compile('.{4}\\xAA\\xEE\\xAA\\x76\\x1B\\xEC\\xBB\\x20\\xF1\\xE6\\x51.{1}\\x78\\x9C')
# NOTE(review): this yields the start offsets of successive matches, not the
# (start, end) of the first match as match.span() does in the Python original.
start_match, end_match = string.enum_for(:scan, pat).map { Regexp.last_match.begin(0) }
#compessed_len = string[start_match:start_match+4]
# NOTE(review): assigns `compressed_len`, but every later line reads
# `compessed_len` (the spelling used in the Python original), so the value
# stored here is never used. Also, `a..b` is an inclusive range (5 bytes here),
# whereas the Python slice [start:start+4] covers 4 bytes.
compressed_len = string[start_match..start_match+4]
#compessed_len = binascii.b2a_hex(compessed_len[::-1])
# Reverse the bytes, hex-encode them, then parse the hex digits as an integer.
# NOTE(review): to_hex_string presumably comes from the hex_string gem required above — confirm.
compessed_len.reverse!
compessed_len = compessed_len.to_hex_string(false)
#compessed_len = long(compessed_len, 16)
compessed_len = compessed_len.to_i(16)
#read len bytes out of the file into the new string to decompress
#f.seek(start_match+16)
#string = f.read(compessed_len)
f.seek start_match+16
string = f.read compessed_len
#o = zlib.decompress(string)
o = Zlib::Inflate.inflate(string)
#f2 = open(sys.argv[1] + "_decompressed.hdr", "wb")
#f2.write(o)
#f.close()
#f2.close()
#print "Decompressed data written to %s_decompressed.hdr" % sys.argv[1]
f2 = File.open(ARGV[0] + "_decompressed.hdr", 'wb')
f2.write(o)
f.close()
f2.close()
puts "Decompressed data written to #{ARGV[0]}_decompressed.hdr"

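This answer explains why the problem arises:
https://stackoverflow.com/a/47785810/12349985
And here is a solution for this situation:
https://techoverflow.net/2013/12/29/solving-invalid-multibyte-escape-xfexff-in-ruby-vpim/
In short, the pattern contains raw byte escapes, so it has to be compiled as a binary (ASCII-8BIT) regexp, for example with the `n` flag on a regexp literal (`/\xAA\xEE/n`) or with `Regexp.new(source.force_encoding('BINARY'), Regexp::FIXEDENCODING)`.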

Related

Simplified grep in python

I need to create a simplified version of grep in Python that prints every line containing a keyword. For example, running "python mygrep.py duck animals.txt" should output "The duck goes quack". I have a file that contains several lines of text, but I'm not sure how to print only the lines that contain the keyword, such as the line with "duck" in it. I'm supposed to use only "import sys" and not "re", since it's supposed to be a simple version.
import sys
def main():
    """Print the contents of animals.txt after checking the argument count."""
    if len(sys.argv) != 3:
        exit('Please pass 2 arguments.')
    # NOTE(review): both values are read from the command line but never used below.
    search_text = sys.argv[1]
    filename = sys.argv[2]
    # The file name is hard-coded here rather than taken from `filename`.
    with open("animals.txt", 'r') as f:
        text = f.read()
    # `text` is one string, so this loop visits it character by character.
    for line in text:
        print(line)
if __name__ == '__main__':
    main()
The operator 'in' should be sufficient.
# `text` holds the whole file as a single string (it came from f.read()), so
# split it into lines first; iterating the string itself yields characters.
for line in text.splitlines():
    # Plain substring test: no regular expressions needed.
    if search_text in line:
        print(line)
Here is an implementation of grep in Python with an after/before context feature:
def _fetch_logs(self, data, log_file, max_result_size, current_result_size):
    """Collect the lines of ``log_file`` that match, plus surrounding context.

    A line matches when ``self._search_in(data, pattern, line)`` is truthy for
    every pattern in ``data["patterns"]``.  Up to ``data["before"]`` lines
    preceding a match and ``data["after"]`` lines following it are kept as
    context (both default to 0).

    Args:
        data: mapping with a ``"patterns"`` entry and optional ``"before"`` /
            ``"after"`` counts.
        log_file: iterable of lines.
        max_result_size: scanning stops once ``current_result_size`` plus the
            number of collected lines reaches this value.
        current_result_size: number of lines already collected by the caller.

    Returns:
        ``(exceeded_max, result)`` where ``exceeded_max`` tells whether the
        size limit stopped the scan and ``result`` is the list of kept lines.
    """
    after = data.get("after", 0)
    before = data.get("before", 0)
    exceeded_max = False
    result = []
    before_counter = 0  # length of the frame at the moment of the latest match
    frame = []          # sliding window: context lines plus pending match lines
    found = False       # True while a match is waiting for its "after" lines

    for line in log_file:
        frame.append(line)
        match_patterns = all(
            self._search_in(data, pattern, line) for pattern in data["patterns"]
        )
        if match_patterns:
            # A new match (re)starts the "after" countdown from this line.
            before_counter = len(frame)
            found = True
        if not found and len(frame) > before:
            # No pending match: keep only the last `before` lines as context.
            frame.pop(0)
        if found and len(frame) >= before_counter + after:
            # Context complete: flush the frame into the result.
            found = False
            before_counter = 0
            result += frame
            frame = []
        # NOTE(review): the nesting of this check was lost with the original
        # indentation; it is placed at loop level here — confirm.
        if current_result_size + len(result) >= max_result_size:
            exceeded_max = True
            break

    if found:
        # Input ended before the "after" window filled up; keep what we have.
        result += frame
    return exceeded_max, result

Text parsing via python

I have 100+ files with the extension *.log. Each file contains the results from an ansible-playbook run. I would like to parse the data using python, so that I can import it into an excel spreadsheet. I need some help to automate the process.
File content are:
ok: [wrt02.test1] => {
"msg": "nxos"
}
TASK [checklist : OUTPUT IOS_XR] *******************************************************************************************************************************************************************************************************
skipping: [leaf1J0101.test2]
skipping: [leaf1J0102.test2]
ok: [spine01.test1] => {
"msg": [
"Bundle-Ether1.100 192.168.245.65 Up Up default ",
"Bundle-Ether10.151 192.168.203.3 Up Up default ",
"Loopback0 192.168.255.7 Up Up default ",
"MgmtEth0/RSP0/CPU0/0 192.168.224.15 Up Up MANAGEMENT",
"TenGigE0/0/0/2 192.168.114.114 Up Up default ",
"TenGigE0/0/0/3 192.168.82.170 Up Up default"
]
}
RESULTS:
spine01.test1,Bundle-Ether1.100,192.168.245.65,
spine01.test1,Bundle-Ether10.151,192.168.203.3,
spine01.test1,Loopback0,192.168.255.7,
spine01.test1,MgmtEth0/RSP0/CPU0/0,192.168.224.15,
spine01.test1,TenGigE0/0/0/2,192.168.114.114,
spine01.test1,TenGigE0/0/0/3,192.168.82.170
CODE:
def findIOS(output):
    """Slice the section after the OUTPUTIOS task header and eval each line's {...} part.

    Returns the list of evaluated objects (also printed before returning).
    Indentation was reconstructed; the original nesting was lost.
    """
    # String we're looking for
    # NOTE(review): the sample log shown with this code has the header
    # "TASK [checklist : OUTPUT IOS_XR]", which differs from this marker; when
    # find() returns -1 the computed start index is len(marker) + 1 — confirm.
    OUTIOS_string = "TASK [checklist : OUTPUTIOS] ***************************************************************************************************************************************************************************************************************"
    end_string = "TASK"
    # Find the start of our string
    start_index = output.find(OUTIOS_string) + len(OUTIOS_string) + 2
    # Find the end of our string
    end_index = output.find(end_string, start_index + 1)
    lines = output[start_index:end_index].split('\n')
    # Create a list to store our resulting dictionaries
    #print lines
    d = []
    for line in lines:
        #print line
        if line != "":
            # If line is not empty, find our starting and closing brackets
            # Find the host:
            hstart = line.find('[')
            hend = line.rfind(']') + 1
            start = line.find('{')
            end = line.rfind('}') + 1
            hostname = line[hstart:hend]
            # Store content between brackets
            # NOTE(review): in the sample log the {...} object spans several
            # lines, so a single line rarely holds both braces — confirm.
            obj = line[start:end]
            hostname = hostname.replace("[", "").replace("]","")
            print hostname
            print obj
            # Convert string to dictionary, and store the results
            # NOTE(review): eval() executes whatever text the log contains;
            # a safer parser should be considered for untrusted files.
            d.append(eval(obj))
    print d
    return d
def main():
    """Read ../showint.log into memory."""
    output = None
    with open("../showint.log", "rb") as f:
        output = f.read()
    # NOTE(review): `output` is read but findIOS is never called with it.
if __name__ == '__main__':
    main()
How can I extract the data into the format shown above? Thanks for the help.
I'll leave you the pleasure of writing the code around these patterns.
import re
# This matches the msg pattern: leading whitespace, then "msg": followed by
# one whitespace character and an opening square bracket.
find_start = r"^\s+\"msg\":\s\["
# This matches the line with the information you want that you can access with match.groups()
# Group 1 is the run of non-whitespace right after the opening quote;
# group 2 is four dot-separated numbers of 1 to 3 digits each.
find_line = r"^\s+\"([^\s]+)\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"

Regular expression in Python issue

I have the below code in one of my configuration files:
appPackage_name = sqlncli
appPackage_version = 11.3.6538.0
The left side is the key and the right side is value.
Now I want to be able to replace the value part with something else, given a key, in Python.
import re
# Replace the value stored for `key` in a "key = value" config file.
Filepath = r"C:\Users\bhatsubh\Desktop\Everything\Codes\Python\OO_CONF.conf"
key = "appPackage_name"
value = "Subhayan"
# The character class has no ".", so it stops at the first dot of a value
# such as 11.3.6538.0.
searchstr = re.escape(key) + " = [\da-zA-Z]+"
# NOTE(review): re.escape() is applied to the replacement text as well, which
# would insert backslashes into the output for values with special characters.
replacestr = re.escape(key) + " = " + re.escape(value)
filedata = ""
with open(Filepath,'r') as File:
    filedata = File.read()
    # Redundant: the with-statement already closes the file.
    File.close()
print ("Before change:",filedata)
# re.sub() returns the new string; the result is not assigned here, so
# `filedata` is printed unchanged below.
re.sub(searchstr,replacestr,filedata)
print ("After change:",filedata)
I assume there is something wrong with the regex I am using, but I am not able to figure out what. Can someone please help me?
Use the following fix:
import re
def build_substitution(key, value):
    """Return ``(searchstr, replacestr)`` for replacing the value of ``key``.

    ``searchstr`` captures ``key`` and the ``=`` with optional spaces as
    group 1, followed by the old value (letters, digits and dots).
    ``replacestr`` restores group 1 and appends ``value`` literally.
    """
    searchstr = r"({} *= *)[\da-zA-Z.]+".format(re.escape(key))
    # \g<1> is the unambiguous spelling of \1: with a plain \1 a value that
    # starts with a digit (e.g. "12.0.1") would be read as part of the group
    # number or as an octal escape.  Backslashes in the value are doubled so
    # that re.sub() inserts them literally.
    replacestr = r"\g<1>{}".format(value.replace("\\", "\\\\"))
    return searchstr, replacestr


key = "appPackage_name"
value = "Subhayan"
searchstr, replacestr = build_substitution(key, value)
# Sample data for the demo; in real use read the text from the config file.
filedata = "appPackage_name = sqlncli"
print("Before change:", filedata)
# re.sub() returns a new string, so the result must be assigned back.
filedata = re.sub(searchstr, replacestr, filedata)
print("After change:", filedata)
See the Python demo
There are several issues: you should not escape the replacement pattern, only the literal user-defined values in the regex pattern. You can use a capturing group (a pair of unescaped (...)) and a backreference (here, \1 since the group is only one in the pattern) to restore the part of the matched string you need to keep rather than build that replacement string dynamically. As the version value contains dots, you should add a . to the character class, [\da-zA-Z.]. You also need to assign new value after replacing, so as to actually modify it.

Searching a file for matches between two values and outputting search hits in Python

I am (attempting) to write a program that searches through a hex file for instances of a hex string between two values, eg. Between D4135B and D414AC, incrementing between the first value until the second is reached- D4135B, D4135C, D4135D etc etc.
I have managed to get it to increment etc, but it’s the search part I am having trouble with.
This is the code I have so far, it's been cobbled together from other places and I need to make it somehow output all search hits into the output file (file_out)
I have exceeded the limit of my Python understanding and I'm sure there's probably a much easier way of doing this. I would be very grateful for any help.
# NOTE(review): indentation below is reconstructed; the original nesting was
# lost. `If`, `Else`, `Print` and `Return` are capitalised and the string uses
# typographic quotes, so this does not parse as Python as written.
# `threeByteHexPlusOne` and `threeByteHex2` are not defined in this snippet
# and neither is changed inside the loop.
def search_process(hx): # searching for two binary strings
    global FLAG
    while threeByteHexPlusOne != threeByteHex2: #Keep incrementing until second value reached
        If Flag:
            if hx.find(threeByteHex2) != -1:
                FLAG = False #If threeByteHex = ThreeByteHexPlusOne, end search
                Print (“Reached the end of the search”,hx.find(threeByteHexPlusOne))
        Else:
            If hx.find(threeByteHexPlusOne) != -1:
                FLAG = True
    Return -1 #If no results found
# Driver: reads the input file, calls search_process on what was read and
# writes the accumulated `found` text to the output file.
# NOTE(review): indentation below is reconstructed; the original nesting was lost.
if __name__ == '__main__':
    try:
        file_in = open(FILE_IN, "r") #opening input file
        file_out = open(FILE_OUT, 'w') #opening output file
        # NOTE(review): `file_in.read` is not called (no parentheses), so this
        # binds the method object rather than the file contents.
        hx_read = file_in.read #read from input file
        tmp = ''
        found = ''
        while hx_read: #reading from file till file is empty
            hx_read = tmp + hx_read
            pos = search_process(hx_read)
            while pos != -1:
                # NOTE(review): assigns `hex_read`, while the rest of the loop
                # uses `hx_read`.
                hex_read = hx_read[pos:]
                if FLAG:
                    found = found + hx_read
                pos = search_process(hx_read)
            # NOTE(review): `bytes_read[]` is not valid syntax and `bytes_read`
            # is not defined in this snippet.
            tmp = bytes_read[]
            hx_read = file_in.read
        file_out.write(found) #writing to output file
    except IOError:
        print('FILE NOT FOUND!!! Check your filename or directory/PATH')
Here's a program that looks through a hex string from a file 3 bytes at a time and if the 3-byte hex string is between the given hex bounds, it writes it to another file. It makes use of generators to make getting the bytes from the hex string a little cleaner.
import base64
import sys
_usage_string = 'Usage: python {} <input_file> <output_file>'.format(sys.argv[0])


def _to_base_10_int(value):
    """Parse the hexadecimal string ``value`` into an int."""
    return int(value, 16)


def get_bytes(hex_str):
    """Yield ``hex_str`` in two-character slices (two hex digits = one byte).

    A trailing odd character is yielded on its own.
    """
    for i in range(0, len(hex_str), 2):
        yield hex_str[i:i + 2]


def get_three_byte_hexes(hex_str):
    """Yield consecutive, non-overlapping three-byte groups of ``hex_str``.

    Leftover bytes that do not fill a complete group of three are dropped.
    """
    byte_iter = get_bytes(hex_str)
    # Zipping the same iterator with itself three times pulls three items per
    # step and stops as soon as a full triple is no longer available.
    for first, second, third in zip(byte_iter, byte_iter, byte_iter):
        yield first + second + third


def find_hexes_in_range(hex_str, lower_bound_hex, upper_bound_hex):
    """Return the three-byte groups of ``hex_str`` whose value is in range.

    The range is ``lower_bound_hex <= value < upper_bound_hex`` (the upper
    bound itself is excluded); both bounds are hexadecimal strings.
    """
    lower_bound = _to_base_10_int(lower_bound_hex)
    upper_bound = _to_base_10_int(upper_bound_hex)
    return [
        three_byte_hex
        for three_byte_hex in get_three_byte_hexes(hex_str)
        if lower_bound <= _to_base_10_int(three_byte_hex) < upper_bound
    ]
if __name__ == "__main__":
    # Validate the arguments explicitly; an assert would be skipped under -O.
    if len(sys.argv) != 3:
        print(_usage_string)
        sys.exit(2)
    with open(sys.argv[1], 'rb') as fin:
        file_contents = fin.read()
    # The input is treated as base64 text: decode it, then turn the raw bytes
    # into a hex string (decodebytes replaces the removed decodestring).
    # NOTE(review): drop the base64 step if the input file is already raw binary.
    hex_str = base64.decodebytes(file_contents).hex()
    found = find_hexes_in_range(hex_str, 'D4135B', 'D414AC')
    print('Found:')
    print(found)
    if found:
        # The hits are text (hex digits), so the output is opened in text mode.
        with open(sys.argv[2], 'w') as fout:
            for _hex in found:
                fout.write(_hex)
Check out some more info on generators here

Select lines stack python

I wrote this code:
import os
import re
import string
##
# Scan the .inp file and report every occurrence of '*SURFACE'.
# NOTE(review): indentation below is reconstructed; in particular the nesting
# of n.write(text) was lost with the original formatting — confirm.
Path = 'C:/RESULT/BATCH/'
##
Nfile = 'Skin_Refined_v05'
f=open(Path + Nfile + '.inp')
n=open(Path + 'newfile.inp', 'w')
for lines, text in enumerate(f):
    found = text.find('*SURFACE')
    # Loop over every occurrence of the marker within the current line.
    while found > -1:
        print found, lines, text
        found = text.find('*SURFACE', found + 1)
        n.write(text)
##
f.close()
n.close()
This is what *.inp looks like (usually about 30Mb)
*SURFACE, NAME = BOTTOM, TYPE = ELEMENT
40012646, S2
40012647, S2
40012648, S2
40012649, S2
40012650, S2
40012651, S2
*SURFACE, NAME = ALL_INT_TIE_1, TYPE = ELEMENT
40243687, S3
40243703, S3
40243719, S3
40243735, S3
40243751, S3
40243767, S3
**
*TIE, NAME = INTERNAL_TIE, POSITION TOLERANCE = 1.0 , ADJUST=NO
SLAVE,MASTER
*TIE, NAME = SKN_REF_1
ALL_INT_FRONT, ALL_EXT_FRONT
*TIE, NAME = SKIN_LAT
ALL_INT_LAT, ALL_EXT_LAT
*TIE, NAME = SKIN_TIE_1
ALL_INT_TIE_1, ALL_INT_TIE_2
**
*SURFACE , NAME = TOP, COMBINE = UNION
TOP_1
TOP_2
**HM_UNSUPPORTED_CARDS
*END PART
*****
What it does is clear. What I would like to achieve is to get all the lines between the *SURFACE headers that begin with a number, which I will then have to arrange differently, but I will worry about that later.
I rewrote the code because I could not get it to work as suggested. Now it is creating the blocks as I need them, but how do I work on each block?
I need to separate all the elements (a number followed by S1, S2 and so on) and create groups for each block, sorted by S1, S2 and so on. The final result should look like:
*ELSET, ELSET=TOP_S1
40221320, 40221306, 40221305, 40221304, 40221290, 40221289, 40221288, 40221274,
40221273, 40221272, 40221258, 40221257, 40221256, 40221242, 40221241, 40221240,
*SURFACE, NAME = TOP, TYPE = ELEMENT
TOP_S1,S1
import os
import re
import string
##
# Walk the .inp file; after a '*SURFACE' header, print the "number, Sx" part
# of each following line that matches.
# NOTE(review): indentation below is reconstructed; the original nesting was lost.
Path = 'C:/RESULT/BATCH/'
##
Nfile = 'Skin_Refined_v05'
f=open(Path + Nfile + '.inp')
n=open(Path + 'newfile.inp', 'w')
in_surface_block = False;
for line_num, text in enumerate(f):
    found = text.find('*SURFACE')
    if found > -1:
        # Header line: start a new block and move on to the next line.
        in_surface_block=True;
        print found, line_num, text
        surface_lines = []
        continue
    if in_surface_block:
        # NOTE(review): in_surface_block is never reset to False, so lines
        # after the end of a block are still tested against the pattern.
        m = re.match('\s*\d+\,\s*\w\d+',text)
        if m:
            mtext = m.group(0)
## p=surface_lines.append(text)
            print mtext
## ntext = surface_lines.append(m.group(0))
## n.write(ntext)
##
f.close()
n.close()
I hope it is clear
I think this will do what you want:
import os
import re
##
Path = 'C:/RESULT/BATCH/'
##
Nfile = 'Skin_Refined_v05'


def iter_surface_blocks(lines):
    """Yield ``(line_num, header, surface_lines)`` for every *SURFACE block.

    ``line_num`` is the 0-based index of the header line, ``header`` the
    header line itself and ``surface_lines`` the list of lines directly after
    it that start with a number (optionally preceded by whitespace).

    A block ends at the first line that does not start with a number, at the
    next ``*SURFACE`` header, or at the end of the input.  Blocks without any
    numeric line are yielded with an empty list.
    """
    current = None  # (line_num, header, collected lines) of the open block
    for line_num, text in enumerate(lines):
        if text.find('*SURFACE') > -1:
            # A new header directly after a block must not discard that block.
            if current is not None:
                yield current
            current = (line_num, text, [])
            continue
        if current is None:
            continue
        if re.match(r'\s*\d+', text):
            current[2].append(text)
        else:
            yield current
            current = None
    # The last block may run up to the end of the file.
    if current is not None:
        yield current


if __name__ == '__main__':
    # Context managers close both files even if processing fails.
    with open(Path + Nfile + '.inp') as f, open(Path + 'newfile.inp', 'w') as n:
        for line_num, header, surface_lines in iter_surface_blocks(f):
            print(header.find('*SURFACE'), line_num, header)
            # Do the surface-lines work here: surface_lines is a list with all
            # the lines in this surface block that start with a number, and
            # `n` is the output file to write the rearranged data to.
Edit: Fixed logic error

Categories