Simplified grep in python

Simplified grep in python - python

I need to create a simplified version of grep in python which will print a line when a keyword is used such as using this command "python mygrep.py duck animals.txt" and getting the output, "The duck goes quack". I have a file where it contains different outputs but I'm not sure how to get it to print the line that contains the "keyword" such as the line with "duck" in it. Im suppose to only use "import sys" and not "re" since its suppose to be a simple version.
import sys
def main():
if len(sys.argv) != 3:
exit('Please pass 2 arguments.')
search_text = sys.argv[1]
filename = sys.argv[2]
with open("animals.txt", 'r') as f:
text = f.read()
for line in text:
print(line)
if __name__ == '__main__':
main()

The operator 'in' should be sufficient.
for line in text:
if search_text in line:
print(line)

Here is a an implementation of grep in python with after/before feature:
def _fetch_logs(self, data, log_file, max_result_size, current_result_size):
after = data.get("after", 0)
before = data.get("before", 0)
exceeded_max = False
result = []
before_counter = 0
frame = []
found = False
for line in log_file:
frame.append(line)
match_patterns = all(self._search_in(data, pattern, line) for pattern in data["patterns"])
if match_patterns:
before_counter = len(frame)
found = True
if not found and len(frame) > before:
frame.pop(0)
if found and len(frame) >= before_counter + after:
found = False
before_counter = 0
result += frame
frame = []
if current_result_size + len(result) >= max_result_size:
exceeded_max = True
break
if found:
result += frame
return exceeded_max, result

Related

How to pass a file and list_of_strings to search as a command line arguments in python

previously I am opening a file to search for a list of strings using a function
def search_multiple_strings_in_file(file_name, list_of_strings):
line_number = 0
parsing = False
list_of_results = []
with open(file_name, 'r') as read_obj:
for line in read_obj:
line_number += 1
for string_to_search in list_of_strings:
if string_to_search in line:
print("================================================================================================")
print('Line:' , line_number, '', line.strip())
#print('[ERROR]feature = ',Exception, ", action = ", 'signalhandler' , ", no = ", 'signalno')
#print("-----------------------------------------------------------------------------------------------")
next(read_obj)
for line1 in read_obj:
if line1.startswith(">>>>>>backtrace_"):
parsing = True
if line1.strip().endswith("<<<<<<backtrace_end"):
parsing = False
if parsing == True:
print(line1.strip())
else:
print("<<<<<<backtrace_end")
break
search_multiple_strings_in_file('CrashDump.log', ['[ERROR]feature=Exception, action=signalhandler, no='])
Output for the above code:
-----------------------------------------------------------------------------------------------
Line: 980 [ERROR]feature=Exception, action=signalhandler, no=14[SIGALRM] Thread[TKEL_Timer]
-----------------------------------------------------------------------------------------------
>>>>>>backtrace_start
0x75fc7edc : /usr/lib/libchal.so.1(+0x29edc) [0x75fc7edc]
0x770c08e8 : linux-vdso.so.1(+0x8e8) [0x770c08e8]
0x76c12854 : /lib/libpthread.so.0(+0x6854) [0x76c12854]
<<<<<<backtrace_end
-----------------------------------------------------------------------------------------------
Line: 1114 [ERROR]feature=Exception, action=signalhandler, no=1[SIGHUP] Thread[TASK_PLAYER]
-----------------------------------------------------------------------------------------------
>>>>>>backtrace_start
0x75fc7edc : /usr/lib/libchal.so.1(+0x29edc) [0x75fc7edc]
0x770c08e8 : linux-vdso.so.1(+0x8e8) [0x770c08e8]
0x76c2031a : /lib/libpthread.so.0(raise+0x3a) [0x76c2031a]
<<<<<<backtrace_end
But, now I need to pass the file name and list_of_strings as a command line argument. I tried like this:
if __name__ == '__main__':
if len(sys.argv) == 2:
print("Pass a file to parse as an argument")
sys.exit()
else:
search_multiple_strings_in_file(sys.argv[1], sys.argv[2])
but it's not getting the correct output (the correct output is the one I have written above).

['[ERROR]feature=Exception, action=signalhandler, no='] is an array containing a string. sys.argv[2] is a string.
You need
if __name__ == '__main__':
if len(sys.argv) == 2:
print("Pass a file to parse as an argument")
sys.exit()
else:
search_multiple_strings_in_file(sys.argv[1], sys.argv[2:])
Now the first command line argument is the filename and the following arguments are passed in a list as second argument to the function, e.g.
python3 prog.py "CrashDump.log" "[ERROR]feature=Exception, action=signalhandler, no="

I got the answer for my question, I have done this way:
import sys
def search_multiple_strings_in_file(file_name, list_of_strings):
#local variabes declaration
line_number = 0
parsing = False
list_of_results = []
#opening the file as read_obj in read mode
with open(file_name, 'r') as read_obj:
for line in read_obj:
line_number += 1
for string_to_search in list_of_strings: #searching the string in the file
if string_to_search in line: #if string is present in the particular line it will enter the if condition
print("================================================================================================")
print(line.strip())
next(read_obj)
for line1 in read_obj: #In this for loop, printing the logs present between the two strings ">>>>>>backtrace_" and "<<<<<<backtrace_end"
if line1.startswith(">>>>>>backtrace_"):
parsing = True
if line1.strip().endswith("<<<<<<backtrace_end"):
parsing = False
if parsing == True:
print(line1.strip())
else:
print("<<<<<<backtrace_end\n")
break
print("================================================================================================")
file_to_parse = sys.argv[1] #Assigning the argument to a variable
search_multiple_strings_in_file(file_to_parse, ['[ERROR]feature=Exception, action=signalhandler, no='])
command to execute:(passing only file)
python3 prog.py "CrashDump.log"
And for the list_of_strings as an argument we can do like this:
if __name__ == '__main__':
if len(sys.argv) == 2:
print("Pass a file to parse as an argument")
sys.exit()
else:
search_multiple_strings_in_file(sys.argv[1], sys.argv[2:])
For running the script:
python3 prog.py "CrashDump.log" "[ERROR]feature=Exception, action=signalhandler, no="

python - latexmk: error: no such option: -d

I am getting an error when I want to run a python script:
The error is following one:
The code is given below:
#!/usr/bin/python
import subprocess
code_dir = "code"
title = "Stanford ACM-ICPC Team Notebook"
def get_sections():
sections = []
section_name = None
with open('contents.txt', 'r') as f:
for line in f:
if '#' in line: line = line[:line.find('#')]
line = line.strip()
if len(line) == 0: continue
if line[0] == '[':
section_name = line[1:-1]
subsections = []
if section_name is not None:
sections.append((section_name, subsections))
else:
tmp = line.split('\t', 1)
if len(tmp) == 1:
raise ValueError('Subsection parse error: %s' % line)
filename = tmp[0]
subsection_name = tmp[1]
if subsection_name is None:
raise ValueError('Subsection given without section')
subsections.append((filename, subsection_name))
return sections
def get_style(filename):
ext = filename.lower().split('.')[-1]
if ext in ['c', 'cc', 'cpp']:
return 'cpp'
elif ext in ['java']:
return 'java'
elif ext in ['py']:
return 'py'
else:
return 'txt'
# TODO: check if this is everything we need
def texify(s):
#s = s.replace('\'', '\\\'')
#s = s.replace('\"', '\\\"')
return s
def get_tex(sections):
tex = ''
for (section_name, subsections) in sections:
tex += '\\section{%s}\n' % texify(section_name)
for (filename, subsection_name) in subsections:
tex += '\\subsection{%s}\n' % texify(subsection_name)
tex += '\\raggedbottom\\lstinputlisting[style=%s]{%s/%s}\n' % (get_style(filename), code_dir, filename)
tex += '\\hrulefill\n'
tex += '\n'
return tex
if __name__ == "__main__":
sections = get_sections()
tex = get_tex(sections)
with open('contents.tex', 'w') as f:
f.write(tex)
latexmk_options = ["latexmk", "-pdf", "notebook.tex"]
subprocess.call(latexmk_options)
I have already tried to install latexmk, But didn't succeed.
Can you help me about the detailed instruction of installation latexmk. I have already googled much. And for copyright thats not even my code. Its a code from stanford acm to make their own. Now I want to use to make my own.

Make sure that latexmk is accessible from your command line. You can check this by typing latexmk -version from your command line. If it is not accessible from command line then you need to add the latexmk path to environment variable.
If latexmk is not installed follow this link to properly install the latexmk.
I think following these steps might fix your problem.

Print text between two separators?

I have config file:
$ cat ../secure/test.property
#<TITLE>Connection setting
#MAIN DEV
jdbc.main.url=
jdbc.main.username=
jdbc.main.password=
#<TITLE>Mail settings
mail.smtp.host=127.0.0.1
mail.smtp.port=25
mail.smtp.on=false
email.subject.prefix=[DEV]
#<TITLE>Batch size for package processing
exposureImportService.batchSize=10
exposureImportService.waitTimeInSecs=10
ImportService.batchSize=400
ImportService.waitTimeInSecs=10
#<TITLE>Other settings
usePrecalculatedAggregation=true
###################### Datasource wrappers, which allow to log additional information
bean.datasource.query_log_wrapper=mainDataSourceWrapper
bean.gpc_datasource.query_log_wrapper=gpcDataSourceWrapper
time.to.keep.domain=7*12
time.to.keep.uncompress=1
#oracle max batch size
dao.batch.size.max=30
And function, which return line "#<TITLE>Other settings" (for example), to select "config section".
Next, need to print all lines between selected "section", and next line, startwith #<TITLE>.
How it can be realized?
P.S.
def select_section(property_file):
while True:
with open(os.path.join(CONF_DIR, property_file), 'r+') as file:
text = file.readlines()
list = []
print()
for i in text:
if '<TITLE>' in i:
line = i.lstrip('#<TITLE>').rstrip('\n')
list.append(line)
print((list.index(line)), line)
res_section = int(raw_input('\nPlease, select section to edit: '))
print('You selected: %s' % list[res_section])
if answer('Is it OK? '):
return(list[res_section])
break
And it's work like:
...
0 Connection setting
1 Mail settings
2 Batch size for package processing
3 Other settings
Please, select section to edit:
...
And expected output, if select Connection setting:
...
0 jdbc.main.url
1 jdbc.main.username
2 jdbc.main.password
Please, select line to edit:
...

If I understand the problem correctly, here's a solution that assembles the requested section as it reads the file:
def get_section(section):
marker_line = '#<TITLE>{}'.format(section)
in_section = False
section_lines = []
with open('test.property') as f:
while True:
line = f.readline()
if not line:
break
line = line.rstrip()
if line == marker_line:
in_section = True
elif in_section and line.startswith('#<TITLE>'):
break
if in_section:
if not line or line.startswith('#'):
continue
section_lines.append(line)
return '\n'.join(['{} {}'.format(i, line)
for i, line in enumerate(section_lines)])
print get_section('Connection setting')
Output:
0 jdbc.main.url=
1 jdbc.main.username=
2 jdbc.main.password=
Perhaps this will get you started.

Here's a quick solution:
def get_section(section):
results = ''
with open('../secure/test.property') as f:
lines = [l.strip() for l in f.readlines()]
indices = [i for i in range(len(lines)) if lines[i].startswith('#<TITLE>')]
for i in xrange(len(indices)):
if lines[indices[i]] == '#<TITLE>' + section:
for j in xrange(indices[i], indices[i+1] if i < len(indices)-1 else len(lines) - 1):
results += lines[j] + '\n'
break
return results
You can use it like:
print get_section('Connection setting')
Not very elegant but it works!

Searching a file for matches between two values and outputting search hits in Python

I am (attempting) to write a program that searches through a hex file for instances of a hex string between two values, eg. Between D4135B and D414AC, incrementing between the first value until the second is reached- D4135B, D4135C, D4135D etc etc.
I have managed to get it to increment etc, but it’s the search part I am having trouble with.
This is the code I have so far, it's been cobbled together from other places and I need to make it somehow output all search hits into the output file (file_out)
I have exceeded the limit of my Python understanding and I'm sure there's probably a much easier way of doing this. I would be very grateful for any help.
def search_process(hx): # searching for two binary strings
global FLAG
while threeByteHexPlusOne != threeByteHex2: #Keep incrementing until second value reached
If Flag:
if hx.find(threeByteHex2) != -1:
FLAG = False #If threeByteHex = ThreeByteHexPlusOne, end search
Print (“Reached the end of the search”,hx.find(threeByteHexPlusOne))
Else:
If hx.find(threeByteHexPlusOne) != -1:
FLAG = True
Return -1 #If no results found
if __name__ == '__main__':
try:
file_in = open(FILE_IN, "r") #opening input file
file_out = open(FILE_OUT, 'w') #opening output file
hx_read = file_in.read #read from input file
tmp = ''
found = ''
while hx_read: #reading from file till file is empty
hx_read = tmp + hx_read
pos = search_process(hx_read)
while pos != -1:
hex_read = hx_read[pos:]
if FLAG:
found = found + hx_read
pos = search_process(hx_read)
tmp = bytes_read[]
hx_read = file_in.read
file_out.write(found) #writing to output file
except IOError:
print('FILE NOT FOUND!!! Check your filename or directory/PATH')

Here's a program that looks through a hex string from a file 3 bytes at a time and if the 3-byte hex string is between the given hex bounds, it writes it to another file. It makes use of generators to make getting the bytes from the hex string a little cleaner.
import base64
import sys
_usage_string = 'Usage: python {} <input_file> <output_file>'.format(sys.argv[0])
def _to_base_10_int(value):
return int(value, 16)
def get_bytes(hex_str):
# Two characters equals one byte
for i in range(0, len(hex_str), 2):
yield hex_str[i:i+2]
def get_three_byte_hexes(hex_str):
bytes = get_bytes(hex_str)
while True:
try:
three_byte_hex = next(bytes) + next(bytes) + next(bytes)
except StopIteration:
break
yield three_byte_hex
def find_hexes_in_range(hex_str, lower_bound_hex, upper_bound_hex):
lower_bound = _to_base_10_int(lower_bound_hex)
upper_bound = _to_base_10_int(upper_bound_hex)
found = []
for three_byte_hex in get_three_byte_hexes(hex_str):
hex_value = _to_base_10_int(three_byte_hex)
if lower_bound <= hex_value < upper_bound:
found.append(three_byte_hex)
return found
if __name__ == "__main__":
try:
assert(len(sys.argv) == 3)
except AssertionError:
print _usage_string
sys.exit(2)
file_contents = open(sys.argv[1], 'rb').read()
hex_str = base64.decodestring(file_contents).encode('hex')
found = find_hexes_in_range(hex_str, 'D4135B', 'D414AC')
print('Found:')
print(found)
if found:
with open(sys.argv[2], 'wb') as fout:
for _hex in found:
fout.write(_hex)
Check out some more info on generators here

Python: Get/Scan All Text After a Certain String

I have a text file which I read using readlines(). I need to start extracting data after a keyword in the text file. For example, after the key word Hello World below, I would like to retrieve the value 100 from Blah=100:
Blah=0
Blah=2
Hello World
All the Text
Will be Scan
And Relevant
Info will be
Retrieved Blah=100
I can easily retrieved the information I want from the text file but I need it to start retrieving ONLY after a certain keyword in the textfile, such as after the 'Hello World' above. What I am currently doing is to retrieve the value using .split('='). Thus, I will retrieve all 3 values which are Blah=0, Blah=2 and Blah=100. I only wish to retrieve the value after a keyword in the text file, say 'Hello World', which is the value Blah=100.
There must be a simple way to do this. Please help. Thanks.

There are many ways to do it. Here's one:
STARTER = "Hello World"
FILENAME = "data.txt"
TARGET = "Blah="
with open(FILENAME) as f:
value = None
start_seen = False
for line in f:
if line.strip() == STARTER:
start_seen = True
continue
if TARGET in line and start_seen:
_,value = line.split('=')
break
if value is not None:
print "Got value %d" % int(value)
else:
print "Nothing found"

Here's a slightly pseudo-codish answer- you just need a flag that changes to True once you've found the keyword:
thefile = open('yourfile.txt')
key = "Hello World"
key_found = False
for line in thefile:
if key_found:
get_value(line)
# Optional: turn off key_found once you've found the value
# key_found = False
elif line.startswith(key):
key_found = True

Here's one way, not necessarily the best; I hard-coded the text here, but you could use file.read() to get similar results:
the_text = '''Blah=0
Blah=2
Hello World
All the Text
Will be Scan
And Relevant
Info will be
Retrieved Blah=100
'''
keyword = 'Hello World'
lines = the_text.split('\n')
for line_num, line in enumerate(lines):
if line.find(keyword) != -1:
lines = lines[line_num:]
break
the_value = None
value_key = 'Blah'
for line in lines:
if line.find(value_key) != -1:
the_value = line.split('=',2)[1]
break
if the_value:
print the_value

Example with regex.
reg = re.compile("Hello World")
data_re = re.ompile("Blah=(?P<value>\d)")
with open(f_name) as f:
need_search = False
for l in f:
if reg.search(l) is not None:
need_search = True
if need_search == True:
res = data_re.search(l)
if res is not None:
print res.groups('value')

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Simplified grep in python - python

The operator 'in' should be sufficient. for line in text: if search_text in line: print(line)

Related

How to pass a file and list_of_strings to search as a command line arguments in python

python - latexmk: error: no such option: -d

Print text between two separators?

Searching a file for matches between two values and outputting search hits in Python

Python: Get/Scan All Text After a Certain String

Categories

Resources