Find all lines that match regex pattern and grab part of string - python

f = open("machinelist.txt", 'r')
lines = f.readlines()
for host in lines:
hostnames = host.strip()
print hostnames
Returns:
\\TESTHOSTDEV01
\\TESTHOSTDEVDB01
\\TESTHOSTDEVDBQA
\\TESTHOSTDEVQA02
\\BTLCMOODY01 MRA Server
\\BTLCSTG05 StG Server
\\BTLCWEB02
\\BTLCWSUS01 Test Update Server
\\HIMSAPP01
\\SLVAPP01
\\TORAAPP01
\\HNSVAPP01
\\TESAPP01
I am curious whether there is a way to use re.findall() to grab all lines that begin with "\\". However, I only want to capture the hostnames, not the leading "\\" or the comments after the host such as "MRA Server" (example: BTLCMOODY01).

You can do something like this (no regex needed):
Use str.startswith to check if a line starts with '\\':
>>> strs = "\\BTLCMOODY01 MRA Server\n"
>>> strs.startswith('\\')
True
Then use a combination of str.split and str.lstrip to get the first word:
>>> strs.split(None, 1)
['\\BTLCMOODY01', 'MRA Server\n']
#apply str.lstrip on the first item
>>> strs.split(None, 1)[0].lstrip('\\')
'BTLCMOODY01'
Code:
>>> with open('abc1') as f:
... for line in f:
... if line.startswith('\\'): #check if the line startswith `\`
... print line.split(None,1)[0].lstrip('\\')
...
TESTHOSTDEV01
TESTHOSTDEVDB01
TESTHOSTDEVDBQA
TESTHOSTDEVQA02
BTLCMOODY01
BTLCSTG05
BTLCWEB02
BTLCWSUS01
HIMSAPP01
SLVAPP01
TORAAPP01
HNSVAPP01
TESAPP01

An approach using regular expression:
import re

# Two leading backslashes, then the hostname: every non-whitespace character
# up to the first space, so a trailing comment such as "MRA Server" is left
# out of group 1.
host_pattern = re.compile(r'\\\\(\S+)')

# The with-block closes the file even if reading fails part-way through.
with open("machinelist.txt", 'r') as f:
    for host in f:
        # match() anchors at the start of the line and returns None for
        # lines that do not begin with "\\", so check before calling group().
        found = host_pattern.match(host.strip())
        if found:
            print(found.group(1))
It yields:
TESTHOSTDEV01
TESTHOSTDEVDB01
TESTHOSTDEVDBQA
TESTHOSTDEVQA02
BTLCMOODY01
BTLCSTG05
BTLCWEB02
BTLCWSUS01
HIMSAPP01
SLVAPP01
TORAAPP01
HNSVAPP01
TESAPP01

import re

# One or more leading backslashes followed by the hostname: \\HOSTNAME.
# \S+ accepts the digits found in names such as TESTHOSTDEV01 and stops at
# the first whitespace, so a trailing comment ("MRA Server") is not captured
# and a last line without a newline still matches.
pattern = re.compile(r"\\+(\S+)")

# The with-block closes the file when the loop finishes.
with open("file.txt", "r") as fh:
    for x in fh:
        # match() only succeeds at the very start of the line.
        match = pattern.match(x)
        if match:
            print(match.group(1))

Related

Sort a file with a specific line pattern in Python

Given a file with the following content:
enum class Fruits(id: String) {
BANANA(id = "banana"),
LEMON(id = "lemon"),
DRAGON_FRUIT(id = "dragonFruit"),
APPLE(id = "apple"); }
I want to sort this file given the pattern "id = ", and then replace these lines with the new sorted lines.
I wrote a piece of code in python that sorts the whole file, but I'm struggling with regex to read/find the pattern so I can sort it.
My python script:
import re
fruitsFile = '/home/genericpath/Fruits.txt'
def sortFruitIds():
# this is an attempt to get/find the pattern, but it return an AttributeError:
# 'NoneType' object has no attribute 'group'
with open(fruitsFile, "r+") as f:
lines = sorted(f, key=lambda line: str(re.search(r"(?<=id = )\s+", line)))
for line in lines:
f.write(line)
When trying to find the pattern with regex, it returns an AttributeError: 'NoneType' object has no attribute 'group'
Any help is appreciated.
Looks like your main issue is that your regex expects a space character \s but what you want to be looking for is any non-space character \S. With that in mind this should work:
import re

fruitsFile = 'Fruits.txt'


def sortFruitIds():
    """Sort the lines of ``fruitsFile`` in place by their ``id = "..."`` value.

    Lines without an ``id = "`` marker get an empty sort key and therefore
    move to the top of the file, keeping their relative order.
    """
    with open(fruitsFile, "r+") as f:
        lines = f.readlines()
        # The lookbehind anchors right after `id = "`; the `|$` alternative
        # makes the search always succeed (with an empty match), so .group()
        # is never called on None.
        lines_sorted = sorted(
            lines,
            key=lambda line: re.search(r"(?<=id = \")\S+|$", line).group(),
        )
        # readlines() leaves the position at end-of-file; rewind so the
        # sorted lines replace the old content instead of being appended.
        f.seek(0)
        f.writelines(lines_sorted)
        # Cut off anything left over beyond the rewritten content.
        f.truncate()
I also added |$ to the regex to return an empty string if there is no match, and added group() to grab the match.
We can approach this by doing a regex find all for all entries in the enum. Then sort them alphabetically by the id string value, and join together the final enum code. Note that below I also extract the first line of the enum for use later in the output.
# Enum source to reorder; entries are located by regex rather than by line.
inp = '''enum class Fruits(id: String) {
BANANA(id = "banana"),
LEMON(id = "lemon"),
DRAGON_FRUIT(id = "dragonFruit"),
APPLE(id = "apple"); }'''

# Opening line of the enum, up to and including the first "{".
header = re.search(r'enum.*?\{', inp).group()


def _quoted_id(entry):
    """Return the text between the first pair of double quotes in *entry*."""
    return re.search(r'"(.*?)"', entry).group(1)


# Every NAME(id = "...") entry, ordered alphabetically by its quoted id.
items = sorted(re.findall(r'\w+\(id\s*=\s*".*?"\)', inp), key=_quoted_id)

# Reassemble: header, comma-separated entries, then the closing "; }".
joined_items = ',\n '.join(items)
output = header + '\n ' + joined_items + '; }'
print(output)
This prints:
enum class Fruits(id: String) {
APPLE(id = "apple"),
BANANA(id = "banana"),
DRAGON_FRUIT(id = "dragonFruit"),
LEMON(id = "lemon"); }

How to match the list item to content in the file

I have a list1 = ['hi','world','of']
I have a .txt file
Hellohihowareyou
worldisfullofwonder
How can I check whether 'hi', 'of', and 'world' exist in the file?
pseudo code
import re
pattern = r''
for i in list1:
#print (i)
with open('file.txt','r'):
content = f.read()
test= re.search(pattern,content)
print (test)
My expected output:
['hi','of'], since there is no world in the file
You can also do it with the in keyword; use regex if your pattern starts to become more advanced.
Normal
# Words to look for, and the text to search them in.
list1 = ['hi','world','of']
text = """ Hellohihowareyou
worldisfullofwonder"""

# Collect, in list order, every word that occurs as a substring of the text.
results = []
for element in list1:
    if element not in text:
        continue
    results.append(element)
print(results)
List comprehension
results = [element for element in list1 if element in text]
print(results)
use this:
import re

file = 'file.txt'
lst = ['hi','world','of']

# Read the whole file once; the with-block closes the handle afterwards.
with open(file, 'r') as file_handler:
    content = file_handler.read()

# re.escape makes every list item match literally, even if it contains regex
# metacharacters such as "." or "+". findall returns each occurrence found in
# the file, in order of appearance.
result = re.findall('|'.join(map(re.escape, lst)), content)
import re
file = r'C:\Users\wind\Desktop\file.txt'
list1 = ['hi','of','wonderw']
pattern = r''
for i in list1:
pattern = re.compile(i, re.IGNORECASE)
with open(file,'r') as f:
content = f.read()
test= re.search(pattern,content)
print (test)

Splitting string in Python with multiple occurrence of split keyword

So, I want to split the filter and update parameters for the MongoDB replace_one method available in the PyMongo library.
{filter}, {update}
are passed to me from a file, one pair per line
Eg: {"k1":"v1"}, {"k1":"v1", "k2":"v2", "k3":"v3"}
What I wanna do ?
Split them such that I get two dict variables,
filter = {"k1":"v1"}
update = {"k1":"v1", "k2":"v2", "k3":"v3"}
What have I tried ?
Problem is, I don't wanna change the original format and if I split them using "," then it might split abruptly, I can't also rely on splitting on first occurrence of "," as filter part itself might have multiple ","
def data_replace_one(host, port, dbname, coll_name, file_path, authdb):
# Runs one replace_one call on collection coll_name of database dbname for
# every non-empty line of file_path. Each line is split on "|" and both parts
# are parsed with literal_eval before being passed to replace_one.
# NOTE(review): LOCALHOST, pymongo, getpass and literal_eval are not defined
# in this snippet - presumably module-level names/imports; confirm.
if LOCALHOST:
client = pymongo.MongoClient(host, port)
else:
# Ask for credentials interactively; getpass.getpass() supplies the password.
print("Enter credentials:")
uname = input("Username: ")
pwd = getpass.getpass()
client = pymongo.MongoClient(
host, port, username=uname, password=pwd, authSource=authdb)
db = client[dbname]
coll = db[coll_name]
with open(file_path) as in_file:
list_dict_queries = [line.strip() for line in in_file]
# filter(None, ...) drops the empty strings left by blank lines.
list_dict_queries = list(filter(None, list_dict_queries))
for query in list_dict_queries:
# NOTE(review): the sample input shown in the question separates the pair
# with ", " rather than "|" - confirm the delimiter. A line without "|"
# makes query_list[1] raise IndexError.
query_list = query.split("|")
query_list[0] = query_list[0].strip()
query_list[1] = query_list[1].strip()
#print(literal_eval(query_list[0]), literal_eval(query_list[1]))
coll.replace_one(literal_eval(
query_list[0]), literal_eval(query_list[1]))
I think it would be simplest to add some square braces around each line, and then interpret it as JSON - assuming that your input format is guaranteed to be JSON compliant.
Something like:
import json

with open(file_path) as in_file:
    # Wrapping "{filter}, {update}" in square brackets turns each line into a
    # JSON array of two objects. Blank lines are skipped; otherwise they
    # would become "[]" and produce an empty pair.
    list_dict_queries = [
        '[' + line.strip() + ']' for line in in_file if line.strip()
    ]
# Each element is the parsed list for one input line.
query_list = [json.loads(n) for n in list_dict_queries]
If you would not have braces/curly brackets anywhere else, then you can use the following.
>>> filter, update = re.findall('{.*?}', '{"k1":"v1"}, {"k1":"v1", "k2":"v2", "k3":"v3"}')
>>> filter
'{"k1":"v1"}'
>>> update
'{"k1":"v1", "k2":"v2", "k3":"v3"}'

get wanted data from a text file with python without using splits

Hello, I have the following file:
WORKERS = yovel:10.0.0.6,james:10.0.0.7
BLACKLIST = 92.122.197.45:ynet,95.1.2.2:twitter
I'm trying to write a function in python that will get the worker IP and returns the worker name like this:
workername = getName(ip)
The only method I thought of is using splits (.split(":"), .split(","), etc.), but that would be very long code and not smart.
is there a shorter way to do it?
You can use re:
import re
def getName(ip, content=None):
    """Return the name paired with *ip* in ``name:ip`` entries, or None.

    *content* is the text to search. When it is omitted, ``filename.txt`` is
    read on each call.
    """
    if content is None:
        # Read at call time: a default of open(...).read() would run once
        # when the function is defined, never close the handle, and miss any
        # later change to the file.
        with open('filename.txt') as f:
            content = f.read()
    # re.escape keeps the dots of the address literal; the (?!\d) guard stops
    # "10.0.0.6" from matching the front of "10.0.0.60".
    found = re.search(r'\w+(?=:{}(?!\d))'.format(re.escape(ip)), content)
    return found.group() if found else None
print(getName('10.0.0.6'))
Output:
'yovel'
Note, however, it is slightly more robust to use split:
def getName(ip):
    """Return the worker name registered for *ip* in ``filename.txt``, or None.

    Raises KeyError if the file has no ``WORKERS`` line.
    """
    lines = {}
    with open('filename.txt') as f:
        for raw in f:
            # Each line looks like "KEY = value"; partition splits on the
            # first " = " only, and lines without it (e.g. blank lines) are
            # skipped.
            key, sep, value = raw.strip('\n').partition(' = ')
            if sep:
                lines[key] = value
    # WORKERS holds comma-separated "name:ip" pairs; invert to ip -> name.
    pairs = (entry.split(':') for entry in lines['WORKERS'].split(','))
    d = {b: a for a, b in pairs}
    return d.get(ip)
Using split() doesn't look too bad here:
def getName(ip_address, filename='file.txt', line_type='WORKERS'):
    """Return the name mapped to *ip_address* on the *line_type* line.

    The file is expected to hold lines of the form
    ``KEY = name:ip,name:ip``. Only the first line whose key equals
    *line_type* is consulted. Returns None when no such line exists or the
    address is not listed on it.
    """
    with open(filename) as in_file:
        for line in in_file:
            # partition never raises: blank lines and lines without "=" give
            # an empty separator and are skipped instead of breaking a
            # two-name unpack.
            key, sep, info = line.partition('=')
            if not sep or key.strip() != line_type:
                continue
            pairs = (entry.split(':') for entry in info.strip().split(','))
            # Strip each side so "a:1, b:2" style spacing is tolerated.
            lookup = {ip.strip(): name.strip() for name, ip in pairs}
            return lookup.get(ip_address)
    return None
Which works as follows:
>>> getName('10.0.0.6')
'yovel'

search for a variable in a text file

I'm trying to search for a variable in a text file.
sid = '185'
a = ((sid)"\n")
with open(filename) as f:
data = f.readlines()
if a in data:
print 'its here'
else:
print 'its NOT here'
here is my foo.txt
['306\n', '303\n', '313\n', '323\n', '417\n', '281\n', '304\n', '322\n', '320\n', '319\n', '308\n', '310\n', '414\n', '415\n', '314\n', '312\n', '307\n', '305\n', '285\n', '286\n', '318\n', '283\n', '282\n', '294\n', '309\n', '416\n', '418\n', '321\n', '185\n']
I think my problem lies with defining the \n properly? Not sure; as you can tell, I'm a beginner. Any help or recommendations on how to do this would be appreciated.
You can use re.split and the following list comprehension to grab the numbers, then use the in operator to check membership:
>>> s = """['306\n', '303\n', '313\n', '323\n', '417\n', '281\n', '304\n', '322\n', '320\n', '319\n', '308\n', '310\n', '414\n', '415\n', '314\n', '312\n', '307\n', '305\n', '285\n', '286\n', '318\n', '283\n', '282\n', '294\n', '309\n', '416\n', '418\n', '321\n', '185\n']"""
>>> import re
>>> def check(var):
... return var in [int(i.strip()) for i in re.split(r'[\[\]\',]*',s) if i.strip()]
...
>>> check(303)
True
>>> check(444)
False
And in your case you need to use open(filename).read() instead of s. Also, for larger files you can use a set, which is more efficient for checking membership:
>>> def check(var):
... return var in set(int(i.strip()) for i in re.split(r'[\[\]\',]*',s) if i.strip())
...
>>> check(444)
False
>>> check(305)
True
You can use re.search function to search for a string
foo = """['306\n', '303\n', '313\n', '323\n', '417\n', '281\n', '304\n', '322\n', '320\n', '319\n', '308\n', '310\n', '414\n', '415\n', '314\n', '312\n', '307\n', '305\n', '285\n', '286\n', '318\n', '283\n', '282\n', '294\n', '309\n', '416\n', '418\n', '321\n', '185\n']"""
sid = '185'
a = sid + '\n'
if re.search(a,foo):
print "its here"
else:
print "its not here"
In general, if there could be more than one line in foo.txt you can use:
sid = '185'
a = sid + '\\n'
filename = 'foo.txt'
with open(filename) as f:
for line in f:
if a in line:
print 'its here'
break
else:
print 'its NOT here'
Don't forget to escape the backslash with '\\n' if foo.txt really is literally as you give it.

Categories