search for a variable in a text file - python

I'm trying to search for a variable in a text file.
sid = '185'
a = ((sid)"\n")
with open(filename) as f:
data = f.readlines()
if a in data:
print 'its here'
else:
print 'its NOT here'
here is my foo.txt
['306\n', '303\n', '313\n', '323\n', '417\n', '281\n', '304\n', '322\n', '320\n', '319\n', '308\n', '310\n', '414\n', '415\n', '314\n', '312\n', '307\n', '305\n', '285\n', '286\n', '318\n', '283\n', '282\n', '294\n', '309\n', '416\n', '418\n', '321\n', '185\n']
i think my problem lies with defining the /n properly? Not sure, as you can tell im a beginner. Any help or recommendations on how to do this would be appreciated.

You can use re.split with the following list comprehension to grab the numbers, then use the `in` operator to check for membership:
>>> s = """['306\n', '303\n', '313\n', '323\n', '417\n', '281\n', '304\n', '322\n', '320\n', '319\n', '308\n', '310\n', '414\n', '415\n', '314\n', '312\n', '307\n', '305\n', '285\n', '286\n', '318\n', '283\n', '282\n', '294\n', '309\n', '416\n', '418\n', '321\n', '185\n']"""
>>> import re
>>> def check(var):
... return var in [int(i.strip()) for i in re.split(r'[\[\]\',]*',s) if i.strip()]
...
>>> check(303)
True
>>> check(444)
False
In your case, use open(filename).read() instead of s. For larger files you can also use a set, which is more efficient for membership checks:
>>> def check(var):
... return var in set(int(i.strip()) for i in re.split(r'[\[\]\',]*',s) if i.strip())
...
>>> check(444)
False
>>> check(305)
True

You can use re.search function to search for a string
import re

# Text to search. The '\n' escapes in this non-raw literal are real newlines.
foo = """['306\n', '303\n', '313\n', '323\n', '417\n', '281\n', '304\n', '322\n', '320\n', '319\n', '308\n', '310\n', '414\n', '415\n', '314\n', '312\n', '307\n', '305\n', '285\n', '286\n', '318\n', '283\n', '282\n', '294\n', '309\n', '416\n', '418\n', '321\n', '185\n']"""
sid = '185'
# Escape the id so any regex metacharacters in it are matched literally,
# then require the newline that follows each number.
a = re.escape(sid) + '\n'
if re.search(a, foo):
    print("its here")
else:
    print("its not here")

In general, if there could be more than one line in foo.txt you can use:
sid = '185'
# '\\n' is a literal backslash followed by 'n', i.e. the two characters
# that appear in the file after each number.
a = sid + '\\n'
filename = 'foo.txt'
with open(filename) as f:
    for line in f:
        if a in line:
            print('its here')
            break
    else:
        # for/else: runs only when the loop finished without hitting `break`,
        # i.e. no line contained the text.
        print('its NOT here')
Don't forget to escape the backslash with '\\n' if foo.txt really is literally as you give it.

Related

Pythonic way to traverse etc shadows

I am trying to implement a logic using python :
cat /etc/shadow | awk -F: '($2 == "" ) { print $1 " does not have a password "}'
If the above returns the output for the user i will do
passwd -l <username>
I am trying to implement the above logic using python , but i am not really sure if it is working out in that way; here is my python code:
/etc/shadow looks like
root:*:17709:0:99999:7:::
daemon:*:17709:0:99999:7:::
bin:*:17709:0:99999:7:::
sys:*:17709:0:99999:7:::
sync:*:17709:0:99999:7:::
games:*:17709:0:99999:7:::
man:*:17709:0:99999:7:::
lp:*:17709:0:99999:7:::
mail:*:17709:0:99999:7:::
news:*:17709:0:99999:7:::
uucp:*:17709:0:99999:7:::
proxy:*:17709:0:99999:7:::
www-data:*:17709:0:99999:7:::
backup:*:17709:0:99999:7:::
CODE
with open("/etc/shadow") as file:
for line in file:
line = line.rstrip()
if line[line.find(":")+1:line.find(":")]=="":
print "This is a problem"
elif line[line.find(":")+1:line.find(":")]=="*":
print line[line.find(":")+1:line.find(":")]
else:
print "All Good"
The above code returns "This is a problem" , which isn't right
You can use re to extract desired column:
import re
data = """root:*:17709:0:99999:7:::
daemon:*:17709:0:99999:7:::
bin:*:17709:0:99999:7:::
sys:*:17709:0:99999:7:::
sync:*:17709:0:99999:7:::
games:*:17709:0:99999:7:::
man:*:17709:0:99999:7:::
lp:*:17709:0:99999:7:::
mail:*:17709:0:99999:7:::
news:*:17709:0:99999:7:::
uucp:*:17709:0:99999:7:::
proxy:*:17709:0:99999:7:::
www-data:*:17709:0:99999:7:::
backup:*:17709:0:99999:7:::"""
# Each match is a tuple of the first eight colon-terminated fields of one line;
# index 1 is the password field ('*' here, or '' when no password is set).
groups = re.findall('(.*?):(.*?):(.*?):(.*?):(.*?):(.*?):(.*?):(.*?):', data)
# An empty (or whitespace-only) password field is falsy and fails the check.
if all(g[1].strip() for g in groups):
    print('All good')
else:
    print('This is a problem')
This prints:
All good
Explanation of this regex here. In the second group (g[1]), you have the shadowed password (*), or empty string.
Try it:
# No trailing slash: /etc/shadow is a regular file, and "/etc/shadow/" cannot be opened.
with open("/etc/shadow") as ff:
    for line in ff:
        # Only the first two fields are needed; maxsplit=2 leaves the rest unsplit.
        login_name, pwd, remainder = line.split(":", maxsplit=2)
        print(login_name, pwd)  # change it as you like
Just split your lines on the ":" separator and check the value at the second position (which is at index 1 of course):
data = """
root:*:17709:0:99999:7:::
daemon:*:17709:0:99999:7:::
bin:*:17709:0:99999:7:::
sys:*:17709:0:99999:7:::
sync:*:17709:0:99999:7:::
games:*:17709:0:99999:7:::
man:*:17709:0:99999:7:::
lp:*:17709:0:99999:7:::
mail:*:17709:0:99999:7:::
news:*:17709:0:99999:7:::
uucp:*:17709:0:99999:7:::
proxy:*:17709:0:99999:7:::
www-data:*:17709:0:99999:7:::
backup:*:17709:0:99999:7:::
"""
# Walk the non-empty lines and inspect the second colon-separated field.
for line in data.strip().splitlines():
    row = [part.strip() for part in line.split(":")]
    if row[1] == "":
        # Empty password field: the account has no password.
        print("this is a problem")
    elif row[1] == "*":
        print(row[1])
    else:
        print("all good")

formatted output to an external txt file

fp = open ('data.txt','r')
saveto = open('backup.txt','w')
someline = fp.readline()
savemodfile = ''
while someline :
temp_array = someline.split()
print('temp_array[1] {0:20} temp_array[0] {0:20}'.format(temp_array[1], temp_array[0]), '\trating:', temp_array[len(temp_array)-1]))
someline = fp.readline()
savemodfile = temp_array[1] + ' ' + temp_array[0] +',\t\trating:'+ temp_array[10]
saveto.write(savemodfile + '\n')
fp.close()
saveto.close()
The input file :data.txt has records of this pattern: firstname Lastname age address
I would like backup.txt to have this format: Lastname firstname address age
How do i store the data in the backup.txt in a nice formatted way? I think i should use format() method somehow...
I use the print object in the code to show you what i understood about format() so far. Of course, i do not get the desired results.
To answer your question:
you can indeed use the .format() method on a string template, see the documentation https://docs.python.org/3.5/library/stdtypes.html#str.format
For example:
'the first parameter is {}, the second parameter is {}, the third one is {}'.format("this one", "that one", "there")
Will output: 'the first parameter is this one, the second parameter is that one, the third one is there'
You do not seem to use format() properly in your case: 'temp_array[1] {0:20} temp_array[0] {0:20}'.format(temp_array[1], temp_array[0]) will output something like 'temp_array[1] Lastname temp_array[0] Lastname '. That is because {0:20} will output the 1st parameter to format(), right padded with spaces to 20 characters.
Additionally, there are many things that could be improved in your code. I guess you are learning Python, so that's normal. Here is functionally equivalent code that produces the output you want and makes good use of Python features and syntax:
# Both files are closed automatically when the `with` block exits.
with open('data.txt', 'rt') as finput, \
        open('backup.txt', 'wt') as foutput:
    for line in finput:
        # NOTE: expects exactly four whitespace-separated fields per line;
        # any other count (e.g. a multi-word address) raises ValueError.
        firstname, lastname, age, address = line.strip().split()
        foutput.write("{} {} {} {}\n".format(lastname, firstname, address, age))
This code will give you a formatted output on the screen and in the output file
fp = open('data.txt', 'r')
saveto = open('backup.txt', 'w')
someline = fp.readline()
while someline:
    temp_array = someline.split()
    # Second field, first field, then the third and fourth,
    # each left-aligned and padded to 20 columns.
    # (Named `formatted` so the builtin `str` is not shadowed.)
    formatted = '{:20}{:20}{:20}{:20}'.format(temp_array[1], temp_array[0], temp_array[2], temp_array[3])
    print(formatted)
    saveto.write(formatted + '\n')
    someline = fp.readline()
fp.close()
saveto.close()
However, opening and closing files by hand like this is not ideal; prefer the following pattern:
# Both files are closed automatically when the block exits, even on error.
with open('a', 'w') as a, open('b', 'w') as b:
    do_something()
refer to : How can I open multiple files using "with open" in Python?
# `with` closes both files even if a malformed line raises below.
with open('data.txt', 'r') as fp, open('backup.txt', 'w') as saveto:
    for someline in fp:
        temp_array = someline.split()
        # Second field, first field, fourth, then third,
        # each centred ('^') in a 20-column cell.
        savemodfile = '{:^20} {:^20} {:^20} {:^20}'.format(
            temp_array[1], temp_array[0], temp_array[3], temp_array[2])
        saveto.write(savemodfile + '\n')

Find all lines that match regex pattern and grab part of string

f = open("machinelist.txt", 'r')
lines = f.readlines()
for host in lines:
hostnames = host.strip()
print hostnames
Returns:
\\TESTHOSTDEV01
\\TESTHOSTDEVDB01
\\TESTHOSTDEVDBQA
\\TESTHOSTDEVQA02
\\BTLCMOODY01 MRA Server
\\BTLCSTG05 StG Server
\\BTLCWEB02
\\BTLCWSUS01 Test Update Server
\\HIMSAPP01
\\SLVAPP01
\\TORAAPP01
\\HNSVAPP01
\\TESAPP01
I am curious whether there is a way to use re.findall() to grab all lines that begin with "\\", capturing only the hostname — not the leading "\\" or the comment after the host such as "MRA Server" (example: BTLCMOODY01).
You can do something like this(no need of regex):
Use str.startswith to check if a line starts with '\\':
>>> strs = "\\BTLCMOODY01 MRA Server\n"
>>> strs.startswith('\\')
True
Then use a combination of str.split and str.lstrip to get the first word:
>>> strs.split(None, 1)
['\\BTLCMOODY01', 'MRA Server\n']
#apply str.lstrip on the first item
>>> strs.split(None, 1)[0].lstrip('\\')
'BTLCMOODY01'
Code:
>>> with open('abc1') as f:
... for line in f:
... if line.startswith('\\'): #check if the line startswith `\`
... print line.split(None,1)[0].lstrip('\\')
...
TESTHOSTDEV01
TESTHOSTDEVDB01
TESTHOSTDEVDBQA
TESTHOSTDEVQA02
BTLCMOODY01
BTLCSTG05
BTLCWEB02
BTLCWSUS01
HIMSAPP01
SLVAPP01
TORAAPP01
HNSVAPP01
TESAPP01
An approach using regular expression:
import re

with open("machinelist.txt", 'r') as f:
    for host in f:
        # One or more leading backslashes, then the hostname up to the first
        # whitespace. Testing the match object (rather than calling .group on
        # it unconditionally) keeps non-matching lines from raising.
        match = re.match(r'\\+(\S+)', host.strip())
        if match:
            print(match.group(1))
It yields:
TESTHOSTDEV01
TESTHOSTDEVDB01
TESTHOSTDEVDBQA
TESTHOSTDEVQA02
BTLCMOODY01
BTLCSTG05
BTLCWEB02
BTLCWSUS01
HIMSAPP01
SLVAPP01
TORAAPP01
HNSVAPP01
TESAPP01
import re

# One or more leading backslashes, then the hostname: every non-whitespace
# character up to the first space. ([a-z]+ would reject names containing
# digits such as TESTHOSTDEV01.)
pattern = re.compile(r"\\+(\S+)")
with open("file.txt", "r") as fh:
    for x in fh:
        match = pattern.match(x)
        if match:
            print(match.group(1))

Python: Get/Scan All Text After a Certain String

I have a text file which I read using readlines(). I need to start extracting data after a keyword in the text file. For example, after the key word Hello World below, I would like to retrieve the value 100 from Blah=100:
Blah=0
Blah=2
Hello World
All the Text
Will be Scan
And Relevant
Info will be
Retrieved Blah=100
I can easily retrieve the information I want from the text file, but I need to start retrieving ONLY after a certain keyword in the text file, such as after the 'Hello World' above. What I am currently doing is retrieving the value using .split('='). Thus, I retrieve all 3 values: Blah=0, Blah=2 and Blah=100. I only wish to retrieve the value that comes after a keyword in the text file, say 'Hello World', which is the value Blah=100.
There must be a simple way to do this. Please help. Thanks.
There are many ways to do it. Here's one:
STARTER = "Hello World"
FILENAME = "data.txt"
TARGET = "Blah="
value = None
with open(FILENAME) as f:
    start_seen = False
    for line in f:
        if line.strip() == STARTER:
            # Everything after this marker line is eligible.
            start_seen = True
            continue
        if start_seen and TARGET in line:
            # maxsplit=1: a second '=' in the value must not break the unpacking.
            _, value = line.split('=', 1)
            break
if value is not None:
    # int() ignores the trailing newline left on the value.
    print("Got value %d" % int(value))
else:
    print("Nothing found")
Here's a slightly pseudo-codish answer- you just need a flag that changes to True once you've found the keyword:
key = "Hello World"
key_found = False
with open('yourfile.txt') as thefile:
    for line in thefile:
        if key_found:
            # Only lines after the keyword line reach this branch.
            get_value(line)
            # Optional: turn off key_found once you've found the value
            # key_found = False
        elif line.startswith(key):
            key_found = True
Here's one way, not necessarily the best; I hard-coded the text here, but you could use file.read() to get similar results:
the_text = '''Blah=0
Blah=2
Hello World
All the Text
Will be Scan
And Relevant
Info will be
Retrieved Blah=100
'''
keyword = 'Hello World'
lines = the_text.split('\n')
for line_num, line in enumerate(lines):
    if line.find(keyword) != -1:
        # Keep only the keyword line and everything after it.
        lines = lines[line_num:]
        break
else:
    # Keyword never seen: there is nothing "after" it to search, so do not
    # fall through and pick up a value that precedes a missing marker.
    lines = []
the_value = None
value_key = 'Blah'
for line in lines:
    if line.find(value_key) != -1:
        the_value = line.split('=', 2)[1]
        break
if the_value:
    print(the_value)
Example with regex.
import re

reg = re.compile("Hello World")
# \d+ captures the whole number (a bare \d would capture only its first digit).
data_re = re.compile(r"Blah=(?P<value>\d+)")
with open(f_name) as f:
    need_search = False
    for line in f:
        if reg.search(line) is not None:
            need_search = True
        if need_search:
            res = data_re.search(line)
            if res is not None:
                # group('value') returns the named capture as a string;
                # groups('value') would return a tuple of all groups.
                print(res.group('value'))

Running replace() method in a for loop?

Its late and I have been trying to work on a simple script to rename point cloud data to a working format. I dont know what im doing wrong as the code at the bottom works fine. Why doesnt the code in the for loop work? It is adding it to the list but its just not getting formatted by the replace function. Sorry I know this isnt a debugger but I am really stuck on this and it would probably take 2 seconds for someone else to see the problem.
# Opening and Loading the text file then sticking its lines into a list []
filename = "/Users/sacredgeometry/Desktop/data.txt"
text = open(filename, 'r')
lines = text.readlines()
linesNew = []
temp = None
# This bloody for loop is the problem
for i in lines:
temp = str(i)
temp.replace(' ', ', ',2)
linesNew.append(temp)
# DEBUGGING THE CODE
print(linesNew[0])
print(linesNew[1])
# Another test to check that the replace works ... It does!
test2 = linesNew[0].replace(' ', ', ',2)
test2 = test2.replace('\t', ', ')
print('Proof of Concept: ' + '\n' + test2)
text.close()
You're not assigning the return value of replace() to anything. Also, readlines and str(i) are unnecessary.
Try this:
filename = "/Users/sacredgeometry/Desktop/data.txt"
# `with` closes the file even if one of the statements below raises.
with open(filename, 'r') as text:
    # Each line is already a string, so no str() call is needed.
    # str.replace returns a NEW string; store that result in linesNew.
    linesNew = [line.replace(' ', ', ', 2) for line in text]
# DEBUGGING THE CODE
print(linesNew[0])
print(linesNew[1])
# Another test to check that the replace works ... It does!
test2 = linesNew[0].replace(' ', ', ', 2)
test2 = test2.replace('\t', ', ')
print('Proof of Concept: ' + '\n' + test2)
Strings are immutable. replace returns a new string, which is what you have to insert into the linesNew list.
# This bloody for loop is the problem
for i in lines:
    temp = str(i)
    # replace() does not modify `temp`; it returns the changed copy.
    temp2 = temp.replace(' ', ', ', 2)
    linesNew.append(temp2)
I had a similar problem and came up with the code below to help solve it. My specific issue was that I need to swap out certain parts of a string with the corresponding label. I also wanted something that would be reusable in different places within my application.
With the code below, I'm able to do the following:
>>> string = "Let's take a trip to Paris next January"
>>> lod = [{'city':'Paris'}, {'month':'January'}]
>>> processed = TextLabeler(string, lod)
>>> processed.text
"Let's take a trip to [[ city ]] next [[ month ]]"
Here is all of the code:
class TextLabeler():
    """Replace known values in a text with ``[[ key ]]`` labels.

    `text` is the string to process and `lod` is a list of dicts mapping a
    label name to the value it should replace. The processed string is
    available as the ``text`` attribute once the constructor returns.
    """

    def __init__(self, text, lod):
        self.text = text
        self.iterate(lod)

    def replace_kv(self, _dict):
        """Replace any occurrence of a value with the key"""
        # .items() works on both Python 2 and 3; iteritems() is Python 2 only.
        for key, value in _dict.items():
            label = """[[ {0} ]]""".format(key)
            self.text = self.text.replace(value, label)
        return self.text

    def iterate(self, lod):
        """Iterate over each dict object in a given list of dicts, `lod` """
        for _dict in lod:
            self.text = self.replace_kv(_dict)
        return self.text

Categories