get wanted data from a text file with python without using splits

Hello, I have this file:
WORKERS = yovel:10.0.0.6,james:10.0.0.7
BLACKLIST = 92.122.197.45:ynet,95.1.2.2:twitter
I'm trying to write a function in Python that takes a worker IP and returns the worker name, like this:
workername = getName(ip)
The only method I thought of is using splits (.split(":"), .split(",") etc.), but that would be long and not very smart code.
Is there a shorter way to do it?

You can use re:
import re
def getName(ip, content=open('filename.txt').read()):
    # note: the default argument is read once, when the function is defined
    _r = re.findall(r'\w+(?=:{})'.format(ip), content)  # lookahead: a word followed by ":<ip>"
    return _r[0] if _r else None
print(getName('10.0.0.6'))
Output:
'yovel'
Note, however, it is slightly more robust to use split:
def getName(ip):
    # parse each "KEY = value" line into a dict entry
    lines = dict(i.strip('\n').split(' = ') for i in open('filename.txt'))
    # invert the "name:ip" pairs of the WORKERS entry into an ip -> name mapping
    d = {b: a for a, b in map(lambda x: x.split(':'), lines['WORKERS'].split(','))}
    return d.get(ip)
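For example, with the file above:
print(getName('10.0.0.7'))  # prints 'james'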

Using split() doesn't look too bad here:
def getName(ip_address, filename='file.txt', line_type='WORKERS'):
    with open(filename) as in_file:
        for line in in_file:
            name, info = [x.strip() for x in line.strip().split('=')]
            if name == line_type:
                info = [x.split(':') for x in info.split(',')]
                lookup = {ip: name for name, ip in info}
                return lookup.get(ip_address)
Which works as follows:
>>> getName('10.0.0.6')
'yovel'

Related

How to print in a specific element in a list in a sorted order in python

I'm tasked with creating a function that takes a string filename as an argument, reads the file with that name, and prints all confirmed reservations in order of their time.
A line in the file is formatted like so:
name, time, reservation_status (either CONFIRMED or CANCELLED, in uppercase)
An example of a file "reservations.txt" could look like this:
Alex, 20, CONFIRMED
Thomas, 16, CANCELLED
William, 18, CONFIRMED
The correct output when calling the function on this example file should look like this:
show_reservations("reservations.txt")
>>> William, 18
Alex, 20
My solution:
def show_reservations(filename):
    with open(filename) as f:
        for line in f.readlines():
            line = line.replace(',', '').split()
            status = line[2]
            if status == "CONFIRMED":
                name = line[0]
                time = line[1]
                print(name + ",", time)
However, calling my solution on the example file above gives me the following output:
show_reservations("reservations.txt")
>>> Alex, 20
William, 18
What to do?
You don't store your data anywhere, and you also need some sorting:
def funct(e):
    return e['time']
def show_reservations(filename):
    with open(filename) as f:
        l = []
        for line in f.readlines():
            line = line.replace(',', '').split()
            d = {}  # renamed from "dict" so the built-in is not shadowed
            status = line[2]
            if status == "CONFIRMED":
                d["name"] = line[0]
                d["time"] = line[1]
                l.append(d)  # only confirmed reservations are kept
        l.sort(key=funct)
        for i in l:
            print('{0}, {1}'.format(i["name"], i["time"]))
show_reservations("input.txt")
It happens because the elements come in this order in your original file:
Alex, 20, CONFIRMED
...
William, 18, CONFIRMED
So you can save your filtered elements into a list and apply sorting to it, for example using the built-in sorted function.
You could try something like this:
def show_reservations(filename):
    confirmed_orders = []
    with open(filename) as f:
        for line in f.readlines():
            line = line.replace(',', '').split()
            name, _time, status = line
            if status == "CONFIRMED":
                confirmed_orders.append((name, _time))
    confirmed_orders_by_time = sorted(confirmed_orders, key=lambda x: x[1])
    for name, _time in confirmed_orders_by_time:
        print(name + ",", _time)
A few additional suggestions, in case the snippet you've provided is real production code:
time is a bad variable name because it shadows Python's built-in time module.
split gives you a list, so instead of fiddling with index access you can unpack it:
name, _time, status = line
line.replace(',', '').split() won't work correctly if a name or status in the file contains a space. Consider using the csv module or something else to parse the data file (see the sketch after this list).
On Python 3.6 or higher, an f-string is preferable to manual string concatenation:
print(f"{name}, {_time}")
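A minimal csv-based sketch of that suggestion, assuming the file stays comma-separated with a space after each comma:
import csv
def show_reservations(filename):
    confirmed = []
    with open(filename, newline='') as f:
        # skipinitialspace drops the blank after each comma
        for row in csv.reader(f, skipinitialspace=True):
            if not row:
                continue
            name, _time, status = row
            if status == "CONFIRMED":
                confirmed.append((name, _time))
    for name, _time in sorted(confirmed, key=lambda x: x[1]):
        print(f"{name}, {_time}")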
def show_reservations(filename):
    reservations = []
    with open(filename) as f:
        for line in f.readlines():
            splitted_line = line.replace(',', '').split()
            status = splitted_line[2]
            if status == "CONFIRMED":
                time = splitted_line[1]
                name = splitted_line[0]
                reservations.append({"time": time, "name": name})
    return sorted(reservations, key=lambda k: k['time'])
for reservation in show_reservations("reservations.txt"):
    print(reservation["name"] + ",", reservation["time"])
Instead of printing directly, append the entries to a list of (time, name) tuples. After building the list, sort it (li.sort()) and loop through it again, this time printing; a sketch follows.
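A short sketch of that suggestion (the list name li is arbitrary):
def show_reservations(filename):
    li = []
    with open(filename) as f:
        for line in f:
            name, _time, status = line.replace(',', '').split()
            if status == "CONFIRMED":
                li.append((_time, name))  # (time, name) tuples sort by time first
    li.sort()
    for _time, name in li:
        print(name + ",", _time)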

Splitting string in Python with multiple occurrence of split keyword

I want to split the filter and update parameters for MongoDB's replace_one method, available in the PyMongo library.
{filter}, {update}
are passed to me from a file, one pair per line
Eg: {"k1":"v1"}, {"k1":"v1", "k2":"v2", "k3":"v3"}
What do I want to do?
Split them so that I get two dict variables:
filter = {"k1":"v1"}
update = {"k1":"v1", "k2":"v2", "k3":"v3"}
What have I tried?
The problem is that I don't want to change the original format, and if I split on "," it might split in the wrong place; I can't rely on splitting at the first occurrence of "," either, since the filter part itself may contain multiple ",".
import getpass
import pymongo
from ast import literal_eval
def data_replace_one(host, port, dbname, coll_name, file_path, authdb):
    if LOCALHOST:
        client = pymongo.MongoClient(host, port)
    else:
        print("Enter credentials:")
        uname = input("Username: ")
        pwd = getpass.getpass()
        client = pymongo.MongoClient(
            host, port, username=uname, password=pwd, authSource=authdb)
    db = client[dbname]
    coll = db[coll_name]
    with open(file_path) as in_file:
        list_dict_queries = [line.strip() for line in in_file]
    list_dict_queries = list(filter(None, list_dict_queries))  # drop empty lines
    for query in list_dict_queries:
        query_list = query.split("|")
        query_list[0] = query_list[0].strip()
        query_list[1] = query_list[1].strip()
        # print(literal_eval(query_list[0]), literal_eval(query_list[1]))
        coll.replace_one(literal_eval(query_list[0]),
                         literal_eval(query_list[1]))
I think it would be simplest to add square brackets around each line and then interpret it as JSON, assuming that your input format is guaranteed to be JSON-compliant.
Something like:
import json
with open(file_path) as in_file:
    list_dict_queries = ['[' + line.strip() + ']' for line in in_file]
query_list = [json.loads(n) for n in list_dict_queries]
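With the example line above, each parsed entry is a two-element list that unpacks directly into the two dicts:
>>> line = '{"k1":"v1"}, {"k1":"v1", "k2":"v2", "k3":"v3"}'
>>> json.loads('[' + line + ']')
[{'k1': 'v1'}, {'k1': 'v1', 'k2': 'v2', 'k3': 'v3'}]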
If you don't have curly brackets anywhere else, you can use the following:
>>> import re
>>> filter, update = re.findall('{.*?}', '{"k1":"v1"}, {"k1":"v1", "k2":"v2", "k3":"v3"}')
>>> filter
'{"k1":"v1"}'
>>> update
'{"k1":"v1", "k2":"v2", "k3":"v3"}'
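To turn those strings into actual dicts, you can then parse each piece, for example with json.loads (the inputs shown are valid JSON):
>>> import json
>>> json.loads(update)
{'k1': 'v1', 'k2': 'v2', 'k3': 'v3'}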

self modifying python script

I want to create a Python script that can modify its own code, using Python Language Services or any other way.
E.g. a script that keeps track of its count of successful executions:
import re
COUNT = 0
def updateCount():
    # code to update second line e.g. COUNT = 0
    pass
if __name__ == '__main__':
    print('This script has run {} times'.format(COUNT))
    updateCount()
On successful execution of this script, the code should change to:
import re
COUNT = 1
def updateCount():
    # code to update second line e.g. COUNT = 0
    pass
if __name__ == '__main__':
    print('This script has run {} times'.format(COUNT))
    updateCount()
A simple approach that came to my mind was to open __file__ in write mode and do the required modification using regular expressions etc. But that did not work: I got the exception io.UnsupportedOperation: not readable, because a file opened in 'w' mode cannot be read. Even if this approach worked, it would be very risky because it could spoil my whole script, so I am looking for a solution using Python Language Services.
Yes, you can use the language services to achieve self-modification, as in the following example:
>>> def foo(): print("original foo")
>>> foo()
original foo
>>> rewrite_txt="def foo(): print('I am new foo')"
>>> newcode = compile(rewrite_txt, "", 'exec')
>>> eval(newcode)
>>> foo()
I am new foo
So, with dynamically generated code you can replace stuff from the original source file, without modifying the file itself.
A Python script is nothing more than a text file, so you can open it as an external file and read from and write to it (using the __file__ variable you can get the exact name of your script):
def updateCount():
    fin = open(__file__, 'r')
    code = fin.read()
    fin.close()
    second_line = code.split('\n')[1]
    second_line_parts = second_line.split(' ')
    second_line_parts[2] = str(int(second_line_parts[2]) + 1)
    second_line = ' '.join(second_line_parts)
    lines = code.split('\n')
    lines[1] = second_line
    code = '\n'.join(lines)
    fout = open(__file__, 'w')
    fout.write(code)
    fout.close()
kyriakosSt's answer works, but it hard-codes that the assignment to COUNT must be on the second line, which is prone to unexpected behavior when the line number changes because the source was modified for something else.
For a more robust solution, you can use lib2to3 to parse and update the source code instead. Subclass lib2to3.refactor.RefactoringTool to refactor the code using a fixer that subclasses lib2to3.fixer_base.BaseFix, with a pattern that looks for an expression statement of the form 'COUNT' '=' any, and a transform method that updates the last child node by incrementing its integer value:
from lib2to3 import fixer_base, refactor
COUNT = 0  # this should be incremented every time the script runs
class IncrementCount(fixer_base.BaseFix):
    PATTERN = "expr_stmt< 'COUNT' '=' any >"
    def transform(self, node, results):
        node.children[-1].value = str(int(node.children[-1].value) + 1)
        return node
class Refactor(refactor.RefactoringTool):
    def __init__(self, fixers):
        self._fixers = [cls(None, None) for cls in fixers]
        super().__init__(None)
    def get_fixers(self):
        return self._fixers, []
with open(__file__, 'r+') as file:
    source = str(Refactor([IncrementCount]).refactor_string(file.read(), ''))
    file.seek(0)
    file.write(source)
Demo: https://repl.it/#blhsing/MushyStrangeClosedsource
This edits the module-level variables defined before _local_config: snapshot them into a dictionary, update that dictionary after the values change, and then, while iterating over the source file, replace each matching line with the new _local_config values:
count = 0
a = 0
b = 1
c = 1
_local_config = dict(
    filter(
        lambda elem: (elem[0][:2] != "__") and (str(elem[1])[:1] != "<"),
        globals().items(),
    ),
)
# do some stuff
count += 1
c = a + b
a = b
b = c
# update with new values
_local_config = dict(
    filter(
        lambda elem: elem[0] in _local_config.keys(),
        globals().items(),
    )
)
# read self
with open(__file__, "r") as f:
    new_file = ""
    for line in f.read().split("\n"):
        for k, v in _local_config.items():
            search = f"{k} = "
            if search == line[: len(k) + 3]:
                line = search + str(v)
                _local_config.pop(k)
                break
        new_file += line + "\n"
# write self
with open(__file__, "w") as f:
    f.write(new_file[:-1])

Searching a file with the contents of another file python

I have a file with a unique ID number on each line. I am trying to search a different file for occurrences of these ID numbers and return the line where each ID number appears in the second file, in this case writing them to an output file. I am new to programming, and this is what I have so far.
outlist = []
with open('readID.txt', 'r') as readID, \
        open('GOlines.txt', 'w') as output, \
        open('GO.txt', 'r') as GO:
    x = readID.readlines()
    print x
    for line in GO:
        if x[1:-1] in line:
            outlist.append(line)
            outlist.append('\n')
        if x[1:-1] in line:
            outlist.append(line)
            outlist.append('\n')
    print outlist
    output.writelines(outlist)
The files look like this:
readID.txt
00073810.1
00082422.1
00018647.1
00063072.1
GO.txt
#query GO reference DB reference family
HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
HumanDistalGut_READ_00043244.3 GO:0022625 TIGRFAM TIGR00001
HumanDistalGut_READ_00048644.4 GO:0000315 TIGRFAM TIGR00001
HumanDistalGut_READ_00067264.5 GO:0003735 TIGRFAM TIGR00001
The read ids match up with some but not all of the ids after READ...
#!/usr/bin/env python
# encoding: utf-8
import sys
import re
def extract_id(line):
    """
    input: HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
    returns: 00048904.2
    """
    result = re.search(r'READ_(\d{8}\.\d)', line)
    if result is not None:
        return result.group(1)
    else:
        return None
def extract_go_num(line):
    """
    input: HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
    returns: 0006412
    """
    result = re.search(r'GO:(\d{7})', line)
    if result is not None:
        return result.group(1)
    else:
        return None
def main(argv=None):
    if argv is None:
        argv = sys.argv
    with open('readID.txt', 'r') as f:
        # strip the trailing newlines, otherwise the membership test never matches
        ids = frozenset(line.strip() for line in f)
    with open('GO.txt', 'r') as haystack, \
            open('GOLines.txt', 'w') as output:
        for line in haystack:
            if extract_id(line) in ids:
                output.write(extract_go_num(line) + '\n')
if __name__ == "__main__":
    sys.exit(main())
I'm trading memory overhead for an O(n) solution rather than O(n^2).
I'm using regular expressions to extract the IDs and GO numbers, but it's brittle if the number of digits changes.
Maybe something like this:
with open('readID.txt', 'r') as readID, open('GOlines.txt', 'w') as output, open('GO.txt', 'r') as GO:
    GO_lines = GO.readlines()  # read once; the file iterator would be exhausted after the first ID
    for ID in readID:
        ID = ID.strip()  # drop the newline so the substring test can match
        for line in GO_lines:
            if ID in line:
                output.write(line)
If your files are small enough to fit in memory:
with open('/somepath/GO.txt') as f:
    pool = f.readlines()
with open('/somepath/readID.txt') as f:
    tokens = f.readlines()
# strip spaces/new lines
tokens = [t.strip() for t in tokens]
found = [(t, lno) for t in tokens for (lno, l) in enumerate(pool) if t in l]
You could then print your found list into your outfile.
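For example, a minimal way to write those matches out (the output file name is an assumption):
# found holds (token, line_number) pairs; write each matching GO.txt line out
with open('/somepath/GOlines.txt', 'w') as out:
    for t, lno in found:
        out.write(pool[lno])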

looping over large csv python

I have a large csv file (several hundred lines) with the following structure:
filename, sitename, servername
This csv file contains several duplicates, since the server names are those of a cluster (always the same couples), and there are language aliases for the site names (e.g. mijnhuis, myhouse, mamaison).
The result I'd like to have is the following (notice the ; and the ,):
filename; sitename1, sitename2, sitename3; servername1, servername2, servername3;
How could I do this in the most optimized way?
PS: actual code would be nice, but if you can give me directions, I'd be equally happy.
Use at your own risk:
import collections as c
f = open('input', 'r')
sites, servers = c.defaultdict(set), c.defaultdict(set)
files = set()
for line in f:
    parts = line.split(',')
    fi, site, server = [p.strip() for p in parts]
    files.add(fi)
    sites[fi].add(site)
    servers[fi].add(server)
for f in files:
    print("%s; %s; %s" % (f, ", ".join(sites[f]), ", ".join(servers[f])))
Your data structure depends on your program. Perhaps you could also store a dictionary mapping file names to a list of (sitename, servername) tuples, as sketched below; there are tons of ways to accomplish what you are trying to do.
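A minimal sketch of that idea, assuming the same 'input' file name as the answer above:
pairs = {}  # filename -> list of (sitename, servername) tuples
with open('input', 'r') as f:
    for line in f:
        fi, site, server = [p.strip() for p in line.split(',')]
        pairs.setdefault(fi, [])
        if (site, server) not in pairs[fi]:
            pairs[fi].append((site, server))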
You could do something like:
import csv
file_names_to_data = {}
with open('your_csv') as f:
    reader = csv.reader(f)
    for line_list in reader:
        try:
            file_names_to_data[line_list[0]]['sitenames'].append(line_list[1])
            file_names_to_data[line_list[0]]['servernames'].append(line_list[2])
        except KeyError:
            # initialize it
            file_names_to_data[line_list[0]] = {'sitenames': [line_list[1]],
                                                'servernames': [line_list[2]]}
Additionally, it looks like defaultdict could be extremely useful here, as sketched below.
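For instance, a variant of the snippet above that replaces the try/except with collections.defaultdict:
import csv
from collections import defaultdict
file_names_to_data = defaultdict(lambda: {'sitenames': [], 'servernames': []})
with open('your_csv') as f:
    for line_list in csv.reader(f):
        file_names_to_data[line_list[0]]['sitenames'].append(line_list[1])
        file_names_to_data[line_list[0]]['servernames'].append(line_list[2])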
Use a dictionary keyed on filename, with two lists (sitenames and servernames) for each dictionary item.
As it happens, here is a solution to my own problem, example file included.
CODE:
fp = r'PATH_TO_FILE'
aliases = []
aliases.append(("sitex", "sitez", "sitey"))
splitFile = []
for l in open(fp):
    parts = tuple(l[:-1].replace(" ", "").split(","))
    splitFile.append(parts)
def isAlias(old, new):
    print(old, new)
    aliasFound = False
    for alias in aliases:
        if old in alias and new in alias:
            aliasFound = True
    return aliasFound
handledSites = []
for split in splitFile:
    log = split[0]
    site = split[1]
    rp = split[2]
    matchFound = False
    for hs in handledSites:
        if site in hs[0]:
            matchFound = True
            if rp not in hs[1]:
                hs[1].append(rp)
            if log not in hs[2]:
                hs[2].append(log)
        if not matchFound:
            if isAlias(hs[0][0], site):
                matchFound = True
                hs[0].append(site)
                if rp not in hs[1]:
                    hs[1].append(rp)
                if log not in hs[2]:
                    hs[2].append(log)
    if not matchFound:
        handledSites.append(([site], [rp], [log]))
for s in handledSites:
    print(s)
EXAMPLE FILE
logfile[date]_[server]_sitex.log, sitex, rp1
logfile[date]_[server]_sitex.log, sitex, rp2
logfile[date]_[server]_sitey.log, sitey, rp1
logfile[date]_[server]_sitey.log, sitey, rp2
logfile[date]_[server]_sitez.log, sitez, rp1
logfile[date]_[server]_sitez.log, sitez, rp2
logfile[date]_[server]_site3.log, site3, rp1
logfile[date]_[server]_site3.log, site3, rp2
