concatinating multiple strings from dictionary and save in file using python

concatinating multiple strings from dictionary and save in file using python - python

I able to write hostname in the /tmp/filter.log but any hint how can i write all three values[hostname, owner, seats] in the file?
def list_hosts(nc):
resp = nc.send_service_request('ListHosts', json.dumps({}))
result = resp['result']
l = []
f=open("/tmp/filter.log", "w+")
for r in result:
if "team-prod" in r['owner']:
print r['owner'], r['hostname'], r['seats']
f.write(r['hostname'] + "\n")
f.close()
l.append(r['hostname'])
return l
nc = create_client('zone', 'team_PROD_USERNAME', 'team_PROD_PASSWORD')
l = list_hosts(nc)
print l
The file should have entries as below:
team-prod\*, np-team-052, [u'123123123-18d1-483d-9af8-169ac66b26e4']
Current entry is:
np-team-052

f.write(str(r['owner']) + ', ' + str(r['hostname']) + ', ' + str(r['seats']) + '\n')

Related

How can I convert an array to JSON?

I have an array that I need to convert to a JSON file. There is a text file that holds the data. But I don't understand why it only adds one record.
import collections
list = []
with open("file.txt") as f:
for line in f:
info = line.split()
lists = ("ip" + " " + info[0].replace(":", " ").split()[0] + " " + "port" + " " + info[0].replace(":", " ").split()[1] + " " + "region" + " " + info[1].replace("-", " ").split()[0]).split()
list.append(lists)
d = collections.defaultdict(dict)
for l in list:
d[l[0]] = l[1]
d[l[2]] = l[3]
d[l[4]] = l[5]
print(json.dumps(d))
with open("proxy.json", "w") as f:
f.write(json.dumps(d))
Example of a text file:
154.0.5.178:8080 ZA-N-S! -
119.28.156.115:3128 KR-N -
207.144.111.230:8080 US-H -
3.20.236.208:49205 US-H-S -
217.60.194.43:8080 IR-N! -
190.61.41.106:999 CO-N-S +
What I get:
enter image description here

info[1].replace("-", " ").split()[0]
will always return a single value! Try this:
import json
alist = []
with open("file.txt") as f:
for line in f:
info = line.split()
data = {"ip": info[0].split(":")[0], "port": info[0].split(":")[1],"region": info[1].split("-")}
alist.append(data)
print(json.dumps(alist))
with open("proxy.json", "w") as f:
f.write(json.dumps(alist))

python read all files from a folder and write the file name and other info into a txt file

I have 30911 html files. I need to do webscraping and then save the info into a txt file named index.txt.
It should look like
filename1, title, t1, date, p1
filename2, title, t1, date, p1
filename3, title, t1, date, p2
and so on...
I only want filename, but output gave me path+filename.

Your problem is that filename is filepath in reality, in order to get the filename you could use os module
os.path.basename('filepath')
so in order to write to the file:
indexFile.write(os.path.basename(filename)+ ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + parti_names + '\n')

You can use:
path = 'C:/Users/.../.../output/'
#read html files
for filename in glob.glob(os.path.join(path, '*.html')):
soup = bs4.BeautifulSoup(open(filename).read(), "lxml")
title = soup.find('h1')
ticker = soup.find('p')
d_date = soup.find_all('div', {"id": "a-body"})[0].find_all("p")[2]
try:
def find_participant(tag):
return tag.name == 'p' and tag.find("strong", text=re.compile(r"Executives|Corporate Participants"))
participants = soup.find(find_participant)
parti_names = ""
for parti in participants.find_next_siblings("p"):
if parti.find("strong", text=re.compile(r"(Operator)")):
break
parti_names += parti.get_text(strip=True) + ","
except:
indexFile = open('C:/Users/.../output1/' + 'index.txt', 'a+')
indexFile.write(filename + ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + 'No participants' + '\n')
else:
participants = soup.find(find_participant)
parti_names = ""
for parti in participants.find_next_siblings("p"):
if parti.find("strong", text=re.compile(r"(Operator)")):
break
parti_names += parti.get_text(strip=True) + ","
indexFile = open('C:/Users/.../output1/' + 'index.txt', 'a+')
indexFile.write(os.path.basename(filename) + ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + parti_names + '\n')
indexFile.close()

ntpath is another module used to get base name from path.
>>> import ntpath
>>> ntpath.basename('C:/Users/.../output1/' + 'index.txt')
'index.txt'

Why does my Python XML parser break after the first file?

I am working on a Python (3) XML parser that should extract the text content of specific nodes from every xml file within a folder. Then, the script should write the collected data into a tab-separated text file. So far, all the functions seem to be working. The script returns all the information that I want from the first file, but it always breaks, I believe, when it starts to parse the second file.
When it breaks, it returns "TypeError: 'str' object is not callable." I've checked the second file and found that the functions work just as well on that as the first file when I remove the first file from the folder. I'm very new to Python/XML. Any advice, help, or useful links would be greatly appreciated. Thanks!
import xml.etree.ElementTree as ET
import re
import glob
import csv
import sys
content_file = open('WWP Project/WWP_texts.txt','wt')
quotes_file = open('WWP Project/WWP_quotes.txt', 'wt')
list_of_files = glob.glob("../../../Documents/WWPtextbase/distribution/*.xml")
ns = {'wwp':'http://www.wwp.northeastern.edu/ns/textbase'}
def content(tree):
lines = ''.join(ET.tostring(tree.getroot(),encoding='unicode',method='text')).replace('\n',' ').replace('\t',' ').strip()
clean_lines = re.sub(' +',' ', lines)
return clean_lines.lower()
def quotes(tree):
quotes_list = []
for node in tree.findall('.//wwp:quote', namespaces=ns):
quote = ET.tostring(node,encoding='unicode',method='text')
clean_quote = re.sub(' +',' ', quote)
quotes_list.append(clean_quote)
return ' '.join(str(v) for v in quotes_list).replace('\t','').replace('\n','').lower()
def pid(tree):
for node in tree.findall('.//wwp:sourceDesc//wwp:author/wwp:persName[1]', namespaces=ns):
pid = node.attrib.get('ref')
return pid.replace('personography.xml#','') # will need to replace 'p:'
def trid(tree): # this function will eventually need to call OT (.//wwp:publicationStmt//wwp:idno)
for node in tree.findall('.//wwp:sourceDesc',namespaces=ns):
trid = node.attrib.get('n')
return trid
content_file.write('pid' + '\t' + 'trid' + '\t' +'text' + '\n')
quotes_file.write('pid' + '\t' + 'trid' + '\t' + 'quotes' + '\n')
for file_name in list_of_files:
file = open(file_name, 'rt')
tree = ET.parse(file)
file.close()
pid = pid(tree)
trid = trid(tree)
content = content(tree)
quotes = quotes(tree)
content_file.write(pid + '\t' + trid + '\t' + content + '\n')
quotes_file.write(pid + '\t' + trid + '\t' + quotes + '\n')
content_file.close()
quotes_file.close()

You are overwriting your function calls with the values they returned. changing the function names should fix it.
import xml.etree.ElementTree as ET
import re
import glob
import csv
import sys
content_file = open('WWP Project/WWP_texts.txt','wt')
quotes_file = open('WWP Project/WWP_quotes.txt', 'wt')
list_of_files = glob.glob("../../../Documents/WWPtextbase/distribution/*.xml")
ns = {'wwp':'http://www.wwp.northeastern.edu/ns/textbase'}
def get_content(tree):
lines = ''.join(ET.tostring(tree.getroot(),encoding='unicode',method='text')).replace('\n',' ').replace('\t',' ').strip()
clean_lines = re.sub(' +',' ', lines)
return clean_lines.lower()
def get_quotes(tree):
quotes_list = []
for node in tree.findall('.//wwp:quote', namespaces=ns):
quote = ET.tostring(node,encoding='unicode',method='text')
clean_quote = re.sub(' +',' ', quote)
quotes_list.append(clean_quote)
return ' '.join(str(v) for v in quotes_list).replace('\t','').replace('\n','').lower()
def get_pid(tree):
for node in tree.findall('.//wwp:sourceDesc//wwp:author/wwp:persName[1]', namespaces=ns):
pid = node.attrib.get('ref')
return pid.replace('personography.xml#','') # will need to replace 'p:'
def get_trid(tree): # this function will eventually need to call OT (.//wwp:publicationStmt//wwp:idno)
for node in tree.findall('.//wwp:sourceDesc',namespaces=ns):
trid = node.attrib.get('n')
return trid
content_file.write('pid' + '\t' + 'trid' + '\t' +'text' + '\n')
quotes_file.write('pid' + '\t' + 'trid' + '\t' + 'quotes' + '\n')
for file_name in list_of_files:
file = open(file_name, 'rt')
tree = ET.parse(file)
file.close()
pid = get_pid(tree)
trid = get_trid(tree)
content = get_content(tree)
quotes = get_quotes(tree)
content_file.write(pid + '\t' + trid + '\t' + content + '\n')
quotes_file.write(pid + '\t' + trid + '\t' + quotes + '\n')
content_file.close()
quotes_file.close()

How to encrypt a .bin file

I need to encrypt 3 .bin files which contain 2 keys for Diffie-Hellman. I have no clue how to do that, all I could think of was what I did in the following Python file. I have an example what the output should look like but my code doesn't seem to produce the right keys. The output file server.ini is used by a client to connect to a server.
import base64
fileList = [['game_key.bin', 'Game'], ['gate_key.bin', 'Gate'], ['auth_key.bin', 'Auth']]
iniList = []
for i in fileList:
file = open(i[0], 'rb')
n = list(file.read(64))
x = list(file.read(64))
file.close()
n.reverse()
x.reverse()
iniList.append(['Server.' + i[1] + '.N "' + base64.b64encode("".join(n)) + '"\n', 'Server.' + i[1] + '.X "' + base64.b64encode("".join(x)) + '"\n'])
iniList[0].append('\n')
#time for user Input
ip = '"' + raw_input('Hostname: ') + '"'
dispName = 'Server.DispName ' + '"' + raw_input('DispName: ') + '"' + '\n'
statusUrl = 'Server.Status ' + '"' + raw_input('Status URL: ') + '"' + '\n'
signupUrl = 'Server.Signup ' + '"' + raw_input('Signup URL: ') + '"' + '\n'
for l in range(1, 3):
iniList[l].append('Server.' + fileList[l][1] + '.Host ' + ip + '\n\n')
for l in [[dispName], [statusUrl], [signupUrl]]:
iniList.append(l)
outFile = open('server.ini', 'w')
for l in iniList:
for i in l:
outFile.write(i)
outFile.close()
The following was in my example file:
# Keys are Base64-encoded 512 bit RC4 keys, as generated by DirtSand's keygen
# command. Note that they MUST be quoted in the commands below, or the client
# won't parse them correctly!
I also tried it without inverting n and x

remove similar lines in text file

I am not using Python but I have script in python:
part of script
elif line.find("CONECT") > -1:
con = line.split()
line_value = line_value + 1
#print line_value
#print con[2]
try:
line_j = "e" + ', ' + str(line_value) + ', ' + con[2] + "\n"
output_file.write(line_j)
print(line_j)
line_i = "e" + ', ' + str(line_value) + ', ' + con[3] + "\n"
output_file.write(line_i)
print(line_i)
line_k = "e"+ ', ' + str(line_value) + ', ' + con[4] + "\n"
print(line_k)
output_file.write(line_k)
except IndexError:
continue
which give .txt output in format
e, 1, 2
e, 1, 3
e, 1, 4
e, 2, 1
e, 2, 3
etc.
I need remove similar lines with the same numbers, but no matter on order this numbers
i.e. line e, 2, 1..
Is it possible?

Of course, it is better to modify your code to remove that lines BEFORE you're writing them to file. You can use a list to store already saved values, and on each itereation, perfom a search if the values you're want to add is already exists in that list. The code below isn't tested and optimized, but it explains an idea:
# 'added = []' should be placed somewhere before 'if'
added = []
# you part of code
elif line.find("CONECT") > -1:
con = line.split()
line_value = line_value + 1
try:
line_j = "e, %s, %s\n" % (str(line_value),con[2])
tmp = sorted((str(line_value),con[2]))
if tmp not in added:
added.append(tmp)
output_file.write(line_j)
print(line_j)
line_i = "e, %s, %s\n" % (str(line_value),con[3])
tmp = sorted((str(line_value),con[3]))
if tmp not in added:
added.append(tmp)
output_file.write(line_i)
print(line_i)
line_k = "e, %s, %s\n" % (str(line_value),con[4])
tmp = sorted((str(line_value),con[4]))
if tmp not in added:
added.append(tmp)
print(line_k)
output_file.write(line_k)
except IndexError:
continue

Here is a comparison method for two lines of your file:
def compare(line1, line2):
els1 = line1.strip().split(', ')
els2 = line2.strip().split(', ')
return Counter(els1) == Counter(els2)
See the documentation for the Counter class.
If the count of elements doesn't matter you can replace the Counter class with set instead

The following approach should work. First add the following line further up in your code:
seen = set()
Then replace everything inside the try with the following code:
for con_value in con[2:5]:
entry = frozenset((line_value, con_value))
if entry not in seen:
seen.append(entry)
line_j = "e" + ', ' + str(line_value) + ', ' + con_value + "\n"
output_file.write(line_j)
print(line_j)
Make sure this code is indented to the same level as the code it replaces.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

concatinating multiple strings from dictionary and save in file using python - python

f.write(str(r['owner']) + ', ' + str(r['hostname']) + ', ' + str(r['seats']) + '\n')

Related

How can I convert an array to JSON?

python read all files from a folder and write the file name and other info into a txt file

Why does my Python XML parser break after the first file?

How to encrypt a .bin file

remove similar lines in text file

Categories

Resources