Piping my Python Program through another program - python

I'm trying to make a program using Python.
I want to be able to pipe the program's output through another program:
#EXAMPLE: ./my_python | another_program
Here is the code I have so far.
This code saves its output to a file:
#!/usr/bin/env python
import os, random, string

# This is not my own code; as far as I know it belongs to
# NullUserException and was found on stackoverflow.com.
length = 8
chars = string.ascii_letters.upper() + string.digits
random.seed(os.urandom(1024))  # seed() must be called, not assigned to

# my code
file_out = open('newRa.txt', 'w')  # create a file to save the generated passwords
list1 = []
while len(list1) < 100000:
    list1.append(''.join(random.choice(chars) for i in range(length)))
for item in list1:
    file_out.write('%s\n' % item)
file_out.close()

file_out1 = open('test.txt', 'w')
for x in list1:
    file_out1.write('%s\n' % x[::-1])
file_out1.close()
This is the code where I am trying to pipe it through another program:
#!/usr/bin/env python
import os, string, random, sys

length = 8
chars = string.ascii_letters.upper() + string.digits
random.seed(os.urandom(1024))

keep = []
keep1 = []
while len(keep) < 1000:
    keep.append(''.join(random.choice(chars) for i in range(length)))
    print '\n', keep[::-1]
for x in keep:
    keep1.append(x[::-1])
while len(keep1) < 1000:
    print keep1
I have tried chmod and running the script as an executable.

OK, sorry for my lack of Google searching.
sys.stdout is the answer:
#!/usr/bin/env python
import os, string, random, sys

length = 8
chars = string.ascii_letters.upper() + string.digits
random.seed(os.urandom(1024))

keep = []
while len(keep) < 1000:
    keep = (''.join(random.choice(chars) for i in range(length)))
    print sys.stdout.write(keep)
    sys.stdout.flush()
I stripped my code down (as it makes it a lot faster), but I'm getting this when I execute
my code:
P5DBLF4KNone
DVFV3JQVNone
CIMKZFP0None
UZ1QA3HTNone
How do I get rid of the 'None' on the end?
What have I done to cause this?
Should this be a separate question?
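For what it's worth, the None comes from mixing print with sys.stdout.write(): in Python 2, file.write() returns None, so print emits the password (already written by write()) followed by write()'s return value. A minimal sketch of the fix is to use write() on its own; the sketch also swaps the while loop for a for loop, since rebinding keep to an 8-character string kept the original while condition true forever:

for _ in range(1000):
    password = ''.join(random.choice(chars) for i in range(length))
    # write() already outputs the text; wrapping it in print would also
    # print write()'s return value, which is None in Python 2
    sys.stdout.write(password + '\n')
sys.stdout.flush()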

Related

Is the for loop in my code the speed bottleneck?

The following code looks through 2500 markdown files with a total of 76475 lines, to check each one for the presence of two strings.
#!/usr/bin/env python3
# encoding: utf-8
import re
import os

zettelkasten = '/Users/will/Dropbox/zettelkasten'

def zsearch(s, *args):
    for x in args:
        r = (r"(?=.* " + x + ")")
        p = re.search(r, s, re.IGNORECASE)
        if p is None:
            return None
    return s

for filename in os.listdir(zettelkasten):
    if filename.endswith('.md'):
        with open(os.path.join(zettelkasten, filename), "r") as fp:
            for line in fp:
                result_line = zsearch(line, "COVID", "vaccine")
                if result_line is not None:
                    UUID = filename[-15:-3]
                    print(f'›[[{UUID}]] OR', end=" ")
This correctly gives output like:
›[[202202121717]] OR ›[[202003311814]] OR
but it takes almost two seconds to run on my machine, which I think is much too slow. What, if anything, can be done to make it faster?
The main bottleneck is the regular expressions you're building.
If we print(f"{r=}") inside the zsearch function:
>>> zsearch("line line covid line", "COVID", "vaccine")
r='(?=.* COVID)'
r='(?=.* vaccine)'
The (?=.*) lookahead is what's causing the slowdown, and it's also not needed.
You can achieve the same result by searching for:
r=' COVID'
r=' vaccine'
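A minimal sketch of zsearch with the lookahead dropped (same behavior, just plain substring patterns, keeping the leading space from the original patterns):

def zsearch(s, *args):
    for x in args:
        # a plain substring regex avoids the per-position lookahead work
        if re.search(" " + x, s, re.IGNORECASE) is None:
            return None
    return s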

Python Label Printer Program

I want to print two labels that have the same numbers on them. I am using ZPL. I have already made my print format in ZPL and it works properly. I am trying to print a data range. For example:
"What is the first number in the range?" User inputs 100
"What is the second number in the range?" User inputs 120
I would then get 40 labels in order.
I then want it to export that data into a notepad file and then print it to my default printer. My problem is that to print with ZPL I have to "tag" my data range with my ZPL code. I can't figure out how to get my data range to go into my print statement correctly. Please help. Thank you in advance!
import os
import sys

start = int(input("Enter the start of range: "))
end = int(input("Enter the end of range: "))

with open('TestFile.txt', 'a') as sys.stdout:
    print('^XA')
    print('^PQ2')
    for labelRange in range(start, end + 1):
        print('^FO185,50^A0,300^FD')(labelRange, end = " ")('^FS')
        #print('\n')
    print('^XZ')

os.startfile("C:/Users/joe.smith/desktop/TestFile.txt", "print")
exit()
Here is something to get you started, but I doubt it is complete. You will need to provide a valid ZPL file for making the changes.
I also made the program use fixed numbers for now, so it just runs and outputs. You can change it back once you have it working.
start = 110
end = 111

notepad = ''
# these are header lines that go once (if windows you might need \r\n instead of \n)
notepad += '^XA\n'
notepad += '^PQ2\n'
for label in range(start, end + 1):
    # use f-strings
    notepad += f'^FO185,50^A0,300^FD{label}^FS\n'
    # if you need some of those other numbers to increment
    # then setup a counter and do the math here inside the f-string
    notepad += f'^FO185,50^A0,300^FD{label}^FS\n'
notepad += '^XZ\n'

# with open('tf.txt', 'w') as sys.stdout:
#     print(notepad)
print(notepad)
exit()
outputs:
^XA
^PQ2
^FO185,50^A0,300^FD110^FS
^FO185,50^A0,300^FD110^FS
^FO185,50^A0,300^FD111^FS
^FO185,50^A0,300^FD111^FS
^XZ
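Once the output looks right, a rough sketch of wiring the file write and printing back in (assuming Windows, and reusing the TestFile.txt path from the question):

import os

# write the assembled ZPL to the file
with open('TestFile.txt', 'w') as f:
    f.write(notepad)

# on Windows, os.startfile with the "print" verb sends the file
# to the default printer
os.startfile('TestFile.txt', 'print')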

Code too slow USACO Bronze 2015 Problem 1 python

I am practicing for USACO and I came across the "Censoring" Problem: http://www.usaco.org/index.php?page=viewproblem2&cpid=526
I solved it pretty quickly and thought I got it right. However, it turns out that the server gives me a time-limit error for test cases 7-15 (it works well for the first 6 test cases).
Here is my code.
import sys
sys.stdin = open('censor.in', 'r')
sys.stdout = open('censor.out', 'w')

# Real code begins here
original_string = input()
censor_string = input()
# print(original_string.find(censor_string) + len(censor_string))
while censor_string in original_string:
    original_string = (original_string[0:original_string.find(censor_string)] +
                       original_string[original_string.find(censor_string) + len(censor_string):len(original_string)])
print(original_string)
Can someone help me fix it? The problem is probably that while loop. Not sure how to fix it though.
This is fast enough to get accepted. I build the result string one character at a time. Whenever this creates the bad string (at the end of the partial result), I remove it.
import sys
sys.stdin = open('censor.in')
sys.stdout = open('censor.out', 'w')

s, t = input(), input()
res = ''
for c in s:
    res += c
    if res.endswith(t):
        res = res[:-len(t)]
print(res)
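For intuition, here is the same logic as a function with a small worked trace (the sample input is just an illustration):

def censor(s, t):
    res = ''
    for c in s:
        res += c
        # removing t restores the invariant that res contains no
        # occurrence of t, so a single endswith check is enough
        if res.endswith(t):
            res = res[:-len(t)]
    return res

print(censor('whatthemomooofun', 'moo'))  # -> whatthefun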

Python script for parsing ldap logs for getting Searches/Binds [duplicate]

I am writing a script in Python to parse LDAP logs and then get the number of searches/binds by each user. I was testing my code on sample files; for smaller files up to 5-10 MB it runs quickly and completes within a minute on my local PC. However, when I ran the script on an 18 MB file with around 150,000 lines in it, it took around 5 minutes. I want to run this script on files of 100 MB, with maybe 5-6 files in each run, which means the script has to parse almost 600-700 MB of data per run. I suppose that would take a long time, so I would like some advice on whether my code below can be tuned for better performance in terms of execution time.
import os, re, datetime
from collections import defaultdict

d = defaultdict(list)
k = defaultdict(list)
start_time = datetime.datetime.now()

fh = open("C:\\Rohit\\ECD Utilization Script - Copy\\logdir\\access", "r").read()
pat = re.compile(r' BIND REQ .*conn=([\d]*).*dn=(.*")')
srchStr = '\n'.join(re.findall(r' SEARCH REQ .*', fh))
bindlist = re.findall(pat, fh)
for entry in bindlist:
    d[entry[-1].split(",")[0]].append(entry[0])
for key in d:
    for con in d[key]:
        count = re.findall(con, srchStr)
        k[key].append((con, len(count)))

for key in k:
    print("Number of searches by ", key, " : ", sum([i[1] for i in k[key]]))
for key in d:
    print("No of bind ", key, " = ", len(d[key]))

end_time = datetime.datetime.now()
print("Total time taken - {}".format(end_time - start_time))
You are doing several scans of the entire file with the line
count = re.findall('SEARCH REQ.*'+conid, fh1)
Avoid this; it is your major problem. Collect all conids in a list, then iterate over the file once more with the conids in the inner loop, pulling that scan out of the outer loop. That way you do only two scans of the file.
Also, since it is plain Python, run it with PyPy for faster runs.
You could do this better with an FSM, by spending a bit more RAM. This is a hint; you will have to build the FSM yourself.
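A rough sketch of that two-scan idea (the function and variable names are hypothetical; the conn=/dn= patterns are taken from the question's log format):

import re
from collections import defaultdict

def count_searches(logpath):
    # Scan 1: map each connection id to the user that bound on it.
    conid_to_user = {}
    bind_pat = re.compile(r' BIND REQ .*conn=(\d+).*dn=(.*")')
    with open(logpath) as f:
        for line in f:
            m = bind_pat.search(line)
            if m:
                conid_to_user[m.group(1)] = m.group(2).split(",")[0]

    # Scan 2: count SEARCH REQ lines per connection, folded into users.
    searches = defaultdict(int)
    search_pat = re.compile(r' SEARCH REQ .*conn=(\d+)')
    with open(logpath) as f:
        for line in f:
            m = search_pat.search(line)
            if m and m.group(1) in conid_to_user:
                searches[conid_to_user[m.group(1)]] += 1
    return searches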
Edit 1: This is the version of the script I wrote after seeing the log file. Please correct it if there is any mistake:
#!/usr/bin/env python
import sys
import re

def parse(filepath):
    d = {}
    regex1 = re.compile(r'(.*)?BIND\sREQ(.*)uid=(\w+)')
    regex2 = re.compile(r'(.*)?SEARCH\sREQ(.*)uid=(\w+)')
    with open(filepath, 'r') as f:
        for l in f:
            m = re.search(regex1, l)
            if m:
                # print(m.group(3))
                uid = m.group(3)
                if uid in d:
                    d[uid]['bind_count'] += 1
                else:
                    d[uid] = {}
                    d[uid]['bind_count'] = 1
                    d[uid]['search_count'] = 0
            m = re.search(regex2, l)
            if m:
                # print(m.group(3))
                uid = m.group(3)
                if uid in d:
                    d[uid]['search_count'] += 1
                else:
                    d[uid] = {}
                    d[uid]['search_count'] = 1
                    d[uid]['bind_count'] = 0
    for k in d:
        print('user id = ' + k, 'Bind count = ' + str(d[k]['bind_count']),
              'Search count = ' + str(d[k]['search_count']))

def process_args():
    if len(sys.argv) < 2:  # compare the argument count, not the list itself
        print('Usage: parse_ldap_log.py log_filepath')
        sys.exit(1)

if __name__ == '__main__':
    process_args()
    parse(sys.argv[1])
Thank the gods that it was not complicated enough to warrant an FSM.
Use the itertools library instead of so many loops.
Your script has quadratic complexity: for each line in the file, you are reading through the file again to match the log entry.
My suggestion is to read the file only once and count the occurrences of the needed entry (the one matching " BIND REQ ").
I was able to solve my problem with the code below.
import os, re, datetime
from collections import defaultdict

start_time = datetime.datetime.now()
bind_count = defaultdict(int)
search_conn = defaultdict(int)
bind_conn = defaultdict(str)
j = defaultdict(int)

fh = open("C:\\access", "r")
total_searches = 0
total_binds = 0

for line in fh:
    reg1 = re.search(r' BIND REQ .*conn=(\d+).*dn=(.*")', line)
    reg2 = re.search(r' SEARCH REQ .*conn=(\d+).*', line)
    if reg1:
        total_binds += 1
        uid, con = reg1.group(2, 1)
        bind_count[uid] = bind_count[uid] + 1
        bind_conn[con] = uid
    if reg2:
        total_searches += 1
        skey = reg2.group(1)
        search_conn[skey] = search_conn[skey] + 1

for conid in search_conn:
    if conid in bind_conn:
        new_key = bind_conn[conid]
        j[new_key] = j[new_key] + search_conn[conid]

for k, v in bind_count.items():
    print(k, " = ", v)
print("*" * 80)
for k, v in j.items():
    print(k, "-->", v)

fh.close()
del search_conn
del bind_conn
end_time = datetime.datetime.now()
print("Total time taken - {}".format(end_time - start_time))

output in text file is not the same as python output

I came across a problem. Whenever I run the code, the output in Python is correct; all numbers show like this:
1
2
3
4
5
When writing the output to a file, all the numbers are stored on a single line, like this:
12345
Is anybody able to help out?
This is the code:
def main():
    Numberfile = open('number_list.txt', 'w')
    s = 0
    while s < 100:
        s += 1
        Numberfile.write(str(s))
        print(s)
    Numberfile.close()

main()
print() adds newlines for you. Writing to a file does not.
Either add the newline explicitly:
Numberfile.write(str(s) + '\n')
or use the print() function for writing to the file:
print(s, file=Numberfile)
This all assumes you are using Python 3; in Python 2, you can still replace the print statement with the same print() function on a module-by-module basis by adding:
from __future__ import print_function
at the top of the module.
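Putting that together, a minimal sketch of the corrected loop (either fix works; this one adds the newline explicitly and uses a context manager so the file is always closed):

def main():
    with open('number_list.txt', 'w') as number_file:
        s = 0
        while s < 100:
            s += 1
            number_file.write(str(s) + '\n')  # explicit newline
            print(s)

main()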
