Whats the best way to write python code into a python file? - python

I want to write a script (generate_script.py) generating another python script (filegenerated.py)
So far i have created generate_script.py:
import os
filepath = os.getcwd()
def MakeFile(file_name):
temp_path = filepath + file_name
file = open(file_name, 'w')
file.write('def print_success():')
file.write(' print "sucesss"')
file.close()
print 'Execution completed.'
The file (filegenerated.py) looks now like this:
def print_success(): print "sucesss"
Now i don't want to manually insert all linebreaks (also due to operating system difficulties)...is there a template system i can use writing python code into a python file? Does someone have an example?
Thanks a lot!

You could just use a multiline string:
import os
filepath = os.getcwd()
def MakeFile(file_name):
temp_path = filepath + file_name
with open(file_name, 'w') as f:
f.write('''\
def print_success():
print "sucesss"
''')
print 'Execution completed.'
If you like your template code to be indented along with the rest of your code, but dedented when written to a separate file, you could use textwrap.dedent:
import os
import textwrap
filepath = os.getcwd()
def MakeFile(file_name):
temp_path = filepath + file_name
with open(file_name, 'w') as f:
f.write(textwrap.dedent('''\
def print_success():
print "sucesss"
'''))
print 'Execution completed.'

lines = []
lines.append('def print_success():')
lines.append(' print "sucesss"')
"\n".join(lines)
If you're building something complex dynamically:
class CodeBlock():
def __init__(self, head, block):
self.head = head
self.block = block
def __str__(self, indent=""):
result = indent + self.head + ":\n"
indent += " "
for block in self.block:
if isinstance(block, CodeBlock):
result += block.__str__(indent)
else:
result += indent + block + "\n"
return result
You could add some extra methods, to add new lines to the block and all that stuff, but I think you get the idea..
Example:
ifblock = CodeBlock('if x>0', ['print x', 'print "Finished."'])
block = CodeBlock('def print_success(x)', [ifblock, 'print "Def finished"'])
print block
Output:
def print_success(x):
if x>0:
print x
print "Finished."
print "Def finished."

Try using \n and \t
import os
filepath = os.getcwd()
def MakeFile(file_name):
temp_path = filepath + file_name
file = open(file_name, 'w')
file.write('def print_success():\n')
file.write('\tprint "sucesss"')
file.close()
print 'Execution completed.'
to output
def print_success():
print "sucesss"
or multiline
import os
filepath = os.getcwd()
def MakeFile(file_name):
temp_path = filepath + file_name
file = open(file_name, 'w')
file.write('''
def print_success():
print "sucesss"
''')
file.close()
print 'Execution completed.'

untubu answer is probably the more pythonic answer, but in your code example you're missing new line chars and tabs.
file.write("def print_success():\n")
file.write('\tprint "success"\n\n')
This will give you the spacing and newlines. The link below will give you some tips on the accepted ones.
http://docs.python.org/release/2.5.2/ref/strings.html

Related

how to not overwrite file in python in different os path

so i want to avoid overwrite the file name that existed. but i don't know how to combine the code with mycode. please help me
here's my code for write file:
def filepass(f):
print(f)
with open ('media/pass/'+'filepass.txt', 'a') as fo:
fo.write(f)
fo.close()
return fo
and here's the code to create number in name filepass:
def build_filename(name, num=0):
root, ext = os.path.splitext(name)
print(root)
return '%s%d%s' % (root, num, ext) if num else name
def find_next_filename(name, max_tries=20):
if not os.path.exists(name): return name
else:
for i in range(max_tries):
test_name = build_filename(name, i+1)
if not os.path.exists(test_name): return test_name
return None
all i want is to create filename : filepass.txt, filepass1.txt, filepass2.txt
Something like this?
def filepass(f):
print(f)
filename = find_next_filename('media/pass/filepass.txt')
with open (filename, 'a') as fo:
fo.write(f)
# you don't need to close when you use "with open";
# but then it doesn't make sense to return a closed file handle
# maybe let's return the filename instead
return filename

python script throwing an error that i can deal with

I have created the above script to remove unwanted genes from a database. It does work quite well if you only use it once but I re-run it, it giving me an error message such as:
shutil.Error: Destination path 'path/rejected_database_genes/gene_A.fa' already exists
And obviously it stops. I think that the problem arises when there are two or more genes in the file that needs to be removed.
Kind regards, and thanks for your help.
import glob, sys, os, shutil
from Bio import SeqIO, SearchIO
from Bio.SeqRecord import SeqRecord
import argparse
def help_function():
print 'Hi'
parser = argparse.ArgumentParser()
parser.add_argument('-input_file', '-i',type=str,help='path_to_data')
opts = parser.parse_args()
def check_file_exists(filepath, file_description):
if not os.path.exists(filepath):
print("The " + file_description + " (" + filepath + ") does not exist")
sys.exit(1)
else:
print file_description + " detected"
def remove_empty_files(alleles_files,destination):
input_handle=open(alleles_files, 'r')
gene_records=list(SeqIO.parse(input_handle, 'fasta'))
for gene_record in gene_records:
#filename=gene_record.id[0]
#count=0
if len(gene_record.seq)<5 or 'N'in gene_record.seq:
print gene_record.id
elif '-' in gene_record.seq:
print gene_record.id
#count+=1
shutil.move(alleles_files, destination)
def main():
destination=opts.input_file + '/rejected_database_genes'
if os.path.exists(destination):
print 'Folder already exits'
else:
os.makedirs(destination)
print 'Folder has been created'
files=glob.glob(opts.input_file+'/*.fa')
#print files
#sys.exit()
for f in files:
#print f
#sys.exit()
alleles_files=glob.glob(f)[0]
#print alleles_files
#sys.exit()
remove_empty_files(alleles_files,destination)
print 'Files have been removed'
main()
the problem you have is in the shutil.move line - if you will specify full path to source and destination this will overwrite the existing file and you will not get this error, if you don't want to overwrite and need both of the files, just rename the destination file to something else.
What I wanted was that the moment the script found to remove that file, and storage somewhere else so I can check it later. The problem I was having was that if in the same file there were two unwanted seq, it would through me and error telling me that the file already existed on destination, and stopped. So I managed to solve this problem by added an if statement. The corrected script is the one below:
import glob, sys, os, shutil
from Bio import SeqIO, SearchIO
from Bio.SeqRecord import SeqRecord
import argparse
def help_function():
print 'Hi'
parser = argparse.ArgumentParser()
parser.add_argument('-input_file', '-i',type=str,help='path_to_data')
opts = parser.parse_args()
def check_file_exists(filepath, file_description):
if not os.path.exists(filepath):
print("The " + file_description + " (" + filepath + ") does not exist")
sys.exit(1)
else:
print file_description + " detected"
def remove_empty_files(alleles_files,destination):
input_handle=open(alleles_files, 'r')
gene_records=list(SeqIO.parse(input_handle, 'fasta'))
geneID_list=[]
for gene_record in gene_records:
filename=gene_record.id.split('_')
geneID=filename[0]+'_'+filename[1]
if len(gene_record.seq)<5 or 'N'in gene_record.seq:
geneID_list.append(geneID)
shutil.move(alleles_files, destination)
print geneID_list
#break
if '-' in gene_record.seq:
geneID_list.append(geneID)
shutil.move(alleles_files, destination)
print geneID_list
#break
if len(geneID_list) >0:
break
def main():
if len(sys.argv) <=1:
parser.print_help()
sys.exit()
else:
check_file_exists(opts.input_file, 'input_file')
destination=opts.input_file + '/rejected_database_genes'
if os.path.exists(destination):
print 'Folder already exits'
else:
os.makedirs(destination)
print 'Folder has been created'
files=glob.glob(opts.input_file+'/*.fa')
#print files
#sys.exit()
for f in files:
#print f
#sys.exit()
alleles_files=glob.glob(f)[0]
#print alleles_files
#sys.exit()
remove_empty_files(alleles_files,destination)
print 'Files have been removed'
main()
By adding the third "if" statement, the file is removed as soon as it finds an unwanted sequence, remove the file to destination, and move to check the next file.

File Input/Output Error?

can anyone tell me what is wrong with my code?
def count_letters(in_file, out_file):
in_file = open("in_file.txt", "r")
for line in in_file:
for x in sorted(line):
count = x.count()
out_file = open("out_file.txt", "w")
print(x, " ", count)
in_file.close()
out_file.close()
print(out_file)
it's supposed to
Takes two filenames (in_file and out_file)as arguments
Opens and reads the input file specified by in_file, and counts the number of occurrences of each letter (in a case-insensitive manner)
Writes the result in the output file specified by out_file
when I run it, it tells me that "in_file" is not defined, but I have defined it, and made an "in_file.txt."
any help would be appreciated!
You want pass "in_file.txt" as in_file variable and use it like:
in_f=open(in_file,"r")
similarly for out_file otherwise, what is the point of making a function?
But i think your error relates of not having "in_file.txt" in your current working directory, you can check with
import os
print os.getcwd()
this will show what files code can see
in_file = open(in_file, "r") # you must do the way so got an error
try this function:
def count_letters(in_file, out_file):
in_file_fd = open(in_file, "r") # open in file
out_file_fd = open(out_file, "w") # open out file
for line in in_file_fd:
for x in sorted(line):
count = line.count(x)
res = x + " " + str(count) + '\n'
print (res)
out_file_fd.write(res) # write the data into out file
in_file_fd.close() # close in_file
out_file_fd.close() # close out_file
count_letters('test.py', 'out_file')
It works for me, hope helpful.

Python: Issue when trying to read and write multiple files

This script reads and writes all the individual html files in a directory. The script reiterates, highlight and write the output.The issue is, after highlighting the last instance of the search item, the script removes all the remaining contents after the last search instance in the output of each file. Any help here is appreciated.
import os
import sys
import re
source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
filepath = os.path.join(source+'\\'+f)
infile = open(filepath, 'r+')
source_content = infile.read()
color = ('red')
regex = re.compile(r"(\b in \b)|(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
i = 0; output = ""
for m in regex.finditer(source_content):
output += "".join([source_content[i:m.start()],
"<strong><span style='color:%s'>" % color[0:],
source_content[m.start():m.end()],
"</span></strong>"])
i = m.end()
outfile = open(filepath, 'w')
outfile.seek(0, 2)
outfile.write(output)
print "\nProcess Completed!\n"
infile.close()
outfile.close()
raw_input()
After your for loop is over, you need to include whatever is left after the last match:
...
i = m.end()
output += source_content[i:]) # Here's the end of your file
outfile = open(filepath, 'w')
...

Script that reads PDF metadata and writes to CSV

I wrote a script to read PDF metadata to ease a task at work. The current working version is not very usable in the long run:
from pyPdf import PdfFileReader
BASEDIR = ''
PDFFiles = []
def extractor():
output = open('windoutput.txt', 'r+')
for file in PDFFiles:
try:
pdf_toread = PdfFileReader(open(BASEDIR + file, 'r'))
pdf_info = pdf_toread.getDocumentInfo()
#print str(pdf_info) #print full metadata if you want
x = file + "~" + pdf_info['/Title'] + " ~ " + pdf_info['/Subject']
print x
output.write(x + '\n')
except:
x = file + '~' + ' ERROR: Data missing or corrupt'
print x
output.write(x + '\n')
pass
output.close()
if __name__ == "__main__":
extractor()
Currently, as you can see, I have to manually input the working directory and manually populate the list of PDF files. It also just prints out the data in the terminal in a format that I can copy/paste/separate into a spreadsheet.
I'd like the script to work automatically in whichever directory I throw it in and populate a CSV file for easier use. So far:
from pyPdf import PdfFileReader
import csv
import os
def extractor():
basedir = os.getcwd()
extension = '.pdf'
pdffiles = [filter(lambda x: x.endswith('.pdf'), os.listdir(basedir))]
with open('pdfmetadata.csv', 'wb') as csvfile:
for f in pdffiles:
try:
pdf_to_read = PdfFileReader(open(f, 'r'))
pdf_info = pdf_to_read.getDocumentInfo()
title = pdf_info['/Title']
subject = pdf_info['/Subject']
csvfile.writerow([file, title, subject])
print 'Metadata for %s written successfully.' % (f)
except:
print 'ERROR reading file %s.' % (f)
#output.writerow(x + '\n')
pass
if __name__ == "__main__":
extractor()
In its current state it seems to just prints a single error (as in, the error message in the exception, not an error returned by Python) message and then stop. I've been staring at it for a while and I'm not really sure where to go from here. Can anyone point me in the right direction?
writerow([file, title, subject]) should be writerow([f, title, subject])
You can use sys.exc_info() to print the details of your error
http://docs.python.org/2/library/sys.html#sys.exc_info
Did you check the pdffiles variable contains what you think it does? I was getting a list inside a list... so maybe try:
for files in pdffiles:
for f in files:
#do stuff with f
I personally like glob. Notice I add * before the .pdf in the extension variable:
import os
import glob
basedir = os.getcwd()
extension = '*.pdf'
pdffiles = glob.glob(os.path.join(basedir,extension)))
Figured it out. The script I used to download the files was saving the files with '\r\n' trailing after the file name, which I didn't notice until I actually ls'd the directory to see what was up. Thanks for everyone's help.

Categories