Transforming declarative tests into pytest asserts - python

I have a long script that is full of lines like this:
[UBInt8.parse, b"\x01", 0x01, None],
[UBInt8.build, 0x01, b"\x01", None],
I need to turn them, using regular expressions, into:
assert UBInt8.parse(b"\x01") == 0x01
assert UBInt8.build(0x01) == b"\x01"
The lists are always of length 4: the 1st item is the method, the 2nd is its argument, the 3rd is the return value, and the 4th is always None. I already used a regex to solve a similar problem (someone else produced the pattern), but I need help writing the formatting string:
See Removing six.b from multiple files. This is the code I used before; the formatting expression needs to be rewritten, and I don't speak regex. :(
import re
import os

indir = 'files'
for root, dirs, files in os.walk(indir):
    for f in files:
        fname = os.path.join(root, f)
        with open(fname) as f:
            txt = f.read()
        txt = re.sub(r'six\.(b\("[^"]*"\))', r'\1', txt)
        with open(fname, 'w') as f:
            f.write(txt)
        print(fname)
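For the transformation described above, a minimal sketch of the pattern and replacement, assuming (as in the examples shown) that the 2nd and 3rd fields never themselves contain a comma followed by a space:

import re

pattern = r'\[([\w.]+), (.+?), (.+?), None\],'
replacement = r'assert \1(\2) == \3'

# One of the lines from the question, as it appears in the source file.
line = '[UBInt8.parse, b"\\x01", 0x01, None],'
print(re.sub(pattern, replacement, line))
# assert UBInt8.parse(b"\x01") == 0x01

The same re.sub call can be dropped into the os.walk loop above in place of the six.b substitution.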

Here is the manual parsing that I came up with. No regex.
#!/usr/bin/python3
import sys

def processfile(fname):
    print(fname + '... ')
    with open(fname, 'rt') as f:
        txt = f.readlines()
    with open(fname + "-trans", 'wt') as f:
        for line in txt:
            # Strip the surrounding "[...]," then split on commas.
            items = list(map(str.strip, line.strip().strip(",[]").split(",")))
            if len(items) == 4:
                if items[1] == "None":
                    items[1] = ""
                if items[3] == "None":
                    # Plain call: assert method(argument) == result
                    o = "assert {0}({1}) == {2}".format(*items)
                else:
                    # 4th item names an exception: expect a raise instead.
                    if items[1] == "":
                        o = "assert raises({0}) == {3}".format(*items)
                    else:
                        o = "assert raises({0}, {1}) == {3}".format(*items)
                f.write(" " + o + "\n")
            else:
                f.write(line)

processfile(sys.argv[1])
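Run it as, say, ./transform.py tests.py (the script name is arbitrary); the converted output lands in tests.py-trans next to the original, and any line that doesn't split into exactly four items is copied through unchanged.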

Related

Compare each line and remove the repeated/same line having the same numbers in python [duplicate]

I have a file with one column. How do I delete repeated lines in the file?
On Unix/Linux, use the uniq command, as per David Locke's answer, or sort, as per William Pursell's comment.
If you need a Python script:
lines_seen = set()  # holds lines already seen
outfile = open(outfilename, "w")
for line in open(infilename, "r"):
    if line not in lines_seen:  # not a duplicate
        outfile.write(line)
        lines_seen.add(line)
outfile.close()
Update: The sort/uniq combination will remove duplicates but return a file with the lines sorted, which may or may not be what you want. The Python script above won't reorder lines, but just drop duplicates. Of course, to get the script above to sort as well, just leave out the outfile.write(line) and instead, immediately after the loop, do outfile.writelines(sorted(lines_seen)).
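A sketch of that sorted variant, with the same assumed infilename/outfilename names:

lines_seen = set()  # holds lines already seen
for line in open(infilename, "r"):
    lines_seen.add(line)
outfile = open(outfilename, "w")
# Write the unique lines once, in sorted order.
outfile.writelines(sorted(lines_seen))
outfile.close()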
If you're on *nix, try running the following command:
sort <file name> | uniq
uniqlines = set(open('/tmp/foo').readlines())
This will give you the set of unique lines.
Writing that back to some file is as easy as:
with open('/tmp/bar', 'w') as bar:
    bar.writelines(uniqlines)
You can do:
import os
os.system("awk '!x[$0]++' /path/to/file > /path/to/rem-dups")
Here you are using a shell command from inside Python :)
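A variant of the same awk one-liner using subprocess instead of os.system, with the output redirection handled in Python rather than by the shell (paths are the same placeholder paths as above):

import subprocess

# Same awk dedup; stdout goes straight to the output file.
with open('/path/to/rem-dups', 'w') as out:
    subprocess.run(['awk', '!x[$0]++', '/path/to/file'], stdout=out, check=True)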
There is also another way:
with open('/tmp/result.txt') as result:
    uniqlines = set(result.readlines())
with open('/tmp/rmdup.txt', 'w') as rmdup:
    rmdup.writelines(uniqlines)
Get all your lines into a list, make a set of the lines, and you are done.
For example:
>>> x = ["line1","line2","line3","line2","line1"]
>>> list(set(x))
['line3', 'line2', 'line1']
>>>
If you need to preserve the ordering of lines (since a set is an unordered collection), try this:
y = []
for l in x:
    if l not in y:
        y.append(l)
and write the content back to the file.
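On Python 3.7+, where dicts preserve insertion order, the same order-preserving dedup fits on one line:

# Order-preserving dedup (relies on ordered dicts, Python 3.7+).
y = list(dict.fromkeys(x))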
It's a rehash of what's already been said here; here's what I use.
import optparse

def removeDups(inputfile, outputfile):
    lines = open(inputfile, 'r').readlines()
    lines_set = set(lines)
    out = open(outputfile, 'w')
    for line in lines_set:
        out.write(line)

def main():
    parser = optparse.OptionParser('usage %prog ' +
                                   '-i <inputfile> -o <outputfile>')
    parser.add_option('-i', dest='inputfile', type='string',
                      help='specify your input file')
    parser.add_option('-o', dest='outputfile', type='string',
                      help='specify your output file')
    (options, args) = parser.parse_args()
    inputfile = options.inputfile
    outputfile = options.outputfile
    if (inputfile is None) or (outputfile is None):
        print(parser.usage)
        exit(1)
    else:
        removeDups(inputfile, outputfile)

if __name__ == '__main__':
    main()
Python one-liner:
python -c "import sys; print(''.join(sorted(set(sys.stdin.readlines()))))" < InputFile > OutputFile
Adding to @David Locke's answer: on *nix systems you can run
sort -u messy_file.txt > clean_file.txt
which will create clean_file.txt removing duplicates in alphabetical order.
Look at the script I created to remove duplicate emails from text files. Hope this helps!
# function to remove duplicate emails
def remove_duplicate():
    # open emails.txt and read it as one long string
    with open('emails.txt', 'r') as f:
        emails = f.read()
    # .split() drops the whitespace and returns the string as a list
    emails = emails.split()
    # empty list to store non-duplicate e-mails
    clean_list = []
    # append each email to the clean list only the first time it is seen
    for email in emails:
        if email not in clean_list:
            clean_list.append(email)
    return clean_list

# write the de-duplicated emails to no_duplicate_emails.txt
with open('no_duplicate_emails.txt', 'w') as no_duplicate_emails:
    for email in remove_duplicate():
        # .strip() removes stray commas
        email = email.strip(',')
        no_duplicate_emails.write(f"E-mail: {email}\n")
If anyone is looking for a solution that uses hashing and is a little more flashy, this is what I currently use:
import os

def remove_duplicate_lines(input_path, output_path):
    if os.path.isfile(output_path):
        raise OSError('File at {} (output file location) exists.'.format(output_path))
    with open(input_path, 'r') as input_file, open(output_path, 'w') as output_file:
        seen_lines = set()

        def add_line(line):
            seen_lines.add(line)
            return line

        # The generator checks seen_lines before add_line records each
        # line, so only first occurrences get written.
        output_file.writelines(add_line(line) for line in input_file
                               if line not in seen_lines)
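Called like this, with hypothetical file names:

remove_duplicate_lines('input.txt', 'deduped.txt')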
To edit it within the same file:
lines_seen = set()  # holds lines already seen
with open("file.txt", "r+") as f:
    d = f.readlines()
    f.seek(0)
    for i in d:
        if i not in lines_seen:
            f.write(i)
            lines_seen.add(i)
    f.truncate()
Readable and concise:
with open('sample.txt') as fl:
    content = fl.read().split('\n')
content = set([line for line in content if line != ''])
content = '\n'.join(content)
with open('sample.txt', 'w') as fl:
    fl.write(content)
Here is my solution
if __name__ == '__main__':
    f = open('temp.txt', 'w+')
    flag = False
    with open('file.txt') as fp:
        for line in fp:
            # scan what has been written to temp.txt so far
            for temp in f:
                if temp == line:
                    flag = True
                    print('Found Match')
                    break
            if flag == False:
                f.write(line)
            elif flag == True:
                flag = False
            # rewind temp.txt for the next scan
            f.seek(0)
    f.close()
grep -E '^[a-zA-Z]+$' <filename> | sort -u > outfile.txt
This filters the file to purely alphabetic lines and removes duplicate values (note the -E flag, which grep needs for the + quantifier).
Here is my solution
d = input("your file:")  # write your file name here
file1 = open(d, mode="r")
file2 = open('file2.txt', mode='w')  # truncate any previous output
file2.close()
file1row = file1.readline()
while file1row != "":
    # re-open the output for appending and for reading on every pass
    file2 = open('file2.txt', mode='a')
    file2read = open('file2.txt', mode='r')
    file2r = file2read.read().strip()
    if file1row not in file2r:
        file2.write(file1row)
    file1row = file1.readline()
    file2read.close()
    file2.close()

Write to file line by line Python

Here, I want to write the word_count from each loop iteration to the file, line by line. However, they are all written back to back.
import os
import string

def remove_punctuation(value):
    result = ""
    for c in value:
        # If char is not punctuation, add it to the result.
        if c not in string.punctuation and c != '،' and c != '؟' and c != '؛' and c != '«' and c != '»':
            result += c
    return result

def all_words(file_path):
    with open(file_path, 'r', encoding="utf-8") as f:
        p = f.read()
        p = remove_punctuation(p)
        words = p.split()
        word_count = len(words)
        return str(word_count)

myfile = open('D:/t.txt', 'w')
for root, dirs, files in os.walk("C:/ZebRa", topdown=False):
    for filename in files:
        file_path = os.path.join(root, filename)
        f = all_words(file_path)
        myfile.write(f)
        break
myfile.close()
I have also tried to add a newline, but instead it writes nothing:
myfile.write(f'\n')
Change this line:
return str(word_count)
to
return str(word_count) + '\n'
If you're using Python 3.6+, you could also try:
return f'{word_count}\n'
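Alternatively, a small sketch using print(), which appends a newline by default, so all_words() can keep returning the bare count:

# print() adds the trailing newline for you.
print(all_words(file_path), file=myfile)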
You can write a newline character at the end of each iteration:
for root, dirs, files in os.walk("C:/ZebRa", topdown=False):
    for filename in files:
        file_path = os.path.join(root, filename)
        f = all_words(file_path)
        myfile.write(f)
        break
    myfile.write('\n')
When you use file.write(), try using this instead:
myfile.write(f + "\n")
This will add a new line after every iteration. (Note that myfile.write(f'\n') writes only a newline character, since f'\n' is an f-string literal, not the variable f followed by a newline.)
For your code to work, however, you need to iterate in a for loop, like this:
for string in f:
    file.write(string + "\n")
I hope this helps

Unable to implement the function

Implement the function isWhiteLine(), which takes a string and returns TRUE if the string contains only white space and tab characters. The program should read a file given as a command-line argument and print only the non-blank lines to standard output.
import sys

def isWhiteLine(x):
    return x.isspace()

file_name = sys.argv[1]
f = open(file_name, "r")
for i in f:
    if (isWhiteLine(i) == False):
        print(str(i).strip())
f.close()
The code below should support what you are looking for.
import sys

def print_non_empty_lines(file_name):
    with open(file_name, 'r') as f:
        lines = f.readlines()
    for line in lines:
        line = line.strip()
        if line:
            print(line)

if __name__ == "__main__":
    if len(sys.argv) > 1:
        print_non_empty_lines(sys.argv[1])

Python: Issue when trying to read and write multiple files

This script reads and writes all the individual HTML files in a directory. It iterates over the files, highlights the search terms, and writes the output. The issue is that after highlighting the last instance of a search item, the script drops all of the remaining content after that last instance in each file's output. Any help here is appreciated.
import os
import sys
import re

source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
    filepath = os.path.join(source + '\\' + f)
    infile = open(filepath, 'r+')
    source_content = infile.read()
    color = ('red')
    regex = re.compile(r"(\b in \b)|(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
    i = 0; output = ""
    for m in regex.finditer(source_content):
        output += "".join([source_content[i:m.start()],
                           "<strong><span style='color:%s'>" % color[0:],
                           source_content[m.start():m.end()],
                           "</span></strong>"])
        i = m.end()
    outfile = open(filepath, 'w')
    outfile.seek(0, 2)
    outfile.write(output)
print "\nProcess Completed!\n"
infile.close()
outfile.close()
raw_input()
After your for loop is over, you need to append whatever is left after the last match:
...
    i = m.end()
output += source_content[i:]  # here's the rest of your file
outfile = open(filepath, 'w')
...

