Here, I want to write the word_count in each loop line by line to the file. However, they are written all back to back.
import os
import string
def remove_punctuation(value):
result = ""
for c in value:
# If char is not punctuation, add it to the result.
if c not in string.punctuation and c != '،' and c != '؟' and c ! = '؛' and c != '«' and c != '»':
result += c
return result
def all_words(file_path):
with open(file_path, 'r', encoding = "utf-8") as f:
p = f.read()
p = remove_punctuation(p)
words = p.split()
word_count = len(words)
return str(word_count)
myfile = open('D:/t.txt', 'w')
for root, dirs, files in os.walk("C:/ZebRa", topdown= False):
for filename in files:
file_path = os.path.join(root, filename)
f = all_words(file_path)
myfile.write(f)
break
myfile.close()
I have also tried to add newline, but instead, it writes nothing.
myfile.write(f'\n')
Change this line:
return str(word_count)
to
return str(word_count) + '\n'
If you're using python 3.6+, you could also try:
return f'{word_count}\n'
You can write a newline character at the end of each iteration:
for root, dirs, files in os.walk("C:/ZebRa", topdown= False):
for filename in files:
file_path = os.path.join(root, filename)
f = all_words(file_path)
myfile.write(f)
break
myfile.write('\n')
When you us file.write() try using this instead:
myfile.write(f+"\n")
This will add a new line after every iteration
For your code to work, however, you need to iterate in a for loop, like this:
for string in f:
file.write(string+"\n")
I hope this helps
Related
I am trying to write a python script that iterates over all the txt files in a directory and deletes those that have fewer words than a defined limit. The current script does not delete the files. Below is what I tried:
import os
wordLimit = 1000
def articleWordCount(filename):
number = 0
with open(filename, 'w+') as f:
for line in f:
words = line.split()
number += len(words)
return number
def articlesRemoval():
directory = 'data'
removedArticles =0
for filename in os.listdir(directory):
if filename.endswith(".txt"):
if articleWordCount(filename) < wordLimit:
removedArticles += 1
os.remove(filename)
else:
print(os.path.join(directory, filename))
print(removedArticles)
articlesRemoval()
You should open the file in reading mode with the option "r+", you are opening it in write mode and the function articleWordCount always returns 0.
os.listdir() doesn't return paths, only filenames, so the files that you are trying to remove do not exist... I am assuming that data is in the directory where you are starting the script and that it does find the files you want. Change os.remove(filename) to os.remove(os.path.join(directory, filename)):
import os
wordLimit = 1000
def articleWordCount(filename):
number = 0
with open(filename, 'w+') as f:
for line in f:
words = line.split()
number += len(words)
return number
def articlesRemoval():
directory = 'data'
removedArticles = 0
for filename in os.listdir(directory):
if filename.endswith(".txt"):
if articleWordCount(filename) < wordLimit:
removedArticles += 1
os.remove(os.path.join(directory, filename))
else:
print(os.path.join(directory, filename))
print(removedArticles)
articlesRemoval()
I have a long script that is full of lines like this
[UBInt8.parse, b"\x01", 0x01, None],
[UBInt8.build, 0x01, b"\x01", None],
I need to turn them through regular expressions into
assert UBInt8.parse(b"\x01") == 0x01
assert UBInt8.build(0x01) == b"\x01"
Lists are always of length 4. 1st is method, 2nd is its argument, 3rd is return value, 4th is always None. I already used regex to solve a similar problem (someone produced the parser) but I need help writing the formatting string:
See Removing six.b from multiple files . This is the code I used before, the formatting expression needs to be rewritten and I dont speak regex. :(
import re
import os
indir = 'files'
for root, dirs, files in os.walk(indir):
for f in files:
fname = os.path.join(root, f)
with open(fname) as f:
txt = f.read()
txt = re.sub(r'six\.(b\("[^"]*"\))', r'\1', txt)
with open(fname, 'w') as f:
f.write(txt)
print(fname)
Here is the manual parsing that I came up with. No regex.
#!/usr/bin/python3
import re, os, sys
def processfile(fname):
print(fname+'... ')
with open(fname, 'rt') as f:
txt = f.readlines()
with open(fname+"-trans", 'wt') as f:
for line in txt:
items = list(map(str.strip, line.strip().strip(",[]").split(",")))
if len(items) == 4:
if items[1] == "None":
items[1] = ""
if items[3] == "None":
o = "assert {0}({1}) == {2}".format(*items)
else:
if items[1] == "":
o = "assert raises({0}) == {3}".format(*items)
else:
o = "assert raises({0}, {1}) == {3}".format(*items)
f.write(" "+o+"\n")
else:
f.write(line)
processfile(sys.argv[1])
I have a function that finds the mean string length. Now I'm trying to write a function that will traverse and touch every txt file in the directory and return the file with the highest mean. What I've got right now doesn't seem to traverse properly. Pleeease help. Thanks.
from __future__ import print_function
import os
def mean_length(file path):
length = 0.0
line_num = 0
with open(filepath) as f:
for line in f:
if line.strip():
length += len(line.strip())
line_num += 1
return length/line_num
def highest_mean():
max_mean = 0
max_name = ""
filepath = open("Desktop/Textfiles/moby.txt")
for root, dirs, files in os.walk("Desktop/Textfiles"):
for filename in files:
if filename.endswith('.txt'):
filepath = os.path.join(root, filename)
if mean_length(filepath) > max_mean:
max_name = filename
max_mean = mean_length(filepath)
return max_name
I think you need to go through all files to get the one with the highest mean, maybe with another two variables:
def mean_length(filepath):
length = 0.0
line_num = 0
with open(filepath) as f:
for line in f:
if line.strip():
length += len(line.strip())
line_num += 1
return length/line_num
def highest_mean():
max_mean = 0
max_name = ""
for root, dirs, files in os.walk("Desktop/Textfiles"):
for filename in files:
if filename.endswith('.txt'):
filepath = os.path.join(root, filename)
m_length = mean_length(filepath)
if m_length > max_mean:
max_name = filename
max_mean = m_length
return max_name
This is a simple code do the same work of len() built in function.
var =input("enter your text to calculate here : ")
def length(var):
count =0
for i in var:
count +=1
print(count)
lent(var)
print(len(var))
I have a csv file partList.csv with strings that I want to use to search through a larger group of txt files. For some reason when I use the direct string 'L 99' I get a result. When I load the string L 99 from the csv I get no result.
partList.csv only contains cells in the first column with part numbers, one of which is L-99. txt_files_sample\5.txt is a text document that at some point contains the string L 99
My code:
def GetPartList():
partList = []
f = open('partList.csv', 'rb')
try:
reader = csv.reader(f)
for row in reader:
part = row[0].replace('-',' ').strip()
partList.append(part)
finally:
f.close()
return partList
def FindFileNames(partList):
i = 0
files = []
for root, dirs, filenames in os.walk('txt_files_sample'):
for f in filenames:
document = open(os.path.join(root, f), 'rb')
for line in document:
if partList[i] in line:
#if 'L 99' in line:
files.append(f)
break
i = i + 1
return files
print FindFileNames(GetPartList())
The code, as it stands above produces:
>>> []
If I uncomment if 'L 99' in line: and comment out if partList[i] in line: I get the result:
>>> ['5.txt']
So using Martijn's input, I discovered the issue was how I looped over partList. Rewritting FindFileNames() worked:
def FindFileList(partList):
i = 0
files = []
for root, dirs, filenames in os.walk('txt_files'):
for f in filenames:
a = 0
document = open(os.path.join(root, f), 'rb')
for line in document:
if a is 1:
break
for partNo in partList:
if partNo in line:
files.append(f)
a = 1
document.close()
return files
With the updated code I got a result that was an accurate list of filenames.
I'm looking to run my script on all text files in a directory, but I'm having a lot of trouble.
Here is the code I have so far:
data = {}
date = ID = values = None
infile = "z140327b.txt"
outfile = "oz140327b.txt"
sample = 1
with open(infile) as datafile, open(outfile, 'w') as f2:
for line in datafile:
if line.lstrip().startswith('!'):
date = line[1:].strip()
elif line.lstrip().startswith('?'):
sample = 2
elif line.lstrip().startswith('#'):
ID = line[1:].strip()
data[ID] = {}
data[ID]['date'] = date
tedtime = ID[0:2] + ":" + ID[2:]
str_1 = str(data[ID])
f2.write(tedtime + ' ' + date + ',' + str(sample))
elif line.strip():
if not ID:
continue
try:
words = line.split()
value = float(words[-1]) # last word
unit = words[-2].lstrip('(').rstrip(')')
item = {'value': value, 'unit': unit}
key = ' '.join(words[:-2])
data[ID][key] = item
except (ValueError) as err:
print("Could not parse this line:")
print(line)
continue
else: # if 'empty' line
ca_str = str(data[ID]['Contact Angle']['value'])
f2.write(',' + ca_str + '\n')
ID = None
ca_str2 = str(data[ID]['Contact Angle']['value'])
f2.write(',' + ca_str2 + '\n')
At the minute, I'm manually adding the filename (infile) and the output filename (outfile). I would like the output file name to be the same as the input file, preceded by an 'o', as shown in the example code.
You can use glob to get all the files in the directory:
from glob import glob
files=glob('*.txt')
for filename in files:
with open(filename,'r') as f, open('o'+filename,'w') as f1:
....
#read from f
#write to f1
Simply iterate over each filename, do what you want to it, and write it to a new file.
Make sure your script is run from the directory you are in, or you need to pass the path to glob.
import glob
import os.path
def text_files(target_dir):
"""Return (infile, outfile) tuple for all *.txt files in target_dir."""
text_files = os.path.join(target_dir, '*.txt')
for fname in glob.glob(text_files):
outfile = 'o' + os.path.basename(fname)
outfile = os.path.join(target_dir, outfile)
yield fname, outfile
# Search for text files in /tmp
for inf, outf in text_files("/tmp"):
print inf, outf