I have a function that finds the mean string length. Now I'm trying to write a function that will traverse and touch every txt file in the directory and return the file with the highest mean. What I've got right now doesn't seem to traverse properly. Pleeease help. Thanks.
from __future__ import print_function
import os
def mean_length(file path):
length = 0.0
line_num = 0
with open(filepath) as f:
for line in f:
if line.strip():
length += len(line.strip())
line_num += 1
return length/line_num
def highest_mean():
max_mean = 0
max_name = ""
filepath = open("Desktop/Textfiles/moby.txt")
for root, dirs, files in os.walk("Desktop/Textfiles"):
for filename in files:
if filename.endswith('.txt'):
filepath = os.path.join(root, filename)
if mean_length(filepath) > max_mean:
max_name = filename
max_mean = mean_length(filepath)
return max_name
I think you need to go through all files to get the one with the highest mean, maybe with another two variables:
def mean_length(filepath):
length = 0.0
line_num = 0
with open(filepath) as f:
for line in f:
if line.strip():
length += len(line.strip())
line_num += 1
return length/line_num
def highest_mean():
max_mean = 0
max_name = ""
for root, dirs, files in os.walk("Desktop/Textfiles"):
for filename in files:
if filename.endswith('.txt'):
filepath = os.path.join(root, filename)
m_length = mean_length(filepath)
if m_length > max_mean:
max_name = filename
max_mean = m_length
return max_name
This is a simple code do the same work of len() built in function.
var =input("enter your text to calculate here : ")
def length(var):
count =0
for i in var:
count +=1
print(count)
lent(var)
print(len(var))
Related
I have a series of files and I have to separate and display part of the text
my code is :
path = 'C:\\Bot\\*.log'
files = glob.glob(path)
nlines = 0
for name in files:
try:
with open(name) as f:
for line in f :
nlines += 1
if (line.find("Total") >= 0):
print(line)
I need a text that is saved in the file after the line number is obtained.
With the above code, I have access to the line number but I do not have access to some subsequent lines
How to access the next line value??
path = 'C:\\Bot\\*.log'
files = glob.glob(path)
nlines = 0
for name in files:
try:
with open(name) as f:
for line in f:
nlines += 1
if (line.find("Total") >= 0):
print(next(f))
I think it is a better solution to this problem
Use next() to read:
path = 'C:\\Bot\\*.log'
files = glob.glob(path)
nlines = 0
for name in files:
try:
with open(name) as f:
for line in f:
nlines += 1
if (line.find("Total") >= 0):
for i in range(6):
print(next(f))
I am trying to write a python script that iterates over all the txt files in a directory and deletes those that have fewer words than a defined limit. The current script does not delete the files. Below is what I tried:
import os
wordLimit = 1000
def articleWordCount(filename):
number = 0
with open(filename, 'w+') as f:
for line in f:
words = line.split()
number += len(words)
return number
def articlesRemoval():
directory = 'data'
removedArticles =0
for filename in os.listdir(directory):
if filename.endswith(".txt"):
if articleWordCount(filename) < wordLimit:
removedArticles += 1
os.remove(filename)
else:
print(os.path.join(directory, filename))
print(removedArticles)
articlesRemoval()
You should open the file in reading mode with the option "r+", you are opening it in write mode and the function articleWordCount always returns 0.
os.listdir() doesn't return paths, only filenames, so the files that you are trying to remove do not exist... I am assuming that data is in the directory where you are starting the script and that it does find the files you want. Change os.remove(filename) to os.remove(os.path.join(directory, filename)):
import os
wordLimit = 1000
def articleWordCount(filename):
number = 0
with open(filename, 'w+') as f:
for line in f:
words = line.split()
number += len(words)
return number
def articlesRemoval():
directory = 'data'
removedArticles = 0
for filename in os.listdir(directory):
if filename.endswith(".txt"):
if articleWordCount(filename) < wordLimit:
removedArticles += 1
os.remove(os.path.join(directory, filename))
else:
print(os.path.join(directory, filename))
print(removedArticles)
articlesRemoval()
I'm writing a simple script which loops over some text file and uses a function which should replace some string looking in a .csv file (every row has the word to replace and the word which I want there)
Here is my simple code:
import os
import re
import csv
def substitute_tips(table, tree_content):
count = 0
for l in table:
print("element of the table", l[1])
reg_tree = re.search(l[1],tree_content)
if reg_tree is not None:
#print("match in the tree: ",reg_tree.group())
tree_content = tree_content.replace(reg_tree.group(), l[0])
count = count + 1
else:
print("Not found: ",l[1])
tree_content = tree_content
print("Substitutions done: ",count)
return(tree_content)
path=os.getcwd()
table_name = "162_table.csv"
table = open(table_name)
csv_table = csv.reader(table, delimiter='\t')
for root, dirs, files in os.walk(path, topdown=True):
for name in files:
if name.endswith(".tree"):
print(Fore.GREEN + "Working on treefile", name)
my_tree = open(name, "r")
my_tree_content = my_tree.read()
output_tree = substitute_tips(csv_table, my_tree_content)
output_file = open(name.rstrip("tree") + "SPECIES_NAME.tre", "w")
output_file.write(output_tree)
output_file.close()
else:
print(Fore.YELLOW + name ,Fore.RED + "doesn't end in .tree")
It's probably very easy, but I'm a newbie.
Thanks!
The files list returned by os.walk contains only the file names rather than the full path names. You should join root with the file names instead to be able to open them:
Change:
my_tree = open(name, "r")
...
output_file = open(name.rstrip("tree") + "SPECIES_NAME.tre", "w")
to:
my_tree = open(os.path.join(root, name), "r")
...
output_file = open(os.path.join(root, name.rstrip("tree") + "SPECIES_NAME.tre"), "w")
Here, I want to write the word_count in each loop line by line to the file. However, they are written all back to back.
import os
import string
def remove_punctuation(value):
result = ""
for c in value:
# If char is not punctuation, add it to the result.
if c not in string.punctuation and c != '،' and c != '؟' and c ! = '؛' and c != '«' and c != '»':
result += c
return result
def all_words(file_path):
with open(file_path, 'r', encoding = "utf-8") as f:
p = f.read()
p = remove_punctuation(p)
words = p.split()
word_count = len(words)
return str(word_count)
myfile = open('D:/t.txt', 'w')
for root, dirs, files in os.walk("C:/ZebRa", topdown= False):
for filename in files:
file_path = os.path.join(root, filename)
f = all_words(file_path)
myfile.write(f)
break
myfile.close()
I have also tried to add newline, but instead, it writes nothing.
myfile.write(f'\n')
Change this line:
return str(word_count)
to
return str(word_count) + '\n'
If you're using python 3.6+, you could also try:
return f'{word_count}\n'
You can write a newline character at the end of each iteration:
for root, dirs, files in os.walk("C:/ZebRa", topdown= False):
for filename in files:
file_path = os.path.join(root, filename)
f = all_words(file_path)
myfile.write(f)
break
myfile.write('\n')
When you us file.write() try using this instead:
myfile.write(f+"\n")
This will add a new line after every iteration
For your code to work, however, you need to iterate in a for loop, like this:
for string in f:
file.write(string+"\n")
I hope this helps
I'm looking to run my script on all text files in a directory, but I'm having a lot of trouble.
Here is the code I have so far:
data = {}
date = ID = values = None
infile = "z140327b.txt"
outfile = "oz140327b.txt"
sample = 1
with open(infile) as datafile, open(outfile, 'w') as f2:
for line in datafile:
if line.lstrip().startswith('!'):
date = line[1:].strip()
elif line.lstrip().startswith('?'):
sample = 2
elif line.lstrip().startswith('#'):
ID = line[1:].strip()
data[ID] = {}
data[ID]['date'] = date
tedtime = ID[0:2] + ":" + ID[2:]
str_1 = str(data[ID])
f2.write(tedtime + ' ' + date + ',' + str(sample))
elif line.strip():
if not ID:
continue
try:
words = line.split()
value = float(words[-1]) # last word
unit = words[-2].lstrip('(').rstrip(')')
item = {'value': value, 'unit': unit}
key = ' '.join(words[:-2])
data[ID][key] = item
except (ValueError) as err:
print("Could not parse this line:")
print(line)
continue
else: # if 'empty' line
ca_str = str(data[ID]['Contact Angle']['value'])
f2.write(',' + ca_str + '\n')
ID = None
ca_str2 = str(data[ID]['Contact Angle']['value'])
f2.write(',' + ca_str2 + '\n')
At the minute, I'm manually adding the filename (infile) and the output filename (outfile). I would like the output file name to be the same as the input file, preceded by an 'o', as shown in the example code.
You can use glob to get all the files in the directory:
from glob import glob
files=glob('*.txt')
for filename in files:
with open(filename,'r') as f, open('o'+filename,'w') as f1:
....
#read from f
#write to f1
Simply iterate over each filename, do what you want to it, and write it to a new file.
Make sure your script is run from the directory you are in, or you need to pass the path to glob.
import glob
import os.path
def text_files(target_dir):
"""Return (infile, outfile) tuple for all *.txt files in target_dir."""
text_files = os.path.join(target_dir, '*.txt')
for fname in glob.glob(text_files):
outfile = 'o' + os.path.basename(fname)
outfile = os.path.join(target_dir, outfile)
yield fname, outfile
# Search for text files in /tmp
for inf, outf in text_files("/tmp"):
print inf, outf