How do I get the line number of a replaced key value?
Currently I have separate functions for this; how do I combine them so that I get the line number at the time the string is replaced?
filedata is the path of the file in which I need to replace strings.
# Snippet from the question, reproduced as posted (its indentation was lost,
# so the nesting of the statements below is not recoverable with certainty).
# Maps each string to search for to the string that should replace it.
old_new_dict = {'hi':'bye','old':'new'}
# replace_oc: reads all lines from the open file object `file` and returns
# `line_number`, which stays None unless the loop body runs.
# NOTE(review): the `break` appears to end the loop on its first pass, which
# would make the result 1 for any non-empty file -- confirm intended nesting.
def replace_oc(file):
lines = file.readlines()
line_number = None
for i, line in enumerate(lines):
line_number = i + 1
break
return line_number
# replacee: walks old_new_dict and replaces every key found in `filedata`,
# printing "there" / "not there" for each key.
# NOTE(review): `filedata` is tested before anything is assigned to it, and
# neither `path` nor `pattern` is used -- presumably the file at `path` was
# meant to be read into `filedata` first; confirm with the author.
def replacee(path, pattern):
for key, value in old_new_dict.items():
if key in filedata:
print("there")
filedata = filedata.replace(key, value)
else:
print("not there")
You could break down the filedata into lines to check for the words to replace before doing the actual replacements. For example:
import re

filedata = """The quick brown fox
jumped over
the lazy dogs
and the cow ran away
from the fox"""
old_new_dict = {"dogs": "cats", "crazy": "sleeping", "fox": "cow"}

# Report line numbers against the untouched text. Searching text that earlier
# replacements have already modified would report (and later re-replace) a
# value that merely happens to contain a subsequent key.
original_lines = filedata.split("\n")
for key in old_new_dict:
    lines = [number for number, line in enumerate(original_lines, 1) if key in line]
    if lines:
        print(key, "found at lines", *lines)
    else:
        print(key, "is not there")

if old_new_dict:
    # One pass over the text so that a freshly inserted value is never
    # replaced again; longer keys go first so they win over their prefixes.
    pattern = re.compile(
        "|".join(re.escape(key) for key in sorted(old_new_dict, key=len, reverse=True))
    )
    filedata = pattern.sub(lambda match: old_new_dict[match.group(0)], filedata)
output:
# dogs found at lines 3
# crazy is not there
# fox found at lines 1 5
print(filedata)
The quick brown cow
jumped over
the lazy cats
and the cow ran away
from the cow
Related
I have list.txt which is ~4000 words like this:
cool
fast
smart
welcome
coder
word
nine
stacked
jokes
funny
haha
lol
qwerty
computer
keyboard
I would like to take all the words in this list and put them in another text document (output.txt) in batches of 10 with a blank line in between, like this:
cool fast smart welcome coder word nine stacked jokes funny
haha lol qwerty computer keyboard
blablabla...
Anyone that could help me out here?
I was thinking about using a list, but i have no idea how to make it into batches of 10 + add the blank line in between each one!
This will read in the words from test.txt and output them to result.txt.
There will be NO_WORDS words on each line, separated by a space and each line will be separated by a blank line.
# Number of words written on each output line.
NO_WORDS = 10

with open("test.txt") as file:
    # Strip every line and drop blank ones so that neither stray newlines nor
    # empty "words" end up in the batches.
    data_in = [line.strip() for line in file if line.strip()]

# One space-separated string per batch of NO_WORDS words; joining (instead of
# replacing "\n" with " ") avoids a trailing space at the end of each line.
data_out = [
    " ".join(data_in[idx : idx + NO_WORDS])
    for idx in range(0, len(data_in), NO_WORDS)
]
print(data_out)

with open("result.txt", "w") as file:
    for items in data_out:
        # "\n\n" ends the line and leaves one blank line before the next batch.
        file.write(items + "\n\n")
# Cut `string` into consecutive slices of 10 characters each; the final slice
# is shorter when the length is not a multiple of 10.
string_batches = [
    string[offset:offset + 10]
    for offset in range(0, len(string), 10)
]
This returns a list of equal-sized strings (the last one may be shorter).
Another solution:
from typing import Iterator
NO_WORDS = 10
def group(n, gen):
    """Yield successive lists of up to ``n`` items taken from ``gen``.

    Every yielded list holds exactly ``n`` items except possibly the last,
    which holds the remainder. Nothing is yielded for an empty input, and no
    empty trailing list is produced when the input length is a multiple of
    ``n``.

    Args:
        n: Maximum number of items per group; must be at least 1.
        gen: Any iterable.

    Raises:
        ValueError: If ``n`` is smaller than 1 (raised on first iteration).
    """
    if n < 1:
        # A non-positive size can never fill a group and would loop forever.
        raise ValueError("n must be at least 1")
    grp = []
    for item in gen:
        grp.append(item)
        if len(grp) == n:
            yield grp
            grp = []
    if grp:
        # Leftover items that did not fill a whole group.
        yield grp
def non_empty_lines(lines):
    """Yield each entry of ``lines`` with surrounding whitespace removed.

    Entries that are empty after stripping are skipped.
    """
    for raw in lines:
        text = raw.strip()
        if text == "":
            continue
        yield text
def main():
    """Copy the words of list.txt into result.txt, one group per line.

    The non-empty lines of list.txt are grouped NO_WORDS at a time and each
    group is written as a single space-separated line.
    """
    with open('list.txt', 'r') as in_file:
        with open('result.txt', 'w') as out_file:
            words = non_empty_lines(in_file)
            for batch in group(NO_WORDS, words):
                out_file.write(" ".join(batch) + "\n")
if __name__ == '__main__':
main()
I'm trying to create a simple program that opens a file, splits it into single word lines (for ease of use) and creates a dictionary with the words, the key being the word and the value being the number of times the word is repeated. This is what I have so far:
# Snippet from the question, reproduced as posted (indentation was lost).
# Goal: map every whitespace-separated word of paragraph.txt to its count.
infile = open('paragraph.txt', 'r')
word_dictionary = {}
string_split = infile.read().split()
for word in string_split:
if word not in word_dictionary:
word_dictionary[word] = 1
else:
# NOTE(review): `=+1` parses as `= +1`, i.e. it assigns 1 rather than
# adding 1 to the existing count.
word_dictionary[word] =+1
infile.close()
# A bare expression is only echoed in an interactive session; in a script this
# line produces no output.
word_dictionary
The line word_dictionary prints nothing, meaning that the lines are not being put into a dictionary. Any help?
The paragraph.txt file contains this:
This is a sample text file to be used for a program. It should have nothing important in here or be used for anything else because it is useless. Use at your own will, or don't because there's no point in using it.
I want the dictionary to do something like this, but I don't care too much about the formatting.
Two things. First of all the shorter version of
num = num + 1
is
num += 1
not
num =+ 1
code
word_dictionary = {}
# The context manager closes the file even if reading it fails.
with open('paragraph.txt', 'r') as infile:
    string_split = infile.read().split()

# Count how many times each whitespace-separated word occurs.
for word in string_split:
    if word not in word_dictionary:
        word_dictionary[word] = 1
    else:
        # "+=" adds to the stored count ("=+ 1" would just assign +1).
        word_dictionary[word] += 1

print(word_dictionary)
Secondly you need to print word_dictionary
I want to read from text file and print the first three words having the same initial three letters. I can get the first 3 initials but I cannot check if they are same or not.
Here is my code:
# Snippet from the question, reproduced as posted (indentation was lost).
# Prints the first three letters of every whitespace-separated word in
# words.txt.
def main():
# NOTE(review): the file is opened in "r+" mode but only read, and it is never
# closed.
f = open("words.txt", "r+")
# The loop that prints the initial letters
for word in f.read().split():
# The part that takes the first 3 letters of the word. `word` is already a
# single token, so `word.split()` yields a one-element list.
initials = [j[:3] for j in word.split()]
print(initials)
words.txt
when, where, loop, stack, wheel, wheeler
output
You can use a mapping from the first 3 letters to the list of words. collections.defaultdict could save you a few keystrokes here:
from collections import defaultdict
def get_words():
    """Return the first three words in words.txt sharing a 3-letter prefix.

    words.txt is read line by line; each line is expected to hold
    comma-separated words. Surrounding whitespace (including the newline at
    the end of a line) is removed from every word and empty entries are
    ignored.

    Returns:
        The list of the first three words found with a common prefix, in file
        order, or an empty list when no prefix occurs three times.
    """
    d = defaultdict(list)
    with open('words.txt') as f:
        for line in f:
            for word in line.split(','):
                # Strip so "wheeler\n" and "wheeler" count as the same kind of
                # token and the newline never leaks into the result.
                word = word.strip()
                if not word:
                    continue
                prefix = word[:3]
                d[prefix].append(word)
                if len(d[prefix]) == 3:
                    return d[prefix]
    return []
print(get_words()) # ['when', 'where', 'wheel']
This code snippet groups the words by their first 3 letters:
def main():
    """Print the first three words of words.txt that start with the same 3 letters.

    Words are the whitespace-separated tokens of each line. As soon as one
    3-letter prefix has collected three words, that list is printed and the
    function returns. If that never happens, the whole prefix -> words
    mapping is printed instead.
    """
    # Maps the first 3 letters of a word to the words seen with that start.
    groups = {}
    with open("words.txt", "r") as handle:
        for row in handle:
            for token in row.strip().split():
                bucket = groups.setdefault(token[:3], [])
                bucket.append(token)
                if len(bucket) == 3:
                    print(bucket)
                    return
    # Reached only when no prefix collected three words.
    print(groups)
This stops the processing (I used a return statement) if you get to 3 words with the same 3 letters.
You can use dictionary here with first 3 characters as key. Example
# Maps the first 3 characters of a word to the words seen with that start.
d = {}
# Stays None when no prefix ever collects three words.
key_with_three_element = None
# Read-only access is enough; the context manager also closes the file.
with open("words.txt", "r") as f:
    for word in f.read().split():
        prefix = word[:3]
        d.setdefault(prefix, []).append(word)
        if len(d[prefix]) == 3:
            key_with_three_element = prefix
            break

if key_with_three_element is not None:
    print(d[key_with_three_element])
else:
    # No prefix occurred three times; looking up a placeholder key here would
    # raise KeyError, so print an empty result instead.
    print([])
Output:
['when', 'where', 'wheel']
def main():
    """Print the first three words of words.txt that share a 3-letter prefix.

    Words are the whitespace-separated tokens of the file. Nothing is printed
    when no prefix occurs three times.
    """
    # Maps a 3-letter prefix to the words seen with it. It has to be created
    # here: the snippet never defines it anywhere else.
    record = {}
    # Read-only access is enough; the context manager closes the file.
    with open("words.txt", "r") as f:
        for word in f.read().split():
            prefix = word[:3]
            record.setdefault(prefix, []).append(word)
            if len(record[prefix]) == 3:
                print(record[prefix])
                break
I have a txt file. I have written code that finds the unique words and the number of times each word appears in that file. I now need to figure out how to print the lines that those words appear in as well. How can I go about doing this?
Here is a sample output:
Analyze what file: itsy_bitsy_spider.txt
Concordance for file itsy_bitsy_spider.txt
itsy : Total Count: 2
Line:1: The ITSY Bitsy spider crawled up the water spout
Line:4: and the ITSY Bitsy spider went up the spout again
def openFiles(openFile):
    """Collect the lines of ``openFile`` and their non-stop-word tokens.

    Every line is stripped and appended to ``linelist``. The stripped line is
    then lower-cased and split on whitespace, and each token that is not in
    ``stopwords`` is appended to ``wordlist`` (duplicates are kept).
    """
    for raw_line in openFile:
        stripped = raw_line.strip()
        linelist.append(stripped)
        wordlist.extend(
            token for token in stripped.lower().split() if token not in stopwords
        )
# Running tally: punctuation-stripped word -> number of times it was counted.
countdict = {}


def countWords(this_list):
    """Add the words of ``this_list`` to the ``countdict`` tally.

    Leading and trailing punctuation is stripped from each word before it is
    counted.
    """
    for raw_word in this_list:
        key = raw_word.strip(punctuation)
        countdict[key] = countdict.get(key, 0) + 1
from collections import defaultdict
# Word whose occurrences are reported below (compared in lower case).
target = 'itsy'
# Maps each lower-cased word to the 0-based indexes of the lines it occurs on
# (one entry per occurrence).
word_summary = defaultdict(list)
with open('itsy.txt', 'r') as f:
    lines = f.readlines()

for idx, line in enumerate(lines):
    for word in line.lower().split():
        word_summary[word].append(idx)

# Counted before the target lookup below, which would add a key for a target
# that never occurs.
unique_words = len(word_summary)
target_occurence = len(word_summary[target])
line_nums = set(word_summary[target])

# print() with a single pre-formatted string works on Python 2 and Python 3.
print("There are %s unique words." % unique_words)
print("There are %s occurences of '%s'" % (target_occurence, target))
# Sort first: a set has no defined order, so the line numbers could otherwise
# be listed out of sequence.
print("'%s' is found on lines %s" % (target, ', '.join(str(i + 1) for i in sorted(line_nums))))
If you parsed the input text file line by line, you could maintain another dictionary that is a word -> List<Line> mapping. ie for each word in a line, you add an entry. Might look something like the following. Bearing in mind I'm not very familiar with python, so there may be syntactic shortcuts I've missed.
eg
# word -> number of occurrences
countdict = {}
# word -> list of the lines the word occurs in (one entry per occurrence)
linedict = {}
for line in text_file:
    # Split the line into words; iterating over the string itself would yield
    # single characters instead.
    for word in line.split():
        depunct = word.strip(punctuation)
        if depunct in countdict:
            countdict[depunct] += 1
        else:
            countdict[depunct] = 1

        # add entry for word in the line dict if not there already
        if depunct not in linedict:
            linedict[depunct] = []

        # now add the word -> line entry
        linedict[depunct].append(line)
One modification you will probably need to make is to prevent duplicates being added to the linedict if a word appears twice in the line.
The above code assumes that you only want to read the text file once.
# Maps each lower-cased word to {'count': occurrences, 'line': set of the
# lines (as read from the file) in which it occurs}.
words = {}
# The context manager closes the file even if processing a line fails.
with open("test.txt", "r") as openFile:
    for line in openFile:
        for word in line.strip().lower().split():
            wordDict = words.setdefault(word, {'count': 0, 'line': set()})
            wordDict['count'] += 1
            # A set keeps each line once even if the word repeats within it.
            wordDict['line'].add(line)

# print() with one argument works on Python 2 and Python 3.
print(words)
I wonder how to find the position of a word inside a .txt file as I read it.
this is my txt
cat dog monkey bird
this my printing
Word: cat Position: line 1 , word 1 (1,1)
any idea?
foo.txt:
asd
asd
asd
ad
I put returns between .......
asd
sad
asd
code:
>>> def position(file,word):
... for i,line in enumerate(file): #for every line; i=linenumber and line=text
... s=line.find(word) #find word
... if s!=-1: #if word found
... return i,s # return line number and position on line
...
>>> position(open("foo.txt"),"put")
(4, 2) # (line,position)
This would work for this given file:
blah bloo cake
donky cat sparrow
nago cheese
The code:
# The word to look for; it was referenced but never defined before.
word = "sparrow"
with open("file", "r") as f:
    # enumerate(..., 1) provides the 1-based line number directly.
    for lcount, line in enumerate(f, 1):
        testline = line.split()
        # Test the split words, not the raw line: a substring hit such as
        # "sparrows" would make index() raise ValueError.
        if word in testline:
            ind = testline.index(word)
            print("Word %s found at line %d, word %d" % (word, lcount, ind + 1))
            break
Would print:
Word sparrow found at line 2, word 3
You should be able to modify this quite easily to make a function or different output I hope.
Although I'm still really not sure if this is what you're after...
Minor edit:
As a function:
def findword(objf, word):
    """Print where ``word`` first occurs as a whole word in the file ``objf``.

    The file is scanned line by line; each line is split on whitespace and the
    first line containing ``word`` as one of its tokens is reported with its
    1-based line number and 1-based word position. "Not found" is printed when
    no line contains the word.

    Args:
        objf: Path of the file to search.
        word: Whitespace-free word to look for (exact, case-sensitive match).
    """
    found = False
    with open(objf, "r") as f:
        # enumerate(..., 1) provides the 1-based line number directly.
        for lcount, line in enumerate(f, 1):
            testline = line.split()
            # Test the token list rather than the raw line: a mere substring
            # hit (e.g. "cat" inside "cats") would make index() raise
            # ValueError.
            if word in testline:
                ind = testline.index(word)  # 0-based position within the line
                found = True
                break
    if found:
        print("Word %s found at line %d, word %d" % (word, lcount, ind + 1))
    else:
        print("Not found")
Use:
>>> findword('file', "sparrow")
Word sparrow found at line 2, word 3
>>> findword('file', "donkey")
Not found
>>>
Shrug Not the best method I'll give it that, but then again it works.
Basic idea
Open the file
Iterate over the lines
For every line read, increment some counter, e.g. line_no += 1;
Split the line by whitespace (you will get a list)
Check if the list contains the word (use in), then use list.index(word) to get the index, store that index in some variable word_no = list.index(word)
print line_no and word_no if the word was found
There are a lot better solutions out there (and more pythonic ones) but this gives you an idea.