Prints output twice when reading a file - python

I have written the following code to write a file, convert it into integer values, save that file and then read and convert it back to the original string. However, it prints the output twice.
My code is
def write():
sentence=input('What is your Statement?:')
name=input('Name your file:')
sentence=sentence.lower()
words=sentence.split(' ')
file_register=set()
F_output=[]
position=[]
for i in words:
if i not in file_register:
F_output.append(i)
file_register.add(i)
for x in words:
for y in range(len(F_output)):
if x==F_output[y]:
position.append(y)
name+='.txt'
with open(str(name),'w') as f:
f.write(str(len(position)) + '\n')
for c in range(len(position)):
f.write(str(position[c]) + '\n')
f.write(str(len(F_output)) + '\n')
for d in range(len(F_output)):
f.write(str(F_output[d] + '\n'))
f.close
global name
global position
def read1():
savefile=[]
output=('')
with open(name,'r') as file_open:
num=int(file_open.readline())
while num!=0:
a1=file_open.readline()
a1_split=a1.split('\n')
position.append(int(a1_split[0]))
num-=1
file_integer=int(file_open.readline())
while file_integer!=0:
word_s=file_open.readline()
word_split=word_s.split()
savefile.append(word_split)
file_integer-=1
for n in range(len(position)):
a=position[n]
output+=str(savefile[a])+('')
global output
write()
read1()
print('Your file is: '+output)
I have tried searching, but I cannot find an answer for it. I am fairly new to Python and any help is appreciated.

In write(), you declare position as global. In read1(), you don't declare it as global, but since you never create a local position variable it's the global one that gets used. So you end up populating it twice: once in write() then once again in read1().
To make a long story short: dont use globals. You don't need them.
Also, you'd find your code way easier to read, understand and debug by 1/ using better naming, 2/ writing simple, short functions that do one single thing, works on their inputs (arguments) and return values (the same arguments should produce the same output values), and 3/ properly using python's for loop.
Here's an example of what your code could looks like following those rules:
def parse(sentence):
sentence=sentence.lower()
words=sentence.split(' ')
register=set()
output = []
positions = []
for word in words:
if word not in register:
output.append(word)
register.add(word)
for word in words:
for index in range(len(output)):
if word == output[index]:
positions.append(index)
return output, positions
def write(path, output, positions):
with open(path, 'w') as f:
f.write(str(len(positions)) + '\n')
for index in positions:
f.write(str(index) + '\n')
f.write(str(len(output)) + '\n')
for word in output:
f.write(word + '\n')
def read(path):
words = []
positions = []
with open(path, 'r') as f:
poscount = int(f.readline().strip())
while poscount:
line = f.readline().strip()
pos = int(line)
positions.append(pos)
poscount -= 1
wordcount = int(f.readline().strip())
while wordcount:
line = f.readline()
word = line.strip()
words.append(word)
wordcount -= 1
output = []
for index in positions:
word = words[index]
output.append(word)
return output, positions
def main():
sentence = input('What is your Statement?:')
path = input('Name your file:')
if not path.endswith(".txt"):
path +=".txt"
source_out, source_positions = parse(sentence)
write(path, source_out, source_positions)
read_out, read_positions = read(path)
print("your file is: {}".format(read_out))
if __name__ == "__main__":
main()
This code is still overly complicated IMHO but at least it's mostly readable, doesn't use any global, and should be easier to test.

Related

Any simple python code suggestions to add a constant to every individual number in a .txt

so -----2-----3----5----2----3----- would become -----4-----5----7----4----5-----
if the constant was 2 and etc. for every individual line in the text file.
This would involve splitting recognising numbers in between strings and adding a constant to them e.g ---15--- becomes ---17--- not ---35---.
(basically getting a guitar tab and adding a constant to every fret number)
Thanks. Realised this started out vague and confusing so sorry about that.
lets say the file is:
-2--3--5---7--1/n-6---3--5-1---5
and im adding 2, it should become:
-4--5--7---9--3/n-8---5--7-3---7
Change the filename to something relevant and this code will work. Anything below new_string needs to be change for what you need, eg writing to a file.
def addXToAllNum(int: delta, str: line):
values = [x for x in s.split('-') if x.isdigit()]
values = [str(int(x) + delta) for x in values]
return '--'.join(values)
new_string = '' # change this section to save to new file
for line in open('tabfile.txt', 'r'):
new_string += addXToAllNum(delta, line) + '\n'
## general principle
s = '-4--5--7---9--3 -8---5--7-3---7'
addXToAllNum(2, s) #6--7--9--11--10--7--9--5--9
This takes all numbers and increments by the shift regardless of the type of separating characters.
import re
shift = 2
numStr = "---1----9---15---"
print("Input: " + numStr)
resStr = ""
m = re.search("[0-9]+", numStr)
while (m):
resStr += numStr[:m.start(0)]
resStr += str(int(m.group(0)) + shift)
numStr = numStr[m.end(0):]
m = re.search("[0-9]+", numStr)
resStr += numStr
print("Result:" + resStr)
Hi You Can use that to betwine every line in text file add -
rt = ''
f = open('a.txt','r')
app = f.readlines()
for i in app:
rt+=str(i)+'-'
print " ".join(rt.split())
import re
c = 2 # in this example, the increment constant value is 2
with open ('<your file path here>', 'r+') as file:
new_content = re.sub (r'\d+', lambda m : str (int (m.group (0)) + c), file.read ())
file.seek (0)
file.write (new_content)

Function won't work when using a list created from a file

I am trying to create a list of words from a file is being read as then delete all words that contain duplicate letters. I was able to do it successfully with a list of words that I entered however when I try to use the function on the list created from a file the function still includes words with duplicates.
This works:
words = ['word','worrd','worrrrd','wordd']
alpha = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"]
x = 0
while x in range(0, len(alpha)):
i = 0
while i in range(0, len(words)):
if words[i].count(alpha[x]) > 1:
del(words[i])
i = i - 1
else:
i = i + 1
x = x + 1
print(words)
This is how I'm trying to do it when reading the file:
words = []
length = 5
file = open('dictionary.txt')
for word in file:
if len(word) == length+1:
words.append(word.splitlines())
alpha = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"]
x = 0
while x in range(0, len(alpha)):
i = 0
while i in range(0, len(words)):
if words[i].count(alpha[x]) > 1:
del(words[i])
i = i - 1
else:
i = i + 1
x = x + 1
print(words)
Try something like this. First, the string module is not quite deprecated, but it's unpopular. Lucky for you, it defines some useful constants to save you a bunch of typing. So you don't have to type all those quotes and commas.
Next, use the with open('filespec') as ... context for reading files: it's what it was put there for!
Finally, be aware of how iteration works for text files: for line in file: reads lines, including any trailing newlines. Strip those off. If you don't have one-word-per-line, you'll have to split the lines after you read them.
# Read words (possibly >1 per line) from dictionary.txt into lexicon[].
# Convert the words to lower case.
import string
Lexicon = []
with open('dictionary.txt') as file:
for line in file:
words = line.strip().lower().split()
Lexicon.extend(words)
for ch in string.ascii_lowercase:
for i in range(len(Lexicon)):
word = Lexicon[i]
if word.count(ch) > 1:
del Lexicon[i]
i -= 1
print('\n'.join(Lexicon))
How about this:
#This more comprehensive sample allows me to reproduce the file-reading
# problem in the script itself (before I changed the code "tee" would
# print, for instance)
words = ['green','word','glass','worrd','door','tee','wordd']
outlist = []
for word in words:
chars = [c for c in word]
# a `set` only contains unique characters, so if it is shorter than the
# `word` itself, we found a word with duplicate characters, so we keep
# looping
if len(set(chars)) < len(chars):
continue
else:
outlist.append(word)
print(outlist)
Result:
['word']
import string
words = ['word','worrd','worrrrd','wordd','5word']
new_words = [x for x in words if len(x) == len(set(x)) if all(i in string.ascii_letters for i in x)]
print(new_words)

How do I count the occurences of characters of a partition in python?

I have a large file containing sequences; I want to analyze only the last set of characters, which happen to be of variable length. In each line I would like to take the first character and last character of each set in a text file and count the total instances of those characters.
Here is an example of the data in the file:
-1iqd_BA_0_CDRH3.pdb kabat H3 PDPDAFDV
-1iqw_HL_0_CDRH3.pdb kabat H3 NRDYSNNWYFDV
I want to take the first character after the "H3" and the last character (both in bold in example).
The output for these two lines should be:
first Counter({'N': 1, 'P': 1})
last Counter({'V': 2})
This is what I have done so far:
f = open("C:/CDRH3.txt", "r")
from collections import Counter
grab = 1
for line in f:
line=line.rstrip()
left,sep,right=line.partition(" H3 ")
if sep:
AminoAcidsFirst = right[:grab]
AminoAcidsLast = right[-grab:]
print ("first ",Counter(line[:] for line in AminoAcidsFirst))
print ("last ",Counter(line[:] for line in AminoAcidsLast))
f.close()
This prints the counts of only the last line of data which looks like:
first Counter({'N': 1})
last Counter({'V': 1})
How do I count all these characters in all lines in the file?
Notes:
Printing (AminoAcidsFirst) or (AminoAcidsLast) gives the desired list of all the lines in vertical but I can't count it or output it to a file. Writing to a new file will only write the characters of the last line of the original file.
Thanks!
No need for Counter: simply grab the last token after spliting and count the first and last characters:
first_counter = {}
last_counter = {}
for line in f:
line=line.split()[-1] # grab the last token
first_counter[line[0]] = first_counter.get(line[0], 0) + 1
last_counter[line[-1]] = last_counter.get(line[-1], 0) + 1
print("first ", first_counter)
print("last ", last_counter)
OUTPUT
first {'P': 1, 'N': 1}
last {'V': 2}
create 2 empty lists and append in each loop like so:
f = open("C:/CDRH3.txt", "r")
from collections import Counter
grab = 1
AminoAcidsFirst = []
AminoAcidsLast = []
for line in f:
line=line.rstrip()
left,sep,right=line.partition(" H3 ")
if sep:
AminoAcidsFirst.append(right[:grab])
AminoAcidsLast.append(right[-grab:])
print ("first ",Counter(line[:] for line in AminoAcidsFirst))
print ("last ",Counter(line[:] for line in AminoAcidsLast))
f.close()
Here:
Creation of empty list:
AminoAcidsFirst = []
AminoAcidsLast = []
Appending in each loop:
AminoAcidsFirst.append(right[:grab])
AminoAcidsLast.append(right[-grab:])
Two important things I would like to point out
never reveal path of file on your computer, this is especially applicable if you are from scientific community
your code can be more pythonic using with...as approach
And now the program
from collections import Counter
filePath = "C:/CDRH3.txt"
AminoAcidsFirst, AminoAcidsLast = [], [] # important! these should be lists
with open(filePath, 'rt') as f: # rt not r. Explicit is better than implicit
for line in f:
line = line.rstrip()
left, sep, right = line.partition(" H3 ")
if sep:
AminoAcidsFirst.append( right[0] ) # really no need of extra grab=1 variable
AminoAcidsLast.append( right[-1] ) # better than right[-grab:]
print ("first ",Counter(AminoAcidsFirst))
print ("last ",Counter(AminoAcidsLast))
Don't do line.strip()[-1] because sep verification is important
OUTPUT
first {'P': 1, 'N': 1}
last {'V': 2}
Note: Data files can get really large, and you might run into memory issues or computer hanging. So, might I suggest lazy read? Folloing is more robust program
from collections import Counter
filePath = "C:/CDRH3.txt"
AminoAcidsFirst, AminoAcidsLast = [], [] # important! these should be lists
def chunk_read(fileObj, linesCount = 100):
lines = fileObj.readlines(linesCount)
yield lines
with open(filePath, 'rt') as f: # rt not r. Explicit is better than implicit
for aChunk in chunk_read(f):
for line in aChunk:
line = line.rstrip()
left, sep, right = line.partition(" H3 ")
if sep:
AminoAcidsFirst.append( right[0] ) # really no need of extra grab=1 variable
AminoAcidsLast.append( right[-1] ) # better than right[-grab:]
print ("first ",Counter(AminoAcidsFirst))
print ("last ",Counter(AminoAcidsLast))
If you put statements at the bottom of or after your for loop to print AminoAcidsFirst and AminoAcidsLast, you will see that on each iteration you are just assigning a new value. Your intent should be to collect, contain or accumulate these values, before feeding them to collections.Counter.
s = ['-1iqd_BA_0_CDRH3.pdb kabat H3 PDPDAFDV', '-1iqw_HL_0_CDRH3.pdb kabat H3 NRDYSNNWYFDV']
An immediate fix for your code would be to accumulate the characters:
grab = 1
AminoAcidsFirst = ''
AminoAcidsLast = ''
for line in s:
line=line.rstrip()
left,sep,right=line.partition(" H3 ")
if sep:
AminoAcidsFirst += right[:grab]
AminoAcidsLast += right[-grab:]
print ("first ",collections.Counter(AminoAcidsFirst))
print ("last ",collections.Counter(AminoAcidsLast))
Another approach would be to produce the characters on demand. Define a generator function that will yield the things you want to count
def f(iterable):
for thing in iterable:
left, sep, right = thing.partition(' H3 ')
if sep:
yield right[0]
yield right[-1]
Then feed that to collections.Counter
z = collections.Counter(f(s))
Or using a file as the data source:
with open('myfile.txt') as f1:
# lines is a generator expression
# that produces stripped lines
lines = (line.strip() for line in f1)
z = collections.Counter(f(lines))

getting error unhashable type list in my python code

I'm getting this error when I run my python code, but I'm kind of learning my way around python and I'm having trouble decipher what's wrong with the code. I'm getting "unhashable type: list" error. Error is showing on line 54, and 35. I wonder if I'm missing some import. I've checked the code, but I don't see the error
#!/usr/bin/python
import string
def rotate(str, n):
inverted = ''
for i in str:
#calculating starting point in ascii
if i.isupper():
start = ord('A')
else:
start = ord('a')
d = ord(i) - start
j = chr((d + n) % 26 + start)
#calculating starting point in ascii(d + n) + start
inverted += j
return inverted
'''
making a dictionary out of a file containing all words
'''
def make_dictionary():
filename = "/home/jorge/words.txt"
fin = open(filename, 'r')
dic = dict()
for line in fin:
line = line.split()
dic[line] = line
return dic
'''
function that rotates a word and find other words
'''
def find_word(word):
rotated_words = dict() #dictionary for storing rotated words
for i in range(1, 14):
rotated = rotate(word, i)
if rotated in dic:
print word, rotated, i
if __name__ == "__main__":
words = make_dictionary()
for w in words:
find_word(w)
I wonder if I'm missing some imports?
For example:
line = line.split()
dic[line] = line
line is a list after the split and, as the error message tells you, lists aren't hashable; dictionary keys must be hashable. The minimal fix is to use an (immutable, hashable) tuple instead:
dic[tuple(line)] = line
Note that dictionary values can be lists, the restriction applies only to keys.
This makes line a list:
line = line.split()
dict keys need to be hashable, and lists are not hashable:
dic[line] = line
In your code it's not clear you need a dict. A set of words would suffice:
def make_set():
filename = "/home/jorge/words.txt"
result = set()
with open(filename, 'r') as fin:
for line in fin:
for word in line.split():
result.add(word)
return result
Using a set will remove duplicate words.

Python - Unable to split lines from a txt file into words

My goal is to open a file and split it into unique words and display that list (along with a number count). I think I have to split the file into lines and then split those lines into words and add it all into a list.
The problem is that if my program will run in an infinite loop and not display any results, or it will only read a single line and then stop. The file being read is The Gettysburg Address.
def uniquify( splitz, uniqueWords, lineNum ):
for word in splitz:
word = word.lower()
if word not in uniqueWords:
uniqueWords.append( word )
def conjunctionFunction():
uniqueWords = []
with open(r'C:\Users\Alex\Desktop\Address.txt') as f :
getty = [line.rstrip('\n') for line in f]
lineNum = 0
lines = getty[lineNum]
getty.append("\n")
while lineNum < 20 :
splitz = lines.split()
lineNum += 1
uniquify( splitz, uniqueWords, lineNum )
print( uniqueWords )
conjunctionFunction()
Using your current code, the line:
lines = getty[lineNum]
should be moved within the while loop.
You figured out what's wrong with your code, but nonetheless, I would do this slightly differently. Since you need to keep track of the number of unique words and their counts, you should use a dictionary for this task:
wordHash = {}
with open('C:\Users\Alex\Desktop\Address.txt', 'r') as f :
for line in f:
line = line.rstrip().lower()
for word in line:
if word not in wordHash:
wordHash[word] = 1
else:
wordHash[word] += 1
print wordHash
def splitData(filename):
return [words for words in open(filename).reads().split()]
Easiest way to split a file into words :)
Assume inp is retrived from a file
inp = """Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense."""
data = inp.splitlines()
print data
_d = {}
for line in data:
word_lst = line.split()
for word in word_lst:
if word in _d:
_d[word] += 1
else:
_d[word] = 1
print _d.keys()
Output
['Beautiful', 'Flat', 'Simple', 'is', 'dense.', 'Explicit', 'better', 'nested.', 'Complex', 'ugly.', 'Sparse', 'implicit.', 'complex.', 'than', 'complicated.']
I recommend:
#!/usr/local/cpython-3.3/bin/python
import pprint
import collections
def genwords(file_):
for line in file_:
for word in line.split():
yield word
def main():
with open('gettysburg.txt', 'r') as file_:
result = collections.Counter(genwords(file_))
pprint.pprint(result)
main()
...but you could use re.findall to deal with punctuation better, instead of string.split.

Categories