I am getting multiple lists as the output of a function. I want to combine all the lists into a single list. Please help.
# Question code, kept exactly as posted (extraction flattened the indentation).
# Joins the arguments, tokenizes the result with nltk.wordpunct_tokenize and
# gathers the lemma names of each token's synsets into a list.
def words(*args):
# NOTE: [args] is a one-element list holding the args tuple, so the ''.join()
# on the next line receives a tuple instead of strings.
word =[args]
tokens = nltk.wordpunct_tokenize(''.join(word))
for word in tokens:
final = wn.synsets(word)
synonyms = set()
for synset in final:
for synwords in synset.lemma_names:
synonyms.add(synwords)
final = list(synonyms)
# dic is assigned here but never read before the return below.
dic = dict(zip(word,final))
dic[word] = final
return final
Use this wherever you are using words function (which is a list of lists according to your question):
wordlist = [word for sublist in words(args) for word in sublist]
Once your code is corrected, I cannot reproduce the problem: words() returns a single flat list, not a list of lists. If there is some input that reproduces the problem, please update your question with it.
The correction is to pass args directly into join(), not to wrap it in a list.
# Corrected version from the answer, kept exactly as posted (extraction
# flattened the indentation).
def words(*args):
# args is already a tuple of strings, so it is passed to join() directly.
tokens = nltk.wordpunct_tokenize(''.join(args))
for word in tokens:
final = wn.synsets(word)
synonyms = set()
for synset in final:
# NOTE(review): lemma_names is iterated as an attribute here; confirm it is
# not a method in the NLTK version in use.
for synwords in synset.lemma_names:
synonyms.add(synwords)
final = list(synonyms)
# dic is assigned here but never read before the return below.
dic = dict(zip(word,final))
dic[word] = final
# NOTE(review): the sample output that follows lists lemma names for a single
# word, which suggests this return sits inside the token loop; confirm
# against the original indentation.
return final
>>> words('good day', ', this is a', ' test!', 'the end.')
['beneficial', 'right', 'secure', 'just', 'unspoilt', 'respectable', 'good', 'goodness', 'dear', 'salutary', 'ripe', 'expert', 'skillful', 'in_force', 'proficient', 'unspoiled', 'dependable', 'soundly', 'honorable', 'full', 'undecomposed', 'safe', 'adept', 'upright', 'trade_good', 'sound', 'in_effect', 'practiced', 'effective', 'commodity', 'estimable', 'well', 'honest', 'near', 'skilful', 'thoroughly', 'serious']
Related
Write a function common_start(word_list) that takes in parameter a list of words.
This function must return a new list containing all words that start with the same letter as at least one other word in the list.
input: ['file', 'edit', 'view', 'insert', 'format']
output: ['file', 'format']
A one line solution, if you want
def common_start(input_list):
    """Return the words that share their first letter with another entry.

    Every entry of ``input_list`` counts, so a word that appears twice
    matches its own duplicate.  Comparison is case-sensitive and the input
    order is preserved.  Empty strings have no first letter and are skipped.
    """
    from collections import Counter  # local import: the snippet has no import section

    # Count each first letter once up front instead of rescanning the whole
    # list for every word.
    first_letters = Counter(word[0] for word in input_list if word)
    return [word for word in input_list if word and first_letters[word[0]] > 1]
This should work for you.
In this case I'm ignoring the duplicated words.
def common_start(word_list):
    """Return the distinct words that share a first letter with a different word.

    Repeated occurrences of the same word do not count as a match for each
    other, and each qualifying word is returned once, in order of first
    appearance.  Empty strings have no first letter and are skipped.
    """
    from collections import Counter  # local import: the snippet has no import section

    # dict.fromkeys() removes duplicates; on Python 3.7+ it also keeps
    # first-appearance order.
    distinct = list(dict.fromkeys(word_list))
    # A first letter used by more than one *distinct* word means each of those
    # words has a different partner starting with the same letter.
    first_letters = Counter(word[0] for word in distinct if word)
    return [word for word in distinct if word and first_letters[word[0]] > 1]
Input: ['tara', 'file', 'edit', 'view', 'insert', 'format','test']
Output: ['tara', 'file', 'format', 'test']
I have a list of specific words
['to', 'with', 'in', 'for']
I want to make a function which takes a sentence and, if it contains a word from my list, selects the next two words after it and adds each of them, joined to that word, to a result list (I need it for a part of my sentence generator). For example:
sentence = 'In the morning I went to the store and then to the restaurant'
I want to get
['tothe', 'tostore', 'tothe', 'torestaurant']
I wrote this code:
preps = ['to', 'with', 'in', 'for']
# Question code, kept exactly as posted (extraction flattened the indentation).
# For every word of the sentence that equals an entry of preps, appends that
# entry joined with each of the two words looked up after it.
def nextofnext_words_ofnegs(sentence):
list_of_words = sentence.split()
next_word = []
for i in list_of_words:
for j in preps:
if i == j:
# NOTE: list.index(i) returns the position of the FIRST occurrence of i, so a
# repeated preposition re-reads the words after its first occurrence.
next_word.append(j + list_of_words[list_of_words.index(i) + 1])
next_word.append(j + list_of_words[list_of_words.index(i) + 2])
return next_word
However i get this:
['tothe', 'tostore', 'tothe', 'tostore']
Instead of this:
['tothe', 'tostore', 'tothe', 'torestaurant']
This should give you what you want. Note that you can use the "in" operator in Python to check whether the word exists in your list of strings; there is no need to loop over the list in this case. Also, as mentioned above, using .index is not sufficient here because it always returns the position of the first occurrence; you can use enumerate to get the index as well as the item in the list.
preps = ['to', 'with', 'in', 'for']


def nextofnext_words_ofnegs(sentence):
    """Join each preposition found in ``sentence`` with its next two words.

    The sentence is split on whitespace.  For every word that is an entry of
    ``preps`` (case-sensitive), the preposition concatenated with each of the
    up-to-two following words is appended to the result, in sentence order.
    A preposition at or near the end of the sentence contributes only the
    followers that actually exist.
    """
    list_of_words = sentence.split()
    next_word = []
    for idx, word in enumerate(list_of_words):
        if word in preps:
            # A slice never raises, unlike indexing idx + 1 / idx + 2 directly,
            # so a trailing preposition no longer causes an IndexError.
            followers = list_of_words[idx + 1:idx + 3]
            next_word.extend(word + follower for follower in followers)
    return next_word
function anagrams(s1, s2) is a Boolean valued function, which returns true just in case the string s1 contains the same letters as string s2 but in a different order. The function should be case insensitive --- in other words it should return the same value if any letters in either s1 or s2 are changed from upper to lower case or from lower to upper case. You may assume that the input strings contain only letters.
The function find_all_anagrams(string) takes a string as input and returns a list of all words in the file english_words.txt that are anagrams of the input string.
the function should return a list [word1, ..., wordN] such that each word in this list is a word in the dictionary file such that the value function anagrams(string, word) are True
def anagrams(string1, string2):
    """Return True when the two strings are case-insensitive anagrams.

    Both inputs are lower-cased first.  Strings that are identical after
    lower-casing are *not* anagrams of each other (the letters must be in a
    different order), so that case returns False.
    """
    str_1 = string1.lower()
    str_2 = string2.lower()
    if str_1 == str_2:
        return False
    # Two strings hold the same letters exactly when their sorted letters match.
    return sorted(str_1) == sorted(str_2)
# Question code, kept exactly as posted (extraction flattened the indentation).
# Reads english_words.txt into word_list and tests each entry with anagrams().
def find_all_anagrams( string ):
with open("english_words.txt") as f:
word_list = []
for line in f.readlines():
word_list.append(line.strip())
# NOTE: str.split() returns a list, so every element of list1 is itself a list.
list1 = [i.split() for i in word_list]
for j in list1:
# j is a list here, and it is passed to anagrams() as its second argument.
if anagrams( string, j ) == True:
return list1
else:
return []
I keep getting this error: AttributeError: 'list' object has no attribute 'lower'
for example,word_list contains:
['pyruvates', 'python', 'pythoness', 'pythonesses', 'pythonic', 'pythons', 'pyuria', 'pyurias', 'pyx', 'pyxes']
Expected output below
Part of the txt file shown on the right:
Update:
I think I just solved it; here is my code:
def find_all_anagrams(string, path="english_words.txt"):
    """Return every word in the word-list file that is an anagram of ``string``.

    ``path`` names a text file with one word per line; it defaults to
    ``english_words.txt``, the file the original code hard-coded.  Each line
    is stripped of surrounding whitespace and kept when
    ``anagrams(string, word)`` is true.  Matches are returned in file order.
    """
    list1 = []
    with open(path) as f:
        # Iterating the file object reads one line at a time, so the whole
        # word list is never held in memory at once.
        for line in f:
            word = line.strip()
            if anagrams(string, word):
                list1.append(word)
    return list1
You are using split() function at this part:
list1 = [i.split() for i in word_list]
Let's see what the documentation tells us about that function:
str.split(sep=None, maxsplit=-1)
Return a list of the words in the
string, using sep as the delimiter string. If maxsplit is given, at
most maxsplit splits are done (thus, the list will have at most
maxsplit+1 elements). If maxsplit is not specified or -1, then there
is no limit on the number of splits (all possible splits are made).
It returns a list, and you added that list to your own list. I can see that word_list is meant to hold lines of words. Let's assume that word_list looks like this:
word_list = ["hello darkness my", "old friend I've", "come to see you", "again"]
What happens after list1 = [i.split() for i in word_list]?
list1 = [i.split() for i in word_list]
print(list1)
Output:
[['hello', 'darkness', 'my'], ['old', 'friend', "I've"], ['come', 'to', 'see', 'you'], ['again']]
As you can see, elements are individual lists. At this part of your code:
for j in list1:
if anagrams( string, j ) == True:
return list1
else:
return []
j is a list, therefore here:
def anagrams( string1, string2 ):
str_1 = string1.lower()
str_2 = string2.lower()
str_2 = string2.lower() is trying to call lower method on a list, which isn't a valid method for a list object, and that's why Python is complaining.
List Comprehension might look "cool" but often using simple loops benefits your code's readability, and in some cases might even avoid mistakes like this one. Here is my alternative:
# Flatten word_list: split every entry on whitespace and collect the single
# words, in order, into one flat list.
list1 = []
for i in word_list:
    for word in i.split():
        list1.append(word)
see the output:
print(list1)
['hello', 'darkness', 'my', 'old', 'friend', "I've", 'come', 'to', 'see', 'you', 'again']
Single words as you intended.
As indicated by the error message, list does not have an attribute .lower
I guess what you meant to do is call .lower() on a string stored inside the list.
For example:
mylist[index].lower()
where index corresponds to the string position within the list.
This question already has answers here:
Adding more than one value to dictionary when looping through string
(7 answers)
Closed 6 years ago.
I need to build a function that takes as input a string and returns a dictionary.
The keys are numbers and the values are lists that contain the unique words that have a number of letters equal to the keys.
For example, if the input function is as follows:
n_letter_dictionary("The way you see people is the way you treat them and the Way you treat them is what they become")
The function should return:
{2: ['is'], 3: ['and', 'see', 'the', 'way', 'you'], 4: ['them', 'they', 'what'], 5: ['treat'], 6: ['become', 'people']}
The code that I have written is as follows:
# Question code, kept exactly as posted (extraction flattened the indentation).
# Lower-cases and splits the input, then stores words in a dict keyed by length.
def n_letter_dictionary(my_string):
my_string=my_string.lower().split()
sample_dictionary={}
for word in my_string:
words=len(word)
# NOTE: plain assignment replaces whatever was stored under this length, so
# only the last word of each length survives.
sample_dictionary[words]=word
print(sample_dictionary)
return sample_dictionary
The function is returning a dictionary as follows:
{2: 'is', 3: 'you', 4: 'they', 5: 'treat', 6: 'become'}
The dictionary does not contain all the words with the same number of letters but is returning only the last one in the string.
Since you only want to store unique values in your lists, it actually makes more sense to use a set. Your code is almost right, you just need to make sure that you create a set if words isn't already a key in your dictionary, but that you add to the set if words is already a key in your dictionary. The following displays this:
def n_letter_dictionary(my_string):
    """Group the unique lower-cased words of ``my_string`` by their length.

    Returns a dict that maps each word length to the set of words having that
    length.  The finished dict is also printed before it is returned.
    """
    sample_dictionary = {}
    for word in my_string.lower().split():
        # setdefault() creates the set the first time a length is seen and
        # returns the existing one afterwards.
        sample_dictionary.setdefault(len(word), set()).add(word)
    print(sample_dictionary)
    return sample_dictionary
n_letter_dictionary("The way you see people is the way you treat them and the Way you treat them is what they become")
Output
{2: set(['is']), 3: set(['and', 'the', 'see', 'you', 'way']),
4: set(['them', 'what', 'they']), 5: set(['treat']), 6: set(['become', 'people'])}
The problem with your code is that you just put the latest word into the dictionary. Instead, you have to add that word to some collection of words that have the same length. In your example, that is a list, but a set seems to be more appropriate, assuming order is not important.
def n_letter_dictionary(my_string):
    """Group the unique lower-cased words of ``my_string`` by their length.

    Returns a dict that maps each word length to the set of words having that
    length.
    """
    sample_dictionary = {}
    for word in my_string.lower().split():
        length = len(word)
        if length not in sample_dictionary:
            # First word of this length: start an empty set for it.
            sample_dictionary[length] = set()
        sample_dictionary[length].add(word)
    return sample_dictionary
You can make this a bit shorter by using a collections.defaultdict(set):
my_string=my_string.lower().split()
sample_dictionary=collections.defaultdict(set)
for word in my_string:
sample_dictionary[len(word)].add(word)
return dict(sample_dictionary)
Or use itertools.groupby, but for this you have to sort by length, first:
words_sorted = sorted(my_string.lower().split(), key=len)
return {k: set(g) for k, g in itertools.groupby(words_sorted, key=len)}
Example (same result for each of the three implementations):
>>> n_letter_dictionary("The way you see people is the way you treat them and the Way you treat them is what they become")
{2: {'is'}, 3: {'way', 'the', 'you', 'see', 'and'}, 4: {'what', 'them', 'they'}, 5: {'treat'}, 6: {'become', 'people'}}
With sample_dictionary[words]=word you overwrite the current contents which you have put there so far. You need a list, and to that you can append.
Instead of that you need:
# Append to the list already stored under this length, or start a new list
# when the length has not been seen before.  Membership is tested on the dict
# itself; calling .keys() first is unnecessary.
if words in sample_dictionary:
    sample_dictionary[words].append(word)
else:
    sample_dictionary[words] = [word]
So if there is a value to this key, I append to it, and else create a new list.
You can use a defaultdict found in the collections library. You can use it to create a default type for the value portion of your dictionary, in this case a list, and just append to it based on the length of your word.
from collections import defaultdict
def n_letter_dictionary(my_string):
    """Group the whitespace-separated words of ``my_string`` by their length.

    Returns a ``defaultdict(list)`` that maps each word length to the words of
    that length, in order of appearance.  Words are kept as written (no
    lower-casing) and duplicates are retained.
    """
    my_dict = defaultdict(list)
    for word in my_string.split():
        # Indexing a defaultdict creates the empty list on first use.
        my_dict[len(word)].append(word)
    return my_dict
You could still do this without defaultdict's, but would just be a little longer in length.
def n_letter_dictionary(my_string):
    """Group the whitespace-separated words of ``my_string`` by their length.

    Returns a plain dict that maps each word length to the list of words of
    that length, in order of appearance.  Words are kept as written and
    duplicates are retained.
    """
    my_dict = {}
    for word in my_string.split():
        word_length = len(word)
        if word_length in my_dict:
            my_dict[word_length].append(word)
        else:
            # First word of this length: start its list.
            my_dict[word_length] = [word]
    return my_dict
To ensure there are no duplicates in the value lists without using set(), check for membership before appending. Be warned, though: if your value lists are large and your input data is mostly unique, you'll experience a performance setback, because the membership check scans the list and exits early only when the word is found.
from collections import defaultdict
def n_letter_dictionary(my_string):
    """Group the unique whitespace-separated words of ``my_string`` by length.

    Returns a ``defaultdict(list)`` that maps each word length to the distinct
    words of that length, in order of first appearance.  Words are compared
    as written (case-sensitive).
    """
    my_dict = defaultdict(list)
    for word in my_string.split():
        same_length = my_dict[len(word)]
        # Linear scan of the list: this is what keeps the values unique
        # without resorting to a set.
        if word not in same_length:
            same_length.append(word)
    return my_dict
# without defaultdicts
def n_letter_dictionary(my_string):
    """Group the unique whitespace-separated words of ``my_string`` by length.

    Returns a plain dict that maps each word length to the distinct words of
    that length, in order of first appearance.  Words are compared as written
    (case-sensitive).
    """
    my_dict = {}  # Init an empty dict
    for word in my_string.split():  # Split the string and iterate over it
        word_length = len(word)  # Get the length, also the key
        if word_length in my_dict:  # Check if the length is in the dict
            if word not in my_dict[word_length]:  # Key exists, but the word is not in its list yet
                my_dict[word_length].append(word)  # Add the word
        else:
            my_dict[word_length] = [word]  # New key, so the word can be stored without a duplicate check
    # The posted snippet stopped before this line, so the function returned None.
    return my_dict
So if you have a high frequency of duplicates and a short list of words to scan through, this approach would be acceptable. If you had for example a list of randomly generated words with just permutations of alphabetic characters, causing the value list to bloat, scanning through them will become expensive.
The shortest solution I came up with uses a defaultdict:
from collections import defaultdict
sentence = ("The way you see people is the way you treat them"
" and the Way you treat them is what they become")
Now the algorithm:
# Map each word length to the words of that length, in sentence order.
wordsOfLength = defaultdict(list)
for word in sentence.split():
    wordsOfLength[len(word)].append(word)
Now wordsOfLength will hold the desired dictionary.
itertools.groupby is the perfect tool for this.
from itertools import groupby
def n_letter_dictionary(string):
    """Group the whitespace-separated words of ``string`` by their length.

    Returns a dict that maps each word length to the list of words of that
    length.  Words are kept as written and duplicates are retained; within a
    group the words keep their original relative order because ``sorted`` is
    stable.
    """
    # groupby() only merges adjacent items, so the words must first be sorted
    # by the same key that is used for grouping.
    words_by_length = sorted(string.split(), key=len)
    return {length: list(group) for length, group in groupby(words_by_length, key=len)}


# Parenthesized so that the call is valid on Python 3 as well as Python 2.
print(n_letter_dictionary("The way you see people is the way you treat them and the Way you treat them is what they become"))
# {2: ['is', 'is'], 3: ['The', 'way', 'you', 'see', 'the', 'way', 'you', 'and', 'the', 'Way', 'you'], 4: ['them', 'them', 'what', 'they'], 5: ['treat', 'treat'], 6: ['people', 'become']}
# Group the words of a sample string by length, keeping duplicates and order.
my_string = "a aa bb ccc a bb".lower().split()
sample_dictionary = {}
for word in my_string:
    words = len(word)
    if words not in sample_dictionary:
        # First word of this length: start its list.
        sample_dictionary[words] = []
    sample_dictionary[words].append(word)
print(sample_dictionary)
I'm looking for a way to remove duplicate entries from a Python list but with a twist; The final list has to be case sensitive with a preference of uppercase words.
For example, between cup and Cup I only need to keep Cup and not cup. Unlike other common solutions which suggest using lower() first, I'd prefer to maintain the string's case here and in particular I'd prefer keeping the one with the uppercase letter over the one which is lowercase..
Again, I am trying to turn this list:
[Hello, hello, world, world, poland, Poland]
into this:
[Hello, world, Poland]
How should I do that?
Thanks in advance.
This does not preserve the order of words, but it does produce a list of "unique" words with a preference for capitalized ones.
In [34]: words = ['Hello', 'hello', 'world', 'world', 'poland', 'Poland', ]
In [35]: wordset = set(words)
In [36]: [item for item in wordset if item.istitle() or item.title() not in wordset]
Out[36]: ['world', 'Poland', 'Hello']
If you wish to preserve the order as they appear in words, then you could use a collections.OrderedDict:
In [43]: wordset = collections.OrderedDict()
In [44]: wordset = collections.OrderedDict.fromkeys(words)
In [46]: [item for item in wordset if item.istitle() or item.title() not in wordset]
Out[46]: ['Hello', 'world', 'Poland']
Using set to track seen words:
def uniq(words):
    """Yield each word the first time it is seen, ignoring case.

    Two words are treated as the same when their lower-cased forms are equal.
    The spelling that appears first in ``words`` is the one yielded; later
    spellings are dropped, whatever their capitalization.
    """
    seen = set()
    for word in words:
        key = word.lower()  # Use `word.casefold()` if possible. (3.3+)
        if key in seen:
            continue
        seen.add(key)
        yield word
Usage:
>>> list(uniq(['Hello', 'hello', 'world', 'world', 'Poland', 'poland']))
['Hello', 'world', 'Poland']
UPDATE
The previous version does not take the preference for uppercase over lowercase into account. In the updated version I use min, as @TheSoundDefense did.
import collections
def uniq(words):
    """Return the case-insensitively unique words, preferring capitalized ones.

    Words whose lower-cased forms are equal are treated as the same word.  For
    each such group the smallest spelling under string comparison is kept;
    uppercase letters compare lower than lowercase ones, so ``'Cup'`` wins
    over ``'cup'``.  The result is a list ordered by first appearance of each
    group.
    """
    seen = collections.OrderedDict()  # Use {} if the order is not important.
    for word in words:
        key = word.lower()  # Use `word.casefold()` if possible (3.3+)
        seen[key] = min(word, seen.get(key, word))
    # list() gives the same concrete type on Python 2 and 3; on Python 3 a
    # bare .values() is a view that never compares equal to a list.
    return list(seen.values())
Since an uppercase letter is "smaller" than a lowercase letter in a comparison, I think you can do this:
# De-duplicate orig_list case-insensitively, keeping the capitalized spelling.
orig_list = ["Hello", "hello", "world", "world", "Poland", "poland"]
unique_list = []
for word in orig_list:
    for i, kept in enumerate(unique_list):
        if kept.lower() == word.lower():
            # Uppercase letters compare lower than lowercase ones, so min()
            # keeps the capitalized spelling.
            unique_list[i] = min(word, kept)
            break
    else:
        # No break: this word has not been seen in any casing yet.
        unique_list.append(word)
The min will have a preference for words with uppercase letters earlier on.
Some better answers here, but hopefully something simple, different and useful. This code satisfies the conditions of your test, sequential pairs of matching words, but would fail on anything more complicated; such as non-sequential pairs, non-pairs or non-strings. Anything more complicated and I'd take a different approach.
p1 = ['Hello', 'hello', 'world', 'world', 'Poland', 'poland']
p2 = ['hello', 'Hello', 'world', 'world', 'Poland', 'Poland']


def pref_upper(p):
    """Pick one word from each consecutive pair, preferring a capitalized one.

    ``p`` is read as pairs ``(p[0], p[1]), (p[2], p[3]), ...``; an unpaired
    trailing element is ignored.  The first word of a pair is chosen when it
    starts with an uppercase letter, otherwise the second word is chosen.  A
    pair contributes nothing unless both words start with a cased letter.
    """
    q = []
    # zip() over the even- and odd-indexed slices walks the pairs without the
    # len(p) / 2 division, which yields a float on Python 3 and breaks range().
    for first, second in zip(p[::2], p[1::2]):
        # [:1] instead of [0] so that an empty string is skipped, not an error.
        lead_first, lead_second = first[:1], second[:1]
        first_is_cased = lead_first.isupper() or lead_first.islower()
        second_is_cased = lead_second.isupper() or lead_second.islower()
        if not (first_is_cased and second_is_cased):
            continue
        q.append(first if lead_first.isupper() else second)
    return q


# Parenthesized so that the calls are valid on Python 3 as well as Python 2.
print(pref_upper(p1))
print(pref_upper(p2))