Errors in Python AI

I am currently programming an Artificial Intelligence in Python, with some basic code from ELIZA. I will improve on the code once I get it working. My problem is that when I run the program and enter a query to the computer, there is no response. My code is below.
import string

# OSWALD v1.0

switch = [
    ["I need \(.*\)",
     ["Why do you need %1?",
      "Would it REALLY help you to get %1?",
      "Are you sure you need %1?"]],
    # There is more code with responses.
]

gPats = {
    "am"    : "are",
    "was"   : "were",
    "i"     : "you",
    "i'd"   : "you would",
    "i've"  : "you have",
    "i'll"  : "you will",
    "my"    : "your",
    "are"   : "am",
    "you've": "I have",
    "you'll": "I will",
    "your"  : "my",
    "yours" : "mine",
    "you"   : "me",
    "me"    : "you",
}

s = input
gKeys = map(lambda x: regex.compile(x[0]), gPats)
gValues = map(lambda x: x[1], gPats)

print("Hello, mortal. My name is Oswald. What would you like to talk about?")

while s == input:
    try: s = input(">")

def translate(str, dict):
    words = string.split(string.lower(str))
    keys = dict.keys()
    for i in range(0, len(words)):
        if words[i] in keys:
            words[i] = dict[words[i]]
    return print(switch)

def respond(str, keys, values):
    for i in range(0, len(keys)):
        if input == input:
            respnum = whrandom.randint(0, len(values[word])-1)
            resp = values[i][respnum]
            pos = string.find(resp, '%')
            print(string.find(resp, '%'))
            while pos > -1:
                num = string.atoi(resp[pos+1:pos+2])
                resp = resp[:pos] + \
                       translate(keys[i].group(num), gReflections) + \
                       resp[pos+2:]
                pos = string.find(resp, '%')
            if resp[-2:] == '?.': resp = resp[:-2] + '.'
            if resp[-2:] == '??': resp = resp[:-2] + '?'
            print(string.find(resp, '%'))

Related

name 'stop_words' is not defined

What am I doing wrong? I already defined it, but it keeps saying it's not defined.
# Pre-process the comments
def preprocess_text(text):
    # Lowercase the text
    text = text.lower()
    # Remove punctuations
    text = re.sub(r'[^\w\s]', '', text)
    # Tokenize the text
    words = word_tokenize(text)
    stop_words = set(stopwords.words("english"))
    stop_words.update(["a", "an", "and", "are", "as", "at", "be", "by",
                       "for", "from", "has", "he", "in", "is", "it", "its", "of", "on",
                       "that", "the", "to", "was", "were", "will", "with"])
    words = [word for word in words if word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]
    return words

comments_processed = [preprocess_text(comment) for comment in comments]

# Perform sentiment analysis on the comments
sentiments = []
for comment in comments:
    sentiment = TextBlob(comment).sentiment.polarity
    sentiments.append(sentiment)

# Identify the top 3 best and worst things about the product
positive_features = {}
negative_features = {}
for i in range(len(comments)):
    comment = comments[i]
    sentiment = sentiments[i]
    words = preprocess_text(comment)
    for word in words:
        if sentiment > 0:
            if word in positive_features:
                positive_features[word] += 1
            else:
                positive_features[word] = 1
        elif sentiment < 0:
            if word in negative_features:
                negative_features[word] += 1
            else:
                negative_features[word] = 1

top_positive_features = sorted(positive_features, key=positive_features.get, reverse=True)[:3]
top_negative_features = sorted(negative_features, key=negative_features.get, reverse=True)[:3]

# Visualize the results using word clouds
positive_cloud = WordCloud(width=800, height=800, background_color='white', stopwords=stop_words, min_font_size=10).generate_from_frequencies(positive_features)
negative_cloud = WordCloud(width=800, height=800, background_color='white', stopwords=stop_words, min_font_size=10).generate_from_frequencies(negative_features)
What is wrong here?
NameError                                 Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_1612\1814734049.py in <module>
     63
     64 # Visualize the results using word clouds
---> 65 positive_cloud = WordCloud(width=800, height=800, background_color='white', stopwords=stop_words, min_font_size=10).generate_from_frequencies(positive_features)
     66
     67 negative_cloud = WordCloud(width=800, height=800, background_color='white', stopwords=stop_words, min_font_size=10).generate_from_frequencies(negative_features)

NameError: name 'stop_words' is not defined
Your stop_words is defined only in the function preprocess_text(), so its scope is limited to that function.
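One way to fix it (a minimal sketch, assuming the question's imports for re, word_tokenize, stopwords and WordNetLemmatizer) is to build stop_words once at module level, so that both preprocess_text() and the WordCloud calls can see it:

stop_words = set(stopwords.words("english"))
stop_words.update(["a", "an", "and", "are", "as", "at", "be", "by",
                   "for", "from", "has", "he", "in", "is", "it", "its",
                   "of", "on", "that", "the", "to", "was", "were",
                   "will", "with"])

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    words = word_tokenize(text)
    # The module-level stop_words is visible here and in the WordCloud calls.
    words = [word for word in words if word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(word) for word in words]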

How to convert a text file into a JSON file?

I am new to Python and I want to convert a text file into a JSON file.
Here's what the file looks like:
#Q Three of these animals hibernate. Which one does not?
^ Sloth
A Mouse
B Sloth
C Frog
D Snake
#Q What is the literal translation of the Greek word Embioptera, which denotes an order of insects, also known as webspinners?
^ Lively wings
A Small wings
B None of these
C Yarn knitter
D Lively wings
#Q There is a separate species of scorpions which have two tails, with a venomous sting on each tail.
^ False
A True
B False
Contd....
^ means Answer.
I want it in JSON format as shown below.
Example:
{
    "questionBank": [
        {
            "question": "Grand Central Terminal, Park Avenue, New York is the worlds",
            "a": "largest railway station",
            "b": "Longest railway station",
            "c": "highest railway station",
            "d": "busiest railway station",
            "answer": "largest railway station"
        },
        {
            "question": "Eritrea, which became the 182nd member of the UN in 1993, is in the continent of",
            "a": "Asia",
            "b": "Africa",
            "c": "Europe",
            "d": "Oceania",
            "answer": "Africa"
        }, Contd.....
    ]
}
I came across a few similar posts and here's what I have tried:
dataset = "file.txt"
data = []
with open(dataset) as ds:
for line in ds:
line = line.strip().split(",")
print(line)
To which the output is:
['']
['#Q What part of their body do the insects from order Archaeognatha use to spring up into the air?']
['^ Tail']
['A Antennae']
['B Front legs']
['C Hind legs']
['D Tail']
['']
['#Q What is the literal translation of the Greek word Embioptera', ' which denotes an order of insects', ' also known as webspinners?']
['^ Lively wings']
['A Small wings']
['B None of these']
['C Yarn knitter']
['D Lively wings']
['']
Contd....
The sentences containing commas get split across multiple list elements. I tried to use .join, but didn't get the results I was expecting.
Please let me know how to approach this.
dataset = "text.txt"
question_bank = []
with open(dataset) as ds:
for i, line in enumerate(ds):
line = line.strip("\n")
if len(line) == 0:
question_bank.append(question)
question = {}
elif line.startswith("#Q"):
question = {"question": line}
elif line.startswith("^"):
question['answer'] = line.split(" ")[1]
else:
key, val = line.split(" ", 1)
question[key] = val
question_bank.append(question)
print({"questionBank":question_bank})
#for storing json file to local directory
final_output = {"questionBank":question_bank}
with open("output.json", "w") as outfile:
outfile.write(json.dumps(final_output, indent=4))
Rather than handling the lines one at a time, I went with a regex pattern approach.
This is also more reliable, as it will error out if the input data is in a bad format, rather than silently ignoring a grouping which is missing a field.
import json
import re

PATTERN = re.compile(
    r"[#]Q (?P<question>.+)\n"
    r"\^ (?P<answer>.+)\n"
    r"A (?P<option_a>.+)\n"
    r"B (?P<option_b>.+)"
    r"(?:\nC (?P<option_c>.+))?"
    r"(?:\nD (?P<option_d>.+))?"
)

def parse_qa_group(qa_group):
    """
    Extract question, answer and 2 to 4 options from input string and return as a dict.
    """
    # "group" here is a set of question, answer and options.
    matches = PATTERN.search(qa_group)
    # "group" here is a regex group.
    question = matches.group('question')
    answer = matches.group('answer')
    # Optional named groups return None when they did not participate in the match.
    c = matches.group('option_c')
    d = matches.group('option_d')
    results = {
        "question": question,
        "answer": answer,
        "a": matches.group('option_a'),
        "b": matches.group('option_b')
    }
    if c:
        results['c'] = c
    if d:
        results['d'] = d
    return results

# Split into groups using the blank line.
qa_groups = question_answer_str.split('\n\n')

# Process each group, building up a list of all results.
all_results = [parse_qa_group(qa_group) for qa_group in qa_groups]
print(json.dumps(all_results, indent=4))
Further details are in my gist; read more on regex grouping.
I left out reading the text and writing a JSON file.
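A minimal sketch of that left-out part (the file names are assumptions):

import json

# Read the whole input file into one string.
with open("file.txt") as f:
    question_answer_str = f.read().strip()

qa_groups = question_answer_str.split('\n\n')
all_results = [parse_qa_group(qa_group) for qa_group in qa_groups]

# Write the result in the requested {"questionBank": [...]} shape.
with open("output.json", "w") as f:
    json.dump({"questionBank": all_results}, f, indent=4)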

How to add dictionaries inside a list

I want to join consecutive messages of a chat, so I created a list to hold all the dictionaries:
messages = ["Hello", "How you doing", "fine", "how can I help you", "how to do this?", "like this", "thanks", "man", "no problem"]
Person1 = [True, True, False, False, True, False, True, True, False]
data = []
chat_messages = messages
people = Person1
k = 0

for i in range(len(messages)):
    if people[i] == people[i+1]:
        chat_messages[i+1] = chat_messages[i] + " " + chat_messages[i+1]
        chatData = {'text': chat_messages[i+1], 'person1': people[i]}
        data[k] = chatData
    else:
        k += 1
        chatData = {'text': chat_messages[i+1], 'person1': people[i+1]}
        print(chatData)
        data[k] = chatData

print(data)
I'm getting this error:

  File "main.py", line 20, in <module>
    data[k] = chatData
IndexError: list assignment index out of range
How can I fix it please?
I want the output of data to look like this:
data = [{'text': 'Hello How you doing', 'person1': True} , {'text': 'fine how can I help you', 'person1': False}, {'text': 'how to do this?', 'person1': True}]
You can't add elements to a list in Python this way; you have to use the list method append():
data.append(chatData)
This method adds the element to the end of the list.
You can learn more about Python list methods using this link:
https://www.geeksforgeeks.org/list-methods-python/
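To see the difference (a minimal sketch):

data = []
# data[0] = chatData   # IndexError: the index does not exist yet
data.append({'text': 'Hello', 'person1': True})  # grows the list instead
print(data)  # [{'text': 'Hello', 'person1': True}]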
The problem is that when you use the index i + 1, it raises an error once i + 1 reaches 9, because your list's last index is 8. Here is my solution:
messages = ["Hello", "How you doing", "fine", "how can I help you", "how to do this?", "like this", "thanks", "man", "no problem"]
Person1 = [True, True, False, False, True, False, True, True, False]
chat_messages = messages
people = Person1
data = []

for i, msg in enumerate(messages):
    try:
        if people[i] == people[i+1]:
            chat_messages[i+1] = chat_messages[i] + " " + chat_messages[i+1]
            data.append({'text': chat_messages[i+1], 'person1': people[i]})
    except IndexError:
        # people[i+1] does not exist for the last message.
        pass

print(data)
messages = ["Hello", "How you doing", "fine", "how can I help you", "how to do this?", "like this", "thanks", "man", "no problem"]
Person1 = [True, True, False, False, True, False, True, True, False]
data = []
chat_messages = messages
people = Person1

for i in range(len(messages)):
    if len(messages) - 1 == i:
        # Last message: there is no people[i+1] to compare against.
        pass
    else:
        if people[i] == people[i+1]:
            chat_messages[i+1] = chat_messages[i] + " " + chat_messages[i+1]
            chatData = {'text': chat_messages[i+1], 'person1': people[i]}
            data.append(chatData)
        else:
            chatData = {'text': chat_messages[i+1], 'person1': people[i+1]}
            print(chatData)
            data.append(chatData)

print(data)

String manipulation in Python using a list

I have some tweets which contain shorthand text like ur, bcz etc. I am using a dictionary to map them to the correct words. I know we cannot mutate strings in Python, so after replacing a shorthand with the correct word, I store a copy in a new list. It's working, but I am facing an issue when a tweet has more than one shorthand word.
My code replaces only one word at a time. How can I replace multiple words in a single string?
Here is my code:
# some sample tweets
tweet = ['stats is gr8', 'india is grt bcz it is colourfull', 'i like you', 'your movie is grt', 'i hate ur book of hatred']
short_text = {
    "bcz": "because",
    "ur": "your",
    "grt": "great",
    "gr8": "great",
    "u": "you"
}

import re

def find_word(text, search):
    result = re.findall('\\b' + search + '\\b', text, flags=re.IGNORECASE)
    if len(result) > 0:
        return True
    else:
        return False

corrected_tweets = list()
for i in tweet:
    tweettoken = i.split()
    for short_word in short_text:
        print("current iteration")
        for tok in tweettoken:
            if find_word(tok, short_word):
                print(tok)
                print(i)
                newi = i.replace(tok, short_text[short_word])
                corrected_tweets.append(newi)
                print(newi)
My output is:

['stats is great',
 'india is grt because it is colourfull',
 'india is great bcz it is colourfull',
 'your movie is great',
 'i hate your book of hatred']

What I need is for tweets 2 and 3 each to be appended once, with all corrections applied. I am new to Python. Any help will be great.
Use a regex substitution on word boundaries, fetching the replacement from the dictionary (with the original word as default, so it returns the word unchanged if it is not found):
tweet = ['stats is gr8', 'india is grt bcz it is colourfull', 'i like you', 'your movie is grt', 'i hate ur book of hatred']
short_text = {
    "bcz": "because",
    "ur": "your",
    "grt": "great",
    "gr8": "great",
    "u": "you"
}

import re

changed = [re.sub(r"\b(\w+)\b", lambda m: short_text.get(m.group(1), m.group(1)), x) for x in tweet]
result:
['stats is great', 'india is great because it is colourfull', 'i like you', 'your movie is great', 'i hate your book of hatred']
This approach is very fast because it does an O(1) dictionary lookup for each word (it doesn't depend on the size of the dictionary).
The advantage of re with word boundaries over str.split is that it also works when words are separated by punctuation.
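For example, a quick check of the punctuation case (the sample string is made up):

import re

short_text = {"ur": "your", "gr8": "great"}
print(re.sub(r"\b(\w+)\b", lambda m: short_text.get(m.group(1), m.group(1)), "thanks, ur gr8!"))
# thanks, your great!
# A plain split(" ") would have produced the token "gr8!", which misses the dictionary key.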
You can use a list comprehension for this:
[' '.join(short_text.get(s, s) for s in new_str.split()) for new_str in tweet]
result:
In [1]: tweet = ['stats is gr8', 'india is grt bcz it is colourfull', 'i like you','your movie is grt', 'i hate ur book of hatred' ]
...:
In [2]: short_text={
...: "bcz" : "because",
...: "ur" : "your",
...: "grt" : "great",
...: "gr8" : "great",
...: "u" : "you"
...: }
In [4]: [' '.join(short_text.get(s, s) for s in new_str.split()) for new_str in tweet]
Out[4]:
['stats is great',
'india is great because it is colourfull',
'i like you',
'your movie is great',
'i hate your book of hatred']
You can try this approach:
tweet = ['stats is gr8', 'india is grt bcz it is colourfull', 'i like you', 'your movie is grt', 'i hate ur book of hatred']
short_text = {
    "bcz": "because",
    "ur": "your",
    "grt": "great",
    "gr8": "great",
    "u": "you"
}

for j, i in enumerate(tweet):
    data = i.split()
    for index_np, value in enumerate(data):
        if value in short_text:
            data[index_np] = short_text[value]
    tweet[j] = " ".join(data)

print(tweet)
output:
['stats is great', 'india is great because it is colourfull', 'i like you', 'your movie is great', 'i hate your book of hatred']

Counting lines from a string

I need to create a program which removes punctuation and some specific words, removes duplicates, and returns the remaining words together with their line numbers. I also need to keep track of the lines on which duplicates occur. For instance,
Python IDLE
Indexer: type in lines, finish with a . at start of line only
It is a briskly blowing wind that blows
from the north, the North of my youth.
The wind is cold too, colder than the
winds of yesteryear.
.
The index is:
brisk 1
blow 1
wind 1, 3, 4
north 2
youth 2
cold 3
yesteryear 4
The problem: I need to keep track of the line numbers of the remaining words, including their duplicates, and I'm not able to do that.
from string import *

stopWords = ["a", "i", "it", "am", "at", "on", "in", "to", "too", "very",
             "of", "from", "here", "even", "the", "but", "and", "is", "my",
             "them", "then", "this", "that", "than", "though", "so", "are"]
endings = ["es", "ed", "er", "ly"]
punctuation = [".", ",", ":", ";", "!", "?", "&", "'"]

unindexed_sentence = raw_input("type in lines, finish with a . at start of line only").lower()

# removing duplicates.
def unique_string(l):
    ulist = []
    ulist2 = []
    [ulist.append(x) for x in l if x not in ulist]
    [ulist2.append(x)]
    global ulist2
    return ulist

unindexed_sentence = ' '.join(unique_string(unindexed_sentence.split()))
unindexed_sentence1 = split(unindexed_sentence, "\n")

list_unindexed = []

# splitting
i = 0
while i < len(unindexed_sentence1):
    list_unindexed += [split(unindexed_sentence1[i])]
    i += 1

countline = 0
i = 0
while i < len(list_unindexed):
    j = 0
    while j < len(list_unindexed[i]):
        if list_unindexed[i][j][0] in punctuation:
            list_unindexed[i][j] = list_unindexed[i][j][:0]
        if list_unindexed[i][j][-1] in punctuation:
            list_unindexed[i][j] = list_unindexed[i][j][:-1]
        if list_unindexed[i][j][-1] == "s":
            list_unindexed[i][j] = list_unindexed[i][j][:-1]
        if list_unindexed[i][j][-2:] in endings:
            list_unindexed[i][j] = list_unindexed[i][j][:-2]
        if list_unindexed[i][j][-3:] == "ing":
            list_unindexed[i][j] = list_unindexed[i][j][:-3]
        if list_unindexed[i][j] in stopWords:
            del list_unindexed[i][j]
        else:
            j += 1
    i += 1
    countline += 1

def new_line(n):
    split(n, "\n")
    count = 1
    if n[-1] == "\n":
        count += 1
    return count

string1 = str(list_unindexed)
string2 = str(string1)
string2 = '\n'.join(unique_string(string2.split()))
print string2
Is this your homework?
Here are some tips (a sketch putting them together follows the list):
- Don't do from string import *. You don't need it.
- Use data.splitlines() to get a list of lines.
- Use enumerate() to get an index, e.g.: for i, line in enumerate(data.splitlines())
- Use a dictionary for keeping track of all words. Each value could be a list or a set of line numbers.
- Don't remove duplicates initially. You can do this later using dictionaries or sets.
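A minimal sketch putting those tips together (names are illustrative; the suffix stripping from the question is left out for brevity):

stop_words = {"a", "i", "it", "am", "at", "on", "in", "to", "too", "very",
              "of", "from", "here", "even", "the", "but", "and", "is", "my",
              "them", "then", "this", "that", "than", "though", "so", "are"}
punctuation = ".,:;!?&'"

def build_index(data):
    index = {}  # word -> set of line numbers
    for i, line in enumerate(data.splitlines(), start=1):
        for word in line.lower().split():
            word = word.strip(punctuation)
            if word and word not in stop_words:
                index.setdefault(word, set()).add(i)
    return index

text = ("It is a briskly blowing wind that blows\n"
        "from the north, the north of my youth.")
for word, lines in sorted(build_index(text).items()):
    print(word, ", ".join(str(n) for n in sorted(lines)))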
