Related
import os
import random
file = open('getty.txt')
filetext = file.read()
def getline(words,length):
ans=[]
total=0
while (length>total) and 0 != len(words):
word=words.pop(0)
total += len(word)+1 #add 1 for the space
ans.append(word)
#now we are one word too long
if total > length:
words.insert(0,ans.pop())
return ans
def printPara(words,length):
line = []
spaces = []
while len(words) != 0:
line.append(getline(words, length))
for z in range(0,len(line)):
for i in range(0,len(line[z])):
spaces = [[1] * len(line[i]) for i in range (len(line))]
for p in range (0,len(spaces)):
spaces[p][len(spaces[p])-1] = 0
if len(words) + len(spaces) != 0:
addSpace(line,spaces,length)
printLine(line,spaces)
else:
printLine(line,spaces)
def addSpace(line,spaces,length):
totalInt = 0
for i in range (0, len(line)):
totalInt = (len(spaces[i])-2) + len(line[i])
while length < totalInt:
num = random.randint(0, len(spaces) - 2)
spaces[num] += 1
return spaces
def printLine(line, spaces):
for i in range (len(line)):
print(str(line[i]) + (' ' * len(spaces[i])))
def main():
length = 75
textparagraph = filetext.split("\n\n")
para = [0] * len(textparagraph)
for i in range (0, len(textparagraph)):
para[i] = textparagraph[i]
words = [[0] * len(textparagraph) for i in range(len(para))]
for b in range (0,len(para)):
words[b] = para[b].split()
for z in range (0, len(para)):
printPara(words[z],length)
main()
My code outputs only lists of the separate lines and will not concatenate the two lists of words and spaces. How would I get it to output correctly?
Some exampes of output.
['Four', 'score', 'and', 'seven', 'years', 'ago', 'our', 'fathers', 'brought', 'forth', 'on', 'this']
['continent,', 'a', 'new', 'nation,', 'conceived', 'in', 'Liberty,', 'and', 'dedicated', 'to', 'the']
['proposition', 'that', 'all', 'men', 'are', 'created', 'equal.']
['Now', 'we', 'are', 'engaged', 'in', 'a', 'great', 'civil', 'war,', 'testing', 'whether', 'that', 'nation,', 'or']
['any', 'nation', 'so', 'conceived', 'and', 'so', 'dedicated,', 'can', 'long', 'endure.', 'We', 'are', 'met', 'on', 'a']
Expected output "Four score and seven years ago..."
You can use
" ".join(["hello", "world"])
and you'll get
"hello world"
This is a word processing code for chabot, in it it removes some articles and prepositions to make it easier for the bot to read
import json
from random import choice
class ChatterMessage:
def __init__(self, raw):
self.raw = str(raw).lower()
self.processed_str = self.reduce()
self.responses = self.get_responses()
self.data = self.process_response()
self.response = choice(self.data['response'])
def remove_unwanted_chars(self, string):
list_of_chars = ['?', ".", ",", "!", "#", "[", "]", "{", "}", "#", "$", "%", "*", "&", "(", ")", "-", "_", "+", "="]
new_str = ""
for char in string:
if char not in list_of_chars:
new_str += str(char)
return new_str
def get_responses(self, response_file="info.json"):
with open(response_file, 'r') as file:
return json.loads(file.read())
def reduce(self):
stopwords = ['de', 'a', 'o', 'que', 'e', 'é', 'do', 'da', 'em', 'um', 'para', 'com', 'não', 'uma', 'os', 'no', 'se', 'na', 'por', 'mais', 'as', 'dos', 'como', 'mas', 'ao', 'ele', 'das', 'à', 'seu', 'sua', 'ou', 'quando', 'muito', 'nos', 'já', 'eu', 'também', 'só', 'pelo', 'pela', 'até', 'isso', 'ela', 'entre', 'depois', 'sem', 'mesmo', 'aos', 'seus', 'quem', 'nas', 'me', 'esse', 'eles', 'você', 'essa', 'num', 'nem', 'suas', 'meu', 'às', 'minha', 'numa', 'pelos', 'elas', 'qual', 'nós', 'lhe', 'deles', 'essas', 'esses', 'pelas', 'este', 'dele', 'tu', 'te', 'vocês', 'vos', 'lhes', 'meus', 'minhas', 'teu', 'tua', 'teus', 'tuas', 'nosso', 'nossa', 'nossos', 'nossas', 'dela', 'delas', 'esta', 'estes', 'estas', 'aquele', 'aquela', 'aqueles', 'aquelas', 'isto', 'aquilo', 'estou', 'está', 'estamos', 'estão', 'estive', 'esteve', 'estivemos', 'estiveram', 'estava', 'estávamos', 'estavam', 'estivera', 'estivéramos', 'esteja', 'estejamos', 'estejam', 'estivesse', 'estivéssemos', 'estivessem', 'estiver', 'estivermos', 'estiverem', 'hei', 'há', 'havemos', 'hão', 'houve', 'houvemos', 'houveram', 'houvera', 'houvéramos', 'haja', 'hajamos', 'hajam', 'houvesse', 'houvéssemos', 'houvessem', 'houver', 'houvermos', 'houverem', 'houverei', 'houverá', 'houveremos', 'houverão', 'houveria', 'houveríamos', 'houveriam', 'sou', 'somos', 'são', 'era', 'éramos', 'eram', 'fui', 'foi', 'fomos', 'foram', 'fora', 'fôramos', 'seja', 'sejamos', 'sejam', 'fosse', 'fôssemos', 'fossem', 'for', 'formos', 'forem', 'serei', 'será', 'seremos', 'serão', 'seria', 'seríamos', 'seriam', 'tenho', 'tem', 'temos', 'tém', 'tinha', 'tínhamos', 'tinham', 'tive', 'teve', 'tivemos', 'tiveram', 'tivera', 'tivéramos', 'tenha', 'tenhamos', 'tenham', 'tivesse', 'tivéssemos', 'tivessem', 'tiver', 'tivermos', 'tiverem', 'terei', 'terá', 'teremos', 'terão', 'teria', 'teríamos', 'teriam']
custom_filter = []
keywords_list = []
strlist = self.raw.split(" ")
for x in strlist:
if x not in stopwords and x not in custom_filter:
keywords_list.append(self.remove_unwanted_chars(x))
return keywords_list
def process_response(self):
percentage = lambda x, y: (100 * y) / x
total = sum(len(x['keywords']) for x in self.responses)
most_acc = 0
response_data = None
acc = 0
for value in self.responses:
c = 0
for x in value['keywords']:
if str(x).lower() in self.processed_str:
c += 1
if c > most_acc:
most_acc = c
acc = percentage(total, most_acc)
print(acc)
response_data = value
if acc < 6:
return {"response": "Sorry, I do not understand. Be more clear please"}
for x in self.processed_str:
if x not in response_data['keywords']:
response_data['keywords'].append(x)
return response_data
if __name__ == '__main__':
while True:
k = input("Você: ")
res = ChatterMessage(k)
.response
print("Bot:", res)
How to remove accents from keyword strings to "make it easier" for chatbot to read? I found this explanation: How to remove string accents using Python 3? But I don't know how it would be applied to this code as the bot always stops responding
You could use the Python package unidecode that replaces special characters with ASCII equivalents.
from unidecode import unidecode
text = "Björn, Łukasz and Σωκράτης."
print(unidecode(text))
# ==> Bjorn, Lukasz and Sokrates.
You could apply this to both the input and keywords.
# In the function definition of reduce(), place this line of code after
# stopwords = ['de', 'a', 'o', .....])
stopwords = [unidecode(s) for s in stopwords]
# In "__main__": replace k = input("Você: ") with the following line of code.
k = unidecode(input("Você: "))
If it makes sense, you could also force the strings to be all lowercase. This will make your string comparisons even more robust.
k = unidecode(input("Você: ").lower())
Because you requested the entire code:
import json
from random import choice
from unidecode import unidecode
class ChatterMessage:
def __init__(self, raw):
self.raw = str(raw).lower()
self.processed_str = self.reduce()
self.responses = self.get_responses()
self.data = self.process_response()
self.response = choice(self.data['response'])
def remove_unwanted_chars(self, string):
list_of_chars = ['?', ".", ",", "!", "#", "[", "]", "{", "}", "#", "$", "%", "*", "&", "(", ")", "-", "_", "+", "="]
new_str = ""
for char in string:
if char not in list_of_chars:
new_str += str(char)
return new_str
def get_responses(self, response_file="info.json"):
with open(response_file, 'r') as file:
return json.loads(file.read())
def reduce(self):
stopwords = ['de', 'a', 'o', 'que', 'e', 'é', 'do', 'da', 'em', 'um', 'para', 'com', 'não', 'uma', 'os', 'no', 'se', 'na', 'por', 'mais', 'as', 'dos', 'como', 'mas', 'ao', 'ele', 'das', 'à', 'seu', 'sua', 'ou', 'quando', 'muito', 'nos', 'já', 'eu', 'também', 'só', 'pelo', 'pela', 'até', 'isso', 'ela', 'entre', 'depois', 'sem', 'mesmo', 'aos', 'seus', 'quem', 'nas', 'me', 'esse', 'eles', 'você', 'essa', 'num', 'nem', 'suas', 'meu', 'às', 'minha', 'numa', 'pelos', 'elas', 'qual', 'nós', 'lhe', 'deles', 'essas', 'esses', 'pelas', 'este', 'dele', 'tu', 'te', 'vocês', 'vos', 'lhes', 'meus', 'minhas', 'teu', 'tua', 'teus', 'tuas', 'nosso', 'nossa', 'nossos', 'nossas', 'dela', 'delas', 'esta', 'estes', 'estas', 'aquele', 'aquela', 'aqueles', 'aquelas', 'isto', 'aquilo', 'estou', 'está', 'estamos', 'estão', 'estive', 'esteve', 'estivemos', 'estiveram', 'estava', 'estávamos', 'estavam', 'estivera', 'estivéramos', 'esteja', 'estejamos', 'estejam', 'estivesse', 'estivéssemos', 'estivessem', 'estiver', 'estivermos', 'estiverem', 'hei', 'há', 'havemos', 'hão', 'houve', 'houvemos', 'houveram', 'houvera', 'houvéramos', 'haja', 'hajamos', 'hajam', 'houvesse', 'houvéssemos', 'houvessem', 'houver', 'houvermos', 'houverem', 'houverei', 'houverá', 'houveremos', 'houverão', 'houveria', 'houveríamos', 'houveriam', 'sou', 'somos', 'são', 'era', 'éramos', 'eram', 'fui', 'foi', 'fomos', 'foram', 'fora', 'fôramos', 'seja', 'sejamos', 'sejam', 'fosse', 'fôssemos', 'fossem', 'for', 'formos', 'forem', 'serei', 'será', 'seremos', 'serão', 'seria', 'seríamos', 'seriam', 'tenho', 'tem', 'temos', 'tém', 'tinha', 'tínhamos', 'tinham', 'tive', 'teve', 'tivemos', 'tiveram', 'tivera', 'tivéramos', 'tenha', 'tenhamos', 'tenham', 'tivesse', 'tivéssemos', 'tivessem', 'tiver', 'tivermos', 'tiverem', 'terei', 'terá', 'teremos', 'terão', 'teria', 'teríamos', 'teriam']
stopwords = [unidecode(s) for s in stopwords]
custom_filter = []
keywords_list = []
strlist = self.raw.split(" ")
for x in strlist:
if x not in stopwords and x not in custom_filter:
keywords_list.append(self.remove_unwanted_chars(x))
return keywords_list
def process_response(self):
percentage = lambda x, y: (100 * y) / x
total = sum(len(x['keywords']) for x in self.responses)
most_acc = 0
response_data = None
acc = 0
for value in self.responses:
c = 0
for x in value['keywords']:
if str(x).lower() in self.processed_str:
c += 1
if c > most_acc:
most_acc = c
acc = percentage(total, most_acc)
print(acc)
response_data = value
if acc < 6:
return {"response": "Sorry, I do not understand. Be more clear please"}
for x in self.processed_str:
if x not in response_data['keywords']:
response_data['keywords'].append(x)
return response_data
if __name__ == '__main__':
while True:
k = unidecode(input("Você: "))
res = ChatterMessage(k).response
print("Bot:", res)
This question already has answers here:
How to check if type of a variable is string? [duplicate]
(22 answers)
Closed 2 years ago.
import random
import sys
def v1_debug(v1, subject):
if v1 != str and subject != str:
sys.exit()
else:
if subject == 'He' or 'She' or 'It':
for i in v1:
if i == [len(v1)+1]:
if i == 's' or 'z' or 'x' or 'o':
v1 = v1 + 'es'
elif i == 'y':
v1 = v1 - 'y' + 'ies'
elif v1[len(v1)] == 's' and v1[len(v1)+1] == 'h':
v1 = v1 + 'es'
elif v1[len(v1)] == 'c' and v1[len(v1)+1] == 'h':
v1 = v1 + 'es'
if subject == 'I' or 'You' or 'We' or 'They':
for i in v1:
if i == v1[len(v1)+1]:
v1 = v1 + 'ing'
return ''
def default_positive_form():
try:
sbj = ['He',
'She',
'It',
'I',
'You',
'We',
'They']
v1 = ['be',
'beat',
'become',
'begin',
'bend',
'bet',
'bid',
'bite',
'blow',
'break',
'bring',
'build',
'burn',
'buy',
'catch',
'choose',
'come',
'cost',
'cut',
'dig',
'dive',
'do',
'draw',
'dream',
'drive',
'drink',
'eat',
'fall',
'feel',
'fight',
'find',
'fly',
'forget',
'forgive',
'freeze',
'get',
'give',
'go',
'grow',
'hang',
'have',
'hear',
'hide',
'hit',
'hold',
'hurt',
'keep',
'know',
'lay',
'lead',
'leave',
'lend',
'let',
'lie',
'lose',
'make',
'mean',
'meet',
'pay',
'put',
'read',
'ride',
'ring',
'rise',
'run',
'say',
'see',
'sell',
'send',
'show',
'shut',
'sing',
'sit',
'sleep',
'speak',
'spend',
'stand',
'swim',
'take',
'teach',
'tear',
'tell',
'think',
'throw',
'understand',
'wake',
'wear',
'win',
'write']
sbj = random.choice(sbj)
v1 = random.choice(v1)
verb_debug = v1_debug(v1, sbj)
sen = ''
if sbj == 'I':
sen = sbj + 'am' + verb_debug
elif sbj == 'He' or 'She' or 'It':
sen = sbj + 'is' + verb_debug
elif sbj == 'You' or 'We' or 'They':
sen = sbj + 'are' + verb_debug
print(f'{sen}')
except NameError:
print('this is bullshit')
return
default_positive_form()
this is python 3.8
sen will only consist of an empty string if none of the conditions of your if/elif/elif blocks are met. Change the print line to
print(f"sen is: {sen}")
But that's not the real problem. obj != str does not check if obj is a string, it checks to see if the object is pointing to the type constant str (Thanks Charles Duffy for the comment). Instead, use the builtin function isinstance() like so:
if not isinstance(v1, str) and not isinstance(subject, str):
print("Variables are the wrong type!")
sys.exit()
I have created a function to search for the contexts of a given word(w) in a text, with left and right as parameters for flexibility in the number of words to record.
import re
def get_context (text, w, left, right):
text.insert (0, "*START*")
text.append ("*END*")
all_contexts = []
for i in range(len(text)):
if re.match(w,text[i], 0):
if i < left:
context_left = text[:i]
else:
context_left = text[i-left:i]
if len(text) < (i+right):
context_right = text[i:]
else:
context_right = text[i:(i+right+1)]
context = context_left + context_right
all_contexts.append(context)
return all_contexts
So for example if a have a text in the form of a list like this:
text = ['Python', 'is', 'dynamically', 'typed', 'language', 'Python',
'functions', 'really', 'care', 'about', 'what', 'you', 'pass', 'to',
'them', 'but', 'you', 'got', 'it', 'the', 'wrong', 'way', 'if', 'you',
'want', 'to', 'pass', 'one', 'thousand', 'arguments', 'to', 'your',
'function', 'then', 'you', 'can', 'explicitly', 'define', 'every',
'parameter', 'in', 'your', 'function', 'definition', 'and', 'your',
'function', 'will', 'be', 'automagically', 'able', 'to', 'handle',
'all', 'the', 'arguments', 'you', 'pass', 'to', 'them', 'for', 'you']
The function works fine for example:
get_context(text, "function",2,2)
[['language', 'python', 'functions', 'really', 'care'], ['to', 'your', 'function', 'then', 'you'], ['in', 'your', 'function', 'definition', 'and'], ['and', 'your', 'function', 'will', 'be']]
Now I am trying to build a dictionary with the contexts of every word in the text doing the following:
d = {}
for w in set(text):
d[w] = get_context(text,w,2,2)
But I am getting this error.
Traceback (most recent call last):
File "<pyshell#32>", line 2, in <module>
d[w] = get_context(text,w,2,2)
File "<pyshell#20>", line 9, in get_context
if re.match(w,text[i], 0):
File "/usr/lib/python3.4/re.py", line 160, in match
return _compile(pattern, flags).match(string)
File "/usr/lib/python3.4/re.py", line 294, in _compile
p = sre_compile.compile(pattern, flags)
File "/usr/lib/python3.4/sre_compile.py", line 568, in compile
p = sre_parse.parse(p, flags)
File "/usr/lib/python3.4/sre_parse.py", line 760, in parse
p = _parse_sub(source, pattern, 0)
File "/usr/lib/python3.4/sre_parse.py", line 370, in _parse_sub
itemsappend(_parse(source, state))
File "/usr/lib/python3.4/sre_parse.py", line 579, in _parse
raise error("nothing to repeat")
sre_constants.error: nothing to repeat
I don't understand this error. Can anyone help me with this?
The problem is that "*START*" and "*END*" are being interpreted as regex. Also, note that inserting "*START*" and "*END*" in text in the begging of the function will cause problem. You should do it just once.
Here is a complete version of the working code:
import re
def get_context(text, w, left, right):
all_contexts = []
for i in range(len(text)):
if re.match(w,text[i], 0):
if i < left:
context_left = text[:i]
else:
context_left = text[i-left:i]
if len(text) < (i+right):
context_right = text[i:]
else:
context_right = text[i:(i+right+1)]
context = context_left + context_right
all_contexts.append(context)
return all_contexts
text = ['Python', 'is', 'dynamically', 'typed', 'language',
'Python', 'functions', 'really', 'care', 'about', 'what',
'you', 'pass', 'to', 'them', 'but', 'you', 'got', 'it', 'the',
'wrong', 'way', 'if', 'you', 'want', 'to', 'pass', 'one',
'thousand', 'arguments', 'to', 'your', 'function', 'then',
'you', 'can', 'explicitly', 'define', 'every', 'parameter',
'in', 'your', 'function', 'definition', 'and', 'your',
'function', 'will', 'be', 'automagically', 'able', 'to', 'handle',
'all', 'the', 'arguments', 'you', 'pass', 'to', 'them', 'for', 'you']
text.insert(0, "START")
text.append("END")
d = {}
for w in set(text):
d[w] = get_context(text,w,2,2)
Maybe you can replace re.match(w,text[i], 0) with w == text[i].
The whole thing can be re-written very succinctly follows,
text = 'Python is dynamically typed language Python functions really care about what you pass to them but you got it the wrong way if you want to pass one thousand arguments to your function then you can explicitly define every parameter in your function definition and your function will be automagically able to handle all the arguments you pass to them for you'
Keeping it a str, assuming context = 'function',
pat = re.compile(r'(\w+\s\w+\s)functions?(?=(\s\w+\s\w+))')
pat.findall(text)
[('language Python ', ' really care'),
('to your ', ' then you'),
('in your ', ' definition and'),
('and your ', ' will be')]
Now, minor customization will be needed in the regex to allow for, words like say, functional or functioning not only function or functions. But the important idea is to do away with indexing and go more functional.
Please comment if this doesn't work out for you, when you apply it in bulk.
At least one of the elements in text contains characters that are special in a regular expression. If you're just trying to find whether the word is in the string, just use str.startswith, i.e.
if text[i].startswith(w): # instead of re.match(w,text[i], 0):
But I don't understand why you are checking for that anyways, and not for equality.
I'm developing a Hebrew python library for my toddler, who doesn't speak English yet. So far I've managed to make it work (function names and variables work fine). The problem is with 'if', 'while', 'for', etc. statements. if this were C++, for ex., I'd use
#define if אם
are there are any alternatives for #define in Python?
****EDIT*****
For now a quick and dirty solution works for me; instead of running the program I run this code:
def RunReady(Path):
source = open(Path, 'rb')
program = source.read().decode()
output = open('curr.py', 'wb')
program = program.replace('כל_עוד', 'while')
program = program.replace('עבור', 'for')
program = program.replace('אם', 'if')
program = program.replace(' ב ', ' in ')
program = program.replace('הגדר', 'def')
program = program.replace('אחרת', 'else')
program = program.replace('או', 'or')
program = program.replace('וגם', 'and')
output.write(program.encode('utf-8'))
output.close()
source.close()
import curr
current_file = 'Sapir_1.py'
RunReady(current_file)
Python 3 has 33 keywords of which only a few are used by beginners:
['False', 'None', 'True', 'and', 'as', 'assert', 'break', 'case', 'class', 'continue', 'def', 'default', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'match', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield']
Given that Python doesn't support renaming keywords it's probably easier to teach a few of these keywords along with teaching programming.
How about if you add the #define stuff then run the c preprocessor (but not the compiler) which will give you a python source.