Create sentences with markov chain in python - python

I have Python code that uses Markov chains to generate sentences, but for it to work I have to define two starting words. I would like the first word to be chosen at random instead.
this is the code:
import random
def getLines(filename):
    """Return the lines of *filename* without their trailing newline.

    The file is closed as soon as it has been read.  Only a real line
    terminator is removed, so a final line that does not end with a
    newline keeps its last character.
    """
    with open(filename) as handle:
        return [line.rstrip("\n") for line in handle]
def getWords(lines):
    """Return every whitespace-separated token of *lines* as one flat list.

    *lines* is an iterable of strings; the tokens keep their original order.
    """
    return [token for line in lines for token in line.split()]
def createProbabilityHash(words):
    """Map each distinct word in *words* to its relative frequency.

    Returns a dict ``{word: occurrences / len(words)}``; an empty input
    yields an empty dict.
    """
    numWords = len(words)
    wordCount = {}
    for word in words:
        # dict.get works on Python 2 and 3 (dict.has_key is gone in Python 3).
        wordCount[word] = wordCount.get(word, 0) + 1
    # float() forces true division on Python 2 as well.
    return {word: count / float(numWords) for word, count in wordCount.items()}
def getRandomWord(wordCount):
    """Draw one word at random, weighted by its probability.

    *wordCount* maps words to probabilities that are expected to sum to 1
    (as produced by createProbabilityHash).  Returns "" for an empty
    mapping.

    The previous implementation never used the random value and always
    returned the single most probable word, so generation was not random.
    """
    randomValue = random.random()
    cumulative = 0.0
    choosenWord = ""
    for word, probability in wordCount.items():
        cumulative += probability
        # Remember the latest word so rounding error in the running sum
        # can never leave us without a result.
        choosenWord = word
        if randomValue < cumulative:
            break
    return choosenWord
# Build a second-order Markov model: every pair of consecutive words maps to
# the words that were observed immediately after that pair.
words = getWords(getLines("frases.txt"))
if len(words) < 3:
    # At least one (pair -> next word) transition is needed.
    raise SystemExit("frases.txt must contain at least three words")
wordMap = {}
previous = (words[0], words[1])
for word in words[2:]:
    wordMap.setdefault(previous, []).append(word)
    previous = (previous[1], word)
# Replace each successor list with its {word: probability} table.
for pair in list(wordMap):
    wordMap[pair] = createProbabilityHash(wordMap[pair])
palavras = ['hello', 'apple', 'something', 'yeah', 'nope', 'lalala']
# Start from a random pair that really occurs in the corpus, instead of a
# hard-coded pair that may not be a key of wordMap.
previous = random.choice(list(wordMap))
numWords = 10 # The number of words to print
generated = [previous[0], previous[1]]
for i in range(numWords):
    if previous not in wordMap:
        # The last pair of the corpus may have no recorded successor.
        break
    word = getRandomWord(wordMap[previous])
    generated.append(word)
    if word.endswith(","):
        # Line break after a comma.
        generated.append("\n")
    if word.endswith("."):
        break
    previous = (previous[1], word)
# A single print call with one argument behaves the same on Python 2 and 3.
print(" ".join(generated))

This will choose at random from the words in your dictionary:
import random
# wordMap is keyed by (first, second) word pairs, and tuples cannot be
# assigned item by item, so pick a whole starting pair in one go.
previous = random.choice(list(wordMap.keys()))

Related

I've made an anagram function in python, I cannot figure out how to get the output to look like this: "Key": word1, word2, word3 etc

# Load every line of the word file (line terminators included) into `line`.
with open('words.txt', 'r') as read:
    line = list(read)
# Keys of anagram groups, recorded once per word appended to an existing group.
key_list = []


def make_anagram_dict(line):
    """Group words by their sorted-letter signature.

    *line* is an iterable of words (typically the lines of a word file).
    Each word is stripped of surrounding whitespace and lower-cased, so
    the trailing newline from ``readlines()`` no longer becomes part of
    the key, and "Zipper"/"zipper" count as one word.  Blank entries are
    ignored.

    Returns a dict mapping ``''.join(sorted(word))`` to a list of words.
    As before, only words longer than five characters are accumulated
    into a group; shorter words keep a single-element list.
    """
    word_list = {}
    for word in line:
        word = word.strip().lower()
        if not word:
            continue
        key = ''.join(sorted(word))
        group = word_list.get(key)
        if group is None or len(word) <= 5:
            word_list[key] = [word]
        elif word not in group:
            # Skip duplicates that differ only by case or whitespace.
            group.append(word)
            key_list.append(key)
    return word_list
if __name__ == '__main__':
    # Print only the groups that contain at least two anagrams.
    word_list = make_anagram_dict(line)
    anagram_groups = (group for group in word_list.values() if len(group) > 1)
    for words in anagram_groups:
        print('Words: {}'.format(', '.join(words)))
I.e I need it to look like this:
Key:
aeehrtw
Words:
weather
, whereat
, wreathe
I also have a problem where words in the .txt file are duplicated but one word starts with a capital, i.e Zipper and zipper. How can I have it so that it only uses one of the words?
To get the exact same output, you can try :
if __name__ == '__main__':
    word_list = make_anagram_dict(line)
    for key, words in word_list.items():
        if len(words) <= 1:
            # Not an anagram group.
            continue
        # One call, newline-separated: header, key, blank line, header, words.
        print('Key:', key, '', 'Words:', '\n, '.join(words), sep='\n')

Replace a sequence of characters by another one

I have a sequence of characters '-------' and I want to replace each '-' in it with the corresponding letter of 'jaillir', at the matching position.
How do i do that ?
Here is my code
import random

# Load the candidate words, one per line; blank lines are skipped so that an
# empty word can never be drawn.
with open("lexique.txt", "r", encoding="utf8") as a:
    words = [line.rstrip("\n") for line in a if line.strip()]
letters = []
tirets = []
choix = random.choice(words)
# One dash per letter of the secret word.
tiret = '-' * len(choix)
print(tiret)
print(choix)
accompli = False
while not accompli:
    lettre = input("Entrez une lettre du mot ")
    # Strings are immutable, so rebuild the mask: reveal every position whose
    # letter matches the guess and keep what was already shown elsewhere.
    tiret = ''.join(
        actual if actual == lettre else shown
        for actual, shown in zip(choix, tiret)
    )
    print(tiret)
    # Finished once the mask equals the secret word.
    accompli = tiret == choix
I think you need to fix your file reading code, even though it is not the question, as below:
# The mode must be the string 'r'; a bare r is an undefined name.
with open('lexique.txt', 'r', encoding='utf8') as f:
    text = f.read() # get file contents
Next to replace the ---- by a word, I am assuming that the dashes in your text will only ever be the same length as the word, so:
word = 'word' # any string e.g. word
dashes = '-' * len(word)
So now you can use python's string.replace method like so:
text = text.replace(dashes, word) # every time it finds the sequence of dashes it will be replaced by your word
With a for loop (gradual replacement):
word = 'word' # any word
length = len(word)
# Letters of `word` that still have to be written into the current dash run.
tempword = word
pieces = []
for i, letter in enumerate(text):
    remaining = text[i:i + len(tempword)]
    # Replace a dash only when the rest of the word still fits into dashes
    # from here on; len(remaining) is shorter than tempword at the end of text.
    if (letter == '-' and len(remaining) == len(tempword)
            and all(ch == '-' for ch in remaining)):
        pieces.append(tempword[0])
        # Move on to the next letter, or start over once the word is complete.
        tempword = tempword[1:] or word
    else:
        pieces.append(letter)
new = ''.join(pieces)
print(new)

finding keywords frequency in a c code using python excluding comments

I want to create a program, that can count the frequency of keywords used in a C code, excluding the commented ones or inside printf command.
def counting(f, word):
    """Return how many whitespace-separated tokens of text *f* equal *word*."""
    return f.split().count(word)
# Read the keyword list and the source file, closing both files right away.
with open('c_keywords.txt') as keyword_file:
    keyw = keyword_file.read().split()
with open('a1.cpp') as source_file:
    file = source_file.read()
# Report every keyword that occurs at least once as a whole token.
for key in keyw:
    x = counting(file, key)
    if x != 0:
        print (key, ":", x)
Here is an example of how to do it with a textfile, you can edit the text.txt and use your C code file instead
# For every line, report each ordered pair of distinct positions holding the
# same word.
with open('text.txt', 'r') as doc:
    print('opened txt')
    for words in doc:
        wordlist = words.split()
        for position, current in enumerate(wordlist):
            for other_position, other in enumerate(wordlist):
                if other_position != position and current == other:
                    print('word: %s == %s' %(current, other))
Use:
# Load the keyword list (whitespace-separated tokens, any number per line).
with open('keywords_c.txt') as f:
    words = [a for x in f for a in x.split()]
print(words)
# A set gives exact, constant-time matching; the previous substring test
# counted e.g. the token "i" because it occurs inside "int".
keywords = set(words)
count = 0
program = []
with open('Simple_c.cpp') as cpp:
    for y in cpp:
        # Ignore indentation so indented printf calls and comments are skipped.
        stripped = y.lstrip()
        if stripped.startswith(('printf', '//')):
            continue
        for b in stripped.split():
            if b in keywords:
                count += 1
print(count)

How do you check if a place in a list is ascii

I've been looking for a way to do this, and I've only found ways to do it with strings. I was wondering whether it can be done with lists too.
#----------------------------
#flesch reading score program
#----------------------------
#----getting file to read----
# Read the whole document and close the file immediately.
with open('project2test.txt') as myfile:
    txt = myfile.read()
words = txt.split()
# Work on a list of single characters; the extra trailing space lets the
# counting functions look one position ahead of the last real character.
txt = list(txt)
txt.append(' ')
#----global variables----
numWords, numberOfWords, numSentences, numSyllables = 0,0,0,0
punctuation = '.?;:!'
# The backslash is escaped explicitly; the value is unchanged ('\|' is an
# invalid escape sequence that newer Python versions warn about).
extendedPunctuation = '.?;:!,/<>]}\\|)(*&^%$##'
abc = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
syllables = 'aeiouAEIOU'
numbers = '1234567890'
#----counting functions----
def sentenceEnd(i,numSentences):
    """Return numSentences, plus one when txt[i] is a sentence terminator."""
    return numSentences + 1 if txt[i] in punctuation else numSentences
def wordEnd(i,numWords):
    """Return numWords, incremented when the letter at txt[i] ends a word.

    A letter ends a word when the next character is a space or newline,
    a character of extendedPunctuation, or a digit.
    """
    if txt[i] not in abc:
        return numWords
    following = txt[i+1]
    if following in (' ', '\n'):
        numWords += 1
    if following in extendedPunctuation:
        numWords += 1
    if following in numbers:
        numWords += 1
    return numWords
def syllableEnd(i,numSyllables):
    """Return numSyllables, plus one when a run of vowels ends at txt[i]."""
    vowel_run_ends = txt[i] in syllables and txt[i+1] not in syllables
    return numSyllables + 1 if vowel_run_ends else numSyllables
#----main loop----
#----main loop----
# Classify every character position once with the three counting functions.
for i in range(len(txt)):
    numWords = wordEnd(i, numWords)
    numSentences = sentenceEnd(i,numSentences)
    numSyllables = syllableEnd(i,numSyllables)
numberOfWords += len(words)
if numSentences == 0 or numberOfWords == 0:
    # The formula divides by both counts; avoid a ZeroDivisionError on text
    # that has no sentence terminator or no words.
    flesh = None
    print("Cannot compute a readability score: no sentences or no words found")
else:
    flesh = 206.835 - 1.015 * (numberOfWords/numSentences) - 84.6 * ((numSyllables)/numberOfWords)
    print("The flesch redability of your document is: ", flesh)
print(numSyllables, ' ', numSentences, ' ' , numWords, ' ', numberOfWords)
Right now I have to define everything myself.
Basically, I want to know whether, instead of setting punctuation = ..., abc = ..., I can write something like list[i].ascii == true.
punctuations = [' ',',']
original_string = "Hello,World! How Clean is Python!"
words = [original_string]
# Split on one separator after another.  Empty pieces (from adjacent
# separators such as ", ") are dropped so only real words remain.
for x in punctuations:
    words = [piece for word in words for piece in word.split(x) if piece]
This should work.

Translating sentences into pig latin

SO i have this assignment to translate multiple words into pig latin. assume that the user will always input lowercase and only letters and spaces.
#----------------global variables
# Read the sentence and explode it into a list of single characters.
sentence = input("What do you want to translate into piglattin? ")
sentence = list(sentence)
# Leading pad so that sentence[i-1] is a space for the first word.
sentence.insert(0, ' ')
# Taken BEFORE the trailing pad is appended, so it excludes that final space.
length = len(sentence)
# Trailing pad so that sentence[i+1] exists for the last letter.
sentence.append(' ')
# NOTE(review): this binds a second name to the SAME list object, it does not
# copy it; every pop/insert on pigLattin therefore also changes sentence.
pigLattin = sentence
# Hand-made boolean constants that the main loop compares against.
false = 0
true = 1
# Buffer for consonants removed from the word currently being processed.
consonant = []
# Scratch names and counters.
# NOTE(review): the test* helpers assign a, b, c, d and x as their own locals,
# so these module-level values look unused by them - confirm before removing.
a = 0
b = 0
c = 0
d = 0
e = 0
f = 0
j = 0
x = 0
y = 0
#----------------main functions
def testSpace(sentence, i):
if sentence[i] == ' ':
a = true
else:
a = false
return a
def testVowel(sentence, i):
if sentence[i] == 'a' or sentence[i] == 'e' or sentence[i] == 'i' or sentence[i] == 'o' or sentence[i] == 'u' or sentence[i] == 'y':
b = true
else:
b = false
return b
def testStartWord(sentence, i):
x = 0
if sentence[i].isalpha() and sentence[i-1] == ' ':
c = true
x = 1
if x == 1 and sentence[i] != 'a' and sentence[i] != 'e' and sentence[i] != 'i' and sentence[i] != 'o' and sentence[i] != 'u' and sentence[i] != 'y':
c = true
else:
c = false
return c
def testWordEnd(sentence, i):
if sentence[i].isalpha() and sentence[i+1] == ' ':
d = true
else:
d = false
return d
#----------------main loop
# NOTE(review): indentation was lost in this listing, so the nesting described
# in these comments is inferred - confirm against the original file.
# Visit character positions 1 .. length-1 and classify each one.
for i in range(1,length):
x = 0
space = testSpace(sentence, i)
vowel = testVowel(sentence, i)
word = testStartWord(sentence, i)
end = testWordEnd(sentence, i)
# A consonant at the start of a word: buffer it and remove it from the list.
if vowel == false and space == false and word == true:
e = i
consonant.append(sentence[i])
# NOTE(review): pigLattin was bound to the same list as sentence without a
# copy, so this pop also shifts the characters that are still to be scanned
# while i keeps advancing - a likely cause of the wrong output.
pigLattin.pop(e)
f = f + 1
# At the last letter of a word, add "ay" to the buffer and insert the buffer
# back into the list one character at a time.
if end == true:
consonant.append('a')
consonant.append('y')
consLength = len(consonant)
for x in range(consLength):
# Insert position: current index, plus characters inserted so far (j), minus
# characters removed so far (f).
y = i + j - f
pigLattin.insert(y,consonant[x])
j = j + 1
# Empty the buffer in place, ready for the next word.
del consonant[:]
# Print the resulting characters with no separator and no newline.
pigLength = len(pigLattin)
for b in range (pigLength):
print(pigLattin[b], end='')
This is what I have so far. It gets kind of messy when trying to remove items. I'm sort of stuck here and it's not working.
OK i got it working now this is an updated version
sentence = input("Please enter a sentence: ")
vowels = ("a", "e", "i", "o", "u", "A", "E", "I", "O", "U")
words = sentence.split()
count = 0


def find_vowel(word):
    """Return the index of the first vowel in *word*, or -1 if there is none."""
    for i, letter in enumerate(word):
        if letter in vowels:
            return i
    return -1


for word in words:
    vowel = find_vowel(word)
    if vowel == -1:
        # No vowel at all: leave the word unchanged.
        print(word, ' ', end='')
    else:
        # Move the leading consonants (none when vowel == 0) behind the rest
        # of the word and append "ay".
        print(word[vowel:] + word[:vowel] + "ay", ' ', end='')
Instead of using testSpace eliminate the spaces by using sentence = sentence.split(). This will split all your words into strings in a list. Then iterate through the words in your list.
Instead of using testStartWord, use an if statement:
for index, word in enumerate(sentence):
    if word[0] in ["a","e","i","o","u"]:
        # Strings are immutable (slice assignment raises TypeError), so store
        # the translated word back into the list instead.
        sentence[index] = word + "ay"
    #More Code...
At the end, where you print the output, use print(" ".join(sentence)).
Here's an alternate version. I use a regular expression to find words in the input string, pass them to a callback function, and substitute them back into the original string. This allows me to preserve numbers, spacing and punctuation:
import re
import sys
# Python 2/3 compatibility shim
inp = input if sys.hexversion >= 0x3000000 else raw_input
VOWELS = set('aeiouyAEIOUY')
YS = set('yY')


def pig_word(word):
    """
    Given a word, convert it to Pig Latin

    *word* is either a string or a regex match object (so the function can
    be used directly as an ``re.sub`` callback).  Rules:

    * no vowel at all: the word is returned unchanged;
    * starts with a vowel: 'way' is appended;
    * otherwise the leading consonants move to the end, followed by 'ay'.

    A leading 'y' counts as a consonant ("yes" -> "esyay"); anywhere else
    'y' counts as a vowel ("try" -> "ytray").  All-uppercase and
    capitalised words keep their capitalisation style.
    """
    if hasattr(word, 'group'):
        # pull the text out of a regex match object
        word = word.group()

    # Find the first vowel, skipping a leading y (which acts as a consonant).
    # Previously the leading y itself was picked, so nothing was moved and
    # "yes" became "yesay".
    starts_with_y = word[:1] in YS
    where = None
    for i, ch in enumerate(word):
        if ch in VOWELS and not (i == 0 and starts_with_y):
            where = i
            break

    if where is None:
        if not starts_with_y:
            # No vowels found
            return word
        # The only vowel-like letter is the leading y: keep the historical
        # result of just appending 'ay' ("y" -> "yay").
        where = 0
    elif where == 0:
        # Starts with a vowel - end in 'way'
        return word + 'way'

    # Starts with consonants - move to end and follow with 'ay'
    # check capitalization
    uppercase = word.isupper() and len(word) > 1
    titlecase = word[:1].isupper() and not uppercase
    # rearrange word
    word = word[where:] + word[:where] + 'ay'
    # repair capitalization
    if uppercase:
        word = word.upper()
    elif titlecase:
        # don't use str.title() because it screws up words with apostrophes
        word = word[:1].upper() + word[1:].lower()
    return word
def pig_latin(s, reg=re.compile('[a-z\']+', re.IGNORECASE)):
    """
    Translate a sentence into Pig Latin

    Each run of letters and apostrophes in *s* is handed to pig_word and
    replaced by its result; every other character is left where it is.
    """
    translated = reg.sub(pig_word, s)
    return translated
def main():
    """Prompt for sentences and print their translation until a blank entry."""
    prompt = 'Enter a sentence to translate (or Enter to quit): '
    s = inp(prompt)
    while s.strip():
        print(pig_latin(s))
        print('')
        s = inp(prompt)


if __name__=="__main__":
    main()
then
Enter a sentence to translate (or Enter to quit):
>>> Hey, this is really COOL! Let's try it 3 or 4 times...
Eyhay, isthay isway eallyray OOLCAY! Et'slay ytray itway 3 orway 4 imestay...

Categories