How do I call a function inside a for loop in Python?
I must call this function:
def EE():
    print("dd")
inside this view:
def upload_file(request):
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        files = request.FILES.getlist('file_field')
        fs = FileSystemStorage()
        for f in files:
            filename = fs.save(f.name, f)
            ee = EE()
            print(ee)
        number_of_files = len(files)
        uploaded_file_url = fs.url(filename)
    return render(request, 'core/simple_upload.html', {
        # 'uploaded_file_url': uploaded_file_url
    })
The way you have written it is correct. However, since your function doesn't return any value, I doubt you will receive the desired output. This assumes the function being called and the caller are in the same scope.
def sample_function():
    return "This is a sample function."

def main_function():
    # function call
    x = sample_function()
    print(x)
    # add your logic here.
Hope this will help.
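Applied to the view in the question, a minimal sketch (my own variation, not the original code) where EE is changed to return a value per uploaded file might look like this:

def EE(filename):
    # hypothetical change: return a value instead of only printing
    return "processed " + filename

def upload_file(request):
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        files = request.FILES.getlist('file_field')
        fs = FileSystemStorage()
        results = []
        for f in files:
            filename = fs.save(f.name, f)
            # capture EE's return value instead of discarding it
            results.append(EE(filename))
        print(results)
    return render(request, 'core/simple_upload.html', {})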
def sentence_finder(text, word):
    sentences = sent_tokenize(text)
    return [sent for sent in sentences if word in word_tokenize(sent)]

def EE(filename, no_of_files):
    for i in range(no_of_files):
        try:
            print('\n')
            print(i + 1)
            pdfFileObj = open(filename, 'rb')
            pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
            num_pages = pdfReader.numPages
            count = 0
            text = ""
            # The while loop will read each page
            while count < num_pages:
                pageObj = pdfReader.getPage(count)
                count += 1
                text += pageObj.extractText()
            # This if statement checks whether the above library returned
            # words. It's done because PyPDF2 cannot read scanned files.
            if text != "":
                text = text
            # If the above returns as False, we run the OCR library textract
            # to convert scanned/image-based PDF files into text
            else:
                text = textract.process(filename, method='tesseract', language='eng')
            # select the relevant section
            # education qualification
            textt = re.search(r'EDUCATION\n.*?SKILLS', text, re.DOTALL).group()
            edu_qualification = textt[textt.find('\n') + 1:textt.rfind('\n')]
            srt1 = edu_qualification.lower()
            # print(edu_qualification)
            str12 = srt1.replace("\n", ". ")
            str2 = str12.replace("m.s.", "master")
            # print(str2)
            synonyms = wordnet.synsets('degree')
            syn_set1 = list(chain.from_iterable([word.lemma_names() for word in synonyms]))
            synonyms = wordnet.synsets('BACHELOR')
            syn_set2 = list(chain.from_iterable([word.lemma_names() for word in synonyms]))
            synonyms = wordnet.synsets('Master')
            syn_set3 = list(chain.from_iterable([word.lemma_names() for word in synonyms]))
            listone = ['bsc', 'be', 'btech']
            listtwo = ['m.s.']
            mergedlist = listone + syn_set1 + syn_set2 + syn_set3 + listtwo
            # print(mergedlist)
            # use a different loop variable so the outer file counter i is not shadowed
            for term in mergedlist:
                sent_part = sentence_finder(str2, term)
                # print(sent_part)
                if not sent_part:
                    pass
                else:
                    Digree = sentence_finder(str2, term)
            synonyms = wordnet.synsets('university')
            syn_seta = list(chain.from_iterable([word.lemma_names() for word in synonyms]))
            synonyms = wordnet.synsets('institute')
            syn_setb = list(chain.from_iterable([word.lemma_names() for word in synonyms]))
            synonyms = wordnet.synsets('college')
            syn_setc = list(chain.from_iterable([word.lemma_names() for word in synonyms]))
            listthree = ['center']
            mergedlistt = listthree + syn_seta + syn_setb + syn_setc
            # print(mergedlistt)
            for j in mergedlistt:
                sent_partt = sentence_finder(str2, j)
                # print(sent_partt)
                if not sent_partt:
                    pass
                else:
                    University = sentence_finder(str2, j)
            # Digree = sentence_finder(str2, 'BACHELOR')
            # University = sentence_finder(str2, 'UNIVERSITY')
            print(Digree)
            print(University)
            print(".................................................................")
            # print(University)
        except:
            print("No Education Qualification mentioned")
from app import getPhonemes
import pandas as pd
import sys

triphones = []

def phonemize(sentence):
    tokens = sentence.split(' ')
    phonemes = getPhonemes(tokens)
    return '$'.join(phonemes)

def generateTriphones(phonemes):
    triphones = []
    for i in range(len(phonemes)):
        for j in range(len(phonemes)):
            for k in range(len(phonemes)):
                triphones.append(phonemes[i] + ' ' + phonemes[j] + ' ' + phonemes[k])
    return triphones

def scoreSentence(sentence, phonemes):
    flag = 0
    global triphones
    score = 0
    tokens = sentence.split('$')
    uniqueTokens = set(tokens)
    triphoneticTokens = [token for token in uniqueTokens if token.count(' ') > 1]
    for token in triphoneticTokens:
        # iterate over a copy because matching triphones are removed below
        for triphone in triphones[:]:
            if token.find(triphone) != -1:
                score += 1
                triphones.remove(triphone)
    if triphones == []:
        flag = -1
    return score, flag

def Process(fil):
    global triphones
    file = open('itudict/vocab.phoneme', 'r', encoding='utf-8')
    data = []
    for line in file:
        data.append(line.strip())
    file.close()
    phonemes = data[4:]
    triphones = generateTriphones(phonemes)
    data = pd.read_csv(fil + '.csv')
    data = data.drop(['score', 'covered_vocab'], axis=1)
    i = 1
    while len(data) > 0:
        print('Processing File: ' + str(i))
        sentencee = data[:10000]
        data = data[10000:]
        sentences = sentencee['sentence'].tolist()
        phonemes = []
        scores = []
        for j in range(len(sentences)):
            if j % 1000 == 0:
                print('Processing Sentence: ' + str(j))
                print(len(triphones))
            phones = phonemize(sentences[j])
            score, flag = scoreSentence(phones, phonemes)
            if flag == -1:
                data = []
            phonemes.append(phones)
            scores.append(score)
        # assign the results to the current chunk, whose length matches the lists
        sentencee['Phonemes'] = phonemes
        sentencee['score'] = scores
        sentencee.to_csv(fil + 'phonemized' + str(i) + '.csv', index=False)
        i += 1

if __name__ == '__main__':
    Process(sys.argv[1])
I am trying to generate phonemes for 800,000 sentences. The model I am using is G2P, which phonemizes each sentence. After phonemization, I calculate the scores. The phoneme array I use for calculating the scores has a size of 2,620,000.
There are 800,000 sentences and the code is taking days. Can somebody parallelize this code or suggest a solution?
I want to parallelize this code so it executes faster.
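Not from the original post, but one way to attack the runtime: the per-sentence G2P call is independent, so a minimal multiprocessing sketch (assuming getPhonemes and the G2P model can be loaded inside each worker process) could parallelize phonemize while keeping the triphone scoring sequential, since scoreSentence mutates the shared global triphones list:

from multiprocessing import Pool

def phonemize_all(sentences, workers=8):
    # phonemize is independent per sentence, so it can run in a process pool;
    # scoring stays sequential because it consumes the shared triphones list
    with Pool(processes=workers) as pool:
        return pool.map(phonemize, sentences, chunksize=100)

# inside Process(), for each 10000-sentence chunk:
#     phonemes = phonemize_all(sentences)
#     scores = []
#     for phones in phonemes:
#         score, flag = scoreSentence(phones, phonemes)
#         scores.append(score)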
import re

sifrelenmisdizi = []
kelimeler = []
bulunankelimeler = []

# renamed from "input" so the built-in input() is not shadowed
encrypted_input = input("Please enter the encrypted data: ")

def sifrecoz(message):  # I'm cracking the password here
    encrypted = ""
    for i in range(25):
        for char in message:
            value = ord(char) + 1
            valuex = value % 123
            if (valuex <= 0):
                valuex = 97
                encrypted += chr(valuex)
            elif (valuex == 33):
                encrypted += chr(32)
            else:
                encrypted += chr(valuex)
        message = encrypted
        sifrelenmisdizi.append(encrypted)
        encrypted = ""

def kelime_getir(dosya_adi):  # here I'm reading the words from "kelimeler.txt"
    with open(dosya_adi, 'r', encoding='utf-8') as input_file:
        dosya_icerigi = input_file.read()
        kelime_listesi = dosya_icerigi.split()
        index = 0
        while index <= 1164053:
            kelimeler.append(kelime_listesi[index])  # this is where I get the error
            index += 1
        return kelimeler

sifrecoz(encrypted_input)
kelime_getir("kelimeler.txt")

for i in range(len(kelimeler)):
    for j in range(len(sifrelenmisdizi)):
        x = re.split(r"\s", sifrelenmisdizi[j])
        for k in range(len(x)):
            if (kelimeler[i] == x[k]):
                bulunankelimeler.append(kelimeler[i])

print("Your cracked password: ", bulunankelimeler)
# selam daktilo dalga = ugnco eblujmp ebmhb
Here I am coding a password cracking program that performs Caesar decryption of the encrypted data and compares the result with the "kelimeler" list.
I'm trying to add words to the "kelimeler" list, but I'm getting an out-of-range error.
This is my word list:
kelimeler.txt - 16.9 MB: https://dosya.co/31174l7qq8zh/kelimeler.txt.html
It appears that the function kelime_getir is expected to return a list of all the words in the file (which has one word per line).
Therefore:
def kelime_getir(dosya_adi):
    with open(dosya_adi, encoding='utf-8') as txt:
        return list(map(str.strip, txt))
...is all you need
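For completeness, a sketch of how the returned list might be used in place of the hard-coded index loop (the fixed 1164053 bound is what raises the out-of-range error when the file has fewer words); the set is my own addition, since membership tests against a set are much faster than scanning a list:

kelimeler = kelime_getir("kelimeler.txt")
print(len(kelimeler))  # however many words the file actually contains

kelime_kumesi = set(kelimeler)  # fast membership lookups
for sifreli in sifrelenmisdizi:
    for aday in sifreli.split():
        if aday in kelime_kumesi:
            bulunankelimeler.append(aday)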
Below is my code. I am reading a .txt file, then making a list, and then saving it to Excel, but in Excel I am getting ('ip subnet/mask', ) when I want only (ip subnet/mask) in the output.
Below are my code blocks:
1. I read the routing table output from a txt file and create a list.
2. Then, from the 10.0.0.0/8 address space, I remove the routing table subnets.
3. I save the available IPs to the Available.txt file.
4. I create a list from the Available.txt file.
5. Then I create an Excel file and save the list output to the specific 10.x.x.x/16 sheet.
import os
import re
import xlsxwriter
from netaddr import *
from openpyxl import load_workbook

def ip_adresses():
    lst = []
    for line in fstring:
        for word in line.split():
            result = pattern.search(word)
            if result:
                lst.append(word)
    return lst

def write_excel(aaa, bbb, num):
    bbb = sorted(bbb)
    work11 = load_workbook(r'C:\Users\irfan\PycharmProjects\pythonProject\irfan4.xlsx')
    sheet11 = work11[aaa]
    count = sheet11.max_row
    max1 = sheet11.max_row
    for row1, entry in enumerate(bbb, start=1):
        sheet11.cell(row=row1 + max1, column=1, value=entry)
    work11.save("irfan4.xlsx")

os.chdir(r'C:\Users\irfan\PycharmProjects\pythonProject')
file = open('RR-ROUTING TABLE.txt')
fstring = file.readlines()
# declaring the regex pattern for IP addresses
pattern = re.compile(r'(10\.\d{1,3}\.\d{1,3}\.\d{1,3}[/])')
# initializing the list object
unique = []
# extracting the IP addresses
IPs = ip_adresses()
unique = list(dict.fromkeys(IPs))
ipv4_addr_space = IPSet(['10.0.0.0/8'])
ip_list = IPSet(list(unique))
print(ip_list)
available = ipv4_addr_space ^ ip_list
print()
f = open("Available.txt", "a")
f.write(str(available))
f.close()  # call close(), not just reference it
print(available)
workbook = xlsxwriter.Workbook('irfan4.xlsx')
worksheet = workbook.add_worksheet()
for row_num, data in enumerate(available):
    worksheet.write(row_num, 0, data)
num = 0
while num <= 255:
    worksheet = workbook.add_worksheet("10." + str(num) + ".0.0")
    num += 1
workbook.close()
# CREATE AUDIT BOOK
##################################################
os.chdir(r'C:\Users\irfan\PycharmProjects\pythonProject')
file_2 = open('Available.txt')
fstring_2 = file_2.readlines()

def ip_adresses1():
    lst = []
    for line in fstring_2:
        for word in line.split():
            result = pattern.search(word)
            if result:
                lst.append(word)
    return lst

List_A = ip_adresses1()
print(List_A[1])
get_list = []
num = 0
while num <= 255:
    pattern_sheet = re.compile(r'(10\.' + str(num) + r'\.\d{1,3}\.\d{1,3}[/])')
    for get_ips in fstring_2:
        result_ip = pattern_sheet.search(get_ips)
        if result_ip:
            get_list.append(get_ips)
    sheet_name = ("10." + str(num) + ".0.0")
    write_excel(sheet_name, get_list, num)
    get_list = []
    num += 1
I have used the re.sub function to remove characters from the string:
def ip_adresses1():
    lst = []
    for line in fstring_2:
        for word in line.split():
            word = re.sub("IPSet", " ", word)
            word = re.sub(",", " ", word)
            word = re.sub("'", " ", word)
            word = re.sub(r"\(", " ", word)
            word = re.sub(r"\)", " ", word)
            word = re.sub(r"\]", " ", word)
            word = re.sub(r"\[", " ", word)
            result = pattern.search(word)
            if result:
                lst.append(word)
    return lst
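For what it's worth, those seven substitutions could likely be collapsed into a single pattern; a minimal sketch of that idea (my own combination, same characters stripped, each match replaced with a space):

import re

def clean_word(word):
    # strip the literal word "IPSet" plus the characters , ' ( ) [ ]
    return re.sub(r"IPSet|[,'()\[\]]", " ", word)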
Hello fellow developers,
I am trying to build a chatbot using Markov chains and I am stuck on a problem. In the code below, I have made a random sentence generator that learns from movie scripts. The problem is, how do I get this sentence generator to not be random and to respond to the user's input? How should I go about doing this? Is it something to do with input/output training like this:
In: how are you today
Out: I'm good thanks how are you
Here is my code. Most of the functions are used to put the data in a CSV file, so don't mind those.
from collections import defaultdict
import random, itertools, nltk, pandas, csv, string, re, os, time

class Chatbot:
    def __init__(self, name, txt_transcript_filedir, character=None):
        self.name = name
        self.txt_transcript_filedir = txt_transcript_filedir
        self.character = character
        print("Hello my name is " + name + ".")

    def parse_transcript(self):
        parsed_lines = []
        self.csv_transcript_filedir = self.txt_transcript_filedir.replace('.txt', '.csv')
        with open(self.txt_transcript_filedir, encoding='utf-8') as txt_file:
            lines = txt_file.readlines()
            for line in lines:
                line = line.replace(', ', ' ')
                line = re.sub(r'\[.*?\]', '', line)
                if ': ' in line:
                    line = line.replace(': ', ',')
                    parsed_lines.append(line)
        with open(self.csv_transcript_filedir, 'w', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(['person', 'text'])
            for line in parsed_lines:
                csv_file.write(line)

    def tokenize_transcript(self):
        csv_file = pandas.read_csv(self.csv_transcript_filedir)
        textss = []
        final_sents = []
        if self.character == None:
            texts = csv_file['text']
            for text in texts:
                sent = nltk.sent_tokenize(text)
                textss.append(sent)
        else:
            char_sets = csv_file[csv_file['person'] == self.character]
            texts = char_sets['text']
            for text in texts:
                sent = nltk.sent_tokenize(text)
                textss.append(sent)
        for text in textss:
            for sent in text:
                if sent[0] == ' ':
                    sent = sent[1:]
                final_sent = [w for w in sent if w not in string.punctuation]
                final_sent = ''.join(final_sent)
                final_sents.append(final_sent)
        self.training_data = [sent for sent in final_sents]

    def learn(self):
        self.parse_transcript()
        self.tokenize_transcript()
        self.make_word_dict(self.training_data)

    def make_word_dict(self, text):
        word_dict = defaultdict(list)
        for sent in text:
            words = nltk.word_tokenize(sent)
            for i in range(len(words) - 1):
                if i + 2 >= (len(words)):
                    word_dict[(words[i], words[i+1])].append('<end>')
                else:
                    word_dict[(words[i], words[i+1])].append(words[i+2])
        self.vocabulary = word_dict

    def generate_text(self, num):
        for i in range(0, num):
            start_key = random.choice(list(self.vocabulary.keys()))
            text = []
            text.append(start_key[0])
            text.append(start_key[1])
            for i in itertools.count():
                key = (text[i], text[i+1])
                if key[1] == '<end>':
                    break
                else:
                    text.append(random.choice(self.vocabulary[text[i], text[i+1]]))
            text = ' '.join(text)
            if text.endswith('<end>'):
                text = text[:-6]
            text = text + '.'
        return text

    def say(self, text):
        os.system('say -v Oliver ' + text)

def main():
    num = 100
    bot = Chatbot("J.A.R.V.I.S", "avengers_age_of_ultron.txt", "JARVIS")
    bot.learn()
    for i in range(num):
        text = bot.generate_text(1)
        print(text)

if __name__ == '__main__':
    main()
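To circle back to the question about responding to the user's input: one approach (not in the original code, just a sketch) is to seed the Markov chain with a start key that shares a word with the user's message instead of picking one at random, e.g. as an extra method on the Chatbot class:

def respond(self, user_input):
    # hypothetical helper: prefer start keys that share a word with the input
    words = set(nltk.word_tokenize(user_input.lower()))
    candidates = [key for key in self.vocabulary
                  if key[0].lower() in words or key[1].lower() in words]
    if candidates:
        start_key = random.choice(candidates)
    else:
        start_key = random.choice(list(self.vocabulary.keys()))
    text = [start_key[0], start_key[1]]
    while True:
        next_words = self.vocabulary.get((text[-2], text[-1]))
        if not next_words:
            break
        nxt = random.choice(next_words)
        if nxt == '<end>':
            break
        text.append(nxt)
    return ' '.join(text) + '.'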
So I've been working with the CC-CEDICT, a free downloadable Chinese-English dictionary. I've been using python to make some small changes and reformat the dictionary. When I ran code that just reorganized the dictionary as a csv file, I had no issues and the characters were written into the file as expected. Here is the code for that:
filename = 'cedict_ts.u8.txt'
newname = 'cedict_ts.u8.csv'
f = open(filename, 'r')
allLines = f.readlines()
f.close()
newf = open(newname, 'w')
endofhash = False
for i in range(0, len(allLines)):
    curLine = allLines[i]
    if curLine[0] == '#':
        newf.write(curLine)
    else:
        if(not endofhash):
            newarr = ['Traditional', 'Simplified', 'Pinyin', 'Definition(s)\r\n']
            newline = ','.join(newarr)
            newf.write(newline)
            endofhash = True
        firstws = curLine.find(' ')
        lsbrack = curLine.find('[')
        rsbrack = curLine.find(']')
        fslash = curLine.find('/')
        lslash = curLine.rfind('/')
        trad = curLine[0:firstws]
        simp = curLine[firstws+1:lsbrack-1]
        piny = curLine[lsbrack+1:rsbrack]
        defin = curLine[fslash+1:lslash]
        defin = defin.replace('/', '; ')
        defin = defin + '\r\n'
        newarr = [trad, simp, piny, defin]
        newline = ','.join(newarr)
        newf.write(newline)
newf.close()
However, when I run a program that also changes the pinyin system and adds it to the dictionary, the content of the text file is gobbledygook. But, as a test, I had the program print out each line before it was written to the text file, and it prints to the terminal as expected. Here is the code that does that:
from pinyinConverter import *

filename = 'cedict_ts.u8.txt'
newname = 'cedict_ts_wpym.u8.csv'
f = open(filename, 'r')
allLines = f.readlines()
f.close()
apy = readPinyinTextfile('pinyinchars.txt')
newf = open(newname, 'w')
endofhash = False
for i in range(0, len(allLines)):
    curLine = allLines[i]
    if curLine[0] == '#':
        newf.write(curLine)
    else:
        if(not endofhash):
            newarr = ['Traditional', 'Simplified', 'Pinyin', 'PinyinWithMarks', 'Definition(s)\r\n']
            newline = ','.join(newarr)
            newf.write(newline)
            endofhash = True
        firstws = curLine.find(' ')
        lsbrack = curLine.find('[')
        rsbrack = curLine.find(']')
        fslash = curLine.find('/')
        lslash = curLine.rfind('/')
        trad = curLine[0:firstws]
        simp = curLine[firstws+1:lsbrack-1]
        piny = curLine[lsbrack+1:rsbrack]
        split_piny = piny.split(' ')
        for i in range(0, len(split_piny)):
            curPin = split_piny[i]
            newPin = convertPinyinSystem(curPin, apy)
            split_piny[i] = newPin
        pnwm = ' '.join(split_piny)
        defin = curLine[fslash+1:lslash]
        defin = defin.replace('/', '; ')
        defin = defin + '\r\n'
        newarr = [trad, simp, piny, pnwm, defin]
        newline = ','.join(newarr)
        newf.write(newline)
newf.close()
And here is the pinyinConverter file code:
def convertPinyinSystem(inputString, allPinyin):
    chars = ['a', 'e', 'i', 'o', 'u', 'u:']
    tone = grabTone(inputString)
    toneIdx = (tone - 1) * 2
    hasIdx = -1
    for i in range(0, len(chars)):
        if(chars[i] in inputString):
            hasIdx = i
    newString = inputString
    newString = newString.replace(str(tone), '')
    if(not ('iu' in inputString)):
        newChar = allPinyin[hasIdx][toneIdx:toneIdx+2]
    else:
        newChar = allPinyin[4][toneIdx:toneIdx+2]
    newString = newString.replace(chars[hasIdx], newChar)
    if(tone == 5):
        newString = inputString
        newString = newString.replace(str(tone), '')
        return newString
    elif(tone == -1):
        return inputString
    else:
        return newString

def readPinyinTextfile(pinyintextfile):
    f = open(pinyintextfile, 'r')
    allLines = f.readlines()
    f.close()
    for i in range(0, len(allLines)):
        curLine = allLines[i]
        curLine = curLine[0:len(curLine)-1]
        allLines[i] = curLine
    return allLines

def grabTone(inputText):
    isToneIdx = False
    idx = 0
    while(not isToneIdx):
        isToneIdx = is_int(inputText[idx])
        if(isToneIdx):
            break
        else:
            idx += 1
        if(idx == len(inputText)):
            return -1
    return int(inputText[idx])

def is_int(s):
    try:
        int(s)
        return True
    except ValueError:
        return False
And the content of the pinyinchars.txt file is this:
āáăà
ēéĕè
īíĭì
ōóŏò
ūúŭù
ǖǘǚǜ
I'm on a 2009 MacBook Pro running OS X 10.8.5, Python is version 2.7.6, and the encoding of the dictionary is UTF-8. Also, I know some of the code for doing the pinyin conversion is not optimized, but for this it doesn't really matter.
If your pinyin file is encoded as utf-8, you might want to try using the codecs package, which is part of the standard library, like this:
import codecs
...
def readPinyinTextfile(pinyintextfile):
    f = codecs.open(pinyintextfile, 'r', 'utf-8')
If it looks okay in the terminal, then it's likely that you need to specifically change the writing function to use the codecs package:
apy = readPinyinTextfile('pinyinchars.txt')
newf = codecs.open(newname, 'w', 'utf-8')
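Put together, a minimal sketch of that read-and-write pattern (file names taken from the question; under Python 2, codecs.open returns unicode objects on read and encodes them back to UTF-8 on write):

# -*- coding: utf-8 -*-
import codecs

def readPinyinTextfile(pinyintextfile):
    # decode each line from UTF-8 instead of reading raw bytes
    with codecs.open(pinyintextfile, 'r', 'utf-8') as f:
        return [line.rstrip('\n') for line in f]

apy = readPinyinTextfile('pinyinchars.txt')

# open the output the same way so unicode strings are encoded on write
newf = codecs.open('cedict_ts_wpym.u8.csv', 'w', 'utf-8')
newf.write(u'Traditional,Simplified,Pinyin,PinyinWithMarks,Definition(s)\r\n')
newf.close()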