Passing strings over URLs in Django - Python
I'm having trouble understanding this error in my code. First, let me try to explain what is happening and what I'm trying to do.
My code is designed to load 45 separate text files into an array, each containing the weight of each word/phrase together with the word/phrase itself. This has to happen at startup, before any description is received.
Second, once the description is received, it is parsed by my software into words/phrases, which are compared to the words/phrases in the array.
Third, my software then provides the top three classes, in rank order (first/second/third) by number, along with the score for each class.
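For context, each of the 45 class files (counted_phrases_class1.txt through counted_phrases_class45.txt, as named in the code below) is read by open_file_read_words(), which splits every line once on the first comma and converts the first field to an integer weight. The snippet below only illustrates that parsing; the example line is invented, not taken from my real data:

    # Illustration only - the real files are counted_phrases_class1.txt ... counted_phrases_class45.txt
    # open_file_read_words() splits each line once on the first comma and int()s the weight:
    line = "12,computer software for word processing"   # hypothetical file line
    weight, phrase = line.split(',', 1)
    weight = int(weight)   # -> 12, later scaled by normalize_array()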
I've made a Django application that will serve this code, so I have a form which provides two parameters, classes and description, like this:
class TrademarkClassifierForm(forms.Form):
    """
    TODO: This form will cover the questions the
    initial classifier program does
    :returns: TODO
    """
    classes = forms.CharField(
        max_length=10,
        label="Test all trademark classes? Type 'yes' to do so or else enter the class to be tested ")
    description = forms.CharField(widget=forms.Textarea)

    def __init__(self, *args, **kwargs):
        super(TrademarkClassifierForm, self).__init__(*args, **kwargs)
        self.helper = FormHelper()
        self.helper.add_input(Submit('submit', 'Submit'))
Then I want to pass these two parameters to the view over the URL, like this:
class TrademarkClassifierResultView(FormView):
    """
    TODO: POST should redirect to its own page with GET,
    passing the values in some query parameters,
    something like ?classes=yes&name=NameOfTrademarkClass
    This should be visible on the results page.
    :param: classes
    :param: description
    :returns: TODO - params
    """
    template_name = 'trademark.html'
    form_class = TrademarkClassifierForm

    def get(self, request, *args, **kwargs):
        classes = str(self.request.GET.get('classes'))
        description = str(self.request.GET.get('description'))
        form = TrademarkClassifierForm(initial={'classes': classes, 'description': description})
        context_data = self.get_context_data(classes, description, form=form)
        return self.render_to_response(context_data)

    def form_valid(self, form):
        classes = form.cleaned_data['classes']
        description = form.cleaned_data['description']
        return redirect(self.get_success_url(classes, description))

    def form_invalid(self, form):
        messages.add_message(self.request, messages.ERROR,
                             "Invalid data. Please check fields.")
        return self.render_to_response(
            self.get_context_data(form=form)
        )

    def get_success_url(self, classes=None, description=None):
        return reverse("classifier:trademark") + "?classes=" + str(classes) + "&description" + str(description)

    def get_context_data(self, classes, description, **kwargs):
        context = super(TrademarkClassifierResultView, self).get_context_data(**kwargs)
        context['classes'] = classes
        context['description'] = description
        context['trademark'] = ClassifyMarkBased.control_program(classes, description)
        return context
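Note: get_success_url above builds the query string by hand; the "=" after &description is missing and the description is not URL-encoded, so descriptions containing spaces or & would break the redirect. This is separate from the error below, but a version using urlencode - just a sketch, not something I have verified against this project - would look like:

    from urllib import urlencode  # Python 2.7; on Python 3 use: from urllib.parse import urlencode

    def get_success_url(self, classes=None, description=None):
        # let urlencode add the '=' signs and escape spaces, '&', etc.
        query = urlencode({'classes': classes or '', 'description': description or ''})
        return reverse("classifier:trademark") + "?" + query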
Now my problem is this error:
Environment:
Request Method: GET
Request URL: http://127.0.0.1:8000/trademark/
Django Version: 1.11.2
Python Version: 2.7.12
Installed Applications:
['django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'django.contrib.sites',
'classifier',
'crispy_forms',
'allauth',
'allauth.account',
'allauth.socialaccount',
'widget_tweaks',
'debug_toolbar']
Installed Middleware:
['django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'debug_toolbar.middleware.DebugToolbarMiddleware']
Traceback:
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/core/handlers/exception.py" in inner
41. response = get_response(request)
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/core/handlers/base.py" in _get_response
187. response = self.process_exception_by_middleware(e, request)
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/core/handlers/base.py" in _get_response
185. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/views/generic/base.py" in view
68. return self.dispatch(request, *args, **kwargs)
File "/home/petar/.virtualenvs/trademark/local/lib/python2.7/site-packages/django/views/generic/base.py" in dispatch
88. return handler(request, *args, **kwargs)
File "/home/petar/Documents/Synergy/Trademark/TM_base/classifier/views.py" in get
60. context_data = self.get_context_data(classes, description, form=form)
File "/home/petar/Documents/Synergy/Trademark/TM_base/classifier/views.py" in get_context_data
82. context['trademark'] = ClassifyMarkBased.control_program(classes, description)
File "/home/petar/Documents/Synergy/Trademark/TM_base/classifier/services/classify_mark_based.py" in control_program
89. N = len(word_count_array_for_all_classes[i])
Exception Type: IndexError at /trademark/
Exception Value: list index out of range
This is my URL pattern:
url(r'^trademark/', TrademarkClassifierResultView.as_view(), name="trademark"),
and this is the part of the code that should calculate the trademark from these two parameters:
import os
import numpy as np
import re
import requests


class TrademarkService(object):
    # coding: utf-8

    # In[5]:
    # compare input string to a class
    # for words not found,look in a dictionary - add to text files for trademark words

    # In[6]:
    # open each trademark class file and read the words/frequency back into an array
    #staticmethod
    def open_file_read_words(file_name):
        unique_words_and_count_not_format = []
        tm_word_count_array = []
        my_list = []
        all_possible_entries = 1
        with open(file_name) as f:
            lines = [line.strip() for line in open(file_name)]
        all_possible_entries = len(lines)
        tm_word_count_array = [[0 for x in range(2)] for y in range(all_possible_entries)]
        i = 0
        while i < all_possible_entries:
            tm_word_count_array[i] = lines[i].split(',', 1)
            i += 1
        i = 0
        while i < all_possible_entries:
            tm_word_count_array[i][0] = int(tm_word_count_array[i][0])
            i += 1
        return tm_word_count_array

    # In[7]:
    # this section normalizes word frequency by the number of words x 1000
    #staticmethod
    def normalize_array(tm_word_count_array):
        list_of_freqs = []
        max_entries = len(tm_word_count_array)
        list_of_freqs = [0 for y in range(max_entries)]
        i = 0
        while i < max_entries:
            list_of_freqs[i] = tm_word_count_array[i][0]
            i += 1
        max_value = max(list_of_freqs)
        i = 0
        while i < max_entries:
            tm_word_count_array[i][0] = ((float(tm_word_count_array[i][0])) / max_entries) * 1000
            i += 1
        return tm_word_count_array

    # In[8]:
    # include the list of not useful words here
    #staticmethod
    def find_not_useful_words(word):
        not_useful_words = (
            "about", "are", "upon", "-", " ", "up", "other", "or", "not", "namely", "more", "made", "in", "for", "except",
            "but", "being", "all", "against", "was", "were", "will", "that", "its", "on", "it", "at", "was", "our", "your",
            "ours", "yours", "their", "them", "other", "out", "having", "have", "has", "in", "be", "than", "use", "uses",
            "using", "", "by", "and", "an", "a", "use", "used", "using", "for", "to", "of", "-)", "-]", "with", "as", "in",
            "the", "from")
        for test_word in not_useful_words:
            if word == test_word:
                return False
        return True

    # In[9]:
    # clean up the phrases by removing problematic characters
    #staticmethod
    def clean_up_phrases(data):
        important_words = ''
        word = data
        for c in word:
            if 0 <= ord(c) <= 127:
                # this is an ascii character.
                not_a_variable = 0
            else:
                if ord(c) == 201:
                    word = word.replace(c, "e")
                elif ord(c) == 241:
                    word = word.replace(c, "n")
                elif ord(c) == 225:
                    word = word.replace(c, "a")
                elif ord(c) == 251:
                    word = word.replace(c, "u")
                elif ord(c) == 8206:
                    word = word.replace(c, "")
                else:
                    word = word.replace(c, "")
        # continue_yes=raw_input("do you want to continue?")
        word = word.lower()
        word = str(filter(lambda ch: ch not in "?.!/;:,'()[]", word))
        # calls the function above to remove words that were found to interfere with classification
        if data.find_not_useful_words(word):
            if len(word) > 1:
                important_words += word
        return important_words

    # In[10]:
    # find the important words in the string
    #staticmethod
    def find_important_words(data):
        all_entries = len(data)
        important_words = []
        for word in data.split():
            for c in word:
                if 0 <= ord(c) <= 127:
                    # this is an ascii character.
                    not_a_variable = 0
                else:
                    if ord(c) == 201:
                        word = word.replace(c, "e")
                    elif ord(c) == 241:
                        word = word.replace(c, "n")
                    elif ord(c) == 225:
                        word = word.replace(c, "a")
                    elif ord(c) == 251:
                        word = word.replace(c, "u")
                    elif ord(c) == 8206:
                        word = word.replace(c, "")
                    else:
                        word = word.replace(c, "")
            word = word.lower()
            word = str(filter(lambda ch: ch not in " ?.!/;:,'()[]", word))
            if word.endswith("-"):
                word = word[:-1]
            if word.startswith("-"):
                word = word[:1]
            if data.find_not_useful_words(word):
                if len(word) > 1:
                    important_words.append(word)
        return important_words

    # In[11]:
    #staticmethod
    def analyze_each_line_test_data(test_sentence, N, normalized_tm_word_count_array):
        # remove problematic characters and words, plus find important words/phrases
        test_important_phrases = test_sentence.clean_up_phrases(test_sentence)
        i = 0
        total_found = 0
        total_TM_class_count = 0
        total_TM_words_matched = []
        # score the trademark phrases in the string
        while i < N:
            count_phrases = 0
            if len(normalized_tm_word_count_array[i][1].split()) > 1:
                if test_important_phrases.find(normalized_tm_word_count_array[i][1]) > -1:
                    total_TM_words_matched.append(normalized_tm_word_count_array[i][1])
                    total_TM_class_count += (normalized_tm_word_count_array[i][0])
                    total_found += 1
            i += 1
        # decompose the string and remove extraneous words, then score the words in the string
        test_important_words = test_sentence.find_important_words(test_sentence)
        i = 0
        while i < N:
            count_words = 0
            if test_important_words.count(normalized_tm_word_count_array[i][1]) > 0:
                total_TM_words_matched.append(normalized_tm_word_count_array[i][1])
                count_words = test_important_words.count(normalized_tm_word_count_array[i][1])
                total_TM_class_count += (normalized_tm_word_count_array[i][0] * count_words)
                total_found += 1
            i += 1
        i = 0
        normalized_tm_word_count_values = [0 for y in range(N)]
        normalized_tm_word_count_words = ['a' for y in range(N)]
        while i < N:
            normalized_tm_word_count_values[i] = normalized_tm_word_count_array[i][0]
            normalized_tm_word_count_words[i] = normalized_tm_word_count_array[i][1]
            i += 1
        total_words_to_match = len(test_important_words) + len(test_important_phrases)
        not_found_words = list(set(test_important_words) - set(normalized_tm_word_count_words))
        return total_found, total_TM_words_matched, not_found_words, total_TM_class_count

    # In[12]:
    #staticmethod
    def open_class_file_read_words_to_array(file_name, file_name_class=None):
        tm_word_count_array = []
        tm_word_count_array = file_name.open_file_read_words(file_name_class)
        return tm_word_count_array

    # In[13]:
    # create a file for the trademark results
    #staticmethod
    def create_results_file(file_name, results_array, description):
        unique_words_and_count_not_format = []
        unique_words_and_count_to_write = []
        open_file_name = open(file_name, 'a')
        open_file_name.write("New trademark comparison")
        open_file_name.write("\n")
        open_file_name.write(description)
        open_file_name.write("\n")
        unique_words_and_count_to_write = np.array(results_array, dtype=object)
        np.savetxt(open_file_name, unique_words_and_count_to_write, fmt='%s', delimiter=',')
        open_file_name.write("\n")
        open_file_name.write("\n")
        open_file_name.write("\n")
        open_file_name.close()

    # In[14]:
    # this section controls the program
    #staticmethod
    def control_the_program(classes, description):
        description = []
        word_count_array_for_all_classes = []
        correct_class_set = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17',
                             '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33',
                             '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45']
        # depending on the answer, only one class worth of trademark words will be loaded up or else all will be loaded up
        # test_all_classes = raw_input(
        #     "Test all trademark classes? Type 'yes' to do so or else enter the class to be tested ")
        test_all_classes = classes
        # test description of goods/services
        # test_data_array = raw_input("Provide the description of goods or services ")
        test_data_array = description
        # file_name_data = raw_input("Provide the identifier for the results file ")
        # this file has the output of the classification engine, including the top 3 results
        # file_name_results = 'user_test_comparison_results_' + file_name_data + '.txt'
        # call to a program to open each file of trademark words in turn and read the words back into an array
        if test_all_classes == 'yes':
            i = 1
            number_classes_to_check = 45
            word_count_array_for_all_classes = [[] for z in range(46)]
            temp_array = []
            while i <= 45:
                # opens each file with the trademark words
                file_name_class = 'counted_phrases_class' + str(i) + '.txt'
                temp_array = classes.open_class_file_read_words_to_array(file_name_class)
                # normalization is used because some classes have many words and some have few words
                # the words/phrases are weighted according to frequency
                word_count_array_for_all_classes[i] = classes.normalize_array(temp_array)
                i += 1
        else:
            # print "you didn't enter yes"
            pass
        # length_test_data_array = len(test_data_array)
        # open(file_name_results, 'a').close()
        # start_writing_results = open(file_name_results, 'a')
        # start_writing_results.write("The start of the test")
        # start_writing_results.write("\n")
        # start_writing_results.write("Total number of potential items to match ")
        # start_writing_results.write(str(length_test_data_array))
        # start_writing_results.write("\n")
        # start_writing_results.close()
        top_result = [0 for y in range(2)]
        second_result = [0 for y in range(2)]
        third_result = [0 for y in range(2)]
        top_array_words_not_found = []
        second_array_words_not_found = []
        third_array_words_not_found = []
        counter_for_9vs42 = 0
        counter_for_data_errors = 0
        top_result = [0 for y in range(2)]
        second_result = [0 for y in range(2)]
        third_result = [0 for y in range(2)]
        top_array_words_not_found = []
        second_array_words_not_found = []
        third_array_words_not_found = []
        actual_class_results = [0 for y in range(2)]
        overall_array_results = [[0 for x in range(3)] for y in range(4)]
        actual_class_words_not_found = []
        i = 1
        while i <= 45:
            total_found = 0
            total_TM_words_matched = 0
            not_found_words = ['']
            score = 0
            N = len(word_count_array_for_all_classes[i])
            total_found, total_TM_words_matched, not_found_words, score = classes.analyze_each_line_test_data(
                test_data_array, N, word_count_array_for_all_classes[i])
            if int(score) > 0:
                if int(score) > top_result[0]:
                    third_result[0] = second_result[0]
                    third_result[1] = second_result[1]
                    third_array_words_not_found = second_array_words_not_found
                    second_result[0] = top_result[0]
                    second_result[1] = top_result[1]
                    second_array_words_not_found = top_array_words_not_found
                    top_result[0] = int(score)
                    top_result[1] = i
                    top_array_words_not_found = ['']
                    top_array_words_not_found = not_found_words
                elif int(score) > second_result[0]:
                    third_result[0] = second_result[0]
                    third_result[1] = second_result[1]
                    third_array_words_not_found = second_array_words_not_found
                    second_result[0] = int(score)
                    second_result[1] = i
                    second_array_words_not_found = ['']
                    second_array_words_not_found = not_found_words
                elif int(score) > third_result[0]:
                    third_result[0] = int(score)
                    third_result[1] = i
                    third_array_words_not_found = ['']
                    third_array_words_not_found = not_found_words
            i += 1
        overall_array_results[0][0] = top_result[0]
        overall_array_results[0][1] = top_result[1]
        overall_array_results[0][2] = top_array_words_not_found
        overall_array_results[1][0] = second_result[0]
        overall_array_results[1][1] = second_result[1]
        overall_array_results[1][2] = second_array_words_not_found
        overall_array_results[2][0] = third_result[0]
        overall_array_results[2][1] = third_result[1]
        overall_array_results[2][2] = third_array_words_not_found
        # all results - including the first, second, third choices of the engine and the original description - are written to the file
        # create_results_file(file_name_results, overall_array_results, test_data_array)
        # start_writing_results = open(file_name_results, 'a')
        # start_writing_results.write("The end of the test")
        # start_writing_results.write("\n")
        #
        # start_writing_results.write("\n")
        # start_writing_results.write("\n")
        # start_writing_results.close()
        # print "finished the process"
From the code I've provided you can see that these parameters were originally supplied via Python's raw_input, and after the calculation the code wrote a file in which you could read the result.
I've rewritten this so I can serve it through the Django application, so the parameters classes and description should replace the raw_input calls and the result will be displayed in the template, like this:
{{ trademark.overall_array_results.top_result }}<br>
{{ trademark.overall_array_results.second_result }}<br>
{{ trademark.overall_array_results.third_result }}
I'm not sure if I'm doing the right thing here, so I need help understanding this better. Can someone help me overcome this error?
If classes is not "yes", then word_count_array_for_all_classes remains an empty list, so the first N = len(word_count_array_for_all_classes[i]) inside the while i <= 45 loop raises the IndexError: list index out of range you are seeing.
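Note that in your get() method, classes is built with str(self.request.GET.get('classes')), so on a plain GET of /trademark/ with no query string it is the literal string 'None'; the == 'yes' branch is skipped and you hit exactly this case. A minimal sketch of one way to guard the view - assuming you want the page to just render the empty form until both query parameters are supplied, and assuming ClassifyMarkBased.control_program in the view is the control_the_program method shown above - would be:

    def get(self, request, *args, **kwargs):
        classes = self.request.GET.get('classes')            # None when the parameter is absent
        description = self.request.GET.get('description')
        form = TrademarkClassifierForm(initial={'classes': classes or '',
                                                'description': description or ''})
        if not classes or not description:
            # nothing to classify yet: render the form without calling the classifier
            context = super(TrademarkClassifierResultView, self).get_context_data(form=form)
            return self.render_to_response(context)
        context_data = self.get_context_data(classes, description, form=form)
        return self.render_to_response(context_data)

Two further things to check, judging only from the code as pasted: control_the_program() starts with description = [], which throws away the description you pass in, and it never returns anything, so context['trademark'] will be None and template lookups like {{ trademark.overall_array_results.top_result }} will render nothing. Something along these lines at the end of the method (a hypothetical shape chosen to match the template, not your existing code) would be needed:

        # hypothetical return value so the template has something to look up
        return {'overall_array_results': {'top_result': top_result,
                                          'second_result': second_result,
                                          'third_result': third_result}}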