how to get the info which contains 'sss', not "= ", - python

this is my code:
class Marker_latlng(db.Model):
    # Datastore entity holding a single geographic point property.
    geo_pt = db.GeoPtProperty()


class Marker_info(db.Model):
    # Free-text string attached to a marker.
    info = db.StringProperty()
    # Reference to the Marker_latlng entity this info belongs to.
    marker_latlng =db.ReferenceProperty(Marker_latlng)


# Equality filter: keeps only entities whose `info` is exactly "sss".
q = Marker_info.all()
q.filter("info =", "sss")
But how can I get the entries whose info contains 'sss', rather than equals it?
Is there a method like "contains"?
q = Marker_info.all()
q.filter("info contains", "sss")

Instead of using a StringProperty, you could use a StringListProperty. Before saving the info string, split it into a list of strings, containing each word.
Then, when you use q.filter("info =", "sss") it will match any item whose list contains a word equal to "sss".
For something more general, you could look into app engine full text search.

Related

unable to retrieve any candidates with kb.get_candidates

I created a csv file like this:
"CAMERA", "Camera", "kamera", "cam", "Kamera"
"PICTURE", "Picture", "bild", "photograph"
and used it somewhat like this:
nlp = de_core_news_sm.load()
text = "Cam is not good"
doc = nlp(text)
name_dict, desc_dict = load_entities()
kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=96)

# Register every entity ID, using the document vector of its description.
for qid, desc in desc_dict.items():
    kb.add_entity(
        entity=qid,
        entity_vector=nlp(desc).vector,
        freq=342,  # 342 is an arbitrary value here
    )

# Attach each alias string to its single entity.
for qid, name in name_dict.items():
    kb.add_alias(
        alias=name,
        entities=[qid],
        probabilities=[1],  # 100% prior probability P(entity|alias)
    )
Printing values like this:
print(f"Entities in the KB: {kb.get_entity_strings()}")
print(f"Aliases in the KB: {kb.get_alias_strings()}")
gives me:
Entities in the KB: ['PICTURE', 'CAMERA']
Aliases in the KB: [' "Camera"', ' "Picture"']
However, if I try to check for candidates, I only get an empty list:
candidates = kb.get_candidates("Camera")
print(candidates)
for c in candidates:
print(" ", c.entity_, c.prior_prob, c.entity_vector)
Aliases in the KB: [' "Camera"', ' "Picture"']
It looks to me as if your parsing script added the literal string "Camera", with spaces and quotes and all, to the KB, instead of just the raw string Camera?

Python Pattern Regex

I have a input text as follows:
SAVE_TIMECARD = "insert into sh_user_timecard (instance_id, user_id, in_time, in_time_activity_log_aid, in_time_activity_log_instance_id, " +"out_time, out_time_activity_log_aid, out_time_activity_log_instance_id, parent_aid, parent_instance_id)" + " values (:instanceId, :userId, :inTime, :inTimeActivityAid, :inTimeActivityInstanceId, :outTime, :outTimeActivityAid, " +":outTimeActivityInstanceId, :parentAid, :parentInstanceId)";
The output I need is:
SAVE_TIMECARD =:instanceId, :userId, :inTime, :inTimeActivityAid, :inTimeActivityInstanceId, :outTime, :outTimeActivityAid, " +":outTimeActivityInstanceId, :parentAid, :parentInstanceId
I've tried achieving this using:
result = re.findall(r'[A-z]+(:?=)',inputfile)
I need to extract the upper-case word (that is, SAVE_TIMECARD) and all the words that start with a colon.
I found the solution
import re

# Two alternatives, deliberately without capture groups so that findall()
# returns the full text of every match:
#   * an UPPER_CASE identifier directly followed by "=" (the constant name)
#   * a word introduced by a colon (a named SQL parameter)
# NOTE: an upper-case identifier followed by "=" inside the SQL text itself
# would also match the first alternative.
regex = re.compile(r'\b[A-Z][A-Z0-9_]*\b(?=\s*=)|:\w+')

# Single-quoted so the double quotes of the Java source need no escaping.
testString = (
    'private static final String SAVE_TIMECARD = '
    '"insert into sh_user_timecard (instance_id, user_id, in_time, '
    'in_time_activity_log_aid, in_time_activity_log_instance_id, " +'
    '"out_time, out_time_activity_log_aid, '
    'out_time_activity_log_instance_id, parent_aid, parent_instance_id)" + '
    '" values (:instanceId, :userId, :inTime, :inTimeActivityAid, '
    ':inTimeActivityInstanceId, :outTime, :outTimeActivityAid, " +'
    '":outTimeActivityInstanceId, :parentAid, :parentInstanceId)";'
)

# ['SAVE_TIMECARD', ':instanceId', ':userId', ...]
matchArray = regex.findall(testString)
the matchArray variable contains the list of matches
:\w+
Will identify the 'words starting with a colon'. You'll need to loop through the original text to find all instances.

Parsing with placeholders

I am trying to scrape all the different variations of this webpage. For instance, the code that scrapes this webpage, http://www.virginiaequestrian.com/main.cfm?action=greenpages&sub=view&ID=11849,
should be the same as the code I use to scrape this webpage:
http://www.virginiaequestrian.com/main.cfm?action=greenpages&sub=view&ID=11849
def extract_contact(url):
    """Download *url* and collect the plain-text lines of its contact block.

    The contact block is the first <p> of the third <table> on the page.
    A text node is kept only when it sits directly between two <br> tags.
    Line breaks, tabs, non-breaking spaces and the 'Phone:' label are
    removed from each kept line.

    Prints the collected list and returns it.
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'lxml')
    tbl = soup.findAll('table')[2]
    Contact = tbl.findAll('p')[0]
    list = []
    for line_break in Contact.findAll('br'):
        following = line_break.nextSibling
        # Skip anything that is not a non-empty text node.
        if not following or not isinstance(following, NavigableString):
            continue
        after_text = following.nextSibling
        # Only text that is closed by another <br> counts as a contact line.
        if not (after_text and isinstance(after_text, Tag) and after_text.name == 'br'):
            continue
        cleaned = re.sub(r'[\n\r\t\xa0]', '', following)
        list.append(cleaned.replace('Phone:', '').strip())
    print(list)
    #Street=list.pop(0)
    #CityStateZip=list.pop(0)
    #Phone=list.pop(0)
    #City,StateZip= CityStateZip.split(',')
    #State,Zip= StateZip.split(' ')
    #ContactName = Contact.findAll('b')[1]
    #ContactEmail = Contact.findAll('a')[1]
    #Body=tbl.findAll('p')[1]
    #Website = Contact.findAll('a')[2]
    #Email = ContactEmail.text.strip()
    #ContactName = ContactName.text.strip()
    #Website = Website.text.strip()
    #Body = Body.text
    #Body = re.sub(r'[\n\r\t\xa0]','',Body).strip()
    #list.extend([Street,City,State,Zip,ContactName,Phone,Email,Website,Body])
    return list
The way I believe I will need to write the code, in order for it to work, is to set it up so that print list returns the same number of values, ordered identically. Currently, the above script returns these values:
[u'2133 Craigs Store Road', u'Afton,VA 22920', u'434-882-3150']
[u'Alexandria,VA 22305']
Accounting for missing values,in order to be able to parse this page consistently,
I need the print list command to return something similar to this
[u'2133 Craigs Store Road', u'Afton,VA 22920', u'434-882-3150']
['',u'Alexandria,VA 22305','']
This way I will be able to manipulate values by position (as they will be in a consistent order). The problem is that I don't know how to accomplish this, as I am still very new to parsing. If anybody has any insight as to how to solve the problem, I would be highly appreciative.
def extract_contact(url):
    """Download *url* and return its contact fields in a fixed order.

    The contact block is the first <p> of the third <table> on the page.
    Text nodes sitting directly between two <br> tags are collected, then
    classified by their characteristic punctuation instead of by position,
    so pages with missing lines still yield a list of the same shape.

    Returns:
        ``[street, city, state, zip_code, phone]``; any field that is not
        present on the page is the empty string.

    NOTE(review): the original snippet reset its list and ended without a
    return statement; returning the parsed fields is an assumption about
    the intended result -- confirm the expected shape with the caller.
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'lxml')
    tbl = soup.findAll('table')[2]
    contact = tbl.findAll('p')[0]
    lines = []
    for br in contact.findAll('br'):
        text_node = br.nextSibling
        if not (text_node and isinstance(text_node, NavigableString)):
            continue
        following = text_node.nextSibling
        if following and isinstance(following, Tag) and following.name == 'br':
            cleaned = re.sub(r'[\n\r\t\xa0]', '', text_node)
            lines.append(cleaned.replace('Phone:', '').strip())
    return _split_contact_lines(lines)


def _split_contact_lines(lines):
    """Classify scraped lines into [street, city, state, zip_code, phone]."""
    street = city = state = zip_code = phone = ''
    for line in lines:
        if ',' in line:
            # "City,ST 12345": the comma marks the city/state line. Checked
            # first so a ZIP+4 hyphen is not mistaken for a phone number.
            if not city:
                city, _, state_zip = line.partition(',')
                city = city.strip()
                # split() tolerates leading and repeated blanks.
                parts = state_zip.split(None, 1)
                state = parts[0] if parts else ''
                zip_code = parts[1].strip() if len(parts) > 1 else ''
        elif '-' in line:
            if not phone:
                phone = line
        elif not street:
            street = line
    return [street, city, state, zip_code, phone]
I figured out an alternative solution using substring checks and if statements. Since there are at most 3 values in the list, each with defining characteristics, I realized that I could classify them by looking for special characters rather than by the position of the record.

Three way language dictionary

# English word -> [Swedish translation, French translation]
se_eng_fr_dict = {'School': ['Skola', 'Ecole'], 'Ball': ['Boll', 'Ballon']}

choose_language = raw_input("Type 'English', for English. Skriv 'svenska' fo:r svenska. Pour francais, ecrit 'francais'. ")
# Compare case-insensitively: the prompt asks for 'svenska' in lower case,
# so an exact test against 'Svenska' would reject the suggested input.
language = choose_language.strip().lower()
if language == 'english':
    word = raw_input("Type in a word:")
    swe_word = se_eng_fr_dict[word][0]
    fra_word = se_eng_fr_dict[word][1]
    print("%s : %s pa. svenska, %s en francais." % (word, swe_word, fra_word))
elif language == 'svenska':
    word = raw_input("Vilket ord:")
    for key, value in se_eng_fr_dict.items():
        # `value` is the [Swedish, French] list, so the Swedish word has to
        # be compared with its first element, not with the whole list.
        if value[0] == word:
            print("%s : %s in English, %s en francais." % (word, key, value[1]))
I want to create a dictionary (to be stored locally as a txt file) and the user can choose between entering a word in English, Swedish or French to get the translation of the word in the two other languages. The user should also be able to add data to the dictionary.
The code works when I look up the Swedish and French word with the English word. But how can I get the Key, and Value2 if I only have value1?
Is there a way or should I try to approach this problem in a different way?
A good option would be to store None for the value if it hasn't been set. While it would increase the amount of memory required, you could go a step further and add the language itself.
Example:
# English word -> {language code: translation, or None when not yet known}
se_eng_fr_dict = {'pencil': {'se': None, 'fr': 'crayon'}}


def translate(word, lang):
    """Print the translation of *word* into *lang*, or a not-found notice.

    Args:
        word: key to look up in ``se_eng_fr_dict``.
        lang: language code to look up in that word's sub-dictionary.
    """
    # If dict.get() finds no value with `word` it will return
    # None by default. We override it with an empty dictionary `{}`
    # so we can always call `.get` on the result.
    translated = se_eng_fr_dict.get(word, {}).get(lang)
    if translated is None:
        print("No {lang} translation found for {word}".format(lang=lang, word=word))
    else:
        print("{} is {} in {}".format(word, translated, lang))


translate('pencil', 'fr')
translate('pencil', 'se')
i hope there could be a better solution, but here is mine:
class Word(object):
    """One vocabulary entry: the same word in English, French and Swedish."""

    def __init__(self, en, fr, se):
        """Store the English (*en*), French (*fr*) and Swedish (*se*) forms."""
        self.en = en
        self.fr = fr
        self.se = se

    def __str__(self):
        """Return the entry as ``<en,fr,se>``."""
        return '<%s,%s,%s>' % (self.en, self.fr, self.se)
Then you put all these Words into a mapping data structure. You can use a dictionary, but if you have a huge data set it may be better to use a BST; have a look at https://pypi.python.org/pypi/bintrees/2.0.1
Let's say you have all these Words loaded in a list named words; then:
# One index per language: each maps that language's spelling to the Word
# object, so an entry can be looked up starting from any of the three.
en_words = {w.en: w for w in words}
fr_words = {w.fr: w for w in words}
se_words = {w.se: w for w in words}
again, BST is more recommended here.
Maybe a set of nested lists would be better for this:
>>> my_list = [
[
"School", "Skola", "Ecole"
],
[
"Ball", "Boll", "Ballon"
]
]
Then you can access the set of translations by doing:
>>> position = [index for index, item in enumerate(my_list) for subitem in item if value == subitem][0]
This returns the index of the list, which you can grab:
>>> sub_list = my_list[position]
And the sublist will have all the translations in order.
For example:
>>> position = [index for index, item in enumerate(my_list) for subitem in item if "Ball" == subitem][0]
>>> print position
1
>>> my_list[position]
['Ball', 'Boll', 'Ballon']
In order to speed up word lookups and keep the design flexible, I'd choose a dictionary of subdictionaries: each subdictionary maps the words of one language to their translations in all the available languages, and the top-level dictionary maps each language to the corresponding subdictionary.
For example, if multidict is the top-level dictionary, then multidict['english']['ball'] returns the (sub)dictionary:
{'english':'ball', 'francais':'ballon', 'svenska':'boll'}
Below is a class Multidictionary implementing such an idea.
For convenience it assumes that all the translations are stored into a text file in CSV format, which is read at initialization time, e.g.:
english,svenska,francais,italiano
school,skola,ecole,scuola
ball,boll,ballon,palla
Any number of languages can be easily added to the CSV file.
class Multidictionary(object):
    '''Multi-language word table.

    ``self.multidictionary`` maps each language name to a sub-dictionary,
    which in turn maps every word of that language to the dictionary of
    its translations (language name -> word).
    '''

    def __init__(self, fname=None):
        '''Init a multidictionary from a CSV file.

        The file describes a word per line, separating all the available
        translations with a comma.
        First file line must list the corresponding languages.
        For example:

            english,svenska,francais,italiano
            school,skola,ecole,scuola
            ball,boll,ballon,palla

        With ``fname=None`` the multidictionary starts out empty.
        '''
        self.fname = fname
        self.multidictionary = {}
        if fname is not None:
            import csv
            with open(fname) as csvfile:
                for row in csv.DictReader(csvfile):
                    # DictReader files surplus cells under a None key and
                    # fills missing cells with None; drop both, along with
                    # empty cells, so they are never indexed as words.
                    translations = {
                        lang: word for lang, word in row.items()
                        if lang is not None and word
                    }
                    for lang, word in translations.items():
                        self.multidictionary.setdefault(lang, {})[word] = translations

    def get_available_languages(self):
        '''Return the list of available languages.'''
        return sorted(self.multidictionary)

    def translate(self, word, language):
        '''Return a dictionary containing the translations of a word (in a
        specified language) into all the available languages.

        Returns None when the word is unknown; an unknown language is also
        reported on standard output.
        '''
        words = self.multidictionary.get(language)
        if words is None:
            print('Invalid language %r selected' % language)
            return None
        return words.get(word)

    def get_translations(self, word, language):
        '''Generate the string containing the translations of a word in a
        language into all the other available languages.

        Returns None (after printing a notice) when nothing is found.
        '''
        translations = self.translate(word, language)
        if not translations:
            print('%s word %r not found' % (language, word))
            return None
        lang_trans = [
            '%s in %s' % (translations[lang], lang)
            for lang in translations if lang != language
        ]
        return '%s: %s' % (word, ', '.join(lang_trans))
if __name__ == '__main__':
    # Interactive demo: load the table, ask for a language and a word,
    # then show the translations when there are any.
    multidict = Multidictionary('multidictionary.csv')
    available = ', '.join(multidict.get_available_languages())
    print('Available languages: ' + available)
    language = raw_input('Choose the input language: ')
    word = raw_input('Type a word: ')
    translations = multidict.get_translations(word, language)
    if translations:
        print(translations)

Python - Split string with " "

I know this is a basic question, but I'm having difficulty parsing some text.
So how the system will work, let's take the following example:
> set title "Hello world"
I should therefore get:
["set title", "Hello world"]
The problem is therefore, I need to split the string so when I enter, for example:
> plot("data.txt");
Should give me:
["plot", "data.txt"]
I have tried the following:
While True:
command = raw_input(">");
parse = command.split("' '");
if(parse[0] == "set title"):
title = parse[1];
But this does not work and will not even recognise that I am entering "set title"
Any ideas?
You don't need split. You need re:
import re
def parse(command):
    """Split a command line into ``[command, quoted_argument]``.

    Both input styles are accepted:

        set title "Hello world"   ->  ['set title', 'Hello world']
        plot("data.txt");         ->  ['plot', 'data.txt']

    Raises:
        ValueError: if *command* contains no double-quoted argument.
    """
    match = re.match(r'(.*)"(.*)"', command)
    if match is None:
        raise ValueError('no quoted argument in command: %r' % (command,))
    head, argument = match.groups()
    head = head.strip()
    # Function-call style: drop the opening parenthesis before the quote.
    if head.endswith('('):
        head = head[:-1].rstrip()
    return [head, argument]


if __name__ == '__main__':
    command = 'set title "Hello world"'
    print(parse(command))
returns
['set title', 'Hello world']
split("' '")
Will split on the literal sequence of three characters single quote, space, single quote, which don't appear in your command strings.
I think you will need to approach this more like:
# Peel off the first word and dispatch on it; `content` is the remainder.
command, content = command.split(" ", 1)
if command == "plot":
    # The argument is in `content` (e.g. '"data.txt"'); [1:-1] drops the
    # surrounding quotes. Slicing `command` would pass part of "plot".
    plot(content[1:-1])
elif command == "set":
    item, content = content.split(" ", 1)
    if item == "title":
        title = content[1:-1]
...
Note the use of a second argument to tell split how many times to do so; 'set title "foo"'.split(" ", 1) == ['set', 'title "foo"']. Precisely how you implement will depend on the range of things you want to be able to parse.
To split the string by blanks you need to use
parse = command.split(' ')
For the input "set title" you will get a parse array looking like this
['set', 'title']
where parse[0] == 'set' and parse[1] == 'title'
If you want to test whether your string starts with "set title", either check the command string itself or check the first two indexes of parse.

Categories