I have the following Python class.
from body_parser import Extractor
import re
class FEOProcessor(object):
CHECKS = [
('Standard JavaScript Inlining Optimization', ('EMBED_JAVASCRIPT',), 'check_js_inlining'),
('HTML5 Advanced Cache', ('JAVASCRIPT_HTML5_CACHE', 'CSS_HTML5_CACHE'), 'check_html5_advanced_cache'),
('Cookieless Resource Domain', ('RENAME_JAVASCRIPT', 'RENAME_CSS'), 'check_cookieless_resource_domain'),
('Minificatiopn of JS', ('MINIFY_JAVASCRIPT',), 'check_js_minifaction'),
('File Versioning', ('RENAME_JAVASCRIPT', 'RENAME_IMAGE', 'RENAME_CSS'), 'check_file_versioning'),
('Small Image Embedding', ('EMBED_IMAGE',), 'check_small_image_embedding'),
('Responsive Image Loading', ('RESPONSIVE_IMAGES',), 'check_responsive_image_loading'),
('Asynchronous JS and CSS Loading', ('ASYNC_JAVASCRIPT',), 'check_async_js_and_css_loading'),
('JS Pre-Execution', ('PRE_EXECUTE_JAVASCRIPT',), 'check_js_pre_execution'),
('EDGESTART', ('EDGESTART',), 'check_edgestart'),
('Invoke Click OnTouch', ('BlzFastClick',), 'check_click'),
('Cellular Connection Keep-Alive', ('blzEnableMobileHeartbeat',), 'check_cell'),
]
def __init__(self):
self.parser = Extractor()
self.result = dict((k, False) for k,_,_ in self.CHECKS)
for _, keys, name in CHECKS:
locals()[name] = lambda self, result, _keys=keys: all(result.get(k, 0)>0 for k in _keys)
def process_feo_debug_output(self, analysis_id, url):
feed = self.parser.start_parser(analysis_id, url, True)
result = self.get_feo_tags(feed)
for name, _, func in self.CHECKS:
self.result[name] = (False, True)[getattr(self,func)(result)]
return self.result
def get_feo_tags(self, feed):
result = {}
tag_list = re.findall(r'(?:TextTransApplied):\s*((?:(?:[A-Z]+(?:_[A-Z\d]+)+)?\(\d+\)\s*(?:,\s*|;))*)', str(feed))
for tag in tag_list:
for element in tag.split(","):
index = element.index('(')
if element[:index].strip():
result[element[:index].strip()] = (element.split("(")[1].rstrip(");"))
return result
def check_edgestart(self, result):
return 1 if 'EDGESTART' in result.keys() else 0
def check_click(self, result):
return 1 if 'BlzFastClick' in result.keys() else 0
def check_cell(self, result):
return 1 if 'blzEnableMobileHeartbeat' in result.keys() else 0
which returns me a list of True and False values based on the checks. I want an additional check based on the feed which is input to the get_feo_tags method.
The another check that need to be incorporated is
for img in feed.find_all('img', attrs={'data-blzsrc': True, 'src': lambda x: 'data' not in x}):
#append to the result dict {On Demand Image Loading: True}
How to do this in the current settings.
If soup is the same as feed, you can try this:
#!/usr/bin/env python
#-*- coding:utf-8 -*-
from body_parser import Extractor
import re
class FEOProcessor(object):
CHECKS = [
('Standard JavaScript Inlining Optimization', ('EMBED_JAVASCRIPT',), 'check_js_inlining'),
('HTML5 Advanced Cache', ('JAVASCRIPT_HTML5_CACHE', 'CSS_HTML5_CACHE'), 'check_html5_advanced_cache'),
('Cookieless Resource Domain', ('RENAME_JAVASCRIPT', 'RENAME_CSS'), 'check_cookieless_resource_domain'),
('Minificatiopn of JS', ('MINIFY_JAVASCRIPT',), 'check_js_minifaction'),
('File Versioning', ('RENAME_JAVASCRIPT', 'RENAME_IMAGE', 'RENAME_CSS'), 'check_file_versioning'),
('Small Image Embedding', ('EMBED_IMAGE',), 'check_small_image_embedding'),
('Responsive Image Loading', ('RESPONSIVE_IMAGES',), 'check_responsive_image_loading'),
('Asynchronous JS and CSS Loading', ('ASYNC_JAVASCRIPT',), 'check_async_js_and_css_loading'),
('JS Pre-Execution', ('PRE_EXECUTE_JAVASCRIPT',), 'check_js_pre_execution'),
('EDGESTART', ('EDGESTART',), 'check_edgestart'),
('Invoke Click OnTouch', ('BlzFastClick',), 'check_click'),
('Cellular Connection Keep-Alive', ('blzEnableMobileHeartbeat',), 'check_cell'),
('On Demand Image Loading', ('img',), 'check_img'),
]
def __init__(self):
self.parser = Extractor()
self.result = dict((k, False) for k,_,_ in self.CHECKS)
for _, keys, name in CHECKS:
locals()[name] = lambda self, result, _keys=keys: all(result.get(k, 0)>0 for k in _keys)
def process_feo_debug_output(self, analysis_id, url):
feed = self.parser.start_parser(analysis_id, url, True)
# this works only if feed is the same as soup as you said
result = self.get_feo_tags(feed)
for name, _, func in self.CHECKS:
if name == 'On Demand Image Loading':
self.result[name] = (False, True)[getattr(self,func)(feed)]
else:
self.result[name] = (False, True)[getattr(self,func)(result)]
return self.result
def get_feo_tags(self, feed):
result = {}
tag_list = re.findall(r'(?:TextTransApplied):\s*((?:(?:[A-Z]+(?:_[A-Z\d]+)+)?\(\d+\)\s*(?:,\s*|;))*)', str(feed))
for tag in tag_list:
for element in tag.split(","):
index = element.index('(')
if element[:index].strip():
result[element[:index].strip()] = (element.split("(")[1].rstrip(");"))
return result
def check_edgestart(self, result):
return 1 if 'EDGESTART' in result.keys() else 0
def check_click(self, result):
return 1 if 'BlzFastClick' in result.keys() else 0
def check_cell(self, result):
return 1 if 'blzEnableMobileHeartbeat' in result.keys() else 0
def check_img(self, feed):
return 1 if feed.find_all('img', attrs={'data-blzsrc': True, 'src': lambda x: 'data' not in x}) else 0
Related
I download the source html code from https://down.ali213.net/pcgame/all/falcom-0-0-0-new-pic-1 as the file htmlstr1.html
Then I use the following python code to handle the htmlstr1.html, in the code, I am trying to delete all the {a} tags and {span} tags with the bs4 method extract. However I notice there are still a few {a} tags and {span} tags in the output file htmlstr_extracted1.html
I dont't know where goes wrong, anybody can help?
# 抽取html的特征
from bs4 import BeautifulSoup
from bs4 import NavigableString, Tag, Comment
import re
import hashlib
def read_file(_path):
with open(_path, "r", encoding="utf-8") as f:
_html_str = f.read()
return _html_str
def write_file(_path, _str):
with open(_path, "w", encoding="utf-8") as f:
f.write(_str)
def md5_str(_str):
md = hashlib.md5(_str.encode())
return md.hexdigest()
class HtmlFeatureExtractor:
def __init__(self, _html_str):
self.del_col = []
_html_str = self.replace_spaces_with_space(_html_str)
_html_str = self.remove_all_br(_html_str)
self.soup = BeautifulSoup(_html_str, 'lxml')
def inline_block_check(self, parent_obj):
if isinstance(parent_obj, Tag):
if parent_obj.name in ['p', 'a', 'span', 'h3', 'b', 'h1', 'strong', 'font', 'h1', 'h4', 'li'] and len(
parent_obj.parent.contents) == 1:
self.collect_del_element(parent_obj)
self.inline_block_check(parent_obj.parent)
def collect_del_element(self, element):
if element not in self.del_col:
self.del_col.append(element)
def remove_all_br(self, _str):
_str = _str.replace("\r\n", "")
_str = _str.replace("\r", "")
_str = _str.replace("\n", "")
return _str
def replace_spaces_with_space(self, _str):
pattern = re.compile(r'\s+', re.I | re.S)
_str = pattern.sub(' ', _str)
return _str
def not_others_child(self, element):
for i in self.del_col:
if element is i:
continue
if isinstance(i, Tag) or isinstance(i, NavigableString):
try:
for ii in i.descendants:
if element is ii:
return False
except Exception as err:
continue
return True
def extract(self):
for x in self.soup.descendants:
if isinstance(x, NavigableString):
self.collect_del_element(x)
if isinstance(x.parent, Tag):
if len(x.parent.contents) == 1 and x.parent.name not in ['[document]', 'html', 'body']: # 对inline部分再搞一层,并且子集唯一
self.collect_del_element(x.parent) # 第一层,直接删
self.inline_block_check(x.parent.parent) # 再往后,递归
else:
if x.name in ['script', 'meta', 'link', 'style', 'img', 'a', 'input', 'iframe', 'form', 'p', 'li', 'span']: # 还要包含a标签, !!!写一个去重判断
self.collect_del_element(x) # 后删
if x.name in ['img', 'a', 'iframe']:
self.inline_block_check(x.parent)
self.del_col = [i for i in self.del_col if self.not_others_child(i)]
for y in self.del_col:
if isinstance(y, Tag) or isinstance(y, NavigableString):
y.extract()
# z = y.extract()
# print("-" * 30)
# print(z)
_feature_str = str(self.soup)
_feature_str = self.replace_spaces_with_space(_feature_str)
_feature_str = self.remove_all_br(_feature_str)
write_file("./htmlstr_extracted%d.html" % gg, _feature_str) # todo 调试用
return md5_str(_feature_str)
if __name__ == '__main__':
g_num = [1]
for gg in g_num:
html_str = read_file("./htmlstr%d.html" % gg)
feature_extractor = HtmlFeatureExtractor(html_str)
feature_str = feature_extractor.extract()
print("成功: %s" % feature_str)
my purpose is to extract the feature of a html source, I wish to get the same hash value if the webpage use the same template. the following two pages use the same template, so they should have the same hash value calculate by the above code.
https://down.ali213.net/pcgame/all/falcom-0-0-0-new-pic-1
https://down.ali213.net/pcgame/all/rockstar-0-0-0-new-pic-1
For some reason, in my fruit scraper, i cannot access anything from listify function.
I'am getting an error, for exmaple: NameError: name 'family' is not defined.
And i cant figure out what is wrong with my code - is my function is bad, or i'am doing something wrong with class ?
import requests
import json
import random
import pickle
class FruitScraper():
def __init__(self):
self.name = []
self.id = []
self.family = []
self.genus = []
self.order = []
self.carbohydrates = []
self.protein = []
self.fat = []
self.calories = []
self.sugar = []
def scrape_all_fruits(self):
data_list = []
try:
for ID in range(1, 10):
url = f'https://www.fruityvice.com/api/fruit/{ID}'
response = requests.get(url)
data = response.json()
data_list.append(data)
except:
pass
return data_list
def listify(self, stats):
alist = json.dumps(self.scrape_all_fruits())
jsonSTr = json.loads(alist)
for i in jsonSTr:
try:
self.name.append(i['name'])
self.id.append(i['id'])
self.family.append(i['family'])
self.genus.append(i['genus'])
self.order.append(i['order'])
self.carbohydrates.append(i['nutritions']['carbohydrates'])
self.protein.append(i['nutritions']['protein'])
self.fat.append(i['nutritions']['fat'])
self.calories.append(i['nutritions']['calories'])
self.sugar.append(i['nutritions']['sugar'])
except:
pass
return stats
def get_summary(self):
for i in self.listify(zip(self.fat, self.protein, self.calories, self.sugar, self.carbohydrates, self.name)):
nutr_stats = f'\nNutrients maximum statistics:\nFat: {max(self.fat)}\nProtein: {max(self.protein)}\nCarbohydrates: {max(self.carbohydrates)}\nCalories: {max(self.calories)}\nSugar: {max(self.sugar)}' \
f'\nNutrients minimum statistics:\nFat: {min(self.fat)}\nProtein: {min(self.protein)}\nCarbohydrates: {min(self.carbohydrates)}\nCalories: {min(self.calories)}\nSugar: {min(self.sugar)}' \
f'\nTotal fruits scraped: {len(self.name)}'
return nutr_stats
Scraped_info = FruitScraper().scrape_all_fruits()
Listified_info = FruitScraper().listify(family)
Fruits_statistics = FruitScraper().get_summary()
It's my first time doing OOP.
Please consider changing this
Scraped_info = FruitScraper().scrape_all_fruits()
Listified_info = FruitScraper().listify(family)
Fruits_statistics = FruitScraper().get_summary()
to
myScraper = FruitScraper()
Scraped_info = myScraper.scrape_all_fruits()
myScraper.listify()
Fruits_statistics = myScraper.get_summary()
Otherwise you create three different objects of this class and discard them with all their attributes after running the individual method once.
This might also be critical to define family in this line of the code:
Listified_info = myScraper.listify(family)
But I can't see how you intended to use the parameter stats in your method listify(). It is just received and returned. I suggest that you change:
def listify(self, stats):
to
def listify(self):
and remove
return stats
If you want to get those lists inside the object of this class returned by listify(), you may do the following (but this is not OOP way of doing things):
import requests
import json
import copy
class FruitScraper():
def __init__(self):
self.name = []
self.id = []
self.family = []
self.genus = []
self.order = []
self.carbohydrates = []
self.protein = []
self.fat = []
self.calories = []
self.sugar = []
def collect_all_lists(self):
self.allLists = dict('name': self.name, 'id': self.id, 'family': self.family, 'genus': self.genus, 'order': self.order, 'carbohydrates': self.carbohydrates, 'protein': self.protein, 'fat': self.fat, 'calories': self.calories, 'sugar': self.sugar)
def scrape_all_fruits(self):
data_list = []
try:
for ID in range(1, 10):
url = f'https://www.fruityvice.com/api/fruit/{ID}'
response = requests.get(url)
data = response.json()
data_list.append(data)
except:
pass
return data_list
def listify(self):
alist = json.dumps(self.scrape_all_fruits())
jsonSTr = json.loads(alist)
for i in jsonSTr:
try:
self.name.append(i['name'])
self.id.append(i['id'])
self.family.append(i['family'])
self.genus.append(i['genus'])
self.order.append(i['order'])
self.carbohydrates.append(i['nutritions']['carbohydrates'])
self.protein.append(i['nutritions']['protein'])
self.fat.append(i['nutritions']['fat'])
self.calories.append(i['nutritions']['calories'])
self.sugar.append(i['nutritions']['sugar'])
except:
pass
self.collect_all_lists()
return copy.deepcopy(self.allLists)
def get_summary(self):
for i in self.listify(zip(self.fat, self.protein, self.calories, self.sugar, self.carbohydrates, self.name)):
nutr_stats = f'\nNutrients maximum statistics:\nFat: {max(self.fat)}\nProtein: {max(self.protein)}\nCarbohydrates: {max(self.carbohydrates)}\nCalories: {max(self.calories)}\nSugar: {max(self.sugar)}' \
f'\nNutrients minimum statistics:\nFat: {min(self.fat)}\nProtein: {min(self.protein)}\nCarbohydrates: {min(self.carbohydrates)}\nCalories: {min(self.calories)}\nSugar: {min(self.sugar)}' \
f'\nTotal fruits scraped: {len(self.name)}'
return nutr_stats
myScraper = FruitScraper()
Scraped_info = myScraper.scrape_all_fruits()
Listified_info = myScraper.listify()
Fruits_statistics = myScraper.get_summary()
I have the following Graphene implementation:
import graphene
import json
import psycopg2
import re
connection = psycopg2.connect(user='postgres', password='Steppen1!', host='127.0.0.1', port='5432', database='TCDigital')
cursor = connection.cursor()
paths = {}
class PathError(Exception):
def __init__(self, referencing, referenced):
self.message = "entity {} has no relation with entity {}".format(referencing, referenced)
def __str__(self):
return self.message
def get_columns(entity):
columns = {}
cursor.execute("SELECT ordinal_position, column_name FROM information_schema.columns WHERE table_name = '{}'".format(entity))
resultset = cursor.fetchall()
i = 1
for entry in resultset:
columns[entry[1]] = i
i = i + 1
return columns
def get_previous_annotate(name, entity, related_column, id):
columns = get_columns(entity)
related_position = columns[related_column]-1
entity_content = paths[name][entity]
entity_content_filtered = [entry for entry in entity_content if entry['entry'][related_position] == id]
annotate_to_return = sum(list(map(lambda entry: entry['annotate'], entity_content_filtered)))
return annotate_to_return
def calculate_annotate_operation(entity, entry, entity_columns, operation, operands):
operand1 = entity_columns[operands[0]]
operand2 = entity_columns[operands[1]]
if operation == '_sum':
return entry[operand1] + entry[operand2]
elif operation == '_mult':
return entry[operand1] * entry[operand2]
elif operation == '_div':
return entry[operand1] / entry[operand2]
elif operation == '_rest':
return entry[operand1] - entry[operand2]
else:
return None
def get_annotated_value(name, entity, entry, annotate, entity_columns):
if annotate[0] != '_':
column = entity_columns[annotate]
column_value = entity[column['ordinal_position']]
return column_value
elif annotate == '_count':
return 1
else:
operation = annotate.split('(')
if operation[0] in ['_sum', '_mult', '_div', '_rest']:
operands_base = operation[1].split(')')[0]
operands = operands_base.split(',')
return calculate_annotate_operation(operation[0], operands)
else:
raise "Operación no permitida: {}".format(annotate)
def get_annotate(name, entity, entry, entity_columns, previous_entity, related_column, annotate):
annotated_value = None
previous_entity_columns = get_columns(previous_entity)
if previous_entity:
annotated_value = get_previous_annotate(name, previous_entity, related_column, entry[entity_columns['id']-1])
else:
annotated_value = get_annotated_value(name, entity, entry, annotate, entity_columns)
#print({'name': name, 'entity': entity, 'entry': entry, 'annotated_value': annotated_value})
return annotated_value
def populate_entity(name, entity, entity_columns, previous_entity, previous_entity_relationship_column, annotate):
cursor.execute('SELECT * FROM {}'.format(entity))
resultset = cursor.fetchall()
paths[name][entity] = []
for entry in resultset:
if previous_entity:
entry_annotate = get_annotate(name, entity, entry, entity_columns, previous_entity, previous_entity_relationship_column, annotate)
else:
entry_annotate = get_annotate(name, entity, entry, entity_columns, previous_entity, None, annotate)
paths[name][entity].append({'entry': entry, 'entity_columns': entity_columns, 'annotate': entry_annotate, 'previos_entity': previous_entity, 'previous_entity_relationship_column': previous_entity_relationship_column})
def create_path(name, entities, annotate):
paths[name] = {}
previous_entity = None
for entity in reversed(entities):
previous_entity_relationship_column = None
if previous_entity:
previous_entity_relationships = get_foreign_relationships(previous_entity)
previous_entity_relationship = [relationship for relationship in previous_entity_relationships if relationship[5] == entity][0]
previous_entity_relationship_column = previous_entity_relationship[3]
entity_columns = get_columns(entity)
populate_entity(name, entity, entity_columns, previous_entity, previous_entity_relationship_column, annotate)
previous_entity = entity
def get_foreign_relationships(entity):
cursor.execute('''
SELECT
tc.table_schema, tc.constraint_name, tc.table_name, kcu.column_name, ccu.table_schema AS foreign_table_schema, ccu.table_name AS foreign_table_name, ccu.column_name AS foreign_column_name
FROM information_schema.table_constraints AS tc
JOIN information_schema.key_column_usage AS kcu ON tc.constraint_name = kcu.constraint_name
AND tc.table_schema = kcu.table_schema
JOIN information_schema.constraint_column_usage AS ccu ON ccu.constraint_name = tc.constraint_name
AND ccu.table_schema = tc.table_schema
WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name='{}';'''.format(entity))
result = cursor.fetchall()
result_array = []
for record in result:
new_entity = Entity(name=record[5])
result_array.append(new_entity)
return result
def is_relationship(referencing, referenced):
foreign_relationships = get_foreign_relationships(referencing)
if referenced in list(map(lambda relationship: relationship[5], foreign_relationships)):
return True
else:
return False
def traverse(entities, direction):
for i in range(len(entities)):
if i > 0 and i < len(entities)-1:
if not is_relationship(entities[i], entities[i-1]):
raise PathError(entities[i], entities[i-1])
return True
def validate_path(path):
entities = path.split('/')
traverse(entities, 'forward')
return entities
def get_path_step(name, step, key):
content = paths[name][step]
if key is None:
filtered_content = [{'entry': entry['entry'], 'annotate': entry['annotate']} for entry in content]
else:
if content['previous_entity_relationship_column'] is not None:
previous_entity_relationship_column = content['previous_entity_relationship_column']
relationship_column_index = content['entity_columns'][previous_entity_relationship_column]
filtered_content = [{'entry': entry['entry'], 'annotate': entry['annotate']} for entry in content if entry[relationship_column_index] == key]
return filtered_content
class Entity(graphene.ObjectType):
name = graphene.String()
annotate = graphene.Float()
content = graphene.Field(graphene.List(lambda: Entity))
class Query(graphene.ObjectType):
entity_relationships = graphene.List(Entity, entity=graphene.String())
postgresql_version = graphene.String
path = graphene.String(name=graphene.String(), path=graphene.String(), annotate=graphene.String(), current=graphene.String(), key=graphene.Int())
path_step = graphene.String(name=graphene.String(), step=graphene.String(), key=graphene.Int())
#staticmethod
def resolve_path_step(parent, info, name, step, key):
path_step = get_path_step(name, step, key)
print(name)
print(step)
print(key)
print(path_step)
return path_step
#staticmethod
def resolve_path(parent, info, name, path, annotate, current, key):
entities = validate_path(path)
create_path(name, entities, annotate)
content_to_return = get_path_step(name, entities[0], None)
return content_to_return
#staticmethod
def resolve_entity_relationships(parent, info, entity):
result_array = get_foreign_relationships(entity)
return result_array
#staticmethod
def resolve_postgresql_version(parent, info):
cursor.execute("SELECT version();")
record = cursor.fetchone()
return record
def execute_query(query_to_execute):
queries = {
'postgresqlVersion': '''
{
postgresqlVersion
}
''',
'entityRelationships': '''
{
entityRelationships (entity: "inventory_productitem") {
name
}
}
''',
'path': '''
{
path(name: "Ventas", path: "general_state/general_city/inventory_store/operations_sale", annotate: "_count", current: "inventory_product", key: 0)
}
''',
'path_step': '''
{
path_step(name: "Ventas", step: "inventory_store", key: 27)
}
'''
}
schema = graphene.Schema(query=Query)
result = schema.execute(queries[query_to_execute])
dict_result = dict(result.data.items())
print(json.dumps(dict_result, indent=2))
result2 = schema.execute(queries['path_step'])
dict_result2 = dict(result2.data.items())
print(json.dumps(dict_result2, indent=2))
execute_query('path')
Te first call to schema.execute() works with no problem, but the second one doesn't even enter the resolver, and the only error message I get is:
Traceback (most recent call last):
File "query.py", line 249, in <module>
execute_query('path')
File "query.py", line 246, in execute_query
dict_result2 = dict(result2.data.items())
AttributeError: 'NoneType' object has no attribute 'items'
I don't know what I am missing.
I have found that the problem was that I am making a pythonic pascal-cased call to Graphene query: path_step(name: "Ventas", step: "inventory_store", key: 27), but Graphene requieres queries to be called on a camel-cased fashion, even when the name of the resolvers and query variables are pascal-cased in the code.
So the call to the query must by camel-cased like this: pathStep(name: "Ventas", step: "inventory_store", key: 27)
i'm failing at dynamically changing the dropdown options for one field, after setting another field in a dexterity form.Schema.
the vocabularies are based in a sql database.
specifically i want to update the vocabulary options for township after selecting the county.
right now i am just pulling the full list of townships regardless of the country selected. as all attempts to dynamically change things led to errors.
any help on this would be appreciated. thanks!
from site.py:
from selectionProvider import SelectionProvider
vocabProvider = SelectionProvider()
#grok.provider(IContextSourceBinder)
def countySource(context):
return vocabProvider.getVocabulary('county')
#grok.provider(IContextSourceBinder)
def townshipSource(context):
return vocabProvider.getVocabulary('township')
class IESurrenderSite(form.Schema):
county = schema.Choice(
title=_(u"County"),
source=countySource,
required=True,
)
township = schema.Choice(
title=_(u"Township"),
source=townshipSource,
required=True,
)
...
from selectiorProvider.py:
class SelectionProvider(object):
DB = maap_db.maap_db()
vocabularies = {}
Counties = {}
Townships = {}
def getCountiesDropdownRecordSet(self, object):
"""input:object is a string
"""
print 'in getCountiesDropdownRecordSet'
self.DB.open()
rs = self.DB.RunQuery("Select *, CountyID as [value], name as [title] From County Order By name;")
self.DB.close()
SelectionProvider.CountiesByName = {}
for rec in rs:
SelectionProvider.CountiesByName[rec['CountyID']] = rec['title']
#
return rs
def getTownshipDropdownRecordSet(self, object):
"""input:object is a string
"""
print 'in getTownshipDropdownRecordSet'
self.DB.open()
rs = self.DB.RunQuery("Select *, TownshipID as [value], name as [title] From Township Order By name;")
self.DB.close()
SelectionProvider.TownshipsByName = {}
for rec in rs:
SelectionProvider.TownshipsByName[rec['TownshipID']] = rec['title']
#
return rs
# #
def getDropdownRecordSet(self, object):
"""input:object is a string
"""
print 'in getDropdownRecordSet'
self.DB.open()
rs = self.DB.RunQuery("Select * From DropdownSelections Where object = '%s' Order By seqNo;" % (object))
self.DB.close()
return rs
def buildVocabulary(self, rs, valueField='value', titleField='title'):
"""DO NOT USE directly outside this class, see getVocabulary() or rebuildVocabulary() instead
"""
data = []
for rec in rs:
data.append(SimpleTerm(value=rec[valueField], title=_(rec[titleField])))
#
return SimpleVocabulary(data)
#
def rebuildVocabulary(self, object):
"""Force a fetch from the database and rebuild the vocabulary.
input object: a string, matches the DropdownSelections field
"""
print 'initializing %s' % (object)
if object=="county":
print 'going to CountiesDropdowns'
vocab = self.buildVocabulary(self.getCountiesDropdownRecordSet(object), "CountyID","title")
SelectionProvider.vocabularies[object] = vocab
return vocab
if object=="township":
print 'going to TownshipDropdowns'
vocab = self.buildVocabulary(self.getTownshipDropdownRecordSet(object), "TownshipID","title")
SelectionProvider.vocabularies[object] = vocab
#print _SITE_NAME, '%s selection list initialized.' % (object)
return vocab
else:
vocab = self.buildVocabulary(self.getDropdownRecordSet(object))
SelectionProvider.vocabularies[object] = vocab
return vocab
def getVocabulary(self, object):
"""Retrieve cached vocabulary
input object: a string, matches the DropdownSelections field
"""
recreate = False
if not SelectionProvider.vocabularies.has_key(object):
recreate = True
#
vocab = SelectionProvider.vocabularies.get(object)
if vocab == None or len(vocab) == 0:
recreate = True
#
if recreate:
vocab = self.rebuildVocabulary(object)
#
return vocab
You can do this with plone.formwidget.masterselect
Like this (untested, but gives you an idear of how it works):
from zope import schema
from plone.supermodel import model
from plone.formwidget.masterselect import _
from plone.formwidget.masterselect import MasterSelectBoolField
from plone.formwidget.masterselect import MasterSelectField
def getTownshipDynVocab(master):
CountryID = master_value
# search for your township entries by CountryID
# and return it as a DisplayList
return townshipDynamicVocab
class IESurrenderSite(model.Schema):
county = MasterSelectField(
title=_(u"County"),
source=countySource,
slave_fields=(
# Controls the vocab of township
{'name': 'township',
'action': 'vocabulary',
'vocab_method': getTownshipDynVocab,
},
),
required=True,
)
township = schema.Set(
title=_(u"Township"),
value_type=schema.Choice(),
required=False,
)
I'd like to generate a stub SOAP web service class using the Python soaplib module, based on an existing WSDL. The idea is to generate a mock for a third party web service.
Does any such code generator exist, or must we write our own?
Martin
Okay, I had a go at hacking my wsdl2interface (http://pypi.python.org/pypi/wsdl2interface) script to output soaplib code. I think I have something that works, though it's not pretty or especially well tested.
I'll paste it here for the record. I could be persuaded to release it if someone needs it, though it's not exactly my best code. Note that it uses Suds' WSDL parser to generate soaplib code, which is a bit strange in itself.
Run like this:
$ wsdl2soaplib <url or filename of WSDL> > wsdl.py
The code (you'll need suds in your path, ideally in a virtualenv):
from StringIO import StringIO
import os.path
import sys
import textwrap
import keyword
import re
import suds.client
VALID_IDENTIFIER_RE = re.compile(r"[_A-Za-z][_A-Za-z1-9]*")
VALID_IDENTIFIER_FIRST_LETTER_RE = re.compile(r"[_A-Za-z]")
VALID_IDENTIFIER_SUBSEQUENT_LETTER_RE = re.compile(r"[_A-Za-z1-9]")
HEADER = '''\
"""SOAP web services generated from:
%(wsdl)s.
"""
from soaplib.serializers.primitive import (
String, Integer, Float, Double, DateTime, Bolean, Null, Array, Map, Any
)
from soaplib.serializers.clazz import ClassSerializer
from soaplib.service import SoapServiceBase
from soaplib.service import soapmethod
'''
INTERFACE = '''\
class %(name)s(%(bases)s):
"""%(docstring)s"""
'''
SERVICE_INTERFACE_DOCSTRING = '''\
SOAP service ``%(serviceName)s`` with target namespace %(tns)s.
'''
TYPE_INTERFACE_DOCSTRING = '''\
SOAP %(type)s ``{%(namespace)s}%(name)s``
'''
TYPE_MAP = '''\
WSDL_TYPES = {
%(items)s
}
'''
SOAPMETHOD = ''' #soapmethod(%(args)s, _returns=%(response)s)'''
METHOD = ''' def %(name)s(self, %(args)s):'''
METHOD_DOCSTRING = '''\
"""Parameters:
%(args)s
Returns: %(response)s
"""
'''
STANDARD_TYPE_NAMESPACES = [
'http://schemas.xmlsoap.org/soap/encoding/',
'http://schemas.xmlsoap.org/wsdl/',
'http://www.w3.org/2001/XMLSchema'
]
SCHEMA_TYPE_MAPPING = {
None: '%(typeName)s',
'None': 'None',
'boolean': 'Boolean',
'string': 'String',
'long': 'Integer',
'int': 'Integer',
'short': 'Integer',
'byte': 'Integer',
'unsignedLong': 'Integer',
'unsignedInt': 'Integer',
'unsignedShort': 'Integer',
'unsignedByte': 'Integer',
'positiveInteger': 'Integer',
'nonPositiveInteger': 'Integer',
'negativeInteger': 'Integer',
'nonNegativeInteger': 'Integer',
'float': 'Float',
'double': 'Float',
'decimal': 'Decimal',
'dateTime': 'DateTime',
'date': 'DateTime',
'anyURI': 'String',
'token': 'String',
'normalizedString': 'String',
'base64Binary': 'String',
'hexBinary': 'String',
}
def formatDocstring(text, indent=4, colwidth=78):
width = colwidth - indent
joiner = '\n' + ' ' * indent
return joiner.join(textwrap.wrap(text, width) + [''])
def typeName(type, sd):
resolved = type.resolve()
return resolved.name or ''
def schemaTypeName(type, sd, deps=None):
resolved = type.resolve()
name = resolved.name or ''
schemaType = SCHEMA_TYPE_MAPPING.get(name)
if schemaType is None: # not a standard type
# user default
schemaType = SCHEMA_TYPE_MAPPING[None]
# possibly save dependency link
if deps is not None:
deps.append(unicode(name))
required = type.required()
schemaType = schemaType % dict(typeName=name, required=required)
if type.unbounded():
schemaType = "Array(%s)" % schemaType
return schemaType
def normalizeIdentifier(identifier):
if not VALID_IDENTIFIER_RE.match(identifier):
newIdentifierLetters = []
firstLetter = True
for letter in identifier:
if firstLetter:
if VALID_IDENTIFIER_FIRST_LETTER_RE.match(letter):
newIdentifierLetters.append(letter)
else:
newIdentifierLetters.append('_')
firstLetter = False
else:
if VALID_IDENTIFIER_SUBSEQUENT_LETTER_RE.match(letter):
newIdentifierLetters.append(letter)
else:
newIdentifierLetters.append('_')
identifier = ''.join(newIdentifierLetters)
if keyword.iskeyword(identifier):
identifier = identifier + '_'
return identifier
def generate(client, url=None, standardTypeNamespaces=STANDARD_TYPE_NAMESPACES, removeInputOutputMesssages=True):
"""Given a WSDL URL, return a file that could become your interfaces.py
"""
printed = [] # sequence of type name -> string
for sd in client.sd:
serviceOut = StringIO()
print >>serviceOut, HEADER % dict(
wsdl=url,
)
printed.append(('', serviceOut.getvalue(),))
# Types
typeMap = {}
typeSeq = []
typeDeps = {}
typeAttributes = {}
typesPrinted = []
for type_ in sd.types:
typeOut = StringIO()
resolved = type_[0].resolve()
namespaceURL = resolved.namespace()[1]
if namespaceURL not in standardTypeNamespaces:
if resolved.enum():
typeDescription = "enumeration"
else:
typeDescription = "complex type"
# Look for basess
interfaceBases = []
if resolved.extension():
def find(t):
for c in t.rawchildren:
if c.extension():
find(c)
if c.ref is not None:
interfaceBases.append(c.ref[0])
find(resolved)
if not interfaceBases:
interfaceBases = ['ClassSerializer']
rawTypeName = typeName(type_[0], sd)
typeInterfaceName = normalizeIdentifier(rawTypeName)
typeMap[rawTypeName] = typeInterfaceName
typeSeq.append((rawTypeName, typeInterfaceName,))
typeAttributes[rawTypeName] = {}
print >>typeOut, INTERFACE % dict(
name=normalizeIdentifier(typeInterfaceName),
bases=', '.join(interfaceBases),
docstring=formatDocstring(TYPE_INTERFACE_DOCSTRING % dict(
type=typeDescription,
name=rawTypeName,
namespace=namespaceURL,
)
)
)
print >>typeOut, " class types:"
if resolved.enum():
for attr in type_[0].children():
name = attr[0].name.replace(' ', '_')
print >>typeOut, " %s = String # XXX: Enumeration value" % name
else:
for attr in type_[0].children():
name = attr[0].name.replace(' ', '_')
attrTypeName = typeName(attr[0], sd)
typeAttributes[rawTypeName][name] = attrTypeName
schemaType = schemaTypeName(attr[0], sd, deps=typeDeps.setdefault(unicode(rawTypeName), []))
print >>typeOut, " %s = %s" % (normalizeIdentifier(name), schemaType,)
print >>typeOut
typesPrinted.append((rawTypeName, typeOut.getvalue(),))
serviceInterfaceOut = StringIO()
# Main service interface
print >>serviceInterfaceOut, INTERFACE % dict(
name=normalizeIdentifier(sd.service.name),
bases=u"SoapServiceBase",
docstring=formatDocstring(SERVICE_INTERFACE_DOCSTRING % dict(
serviceName=sd.service.name,
tns=sd.wsdl.tns[1],
)
)
)
methods = {} # name -> (response type, list of parameters,)
for p in sd.ports:
for m in p[1]:
methodName = m[0]
methodArgs = m[1]
if methodName not in methods:
methodDef = p[0].method(methodName)
# XXX: This is discards the namespace part
if methodDef.soap.output.body.wrapped:
inputMessage = methodDef.soap.input.body.parts[0].element[0]
outputMessage = methodDef.soap.output.body.parts[0].element[0]
if outputMessage in typeAttributes:
if len(typeAttributes[outputMessage]) > 0:
response = typeAttributes[outputMessage].values()[0]
else:
response = "None"
else:
response = outputMessage
# Remove types used as input/output messages
if removeInputOutputMesssages:
remove = False
for idx, (t, x) in enumerate(typesPrinted):
if t == inputMessage:
remove = True
break
if remove:
del typesPrinted[idx]
if inputMessage in typeMap:
del typeMap[inputMessage]
remove = False
for idx, (t, x) in enumerate(typesPrinted):
if t == outputMessage:
remove = True
break
if remove:
del typesPrinted[idx]
if outputMessage in typeMap:
del typeMap[outputMessage]
else:
response = methodDef.soap.output.body.parts[0].element[0]
methods[methodName] = (response, methodArgs,)
for methodName in sorted(methods):
methodArgNames = [m[0] for m in methods[methodName][1]]
methodReturnType = methods[methodName][0]
methodArgDetails = []
methodArgSpecs = []
for m in methods[methodName][1]:
argDetail = m[1]
# for docstring
methodModifierParts = []
if not argDetail.required():
methodModifierParts.append('optional')
if argDetail.nillable:
methodModifierParts.append('may be None')
methodModifiers = ""
if methodModifierParts:
methodModifiers = ' (%s)' % ', '.join(methodModifierParts)
argTypeName = typeName(argDetail, sd)
methodSpec = "``%s`` -- %s%s" % (
argDetail.name,
argTypeName,
methodModifiers
)
methodArgDetails.append(methodSpec)
# for #soapmethod decorator
schemaType = schemaTypeName(argDetail, sd)
methodArgSpecs.append(schemaType)
# TODO: Probably not aware of array return types
if methodReturnType not in typeMap and methodReturnType in SCHEMA_TYPE_MAPPING:
methodReturnType = SCHEMA_TYPE_MAPPING[methodReturnType]
print >>serviceInterfaceOut, SOAPMETHOD % dict(
args=', '.join(methodArgSpecs),
response=methodReturnType,
)
print >>serviceInterfaceOut, METHOD % dict(
name=normalizeIdentifier(methodName),
args=', '.join(methodArgNames),
)
print >>serviceInterfaceOut, METHOD_DOCSTRING % dict(
args='\n '.join(methodArgDetails),
response=methodReturnType,
)
print >>serviceInterfaceOut
# Sort list of complex types based on internal dependencies
def sortDeps(printed):
printed = list(reversed(printed))
queue = [item for item in printed if len(typeDeps.get(unicode(item[0]), [])) == 0]
satisfied = set(queue)
remaining = [item for item in printed if item not in queue]
sortedPrinted = []
while queue:
item = queue.pop()
itemTypeName = unicode(item[0])
sortedPrinted.append(item)
satisfied.add(itemTypeName)
for item in remaining:
remainingItemTypeName = unicode(item[0])
depsList = typeDeps.get(remainingItemTypeName, [])
remainingDeps = []
for dep in depsList:
if dep not in satisfied:
remainingDeps.append(dep)
typeDeps[remainingItemTypeName] = remainingDeps
if len(remainingDeps) == 0:
queue.append(item)
remaining.remove(item)
return sortedPrinted
typesPrinted = sortDeps(typesPrinted)
# Print everything
printed.extend(typesPrinted)
printed.append((sd.service.name, serviceInterfaceOut.getvalue(),))
typeMapOut = StringIO()
print >>typeMapOut, TYPE_MAP % dict(
items=',\n'.join([" '%s': %s" % k for k in typeSeq if k[0] in typeMap])
)
print >>typeMapOut
printed.append(('', typeMapOut.getvalue(),))
return '\n'.join([v[1] for v in printed])
def main():
if len(sys.argv) < 2:
print "Usage: %s <url>" % sys.argv[0]
print "The output will be printed to the console"
return
if not '://' in sys.argv[1]:
sys.argv[1] = 'file://' + os.path.abspath(sys.argv[1])
client = suds.client.Client(sys.argv[1])
print generate(client, sys.argv[1])
if __name__ == '__main__':
main()
I have just created a github repository where I'm improving on optilude's script to make it work with soaplib2.0 and more. The link is https://github.com/fvieira/wsdl2soaplib.