construct mongoDB query dynamically in Pymongo - python

I have SCIM search request body like this,
{
"schemas": ["urn:ietf:params:scim:api:messages:2.0:SearchRequest"],
"attributes": ["displayName", "userName"],
"excludedAttributes": ["emails"],
"filter":"displayName sw \"smith\"",
"startIndex": 1,
"count": 10,
"sortBy": "userName",
"sortOrder": "ascending"
}
All of the above attributes are optional, except for the "schemas" attribute.
Because all the other attributes are optional, I have to construct the query conditionally.
Below is the code that handles this; as you can see, the many conditions make the code look untidy.
data = request.get_json()
a = {}
attributes = data.get('attributes',[])
sortby = data.get('sortBy',None)
sortorder = data.get('sortOrder',None)
if not attributes:
pass
else:
for i in attributes:
if i not in a:
a[i]=1
excludedAttributes = data.get('excludedAttributes',[])
if not excludedAttributes:
pass
else:
for i in excludedAttributes:
if i not in a:
a[i]=0
# Pick the cursor for each combination of optional projection (`a`), sort key
# and sort order.  A missing sortOrder means ascending.  The branches that use
# a projection test the order positively: testing `not sortorder == ...`
# inverted the requested direction and let a later branch overwrite the
# default.
if not a and not sortby:
    result = mongo.db.test.find({}, )
if a and not sortby:
    result = mongo.db.test.find({}, a)
if not a and sortby and not sortorder:
    result = mongo.db.test.find({}, ).sort([(sortby, flask_pymongo.ASCENDING)])
if a and sortby and not sortorder:
    result = mongo.db.test.find({}, a).sort([(sortby, flask_pymongo.ASCENDING)])
if not a and sortby and sortorder == 'ascending':
    result = mongo.db.test.find({}, ).sort([(sortby, flask_pymongo.ASCENDING)])
if a and sortby and sortorder == 'ascending':
    result = mongo.db.test.find({}, a).sort([(sortby, flask_pymongo.ASCENDING)])
if not a and sortby and sortorder == 'descending':
    result = mongo.db.test.find({}, ).sort([(sortby, flask_pymongo.DESCENDING)])
if a and sortby and sortorder == 'descending':
    result = mongo.db.test.find({}, a).sort([(sortby, flask_pymongo.DESCENDING)])
for i in result:
full_data.append(i)
resp = jsonify(json.loads(dumps(full_data)))
return resp
If I also include pagination, even more conditions will pile up.
How do I construct these queries effectively?

data = request.get_json()
a = {}
attributes = data.get('attributes',[])
sortby = data.get('sortBy',None)
sortorder = data.get('sortOrder',None)
if not attributes:
pass
else:
for i in attributes:
if i not in a:
a[i]=1
excludedAttributes = data.get('excludedAttributes',[])
if not excludedAttributes:
pass
else:
for i in excludedAttributes:
if i not in a:
a[i]=0
result = mongo.db.test.find({}, )
if a:
result = mongo.db.test.find({}, a)
if sortby:
if sortorder == "descending":
sortorder = flask_pymongo.DESCENDING
else:
sortorder = flask_pymongo.ASCENDING
result = result.sort([(sortby, sortorder)])
for i in result:
full_data.append(i)
resp = jsonify(json.loads(dumps(full_data)))
return resp

Related

Graphene doesn't execute query

I have the following Graphene implementation:
import graphene
import json
import psycopg2
import re
connection = psycopg2.connect(user='postgres', password='Steppen1!', host='127.0.0.1', port='5432', database='TCDigital')
cursor = connection.cursor()
paths = {}
class PathError(Exception):
    """Raised when one entity of a path has no relation with another.

    Args:
        referencing: Name of the entity expected to hold the relation.
        referenced: Name of the entity it should be related to.
    """

    def __init__(self, referencing, referenced):
        self.message = "entity {} has no relation with entity {}".format(referencing, referenced)
        # Hand the message to Exception as well so that ``args``, ``repr()``
        # and pickling carry it instead of an empty tuple.
        super().__init__(self.message)

    def __str__(self):
        return self.message
def get_columns(entity):
    """Map each column name of table *entity* to its 1-based position.

    Positions are assigned by enumerating the rows returned from
    ``information_schema.columns``, so callers that index a row tuple with
    them must subtract 1.

    Args:
        entity: Table name to inspect.

    Returns:
        dict mapping column name to position, starting at 1.
    """
    # Bind the table name as a query parameter instead of formatting it into
    # the SQL text, and order explicitly so the numbering is deterministic.
    cursor.execute(
        "SELECT ordinal_position, column_name FROM information_schema.columns "
        "WHERE table_name = %s ORDER BY ordinal_position",
        (entity,),
    )
    return {row[1]: position for position, row in enumerate(cursor.fetchall(), start=1)}
def get_previous_annotate(name, entity, related_column, id):
    """Add up the annotations of the stored rows of *entity* that point at *id*.

    Reads the rows kept under ``paths[name][entity]`` and sums the
    ``'annotate'`` value of every row whose *related_column* equals *id*.
    """
    # get_columns() numbers columns from 1; row tuples are indexed from 0.
    column_index = get_columns(entity)[related_column] - 1
    total = 0
    for stored in paths[name][entity]:
        if stored['entry'][column_index] == id:
            total = total + stored['annotate']
    return total
def calculate_annotate_operation(entity, entry, entity_columns, operation, operands):
    """Apply a binary arithmetic *operation* to two columns of one row.

    Args:
        entity: Table name the row belongs to (not used by the computation).
        entry: Row tuple, indexed from 0.
        entity_columns: Mapping of column name to 1-based position, in the
            form produced by get_columns().
        operation: One of '_sum', '_mult', '_div', '_rest'.
        operands: Sequence whose first two items are column names.

    Returns:
        The result of the operation, or None when *operation* is unknown.
    """
    operations = {
        '_sum': lambda left, right: left + right,
        '_mult': lambda left, right: left * right,
        '_div': lambda left, right: left / right,
        '_rest': lambda left, right: left - right,
    }
    # Column positions are 1-based while the row tuple is 0-based, hence the
    # "- 1".  Operand names are stripped so "a, b" works like "a,b".
    left = entry[entity_columns[operands[0].strip()] - 1]
    right = entry[entity_columns[operands[1].strip()] - 1]
    apply = operations.get(operation)
    if apply is None:
        return None
    return apply(left, right)
def get_annotated_value(name, entity, entry, annotate, entity_columns):
    """Compute the annotation of one row from the *annotate* expression.

    *annotate* is either a plain column name, the literal ``'_count'``, or a
    binary operation written as ``'_sum(a,b)'``, ``'_mult(a,b)'``,
    ``'_div(a,b)'`` or ``'_rest(a,b)'``.

    Args:
        name: Path name (not used here).
        entity: Table name the row belongs to.
        entry: Row tuple, indexed from 0.
        annotate: Annotation expression as described above.
        entity_columns: Mapping of column name to 1-based position.

    Returns:
        The column value, 1 for '_count', or the result of the operation.

    Raises:
        ValueError: If *annotate* names an unsupported operation.
    """
    if not annotate.startswith('_'):
        # Plain column name: positions are 1-based, the row tuple is 0-based.
        return entry[entity_columns[annotate] - 1]
    if annotate == '_count':
        return 1
    operation, _, arguments = annotate.partition('(')
    if operation not in ('_sum', '_mult', '_div', '_rest'):
        # A bare string cannot be raised; use a real exception type.
        raise ValueError("Operación no permitida: {}".format(annotate))
    operands = [operand.strip() for operand in arguments.split(')')[0].split(',')]
    return calculate_annotate_operation(entity, entry, entity_columns, operation, operands)
def get_annotate(name, entity, entry, entity_columns, previous_entity, related_column, annotate):
    """Return the annotation for one row of *entity*.

    When *previous_entity* is given, the annotation is the sum of the
    annotations already stored for the rows of *previous_entity* that
    reference this row's ``id``.  Otherwise it is computed from the row
    itself with the *annotate* expression.
    """
    if previous_entity:
        # The 'id' position is 1-based; the row tuple is 0-based.
        row_id = entry[entity_columns['id'] - 1]
        return get_previous_annotate(name, previous_entity, related_column, row_id)
    # No column lookup for previous_entity is made here: its result was never
    # used, and it cost one catalog query per row.
    return get_annotated_value(name, entity, entry, annotate, entity_columns)
def populate_entity(name, entity, entity_columns, previous_entity, previous_entity_relationship_column, annotate):
    """Load every row of table *entity* into ``paths[name][entity]``.

    Each stored item is a dict holding the row, the column map, the row's
    annotation and the relationship metadata passed in.
    """
    cursor.execute('SELECT * FROM {}'.format(entity))
    rows = cursor.fetchall()
    paths[name][entity] = []
    # The relationship column is only forwarded to get_annotate() when there
    # is a previous entity to relate to.
    related_column = previous_entity_relationship_column if previous_entity else None
    for row in rows:
        row_annotate = get_annotate(name, entity, row, entity_columns, previous_entity, related_column, annotate)
        record = {
            'entry': row,
            'entity_columns': entity_columns,
            'annotate': row_annotate,
            # Key spelling kept exactly as stored by the original code.
            'previos_entity': previous_entity,
            'previous_entity_relationship_column': previous_entity_relationship_column,
        }
        paths[name][entity].append(record)
def create_path(name, entities, annotate):
    """Build ``paths[name]`` by loading every entity of the path.

    Entities are processed from the last one back to the first, so each
    entity can aggregate the annotations already stored for the entity that
    follows it in the path.

    Args:
        name: Key under which the path is stored in ``paths``.
        entities: Ordered list of table names.
        annotate: Annotation expression applied to the last entity.

    Raises:
        PathError: If an entity has no foreign key to the entity before it.
    """
    paths[name] = {}
    previous_entity = None
    for entity in reversed(entities):
        previous_entity_relationship_column = None
        if previous_entity:
            # Row layout comes from get_foreign_relationships(): index 3 is
            # the referencing column, index 5 the referenced table.
            relationship = next(
                (row for row in get_foreign_relationships(previous_entity) if row[5] == entity),
                None,
            )
            if relationship is None:
                # Report the broken link instead of failing with IndexError.
                raise PathError(previous_entity, entity)
            previous_entity_relationship_column = relationship[3]
        entity_columns = get_columns(entity)
        populate_entity(name, entity, entity_columns, previous_entity, previous_entity_relationship_column, annotate)
        previous_entity = entity
def get_foreign_relationships(entity):
    """Return the foreign-key constraints declared on table *entity*.

    Each row is a tuple of
    ``(table_schema, constraint_name, table_name, column_name,
    foreign_table_schema, foreign_table_name, foreign_column_name)``,
    so index 3 is the referencing column and index 5 the referenced table.
    """
    # The table name is bound as a parameter rather than formatted into the
    # SQL text.  The raw rows are returned because callers index them by
    # position.
    cursor.execute('''
        SELECT
            tc.table_schema, tc.constraint_name, tc.table_name, kcu.column_name, ccu.table_schema AS foreign_table_schema, ccu.table_name AS foreign_table_name, ccu.column_name AS foreign_column_name
        FROM information_schema.table_constraints AS tc
        JOIN information_schema.key_column_usage AS kcu ON tc.constraint_name = kcu.constraint_name
            AND tc.table_schema = kcu.table_schema
        JOIN information_schema.constraint_column_usage AS ccu ON ccu.constraint_name = tc.constraint_name
            AND ccu.table_schema = tc.table_schema
        WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name = %s;''', (entity,))
    return cursor.fetchall()
def is_relationship(referencing, referenced):
    """Tell whether table *referencing* has a foreign key to *referenced*."""
    # Index 5 of each relationship row is the referenced table name.
    referenced_tables = [row[5] for row in get_foreign_relationships(referencing)]
    return referenced in referenced_tables
def traverse(entities, direction):
    """Check that every entity of the path references the one before it.

    Args:
        entities: Ordered list of table names.
        direction: Accepted for interface compatibility; not used.

    Returns:
        True when every adjacent pair is related.

    Raises:
        PathError: For the first entity with no foreign key to its predecessor.
    """
    # Pair each entity with its successor so the final link of the path is
    # checked too; stopping at len(entities) - 1 left it unvalidated.
    for referenced, referencing in zip(entities, entities[1:]):
        if not is_relationship(referencing, referenced):
            raise PathError(referencing, referenced)
    return True
def validate_path(path):
    """Split a '/'-separated path into entity names and check its links.

    Returns the list of entity names; traverse() raises if the path is
    not valid.
    """
    steps = path.split('/')
    traverse(steps, 'forward')
    return steps
# Return the stored rows of one step of a named path as a list of
# {'entry': ..., 'annotate': ...} dicts, optionally filtered by `key`.
# paths[name][step] is the list filled by populate_entity(); each item is a
# dict with 'entry', 'entity_columns', 'annotate' and relationship metadata.
def get_path_step(name, step, key):
content = paths[name][step]
if key is None:
# No key given: return every stored row with its annotation.
filtered_content = [{'entry': entry['entry'], 'annotate': entry['annotate']} for entry in content]
else:
# NOTE(review): `content` is a list, so indexing it with a string key raises
# TypeError; the relationship column is stored on each item instead.
# Confirm which lookup was intended.
if content['previous_entity_relationship_column'] is not None:
previous_entity_relationship_column = content['previous_entity_relationship_column']
relationship_column_index = content['entity_columns'][previous_entity_relationship_column]
# NOTE(review): `entry` here is the stored dict, not the row tuple, and
# 'entity_columns' positions are 1-based; entry['entry'][index - 1] is
# probably what was meant. Confirm.
filtered_content = [{'entry': entry['entry'], 'annotate': entry['annotate']} for entry in content if entry[relationship_column_index] == key]
# NOTE(review): filtered_content is unbound here when a key is given but the
# relationship column is None.
return filtered_content
# GraphQL object type describing one entity of a path.
class Entity(graphene.ObjectType):
# Entity name; get_foreign_relationships() fills it with a referenced table name.
name = graphene.String()
# Numeric annotation attached to the entity.
annotate = graphene.Float()
# Nested entities; the lambda defers the reference to Entity until the class exists.
content = graphene.Field(graphene.List(lambda: Entity))
class Query(graphene.ObjectType):
    """Root query type exposing the path and schema-inspection fields.

    The resolvers take the parent value as their first argument rather than
    ``self``; the ``#staticmethod`` markers are kept as a reminder of that.
    """

    entity_relationships = graphene.List(Entity, entity=graphene.String())
    # The scalar has to be instantiated: the bare ``graphene.String`` class is
    # not an instance, so it is not registered as a field of the schema.
    postgresql_version = graphene.String()
    path = graphene.String(name=graphene.String(), path=graphene.String(), annotate=graphene.String(), current=graphene.String(), key=graphene.Int())
    path_step = graphene.String(name=graphene.String(), step=graphene.String(), key=graphene.Int())

    #staticmethod
    def resolve_path_step(parent, info, name, step, key):
        """Return the stored rows of *step* in path *name*, filtered by *key*."""
        path_step = get_path_step(name, step, key)
        print(name)
        print(step)
        print(key)
        print(path_step)
        return path_step

    #staticmethod
    def resolve_path(parent, info, name, path, annotate, current, key):
        """Validate and build the path, then return the rows of its first entity."""
        entities = validate_path(path)
        create_path(name, entities, annotate)
        return get_path_step(name, entities[0], None)

    #staticmethod
    def resolve_entity_relationships(parent, info, entity):
        """Return one Entity per table that *entity* references."""
        # The field is a list of Entity, so wrap each raw row; index 5 of a
        # relationship row is the referenced table name.
        return [Entity(name=row[5]) for row in get_foreign_relationships(entity)]

    #staticmethod
    def resolve_postgresql_version(parent, info):
        """Return the server version string reported by ``SELECT version()``."""
        cursor.execute("SELECT version();")
        record = cursor.fetchone()
        # fetchone() yields a one-element row; the field wants the string in it.
        return record[0]
# Run the named sample query against the schema and print its data as JSON,
# then always run the 'path_step' sample query and print that as well.
def execute_query(query_to_execute):
queries = {
'postgresqlVersion': '''
{
postgresqlVersion
}
''',
'entityRelationships': '''
{
entityRelationships (entity: "inventory_productitem") {
name
}
}
''',
'path': '''
{
path(name: "Ventas", path: "general_state/general_city/inventory_store/operations_sale", annotate: "_count", current: "inventory_product", key: 0)
}
''',
# NOTE(review): the other sample queries use camelCase field names
# (postgresqlVersion, entityRelationships) while this one uses the Python
# attribute name path_step; this is the query whose execution returns no data.
'path_step': '''
{
path_step(name: "Ventas", step: "inventory_store", key: 27)
}
'''
}
schema = graphene.Schema(query=Query)
result = schema.execute(queries[query_to_execute])
# result.data is None when execution fails, so .items() raises AttributeError;
# result.errors is not inspected here.
dict_result = dict(result.data.items())
print(json.dumps(dict_result, indent=2))
result2 = schema.execute(queries['path_step'])
# Same caveat: this line is where the reported AttributeError is raised.
dict_result2 = dict(result2.data.items())
print(json.dumps(dict_result2, indent=2))
execute_query('path')
The first call to schema.execute() works with no problem, but the second one doesn't even enter the resolver, and the only error message I get is:
Traceback (most recent call last):
File "query.py", line 249, in <module>
execute_query('path')
File "query.py", line 246, in execute_query
dict_result2 = dict(result2.data.items())
AttributeError: 'NoneType' object has no attribute 'items'
I don't know what I am missing.
I have found that the problem was that I was calling the Graphene query with its Pythonic snake_case name: path_step(name: "Ventas", step: "inventory_store", key: 27). Graphene requires fields to be queried in camelCase, even when the resolvers and query fields are named in snake_case in the code.
So the query must be written in camelCase, like this: pathStep(name: "Ventas", step: "inventory_store", key: 27)

Parsing logs to json Python

Folks,
I am trying to parse log file into json format.
I have a lot of logs, there is one of them
How can I parse this?
03:02:03.113 [info] ext_ref = BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}
Into this type of json, or any other format (but I guess json is best one)
{
"time":"03:02:03.113",
"class_req":"info",
"ext_ref":"BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ",
"reqid":"1253166",
"type":"INREQ",
"channel":"BANK24AOS",
"sid":"msid_1591844511335516_KRRNBSLH2FS",
"duration":"703.991",
"req_uri":"marketcredit/order/state",
"login":"77012221122",
"req_type":"cl_req",
"req_headers":{
"accept-encoding":"gzip",
"connection":"close",
"host":"test-mobileapp-api.bank.kz",
"user-agent":"okhttp/4.4.1",
"x-forwarded-for":"212.154.169.134",
"x-real-ip":"212.154.169.134"
},
"req_body":{
"$sid":"msid_1591844511335516_KRRNBSLH2FS",
"$sid":"msid_1591844511335516_KRRNBSLH2FS",
"app":"bank",
"app_version":"2.3.2",
"channel":"aos",
"colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv",
"colvir_commercial_id":"-1",
"colvir_id":"000120.335980",
"openway_commercial_id":"6247520",
"openway_id":"6196360",
"$lang":"ru",
"ekb_id":"923243",
"inn":"990830221722",
"login":"77012221122",
"bank24_id":"262"
},
"resp_body":{
"task_id":"",
"status":"success",
"data":{
"state":"init",
"applications":[
{
"status":"init",
"id":"123db561-34a3-4a8d-9fa7-03ed6377b44f",
"name":"Sulpak",
"amount":101000,
"items":[
{
"name":"Switch CISCO x24",
"price":100000,
"count":1,
"amount":100000
}
]
}
],
"segment":{
"range":{
"min":6,
"max":36,
"step":1
},
"payment_day":{
"max":28,
"min":1
}
}
}
}
}
I am trying to split the whole text first, but then I ran into another problem: matching keys to values based on the '=' sign. Also, some keys may have empty values. For example:
type = INREQ channel = sid = duration = 1.333 (to detect an empty value, you need to pay attention to the number of spaces; usually there is one space between the previous value and the next key). So this example should look like this:
{
"type":"INREQ",
"channel":"",
"sid":"",
"duration":"1.333"
}
Thanks ahead!
One thing to note here is the duplicate key "$sid":"msid_1591844511335516_KRRNBSLH2FS": when the value is parsed with json.loads, only one entry is kept for it.
import re
text = """03:02:03.113 [info] ext_ref = reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}"""
import json

# Walk the line left to right: first the "<time> [<level>] " prefix, then a
# sequence of "key = value" pairs.  A value is a JSON object, a single
# whitespace-free token, or empty (the next "key =" follows immediately).
KEY_AT = re.compile(r'\s*(\w+) = ?')
NEXT_KEY = re.compile(r'\w+ =(?: |$)')
SCALAR = re.compile(r'\S*')
decoder = json.JSONDecoder()

res = {}
pos = 0
header = re.match(r'(\S+) \[([^\]]*)\] ?', text)
if header:
    res['time'], res['class_req'] = header.groups()
    pos = header.end()

while pos < len(text):
    key_match = KEY_AT.match(text, pos)
    if key_match is None:
        break  # trailing text that is not a "key =" pair
    key = key_match.group(1)
    pos = key_match.end()
    if text.startswith('{', pos):
        # JSON payloads may contain blanks and '=' themselves, so let the
        # decoder find where the object ends.  Duplicate keys inside the
        # object (e.g. "$sid") collapse to the last occurrence.
        try:
            value, pos = decoder.raw_decode(text, pos)
        except ValueError:
            # Not valid JSON after all: keep the raw token as a string.
            value = SCALAR.match(text, pos).group()
            pos += len(value)
    elif NEXT_KEY.match(text, pos):
        value = ''  # empty value: the next key starts right here
    else:
        value = SCALAR.match(text, pos).group()
        pos += len(value)
    res[key] = value
You can try regular expressions in Python.
Here is what I wrote; it works for your problem.
For convenience I removed the part of the string before "ext_ref..."; you can truncate the raw string in the same way.
import re
import json
string = 'ext_ref = BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}'
# Locate the two markers that split the line into three sections: plain
# "key = value" pairs, the JSON-valued request fields, and resp_body.
position = re.search("req_headers", string)  # position of req_headers
resp_body_pos = re.search("resp_body", string)

# resp_body runs to the end of the line.  Split on the first " = " only, so
# the key carries no trailing blank and an '=' inside the payload cannot
# truncate the value.
resp_key, _, resp_value = string[resp_body_pos.start():].partition(" = ")
res1 = {resp_key: resp_value}
print(res1)

before = string[:position.start()]
after = string[position.start():resp_body_pos.start()]  # handle req_headers / req_body separately

# Raw strings keep "\S" a regex escape instead of an invalid string escape.
res2 = re.findall(r"(\S+) = (\S+)", before)
print(res2)
# The non-greedy match stops at the first '}', which is enough for the flat
# objects in req_headers and req_body.
res3 = re.findall(r"(\S+) = ({.*?})", after)
print(res3)
# res1 type: dict {'resp_body': '...'}  -- content of resp_body
# res2 type: list [(key, value), ...]   -- content before req_headers
# res3 type: list [(key, value), ...]   -- req_headers and req_body
Now you can do whatever you want with the data (e.g. transform each part into JSON).
Hope this is helpful.

insert a variable into an url

I have a CSV file with numbers and I want to insert these numbers at a specific location in a URL: just after "value":
Here is my code :
with open('update_cases_id.csv') as p:
for lines in p:
uuid = lines.rstrip()
url_POST = "www.example.com/"
values = {}
values['return_type'] = 'retrieval'
values['format'] = 'TSV'
values['size'] = '70'
values['filters'] = '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value": .format(uuid)}}]}'
data = urllib.urlencode(values)
url_final = url_POST + '?' + data
req2 = urllib2.Request(url_final)
req2.add_header('cookie', cookie)
handle = urllib2.urlopen(req2)
( edited :
example input : 123456-123456-987654
example output : it s data text )
You can do this with string formatting, this should work for you:
# ...snip
# %s is replaced by the id read from the CSV line.  The surrounding quotes
# make it a JSON string, and the closing braces mirror the opening ones so the
# filter stays valid JSON.
values['filters'] = '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value":"%s"}}]}' % uuid
# snip...
The %s will be replaced by the uuid by the % replacement operator:
>>> values = {}
>>> uuid = 1234
>>> values['filters'] = '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value":%s}]}' % uuid
>>> values
{'filters': '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value":1234}]}'}
Try to use Template.
from string import Template
params = Template('{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value": ${your_value}}}]}')
params = params.safe_substitute(your_value=123)
# params is '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value": 123}}]}'

M2m relation breaks when passing filter parameters

I have an m2m relation between properties and images in my model, like imageproperty = models.ManyToManyField(Property, blank = True). I'm having an issue trying to filter properties with their associated images: whenever I pass a parameter in my query I get something like this, and the images are not shown correctly.
This is my code so far:
def filter_properties(request, prop, p):
    """Return the active properties matching the search parameters *p*.

    Args:
        request: Current request (not used by the filtering itself).
        prop: Per-property form data collected by the view (not used here).
        p: Dict with the keys 'sort', 'asc_desc', 'name', 'price_from',
            'price_to' and 'category'; empty values disable that filter.

    Returns:
        A list of Property objects in the requested order, each carrying an
        ``images`` attribute with at most its first related image.
    """
    order = p["sort"] or "creation_date"
    if p["asc_desc"] == "desc":
        order = '-' + order

    results = Property.objects.filter(status=True)
    if p["name"]:
        results = results.filter(name__icontains=p["name"])
    if p["price_from"]:
        results = results.filter(price__gte=int(p["price_from"]))
    if p["price_to"]:
        results = results.filter(price__lte=int(p["price_to"]))
    if p["category"]:
        # Matching any of the selected categories.
        results = results.filter(categories__in=p["category"]).distinct()
    # The computed ordering was never applied before.
    results = results.order_by(order)

    # Attach the images only after all filters are applied: every .filter()
    # builds a new queryset, so attributes set on objects fetched earlier are
    # lost.  Evaluating to a list keeps these exact objects for the caller.
    matched = list(results)
    for item in matched:
        item.images = item.image_set.all()[:1]
    return matched
def search_properties_view(request):
    """Render the property search page for the submitted filters.

    The page number comes from the query string and the filters from the POST
    data.  Filters are stored in the session on the first page and reused on
    later pages.
    """
    try:
        page = int(request.GET.get("page", '1'))
    except ValueError:
        page = 1
    p = request.POST
    prop = defaultdict(dict)
    parameters = dict.fromkeys(
        ('name', 'price_from', 'price_to', 'currency_type', 'activity_type', 'sort', 'asc_desc'),
        '',
    )
    parameters["category"] = []
    for k, v in p.items():
        if k == "category":
            parameters[k] = [int(x) for x in p.getlist(k)]
        elif k in parameters:
            parameters[k] = v
        elif k.startswith(("name", "currency_type", "curency_type", "activity_type")):
            # Presumably per-property fields posted as "<field>-<pk>".  The
            # misspelt "curency_type" prefix is still accepted in case a form
            # relies on it; the correctly spelt one was silently dropped.
            k, pk = k.split('-')
            prop[pk][k] = v
        elif k.startswith("category"):
            pk = k.split('-')[1]
            prop[pk]["category"] = p.getlist(k)
    if page != 1 and "parameters" in request.session:
        parameters = request.session["parameters"]
    else:
        request.session["parameters"] = parameters
    results = filter_properties(request, prop, parameters)
    paginator = Paginator(results, 20)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        # Fall back to the last page.  This must rebind `results`, not
        # `request`, which render() still needs below.
        results = paginator.page(paginator.num_pages)
    return render(request, 'propiedades/propiedades.html', {
        'propiedades': request.POST,
        'media_url': settings.MEDIA_URL,
        'results': results,
        'params': parameters,
        'categories': PropertyCategory.objects.all()
    })

Reference to value of the function

To begin with, I want to say that I'm a newbie in Python and everything I have learned came from tutorials.
My problem concerns referring to a value returned by a function. I'm writing a script which scrapes some information from web sites. I defined a function:
def MatchPattern(count):
sock = urllib.urlopen(Link+str(count))
htmlSource = sock.read()
sock.close()
root = etree.HTML(htmlSource)
root = etree.HTML(htmlSource)
result = etree.tostring(root, pretty_print=True, method="html")
expr1 = check_reg(root)
expr2 = check_practice(root)
D_expr1 = no_ks(root)
D_expr2 = Registred_by(root)
D_expr3 = Name_doctor(root)
D_expr4 = Registration_no(root)
D_expr5 = PWZL(root)
D_expr6 = NIP(root)
D_expr7 = Spec(root)
D_expr8 = Start_date(root)
#-----Reg_practice-----
R_expr1 = Name_of_practise(root)
R_expr2 = TERYT(root)
R_expr3 = Street(root)
R_expr4 = House_no(root)
R_expr5 = Flat_no(root)
R_expr6 = Post_code(root)
R_expr7 = City(root)
R_expr8 = Practice_no(root)
R_expr9 = Kind_of_practice(root)
#------Serv_practice -----
S_expr1 = TERYT2(root)
S_expr2 = Street2(root)
S_expr3 = House_no2(root)
S_expr4 = Flat_no2(root)
S_expr5 = Post_code2(root)
S_expr6 = City2(root)
S_expr7 = Phone_no(root)
return expr1
return expr2
return D_expr1
return D_expr2
return D_expr3
return D_expr4
return D_expr5
return D_expr6
return D_expr7
return D_expr8
#-----Reg_practice-----
return R_expr1
return R_expr2
return R_expr3
return R_expr4
return R_expr5
return R_expr6
return R_expr7
return R_expr8
return R_expr9
#------Serv_practice -----
return S_expr1
return S_expr2
return S_expr3
return S_expr4
return S_expr5
return S_expr6
return S_expr7
So now, inside the script, I want to check the value of expr1 returned by my function. I don't know how to do that. Can you help me? Is my function written correctly?
EDIT:
I can't add an answer, so I am editing my current post.
This is my whole script. Some comments are in my native language, but I have added some in English.
#! /usr/bin/env python
#encoding:UTF-8-
# ----------------------------- importujemy potrzebne biblioteki i skrypty -----------------------
# ------------------------------------------------------------------------------------------------
import urllib
from lxml import etree, html
import sys
import re
import MySQLdb as mdb
from TOR_connections import *
from XPathSelection import *
import os
# ------------------------------ Definiuje xPathSelectors ------------------------------------------
# --------------------------------------------------------------------------------------------------
# -------Doctors -----
check_reg = etree.XPath("string(//html/body/div/table[1]/tr[3]/td[2]/text())") #warunek Lekarz
check_practice = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())") #warunek praktyka
no_ks = etree.XPath("string(//html/body/div/table[1]/tr[1]/td[2]/text())")
Registred_by = etree.XPath("string(//html/body/div/table[1]/tr[4]/td[2]/text())")
Name_doctor = etree.XPath("string(//html/body/div/table[2]/tr[2]/td[2]/text())")
Registration_no = etree.XPath("string(//html/body/div/table[2]/tr[3]/td[2]/text())")
PWZL = etree.XPath("string(//html/body/div/table[2]/tr[4]/td[2]/text())")
NIP = etree.XPath("string(//html/body/div/table[2]/tr[5]/td[2]/text())")
Spec = etree.XPath("string(//html/body/div/table[2]/tr[18]/td[2]/text())")
Start_date = etree.XPath("string(//html/body/div/table[2]/tr[20]/td[2]/text())")
#-----Reg_practice-----
Name_of_practise = etree.XPath("string(//html/body/div/table[2]/tr[1]/td[2]/text())")
TERYT = etree.XPath("string(//html/body/div/table[2]/tr[7]/td[2]/*/text())")
Street = etree.XPath("string(//html/body/div/table[2]/tr[8]/td[2]/text())")
House_no = etree.XPath("string(//html/body/div/table[2]/tr[9]/td[2]/*/text())")
Flat_no = etree.XPath("string(//html/body/div/table[2]/tr[10]/td[2]/*/text())")
Post_code = etree.XPath("string(//html/body/div/table[2]/tr[11]/td[2]/*/text())")
City = etree.XPath("string(//html/body/div/table[2]/tr[12]/td[2]/*/text())")
Practice_no = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())")
Kind_of_practice = etree.XPath("string(//html/body/div/table[3]/tr[5]/td[2]/text())")
#------Serv_practice -----
TERYT2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[2]/td[2]/*/text())")
Street2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[3]/td[2]/text())")
House_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[4]/td[2]/*/text())")
Flat_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[5]/td[2]/i/text())")
Post_code2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[6]/td[2]/*/text())")
City2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[7]/td[2]/*/text())")
Phone_no = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[8]/td[2]/text())")
# --------------------------- deklaracje zmiennych globalnych ----------------------------------
# ----------------------------------------------------------------------------------------------
decrease = 9
No = 1
Link = "http://rpwdl.csioz.gov.pl/rpz/druk/wyswietlKsiegaServletPub?idKsiega="
# --------------------------- funkcje zdefiniowane ----------------------------------
# ----------------------------------------------------------------------------------------------
def MatchPattern(count):
sock = urllib.urlopen(Link+str(count))
htmlSource = sock.read()
sock.close()
root = etree.HTML(htmlSource)
root = etree.HTML(htmlSource)
result = etree.tostring(root, pretty_print=True, method="html")
expr1 = check_reg(root)
expr2 = check_practice(root)
D_expr1 = no_ks(root)
D_expr2 = Registred_by(root)
D_expr3 = Name_doctor(root)
D_expr4 = Registration_no(root)
D_expr5 = PWZL(root)
D_expr6 = NIP(root)
D_expr7 = Spec(root)
D_expr8 = Start_date(root)
#-----Reg_practice-----
R_expr1 = Name_of_practise(root)
R_expr2 = TERYT(root)
R_expr3 = Street(root)
R_expr4 = House_no(root)
R_expr5 = Flat_no(root)
R_expr6 = Post_code(root)
R_expr7 = City(root)
R_expr8 = Practice_no(root)
R_expr9 = Kind_of_practice(root)
#------Serv_practice -----
S_expr1 = TERYT2(root)
S_expr2 = Street2(root)
S_expr3 = House_no2(root)
S_expr4 = Flat_no2(root)
S_expr5 = Post_code2(root)
S_expr6 = City2(root)
S_expr7 = Phone_no(root)
return expr1
return expr2
return D_expr1
return D_expr2
return D_expr3
return D_expr4
return D_expr5
return D_expr6
return D_expr7
return D_expr8
#-----Reg_practice-----
return R_expr1
return R_expr2
return R_expr3
return R_expr4
return R_expr5
return R_expr6
return R_expr7
return R_expr8
return R_expr9
#------Serv_practice -----
return S_expr1
return S_expr2
return S_expr3
return S_expr4
return S_expr5
return S_expr6
return S_expr7
# --------------------------- ustanawiamy polaczenie z baza danych -----------------------------
# ----------------------------------------------------------------------------------------------
con = mdb.connect('localhost', 'root', '******', 'SANBROKER', charset='utf8');
# ---------------------------- początek programu -----------------------------------------------
# ----------------------------------------------------------------------------------------------
with con:
cur = con.cursor()
cur.execute("SELECT Old_num FROM SANBROKER.Number_of_records;")
Old_num = cur.fetchone()
count = Old_num[0]
counter = input("Input number of rows: ")
# ----------------------- pierwsze połączenie z TORem ------------------------------------
# ----------------------------------------------------------------------------------------
#connectTor()
#conn = httplib.HTTPConnection("my-ip.heroku.com")
#conn.request("GET", "/")
#response = conn.getresponse()
#print(response.read())
while count <= counter: # co dziesiata liczba
# --------------- pierwsze wpisanie do bazy danych do Archive --------------------
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Archive_num=%s",(count))
# ---------------------------------------------------------------------------------
if decrease == 0:
MatchPattern(count)
# Now I wanna check some expresions (2 or 3)
# After that i wanna write all the values into my database
#------- ostatnie czynności:
percentage = count / 100
print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
decrease = 10-1
count +=1
else:
MatchPattern(count)
# Now I wanna check some expresions (2 or 3)
# After that i wanna write all the values into my database
# ------ ostatnie czynności:
percentage = count / 100
print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
decrease -=1
count +=1
Well, I'm assuming check_reg is a function that returns a boolean (either True or False).
If that's the case, to check the return:
if expr1:
print "True."
else:
print "False"
There's more than one way to do it, but basically, if expr1: is all you need to do the checking.
To capture the return value of a function, assign the function to a name with an equal sign, like this:
return_value = somefunction(some_value)
print('The return value is ',return_value)
Keep in mind that when the first return statement is encountered, the function will exit. So if you have more than one return statement after each other, only the first will execute.
If you want to return multiple things, add them to a list and then return the list.
Here is an improved version of your function:
def match_pattern(count):
    """Fetch the page for record *count* and evaluate every XPath on it.

    Returns:
        A list with one extracted value per XPath, in the same order as the
        variables of the original function: expr1, expr2, D_expr1..D_expr8,
        R_expr1..R_expr9, S_expr1..S_expr7.
    """
    sock = urllib.urlopen(Link + str(count))
    try:
        html_source = sock.read()
    finally:
        # Close the connection even if the read fails.
        sock.close()
    root = etree.HTML(html_source)
    # Names must match the module-level XPath objects exactly (note the
    # spelling of Registred_by and Name_of_practise).
    extractors = [
        check_reg, check_practice,
        # doctor
        no_ks, Registred_by, Name_doctor, Registration_no, PWZL, NIP, Spec,
        Start_date,
        # registered practice
        Name_of_practise, TERYT, Street, House_no, Flat_no, Post_code, City,
        Practice_no, Kind_of_practice,
        # practice service address
        TERYT2, Street2, House_no2, Flat_no2, Post_code2, City2, Phone_no,
    ]
    return [extract(root) for extract in extractors]
r = match_pattern(1)
print r[0] # this will be the result of check_reg(root)
The code you have posted is quite ambiguous. Can you please fix the indentation so we can tell what belongs to the function and which part is the script?
A function can return only one value. You cannot do:
return something
return something_else
return ...
The function will end as soon as the first value is returned.
What you can do is return a list, tuple or dict containing all your values.
For instance :
return (something,something_else,...)
or
return [something,something_else,...]
In your case, it seems better to create a class that would have all values you want as attributes, and turn this function into a method that would set the attributes values.
class Example(object):
    """Holds the values scraped from one page as instance attributes.

    The ``...`` lines stand for the remaining assignments, which follow the
    same pattern as the ones shown.
    """

    def __init__(self, link, count):
        sock = urllib.urlopen(link + str(count))
        try:
            htmlSource = sock.read()
        finally:
            # Close the connection even if the read fails.
            sock.close()
        # Parse the page once; the second parse and the pretty-printed copy
        # were never used.
        root = etree.HTML(htmlSource)
        self.expr1 = check_reg(root)
        self.expr2 = check_practice(root)
        self.D_expr1 = no_ks(root)
        ...
        self.D_expr8 = Start_date(root)
        #-----Reg_practice-----
        self.R_expr1 = Name_of_practise(root)
        ...
        self.R_expr9 = Kind_of_practice(root)
        #------Serv_practice -----
        self.S_expr1 = TERYT2(root)
        ...
        self.S_expr7 = Phone_no(root)
Then you will be able to use this class like :
exampleInstance = Example ( "link you want to use" , 4 ) # the second argument is your 'count' value
# Now you can use attributes of your class to get the values you want
print exampleInstance . expr1
print exampleInstance . S_expr7

Categories