Related
I have the multi-line string below. For each line, I want to split the string and add the result to a JSON output file. I have done this using get_text().splitlines() and a regular expression, but I am not sure this is the best way to do it.
Input file :
Server:prod01
Available memory: 20480 Disk:200 CPU:4
Used memory:12438 Disk:120 CPU:3
Unused memory:8042 Disk:80 CPU:1
Server:prod02
Available memory: 40960 Disk:500 CPU:8
Used memory:20888 Disk:320 CPU:3
Unused memory:20072 Disk:180 CPU:5
Expected output JSON:
{"prod01_available_memory":20480}
{"prod01_used_memory":12438}
{"prod01_unused_memory":8042}
{"prod01_available_disk":200}
{"prod01_used_disk":120}
{"prod01_unused_disk":80}
{"prod01_available_cpu":4}
{"prod01_used_cpu":3}
{"prod01_unused_cpu":1}
{"prod02_available_memory":40960}
{"prod02_used_memory":20888}
{"prod02_unused_memory":20072}
{"prod02_available_disk":500}
{"prod02_used_disk":320}
{"prod02_unused_disk":180}
{"prod02_available_cpu":8}
{"prod02_used_cpu":3}
{"prod02_unused_cpu":5}
Thanks,
Rinku
Below is my code -
def tsplit(string, *delimiters):
    """Split ``string`` on any of the given literal delimiters.

    Each delimiter is escaped, so it is matched as plain text rather than
    as a regular expression.  The alternatives are tried in the order in
    which the delimiters were passed.

    Returns the list of pieces.  When no usable delimiter is supplied the
    whole string is returned as the single element of the list.
    """
    # An empty delimiter would add an empty alternative to the pattern, and
    # an empty match makes re.split (Python 3.7+) cut between every character.
    usable = [delimiter for delimiter in delimiters if delimiter]
    if not usable:
        return [string]
    pattern = '|'.join(map(re.escape, usable))
    return re.split(pattern, string)
prelist = pre.get_text().splitlines()
server_name = re.split('server|:',prelist[0])[2].strip()
if server_name == 'prod01':
#print prelist[1]
prod01_memory_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[2])
prod01_Disk_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[4])
prod01_CPU_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[6])
#print prelist[2]
prod01_memory_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[2])
prod01_Disk_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[4])
prod01_CPU_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[6])
#print prelist[4]
prod01_memory_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[2])
prod01_Disk_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[4])
prod01_CPU_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[6])
elif server_name == 'prod02':
#print prelist[1]
prod02memory_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[2])
prod02Disk_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[4])
prod02CPU_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[6])
#print prelist[2]
prod02memory_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[2])
prod02Disk_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[4])
prod02CPU_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[6])
#print prelist[4]
prod02memory_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[2])
prod02Disk_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[4])
prod02CPU_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[6])
else
#assign all varaiables 0
.....
proc_item["logtime"] = str(t1)
proc_item["prod01_memory_actv"] = prod01_memory_actv
proc_item["prod01_Disk_actv"] = prod01_Disk_actv
proc_item["prod01_CPU_actv"] = prod01_CPU_actv
......
#for all otehr variables...
proc_data.append(proc_item)
with open("./proc_"+ str(date.today()) + ".txt", 'a+') as f:
json.dump(proc_data, f)
f.write("\n")
I have some basic knowledge on python.
- Just using string array indices
hostmtrcs = "Server:prod01 Available memory:20480 Disk:200 CPU:4 Used memory:12438 Disk:120 CPU:3 Unused memory:8042 " \
"Disk:80 CPU:1 Server:prod02 Available memory: 40960 Disk:500 CPU:8 Used memory:20888 Disk:320 CPU:3 Unused " \
"memory:20072 Disk:180 CPU:5 "
datasplt = hostmtrcs.split(":")
hstname = ''
attrkey = ''
attrvalue = ''
for word in range(0, datasplt.__len__()):
if not datasplt[word].__contains__("Server"):
elmnt = datasplt[word].split(" ")
if datasplt[word].__contains__('prod'):
hstname = elmnt[0].lower()
if elmnt.__len__() == 3:
attrkey = elmnt[1].lower() + "_" + elmnt[2].lower() # attrkey
else:
attrkey = elmnt[1]
# retreive the value from the next element in the 1st attry datasplit
if word != datasplt.__len__() - 1:
nxtelmnt = datasplt[word + 1].split(" ")
attrvalue = nxtelmnt[0] # sattrvalue frm next element
finalfrmt = '{' + '"' +hstname + "_" + attrkey + '"' + ":" + attrvalue + '}'
print(finalfrmt)
I think you can do it with a dict and then just dump it as JSON. (The output you asked for is not a single valid JSON document, but since that is what you requested, I simply dump the dict as JSON.) I haven't validated the keys; I am assuming you already have the data parsed into a dictionary correctly.
d = { 'Server':'prod01',
'Available memory': 20480,
'Disk':200,
'CPU':4}
import json
s = json.dumps({str(d['Server']+"_"+key).replace(' ','_'):value for key,value in d.items()})
print(json.loads(s))
>>> {'prod01_Server': 'prod01', 'prod01_Available_memory': 20480, 'prod01_Disk': 200, 'prod01_CPU': 4}
You should split the input text, section by section, according to what you're looking for.
data = '''Server:prod01
Available memory: 20480 Disk:200 CPU:4
Used memory:12438 Disk:120 CPU:3
Unused memory:8042 Disk:80 CPU:1
Server:prod02
Available memory: 40960 Disk:500 CPU:8
Used memory:20888 Disk:320 CPU:3
Unused memory:20072 Disk:180 CPU:5'''
import re
import json
print(json.dumps({'_'.join((s, l.split(' ', 1)[0], k)).lower(): int(v) for s, d in [i.split('\n', 1) for i in data.split('Server:') if i] for l in d.split('\n') for k, v in re.findall(r'(\w+):\s*(\d+)', l)}))
This outputs:
{"prod01_available_memory": 20480, "prod01_available_disk": 200, "prod01_available_cpu": 4, "prod01_used_memory": 12438, "prod01_used_disk": 120, "prod01_used_cpu": 3, "prod01_unused_memory": 8042, "prod01_unused_disk": 80, "prod01_unused_cpu": 1, "prod02_available_memory": 40960, "prod02_available_disk": 500, "prod02_available_cpu": 8, "prod02_used_memory": 20888, "prod02_used_disk": 320, "prod02_used_cpu": 3, "prod02_unused_memory": 20072, "prod02_unused_disk": 180, "prod02_unused_cpu": 5}
I have a CSV file with numbers and I want to insert these numbers at a specific location in a URL: just after " "value": "
Here is my code :
with open('update_cases_id.csv') as p:
for lines in p:
uuid = lines.rstrip()
url_POST = "www.example.com/"
values = {}
values['return_type'] = 'retrieval'
values['format'] = 'TSV'
values['size'] = '70'
values['filters'] = '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value": .format(uuid)}}]}'
data = urllib.urlencode(values)
url_final = url_POST + '?' + data
req2 = urllib2.Request(url_final)
req2.add_header('cookie', cookie)
handle = urllib2.urlopen(req2)
( edited :
example input : 123456-123456-987654
example output : it s data text )
You can do this with string formatting, this should work for you:
# ...snip
# The filter needs all three closing braces before "]" (op-in object, its
# content object, then the list), and the UUID is text, so it is quoted to
# keep the resulting filter valid JSON.
values['filters'] = '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value":"%s"}}]}' % uuid
# snip...
The %s will be replaced by the uuid by the % replacement operator:
>>> values = {}
>>> uuid = 1234
>>> values['filters'] = '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value":"%s"}}]}' % uuid
>>> values
{'filters': '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value":"1234"}}]}'}
Try to use Template.
from string import Template
params = Template('{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value": ${your_value}}}]}')
params = params.safe_substitute(your_value=123)
# params is '{"op":"and","content":[{"op":"in","content":{"field":"cases.case_id","value": 123}}]}'
I have files with incorrect JSON that I want to start fixing by getting it into properly grouped chunks.
The brace grouping {{ {} {} } } {{}} {{{}}} should already be correct
How can I grab all the top-level braces, correctly grouped, as separate strings?
If you don't want to install any extra modules simple function will do:
def top_level(s, strict=False):
    """Yield every top-level ``{...}`` group of ``s`` as a substring.

    Nested braces stay inside the group that encloses them.  Characters
    outside any group are ignored.

    With ``strict`` left at ``False`` malformed input is tolerated: a ``}``
    with no open group is skipped and a group that is never closed is not
    yielded.  With ``strict=True`` either situation raises ``ValueError``.
    """
    depth = 0
    start = -1
    for i, c in enumerate(s):
        if c == '{':
            if depth == 0:
                # Remember where the outermost group begins.
                start = i
            depth += 1
        elif c == '}':
            if depth == 0:
                if strict:
                    raise ValueError("unmatched '}' at index %d" % i)
                continue
            depth -= 1
            if depth == 0:
                yield s[start:i + 1]
    if strict and depth:
        raise ValueError("unclosed '{' at index %d" % start)
print(list(top_level('{{ {} {} } } {{}} {{{}}}')))
Output:
['{{ {} {} } }', '{{}}', '{{{}}}']
It will skip invalid braces but could be easily modified to report an error when they are spotted.
Using the regex module:
In [1]: import regex
In [2]: braces = regex.compile(r"\{(?:[^{}]++|(?R))*\}")
In [3]: braces.findall("{{ {} {} } } {{}} {{{}}}")
Out[3]: ['{{ {} {} } }', '{{}}', '{{{}}}']
pyparsing can be really helpful here. It will handle pathological cases where you have braces inside strings, etc. It might be a little tricky to do all of this work yourself, but fortunately, somebody (the author of the library) has already done the hard stuff for us.... I'll reproduce the code here to prevent link-rot:
# jsonParser.py
#
# Implementation of a simple JSON parser, returning a hierarchical
# ParseResults object support both list- and dict-style data access.
#
# Copyright 2006, by Paul McGuire
#
# Updated 8 Jan 2007 - fixed dict grouping bug, and made elements and
# members optional in array and object collections
#
json_bnf = """
object
{ members }
{}
members
string : value
members , string : value
array
[ elements ]
[]
elements
value
elements , value
value
string
number
object
array
true
false
null
"""
from pyparsing import *
TRUE = Keyword("true").setParseAction( replaceWith(True) )
FALSE = Keyword("false").setParseAction( replaceWith(False) )
NULL = Keyword("null").setParseAction( replaceWith(None) )
jsonString = dblQuotedString.setParseAction( removeQuotes )
jsonNumber = Combine( Optional('-') + ( '0' | Word('123456789',nums) ) +
Optional( '.' + Word(nums) ) +
Optional( Word('eE',exact=1) + Word(nums+'+-',nums) ) )
jsonObject = Forward()
jsonValue = Forward()
jsonElements = delimitedList( jsonValue )
jsonArray = Group(Suppress('[') + Optional(jsonElements) + Suppress(']') )
jsonValue << ( jsonString | jsonNumber | Group(jsonObject) | jsonArray | TRUE | FALSE | NULL )
memberDef = Group( jsonString + Suppress(':') + jsonValue )
jsonMembers = delimitedList( memberDef )
jsonObject << Dict( Suppress('{') + Optional(jsonMembers) + Suppress('}') )
jsonComment = cppStyleComment
jsonObject.ignore( jsonComment )
def convertNumbers(s, l, toks):
    """Parse action that converts a matched number token to int or float.

    ``s`` and ``l`` are accepted because they are part of the three-argument
    parse-action signature, but they are not used.  ``toks[0]`` is the
    matched text; it is returned as an ``int`` when it parses as one and as
    a ``float`` otherwise.
    """
    n = toks[0]
    try:
        return int(n)
    except ValueError:
        # Not an integer literal (fraction or exponent present).
        return float(n)
jsonNumber.setParseAction( convertNumbers )
Phew! That's a lot ... Now how do we use it? The general strategy here will be to scan the string for matches and then slice those matches out of the original string. Each scan result is a tuple of the form (lex-tokens, start_index, stop_index). For our use, we don't care about the lex-tokens, just the start and stop. We could do: string[result[1]:result[2]] and it would work. We can also do string[slice(*result[1:])] -- Take your pick.
results = jsonObject.scanString(testdata)
for result in results:
print '*' * 80
print testdata[slice(*result[1:])]
To begin with, I want to say that I'm a newbie in Python and everything I have learned came from tutorials.
My problem concerns referring to a returned value. I'm writing a script which scrapes some information from web sites. I defined a function:
def MatchPattern(count):
    """Fetch record page ``count`` and evaluate every XPath selector on it.

    The page at ``Link + str(count)`` is downloaded and parsed with
    ``etree.HTML``; each selector is then applied to the parsed tree.

    Returns a tuple holding all results in this order: ``check_reg``,
    ``check_practice``, the eight doctor fields, the nine registered-practice
    fields and the seven service-practice fields.  Index 0 is the
    ``check_reg`` result, which is the only value the chain of ``return``
    statements used to deliver.
    """
    sock = urllib.urlopen(Link + str(count))
    try:
        htmlSource = sock.read()
    finally:
        # Close the connection even when the read fails.
        sock.close()
    root = etree.HTML(htmlSource)
    selectors = (
        check_reg, check_practice,
        # ----- Doctors -----
        no_ks, Registred_by, Name_doctor, Registration_no, PWZL, NIP, Spec,
        Start_date,
        # ----- Reg_practice -----
        Name_of_practise, TERYT, Street, House_no, Flat_no, Post_code, City,
        Practice_no, Kind_of_practice,
        # ----- Serv_practice -----
        TERYT2, Street2, House_no2, Flat_no2, Post_code2, City2, Phone_no,
    )
    # A function stops at its first ``return``, so the values are collected
    # and handed back together instead of one ``return`` per value.
    return tuple(selector(root) for selector in selectors)
So now, inside the script, I want to check the value of expr1 returned by my function. I don't know how to do that. Can you guys help me? Is my function written correctly?
EDIT:
I can't add answer so I edit my current post
This is my all script. Some comments are in my native language but i add some in english
#! /usr/bin/env python
#encoding:UTF-8-
# ----------------------------- import the required libraries and scripts -----------------------
# ------------------------------------------------------------------------------------------------
import urllib
from lxml import etree, html
import sys
import re
import MySQLdb as mdb
from TOR_connections import *
from XPathSelection import *
import os
# ------------------------------ Define the XPath selectors ------------------------------------------
# --------------------------------------------------------------------------------------------------
# -------Doctors -----
check_reg = etree.XPath("string(//html/body/div/table[1]/tr[3]/td[2]/text())") #warunek Lekarz
check_practice = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())") #warunek praktyka
no_ks = etree.XPath("string(//html/body/div/table[1]/tr[1]/td[2]/text())")
Registred_by = etree.XPath("string(//html/body/div/table[1]/tr[4]/td[2]/text())")
Name_doctor = etree.XPath("string(//html/body/div/table[2]/tr[2]/td[2]/text())")
Registration_no = etree.XPath("string(//html/body/div/table[2]/tr[3]/td[2]/text())")
PWZL = etree.XPath("string(//html/body/div/table[2]/tr[4]/td[2]/text())")
NIP = etree.XPath("string(//html/body/div/table[2]/tr[5]/td[2]/text())")
Spec = etree.XPath("string(//html/body/div/table[2]/tr[18]/td[2]/text())")
Start_date = etree.XPath("string(//html/body/div/table[2]/tr[20]/td[2]/text())")
#-----Reg_practice-----
Name_of_practise = etree.XPath("string(//html/body/div/table[2]/tr[1]/td[2]/text())")
TERYT = etree.XPath("string(//html/body/div/table[2]/tr[7]/td[2]/*/text())")
Street = etree.XPath("string(//html/body/div/table[2]/tr[8]/td[2]/text())")
House_no = etree.XPath("string(//html/body/div/table[2]/tr[9]/td[2]/*/text())")
Flat_no = etree.XPath("string(//html/body/div/table[2]/tr[10]/td[2]/*/text())")
Post_code = etree.XPath("string(//html/body/div/table[2]/tr[11]/td[2]/*/text())")
City = etree.XPath("string(//html/body/div/table[2]/tr[12]/td[2]/*/text())")
Practice_no = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())")
Kind_of_practice = etree.XPath("string(//html/body/div/table[3]/tr[5]/td[2]/text())")
#------Serv_practice -----
TERYT2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[2]/td[2]/*/text())")
Street2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[3]/td[2]/text())")
House_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[4]/td[2]/*/text())")
Flat_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[5]/td[2]/i/text())")
Post_code2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[6]/td[2]/*/text())")
City2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[7]/td[2]/*/text())")
Phone_no = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[8]/td[2]/text())")
# --------------------------- global variable declarations ----------------------------------
# ----------------------------------------------------------------------------------------------
decrease = 9
No = 1
Link = "http://rpwdl.csioz.gov.pl/rpz/druk/wyswietlKsiegaServletPub?idKsiega="
# --------------------------- function definitions ----------------------------------
# ----------------------------------------------------------------------------------------------
def MatchPattern(count):
    """Fetch record page ``count`` and evaluate every XPath selector on it.

    The page at ``Link + str(count)`` is downloaded and parsed with
    ``etree.HTML``; each module-level XPath selector is then applied to the
    parsed tree.

    Returns a tuple holding all results in this order: ``check_reg``,
    ``check_practice``, the eight doctor fields, the nine registered-practice
    fields and the seven service-practice fields.  Index 0 is the
    ``check_reg`` result, which is the only value the chain of ``return``
    statements used to deliver.
    """
    sock = urllib.urlopen(Link + str(count))
    try:
        htmlSource = sock.read()
    finally:
        # Close the connection even when the read fails.
        sock.close()
    root = etree.HTML(htmlSource)
    selectors = (
        check_reg, check_practice,
        # ----- Doctors -----
        no_ks, Registred_by, Name_doctor, Registration_no, PWZL, NIP, Spec,
        Start_date,
        # ----- Reg_practice -----
        Name_of_practise, TERYT, Street, House_no, Flat_no, Post_code, City,
        Practice_no, Kind_of_practice,
        # ----- Serv_practice -----
        TERYT2, Street2, House_no2, Flat_no2, Post_code2, City2, Phone_no,
    )
    # A function stops at its first ``return``, so the values are collected
    # and handed back together instead of one ``return`` per value.
    return tuple(selector(root) for selector in selectors)
# --------------------------- establish the database connection -----------------------------
# ----------------------------------------------------------------------------------------------
con = mdb.connect('localhost', 'root', '******', 'SANBROKER', charset='utf8');
# ---------------------------- start of the program -----------------------------------------------
# ----------------------------------------------------------------------------------------------
with con:
cur = con.cursor()
cur.execute("SELECT Old_num FROM SANBROKER.Number_of_records;")
Old_num = cur.fetchone()
count = Old_num[0]
counter = input("Input number of rows: ")
# ----------------------- pierwsze połączenie z TORem ------------------------------------
# ----------------------------------------------------------------------------------------
#connectTor()
#conn = httplib.HTTPConnection("my-ip.heroku.com")
#conn.request("GET", "/")
#response = conn.getresponse()
#print(response.read())
while count <= counter: # co dziesiata liczba
# --------------- pierwsze wpisanie do bazy danych do Archive --------------------
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Archive_num=%s",(count))
# ---------------------------------------------------------------------------------
if decrease == 0:
MatchPattern(count)
# Now I wanna check some expresions (2 or 3)
# After that i wanna write all the values into my database
#------- ostatnie czynności:
percentage = count / 100
print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
decrease = 10-1
count +=1
else:
MatchPattern(count)
# Now I wanna check some expresions (2 or 3)
# After that i wanna write all the values into my database
# ------ ostatnie czynności:
percentage = count / 100
print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
decrease -=1
count +=1
Well, I'm assuming check_reg is a function that returns a boolean (either True or False).
If that's the case, to check the return:
if expr1:
print "True."
else:
print "False"
There's more than one way to do it, but basically, if expr1: is all you need to do the checking.
To capture the return value of a function, assign the function to a name with an equal sign, like this:
return_value = somefunction(some_value)
print('The return value is ',return_value)
Keep in mind that when the first return statement is encountered, the function will exit. So if you have more than one return statement after each other, only the first will execute.
If you want to return multiple things, add them to a list and then return the list.
Here is an improved version of your function:
def match_pattern(count):
    """Fetch record page ``count`` and return the selector results as a list.

    The page at ``Link + str(count)`` is downloaded, parsed with
    ``etree.HTML`` and every selector in ``function_names`` is applied to the
    parsed tree.  The returned list keeps the order of ``function_names``, so
    element 0 is the result of ``check_reg``.
    """
    sock = urllib.urlopen(Link + str(count))
    try:
        # The name must match the one passed to etree.HTML below.
        htmlSource = sock.read()
    finally:
        # Close the connection even when the read fails.
        sock.close()
    root = etree.HTML(htmlSource)
    # NOTE(review): this list is a subset of the selectors defined in the
    # script and mixes registered-practice and service-practice ones
    # (e.g. House_no2 next to Flat_no) -- confirm which fields are wanted.
    # "Registred_by" is spelled the way the selector is defined.
    function_names = [check_reg, check_practice, no_ks, Registred_by,
                      Name_doctor, Registration_no, PWZL, NIP, Spec, Start_date,
                      Name_of_practise, TERYT, Street, House_no2, Flat_no,
                      Post_code2, City2, Phone_no]
    return [function(root) for function in function_names]
r = match_pattern(1)
print r[0] # this will be the result of check_reg(root)
The code you have posted is quite ambiguous. Can you please fix the indentation to let us know what belongs to the function and which part is the script?
A function can return only one value. You cannot do:
return something
return something_else
return ...
The function ends as soon as the first value is returned.
What you can do is return a list, tuple or dict containing all your values.
For instance :
return (something,something_else,...)
or
return [something,something_else,...]
In your case, it seems better to create a class that would have all values you want as attributes, and turn this function into a method that would set the attributes values.
class Example(object):
def __init__ ( self , link , count ):
sock = urllib.urlopen(link+str(count))
htmlSource = sock.read()
sock.close()
root = etree.HTML(htmlSource)
root = etree.HTML(htmlSource)
result = etree.tostring(root, pretty_print=True, method="html")
self.expr1 = check_reg(root)
self.expr2 = check_practice(root)
self.D_expr1 = no_ks(root)
...
self.D_expr8 = Start_date(root)
#-----Reg_practice-----
self.R_expr1 = Name_of_practise(root)
...
self.R_expr9 = Kind_of_practice(root)
#------Serv_practice -----
self.S_expr1 = TERYT2(root)
...
self.S_expr7 = Phone_no(root)
Then you will be able to use this class like :
exampleInstance = Example ( "link you want to use" , 4 ) # the second argument is your 'count' value
# Now you can use attributes of your class to get the values you want
print exampleInstance . expr1
print exampleInstance . S_expr7
I'd like to generate a stub SOAP web service class using the Python soaplib module, based on an existing WSDL. The idea is to generate a mock for a third party web service.
Does any such code generator exist, or must we write our own?
Martin
Okay, I had a go at hacking my wsdl2interface (http://pypi.python.org/pypi/wsdl2interface) script to output soaplib code. I think I have something that works, though it's not pretty or especially well tested.
I'll paste it here for the record. I could be persuaded to release it if someone needs it, though it's not exactly my best code. Note that it uses Suds' WSDL parser to generate soaplib code, which is a bit strange in itself.
Run like this:
$ wsdl2soaplib <url or filename of WSDL> > wsdl.py
The code (you'll need suds in your path, ideally in a virtualenv):
from StringIO import StringIO
import os.path
import sys
import textwrap
import keyword
import re
import suds.client
# Patterns describing a valid Python identifier.  The digit class is 0-9
# (a 1-9 class would reject "0"), and the whole-identifier pattern is
# anchored at the end so that .match() only succeeds when every character
# is valid, not just a leading run of them.
VALID_IDENTIFIER_RE = re.compile(r"[_A-Za-z][_A-Za-z0-9]*\Z")
VALID_IDENTIFIER_FIRST_LETTER_RE = re.compile(r"[_A-Za-z]")
VALID_IDENTIFIER_SUBSEQUENT_LETTER_RE = re.compile(r"[_A-Za-z0-9]")
# Preamble written at the top of the generated module; "%(wsdl)s" is filled
# with the WSDL location.  The imported serializer names must match the names
# emitted through SCHEMA_TYPE_MAPPING (e.g. "Boolean").
# NOTE(review): SCHEMA_TYPE_MAPPING can also emit "Decimal", which is not
# imported here -- confirm whether soaplib provides it.
HEADER = '''\
"""SOAP web services generated from:
%(wsdl)s.
"""
from soaplib.serializers.primitive import (
String, Integer, Float, Double, DateTime, Boolean, Null, Array, Map, Any
)
from soaplib.serializers.clazz import ClassSerializer
from soaplib.service import SoapServiceBase
from soaplib.service import soapmethod
'''
INTERFACE = '''\
class %(name)s(%(bases)s):
"""%(docstring)s"""
'''
SERVICE_INTERFACE_DOCSTRING = '''\
SOAP service ``%(serviceName)s`` with target namespace %(tns)s.
'''
TYPE_INTERFACE_DOCSTRING = '''\
SOAP %(type)s ``{%(namespace)s}%(name)s``
'''
TYPE_MAP = '''\
WSDL_TYPES = {
%(items)s
}
'''
SOAPMETHOD = ''' #soapmethod(%(args)s, _returns=%(response)s)'''
METHOD = ''' def %(name)s(self, %(args)s):'''
METHOD_DOCSTRING = '''\
"""Parameters:
%(args)s
Returns: %(response)s
"""
'''
STANDARD_TYPE_NAMESPACES = [
'http://schemas.xmlsoap.org/soap/encoding/',
'http://schemas.xmlsoap.org/wsdl/',
'http://www.w3.org/2001/XMLSchema'
]
SCHEMA_TYPE_MAPPING = {
None: '%(typeName)s',
'None': 'None',
'boolean': 'Boolean',
'string': 'String',
'long': 'Integer',
'int': 'Integer',
'short': 'Integer',
'byte': 'Integer',
'unsignedLong': 'Integer',
'unsignedInt': 'Integer',
'unsignedShort': 'Integer',
'unsignedByte': 'Integer',
'positiveInteger': 'Integer',
'nonPositiveInteger': 'Integer',
'negativeInteger': 'Integer',
'nonNegativeInteger': 'Integer',
'float': 'Float',
'double': 'Float',
'decimal': 'Decimal',
'dateTime': 'DateTime',
'date': 'DateTime',
'anyURI': 'String',
'token': 'String',
'normalizedString': 'String',
'base64Binary': 'String',
'hexBinary': 'String',
}
def formatDocstring(text, indent=4, colwidth=78):
    """Wrap ``text`` for use as the body of an indented docstring.

    The text is wrapped to ``colwidth - indent`` columns.  Lines are joined
    with a newline followed by ``indent`` spaces, and an empty final line is
    added so the result ends with that newline-plus-indent sequence.
    """
    lines = textwrap.wrap(text, colwidth - indent)
    lines.append('')
    separator = '\n' + indent * ' '
    return separator.join(lines)
def typeName(type, sd):
    """Return the name of the resolved ``type``, or ``''`` if it has none.

    ``sd`` is accepted but not used.
    """
    name = type.resolve().name
    return name if name else ''
def schemaTypeName(type, sd, deps=None):
    """Return the serializer expression to emit for ``type``.

    The resolved type name is looked up in ``SCHEMA_TYPE_MAPPING``.  A name
    without its own entry falls back to the ``None`` entry of the mapping
    and, when ``deps`` is a list, is appended to it as a dependency.  The
    result is wrapped in ``Array(...)`` when the type is unbounded.
    ``sd`` is accepted but not used.
    """
    rawName = type.resolve().name or ''
    template = SCHEMA_TYPE_MAPPING.get(rawName)
    if template is None:
        # Not a standard type: use the default template and record the link.
        template = SCHEMA_TYPE_MAPPING[None]
        if deps is not None:
            deps.append(unicode(rawName))
    rendered = template % {'typeName': rawName, 'required': type.required()}
    if type.unbounded():
        return "Array(%s)" % rendered
    return rendered
def normalizeIdentifier(identifier):
    """Turn ``identifier`` into a valid, non-keyword Python identifier.

    Every character other than an ASCII letter, digit or underscore is
    replaced with ``_``; a leading digit is replaced with ``_`` as well.
    If the result is a Python keyword, ``_`` is appended.  Names that are
    already valid are returned unchanged.
    """
    # Check the whole name, not only its start, so that e.g. "foo-bar" is
    # rewritten instead of being passed through.
    identifier = re.sub(r'[^_A-Za-z0-9]', '_', identifier)
    # Only ASCII characters remain here, so isdigit() means 0-9.
    if identifier[:1].isdigit():
        identifier = '_' + identifier[1:]
    if keyword.iskeyword(identifier):
        identifier = identifier + '_'
    return identifier
def generate(client, url=None, standardTypeNamespaces=STANDARD_TYPE_NAMESPACES, removeInputOutputMesssages=True):
"""Given a WSDL URL, return a file that could become your interfaces.py
"""
printed = [] # sequence of type name -> string
for sd in client.sd:
serviceOut = StringIO()
print >>serviceOut, HEADER % dict(
wsdl=url,
)
printed.append(('', serviceOut.getvalue(),))
# Types
typeMap = {}
typeSeq = []
typeDeps = {}
typeAttributes = {}
typesPrinted = []
for type_ in sd.types:
typeOut = StringIO()
resolved = type_[0].resolve()
namespaceURL = resolved.namespace()[1]
if namespaceURL not in standardTypeNamespaces:
if resolved.enum():
typeDescription = "enumeration"
else:
typeDescription = "complex type"
# Look for basess
interfaceBases = []
if resolved.extension():
def find(t):
for c in t.rawchildren:
if c.extension():
find(c)
if c.ref is not None:
interfaceBases.append(c.ref[0])
find(resolved)
if not interfaceBases:
interfaceBases = ['ClassSerializer']
rawTypeName = typeName(type_[0], sd)
typeInterfaceName = normalizeIdentifier(rawTypeName)
typeMap[rawTypeName] = typeInterfaceName
typeSeq.append((rawTypeName, typeInterfaceName,))
typeAttributes[rawTypeName] = {}
print >>typeOut, INTERFACE % dict(
name=normalizeIdentifier(typeInterfaceName),
bases=', '.join(interfaceBases),
docstring=formatDocstring(TYPE_INTERFACE_DOCSTRING % dict(
type=typeDescription,
name=rawTypeName,
namespace=namespaceURL,
)
)
)
print >>typeOut, " class types:"
if resolved.enum():
for attr in type_[0].children():
name = attr[0].name.replace(' ', '_')
print >>typeOut, " %s = String # XXX: Enumeration value" % name
else:
for attr in type_[0].children():
name = attr[0].name.replace(' ', '_')
attrTypeName = typeName(attr[0], sd)
typeAttributes[rawTypeName][name] = attrTypeName
schemaType = schemaTypeName(attr[0], sd, deps=typeDeps.setdefault(unicode(rawTypeName), []))
print >>typeOut, " %s = %s" % (normalizeIdentifier(name), schemaType,)
print >>typeOut
typesPrinted.append((rawTypeName, typeOut.getvalue(),))
serviceInterfaceOut = StringIO()
# Main service interface
print >>serviceInterfaceOut, INTERFACE % dict(
name=normalizeIdentifier(sd.service.name),
bases=u"SoapServiceBase",
docstring=formatDocstring(SERVICE_INTERFACE_DOCSTRING % dict(
serviceName=sd.service.name,
tns=sd.wsdl.tns[1],
)
)
)
methods = {} # name -> (response type, list of parameters,)
for p in sd.ports:
for m in p[1]:
methodName = m[0]
methodArgs = m[1]
if methodName not in methods:
methodDef = p[0].method(methodName)
# XXX: This is discards the namespace part
if methodDef.soap.output.body.wrapped:
inputMessage = methodDef.soap.input.body.parts[0].element[0]
outputMessage = methodDef.soap.output.body.parts[0].element[0]
if outputMessage in typeAttributes:
if len(typeAttributes[outputMessage]) > 0:
response = typeAttributes[outputMessage].values()[0]
else:
response = "None"
else:
response = outputMessage
# Remove types used as input/output messages
if removeInputOutputMesssages:
remove = False
for idx, (t, x) in enumerate(typesPrinted):
if t == inputMessage:
remove = True
break
if remove:
del typesPrinted[idx]
if inputMessage in typeMap:
del typeMap[inputMessage]
remove = False
for idx, (t, x) in enumerate(typesPrinted):
if t == outputMessage:
remove = True
break
if remove:
del typesPrinted[idx]
if outputMessage in typeMap:
del typeMap[outputMessage]
else:
response = methodDef.soap.output.body.parts[0].element[0]
methods[methodName] = (response, methodArgs,)
for methodName in sorted(methods):
methodArgNames = [m[0] for m in methods[methodName][1]]
methodReturnType = methods[methodName][0]
methodArgDetails = []
methodArgSpecs = []
for m in methods[methodName][1]:
argDetail = m[1]
# for docstring
methodModifierParts = []
if not argDetail.required():
methodModifierParts.append('optional')
if argDetail.nillable:
methodModifierParts.append('may be None')
methodModifiers = ""
if methodModifierParts:
methodModifiers = ' (%s)' % ', '.join(methodModifierParts)
argTypeName = typeName(argDetail, sd)
methodSpec = "``%s`` -- %s%s" % (
argDetail.name,
argTypeName,
methodModifiers
)
methodArgDetails.append(methodSpec)
# for #soapmethod decorator
schemaType = schemaTypeName(argDetail, sd)
methodArgSpecs.append(schemaType)
# TODO: Probably not aware of array return types
if methodReturnType not in typeMap and methodReturnType in SCHEMA_TYPE_MAPPING:
methodReturnType = SCHEMA_TYPE_MAPPING[methodReturnType]
print >>serviceInterfaceOut, SOAPMETHOD % dict(
args=', '.join(methodArgSpecs),
response=methodReturnType,
)
print >>serviceInterfaceOut, METHOD % dict(
name=normalizeIdentifier(methodName),
args=', '.join(methodArgNames),
)
print >>serviceInterfaceOut, METHOD_DOCSTRING % dict(
args='\n '.join(methodArgDetails),
response=methodReturnType,
)
print >>serviceInterfaceOut
# Sort list of complex types based on internal dependencies
def sortDeps(printed):
    """Order ``(typeName, text)`` items so dependencies come first.

    Items whose entry in ``typeDeps`` is empty are emitted first, in their
    original order.  Each emitted type name is then struck from the
    dependency lists of the remaining items, and an item is queued as soon
    as its list becomes empty.  ``typeDeps`` is updated in place with the
    reduced lists.

    Items whose dependencies are never satisfied are appended at the end in
    their original order rather than being left out of the result.
    """
    printed = list(reversed(printed))
    queue = [item for item in printed if len(typeDeps.get(unicode(item[0]), [])) == 0]
    satisfied = set()
    remaining = [item for item in printed if item not in queue]
    sortedPrinted = []
    while queue:
        item = queue.pop()
        sortedPrinted.append(item)
        satisfied.add(unicode(item[0]))
        # Build a new list instead of removing from `remaining` while
        # iterating over it, which would skip the entry after each removal.
        stillWaiting = []
        for candidate in remaining:
            candidateName = unicode(candidate[0])
            remainingDeps = [dep for dep in typeDeps.get(candidateName, [])
                             if dep not in satisfied]
            typeDeps[candidateName] = remainingDeps
            if remainingDeps:
                stillWaiting.append(candidate)
            else:
                queue.append(candidate)
        remaining = stillWaiting
    # `remaining` is in reversed order; flip it back before appending.
    sortedPrinted.extend(reversed(remaining))
    return sortedPrinted
typesPrinted = sortDeps(typesPrinted)
# Print everything
printed.extend(typesPrinted)
printed.append((sd.service.name, serviceInterfaceOut.getvalue(),))
typeMapOut = StringIO()
print >>typeMapOut, TYPE_MAP % dict(
items=',\n'.join([" '%s': %s" % k for k in typeSeq if k[0] in typeMap])
)
print >>typeMapOut
printed.append(('', typeMapOut.getvalue(),))
return '\n'.join([v[1] for v in printed])
def main():
if len(sys.argv) < 2:
print "Usage: %s <url>" % sys.argv[0]
print "The output will be printed to the console"
return
if not '://' in sys.argv[1]:
sys.argv[1] = 'file://' + os.path.abspath(sys.argv[1])
client = suds.client.Client(sys.argv[1])
print generate(client, sys.argv[1])
if __name__ == '__main__':
main()
I have just created a github repository where I'm improving on optilude's script to make it work with soaplib2.0 and more. The link is https://github.com/fvieira/wsdl2soaplib.