I have a below string multiple lines. For each line, I want to split string and add this to a JSON output file. I had done this using string.gettext().split and a regular expression. However I am not sure this is the best way to do it.
Input file :
Server:prod01
Available memory: 20480 Disk:200 CPU:4
Used memory:12438 Disk:120 CPU:3
Unused memory:8042 Disk:80 CPU:1
Server:prod02
Available memory: 40960 Disk:500 CPU:8
Used memory:20888 Disk:320 CPU:3
Unused memory:20072 Disk:180 CPU:5
Expected output JSON:
{"prod01_available_memory":20480}
{"prod01_used_memory":12438}
{"prod01_unused_memory":8042}
{"prod01_available_disk":200}
{"prod01_used_disk":120}
{"prod01_unused_disk":80}
{"prod01_available_cpu":4}
{"prod01_used_cpu":3}
{"prod01_unused_cpu":1}
{"prod02_available_memory":40960}
{"prod02_used_memory":20888}
{"prod02_unused_memory":20072"}
{"prod02_available_disk":500"}
{"prod02_used_disk":380}
{"prod02_unused_disk":120}
{"prod02_available_cpu":8}
{"prod02_used_cpu":3}
{"prod02_unused_cpu":5}
Thanks,
Rinku
Below is my code -
def tsplit(string, *delimiters):
pattern = '|'.join(map(re.escape, delimiters))
return re.split(pattern, string)
prelist = pre.get_text().splitlines()
server_name = re.split('server|:',prelist[0])[2].strip()
if server_name == 'prod01':
#print prelist[1]
prod01_memory_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[2])
prod01_Disk_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[4])
prod01_CPU_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[6])
#print prelist[2]
prod01_memory_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[2])
prod01_Disk_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[4])
prod01_CPU_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[6])
#print prelist[4]
prod01_memory_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[2])
prod01_Disk_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[4])
prod01_CPU_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[6])
elif server_name == 'prod02':
#print prelist[1]
prod02memory_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[2])
prod02Disk_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[4])
prod02CPU_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[6])
#print prelist[2]
prod02memory_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[2])
prod02Disk_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[4])
prod02CPU_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[6])
#print prelist[4]
prod02memory_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[2])
prod02Disk_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[4])
prod02CPU_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[6])
else
#assign all varaiables 0
.....
proc_item["logtime"] = str(t1)
proc_item["prod01_memory_actv"] = prod01_memory_actv
proc_item["prod01_Disk_actv"] = prod01_Disk_actv
proc_item["prod01_CPU_actv"] = prod01_CPU_actv
......
#for all otehr variables...
proc_data.append(proc_item)
with open("./proc_"+ str(date.today()) + ".txt", 'a+') as f:
json.dump(proc_data, f)
f.write("\n")
I have some basic knowledge on python.
- Just using string array indices
hostmtrcs = "Server:prod01 Available memory:20480 Disk:200 CPU:4 Used memory:12438 Disk:120 CPU:3 Unused memory:8042 " \
"Disk:80 CPU:1 Server:prod02 Available memory: 40960 Disk:500 CPU:8 Used memory:20888 Disk:320 CPU:3 Unused " \
"memory:20072 Disk:180 CPU:5 "
datasplt = hostmtrcs.split(":")
hstname = ''
attrkey = ''
attrvalue = ''
for word in range(0, datasplt.__len__()):
if not datasplt[word].__contains__("Server"):
elmnt = datasplt[word].split(" ")
if datasplt[word].__contains__('prod'):
hstname = elmnt[0].lower()
if elmnt.__len__() == 3:
attrkey = elmnt[1].lower() + "_" + elmnt[2].lower() # attrkey
else:
attrkey = elmnt[1]
# retreive the value from the next element in the 1st attry datasplit
if word != datasplt.__len__() - 1:
nxtelmnt = datasplt[word + 1].split(" ")
attrvalue = nxtelmnt[0] # sattrvalue frm next element
finalfrmt = '{' + '"' +hstname + "_" + attrkey + '"' + ":" + attrvalue + '}'
print(finalfrmt)
I think you can do it with dict then just dump over json.(in your case i dont think its valid json but its needs so as per your request i have dump dict over json) i havn't validates keys, i am assuming you get dictionary data correct.
d = { 'Server':'prod01',
'Available memory': 20480,
'Disk':200,
'CPU':4}
import json
s = json.dumps({str(d['Server']+"_"+key).replace(' ','_'):value for key,value in d.items()})
print(json.loads(s))
>>> {'prod01_Server': 'prod01', 'prod01_Available memory': 20480, 'prod01_Disk': 200, 'prod01_CPU': 4}
You should split the input text, section by section, according to what you're looking for.
data = '''Server:prod01
Available memory: 20480 Disk:200 CPU:4
Used memory:12438 Disk:120 CPU:3
Unused memory:8042 Disk:80 CPU:1
Server:prod02
Available memory: 40960 Disk:500 CPU:8
Used memory:20888 Disk:320 CPU:3
Unused memory:20072 Disk:180 CPU:5'''
import re
import json
print(json.dumps({'_'.join((s, l.split(' ', 1)[0], k)).lower(): int(v) for s, d in [i.split('\n', 1) for i in data.split('Server:') if i] for l in d.split('\n') for k, v in re.findall(r'(\w+):\s*(\d+)', l)}))
This outputs:
{"prod01_available_memory": 20480, "prod01_available_disk": 200, "prod01_available_cpu": 4, "prod01_used_memory": 12438, "prod01_used_disk": 120, "prod01_used_cpu": 3, "prod01_unused_memory": 8042, "prod01_unused_disk": 80, "prod01_unused_cpu": 1, "prod02_available_memory": 40960, "prod02_available_disk": 500, "prod02_available_cpu": 8, "prod02_used_memory": 20888, "prod02_used_disk": 320, "prod02_used_cpu": 3, "prod02_unused_memory": 20072, "prod02_unused_disk": 180, "prod02_unused_cpu": 5}
Related
I'm trying to convert the result set of parsing to a dict with pyparsing. But dictionary contains only the last member of result set, not all of them. What am i missing?
Here's my code:
from pyparsing import *
test = """
pci24 u2480-L0
fcs1 g4045-L1
pci25 h6045-L0
en192 v7024-L3
pci26 h6045-L1
"""
grammar_pci = Combine("pci" + Word( nums )).setResultsName("name") + Word( alphanums + "-" ).setResultsName("loc")
grammar_non_pci = Suppress( Regex( r"(?!pci).*" ) )
grammar = OneOrMore( Group(grammar_pci).setResultsName("pci_slots") | grammar_non_pci, stopOn=StringEnd())
def main():
data = grammar.parse_string(test, parseAll=True)
print(data)
data_dict = data.as_dict()
print(data_dict)
if __name__ == "__main__":
main()
And the outputs of data and data_dict:
[['pci24', 'u2480-L0'], ['pci25', 'h6045-L0'], ['pci26', 'h6045-L1']]
{'pci_slots': {'name': 'pci26', 'loc': 'h6045-L1'}}
pyparsing version: 3.0.9
I hope, this will fix the issue (set listAllMatches=True in setResultName of 'pci_slots')
from pyparsing import *
test = """
pci24 u2480-L0
fcs1 g4045-L1
pci25 h6045-L0
en192 v7024-L3
pci26 h6045-L1
"""
grammar_pci = Combine("pci" + Word( nums )).setResultsName("name") + Word( alphanums + "-" ).setResultsName("loc")
grammar_non_pci = Suppress( Regex( r"(?!pci).*" ) )
grammar = OneOrMore( Group(grammar_pci).setResultsName("pci_slots", listAllMatches=True) | grammar_non_pci, stopOn=StringEnd())
data = grammar.parse_string(test, parseAll=True)
print(data)
data_dict = data.as_dict()
print(data_dict)
I am a newbie in programming. I am writing a Python script that extracts data from a pdf. I am having trouble with tuple. I am not able to provide its argument. I think it is my logic that is not correct including indentation, sequence, or something else.
I am hoping to get some explanation on why I am getting the error.
This is the sample of my PDF (i have to block some sensitive info)
This is what I am trying to achieve
I am getting this error:
Traceback (most recent call last):
File "/Users/jeff/PycharmProjects/extractFreightInvoice/main.py", line 79, in <module>
lines.append(Line('invDate, invNumber, poNumber, contactName, jobNumber, '
TypeError: <lambda>() missing 11 required positional arguments: 'invNumber', 'poNumber', 'contactName', 'jobNumber', 'jobName', 'invDescription', 'siteAddress', 'invItemsDesc', 'invItemsQty', 'invItemsUnitPrice', and 'invItemsAmount'
My code is as per below:
# This is a pdf extractor
import re
import pdfplumber
import pandas as pd
from collections import namedtuple
Line = namedtuple('Line', 'invDate, invNumber, poNumber, contactName, jobNumber, '
'jobName, invDescription, siteAddress, invItemsDesc, invItemsQty, invItemsUnitPrice, '
'invItemsAmount ')
invDate_re = re.compile(r'(Clever Core NZ Limited\s)(\d{1,2}/\d{1,2}/\d{4})(.+)')
invNumber_re = re.compile(r'(IN\d{6})')
poNumber_re = re.compile(r'\d{4}')
contactNameBen_re = re.compile(r'(Jordan\s.+)')
contactNameCraig_re = re.compile(r'(Lorna\s.+)')
jobNumber_re = re.compile(r'(J[\d]{6})')
jobName_re = re.compile(r'(Job Name)')
invDescription_re = re.compile(r'(Invoice Description)')
siteAddress_re = re.compile(r'(Site address.*)')
colHeading_re = re.compile(r'((Description)(.* Quantity.* Unit Price.*))')
invItems_re = re.compile(
r'(.+) (([0-9]*[.])?[0-9]+) (([0-9]*[.])?[0-9]+) (\d*\?\d+|\d{1,3}(,\d{3})*(\.\d+)?)')
# quoteLines_re = re.compile(r'(.+)(:\s*)(.+)')
# clevercorePriceLine_re = re.compile(r'(.* First .*\s?)(-\s?.*\$)(\s*)(.+)')
file = 'CombinedInvoicePdf.pdf'
lines = []
with pdfplumber.open(file) as myPdf:
for page in myPdf.pages:
text = page.extract_text()
lines = text.split('\n')
index = 0
for i in range(len(lines)):
line = lines[i]
invDateLine = invDate_re.search(line)
invNumberLine = invNumber_re.search(line)
poNumberLine = poNumber_re.search(line)
contactNameJordanLine = contactNameJordan_re.search(line)
contactNameLornaLine = contactNameLorna_re.search(line)
jobNumberLine = jobNumber_re.search(line)
jobNameLine = jobName_re.search(line)
invDescriptionLine = invDescription_re.search(line)
colHeadingLine = colHeading_re.search(line)
siteAddressLine = siteAddress_re.search(line)
invItemsLine = invItems_re.search(line)
if invDateLine:
invDate = invDateLine.group(2)
if invNumberLine:
invNumber = invNumberLine.group(1)
if poNumberLine and len(line) == 4:
poNumber = poNumberLine.group(0)
if contactNameBenLine:
contactName = 'Jordan Michael'
if contactNameCraigLine:
contactName = 'Lorna Tolentin'
if jobNumberLine:
jobNumber = lines[i]
if jobNameLine:
jobName = (lines[i + 1])
if invDescriptionLine:
invDescription = lines[i + 1]
if siteAddressLine:
if len(lines[i + 1]) > 0 and len(lines[i + 1]) == 0:
siteAddress = lines[i + 1]
elif len(lines[i + 1]) > 0 and len(lines[i + 1]) > 0:
siteAddress = lines[i + 1] + ' ' + lines[i + 2]
else:
siteAddress = 'check invoice'
if invItemsLine and invItemsLine[2] != '06':
invItemsDesc = invItemsLine.group(1)
invItemsQty = invItemsLine.group(2)
invItemsUnitPrice = invItemsLine.group(4)
invItemsAmount = invItemsLine.group(6)
lines.append(Line('invDate, invNumber, poNumber, contactName, jobNumber, '
'jobName, invDescription, siteAddress, invItemsDesc, invItemsQty, invItemsUnitPrice, '
'inItemsAmount'))
df = pd.DataFrame(lines)
print(df)
print(df.head())
df.to_csv('freightCharges.csv')
Line is a tuple subclass with parameters and fields
You need to fill them with separate parameters, not a single string
lines.append(Line('invDate', 'invNumber', 'poNumber', 'contactName', 'jobNumber', 'jobName', 'invDescription',
'siteAddress', 'invItemsDesc', 'invItemsQty', 'invItemsUnitPrice', 'inItemsAmount'))
I am extracting from the log file and print using the below code
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
print (g)
[('1.1.1.1', 'PUT')]
[('2.2.2.2', 'GET')]
[('1.1.1.1', 'PUT')]
[('2.2.2.2', 'POST')]
How to add to the output
output
1.1.1.1: PUT = 2
2.2.2.2: GET = 1,POST=1
You could use a dictionary to count:
# initialize the count dict
count_dict= dict()
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
for tup in g:
# get the counts for tuple tup if we don't have it yet
# use 0 (second argument to .get)
num= count_dict.get(tup, 0)
# increase the count and write it back
count_dict[tup]= num+1
# now iterate over the key (tuple) - value (counts)-pairs
# and print the result
for tup, count in count_dict.items():
print(tup, count)
Ok, I have to admit this doesn't give the exact output, you want, but from this you can do in a similar manner:
out_dict= dict()
for (comma_string, request_type), count in count_dict.items():
out_str= out_dict.get(comma_string, '')
sep='' if out_str == '' else ', '
out_str= f'{out_str}{sep}{request_type} = {count}'
out_dict[comma_string]= out_str
for tup, out_str in out_dict.items():
print(tup, out_str)
From your data that outputs:
1.1.1.1 PUT = 2
2.2.2.2 GET = 1, POST = 1
I would look towards Counter.
from collections import Counter
results = []
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
results.append(g[0])
ip_list = set(result[0] for result in results)
for ip in ip_list:
print(ip, Counter(result[1] for result in results if result[0] == ip ))
You can use collection.defaultdict
Ex:
from collections import defaultdict
result = defaultdict(list)
for line in data:
for ip, method in re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line):
result[ip].append(method)
for k, v in result.items():
temp = ""
for i in set(v):
temp += " {} = {}".format(i, v.count(i))
print("{}{}".format(k, temp))
from collections import Counter
x = [[('1.1.1.1', 'PUT')],[('2.2.2.2', 'GET')],[('1.1.1.1', 'PUT')],[('2.2.2.2', 'POST')]]
# step 1: convert x into a dict.
m = {}
for i in x:
a, b = i[0]
if a not in m.keys():
m[a] = [b]
else:
x = m[a]
x.append(b)
m[a] = x
print('new dict is {}'.format(m))
# step 2 count frequency
m_values = list(m.values())
yy = []
for i in m_values:
x = []
k = list(Counter(i).keys())
v = list(Counter(i).values())
for i in range(len(k)):
x.append(k[i] + '=' + str(v[i]))
yy.append(x)
# step 3, update the value of the dict
m_keys = list(m.keys())
n = len(m_keys)
for i in range(n):
m[m_keys[i]] = yy[i]
print("final dict is{}".format(m))
Output is
new dict is {'1.1.1.1': ['PUT', 'PUT'], '2.2.2.2': ['GET', 'POST']}
final dict is{'1.1.1.1': ['PUT=2'], '2.2.2.2': ['GET=1', 'POST=1']}
Without dependencies and using a dict for counting, in a very basic way. Given the data_set:
data_set = [[('1.1.1.1', 'PUT')],
[('2.2.2.2', 'GET')],
[('2.2.2.2', 'POST')],
[('1.1.1.1', 'PUT')]]
Initialize the variables (manually, just few verbs) then iterate over the data:
counter = {'PUT': 0, 'GET': 0, 'POST': 0, 'DELETE': 0}
res = {}
for data in data_set:
ip, verb = data[0]
if not ip in res:
res[ip] = counter
else:
res[ip][verb] += 1
print(res)
#=> {'1.1.1.1': {'PUT': 1, 'GET': 0, 'POST': 1, 'DELETE': 0}, '2.2.2.2': {'PUT': 1, 'GET': 0, 'POST': 1, 'DELETE': 0}}
It's required to format the output to better fits your needs.
I am building a bot game for my friends in LINE. I'm a beginning coder. I'm trying to call an object in json which includes a string + integer. I've looked around but nothing seems to fit what I need. What would be the best/simple solution?
My code is amateur, please go easy on me. :P
I'm trying to have Python extract through Json, "Name" + "Stat".
Right now it only extracts "Name" and randomly selects an item. Is there any way to select the item + the stat, display the item and calculate the stat? Thanks.
Python 3:
if text == 'FIGHT':
with open('items.json', 'r') as f:
data = json.load(f)
armor1 = [v for d in data['armor'] for k,v in d.items() if k == 'name']
weapon1 = [v for d in data['weapon'] for k,v in d.items() if k == 'name']
magic1 = [v for d in data['magic'] for k,v in d.items() if k == 'name']
armor2 = random.choice(armor1)
weapon2 = random.choice(weapon1)
magic2 = random.choice(magic1)
calc = add(int(armor2), int(weapon2), int(magic2))
line_bot_api.reply_message(
event.reply_token,
TextSendMessage('Armor = ' + (armor2)),
TextSendMessage('Weapon = ' + (weapon2)),
TextSendMessage('Magic = ' + (magic2)),
TextSendMessage('You have a score of ' + str(calc) + '.'),
TextSendMessage('Waiting for next opponent...')
)
Json:
"armor": [
{
"name":"Cardboard armor 10 DEF" ,
"stats":"10" },
{
"name":"Plastic armor 20 DEF" ,
"stats":"20" },
{
"name":"Rubber armor 30 DEF" ,
"stats":"30" },
{
"name":"Metal armor 40 DEF" ,
"stats":"40" },
{
"name":"Indestructable armor 50 DEF" ,
"stats":"50" }
],
After trying just about everything.. The solution was:
if text == 'FIGHT':
with open('items.json', 'r') as f:
data = json.load(f)
armor2 = random.choice(data['armor'])
weapon2 = random.choice(data['weapon'])
magic2 = random.choice(data['magic'])
calc = add(armor2['stats'], weapon2['stats'], magic2['stats'])
line_bot_api.reply_message(
event.reply_token, [
TextSendMessage('Armor = ' + (armor2['name'])),
TextSendMessage('Weapon = ' + (weapon2['name'])),
TextSendMessage('Magic = ' + (magic2['name'])),
TextSendMessage('Total = ' + str(calc) + '.')
]
)
Thanks to everyone and special thanks to my friend Sae who helped me. :)
I'd like to generate a stub SOAP web service class using the Python soaplib module, based on an existing WSDL. The idea is to generate a mock for a third party web service.
Does any such code generator exist, or must we write our own?
Martin
Okay, I had a go at hacking my wsdl2interface (http://pypi.python.org/pypi/wsdl2interface) script to output soaplib code. I think I have something that works, though it's not pretty or especially well tested.
I'll paste it here for the record. I could be persuaded to release it if someone needs it, though it's not exactly my best code. Note that it uses Suds' WSDL parser to generate soaplib code, which is a bit strange in itself.
Run like this:
$ wsdl2soaplib <url or filename of WSDL> > wsdl.py
The code (you'll need suds in your path, ideally in a virtualenv):
from StringIO import StringIO
import os.path
import sys
import textwrap
import keyword
import re
import suds.client
VALID_IDENTIFIER_RE = re.compile(r"[_A-Za-z][_A-Za-z1-9]*")
VALID_IDENTIFIER_FIRST_LETTER_RE = re.compile(r"[_A-Za-z]")
VALID_IDENTIFIER_SUBSEQUENT_LETTER_RE = re.compile(r"[_A-Za-z1-9]")
HEADER = '''\
"""SOAP web services generated from:
%(wsdl)s.
"""
from soaplib.serializers.primitive import (
String, Integer, Float, Double, DateTime, Bolean, Null, Array, Map, Any
)
from soaplib.serializers.clazz import ClassSerializer
from soaplib.service import SoapServiceBase
from soaplib.service import soapmethod
'''
INTERFACE = '''\
class %(name)s(%(bases)s):
"""%(docstring)s"""
'''
SERVICE_INTERFACE_DOCSTRING = '''\
SOAP service ``%(serviceName)s`` with target namespace %(tns)s.
'''
TYPE_INTERFACE_DOCSTRING = '''\
SOAP %(type)s ``{%(namespace)s}%(name)s``
'''
TYPE_MAP = '''\
WSDL_TYPES = {
%(items)s
}
'''
SOAPMETHOD = ''' #soapmethod(%(args)s, _returns=%(response)s)'''
METHOD = ''' def %(name)s(self, %(args)s):'''
METHOD_DOCSTRING = '''\
"""Parameters:
%(args)s
Returns: %(response)s
"""
'''
STANDARD_TYPE_NAMESPACES = [
'http://schemas.xmlsoap.org/soap/encoding/',
'http://schemas.xmlsoap.org/wsdl/',
'http://www.w3.org/2001/XMLSchema'
]
SCHEMA_TYPE_MAPPING = {
None: '%(typeName)s',
'None': 'None',
'boolean': 'Boolean',
'string': 'String',
'long': 'Integer',
'int': 'Integer',
'short': 'Integer',
'byte': 'Integer',
'unsignedLong': 'Integer',
'unsignedInt': 'Integer',
'unsignedShort': 'Integer',
'unsignedByte': 'Integer',
'positiveInteger': 'Integer',
'nonPositiveInteger': 'Integer',
'negativeInteger': 'Integer',
'nonNegativeInteger': 'Integer',
'float': 'Float',
'double': 'Float',
'decimal': 'Decimal',
'dateTime': 'DateTime',
'date': 'DateTime',
'anyURI': 'String',
'token': 'String',
'normalizedString': 'String',
'base64Binary': 'String',
'hexBinary': 'String',
}
def formatDocstring(text, indent=4, colwidth=78):
width = colwidth - indent
joiner = '\n' + ' ' * indent
return joiner.join(textwrap.wrap(text, width) + [''])
def typeName(type, sd):
resolved = type.resolve()
return resolved.name or ''
def schemaTypeName(type, sd, deps=None):
resolved = type.resolve()
name = resolved.name or ''
schemaType = SCHEMA_TYPE_MAPPING.get(name)
if schemaType is None: # not a standard type
# user default
schemaType = SCHEMA_TYPE_MAPPING[None]
# possibly save dependency link
if deps is not None:
deps.append(unicode(name))
required = type.required()
schemaType = schemaType % dict(typeName=name, required=required)
if type.unbounded():
schemaType = "Array(%s)" % schemaType
return schemaType
def normalizeIdentifier(identifier):
if not VALID_IDENTIFIER_RE.match(identifier):
newIdentifierLetters = []
firstLetter = True
for letter in identifier:
if firstLetter:
if VALID_IDENTIFIER_FIRST_LETTER_RE.match(letter):
newIdentifierLetters.append(letter)
else:
newIdentifierLetters.append('_')
firstLetter = False
else:
if VALID_IDENTIFIER_SUBSEQUENT_LETTER_RE.match(letter):
newIdentifierLetters.append(letter)
else:
newIdentifierLetters.append('_')
identifier = ''.join(newIdentifierLetters)
if keyword.iskeyword(identifier):
identifier = identifier + '_'
return identifier
def generate(client, url=None, standardTypeNamespaces=STANDARD_TYPE_NAMESPACES, removeInputOutputMesssages=True):
"""Given a WSDL URL, return a file that could become your interfaces.py
"""
printed = [] # sequence of type name -> string
for sd in client.sd:
serviceOut = StringIO()
print >>serviceOut, HEADER % dict(
wsdl=url,
)
printed.append(('', serviceOut.getvalue(),))
# Types
typeMap = {}
typeSeq = []
typeDeps = {}
typeAttributes = {}
typesPrinted = []
for type_ in sd.types:
typeOut = StringIO()
resolved = type_[0].resolve()
namespaceURL = resolved.namespace()[1]
if namespaceURL not in standardTypeNamespaces:
if resolved.enum():
typeDescription = "enumeration"
else:
typeDescription = "complex type"
# Look for basess
interfaceBases = []
if resolved.extension():
def find(t):
for c in t.rawchildren:
if c.extension():
find(c)
if c.ref is not None:
interfaceBases.append(c.ref[0])
find(resolved)
if not interfaceBases:
interfaceBases = ['ClassSerializer']
rawTypeName = typeName(type_[0], sd)
typeInterfaceName = normalizeIdentifier(rawTypeName)
typeMap[rawTypeName] = typeInterfaceName
typeSeq.append((rawTypeName, typeInterfaceName,))
typeAttributes[rawTypeName] = {}
print >>typeOut, INTERFACE % dict(
name=normalizeIdentifier(typeInterfaceName),
bases=', '.join(interfaceBases),
docstring=formatDocstring(TYPE_INTERFACE_DOCSTRING % dict(
type=typeDescription,
name=rawTypeName,
namespace=namespaceURL,
)
)
)
print >>typeOut, " class types:"
if resolved.enum():
for attr in type_[0].children():
name = attr[0].name.replace(' ', '_')
print >>typeOut, " %s = String # XXX: Enumeration value" % name
else:
for attr in type_[0].children():
name = attr[0].name.replace(' ', '_')
attrTypeName = typeName(attr[0], sd)
typeAttributes[rawTypeName][name] = attrTypeName
schemaType = schemaTypeName(attr[0], sd, deps=typeDeps.setdefault(unicode(rawTypeName), []))
print >>typeOut, " %s = %s" % (normalizeIdentifier(name), schemaType,)
print >>typeOut
typesPrinted.append((rawTypeName, typeOut.getvalue(),))
serviceInterfaceOut = StringIO()
# Main service interface
print >>serviceInterfaceOut, INTERFACE % dict(
name=normalizeIdentifier(sd.service.name),
bases=u"SoapServiceBase",
docstring=formatDocstring(SERVICE_INTERFACE_DOCSTRING % dict(
serviceName=sd.service.name,
tns=sd.wsdl.tns[1],
)
)
)
methods = {} # name -> (response type, list of parameters,)
for p in sd.ports:
for m in p[1]:
methodName = m[0]
methodArgs = m[1]
if methodName not in methods:
methodDef = p[0].method(methodName)
# XXX: This is discards the namespace part
if methodDef.soap.output.body.wrapped:
inputMessage = methodDef.soap.input.body.parts[0].element[0]
outputMessage = methodDef.soap.output.body.parts[0].element[0]
if outputMessage in typeAttributes:
if len(typeAttributes[outputMessage]) > 0:
response = typeAttributes[outputMessage].values()[0]
else:
response = "None"
else:
response = outputMessage
# Remove types used as input/output messages
if removeInputOutputMesssages:
remove = False
for idx, (t, x) in enumerate(typesPrinted):
if t == inputMessage:
remove = True
break
if remove:
del typesPrinted[idx]
if inputMessage in typeMap:
del typeMap[inputMessage]
remove = False
for idx, (t, x) in enumerate(typesPrinted):
if t == outputMessage:
remove = True
break
if remove:
del typesPrinted[idx]
if outputMessage in typeMap:
del typeMap[outputMessage]
else:
response = methodDef.soap.output.body.parts[0].element[0]
methods[methodName] = (response, methodArgs,)
for methodName in sorted(methods):
methodArgNames = [m[0] for m in methods[methodName][1]]
methodReturnType = methods[methodName][0]
methodArgDetails = []
methodArgSpecs = []
for m in methods[methodName][1]:
argDetail = m[1]
# for docstring
methodModifierParts = []
if not argDetail.required():
methodModifierParts.append('optional')
if argDetail.nillable:
methodModifierParts.append('may be None')
methodModifiers = ""
if methodModifierParts:
methodModifiers = ' (%s)' % ', '.join(methodModifierParts)
argTypeName = typeName(argDetail, sd)
methodSpec = "``%s`` -- %s%s" % (
argDetail.name,
argTypeName,
methodModifiers
)
methodArgDetails.append(methodSpec)
# for #soapmethod decorator
schemaType = schemaTypeName(argDetail, sd)
methodArgSpecs.append(schemaType)
# TODO: Probably not aware of array return types
if methodReturnType not in typeMap and methodReturnType in SCHEMA_TYPE_MAPPING:
methodReturnType = SCHEMA_TYPE_MAPPING[methodReturnType]
print >>serviceInterfaceOut, SOAPMETHOD % dict(
args=', '.join(methodArgSpecs),
response=methodReturnType,
)
print >>serviceInterfaceOut, METHOD % dict(
name=normalizeIdentifier(methodName),
args=', '.join(methodArgNames),
)
print >>serviceInterfaceOut, METHOD_DOCSTRING % dict(
args='\n '.join(methodArgDetails),
response=methodReturnType,
)
print >>serviceInterfaceOut
# Sort list of complex types based on internal dependencies
def sortDeps(printed):
printed = list(reversed(printed))
queue = [item for item in printed if len(typeDeps.get(unicode(item[0]), [])) == 0]
satisfied = set(queue)
remaining = [item for item in printed if item not in queue]
sortedPrinted = []
while queue:
item = queue.pop()
itemTypeName = unicode(item[0])
sortedPrinted.append(item)
satisfied.add(itemTypeName)
for item in remaining:
remainingItemTypeName = unicode(item[0])
depsList = typeDeps.get(remainingItemTypeName, [])
remainingDeps = []
for dep in depsList:
if dep not in satisfied:
remainingDeps.append(dep)
typeDeps[remainingItemTypeName] = remainingDeps
if len(remainingDeps) == 0:
queue.append(item)
remaining.remove(item)
return sortedPrinted
typesPrinted = sortDeps(typesPrinted)
# Print everything
printed.extend(typesPrinted)
printed.append((sd.service.name, serviceInterfaceOut.getvalue(),))
typeMapOut = StringIO()
print >>typeMapOut, TYPE_MAP % dict(
items=',\n'.join([" '%s': %s" % k for k in typeSeq if k[0] in typeMap])
)
print >>typeMapOut
printed.append(('', typeMapOut.getvalue(),))
return '\n'.join([v[1] for v in printed])
def main():
if len(sys.argv) < 2:
print "Usage: %s <url>" % sys.argv[0]
print "The output will be printed to the console"
return
if not '://' in sys.argv[1]:
sys.argv[1] = 'file://' + os.path.abspath(sys.argv[1])
client = suds.client.Client(sys.argv[1])
print generate(client, sys.argv[1])
if __name__ == '__main__':
main()
I have just created a github repository where I'm improving on optilude's script to make it work with soaplib2.0 and more. The link is https://github.com/fvieira/wsdl2soaplib.