Generating Python soaplib stubs from WSDL

Generating Python soaplib stubs from WSDL - python

I'd like to generate a stub SOAP web service class using the Python soaplib module, based on an existing WSDL. The idea is to generate a mock for a third party web service.
Does any such code generator exist, or must we write our own?
Martin

Okay, I had a go at hacking my wsdl2interface (http://pypi.python.org/pypi/wsdl2interface) script to output soaplib code. I think I have something that works, though it's not pretty or especially well tested.
I'll paste it here for the record. I could be persuaded to release it if someone needs it, though it's not exactly my best code. Note that it uses Suds' WSDL parser to generate soaplib code, which is a bit strange in itself.
Run like this:
$ wsdl2soaplib <url or filename of WSDL> > wsdl.py
The code (you'll need suds in your path, ideally in a virtualenv):
from StringIO import StringIO
import os.path
import sys
import textwrap
import keyword
import re
import suds.client
VALID_IDENTIFIER_RE = re.compile(r"[_A-Za-z][_A-Za-z1-9]*")
VALID_IDENTIFIER_FIRST_LETTER_RE = re.compile(r"[_A-Za-z]")
VALID_IDENTIFIER_SUBSEQUENT_LETTER_RE = re.compile(r"[_A-Za-z1-9]")
HEADER = '''\
"""SOAP web services generated from:
%(wsdl)s.
"""
from soaplib.serializers.primitive import (
String, Integer, Float, Double, DateTime, Bolean, Null, Array, Map, Any
)
from soaplib.serializers.clazz import ClassSerializer
from soaplib.service import SoapServiceBase
from soaplib.service import soapmethod
'''
INTERFACE = '''\
class %(name)s(%(bases)s):
"""%(docstring)s"""
'''
SERVICE_INTERFACE_DOCSTRING = '''\
SOAP service ``%(serviceName)s`` with target namespace %(tns)s.
'''
TYPE_INTERFACE_DOCSTRING = '''\
SOAP %(type)s ``{%(namespace)s}%(name)s``
'''
TYPE_MAP = '''\
WSDL_TYPES = {
%(items)s
}
'''
SOAPMETHOD = ''' #soapmethod(%(args)s, _returns=%(response)s)'''
METHOD = ''' def %(name)s(self, %(args)s):'''
METHOD_DOCSTRING = '''\
"""Parameters:
%(args)s
Returns: %(response)s
"""
'''
STANDARD_TYPE_NAMESPACES = [
'http://schemas.xmlsoap.org/soap/encoding/',
'http://schemas.xmlsoap.org/wsdl/',
'http://www.w3.org/2001/XMLSchema'
]
SCHEMA_TYPE_MAPPING = {
None: '%(typeName)s',
'None': 'None',
'boolean': 'Boolean',
'string': 'String',
'long': 'Integer',
'int': 'Integer',
'short': 'Integer',
'byte': 'Integer',
'unsignedLong': 'Integer',
'unsignedInt': 'Integer',
'unsignedShort': 'Integer',
'unsignedByte': 'Integer',
'positiveInteger': 'Integer',
'nonPositiveInteger': 'Integer',
'negativeInteger': 'Integer',
'nonNegativeInteger': 'Integer',
'float': 'Float',
'double': 'Float',
'decimal': 'Decimal',
'dateTime': 'DateTime',
'date': 'DateTime',
'anyURI': 'String',
'token': 'String',
'normalizedString': 'String',
'base64Binary': 'String',
'hexBinary': 'String',
}
def formatDocstring(text, indent=4, colwidth=78):
width = colwidth - indent
joiner = '\n' + ' ' * indent
return joiner.join(textwrap.wrap(text, width) + [''])
def typeName(type, sd):
resolved = type.resolve()
return resolved.name or ''
def schemaTypeName(type, sd, deps=None):
resolved = type.resolve()
name = resolved.name or ''
schemaType = SCHEMA_TYPE_MAPPING.get(name)
if schemaType is None: # not a standard type
# user default
schemaType = SCHEMA_TYPE_MAPPING[None]
# possibly save dependency link
if deps is not None:
deps.append(unicode(name))
required = type.required()
schemaType = schemaType % dict(typeName=name, required=required)
if type.unbounded():
schemaType = "Array(%s)" % schemaType
return schemaType
def normalizeIdentifier(identifier):
if not VALID_IDENTIFIER_RE.match(identifier):
newIdentifierLetters = []
firstLetter = True
for letter in identifier:
if firstLetter:
if VALID_IDENTIFIER_FIRST_LETTER_RE.match(letter):
newIdentifierLetters.append(letter)
else:
newIdentifierLetters.append('_')
firstLetter = False
else:
if VALID_IDENTIFIER_SUBSEQUENT_LETTER_RE.match(letter):
newIdentifierLetters.append(letter)
else:
newIdentifierLetters.append('_')
identifier = ''.join(newIdentifierLetters)
if keyword.iskeyword(identifier):
identifier = identifier + '_'
return identifier
def generate(client, url=None, standardTypeNamespaces=STANDARD_TYPE_NAMESPACES, removeInputOutputMesssages=True):
"""Given a WSDL URL, return a file that could become your interfaces.py
"""
printed = [] # sequence of type name -> string
for sd in client.sd:
serviceOut = StringIO()
print >>serviceOut, HEADER % dict(
wsdl=url,
)
printed.append(('', serviceOut.getvalue(),))
# Types
typeMap = {}
typeSeq = []
typeDeps = {}
typeAttributes = {}
typesPrinted = []
for type_ in sd.types:
typeOut = StringIO()
resolved = type_[0].resolve()
namespaceURL = resolved.namespace()[1]
if namespaceURL not in standardTypeNamespaces:
if resolved.enum():
typeDescription = "enumeration"
else:
typeDescription = "complex type"
# Look for basess
interfaceBases = []
if resolved.extension():
def find(t):
for c in t.rawchildren:
if c.extension():
find(c)
if c.ref is not None:
interfaceBases.append(c.ref[0])
find(resolved)
if not interfaceBases:
interfaceBases = ['ClassSerializer']
rawTypeName = typeName(type_[0], sd)
typeInterfaceName = normalizeIdentifier(rawTypeName)
typeMap[rawTypeName] = typeInterfaceName
typeSeq.append((rawTypeName, typeInterfaceName,))
typeAttributes[rawTypeName] = {}
print >>typeOut, INTERFACE % dict(
name=normalizeIdentifier(typeInterfaceName),
bases=', '.join(interfaceBases),
docstring=formatDocstring(TYPE_INTERFACE_DOCSTRING % dict(
type=typeDescription,
name=rawTypeName,
namespace=namespaceURL,
)
)
)
print >>typeOut, " class types:"
if resolved.enum():
for attr in type_[0].children():
name = attr[0].name.replace(' ', '_')
print >>typeOut, " %s = String # XXX: Enumeration value" % name
else:
for attr in type_[0].children():
name = attr[0].name.replace(' ', '_')
attrTypeName = typeName(attr[0], sd)
typeAttributes[rawTypeName][name] = attrTypeName
schemaType = schemaTypeName(attr[0], sd, deps=typeDeps.setdefault(unicode(rawTypeName), []))
print >>typeOut, " %s = %s" % (normalizeIdentifier(name), schemaType,)
print >>typeOut
typesPrinted.append((rawTypeName, typeOut.getvalue(),))
serviceInterfaceOut = StringIO()
# Main service interface
print >>serviceInterfaceOut, INTERFACE % dict(
name=normalizeIdentifier(sd.service.name),
bases=u"SoapServiceBase",
docstring=formatDocstring(SERVICE_INTERFACE_DOCSTRING % dict(
serviceName=sd.service.name,
tns=sd.wsdl.tns[1],
)
)
)
methods = {} # name -> (response type, list of parameters,)
for p in sd.ports:
for m in p[1]:
methodName = m[0]
methodArgs = m[1]
if methodName not in methods:
methodDef = p[0].method(methodName)
# XXX: This is discards the namespace part
if methodDef.soap.output.body.wrapped:
inputMessage = methodDef.soap.input.body.parts[0].element[0]
outputMessage = methodDef.soap.output.body.parts[0].element[0]
if outputMessage in typeAttributes:
if len(typeAttributes[outputMessage]) > 0:
response = typeAttributes[outputMessage].values()[0]
else:
response = "None"
else:
response = outputMessage
# Remove types used as input/output messages
if removeInputOutputMesssages:
remove = False
for idx, (t, x) in enumerate(typesPrinted):
if t == inputMessage:
remove = True
break
if remove:
del typesPrinted[idx]
if inputMessage in typeMap:
del typeMap[inputMessage]
remove = False
for idx, (t, x) in enumerate(typesPrinted):
if t == outputMessage:
remove = True
break
if remove:
del typesPrinted[idx]
if outputMessage in typeMap:
del typeMap[outputMessage]
else:
response = methodDef.soap.output.body.parts[0].element[0]
methods[methodName] = (response, methodArgs,)
for methodName in sorted(methods):
methodArgNames = [m[0] for m in methods[methodName][1]]
methodReturnType = methods[methodName][0]
methodArgDetails = []
methodArgSpecs = []
for m in methods[methodName][1]:
argDetail = m[1]
# for docstring
methodModifierParts = []
if not argDetail.required():
methodModifierParts.append('optional')
if argDetail.nillable:
methodModifierParts.append('may be None')
methodModifiers = ""
if methodModifierParts:
methodModifiers = ' (%s)' % ', '.join(methodModifierParts)
argTypeName = typeName(argDetail, sd)
methodSpec = "``%s`` -- %s%s" % (
argDetail.name,
argTypeName,
methodModifiers
)
methodArgDetails.append(methodSpec)
# for #soapmethod decorator
schemaType = schemaTypeName(argDetail, sd)
methodArgSpecs.append(schemaType)
# TODO: Probably not aware of array return types
if methodReturnType not in typeMap and methodReturnType in SCHEMA_TYPE_MAPPING:
methodReturnType = SCHEMA_TYPE_MAPPING[methodReturnType]
print >>serviceInterfaceOut, SOAPMETHOD % dict(
args=', '.join(methodArgSpecs),
response=methodReturnType,
)
print >>serviceInterfaceOut, METHOD % dict(
name=normalizeIdentifier(methodName),
args=', '.join(methodArgNames),
)
print >>serviceInterfaceOut, METHOD_DOCSTRING % dict(
args='\n '.join(methodArgDetails),
response=methodReturnType,
)
print >>serviceInterfaceOut
# Sort list of complex types based on internal dependencies
def sortDeps(printed):
printed = list(reversed(printed))
queue = [item for item in printed if len(typeDeps.get(unicode(item[0]), [])) == 0]
satisfied = set(queue)
remaining = [item for item in printed if item not in queue]
sortedPrinted = []
while queue:
item = queue.pop()
itemTypeName = unicode(item[0])
sortedPrinted.append(item)
satisfied.add(itemTypeName)
for item in remaining:
remainingItemTypeName = unicode(item[0])
depsList = typeDeps.get(remainingItemTypeName, [])
remainingDeps = []
for dep in depsList:
if dep not in satisfied:
remainingDeps.append(dep)
typeDeps[remainingItemTypeName] = remainingDeps
if len(remainingDeps) == 0:
queue.append(item)
remaining.remove(item)
return sortedPrinted
typesPrinted = sortDeps(typesPrinted)
# Print everything
printed.extend(typesPrinted)
printed.append((sd.service.name, serviceInterfaceOut.getvalue(),))
typeMapOut = StringIO()
print >>typeMapOut, TYPE_MAP % dict(
items=',\n'.join([" '%s': %s" % k for k in typeSeq if k[0] in typeMap])
)
print >>typeMapOut
printed.append(('', typeMapOut.getvalue(),))
return '\n'.join([v[1] for v in printed])
def main():
if len(sys.argv) < 2:
print "Usage: %s <url>" % sys.argv[0]
print "The output will be printed to the console"
return
if not '://' in sys.argv[1]:
sys.argv[1] = 'file://' + os.path.abspath(sys.argv[1])
client = suds.client.Client(sys.argv[1])
print generate(client, sys.argv[1])
if __name__ == '__main__':
main()

I have just created a github repository where I'm improving on optilude's script to make it work with soaplib2.0 and more. The link is https://github.com/fvieira/wsdl2soaplib.

Related

Parsing logs to json Python

Folks,
I am trying to parse log file into json format.
I have a lot of logs, there is one of them
How can I parse this?
03:02:03.113 [info] ext_ref = BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}
Into this type of json, or any other format (but I guess json is best one)
{
"time":"03:02:03.113",
"class_req":"info",
"ext_ref":"BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ",
"reqid":"1253166",
"type":"INREQ",
"channel":"BANK24AOS",
"sid":"msid_1591844511335516_KRRNBSLH2FS",
"duration":"703.991",
"req_uri":"marketcredit/order/state",
"login":"77012221122",
"req_type":"cl_req",
"req_headers":{
"accept-encoding":"gzip",
"connection":"close",
"host":"test-mobileapp-api.bank.kz",
"user-agent":"okhttp/4.4.1",
"x-forwarded-for":"212.154.169.134",
"x-real-ip":"212.154.169.134"
},
"req_body":{
"$sid":"msid_1591844511335516_KRRNBSLH2FS",
"$sid":"msid_1591844511335516_KRRNBSLH2FS",
"app":"bank",
"app_version":"2.3.2",
"channel":"aos",
"colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv",
"colvir_commercial_id":"-1",
"colvir_id":"000120.335980",
"openway_commercial_id":"6247520",
"openway_id":"6196360",
"$lang":"ru",
"ekb_id":"923243",
"inn":"990830221722",
"login":"77012221122",
"bank24_id":"262"
},
"resp_body":{
"task_id":"",
"status":"success",
"data":{
"state":"init",
"applications":[
{
"status":"init",
"id":"123db561-34a3-4a8d-9fa7-03ed6377b44f",
"name":"Sulpak",
"amount":101000,
"items":[
{
"name":"Switch CISCO x24",
"price":100000,
"count":1,
"amount":100000
}
]
}
],
"segment":{
"range":{
"min":6,
"max":36,
"step":1
},
"payment_day":{
"max":28,
"min":1
}
}
}
}
}
I am trying to split first whole text, but there I met another problem is to match keys to values depending on '=' sign. Also there might be some keys with empty values. For ex.:
type = INREQ channel = sid = duration = 1.333 (to get to know that there is an empty value, you need to pay attention on number of spaces. Usually there is 1 space between prev.value and next key). So this example should look like this:
{
"type":"INREQ",
"channel":"",
"sid":"",
"duration":"1.333"
}
Thanks ahead!

Here, one thing pass for duplicate key about "$sid":"msid_1591844511335516_KRRNBSLH2FS"
import re
text = """03:02:03.113 [info] ext_ref = reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}"""
index1 = text.index('[')
index2 = text.index(']')
new_text = 'time = '+ text[:index1-1] + ' class_req = ' + text[index1+1:index2] + text[index2+2:]
lst = re.findall(r'\S+? = |\S+? = \{.*?\} |\S+? = \{.*?\}$|\S+? = \S+? ', new_text)
res = {}
for item in lst:
key, equal, value = item.partition('=')
key, value = key.strip(), value.strip()
if value.startswith('{'):
try:
value = json.loads(value)
except:
print(value)
res[key] = value

you can try regulation in python.
here is what i write, it works for your problem.
for convenience i deleted string before "ext_ref...",you can directly truncate the raw string.
import re
import json
string = 'ext_ref = BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}'
position = re.search("req_headers",string) # position of req_headers
resp_body_pos = re.search("resp_body",string)
resp_body = string[resp_body_pos.span()[0]:]
res1 = {}
res1.setdefault(resp_body.split("=")[0],resp_body.split("=")[1])
print(res1)
before = string[:position.span()[0]]
after = string[position.span()[0]:resp_body_pos.span()[0]] # handle req_body seperately
res2 = re.findall("(\S+) = (\S+)",before)
print(res2)
res3 = re.findall("(\S+) = ({.*?})",after)
print(res3)
#res1 type: dict{'resp_body':'...'} content in resp_body
#res2 type: list[(),()..] content before req_head
#res3 type: list[(),()..] the rest content
and now you can do what you want to do with the data(.e.g. transform it into json respectively)
Hope this is helpful

Convert a string into multiple JSON objects

I have a below string multiple lines. For each line, I want to split string and add this to a JSON output file. I had done this using string.gettext().split and a regular expression. However I am not sure this is the best way to do it.
Input file :
Server:prod01
Available memory: 20480 Disk:200 CPU:4
Used memory:12438 Disk:120 CPU:3
Unused memory:8042 Disk:80 CPU:1
Server:prod02
Available memory: 40960 Disk:500 CPU:8
Used memory:20888 Disk:320 CPU:3
Unused memory:20072 Disk:180 CPU:5
Expected output JSON:
{"prod01_available_memory":20480}
{"prod01_used_memory":12438}
{"prod01_unused_memory":8042}
{"prod01_available_disk":200}
{"prod01_used_disk":120}
{"prod01_unused_disk":80}
{"prod01_available_cpu":4}
{"prod01_used_cpu":3}
{"prod01_unused_cpu":1}
{"prod02_available_memory":40960}
{"prod02_used_memory":20888}
{"prod02_unused_memory":20072"}
{"prod02_available_disk":500"}
{"prod02_used_disk":380}
{"prod02_unused_disk":120}
{"prod02_available_cpu":8}
{"prod02_used_cpu":3}
{"prod02_unused_cpu":5}
Thanks,
Rinku
Below is my code -
def tsplit(string, *delimiters):
pattern = '|'.join(map(re.escape, delimiters))
return re.split(pattern, string)
prelist = pre.get_text().splitlines()
server_name = re.split('server|:',prelist[0])[2].strip()
if server_name == 'prod01':
#print prelist[1]
prod01_memory_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[2])
prod01_Disk_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[4])
prod01_CPU_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[6])
#print prelist[2]
prod01_memory_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[2])
prod01_Disk_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[4])
prod01_CPU_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[6])
#print prelist[4]
prod01_memory_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[2])
prod01_Disk_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[4])
prod01_CPU_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[6])
elif server_name == 'prod02':
#print prelist[1]
prod02memory_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[2])
prod02Disk_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[4])
prod02CPU_actv = int(re.split('Activated memory|:|Disk|:|CPU|:',prelist[1])[6])
#print prelist[2]
prod02memory_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[2])
prod02Disk_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[4])
prod02CPU_cons = int(re.split('memory consumed|:|Disk|:|CPU|:',prelist[2])[6])
#print prelist[4]
prod02memory_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[2])
prod02Disk_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[4])
prod02CPU_unused = int(re.split('memory unused|:|Disk|:|CPU|:',prelist[4])[6])
else
#assign all varaiables 0
.....
proc_item["logtime"] = str(t1)
proc_item["prod01_memory_actv"] = prod01_memory_actv
proc_item["prod01_Disk_actv"] = prod01_Disk_actv
proc_item["prod01_CPU_actv"] = prod01_CPU_actv
......
#for all otehr variables...
proc_data.append(proc_item)
with open("./proc_"+ str(date.today()) + ".txt", 'a+') as f:
json.dump(proc_data, f)
f.write("\n")
I have some basic knowledge on python.

- Just using string array indices
hostmtrcs = "Server:prod01 Available memory:20480 Disk:200 CPU:4 Used memory:12438 Disk:120 CPU:3 Unused memory:8042 " \
"Disk:80 CPU:1 Server:prod02 Available memory: 40960 Disk:500 CPU:8 Used memory:20888 Disk:320 CPU:3 Unused " \
"memory:20072 Disk:180 CPU:5 "
datasplt = hostmtrcs.split(":")
hstname = ''
attrkey = ''
attrvalue = ''
for word in range(0, datasplt.__len__()):
if not datasplt[word].__contains__("Server"):
elmnt = datasplt[word].split(" ")
if datasplt[word].__contains__('prod'):
hstname = elmnt[0].lower()
if elmnt.__len__() == 3:
attrkey = elmnt[1].lower() + "_" + elmnt[2].lower() # attrkey
else:
attrkey = elmnt[1]
# retreive the value from the next element in the 1st attry datasplit
if word != datasplt.__len__() - 1:
nxtelmnt = datasplt[word + 1].split(" ")
attrvalue = nxtelmnt[0] # sattrvalue frm next element
finalfrmt = '{' + '"' +hstname + "_" + attrkey + '"' + ":" + attrvalue + '}'
print(finalfrmt)

I think you can do it with dict then just dump over json.(in your case i dont think its valid json but its needs so as per your request i have dump dict over json) i havn't validates keys, i am assuming you get dictionary data correct.
d = { 'Server':'prod01',
'Available memory': 20480,
'Disk':200,
'CPU':4}
import json
s = json.dumps({str(d['Server']+"_"+key).replace(' ','_'):value for key,value in d.items()})
print(json.loads(s))
>>> {'prod01_Server': 'prod01', 'prod01_Available memory': 20480, 'prod01_Disk': 200, 'prod01_CPU': 4}

You should split the input text, section by section, according to what you're looking for.
data = '''Server:prod01
Available memory: 20480 Disk:200 CPU:4
Used memory:12438 Disk:120 CPU:3
Unused memory:8042 Disk:80 CPU:1
Server:prod02
Available memory: 40960 Disk:500 CPU:8
Used memory:20888 Disk:320 CPU:3
Unused memory:20072 Disk:180 CPU:5'''
import re
import json
print(json.dumps({'_'.join((s, l.split(' ', 1)[0], k)).lower(): int(v) for s, d in [i.split('\n', 1) for i in data.split('Server:') if i] for l in d.split('\n') for k, v in re.findall(r'(\w+):\s*(\d+)', l)}))
This outputs:
{"prod01_available_memory": 20480, "prod01_available_disk": 200, "prod01_available_cpu": 4, "prod01_used_memory": 12438, "prod01_used_disk": 120, "prod01_used_cpu": 3, "prod01_unused_memory": 8042, "prod01_unused_disk": 80, "prod01_unused_cpu": 1, "prod02_available_memory": 40960, "prod02_available_disk": 500, "prod02_available_cpu": 8, "prod02_used_memory": 20888, "prod02_used_disk": 320, "prod02_used_cpu": 3, "prod02_unused_memory": 20072, "prod02_unused_disk": 180, "prod02_unused_cpu": 5}

Reference to value of the function

At beginning i wanna say i'm newbie in use Python and everything I learned it came from tutorials.
My problem concerning reference to the value. I'm writing some script which is scrapping some information from web sites. I defined some function:
def MatchPattern(count):
sock = urllib.urlopen(Link+str(count))
htmlSource = sock.read()
sock.close()
root = etree.HTML(htmlSource)
root = etree.HTML(htmlSource)
result = etree.tostring(root, pretty_print=True, method="html")
expr1 = check_reg(root)
expr2 = check_practice(root)
D_expr1 = no_ks(root)
D_expr2 = Registred_by(root)
D_expr3 = Name_doctor(root)
D_expr4 = Registration_no(root)
D_expr5 = PWZL(root)
D_expr6 = NIP(root)
D_expr7 = Spec(root)
D_expr8 = Start_date(root)
#-----Reg_practice-----
R_expr1 = Name_of_practise(root)
R_expr2 = TERYT(root)
R_expr3 = Street(root)
R_expr4 = House_no(root)
R_expr5 = Flat_no(root)
R_expr6 = Post_code(root)
R_expr7 = City(root)
R_expr8 = Practice_no(root)
R_expr9 = Kind_of_practice(root)
#------Serv_practice -----
S_expr1 = TERYT2(root)
S_expr2 = Street2(root)
S_expr3 = House_no2(root)
S_expr4 = Flat_no2(root)
S_expr5 = Post_code2(root)
S_expr6 = City2(root)
S_expr7 = Phone_no(root)
return expr1
return expr2
return D_expr1
return D_expr2
return D_expr3
return D_expr4
return D_expr5
return D_expr6
return D_expr7
return D_expr8
#-----Reg_practice-----
return R_expr1
return R_expr2
return R_expr3
return R_expr4
return R_expr5
return R_expr6
return R_expr7
return R_expr8
return R_expr9
#------Serv_practice -----
return S_expr1
return S_expr2
return S_expr3
return S_expr4
return S_expr5
return S_expr6
return S_expr7
So now inside the script I wanna check value of the expr1 returned by my fynction. I don't know how to do that. Can u guys help me ? Is my function written correct ?
EDIT:
I can't add answer so I edit my current post
This is my all script. Some comments are in my native language but i add some in english
#! /usr/bin/env python
#encoding:UTF-8-
# ----------------------------- importujemy potrzebne biblioteki i skrypty -----------------------
# ------------------------------------------------------------------------------------------------
import urllib
from lxml import etree, html
import sys
import re
import MySQLdb as mdb
from TOR_connections import *
from XPathSelection import *
import os
# ------------------------------ Definiuje xPathSelectors ------------------------------------------
# --------------------------------------------------------------------------------------------------
# -------Doctors -----
check_reg = etree.XPath("string(//html/body/div/table[1]/tr[3]/td[2]/text())") #warunek Lekarz
check_practice = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())") #warunek praktyka
no_ks = etree.XPath("string(//html/body/div/table[1]/tr[1]/td[2]/text())")
Registred_by = etree.XPath("string(//html/body/div/table[1]/tr[4]/td[2]/text())")
Name_doctor = etree.XPath("string(//html/body/div/table[2]/tr[2]/td[2]/text())")
Registration_no = etree.XPath("string(//html/body/div/table[2]/tr[3]/td[2]/text())")
PWZL = etree.XPath("string(//html/body/div/table[2]/tr[4]/td[2]/text())")
NIP = etree.XPath("string(//html/body/div/table[2]/tr[5]/td[2]/text())")
Spec = etree.XPath("string(//html/body/div/table[2]/tr[18]/td[2]/text())")
Start_date = etree.XPath("string(//html/body/div/table[2]/tr[20]/td[2]/text())")
#-----Reg_practice-----
Name_of_practise = etree.XPath("string(//html/body/div/table[2]/tr[1]/td[2]/text())")
TERYT = etree.XPath("string(//html/body/div/table[2]/tr[7]/td[2]/*/text())")
Street = etree.XPath("string(//html/body/div/table[2]/tr[8]/td[2]/text())")
House_no = etree.XPath("string(//html/body/div/table[2]/tr[9]/td[2]/*/text())")
Flat_no = etree.XPath("string(//html/body/div/table[2]/tr[10]/td[2]/*/text())")
Post_code = etree.XPath("string(//html/body/div/table[2]/tr[11]/td[2]/*/text())")
City = etree.XPath("string(//html/body/div/table[2]/tr[12]/td[2]/*/text())")
Practice_no = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())")
Kind_of_practice = etree.XPath("string(//html/body/div/table[3]/tr[5]/td[2]/text())")
#------Serv_practice -----
TERYT2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[2]/td[2]/*/text())")
Street2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[3]/td[2]/text())")
House_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[4]/td[2]/*/text())")
Flat_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[5]/td[2]/i/text())")
Post_code2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[6]/td[2]/*/text())")
City2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[7]/td[2]/*/text())")
Phone_no = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[8]/td[2]/text())")
# --------------------------- deklaracje zmiennych globalnych ----------------------------------
# ----------------------------------------------------------------------------------------------
decrease = 9
No = 1
Link = "http://rpwdl.csioz.gov.pl/rpz/druk/wyswietlKsiegaServletPub?idKsiega="
# --------------------------- funkcje zdefiniowane ----------------------------------
# ----------------------------------------------------------------------------------------------
def MatchPattern(count):
sock = urllib.urlopen(Link+str(count))
htmlSource = sock.read()
sock.close()
root = etree.HTML(htmlSource)
root = etree.HTML(htmlSource)
result = etree.tostring(root, pretty_print=True, method="html")
expr1 = check_reg(root)
expr2 = check_practice(root)
D_expr1 = no_ks(root)
D_expr2 = Registred_by(root)
D_expr3 = Name_doctor(root)
D_expr4 = Registration_no(root)
D_expr5 = PWZL(root)
D_expr6 = NIP(root)
D_expr7 = Spec(root)
D_expr8 = Start_date(root)
#-----Reg_practice-----
R_expr1 = Name_of_practise(root)
R_expr2 = TERYT(root)
R_expr3 = Street(root)
R_expr4 = House_no(root)
R_expr5 = Flat_no(root)
R_expr6 = Post_code(root)
R_expr7 = City(root)
R_expr8 = Practice_no(root)
R_expr9 = Kind_of_practice(root)
#------Serv_practice -----
S_expr1 = TERYT2(root)
S_expr2 = Street2(root)
S_expr3 = House_no2(root)
S_expr4 = Flat_no2(root)
S_expr5 = Post_code2(root)
S_expr6 = City2(root)
S_expr7 = Phone_no(root)
return expr1
return expr2
return D_expr1
return D_expr2
return D_expr3
return D_expr4
return D_expr5
return D_expr6
return D_expr7
return D_expr8
#-----Reg_practice-----
return R_expr1
return R_expr2
return R_expr3
return R_expr4
return R_expr5
return R_expr6
return R_expr7
return R_expr8
return R_expr9
#------Serv_practice -----
return S_expr1
return S_expr2
return S_expr3
return S_expr4
return S_expr5
return S_expr6
return S_expr7
# --------------------------- ustanawiamy polaczenie z baza danych -----------------------------
# ----------------------------------------------------------------------------------------------
con = mdb.connect('localhost', 'root', '******', 'SANBROKER', charset='utf8');
# ---------------------------- początek programu -----------------------------------------------
# ----------------------------------------------------------------------------------------------
with con:
cur = con.cursor()
cur.execute("SELECT Old_num FROM SANBROKER.Number_of_records;")
Old_num = cur.fetchone()
count = Old_num[0]
counter = input("Input number of rows: ")
# ----------------------- pierwsze połączenie z TORem ------------------------------------
# ----------------------------------------------------------------------------------------
#connectTor()
#conn = httplib.HTTPConnection("my-ip.heroku.com")
#conn.request("GET", "/")
#response = conn.getresponse()
#print(response.read())
while count <= counter: # co dziesiata liczba
# --------------- pierwsze wpisanie do bazy danych do Archive --------------------
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Archive_num=%s",(count))
# ---------------------------------------------------------------------------------
if decrease == 0:
MatchPattern(count)
# Now I wanna check some expresions (2 or 3)
# After that i wanna write all the values into my database
#------- ostatnie czynności:
percentage = count / 100
print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
decrease = 10-1
count +=1
else:
MatchPattern(count)
# Now I wanna check some expresions (2 or 3)
# After that i wanna write all the values into my database
# ------ ostatnie czynności:
percentage = count / 100
print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
with con:
cur = con.cursor()
cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
decrease -=1
count +=1

Well, I'm assuming check_reg is a function that returns a boolean (either True or False).
If that's the case, to check the return:
if expr1:
print "True."
else:
print "False"
There's more than one way to do it, but basically, if expr1: is all you need to do the checking.

To capture the return value of a function, assign the function to a name with an equal sign, like this:
return_value = somefunction(some_value)
print('The return value is ',return_value)
Keep in mind that when the first return statement is encountered, the function will exit. So if you have more than one return statement after each other, only the first will execute.
If you want to return multiple things, add them to a list and then return the list.
Here is an improved version of your function:
def match_pattern(count):
sock = urllib.urlopen(Link+str(count))
htmlsource = sock.read()
sock.close()
root = etree.HTML(htmlSource)
# root = etree.HTML(htmlSource) - duplicate line
# result = etree.tostring(root, pretty_print=True, method="html")
function_names = [check_reg, check_practice, no_ks, Registered_by, \
Name_doctor, Registration_no, PWZL, NIP, Spec, Start_date, \
Name_of_practise, TERYT, Street, House_no2, Flat_no, \
Post_code2, City2, Phone_no]
results = []
for function in function_names:
results.append(function(root))
return results
r = match_pattern(1)
print r[0] # this will be the result of check_reg(root)

The code you have posted is quite ambigous. Can you please fix the ident to let us know what belongs to the function and which part is the script.
A function can returns only one value. You cannot do :
return something
return something_else
return ...
The function will ends when first value will be returned.
What you can do is returning a list, tuple or dict containing all your values.
For instance :
return (something,something_else,...)
or
return [something,something_else,...]
In your case, it seems better to create a class that would have all values you want as attributes, and turn this function into a method that would set the attributes values.
class Example(object):
def __init__ ( self , link , count ):
sock = urllib.urlopen(link+str(count))
htmlSource = sock.read()
sock.close()
root = etree.HTML(htmlSource)
root = etree.HTML(htmlSource)
result = etree.tostring(root, pretty_print=True, method="html")
self.expr1 = check_reg(root)
self.expr2 = check_practice(root)
self.D_expr1 = no_ks(root)
...
self.D_expr8 = Start_date(root)
#-----Reg_practice-----
self.R_expr1 = Name_of_practise(root)
...
self.R_expr9 = Kind_of_practice(root)
#------Serv_practice -----
self.S_expr1 = TERYT2(root)
...
self.S_expr7 = Phone_no(root)
Then you will be able to use this class like :
exampleInstance = Example ( "link you want to use" , 4 ) # the second argument is your 'count' value
# Now you can use attributes of your class to get the values you want
print exampleInstance . expr1
print exampleInstance . S_expr7

How to store data like Freebase does?

I admit that this is basically a duplicate question of Use freebase data on local server? but I need more detailed answers than have already been given there
I've fallen absolutely in love with Freebase. What I want now is to essentially create a very simple Freebase clone for storing content that may not belong on Freebase itself but can be described using the Freebase schema. Essentially what I want is a simple and elegant way to store data like Freebase itself does and be able to easily use that data in a Python (CherryPy) web application.
Chapter 2 of the MQL reference guide states:
The database that underlies Metaweb is fundamentally different than the relational databases that you may be familiar with. Relational databases store data in the form of tables, but the Metaweb database stores data as a graph of nodes and relationships between those nodes.
Which I guess means that I should be using either a triplestore or a graph database such as Neo4j? Does anybody here have any experience with using one of those from a Python environment?
(What I've actually tried so far is to create a relational database schema which would be able to easily store Freebase topics, but I'm having issues with configuring the mappings in SQLAlchemy).
Things I'm looking into
http://gen5.info/q/2009/02/25/putting-freebase-in-a-star-schema/
http://librdf.org/
UPDATE [28/12/2011]:
I found an article on the Freebase blog that describes the proprietary tuple store / database Freebase themselves use (graphd): http://blog.freebase.com/2008/04/09/a-brief-tour-of-graphd/

This is what worked for me. It allows you to load all of a Freebase dump in a standard MySQL installation on less than 100GB of disk. The key is understanding the data layout in a dump and then transforming it (optimizing it for space and speed).
Freebase notions you should understand before you attempt to use this (all taken from the documentation):
Topic - anything of type '/common/topic', pay attention to the different types of ids you may encounter in Freebase - 'id', 'mid', 'guid', 'webid', etc.
Domain
Type - 'is a' relationship
Properties - 'has a' relationship
Schema
Namespace
Key - human readable in the '/en' namespace
Some other important Freebase specifics:
the query editor is your friend
understand the 'source', 'property', 'destination' and 'value' notions described here
everything has a mid, even things like '/', '/m', '/en', '/lang', '/m/0bnqs_5', etc.; Test using the query editor: [{'id':'/','mid':null}]
you don't know what any entity (i.e. row) in the data dump is, you have to get to its types to do that (for instance how do I know '/m/0cwtm' is a human);
every entity has at least one type (but usually many more)
every entity has at least one id/key (but usually many more)
the ontology (i.e. metadata) is embedded in the same dump and the same format as the data (not the case with other distributions like DBPedia, etc.)
the 'destination' column in the dump is the confusing one, it may contain a mid or a key (see how the transforms bellow deal with this)
the domains, types, properties are namespace levels at the same time (whoever came up with this is a genius IMHO);
understand what is a Topic and what is not a Topic (absolutely crucial), for example this entity '/m/03lmb2f' of type '/film/performance' is NOT a Topic (I choose to think of these as what Blank Nodes in RDF are although this may not be philosophically accurate), while '/m/04y78wb' of type '/film/director' (among others) is;
Transforms
(see the Python code at the bottom)
TRANSFORM 1 (from shell, split links from namespaces ignoring notable_for and non /lang/en text):
python parse.py freebase.tsv #end up with freebase_links.tsv and freebase_ns.tsv
TRANSFORM 2 (from Python console, split freebase_ns.tsv on freebase_ns_types.tsv, freebase_ns_props.tsv plus 15 others which we ignore for now)
import e
e.split_external_keys( 'freebase_ns.tsv' )
TRANSFORM 3 (from Python console, convert property and destination to mids)
import e
ns = e.get_namespaced_data( 'freebase_ns_types.tsv' )
e.replace_property_and_destination_with_mid( 'freebase_links.tsv', ns ) #produces freebase_links_pdmids.tsv
e.replace_property_with_mid( 'freebase_ns_props.tsv', ns ) #produces freebase_ns_props_pmids.tsv
TRANSFORM 4 (from MySQL console, load freebase_links_mids.tsv, freebase_ns_props_mids.tsv and freebase_ns_types.tsv in DB):
CREATE TABLE links(
source VARCHAR(20),
property VARCHAR(20),
destination VARCHAR(20),
value VARCHAR(1)
) ENGINE=MyISAM CHARACTER SET utf8;
CREATE TABLE ns(
source VARCHAR(20),
property VARCHAR(20),
destination VARCHAR(40),
value VARCHAR(255)
) ENGINE=MyISAM CHARACTER SET utf8;
CREATE TABLE types(
source VARCHAR(20),
property VARCHAR(40),
destination VARCHAR(40),
value VARCHAR(40)
) ENGINE=MyISAM CHARACTER SET utf8;
LOAD DATA LOCAL INFILE "/data/freebase_links_pdmids.tsv" INTO TABLE links FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE "/data/freebase_ns_props_pmids.tsv" INTO TABLE ns FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE "/data/freebase_ns_base_plus_types.tsv" INTO TABLE types FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
CREATE INDEX links_source ON links (source) USING BTREE;
CREATE INDEX ns_source ON ns (source) USING BTREE;
CREATE INDEX ns_value ON ns (value) USING BTREE;
CREATE INDEX types_source ON types (source) USING BTREE;
CREATE INDEX types_destination_value ON types (destination, value) USING BTREE;
Code
Save this as e.py:
import sys
#returns a dict to be used by mid(...), replace_property_and_destination_with_mid(...) bellow
def get_namespaced_data( file_name ):
f = open( file_name )
result = {}
for line in f:
elements = line[:-1].split('\t')
if len( elements ) < 4:
print 'Skip...'
continue
result[(elements[2], elements[3])] = elements[0]
return result
#runs out of memory
def load_links( file_name ):
f = open( file_name )
result = {}
for line in f:
if len( result ) % 1000000 == 0:
print len(result)
elements = line[:-1].split('\t')
src, prop, dest = elements[0], elements[1], elements[2]
if result.get( src, False ):
if result[ src ].get( prop, False ):
result[ src ][ prop ].append( dest )
else:
result[ src ][ prop ] = [dest]
else:
result[ src ] = dict([( prop, [dest] )])
return result
#same as load_links but for the namespaced data
def load_ns( file_name ):
f = open( file_name )
result = {}
for line in f:
if len( result ) % 1000000 == 0:
print len(result)
elements = line[:-1].split('\t')
src, prop, value = elements[0], elements[1], elements[3]
if result.get( src, False ):
if result[ src ].get( prop, False ):
result[ src ][ prop ].append( value )
else:
result[ src ][ prop ] = [value]
else:
result[ src ] = dict([( prop, [value] )])
return result
def links_in_set( file_name ):
f = open( file_name )
result = set()
for line in f:
elements = line[:-1].split('\t')
result.add( elements[0] )
return result
def mid( key, ns ):
if key == '':
return False
elif key == '/':
key = '/boot/root_namespace'
parts = key.split('/')
if len(parts) == 1: #cover the case of something which doesn't start with '/'
print key
return False
if parts[1] == 'm': #already a mid
return key
namespace = '/'.join(parts[:-1])
key = parts[-1]
return ns.get( (namespace, key), False )
def replace_property_and_destination_with_mid( file_name, ns ):
fn = file_name.split('.')[0]
f = open( file_name )
f_out_mids = open(fn+'_pdmids'+'.tsv', 'w')
def convert_to_mid_if_possible( value ):
m = mid( value, ns )
if m: return m
else: return None
counter = 0
for line in f:
elements = line[:-1].split('\t')
md = convert_to_mid_if_possible(elements[1])
dest = convert_to_mid_if_possible(elements[2])
if md and dest:
elements[1] = md
elements[2] = dest
f_out_mids.write( '\t'.join(elements)+'\n' )
else:
counter += 1
print 'Skipped: ' + str( counter )
def replace_property_with_mid( file_name, ns ):
fn = file_name.split('.')[0]
f = open( file_name )
f_out_mids = open(fn+'_pmids'+'.tsv', 'w')
def convert_to_mid_if_possible( value ):
m = mid( value, ns )
if m: return m
else: return None
for line in f:
elements = line[:-1].split('\t')
md = convert_to_mid_if_possible(elements[1])
if md:
elements[1]=md
f_out_mids.write( '\t'.join(elements)+'\n' )
else:
#print 'Skipping ' + elements[1]
pass
#cPickle
#ns=e.get_namespaced_data('freebase_2.tsv')
#import cPickle
#cPickle.dump( ns, open('ttt.dump','wb'), protocol=2 )
#ns=cPickle.load( open('ttt.dump','rb') )
#fn='/m/0'
#n=fn.split('/')[2]
#dir = n[:-1]
def is_mid( value ):
parts = value.split('/')
if len(parts) == 1: #it doesn't start with '/'
return False
if parts[1] == 'm':
return True
return False
def check_if_property_or_destination_are_mid( file_name ):
f = open( file_name )
for line in f:
elements = line[:-1].split('\t')
#if is_mid( elements[1] ) or is_mid( elements[2] ):
if is_mid( elements[1] ):
print line
#
def split_external_keys( file_name ):
fn = file_name.split('.')[0]
f = open( file_name )
f_out_extkeys = open(fn+'_extkeys' + '.tsv', 'w')
f_out_intkeys = open(fn+'_intkeys' + '.tsv', 'w')
f_out_props = open(fn+'_props' + '.tsv', 'w')
f_out_types = open(fn+'_types' + '.tsv', 'w')
f_out_m = open(fn+'_m' + '.tsv', 'w')
f_out_src = open(fn+'_src' + '.tsv', 'w')
f_out_usr = open(fn+'_usr' + '.tsv', 'w')
f_out_base = open(fn+'_base' + '.tsv', 'w')
f_out_blg = open(fn+'_blg' + '.tsv', 'w')
f_out_bus = open(fn+'_bus' + '.tsv', 'w')
f_out_soft = open(fn+'_soft' + '.tsv', 'w')
f_out_uri = open(fn+'_uri' + '.tsv', 'w')
f_out_quot = open(fn+'_quot' + '.tsv', 'w')
f_out_frb = open(fn+'_frb' + '.tsv', 'w')
f_out_tag = open(fn+'_tag' + '.tsv', 'w')
f_out_guid = open(fn+'_guid' + '.tsv', 'w')
f_out_dtwrld = open(fn+'_dtwrld' + '.tsv', 'w')
for line in f:
elements = line[:-1].split('\t')
parts_2 = elements[2].split('/')
if len(parts_2) == 1: #the blank destination elements - '', plus the root domain ones
if elements[1] == '/type/object/key':
f_out_types.write( line )
else:
f_out_props.write( line )
elif elements[2] == '/lang/en':
f_out_props.write( line )
elif (parts_2[1] == 'wikipedia' or parts_2[1] == 'authority') and len( parts_2 ) > 2:
f_out_extkeys.write( line )
elif parts_2[1] == 'm':
f_out_m.write( line )
elif parts_2[1] == 'en':
f_out_intkeys.write( line )
elif parts_2[1] == 'source' and len( parts_2 ) > 2:
f_out_src.write( line )
elif parts_2[1] == 'user':
f_out_usr.write( line )
elif parts_2[1] == 'base' and len( parts_2 ) > 2:
if elements[1] == '/type/object/key':
f_out_types.write( line )
else:
f_out_base.write( line )
elif parts_2[1] == 'biology' and len( parts_2 ) > 2:
f_out_blg.write( line )
elif parts_2[1] == 'business' and len( parts_2 ) > 2:
f_out_bus.write( line )
elif parts_2[1] == 'soft' and len( parts_2 ) > 2:
f_out_soft.write( line )
elif parts_2[1] == 'uri':
f_out_uri.write( line )
elif parts_2[1] == 'quotationsbook' and len( parts_2 ) > 2:
f_out_quot.write( line )
elif parts_2[1] == 'freebase' and len( parts_2 ) > 2:
f_out_frb.write( line )
elif parts_2[1] == 'tag' and len( parts_2 ) > 2:
f_out_tag.write( line )
elif parts_2[1] == 'guid' and len( parts_2 ) > 2:
f_out_guid.write( line )
elif parts_2[1] == 'dataworld' and len( parts_2 ) > 2:
f_out_dtwrld.write( line )
else:
f_out_types.write( line )
Save this as parse.py:
import sys
def parse_freebase_quadruple_tsv_file( file_name ):
fn = file_name.split('.')[0]
f = open( file_name )
f_out_links = open(fn+'_links'+'.tsv', 'w')
f_out_ns = open(fn+'_ns' +'.tsv', 'w')
for line in f:
elements = line[:-1].split('\t')
if len( elements ) < 4:
print 'Skip...'
continue
#print 'Processing ' + str( elements )
#cases described here http://wiki.freebase.com/wiki/Data_dumps
if elements[1].endswith('/notable_for'): #ignore notable_for, it has JSON in it
continue
elif elements[2] and not elements[3]: #case 1, linked
f_out_links.write( line )
elif not (elements[2].startswith('/lang/') and elements[2] != '/lang/en'): #ignore languages other than English
f_out_ns.write( line )
if len(sys.argv[1:]) == 0:
print 'Pass a list of .tsv filenames'
for file_name in sys.argv[1:]:
parse_freebase_quadruple_tsv_file( file_name )
Notes:
Depending on the machine the index creation may take anywhere from a few to 12+ hours (consider the amount of data you are dealing with though).
To be able to traverse the data in both directions you need an index on links.destination as well which I found to be expensive timewise and never finished.
Many other optimizations are possible here. For example the 'types' table is small enough to be loaded in memory in a Python dict (see e.get_namespaced_data( 'freebase_ns_types.tsv' ))
And the standard disclaimer here. It has been a few months since I did this. I believe it is mostly correct but I do apologize if my notes missed something. Unfortunately the project I needed it for fell through the cracks but hope this helps someone else. If something isn't clear drop a comment here.

My 2 cents...
I use a little bit of Java code to convert the Freebase data dump into RDF: https://github.com/castagna/freebase2rdf
I use Apache Jena's TDB store to load the RDF data and Fuseki to serve the data via SPARQL protocol over HTTP.
See also:
http://markmail.org/thread/mq6ylzdes6n7sc5o
http://markmail.org/thread/jegtn6vn7kb62zof

SPARQL is the query language to query RDF, it allows to write SQL-alike queries. Most RDF databases implement SPARQL interfaces. Moreover, Freebase allows you to export data in RDF so you could potentially use that data directly in an RDF database and query it with SPARQL.
I would have a look at this tutorial to get a better sense of SPARQL.
If you are going to handle a big dataset, like freebase, I would use 4store together with any of the Python clients. 4store exposes SPARQL via HTTP, you can make HTTP requests to assert, remove and query data. It also handles resultsets in JSON, and this is really handy with Python. I have used this infrastructure in several projects, not with CherryPy but with Django, but I guess that this difference doesn't really matter.

A good news for freebase dump users is that Freebase now offer RDF dump now: http://wiki.freebase.com/wiki/Data_dumps . It is in turtle format, so it is very convenient to use any graph database designed for RDF.
My suggestion is also 4store: http://4store.org/ . it is simple and easy to use.
You could use http request to do the SPARQL operation.
One tricky thing in my project is that the "." used in Freebase dump (to represent shorten URL) is not recognizable to 4store. So I add a bracket "<>" o all the columns contained "." and deal with the shorten URL myself.

Have a look at https://cayley.io. I believe it is written by the same author and uses same principles as graphd, the backend of Freebase, before Google killed it.
Regarding the data, you probably will want to run something like this to cleanup the Freebase DB dumps or use datahub.

And this is the extra code for my other answer. The meat is in edb.py. Run from Python console and follow the examples. Or use the web2py controller and run in your browser.
Save this as edb.py:
import MySQLdb
import sys
connection = MySQLdb.connect (host = "localhost",
user = "root",
passwd = "x",
db = "y")
cursor = connection.cursor()
query_counter = 0
print_queries = False
limit = 1000
def fetch_one( query ):
global query_counter, print_queries
query = query + ' LIMIT ' + str(limit)
if print_queries:
print query
cursor = connection.cursor()
cursor.execute( query )
query_counter += 1
result = cursor.fetchone()
if result:
return result[0]
else:
return None
def fetch_all( query ):
global query_counter, print_queries
query = query + ' LIMIT ' + str(limit)
if print_queries:
print query
cursor = connection.cursor()
cursor.execute( query )
query_counter += 1
return cursor.fetchall()
def _flatten( list_of_lists ):
import itertools
return list(itertools.chain(*list_of_lists))
#Example: e._search_by_name('steve martin')
def _search_by_name( name, operator = '=' ):
typed, ranked = {}, []
if name:
name = name.strip()
if not name:
return ( typed, ranked )
filler = '' if operator == '=' else '%'
ranks = {}
#to filter meaningful stuff for every mid returned order by the number of types they have
#search for value text if prop. is
#select * from ns where value = 'the king' and (property = '/m/01gr' or property = '/m/06b');
name_mid = _mid( '/type/object/name' )
alias_mid = _mid( '/common/topic/alias' )
query = "select ns.source from ns where ns.value %s '%s%s' and ns.property in ('%s', '%s')" % ( operator, name, filler, name_mid, alias_mid )
for i in fetch_all( query ):
typed[ i[0] ] = _types( i[0] )
import operator
ranked = [ ( len( typed[i] ), i ) for i in typed ]
ranked = [ e[1] for e in sorted( ranked, key=operator.itemgetter(0), reverse = True ) ]
return (typed, ranked)
#Example: e._children('') <---will get the top level domains
# e._children('/film') <---get all types from the domain
# e._children('/film/film') <---get all properties for the type
def _children( parent, expand = False, raw = False ):
query = "select t.source, t.value from types t where t.destination = '%s'" % (parent)
res = fetch_all( query )
if raw:
return [ row[0] for row in res ]
if expand: prefix = parent
else: prefix = ''
return [ prefix + '/' + row[1] for row in fetch_all(query) ]
#Example: e._parent('/film/film/songs')
def _parent( child ): # '/people/marriage/to' -> '/people/marriage'
#if not isinstance( child, str ): return None # what kind of safety mechanisms do we need here?
return '/'.join(child.split('/')[:-1])
#Example: e._domains()
def _domains():
return _children('')
#Example: e._top_level_types()
def _top_level_types():
return _children('/type')
#TODO get all primitive types
#Example: e._mid('/type/object')
# e._mid('/authority/imdb/name/nm0000188')
def _mid( key ):
if key == '':
return None
elif key == '/':
key = '/boot/root_namespace'
parts = key.split('/')
if parts[1] == 'm': #already a mid
return key
namespace = '/'.join(parts[:-1])
key = parts[-1]
return fetch_one( "select source from types t where t.destination = '%s' and t.value = '%s'" % (namespace, key) )
#Example: e._key('/type')
def _key( mid ):
if isinstance( mid, str):
res = _keys( mid )
if not res:
return None
rt = [ r for r in res if r.startswith( '/type' ) ]
if rt:
return rt[0]
else:
return res[0]
elif isinstance( mid, list ) or isinstance( mid, tuple ):
res = [ _key( e ) for e in mid ]
return [ r for r in res if r is not None ]
else:
return None
def _keys( mid ):
# check for '/type/object/key' as well?
query = "select t.destination, t.value from types t where t.source = '%s'" % mid
return [ row[0]+'/'+row[1] for row in fetch_all( query ) ]
#Example: e._types('/m/0p_47')
def _types( mid ):
tm = _mid( '/type/object/type' )
query = "select l.destination from links l where l.source = '%s' and l.property = '%s'" % (mid, tm)
return [ row[0] for row in fetch_all( query ) ]
#Example: e._props_n('/m/0p_47') <---Named immediate properties (like name, etc.)
def _props_n( mid ): #the same property can be set more than once per topic!
query = "select ns.property from ns where ns.source = '%s'" % (mid)
return list( set( [ row[0] for row in fetch_all( query ) ] ) )
#Example: e._props_l('/m/0p_47') <---All remote properties, some are named, some are anonymous
def _props_l( mid ): #the same property can be set more than once per topic!
tm = _mid( '/type/object/type' ) #exclude types, they have tons of instance links
res = fetch_all( "select l.property, l.destination from links l where l.source = '%s' and property <> '%s'" % (mid, tm) )
output = {}
for r in res:
dests = output.get( r[0], False )
if dests:
dests.append( r[1] )
else:
output[ r[0] ] = [ r[1] ]
return output
#Example: e._props_ln('/m/0p_47') <---All remote named properties
def _props_ln( mid ): #named properties
result = []
ps = _props_l( mid )
common_topic = _mid( '/common/topic' )
for p in ps:
ts = _types( ps[p][0] )
if common_topic in ts: #it's a common topic
result.append( p )
return result
#Example: e._props_la('/m/0p_47') <---All remote anonymous properties, these actually belong to the children!
#instead of has type /common/topic we used to check if it has name
def _props_la( mid, raw = True ): #anonymous properties (blank nodes in RDF?)
result = []
ps = _props_l( mid )
common_topic = _mid( '/common/topic' )
for p in ps:
ts = _types( ps[p][0] )
if common_topic not in ts: #it is not a common topic
t = _key( _types( ps[p][0] ) )
if t and '/type/type' not in t: #FIXME: hack not to go into types, could be done better
result.append( _children( t[0], expand=True, raw=raw ) ) #get the first, is this correct?
return _flatten( result ) #it is a list of lists
#FIXME: try to get '/film/actor/film' -> '/type/property/expected_type' -> '/film/performance' -> properties/children
#instead of trying is something has name
#Example: e._get_n('/m/0p_47', e._props_n('/m/0p_47')[0])['/lang/en'] <---These come with a namespace
def _get_n( mid, prop ): #the same property can be set more than once per topic!
p = _mid( prop )
query = "select ns.value from ns where ns.source = '%s' and ns.property = '%s'" % (mid, p)
return [ r[0] for r in fetch_all( query ) ]
#Example: e._get_l('/m/0p_47', e._props_l('/m/0p_47')[0]) <---returns a list of mids coresponding to that prop.
# e._name(e._get_l('/m/0p_47', '/film/writer/film'))
def _get_l( mid, prop ): #the same property can be set more than once per topic!
p = _mid( prop )
query = "select l.destination from links l where l.source = '%s' and l.property = '%s'" % (mid, p)
return [ row[0] for row in fetch_all( query ) ]
#Example: e._name(e._get_ln('/m/0p_47', e._props_ln('/m/0p_47')[0]))
def _get_ln( mid, p ): #just alias for _get_l, keeping for consistency
return _get_l( mid, p )
#Example: e._name(e._get_la('/m/0p_47', '/film/performance/film'))
def _get_la( mid, prop ):
result = []
ps = _props_l( mid )
for p in ps:
es = _get_l( mid, p ) #get the destinations
if not es: continue
ts = set( _types( es[0] ) )
if _mid(_parent(_key(_mid(prop)))) in ts: #should be able to do this more efficiently!!!
for e in es:
result.append( _get_l( e, prop ) )
return _flatten( result ) #return after the first result
#How do we determine properties with multiple values vs those with singular (i.e. place of birth)?
#is this in the ontology?
#Ans: yes, /type/property/unique
#Example: e._all_names_ln('/m/0p_47') <---gets all of object's remote named properties
def _all_names_ln( mid ):
result = {}
for p in _props_ln( mid ):
result[ _key(p) ] = _name( _get_ln( mid, p ) )
return result
#Example: e._all_names_la('/m/0p_47') <---gets all of object's remote anonymous properties
def _all_names_la( mid ): #TODO: prevent loops, run e.all_names_la('/m/0p_47')
result = {}
for p in _props_la( mid ):
result[ _key( p ) ] = _name ( _get_la( mid, p ) )
return result
#FIXME: _all_names_la is going into destinations which are types and have a ton of instance links...
#Example: e._name('/m/0p_47') <---the name of a topic
#
def _name( mid ):
if isinstance( mid, str ):
nm = _mid( '/type/object/name' )
return _get_n( mid, nm )
elif isinstance( mid, list ) or isinstance( mid, tuple ) or isinstance( mid, set ):
return [ _name( e ) for e in mid ]
else:
return None
#for internal use only
def _get_linked( mid ):
tm = _mid( '/type/object/type' ) #exclude types, they have tons of instance links
query = "select destination from links where source = '%s' and property <> '%s' " % ( mid, tm )
return set( [ r[0] for r in fetch_all( query ) ] )
#for internal use only
def _get_connections_internal( entity1, target, path, all_paths, depth, max_depth):
import copy
if depth > max_depth:
return
if True:
print
print str(entity1) + ', ' + str(target)
print str( path )
print str( all_paths )
print depth
path.append( entity1 )
linked1 = _get_linked( entity1 )
if target in linked1 or entity1 == target:
path.append( target )
all_paths.append( path )
#print str( path )
return
for l1 in linked1:
if l1 in path:
continue
_get_connections_internal( l1,
target,
copy.copy( path ),
all_paths,
depth+1,
max_depth )
#Example: e._name(e._get_connections('/m/0p_47', '/m/0cwtm')) <---find path in the graph between the two entities
def _get_connections( entity1, target ):
result = []
_get_connections_internal( entity1, target, [], result, 0, 2 )
return result
#for internal use only
def _get_connections_internal2( entity1, entity2, path1, path2, all_paths, depth, max_depth, level ):
import copy
if depth > max_depth:
return
if level < 0: level = 0
path1.append( entity1 )
path2.append( entity2 )
if entity1 == entity2 and level == 0:
all_paths.append( ( path1, path2 ) ) #no need to append entity1 or entity2 to the paths
return
linked1 = _get_linked( entity1 )
if entity2 in linked1 and entity2 not in path1 and level == 0:
path1.append( entity2 )
all_paths.append( ( path1, path2 ) )
return
linked2 = _get_linked( entity2 )
if entity1 in linked2 and entity1 not in path2 and level == 0:
path2.append( entity1 )
all_paths.append( ( path1, path2 ) )
return
inters = linked1.intersection( linked2 )
inters = inters.difference( set( path1 ) )
inters = inters.difference( set( path2 ) )
if inters and level == 0:
for e in inters: #these are many paths, have to clone
p1 = copy.copy( path1 )
p1.append( e )
p2 = copy.copy( path2 )
p2.append( e )
all_paths.append( ( p1,p2 ) )
return
for l1 in linked1:
if l1 in path1 or l1 in path2:
continue
for l2 in linked2:
if l2 in path1 or l2 in path2:
continue
_get_connections_internal2( l1, l2,
copy.copy( path1 ), copy.copy( path2 ),
all_paths,
depth+1,
max_depth,
level - 1 )
#Example: e._name(e._get_connections2('/m/0p_47', '/m/0cwtm')) <---returns two meeting paths starting from both entities
# e._name(e._get_connections('/m/0p_47', '/m/0cwtm', level=1)) <---search deeper
# e._name(e._get_connections('/m/0p_47', '/m/0cwtm', level=2)) <---even deeper
def _get_connections2( entity1, entity2, level = 0 ):
result = []
_get_connections_internal2( entity1, entity2, [], [], result, 0, 15, level )
return result
And here is a sample web2py controller (just copy edb.py in the web2py models directory):
# -*- coding: utf-8 -*-
def mid_to_url( mid ):
return mid.split('/')[2]
def index():
form = FORM( TABLE( TR( INPUT(_name='term', _value=request.vars.term ) ),
TR(INPUT(_type='submit', _value='Search') ) ),
_method='get')
typed, ranked = _search_by_name( request.vars.term )
rows = []
for r in ranked:
keys = []
for t in typed[r]:
k = _key( t )
if k:
keys.append( k )
rows.append( TR( TD( A(_name( r ),
_href = URL('result', args = [mid_to_url(r)]))),
TD( XML( '<br/>'.join( keys ) ) ) ) )
result = TABLE( *rows )
return {
'form': form,
'result' : result
}
def result():
path, data = '', ''
if not request.args:
return { 'path':path, 'data':data}
path_rows = []
for ra in range(len(request.args)):
if ra%2:
arrow_url = URL( 'static', 'images/blue_arr.png' )
display_name = _key('/m/'+request.args[ra]) #it's a property
else:
arrow_url = URL( 'static', 'images/red_arr.png' )
display_name = _name('/m/'+request.args[ra]) #it's a topic
path_rows.append( TD( A( display_name, _href=URL( args = request.args[0:ra+1] ) ) ) )
path_rows.append( TD( IMG( _src = arrow_url ) ) )
path = TABLE( *path_rows )
elems = [ '/m/'+a for a in request.args ]
if _mid( '/type/property' ) in _types( elems[-1] ): #we are rendering a property
objects = _get_ln( elems[-2], elems[-1] )
if not objects: #there should be a better way to see if this is anonymous
objects = _get_la( elems[-2], elems[-1] )
data = TABLE( *[ TR( TD( A(_name(o), _href = URL( args = request.args+[mid_to_url(o)])))) for o in objects ] )
else: #we are rendering a topic
direct_props = TABLE(*[TR(TD(_key(p)), TD(', '.join(_get_n( elems[-1], p)))) for p in _props_n( elems[-1] )])
linked_named_props = TABLE(*[TR(TD(A(_key(p),
_href = URL(args = request.args+[mid_to_url(p)])))) for p in _props_ln( elems[-1] ) ] )
linked_anon_props = TABLE(*[TR(TD(A(_key(p),
_href = URL(args = request.args+[mid_to_url(p)])))) for p in _props_la( elems[-1] ) ] )
data = TABLE( TR( TH( 'Linked named data:'), TH( 'Linked anonymous data:' ), TH( 'Direct data:' ) ),
TR( TD( linked_named_props ), TD( linked_anon_props ), TD( direct_props ) ) )
return { 'path': path, 'data':data }

Importing code from file into Python and adding file dialog box

I have a python script signalgen.py that plays audio using equations but I would like to be able to hard code the file where the equation is stored in eq1.txt or choose a file and import the equation.
The problems I'm having are:
1) How can I hard code a file and it's path correctly so it will play the equation as audio
I get an error
Traceback (most recent call last):
File "signalgen.py", line 484, in need_data
v += (datum * self.sig_level)
TypeError: can't multiply sequence by non-int of type 'float'
The specific block of code which I believe is causing the issue
def equation_import_function(self,t,f):
fileobj=open("/home/rat/eq1.txt","r")
eqdata =fileobj.read() #read whole file
fileobj.close()
#return math.tan(2.0*math.pi*f*t)
return eqdata
I have this line of code in the eq1.txt file-> math.tan(2.0*math.pi*f*t)
2) How can I add a file open dialog box to be able to choose a file and import the equation.
PS I'm using Ubuntu 10.04 (Linux) and the equations will be several pages long this is the reason I would like to import them into python from text files
Here's the entire code if you want to look at what I'm using below or seen on pastebin which includes line numbers http://pastebin.com/HZg0Jhaw
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ***************************************************************************
# * Copyright (C) 2011, Paul Lutus *
# * *
# * This program is free software; you can redistribute it and/or modify *
# * it under the terms of the GNU General Public License as published by *
# * the Free Software Foundation; either version 2 of the License, or *
# * (at your option) any later version. *
# * *
# * This program is distributed in the hope that it will be useful, *
# * but WITHOUT ANY WARRANTY; without even the implied warranty of *
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
# * GNU General Public License for more details. *
# * *
# * You should have received a copy of the GNU General Public License *
# * along with this program; if not, write to the *
# * Free Software Foundation, Inc., *
# * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
# ***************************************************************************
# version date 01-12-2011
VERSION = '1.1'
import re, sys, os
import gobject
gobject.threads_init()
import gst
import gtk
gtk.gdk.threads_init()
import time
import struct
import math
import random
import signal
import webbrowser
class Icon:
icon = [
"32 32 17 1",
" c None",
". c #2A2E30",
"+ c #333739",
"# c #464A4C",
"# c #855023",
"$ c #575A59",
"% c #676A69",
"& c #CC5B00",
"* c #777A78",
"= c #DB731A",
"- c #8A8C8A",
"; c #969895",
"> c #F68C22",
", c #A5A7A4",
"' c #F49D4A",
") c #B3B5B2",
"! c #DEE0DD",
" &&&&&&& ",
" &&&===='''''& ",
" &'''''====&'& ",
" +++++&'&&&&& &'& ",
" +#$%****&'&+ &'& ",
" +#**%$#++#&'&*#+ &'& ",
" +#**#+++++++&'&#**#+ &'& ",
" +$*$+++++++++&'&++$*$+ &'& ",
" #*#++++++++++&'&+++##&&&'& ",
" +*#++++++++#&&&'&+++#=''''& ",
" +*$++++++++#=''''&+++&'>>>'& ",
" #*+++++++++&'>>>'&+++#='''= ",
" +%$++++++++##='''=###++#&&&# ",
" +*#+++++++####&&&######++#*+ ",
" +*+++++++####++#$%$$####++*+ ",
" +*++++++##+#;,,*##*$$$###+*+ ",
" +*#++++###%!!!!,;#$*$$$###*+ ",
" +%$++++##+)!!!),-*+-%$$$#$%+ ",
" +#*+++###+-!!!,;-%#;%%$$+*#+ ",
" +*#++####+$*-*%#+*-%%$##*+ ",
" ++*#+###$$%#++#%;;*%%$#-$+ ",
" +#%+###$$%*;;;;-*%%%#**+ ",
" .+$%###$$$*******%$$*-+. ",
" .+#%%##$$*#*#%%%$%-%+. ",
" .++#%$$$$$$%%%%--#+. ",
" +++##$%*****%+++ ",
" +++++++++++++#. ",
" #--%#++#$*-%+ ",
" +%,))),;%+. ",
" ++++++. ",
" ",
" "
]
# this should be a temporary hack
class WidgetFinder:
def localize_widgets(self,parent,xmlfile):
# an unbelievable hack made necessary by
# someone unwilling to fix a year-old bug
with open(xmlfile) as f:
for name in re.findall('(?s) id="(.*?)"',f.read()):
if re.search('^k_',name):
obj = parent.builder.get_object(name)
setattr(parent,name,obj)
class ConfigManager:
def __init__(self,path,dic):
self.path = path
self.dic = dic
def read_config(self):
if os.path.exists(self.path):
with open(self.path) as f:
for record in f.readlines():
se = re.search('(.*?)\s*=\s*(.*)',record.strip())
if(se):
key,value = se.groups()
if (key in self.dic):
widget = self.dic[key]
typ = type(widget)
if(typ == list):
widget[0] = value
elif(typ == gtk.Entry):
widget.set_text(value)
elif(typ == gtk.HScale):
widget.set_value(float(value))
elif(typ == gtk.Window):
w,h = value.split(',')
widget.resize(int(w),int(h))
elif(typ == gtk.CheckButton or typ == gtk.RadioButton or typ == gtk.ToggleButton):
widget.set_active(value == 'True')
elif(typ == gtk.ComboBox):
if(value in widget.datalist):
i = widget.datalist.index(value)
widget.set_active(i)
else:
print "ERROR: reading, cannot identify key %s with type %s" % (key,type(widget))
def write_config(self):
with open(self.path,'w') as f:
for key,widget in sorted(self.dic.iteritems()):
typ = type(widget)
if(typ == list):
value = widget[0]
elif(typ == gtk.Entry):
value = widget.get_text()
elif(typ == gtk.HScale):
value = str(widget.get_value())
elif(typ == gtk.Window):
_,_,w,h = widget.get_allocation()
value = "%d,%d" % (w,h)
elif(typ == gtk.CheckButton or typ == gtk.RadioButton or typ == gtk.ToggleButton):
value = ('False','True')[widget.get_active()]
elif(typ == gtk.ComboBox):
value = widget.get_active_text()
else:
print "ERROR: writing, cannot identify key %s with type %s" % (key,type(widget))
value = "Error"
f.write("%s = %s\n" % (key,value))
def preset_combobox(self,box,v):
if(v in box.datalist):
i = box.datalist.index(v)
box.set_active(i)
else:
box.set_active(0)
def load_combobox(self,obj,data):
if(len(obj.get_cells()) == 0):
# Create a text cell renderer
cell = gtk.CellRendererText ()
obj.pack_start(cell)
obj.add_attribute (cell, "text", 0)
obj.get_model().clear()
for s in data:
obj.append_text(s.strip())
setattr(obj,'datalist',data)
class TextEntryController:
def __init__(self,parent,widget):
self.par = parent
self.widget = widget
widget.connect('scroll-event',self.scroll_event)
widget.set_tooltip_text('Enter number or:\n\
Mouse wheel: increase,decrease\n\
Shift/Ctrl/Alt: faster change')
def scroll_event(self,w,evt):
q = (-1,1)[evt.direction == gtk.gdk.SCROLL_UP]
# magnify change if shift,ctrl,alt pressed
for m in (1,2,4):
if(self.par.mod_key_val & m): q *= 10
s = self.widget.get_text()
v = float(s)
v += q
v = max(0,v)
s = self.par.format_num(v)
self.widget.set_text(s)
class SignalGen:
M_AM,M_FM = range(2)
W_SINE,W_TRIANGLE,W_SQUARE,W_SAWTOOTH,W_EQUATION_IMPORT = range(5)
waveform_strings = ('Sine','Triangle','Square','Sawtooth', 'Equation_Import')
R_48000,R_44100,R_22050,R_16000,R_11025,R_8000,R_4000 = range(7)
sample_rates = ('48000','44100','22050','16000', '11025', '8000', '4000')
def __init__(self):
self.restart = False
# exit correctly on system signals
signal.signal(signal.SIGTERM, self.close)
signal.signal(signal.SIGINT, self.close)
# precompile struct operator
self.struct_int = struct.Struct('i')
self.max_level = (2.0**31)-1
self.gen_functions = (
self.sine_function,
self.triangle_function,
self.square_function,
self.sawtooth_function,
self.equation_import_function
)
self.main_color = gtk.gdk.color_parse('#c04040')
self.sig_color = gtk.gdk.color_parse('#40c040')
self.mod_color = gtk.gdk.color_parse('#4040c0')
self.noise_color = gtk.gdk.color_parse('#c040c0')
self.pipeline = False
self.count = 0
self.imod = 0
self.rate = 1
self.mod_key_val = 0
self.sig_freq = 440
self.mod_freq = 3
self.sig_level = 100
self.mod_level = 100
self.noise_level = 100
self.enable = True
self.sig_waveform = SignalGen.W_SINE
self.sig_enable = True
self.sig_function = False
self.mod_waveform = SignalGen.W_SINE
self.mod_function = False
self.mod_mode = SignalGen.M_AM
self.mod_enable = False
self.noise_enable = False
self.sample_rate = SignalGen.R_22050
self.left_audio = True
self.right_audio = True
self.program_name = self.__class__.__name__
self.config_file = os.path.expanduser("~/." + self.program_name)
self.builder = gtk.Builder()
self.xmlfile = 'signalgen_gui.glade'
self.builder.add_from_file(self.xmlfile)
WidgetFinder().localize_widgets(self,self.xmlfile)
self.k_quit_button.connect('clicked',self.close)
self.k_help_button.connect('clicked',self.launch_help)
self.k_mainwindow.connect('destroy',self.close)
self.k_mainwindow.set_icon(gtk.gdk.pixbuf_new_from_xpm_data(Icon.icon))
self.title = self.program_name + ' ' + VERSION
self.k_mainwindow.set_title(self.title)
self.tooltips = {
self.k_sample_rate_combobox : 'Change data sampling rate',
self.k_left_checkbutton : 'Enable left channel audio',
self.k_right_checkbutton : 'Enable right channel audio',
self.k_sig_waveform_combobox : 'Select signal waveform',
self.k_mod_waveform_combobox : 'Select modulation waveform',
self.k_mod_enable_checkbutton : 'Enable modulation',
self.k_sig_enable_checkbutton : 'Enable signal',
self.k_noise_enable_checkbutton : 'Enable white noise',
self.k_mod_am_radiobutton : 'Enable amplitude modulation',
self.k_mod_fm_radiobutton : 'Enable frequency modulation',
self.k_quit_button : 'Quit %s' % self.title,
self.k_enable_checkbutton : 'Enable output',
self.k_help_button : 'Visit the %s Web page' % self.title,
}
for k,v in self.tooltips.iteritems():
k.set_tooltip_text(v)
self.config_data = {
'SampleRate' : self.k_sample_rate_combobox,
'LeftChannelEnabled' : self.k_left_checkbutton,
'RightChannelEnabled' : self.k_right_checkbutton,
'SignalWaveform' : self.k_sig_waveform_combobox,
'SignalFrequency' : self.k_sig_freq_entry,
'SignalLevel' : self.k_sig_level_entry,
'SignalEnabled' : self.k_sig_enable_checkbutton,
'ModulationWaveform' : self.k_mod_waveform_combobox,
'ModulationFrequency' : self.k_mod_freq_entry,
'ModulationLevel' : self.k_mod_level_entry,
'ModulationEnabled' : self.k_mod_enable_checkbutton,
'AmplitudeModulation' : self.k_mod_am_radiobutton,
'FrequencyModulation' : self.k_mod_fm_radiobutton,
'NoiseEnabled' : self.k_noise_enable_checkbutton,
'NoiseLevel' : self.k_noise_level_entry,
'OutputEnabled' : self.k_enable_checkbutton,
}
self.cm = ConfigManager(self.config_file,self.config_data)
self.cm.load_combobox(self.k_sig_waveform_combobox,self.waveform_strings)
self.k_sig_waveform_combobox.set_active(self.sig_waveform)
self.cm.load_combobox(self.k_mod_waveform_combobox,self.waveform_strings)
self.k_mod_waveform_combobox.set_active(self.mod_waveform)
self.cm.load_combobox(self.k_sample_rate_combobox,self.sample_rates)
self.k_sample_rate_combobox.set_active(self.sample_rate)
self.k_sig_freq_entry.set_text(self.format_num(self.sig_freq))
self.k_sig_level_entry.set_text(self.format_num(self.sig_level))
self.k_mod_freq_entry.set_text(self.format_num(self.mod_freq))
self.k_mod_level_entry.set_text(self.format_num(self.mod_level))
self.k_noise_level_entry.set_text(self.format_num(self.noise_level))
self.k_main_viewport_border.modify_bg(gtk.STATE_NORMAL,self.main_color)
self.k_sig_viewport_border.modify_bg(gtk.STATE_NORMAL,self.sig_color)
self.k_mod_viewport_border.modify_bg(gtk.STATE_NORMAL,self.mod_color)
self.k_noise_viewport_border.modify_bg(gtk.STATE_NORMAL,self.noise_color)
self.sig_freq_cont = TextEntryController(self,self.k_sig_freq_entry)
self.sig_level_cont = TextEntryController(self,self.k_sig_level_entry)
self.mod_freq_cont = TextEntryController(self,self.k_mod_freq_entry)
self.mod_level_cont = TextEntryController(self,self.k_mod_level_entry)
self.noise_level_cont = TextEntryController(self,self.k_noise_level_entry)
self.k_mainwindow.connect('key-press-event',self.key_event)
self.k_mainwindow.connect('key-release-event',self.key_event)
self.k_enable_checkbutton.connect('toggled',self.update_values)
self.k_sig_freq_entry.connect('changed',self.update_entry_values)
self.k_sig_level_entry.connect('changed',self.update_entry_values)
self.k_sig_enable_checkbutton.connect('toggled',self.update_checkbutton_values)
self.k_mod_freq_entry.connect('changed',self.update_entry_values)
self.k_mod_level_entry.connect('changed',self.update_entry_values)
self.k_noise_level_entry.connect('changed',self.update_entry_values)
self.k_sample_rate_combobox.connect('changed',self.update_values)
self.k_sig_waveform_combobox.connect('changed',self.update_values)
self.k_mod_waveform_combobox.connect('changed',self.update_values)
self.k_left_checkbutton.connect('toggled',self.update_checkbutton_values)
self.k_right_checkbutton.connect('toggled',self.update_checkbutton_values)
self.k_mod_enable_checkbutton.connect('toggled',self.update_checkbutton_values)
self.k_noise_enable_checkbutton.connect('toggled',self.update_checkbutton_values)
self.k_mod_am_radiobutton.connect('toggled',self.update_checkbutton_values)
self.cm.read_config()
self.update_entry_values()
self.update_checkbutton_values()
self.update_values()
def format_num(self,v):
return "%.2f" % v
def get_widget_text(self,w):
typ = type(w)
if(typ == gtk.ComboBox):
return w.get_active_text()
elif(typ == gtk.Entry):
return w.get_text()
def get_widget_num(self,w):
try:
return float(self.get_widget_text(w))
except:
return 0.0
def restart_test(self,w,pv):
nv = w.get_active()
self.restart |= (nv != pv)
return nv
def update_entry_values(self,*args):
self.sig_freq = self.get_widget_num(self.k_sig_freq_entry)
self.sig_level = self.get_widget_num(self.k_sig_level_entry) / 100.0
self.mod_freq = self.get_widget_num(self.k_mod_freq_entry)
self.mod_level = self.get_widget_num(self.k_mod_level_entry) / 100.0
self.noise_level = self.get_widget_num(self.k_noise_level_entry) / 100.0
def update_checkbutton_values(self,*args):
self.left_audio = self.k_left_checkbutton.get_active()
self.right_audio = self.k_right_checkbutton.get_active()
self.mod_enable = self.k_mod_enable_checkbutton.get_active()
self.sig_enable = self.k_sig_enable_checkbutton.get_active()
self.mod_mode = (SignalGen.M_FM,SignalGen.M_AM)[self.k_mod_am_radiobutton.get_active()]
self.noise_enable = self.k_noise_enable_checkbutton.get_active()
def update_values(self,*args):
self.restart = (not self.sig_function)
self.sample_rate = self.restart_test(self.k_sample_rate_combobox, self.sample_rate)
self.enable = self.restart_test(self.k_enable_checkbutton,self.enable)
self.mod_waveform = self.k_mod_waveform_combobox.get_active()
self.mod_function = self.gen_functions[self.mod_waveform]
self.sig_waveform = self.k_sig_waveform_combobox.get_active()
self.sig_function = self.gen_functions[self.sig_waveform]
self.k_sample_rate_combobox.set_sensitive(not self.enable)
if(self.restart):
self.init_audio()
def make_and_chain(self,name):
target = gst.element_factory_make(name)
self.chain.append(target)
return target
def unlink_gst(self):
if(self.pipeline):
self.pipeline.set_state(gst.STATE_NULL)
self.pipeline.remove_many(*self.chain)
gst.element_unlink_many(*self.chain)
for item in self.chain:
item = False
self.pipeline = False
time.sleep(0.01)
def init_audio(self):
self.unlink_gst()
if(self.enable):
self.chain = []
self.pipeline = gst.Pipeline("mypipeline")
self.source = self.make_and_chain("appsrc")
rs = SignalGen.sample_rates[self.sample_rate]
self.rate = float(rs)
self.interval = 1.0 / self.rate
caps = gst.Caps(
'audio/x-raw-int,'
'endianness=(int)1234,'
'channels=(int)2,'
'width=(int)32,'
'depth=(int)32,'
'signed=(boolean)true,'
'rate=(int)%s' % rs)
self.source.set_property('caps', caps)
self.sink = self.make_and_chain("autoaudiosink")
self.pipeline.add(*self.chain)
gst.element_link_many(*self.chain)
self.source.connect('need-data', self.need_data)
self.pipeline.set_state(gst.STATE_PLAYING)
def key_event(self,w,evt):
cn = gtk.gdk.keyval_name(evt.keyval)
if(re.search('Shift',cn) != None):
mod = 1
elif(re.search('Control',cn) != None):
mod = 2
elif(re.search('Alt|Meta',cn) != None):
mod = 4
else:
return
if(evt.type == gtk.gdk.KEY_PRESS):
self.mod_key_val |= mod
else:
self.mod_key_val &= ~mod
def sine_function(self,t,f):
return math.sin(2.0*math.pi*f*t)
def triangle_function(self,t,f):
q = 4*math.fmod(t*f,1)
q = (q,2-q)[q > 1]
return (q,-2-q)[q < -1]
def square_function(self,t,f):
if(f == 0): return 0
q = 0.5 - math.fmod(t*f,1)
return (-1,1)[q > 0]
def sawtooth_function(self,t,f):
return 2.0*math.fmod((t*f)+0.5,1.0)-1.0
def equation_import_function(self,t,f):
fileobj=open("/home/rat/eq1.txt","r")
eqdata =fileobj.read() #read whole file
fileobj.close()
#return math.tan(2.0*math.pi*f*t)
return eqdata
def need_data(self,src,length):
bytes = ""
# sending two channels, so divide requested length by 2
ld2 = length / 2
for tt in range(ld2):
t = (self.count + tt) * self.interval
if(not self.mod_enable):
datum = self.sig_function(t,self.sig_freq)
else:
mod = self.mod_function(t,self.mod_freq)
# AM mode
if(self.mod_mode == SignalGen.M_AM):
datum = 0.5 * self.sig_function(t,self.sig_freq) * (1.0 + (mod * self.mod_level))
# FM mode
else:
self.imod += (mod * self.mod_level * self.interval)
datum = self.sig_function(t+self.imod,self.sig_freq)
v = 0
if(self.sig_enable):
v += (datum * self.sig_level)
if(self.noise_enable):
noise = ((2.0 * random.random()) - 1.0)
v += noise * self.noise_level
v *= self.max_level
v = max(-self.max_level,v)
v = min(self.max_level,v)
left = (0,v)[self.left_audio]
right = (0,v)[self.right_audio]
bytes += self.struct_int.pack(left)
bytes += self.struct_int.pack(right)
self.count += ld2
src.emit('push-buffer', gst.Buffer(bytes))
def launch_help(self,*args):
webbrowser.open("http://arachnoid.com/python/signalgen_program.html")
def close(self,*args):
self.unlink_gst()
self.cm.write_config()
gtk.main_quit()
app=SignalGen()
gtk.main()

The imp module will help you to cleanly load Python code chunks from arbitrary files.
#!/usr/bin/env python
# equation in equation-one.py
def eqn(arg):
return arg * 3 + 2
#!/usr/bin/env python
# your code
import imp
path = "equation-one.py"
eq_mod = imp.load_source("equation", path, open(path))
print("Oh the nice stuff in eq_mod: %s" % dir(eq_mod))
In your custom function definition, you can create a file selector dialog, get the selected file path, load the code using imp, and return the result of the function inside the imported module.

I was commenting before, but I stared at your code long enough and kinda realized what you were trying to do, so it was easier for me to post an answer. Please refer to cJ Zougloubs answer as I expand on his suggestion to use the imp module.
Your equation files should implement a common interface:
# equation1.py
def eqn(*args):
return sum(*args)
Then you would load them in using cj Zougloubs suggestion, but with a common interface:
# python_rt.py
def equation_import_function(self, *args):
filepath = ''
# filepath = ... do file chooser dialog here ...
eq_mod = imp.load_source("equation", filepath)
eqdata = eq_mod.eqn(*args)
return eqdata
Now you have a function in your main code that takes any number of arguments, asks the user to pick the equation file, and gets the result for you.
Edit To address your comment more specifically
# equation1.py
import math
def eqn(*args):
f = args[0]
t = args[1]
return math.tan(2.0*math.pi*f*t)
And in your main tool, you would use imp.load_source to bring it in. Wherever you needed that equation for your audio, you could then do:
eq_mod.eqn(f, t)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.