Having read through eBay's guide for adding digital signatures to certain of their REST API calls, I am having trouble generating the signature header. Rather than including all of the documentation here (there is a lot!), I'll provide links to the appropriate pages along with some excerpts. The following page is the starting point provided by eBay:
https://developer.ebay.com/develop/guides/digital-signatures-for-apis
The next page is where the previous page leads me, describing how to create the signature:
https://www.ietf.org/archive/id/draft-ietf-httpbis-message-signatures-13.html#name-eddsa-using-curve-edwards25
Which leads me on to the following:
https://www.rfc-editor.org/rfc/rfc8032#section-5.1.6
5.1.6. Sign

The inputs to the signing procedure is the private key, a 32-octet string, and a message M of arbitrary size. For Ed25519ctx and Ed25519ph, there is additionally a context C of at most 255 octets and a flag F, 0 for Ed25519ctx and 1 for Ed25519ph.

1. Hash the private key, 32 octets, using SHA-512. Let h denote the resulting digest. Construct the secret scalar s from the first half of the digest, and the corresponding public key A, as described in the previous section. Let prefix denote the second half of the hash digest, h[32],...,h[63].

2. Compute SHA-512(dom2(F, C) || prefix || PH(M)), where M is the message to be signed. Interpret the 64-octet digest as a little-endian integer r.

3. Compute the point [r]B. For efficiency, do this by first reducing r modulo L, the group order of B. Let the string R be the encoding of this point.

4. Compute SHA512(dom2(F, C) || R || A || PH(M)), and interpret the 64-octet digest as a little-endian integer k.

5. Compute S = (r + k * s) mod L. For efficiency, again reduce k modulo L first.

6. Form the signature of the concatenation of R (32 octets) and the little-endian encoding of S (32 octets; the three most significant bits of the final octet are always zero).
I have some Python code from section 6 of the same RFC (https://www.rfc-editor.org/rfc/rfc8032#section-6):
## First, some preliminaries that will be needed.

import hashlib

def sha512(s):
    return hashlib.sha512(s).digest()

# Base field Z_p
p = 2**255 - 19

def modp_inv(x):
    return pow(x, p-2, p)

# Curve constant
d = -121665 * modp_inv(121666) % p

# Group order
q = 2**252 + 27742317777372353535851937790883648493

def sha512_modq(s):
    return int.from_bytes(sha512(s), "little") % q

## Then follows functions to perform point operations.

# Points are represented as tuples (X, Y, Z, T) of extended
# coordinates, with x = X/Z, y = Y/Z, x*y = T/Z
def point_add(P, Q):
    A, B = (P[1]-P[0]) * (Q[1]-Q[0]) % p, (P[1]+P[0]) * (Q[1]+Q[0]) % p
    C, D = 2 * P[3] * Q[3] * d % p, 2 * P[2] * Q[2] % p
    E, F, G, H = B-A, D-C, D+C, B+A
    return (E*F, G*H, F*G, E*H)

# Computes Q = s * P
def point_mul(s, P):
    Q = (0, 1, 1, 0)  # Neutral element
    while s > 0:
        if s & 1:
            Q = point_add(Q, P)
        P = point_add(P, P)
        s >>= 1
    return Q

def point_equal(P, Q):
    # x1 / z1 == x2 / z2  <==>  x1 * z2 == x2 * z1
    if (P[0] * Q[2] - Q[0] * P[2]) % p != 0:
        return False
    if (P[1] * Q[2] - Q[1] * P[2]) % p != 0:
        return False
    return True

## Now follows functions for point compression.

# Square root of -1
modp_sqrt_m1 = pow(2, (p-1) // 4, p)

# Compute corresponding x-coordinate, with low bit corresponding to
# sign, or return None on failure
def recover_x(y, sign):
    if y >= p:
        return None
    x2 = (y*y-1) * modp_inv(d*y*y+1)
    if x2 == 0:
        if sign:
            return None
        else:
            return 0

    # Compute square root of x2
    x = pow(x2, (p+3) // 8, p)
    if (x*x - x2) % p != 0:
        x = x * modp_sqrt_m1 % p
    if (x*x - x2) % p != 0:
        return None

    if (x & 1) != sign:
        x = p - x
    return x

# Base point
g_y = 4 * modp_inv(5) % p
g_x = recover_x(g_y, 0)
G = (g_x, g_y, 1, g_x * g_y % p)

def point_compress(P):
    zinv = modp_inv(P[2])
    x = P[0] * zinv % p
    y = P[1] * zinv % p
    return int.to_bytes(y | ((x & 1) << 255), 32, "little")

def point_decompress(s):
    if len(s) != 32:
        raise Exception("Invalid input length for decompression")
    y = int.from_bytes(s, "little")
    sign = y >> 255
    y &= (1 << 255) - 1

    x = recover_x(y, sign)
    if x is None:
        return None
    else:
        return (x, y, 1, x*y % p)

## These are functions for manipulating the private key.

def secret_expand(secret):
    if len(secret) != 32:
        raise Exception("Bad size of private key")
    h = sha512(secret)
    a = int.from_bytes(h[:32], "little")
    a &= (1 << 254) - 8
    a |= (1 << 254)
    return (a, h[32:])

def secret_to_public(secret):
    (a, dummy) = secret_expand(secret)
    return point_compress(point_mul(a, G))

## The signature function works as below.

def sign(secret, msg):
    a, prefix = secret_expand(secret)
    A = point_compress(point_mul(a, G))
    r = sha512_modq(prefix + msg)
    R = point_mul(r, G)
    Rs = point_compress(R)
    h = sha512_modq(Rs + A + msg)
    s = (r + h * a) % q
    return Rs + int.to_bytes(s, 32, "little")

## And finally the verification function.

def verify(public, msg, signature):
    if len(public) != 32:
        raise Exception("Bad public key length")
    if len(signature) != 64:
        raise Exception("Bad signature length")
    A = point_decompress(public)
    if not A:
        return False
    Rs = signature[:32]
    R = point_decompress(Rs)
    if not R:
        return False
    s = int.from_bytes(signature[32:], "little")
    if s >= q: return False
    h = sha512_modq(Rs + public + msg)
    sB = point_mul(s, G)
    hA = point_mul(h, A)
    return point_equal(sB, point_add(R, hA))
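As a quick sanity check that the code above is transcribed correctly, it reproduces the first test vector from section 7.1 of the same RFC (empty message):

# TEST 1 from RFC 8032, section 7.1
secret = bytes.fromhex(
    "9d61b19deffd5a60ba844af492ec2cc4"
    "4449c5697b326919703bac031cae7f60")
public = bytes.fromhex(
    "d75a980182b10ab7d54bfed3c964073a"
    "0ee172f3daa62325af021a68f707511a")

assert secret_to_public(secret) == public
sig = sign(secret, b"")
assert verify(public, b"", sig)
assert sig.hex().startswith("e5564300c360ac72")  # matches the RFC's SIGNATURE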
Now, the problem that I am having is that this code insists on the "secret" being a 32-byte array:
if len(secret) != 32: raise Exception("Bad size of private key")
However, the secret is described as being the private key provided by eBay's Key Management API (https://developer.ebay.com/api-docs/developer/key-management/overview.html), which is not a 32-byte array but a 64-character base64-encoded string (see https://developer.ebay.com/api-docs/developer/key-management/resources/signing_key/methods/createSigningKey#h2-samples):
"privateKey": "MC4CAQAwBQYDK2VwBCIEI******************************************n"
When I try to generate a signature with the eBay private key using this Python code, it gives me a "Bad size of private key" error. If I convert the private key from eBay to a bytearray, it is 64 bytes long. How can I use the Python code to generate the signature header using the private key supplied by eBay?
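(For reference: that string is the base64 body of an unencrypted PKCS#8 "PRIVATE KEY" PEM block, not the raw key itself. A minimal sketch of extracting the 32-byte seed that the RFC code expects, shown here with the test key quoted later in this post; for any unencrypted Ed25519 PKCS#8 key the decoded DER blob is 48 bytes, a fixed 16-byte ASN.1 header followed by the seed:)

import base64

private_key_b64 = "MC4CAQAwBQYDK2VwBCIEIJ+DYvh6SEqVTm50DFtMDoQikTmiCqirVv9mWG9qfSnF"
der = base64.b64decode(private_key_b64)
assert len(der) == 48   # 16-byte PKCS#8 ASN.1 header + 32-byte seed
seed = der[-32:]        # the 32-byte "secret" expected by sign()/secret_expand()

(The cryptography package yields the same 32 bytes via load_pem_private_key(...) followed by private_bytes(Encoding.Raw, PrivateFormat.Raw, NoEncryption()).)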
To further complicate things, I am actually using Excel VBA (Visual Basic) to make the API call after using Python to generate the signature (simply because Python is better at this kind of thing!). eBay's paid-for technical support has confirmed that the following headers are correct and that there is no "message" as described in https://www.rfc-editor.org/rfc/rfc8032#section-5.1.6, but they have not yet been of any further help beyond suggesting that there may be a "bug".
http.setRequestHeader "signature-input", "sig1=(""x-ebay-signature-key"" ""#method"" ""#path"" ""#authority"");created=1667386210"
http.setRequestHeader "x-ebay-signature-key", "<jwe returned by eBay>"
http.setRequestHeader "x-ebay-enforce-signature", "true"
The remaining header would be as follows once I can generate a valid signature:
http.setRequestHeader "signature" "sig1=:<signature>:"
Everything I have tried results in the same response:
{
  "errors": [
    {
      "errorId": 215122,
      "domain": "ACCESS",
      "category": "REQUEST",
      "message": "Signature validation failed",
      "longMessage": "Signature validation failed to fulfill the request."
    }
  ]
}
Here are some example keys like the ones generated by eBay. https://www.ietf.org/archive/id/draft-ietf-httpbis-message-signatures-11.html#appendix-B.1.4
"The following key is an elliptical curve key over the Edwards curve ed25519, referred to in this document as test-key-ed25519. This key is PCKS#8 encoded in PEM format, with no encryption."
-----BEGIN PUBLIC KEY-----
MCowBQYDK2VwAyEAJrQLj5P/89iXES9+vFgrIy29clF9CC/oPPsw3c5D0bs=
-----END PUBLIC KEY-----
-----BEGIN PRIVATE KEY-----
MC4CAQAwBQYDK2VwBCIEIJ+DYvh6SEqVTm50DFtMDoQikTmiCqirVv9mWG9qfSnF
-----END PRIVATE KEY-----
This is the format of private key that I believe that I need to convert to a 32-byte array to work with the above Python code. I believe that there is a typo on the linked to web page and it should be "PKCS", not "PCKS".
UPDATE:
If I run the following command:
openssl ec -in test.pem -text
Where test.pem is a text file containing:
-----BEGIN PRIVATE KEY-----
MC4CAQAwBQYDK2VwBCIEIJ+DYvh6SEqVTm50DFtMDoQikTmiCqirVv9mWG9qfSnF
-----END PRIVATE KEY-----
It displays the private and public keys as 32-byte hex dumps, but even when using these values I get the same 215122 error response as above. When I verify using the Python verify method in the code above with these 32-byte hex dump keys, validation succeeds.
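(For reference, here is a sketch of how I'd expect the signing side to look in Python once the 32-byte seed is extracted. The "@"-prefixed derived component names come from the message-signatures draft; the exact shape of the base string, and the `seed` and `sign()` names from the snippets above, are assumptions until eBay validates it:)

import base64
import time

jwe = "<jwe returned by eBay>"   # the x-ebay-signature-key header value
created = int(time.time())
params = '("x-ebay-signature-key" "@method" "@path" "@authority");created=%d' % created

# One '"name": value' line per covered component, joined by \n,
# with no trailing newline after the "@signature-params" line.
signature_base = "\n".join([
    '"x-ebay-signature-key": %s' % jwe,
    '"@method": GET',
    '"@path": /sell/fulfillment/v1/order/11-xxxx-yyyy',
    '"@authority": apiz.ebay.com',
    '"@signature-params": %s' % params,
])

sig = sign(seed, signature_base.encode("utf-8"))  # 64-byte Ed25519 signature
signature_header = "sig1=:%s:" % base64.b64encode(sig).decode("ascii")
signature_input_header = "sig1=%s" % params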
Alright, so this is where I'm at right now. I'm not using the Content-Digest header since it's simply a GET request, so I'm just trying to get the basics working, but none of this seems to work.
$public = "xxx";
$private = "yyy";
$jwe = "jwe";
$path = "/sell/fulfillment/v1/order/" . "11-xxxx-yyyy";
$signature_input_txt = '("x-ebay-signature-key" "#method" "#path" "#authority");created=' . time();
// $signature_base = '"content-digest": sha-256=:' . base64_encode($contentDigest) . ":\n";
$signature_base = '"x-ebay-signature-key": ' . $jwe;
$signature_base .= '"#method": POST';
$signature_base .= '"#path": ' . $path;
$signature_base .= '"#authority": ' . "apiz.ebay.com";
$signature_base .= '"#signature-params": ' . $signature_input_txt;
// ensure signature_base is UTF-8
if (!mb_check_encoding($signature_base, 'UTF-8')) {
$signature_base = mb_convert_encoding($signature_base, 'UTF-8');
}
// dd($signature_base);
// base 64 encode our signature_base
$signature_base_base64_encoded = base64_encode($signature_base);
// format the private key as required
$formatted_private_key = "-----BEGIN RSA PRIVATE KEY-----" . PHP_EOL . $private . PHP_EOL . "-----END RSA PRIVATE KEY-----";
// sign
openssl_sign($signature_base_base64_encoded, $signed_signature, $formatted_private_key, "sha256WithRSAEncryption");
return [
'Authorization' => 'Bearer ' . $this->marketplace->getToken('oauth2.access_token', 'production'),
'Accept' => 'application/json',
'Content-Type' => 'application/json',
'Signature-Input' => 'sig1=' . $signature_input_txt,
'Signature' => 'sig1=:' . base64_encode($signed_signature) . ':',
'x-ebay-signature-key' => $jwe,
'x-ebay-enforce-signature' => true
];
I'm going to put this here for anyone struggling to get this working with PHP, adapted from Renegade_Mtl's answer (they'd missed the need for a newline after each signature_base line, and the base didn't need to be base64-encoded).
/**
 * @param $method - e.g. POST, GET
 * @param $path - e.g. /sell/finances/v1/seller_funds_summary
 * @param $host - e.g. api.ebay.com
 * @param $tokens - public, private and jwe keys generated from https://apiz.ebay.com/developer/key_management/v1/signing_key
 * @param $time - e.g. time()
 * @return array of headers
 */
private function createHeaders(string $method, string $path, string $host, array $tokens, int $time) {
    $signature_input_txt = '("x-ebay-signature-key" "@method" "@path" "@authority");created=' . $time;

    // $signature_base = '"content-digest": sha-256=:' . base64_encode($contentDigest) . ":\n";
    $signature_base = '"x-ebay-signature-key": ' . $tokens['jwe'] . "\n";
    $signature_base .= '"@method": ' . $method . "\n";
    $signature_base .= '"@path": ' . $path . "\n";
    $signature_base .= '"@authority": ' . $host . "\n";
    $signature_base .= '"@signature-params": ' . $signature_input_txt;

    // format the private key as required
    $formatted_private_key = "-----BEGIN RSA PRIVATE KEY-----" . PHP_EOL . $tokens['privateKey'] . PHP_EOL . "-----END RSA PRIVATE KEY-----";

    openssl_sign($signature_base, $signed_signature, $formatted_private_key, "sha256WithRSAEncryption");

    return [
        'Signature-Input' => 'sig1=' . $signature_input_txt,
        'Signature' => 'sig1=:' . base64_encode($signed_signature) . ':',
        'x-ebay-signature-key' => $tokens['jwe'],
        'x-ebay-enforce-signature' => "true"
    ];
}
We only use GETs, but if you also POST then you'd need the Content-Digest header as well... Hope this helps someone avoid wasting hours and hours trying to figure it out.
Like I said, I think I found a working strategy; I tested it in TradingView and it worked pretty well, actually, but I just can't find any Python code that calculates the DiNapoli Stochastic indicator.
I have the math formula, plus some Pine Script and MQ4 code; if anyone can help me with this, I think they will be the first on the entire internet.
Pine Script code:
//@author LazyBear
study("DiNapoli Preferred Stochastic Oscillator [LazyBear]", shorttitle="DPSTOCH_LB", overlay=false)
fk = input(8, title="Fast K")
sk = input(3, title="Slow K")
sd = input(3, title="Slow D")
min_ = lowest(low, fk)
max_ = highest(high, fk)
fast = (close - min_)/(max_ - min_)*100
r = nz(r[1]) + (fast - nz(r[1]))/sk
s = nz(s[1]) + (r - nz(s[1]))/sd
ob=hline(70, title="OBLevel"), os=hline(30, title="OSLevel"), fill(ob,os, gray)
plot(r, color=blue, title="Dinapoli Stoch"), plot(s, color=red, title="Signal")
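Since the question asks for Python: the Pine Script above is just a fast %K followed by two recursive smoothings, so a minimal pandas transcription (my own sketch, not verified against TradingView output) could look like this:

import pandas as pd

def dinapoli_stoch(high, low, close, fk=8, sk=3, sd=3):
    """DiNapoli preferred stochastic, transcribed from the Pine Script above."""
    min_ = low.rolling(fk).min()
    max_ = high.rolling(fk).max()
    fast = (close - min_) / (max_ - min_) * 100.0
    r = pd.Series(float("nan"), index=close.index)
    s = pd.Series(float("nan"), index=close.index)
    prev_r, prev_s = 0.0, 0.0  # nz() in Pine treats the unset previous bar as 0
    for i in range(len(close)):
        f = fast.iloc[i]
        if pd.isna(f):
            continue  # not enough bars for the fast %K lookback yet
        prev_r += (f - prev_r) / sk       # r := nz(r[1]) + (fast - nz(r[1])) / sk
        prev_s += (prev_r - prev_s) / sd  # s := nz(s[1]) + (r - nz(s[1])) / sd
        r.iloc[i], s.iloc[i] = prev_r, prev_s
    return r, s  # r = "Dinapoli Stoch" line, s = "Signal" line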
Lua Code
function Init()
    indicator:name("Bigger timeframe Dinapoli Preferred Stochastic");
    indicator:description("");
    indicator:requiredSource(core.Bar);
    indicator:type(core.Oscillator);

    indicator.parameters:addGroup("Calculation");
    indicator.parameters:addInteger("K", "Number of periods for %K", "", 10, 2, 1000);
    indicator.parameters:addInteger("SD", "%D slowing periods", "", 5, 2, 1000);
    indicator.parameters:addInteger("D", "Number of periods for %D", "", 5, 2, 1000);
    indicator.parameters:addString("BS", "Time frame to calculate stochastic", "", "D1");
    indicator.parameters:setFlag("BS", core.FLAG_PERIODS);

    indicator.parameters:addGroup("Display");
    indicator.parameters:addColor("K_color", "Color of K", "Color of K", core.rgb(0, 255, 0));
    indicator.parameters:addColor("D_color", "Color of D", "Color of D", core.rgb(255, 0, 0));
    indicator.parameters:addInteger("Kwidth", "K Line Width", "", 1, 1, 5);
    indicator.parameters:addInteger("Kstyle", "K Line Style", "", core.LINE_SOLID);
    indicator.parameters:setFlag("Kstyle", core.FLAG_LEVEL_STYLE);
    indicator.parameters:addInteger("Dwidth", "D Line Width", "", 1, 1, 5);
    indicator.parameters:addInteger("Dstyle", "D Line Style", "", core.LINE_SOLID);
    indicator.parameters:setFlag("Dstyle", core.FLAG_LEVEL_STYLE);
end
local source; -- the source
local bf_data = nil; -- the high/low data
local k;
local d;
local sd;
local BS;
local bf_length; -- length of the bigger frame in seconds
local dates; -- candle dates
local host;
local Stochastic;
local SK, SD;
local day_offset;
local week_offset;
local extent;
function Prepare(nameOnly)
    source = instance.source;
    host = core.host;
    day_offset = host:execute("getTradingDayOffset");
    week_offset = host:execute("getTradingWeekOffset");
    BS = instance.parameters.BS;
    k = instance.parameters.K;
    sd = instance.parameters.SD;
    d = instance.parameters.D;
    assert(core.indicators:findIndicator("DINAPOLI PREFERRED STOCHASTIC") ~= nil, "Please, download and install DINAPOLI PREFERRED STOCHASTIC.LUA indicator");
    extent = (k + sd + d) * 2;

    local s, e, s1, e1;
    s, e = core.getcandle(source:barSize(), core.now(), 0, 0);
    s1, e1 = core.getcandle(BS, core.now(), 0, 0);
    assert((e - s) < (e1 - s1), "The chosen time frame must be bigger than the chart time frame!");
    bf_length = math.floor((e1 - s1) * 86400 + 0.5);

    local name = profile:id() .. "(" .. source:name() .. "," .. BS .. "," .. k .. "," .. sd .. "," .. d .. ")";
    instance:name(name);
    if nameOnly then
        return;
    end

    SK = instance:addStream("K", core.Line, name .. ".K", "K", instance.parameters.K_color, 0);
    SK:setWidth(instance.parameters.Kwidth);
    SK:setStyle(instance.parameters.Kstyle);
    SD = instance:addStream("D", core.Line, name .. ".D", "D", instance.parameters.D_color, 0);
    SD:setWidth(instance.parameters.Dwidth);
    SD:setStyle(instance.parameters.Dstyle);
    SK:addLevel(20);
    SK:addLevel(50);
    SK:addLevel(80);
    SK:setPrecision(math.max(2, instance.source:getPrecision()));
    SD:setPrecision(math.max(2, instance.source:getPrecision()));
end
local loading = false;
local loadingFrom, loadingTo;
local pday = nil;
-- the function which is called to calculate the period
function Update(period, mode)
    -- get date and time of the hi/lo candle in the reference data
    local bf_candle;
    bf_candle = core.getcandle(BS, source:date(period), day_offset, week_offset);

    -- if data for the specific candle are still loading
    -- then do nothing
    if loading and bf_candle >= loadingFrom and (loadingTo == 0 or bf_candle <= loadingTo) then
        return;
    end

    -- if the period is before the source start
    -- then do nothing
    if period < source:first() then
        return;
    end

    -- if data is not loaded yet at all
    -- load the data
    if bf_data == nil then
        -- there is no data at all, load initial data
        local to, t;
        local from;

        if (source:isAlive()) then
            -- if the source is subscribed for updates
            -- then subscribe the current collection as well
            to = 0;
        else
            -- else load up to the last currently available date
            t, to = core.getcandle(BS, source:date(period), day_offset, week_offset);
        end

        from = core.getcandle(BS, source:date(source:first()), day_offset, week_offset);
        SK:setBookmark(1, period);

        -- shift so the bigger frame data is able to provide us with the stoch data at the first period
        from = math.floor(from * 86400 - (bf_length * extent) + 0.5) / 86400;

        local nontrading, nontradingend;
        nontrading, nontradingend = core.isnontrading(from, day_offset);
        if nontrading then
            -- if it is non-trading, shift for two days to skip the non-trading periods
            from = math.floor((from - 2) * 86400 - (bf_length * extent) + 0.5) / 86400;
        end

        loading = true;
        loadingFrom = from;
        loadingTo = to;
        bf_data = host:execute("getHistory", 1, source:instrument(), BS, loadingFrom, to, source:isBid());
        Stochastic = core.indicators:create("DINAPOLI PREFERRED STOCHASTIC", bf_data, k, sd, d);
        return;
    end

    -- check whether the requested candle is before
    -- the reference collection start
    if (bf_candle < bf_data:date(0)) then
        SK:setBookmark(1, period);
        if loading then
            return;
        end

        -- shift so the bigger frame data is able to provide us with the stoch data at the first period
        from = math.floor(bf_candle * 86400 - (bf_length * extent) + 0.5) / 86400;

        local nontrading, nontradingend;
        nontrading, nontradingend = core.isnontrading(from, day_offset);
        if nontrading then
            -- if it is non-trading, shift for two days to skip the non-trading periods
            from = math.floor((from - 2) * 86400 - (bf_length * extent) + 0.5) / 86400;
        end

        loading = true;
        loadingFrom = from;
        loadingTo = bf_data:date(0);
        host:execute("extendHistory", 1, bf_data, loadingFrom, loadingTo);
        return;
    end

    -- check whether the requested candle is after
    -- the reference collection end
    if (not(source:isAlive()) and bf_candle > bf_data:date(bf_data:size() - 1)) then
        SK:setBookmark(1, period);
        if loading then
            return;
        end

        loading = true;
        loadingFrom = bf_data:date(bf_data:size() - 1);
        loadingTo = bf_candle;
        host:execute("extendHistory", 1, bf_data, loadingFrom, loadingTo);
        return;
    end

    Stochastic:update(mode);

    local p;
    p = core.findDate(bf_data, bf_candle, true);
    if p == -1 then
        return;
    end

    if Stochastic:getStream(0):hasData(p) then
        SK[period] = Stochastic:getStream(0)[p];
    end
    if Stochastic:getStream(1):hasData(p) then
        SD[period] = Stochastic:getStream(1)[p];
    end
end
-- the function is called when the async operation is finished
function AsyncOperationFinished(cookie)
    local period;
    pday = nil;
    period = SK:getBookmark(1);
    if (period < 0) then
        period = 0;
    end
    loading = false;
    instance:updateFrom(period);
end
Addendum: https://ninjatrader.com/support/forum/forum/ninjascript-file-sharing/ninjascript-file-sharing-discussion/3545-dinapoli-stochastic/page2
I am running community detection on graphs built from telecom CDR data. At first I was working with very dense graphs containing 10,000 nodes, and the algorithm was producing 150 to 170 communities per graph. I was using the Louvain community detection algorithm implemented in Scala for Spark.
When I run the same algorithm implemented in C#, I get around 10 communities per graph. I also did some testing with a smaller graph of around 300 nodes, and the same thing occurs: when I run it in Spark with Scala I get around 50 communities; when I run it in Python or C# I get 8 to 10.
I am really surprised to see such a difference. Every implementation that I used (Scala, Python or C#) refers to the paper by V. D. Blondel https://arxiv.org/abs/0803.0476, so the algorithm should be the same, but the output is completely different. Has anyone experienced something like that when using Spark/Scala vs. Python/C#?
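(As a baseline, here is a quick cross-check against the reference implementation of that paper. This sketch assumes the third-party python-louvain and networkx packages, and that the file name matches the config below:)

import networkx as nx
import community as community_louvain  # pip install python-louvain

G = nx.read_weighted_edgelist("src/data/input/file_with_edges.csv", delimiter=",")
partition = community_louvain.best_partition(G)  # node -> community id
print("communities:", len(set(partition.values())))
print("modularity:", community_louvain.modularity(partition, G))
# Note: Louvain is order-dependent, so community counts can vary between
# runs and between implementations even on identical input.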
This is how the main class Louvain is called:
import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.log4j.{Level, Logger}

object Driver {
  def main(args: Array[String]): Unit = {
    val config = LouvainConfig(
      "src/data/input/file_with_edges.csv", // input file
      "src/data/output/",                   // output dir
      1,                                    // parallelism
      2000,                                 // minimumCompressionProgress
      1,                                    // progressCounter
      ",")                                  // delimiter

    val sc = new SparkContext("local[*]", "Louvain")
    val louvain = new Louvain()
    louvain.run(sc, config)
  }
}
This is Scala implementation that I am using:
import scala.reflect.ClassTag
import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.serializers.DefaultArraySerializers.ObjectArraySerializer
import com.esotericsoftware.kryo.{Kryo, KryoSerializable}
import org.apache.spark._
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
import org.apache.spark.broadcast.Broadcast
//import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.{SparkContext}

class Louvain() extends Serializable {
  def getEdgeRDD(sc: SparkContext, conf: LouvainConfig, typeConversionMethod: String => Long = _.toLong): RDD[Edge[Long]] = {
    sc.textFile(conf.inputFile, conf.parallelism).map(row => {
      val tokens = row.split(conf.delimiter).map(_.trim())
      tokens.length match {
        case 2 => new Edge(typeConversionMethod(tokens(0)),
          typeConversionMethod(tokens(1)), 1L)
        case 3 => new Edge(typeConversionMethod(tokens(0)),
          typeConversionMethod(tokens(1)), tokens(2).toDouble.toLong)
        case _ => throw new IllegalArgumentException("invalid input line: " + row)
      }
    })
  }

  /**
   * Generates a new graph of type Graph[LouvainData, Long] based on an
   * input graph of type Graph[VD, Long]. The resulting graph can be used
   * for Louvain computation.
   */
  def createLouvainGraph[VD: ClassTag](graph: Graph[VD, Long]): Graph[LouvainData, Long] = {
    val nodeWeights = graph.aggregateMessages(
      (e: EdgeContext[VD, Long, Long]) => {
        e.sendToSrc(e.attr)
        e.sendToDst(e.attr)
      },
      (e1: Long, e2: Long) => e1 + e2
    )

    graph.outerJoinVertices(nodeWeights)((vid, data, weightOption) => {
      val weight = weightOption.getOrElse(0L)
      new LouvainData(vid, weight, 0L, weight, false)
    }).partitionBy(PartitionStrategy.EdgePartition2D).groupEdges(_ + _)
  }
  /**
   * Creates the messages passed between each vertex to convey
   * neighborhood community data.
   */
  def sendCommunityData(e: EdgeContext[LouvainData, Long, Map[(Long, Long), Long]]) = {
    val m1 = (Map((e.srcAttr.community, e.srcAttr.communitySigmaTot) -> e.attr))
    val m2 = (Map((e.dstAttr.community, e.dstAttr.communitySigmaTot) -> e.attr))
    e.sendToSrc(m2)
    e.sendToDst(m1)
  }

  /**
   * Merge neighborhood community data into a single message for each vertex
   */
  def mergeCommunityMessages(m1: Map[(Long, Long), Long], m2: Map[(Long, Long), Long]) = {
    val newMap = scala.collection.mutable.HashMap[(Long, Long), Long]()

    m1.foreach({ case (k, v) =>
      if (newMap.contains(k)) newMap(k) = newMap(k) + v
      else newMap(k) = v
    })

    m2.foreach({ case (k, v) =>
      if (newMap.contains(k)) newMap(k) = newMap(k) + v
      else newMap(k) = v
    })

    newMap.toMap
  }

  /**
   * Returns the change in modularity that would result from a vertex
   * moving to a specified community.
   */
  def q(
    currCommunityId: Long,
    testCommunityId: Long,
    testSigmaTot: Long,
    edgeWeightInCommunity: Long,
    nodeWeight: Long,
    internalWeight: Long,
    totalEdgeWeight: Long): BigDecimal = {

    val isCurrentCommunity = currCommunityId.equals(testCommunityId)
    val M = BigDecimal(totalEdgeWeight)
    val k_i_in_L = if (isCurrentCommunity) edgeWeightInCommunity + internalWeight else edgeWeightInCommunity
    val k_i_in = BigDecimal(k_i_in_L)
    val k_i = BigDecimal(nodeWeight + internalWeight)
    val sigma_tot = if (isCurrentCommunity) BigDecimal(testSigmaTot) - k_i else BigDecimal(testSigmaTot)

    var deltaQ = BigDecimal(0.0)
    if (!(isCurrentCommunity && sigma_tot.equals(BigDecimal.valueOf(0.0)))) {
      deltaQ = k_i_in - (k_i * sigma_tot / M)
      //println(s"  $deltaQ = $k_i_in - ( $k_i * $sigma_tot / $M )")
    }

    deltaQ
  }
  /**
   * Join vertices with community data from their neighborhood and
   * select the best community for each vertex to maximize the change
   * in modularity.
   * Returns a new set of vertices with the updated vertex state.
   */
  def louvainVertJoin(
    louvainGraph: Graph[LouvainData, Long],
    msgRDD: VertexRDD[Map[(Long, Long), Long]],
    totalEdgeWeight: Broadcast[Long],
    even: Boolean) = {

    // innerJoin[U, VD2](other: RDD[(VertexId, U)])(f: (VertexId, VD, U) => VD2): VertexRDD[VD2]
    louvainGraph.vertices.innerJoin(msgRDD)((vid, louvainData, communityMessages) => {
      var bestCommunity = louvainData.community
      val startingCommunityId = bestCommunity
      var maxDeltaQ = BigDecimal(0.0)
      var bestSigmaTot = 0L

      // VertexRDD[scala.collection.immutable.Map[(Long, Long), Long]]
      // e.g. (1, Map((3,10) -> 2, (6,4) -> 2, (2,8) -> 2, (4,8) -> 2, (5,8) -> 2))
      // e.g. communityId: 3, sigmaTotal: 10, communityEdgeWeight: 2
      communityMessages.foreach({ case ((communityId, sigmaTotal), communityEdgeWeight) =>
        val deltaQ = q(
          startingCommunityId,
          communityId,
          sigmaTotal,
          communityEdgeWeight,
          louvainData.nodeWeight,
          louvainData.internalWeight,
          totalEdgeWeight.value)

        //println("  community: " + communityId + " sigma: " + sigmaTotal +
        //  " edgeweight: " + communityEdgeWeight + " q: " + deltaQ)
        if (deltaQ > maxDeltaQ || (deltaQ > 0 && (deltaQ == maxDeltaQ &&
          communityId > bestCommunity))) {
          maxDeltaQ = deltaQ
          bestCommunity = communityId
          bestSigmaTot = sigmaTotal
        }
      })

      // only allow changes from low to high communities on even cycles and
      // high to low on odd cycles
      if (louvainData.community != bestCommunity && ((even &&
        louvainData.community > bestCommunity) || (!even &&
        louvainData.community < bestCommunity))) {
        //println("  " + vid + " SWITCHED from " + louvainData.community + " to " + bestCommunity)
        louvainData.community = bestCommunity
        louvainData.communitySigmaTot = bestSigmaTot
        louvainData.changed = true
      }
      else {
        louvainData.changed = false
      }

      if (louvainData == null)
        println("vdata is null: " + vid)

      louvainData
    })
  }
  def louvain(
    sc: SparkContext,
    graph: Graph[LouvainData, Long],
    minProgress: Int = 1,
    progressCounter: Int = 1): (Double, Graph[LouvainData, Long], Int) = {

    var louvainGraph = graph.cache()

    val graphWeight = louvainGraph.vertices.map(louvainVertex => {
      val (vertexId, louvainData) = louvainVertex
      louvainData.internalWeight + louvainData.nodeWeight
    }).reduce(_ + _)

    val totalGraphWeight = sc.broadcast(graphWeight)

    println("totalEdgeWeight: " + totalGraphWeight.value)

    // gather community information from each vertex's local neighborhood
    var communityRDD =
      louvainGraph.aggregateMessages(sendCommunityData, mergeCommunityMessages)

    var activeMessages = communityRDD.count() // materializes the msgRDD
                                              // and caches it in memory
    var updated = 0L - minProgress
    var even = false
    var count = 0
    val maxIter = 100000
    var stop = 0
    var updatedLastPhase = 0L

    do {
      count += 1
      even = !even

      // label each vertex with its best community based on neighboring
      // community information
      val labeledVertices = louvainVertJoin(louvainGraph, communityRDD,
        totalGraphWeight, even).cache()

      // calculate new sigma total value for each community (total weight
      // of each community)
      val communityUpdate = labeledVertices
        .map({ case (vid, vdata) => (vdata.community, vdata.nodeWeight +
          vdata.internalWeight) })
        .reduceByKey(_ + _).cache()

      // map each vertex ID to its updated community information
      val communityMapping = labeledVertices
        .map({ case (vid, vdata) => (vdata.community, vid) })
        .join(communityUpdate)
        .map({ case (community, (vid, sigmaTot)) => (vid, (community, sigmaTot)) })
        .cache()

      // join the community labeled vertices with the updated community info
      val updatedVertices = labeledVertices.join(communityMapping).map({
        case (vertexId, (louvainData, communityTuple)) =>
          val (community, communitySigmaTot) = communityTuple
          louvainData.community = community
          louvainData.communitySigmaTot = communitySigmaTot
          (vertexId, louvainData)
      }).cache()

      updatedVertices.count()
      labeledVertices.unpersist(blocking = false)
      communityUpdate.unpersist(blocking = false)
      communityMapping.unpersist(blocking = false)

      val prevG = louvainGraph
      louvainGraph = louvainGraph.outerJoinVertices(updatedVertices)((vid, old, newOpt) => newOpt.getOrElse(old))
      louvainGraph.cache()

      // gather community information from each vertex's local neighborhood
      val oldMsgs = communityRDD
      communityRDD = louvainGraph.aggregateMessages(sendCommunityData, mergeCommunityMessages).cache()

      activeMessages = communityRDD.count() // materializes the graph
                                            // by forcing computation
      oldMsgs.unpersist(blocking = false)
      updatedVertices.unpersist(blocking = false)
      prevG.unpersistVertices(blocking = false)

      // half of the communities can switch on even cycles and the other half
      // on odd cycles (to prevent deadlocks), so we only want to look for
      // progress on odd cycles (after all vertices have had a chance to
      // move)
      if (even) updated = 0
      updated = updated + louvainGraph.vertices.filter(_._2.changed).count

      if (!even) {
        println("  # vertices moved: " + java.text.NumberFormat.getInstance().format(updated))
        if (updated >= updatedLastPhase - minProgress) stop += 1
        updatedLastPhase = updated
      }
    } while (stop <= progressCounter && (even || (updated > 0 && count < maxIter)))

    println("\nCompleted in " + count + " cycles")

    // Use each vertex's neighboring community data to calculate the
    // global modularity of the graph
    val newVertices =
      louvainGraph.vertices.innerJoin(communityRDD)((vertexId, louvainData,
        communityMap) => {
        // sum the node's internal weight and all of its edges that are in
        // its community
        val community = louvainData.community
        var accumulatedInternalWeight = louvainData.internalWeight
        val sigmaTot = louvainData.communitySigmaTot.toDouble

        def accumulateTotalWeight(totalWeight: Long, item: ((Long, Long), Long)) = {
          val ((communityId, sigmaTotal), communityEdgeWeight) = item
          if (louvainData.community == communityId)
            totalWeight + communityEdgeWeight
          else
            totalWeight
        }

        accumulatedInternalWeight = communityMap.foldLeft(accumulatedInternalWeight)(accumulateTotalWeight)
        val M = totalGraphWeight.value
        val k_i = louvainData.nodeWeight + louvainData.internalWeight
        val q = (accumulatedInternalWeight.toDouble / M) - ((sigmaTot * k_i) / math.pow(M, 2))
        //println(s"vid: $vid community: $community $q = ($k_i_in / $M) - ( ($sigmaTot * $k_i) / math.pow($M, 2) )")
        if (q < 0)
          0
        else
          q
      })

    val actualQ = newVertices.values.reduce(_ + _)

    // return the modularity value of the graph along with the
    // graph; vertices are labeled with their community
    (actualQ, louvainGraph, count / 2)
  }
  def compressGraph(graph: Graph[LouvainData, Long], debug: Boolean = true): Graph[LouvainData, Long] = {
    // aggregate the edge weights of self loops. edges with both src and dst in the same community.
    // WARNING: can not use graph.mapReduceTriplets because we are mapping to new vertexIds
    val internalEdgeWeights = graph.triplets.flatMap(et => {
      if (et.srcAttr.community == et.dstAttr.community) {
        Iterator((et.srcAttr.community, 2 * et.attr)) // count the weight from both nodes
      }
      else Iterator.empty
    }).reduceByKey(_ + _)

    // aggregate the internal weights of all nodes in each community
    val internalWeights = graph.vertices.values.map(vdata =>
      (vdata.community, vdata.internalWeight))
      .reduceByKey(_ + _)

    // join internal weights and self edges to find the new internal weight of each community
    val newVertices = internalWeights.leftOuterJoin(internalEdgeWeights).map({ case (vid, (weight1, weight2Option)) =>
      val weight2 = weight2Option.getOrElse(0L)
      val state = new LouvainData()
      state.community = vid
      state.changed = false
      state.communitySigmaTot = 0L
      state.internalWeight = weight1 + weight2
      state.nodeWeight = 0L
      (vid, state)
    }).cache()

    // translate each vertex edge to a community edge
    val edges = graph.triplets.flatMap(et => {
      val src = math.min(et.srcAttr.community, et.dstAttr.community)
      val dst = math.max(et.srcAttr.community, et.dstAttr.community)
      if (src != dst) Iterator(new Edge(src, dst, et.attr))
      else Iterator.empty
    }).cache()

    // generate a new graph where each community of the previous graph is
    // now represented as a single vertex
    val compressedGraph = Graph(newVertices, edges)
      .partitionBy(PartitionStrategy.EdgePartition2D).groupEdges(_ + _)

    // calculate the weighted degree of each node
    val nodeWeights = compressedGraph.aggregateMessages(
      (e: EdgeContext[LouvainData, Long, Long]) => {
        e.sendToSrc(e.attr)
        e.sendToDst(e.attr)
      },
      (e1: Long, e2: Long) => e1 + e2
    )

    // fill in the weighted degree of each node
    // val louvainGraph = compressedGraph.joinVertices(nodeWeights)((vid, data, weight) => {
    val louvainGraph = compressedGraph.outerJoinVertices(nodeWeights)((vid, data, weightOption) => {
      val weight = weightOption.getOrElse(0L)
      data.communitySigmaTot = weight + data.internalWeight
      data.nodeWeight = weight
      data
    }).cache()

    louvainGraph.vertices.count()
    louvainGraph.triplets.count() // materialize the graph

    newVertices.unpersist(blocking = false)
    edges.unpersist(blocking = false)

    println("******************************************************")
    println(louvainGraph.vertices.count())

    louvainGraph
  }
  def saveLevel(
    sc: SparkContext,
    config: LouvainConfig,
    level: Int,
    qValues: Array[(Int, Double)],
    graph: Graph[LouvainData, Long]) = {

    val vertexSavePath = config.outputDir + "/level_" + level + "_vertices"
    val edgeSavePath = config.outputDir + "/level_" + level + "_edges"

    // save
    graph.vertices.saveAsTextFile(vertexSavePath)
    graph.edges.saveAsTextFile(edgeSavePath)

    // overwrite the q values at each level
    sc.parallelize(qValues, 1).saveAsTextFile(config.outputDir + "/qvalues_" + level)
  }

  //def run[VD: ClassTag](sc: SparkContext, config: LouvainConfig, graph: Graph[VD, Long]): Unit = {
  def run[VD: ClassTag](sc: SparkContext, config: LouvainConfig): Unit = {
    val edgeRDD = getEdgeRDD(sc, config)
    val initialGraph = Graph.fromEdges(edgeRDD, None)

    var louvainGraph = createLouvainGraph(initialGraph)

    var compressionLevel = -1    // number of times the graph has been compressed
    var q_modularityValue = -1.0 // current modularity value
    var halt = false

    var qValues: Array[(Int, Double)] = Array()

    do {
      compressionLevel += 1
      println(s"\nStarting Louvain level $compressionLevel")

      // label each vertex with its best community choice at this level of compression
      val (currentQModularityValue, currentGraph, numberOfPasses) =
        louvain(sc, louvainGraph, config.minimumCompressionProgress, config.progressCounter)

      louvainGraph.unpersistVertices(blocking = false)
      louvainGraph = currentGraph

      println(s"qValue: $currentQModularityValue")

      qValues = qValues :+ ((compressionLevel, currentQModularityValue))
      saveLevel(sc, config, compressionLevel, qValues, louvainGraph)

      // If modularity was increased by at least 0.001, compress the graph and repeat;
      // halt immediately if the community labeling took less than 3 passes
      //println(s"if ($passes > 2 && $currentQ > $q + 0.001 )")
      if (numberOfPasses > 2 && currentQModularityValue > q_modularityValue + 0.001) {
        q_modularityValue = currentQModularityValue
        louvainGraph = compressGraph(louvainGraph)
      }
      else {
        halt = true
      }
    } while (!halt)

    //finalSave(sc, compressionLevel, q_modularityValue, louvainGraph)
  }
}
The code is taken from GitHub: https://github.com/athinggoingon/louvain-modularity.
Here is an example of the input file, just the first 10 lines. The graph is built from a CSV file with the schema: node1, node2, weight_of_the_edge
104,158,34.23767571520276
146,242,12.49338107205348
36,37,0.6821403413414481
28,286,2.5053934980726456
9,92,0.34412941554076487
222,252,10.502288293870677
235,282,0.25717021769814874
264,79,18.555996343792327
24,244,1.7094102023399587
231,75,21.698401383558213
I have the following code in C++:
for (const char *x = r.ptr, *end = r.ptr + r.len; x != end; ++x) {
    switch (*x) {
    case 0x5c:
    case 0x22:
        pc->output[0] = '\\'; pc->output[1] = *x; pc->output += 2;
        break;
    case 0xa:
        pc->output[0] = '\\'; pc->output[1] = 'n'; pc->output += 2;
        break;
    case 0xd:
        pc->output[0] = '\\'; pc->output[1] = 'r'; pc->output += 2;
        break;
    default:
        if (str_escape_2_hex(*x)) {
            impl::escape_char_hex(pc->output, *x);
        } else {
            *pc->output = *x; pc->output++;
        }
    }
}
And I want to rewrite it in Python 2 because I need the same encoder there. I tried this:
def encode_akv_fields(data):
    hexlify = codecs.getencoder('hex')
    for i, el in enumerate(str(data)):
        if hexlify(el)[0] in ('5c', '22'):  # \\ or "
            data[i].encode('hex') = '\\' + hexlify(el)[0]
        elif hexlify(el)[0] == '0a':  # \n
            data[i].encode('hex') = '\\n'
        elif hexlify(el)[0] == '0d':  # \r
            data[i].encode('hex') = '\\r'
        elif '1f' >= hexlify(el)[0] >= '7f':
            tmp3 = (hexlify(el)[0] >> 4) & '0f'.decode('hex')
            data[i].encode('hex') = '\\x'
    return data
but it doesn't work - I got
SyntaxError: can't assign to function call
Data is a string or dict with values that I want to log. Those logs need to be in AKV format (Apache key-value). And for this to work I need some hex values to be encoded like they are in C++ (the C++ code works).
How should I create the same encoder in python as I did in c++?
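(A minimal Python 2 sketch of the same escaper, building the result in a new string instead of assigning into the input; str.encode('hex') is a function call, not an assignable target, which is what the SyntaxError is complaining about. The threshold assumes str_escape_2_hex() selects control and non-ASCII bytes, i.e. <= 0x1F or >= 0x7F, which is what the broken range comparison in the attempt appears to intend:)

def encode_akv(data):
    out = []
    for ch in str(data):
        code = ord(ch)
        if ch in ('\\', '"'):              # 0x5c or 0x22
            out.append('\\' + ch)
        elif ch == '\n':                   # 0x0a
            out.append('\\n')
        elif ch == '\r':                   # 0x0d
            out.append('\\r')
        elif code <= 0x1f or code >= 0x7f:
            out.append('\\x%02x' % code)   # hex-escape, like escape_char_hex
        else:
            out.append(ch)
    return ''.join(out)

print encode_akv('key="va\nlue"')  # -> key=\"va\nlue\" with the \n escaped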
I have files with incorrect JSON that I want to start fixing by getting it into properly grouped chunks.
The brace grouping {{ {} {} } } {{}} {{{}}} should already be correct
How can I grab all the top-level braces, correctly grouped, as separate strings?
If you don't want to install any extra modules, a simple function will do:
def top_level(s):
    depth = 0
    start = -1
    for i, c in enumerate(s):
        if c == '{':
            if depth == 0:
                start = i
            depth += 1
        elif c == '}' and depth:
            depth -= 1
            if depth == 0:
                yield s[start:i+1]

print(list(top_level('{{ {} {} } } {{}} {{{}}}')))
Output:
['{{ {} {} } }', '{{}}', '{{{}}}']
It will skip invalid braces but could be easily modified to report an error when they are spotted.
Using the third-party regex module (the recursive pattern (?R) is not supported by the built-in re module):
In [1]: import regex
In [2]: braces = regex.compile(r"\{(?:[^{}]++|(?R))*\}")
In [3]: braces.findall("{{ {} {} } } {{}} {{{}}}")
Out[3]: ['{{ {} {} } }', '{{}}', '{{{}}}']
pyparsing can be really helpful here. It will handle pathological cases where you have braces inside strings, etc. It might be a little tricky to do all of this work yourself, but fortunately, somebody (the author of the library) has already done the hard stuff for us.... I'll reproduce the code here to prevent link-rot:
# jsonParser.py
#
# Implementation of a simple JSON parser, returning a hierarchical
# ParseResults object support both list- and dict-style data access.
#
# Copyright 2006, by Paul McGuire
#
# Updated 8 Jan 2007 - fixed dict grouping bug, and made elements and
# members optional in array and object collections
#
json_bnf = """
object
    { members }
    {}
members
    string : value
    members , string : value
array
    [ elements ]
    []
elements
    value
    elements , value
value
    string
    number
    object
    array
    true
    false
    null
"""

from pyparsing import *

TRUE = Keyword("true").setParseAction(replaceWith(True))
FALSE = Keyword("false").setParseAction(replaceWith(False))
NULL = Keyword("null").setParseAction(replaceWith(None))

jsonString = dblQuotedString.setParseAction(removeQuotes)
jsonNumber = Combine(Optional('-') + ('0' | Word('123456789', nums)) +
                     Optional('.' + Word(nums)) +
                     Optional(Word('eE', exact=1) + Word(nums + '+-', nums)))

jsonObject = Forward()
jsonValue = Forward()
jsonElements = delimitedList(jsonValue)
jsonArray = Group(Suppress('[') + Optional(jsonElements) + Suppress(']'))
jsonValue << (jsonString | jsonNumber | Group(jsonObject) | jsonArray | TRUE | FALSE | NULL)
memberDef = Group(jsonString + Suppress(':') + jsonValue)
jsonMembers = delimitedList(memberDef)
jsonObject << Dict(Suppress('{') + Optional(jsonMembers) + Suppress('}'))

jsonComment = cppStyleComment
jsonObject.ignore(jsonComment)

def convertNumbers(s, l, toks):
    n = toks[0]
    try:
        return int(n)
    except ValueError, ve:
        return float(n)

jsonNumber.setParseAction(convertNumbers)
Phew! That's a lot ... Now how do we use it? The general strategy here will be to scan the string for matches and then slice those matches out of the original string. Each scan result is a tuple of the form (lex-tokens, start_index, stop_index). For our use, we don't care about the lex-tokens, just the start and stop. We could do string[result[1]:result[2]] and it would work. We can also do string[slice(*result[1:])] -- take your pick.
results = jsonObject.scanString(testdata)
for result in results:
    print '*' * 80
    print testdata[slice(*result[1:])]
I would like to parse declarations using pyparsing in a C-like source (GLSL code) such that I get a list of (type, name, value).
For example:
int a[3];
int b=1, c=2.0;
float d = f(z[2], 2) + 3*g(4,a), e;
Point f = {1,2};
I would like to obtain something like:
[ ('int', 'a[3]', ''),
('int', 'b', '1'),
('int', 'c', '2.0'),
('float', 'd', 'f(z[2], 2) + 3*g(4,a)'),
('float', 'e', ''),
('Point', 'f', '{1,2}') ]
I've played with Forward() and operatorPrecedence() to try to parse the rhs expression but I suspect it is not necessary in my case.
So far I have:
IDENTIFIER = Regex('[a-zA-Z_][a-zA-Z_0-9]*')
INTEGER = Regex('([+-]?(([1-9][0-9]*)|0+))')
EQUAL = Literal("=").suppress()
SEMI = Literal(";").suppress()
SIZE = INTEGER | IDENTIFIER
VARNAME = IDENTIFIER
TYPENAME = IDENTIFIER
VARIABLE = Group(VARNAME.setResultsName("name")
                 + Optional(EQUAL + Regex("[^,;]*").setResultsName("value")))
VARIABLES = delimitedList(VARIABLE.setResultsName("variable", listAllMatches=True))
DECLARATION = (TYPENAME.setResultsName("type")
               + VARIABLES.setResultsName("variables", listAllMatches=True) + SEMI)

code = """
float a=1, b=3+f(2), c;
float d=1.0, e;
float f = z(3,4);
"""

for (token, start, end) in DECLARATION.scanString(code):
    for variable in token.variable:
        print token.type, variable.name, variable.value
but the last expression (f = z(3,4)) is not parsed because of the comma.
There is a C struct parser on the pyparsing wiki that might give you a good start.
This seems to work.
IDENTIFIER = Word(alphas+"_", alphas+nums+"_")
INT_DECIMAL = Regex('([+-]?(([1-9][0-9]*)|0+))')
INT_OCTAL = Regex('(0[0-7]*)')
INT_HEXADECIMAL = Regex('(0[xX][0-9a-fA-F]*)')
INTEGER = INT_HEXADECIMAL | INT_OCTAL | INT_DECIMAL
FLOAT = Regex('[+-]?(((\d+\.\d*)|(\d*\.\d+))([eE][-+]?\d+)?)|(\d*[eE][+-]?\d+)')

LPAREN, RPAREN = Literal("(").suppress(), Literal(")").suppress()
LBRACK, RBRACK = Literal("[").suppress(), Literal("]").suppress()
LBRACE, RBRACE = Literal("{").suppress(), Literal("}").suppress()
SEMICOLON, COMMA = Literal(";").suppress(), Literal(",").suppress()
EQUAL = Literal("=").suppress()

SIZE = INTEGER | IDENTIFIER
VARNAME = IDENTIFIER
TYPENAME = IDENTIFIER
OPERATOR = oneOf("+ - * / [ ] . & ^ ! { }")

PART = nestedExpr() | nestedExpr('{', '}') | IDENTIFIER | INTEGER | FLOAT | OPERATOR
EXPR = delimitedList(PART, delim=Empty()).setParseAction(keepOriginalText)
VARIABLE = (VARNAME("name") + Optional(LBRACK + SIZE + RBRACK)("size")
            + Optional(EQUAL + EXPR)("value"))
VARIABLES = delimitedList(VARIABLE.setResultsName("variables", listAllMatches=True))
DECLARATION = (TYPENAME("type") + VARIABLES + SEMICOLON)

code = """
int a[3];
int b=1, c=2.0;
float d = f(z[2], 2) + 3*g(4,a), e;
Point f = {1,2};
"""

for (token, start, end) in DECLARATION.scanString(code):
    vtype = token.type
    for variable in token.variables:
        name = variable.name
        size = variable.size
        value = variable.value
        s = "%s / %s" % (vtype, name)
        if size: s += ' [%s]' % size[0]
        if value: s += ' / %s' % value[0]
        s += ";"
        print s