I am attempting to access a function in a C file using Python ctypes. The code is below.
class PacketInfo_t(Structure):
_fields_ = [("pPayload",c_char_p),
("Payload_len",c_uint),
("usSrcPort",c_ushort),
("usDstPort",c_ushort),
("unSrcIP",c_uint),
("unDstIP",c_uint),
("enDirection",c_uint)
]
def func(self):
packets = [(packet,direction) for packet, direction in self.flow.packets]
c_packets = []
packet_num = len(packets)
packet_info_p_array = (POINTER(PacketInfo_t) * packet_num)()
for i,(packet,dire) in enumerate(packets):
if "TCP" in packet:
sport = packet["TCP"].sport
dport = packet["TCP"].dport
payload = packet["TCP"].payload
elif "UDP" in packet:
sport = packet["UDP"].sport
dport = packet["UDP"].dport
payload = packet["UDP"].payload
cpayload=cast(bytes(payload), c_char_p)
if dire == PacketDirection.FORWARD:
int_dire = 0
else:
int_dire = 1
# print sport in python
print("sport in python : ", sport)
packet_info_value = [cpayload,
c_uint(len(payload)),
c_ushort(sport),
c_ushort(dport),
c_uint(ip2long(packet["IP"].src)),
c_uint(ip2long(packet["IP"].dst)),
c_uint(int_dire)]
packet_obj = PacketInfo_t(*packet_info_value)
packet_info_p_array[i] = pointer(packet_obj)
# pdb.set_trace()
print("before into C function, pram's address is ", byref(packet_info_p_array))
print("before into C function, packet_num is ", packet_num)
res = solib.Check_MKCP(byref(packet_info_p_array), packet_num)
return res
The processing object of the program is the network flow, which contains a lot of data packets.
int func_C(PacketInfo_t ** ppPacketInfo, int nPacket){
int i = 0;
unsigned int payload_len = 0;
unsigned int tmplen,f;
unsigned char *payload;
// address
printf("ppPacketInfo's address is %p\n", ppPacketInfo);
printf("start func_C function! nPacket = %u\n", nPacket);
for(i=0;i<nPacket;i++){
payload_len = ppPacketInfo[i]->Payload_len;
// print sport in C
printf("sport = %u\n",ppPacketInfo[i]->usSrcPort);
if(payload_len != 0){
break;
}
}
...
}
But The content of multiple pointer is different in python and C code. image:
enter image description here
Why does this problem occur and how should I solve or think about it?
The minimal case can be download by https://drive.google.com/file/d/1Pkca1J0kJmTd5jp1HkCAxCCsQY2wlUOY/view?usp=sharing
Related
This is the module.py script which loads the erf_utils_io_D.dll which contains io.c and io.h files
I am successful in loading the library and passing the ctype arguments like
c_int, c_float, POINTER(c_int), POINTER(c_Float)
module.py
//python code module.py
import sys
from ctypes import *
#load the required library and make sure the folder where erf_utils_io_D.dll is present
dll = CDLL('D:\\erf_utils_python\\erf_utils_io.dll')
getContourResults = dll.getContourResults
class Utility(object):
def __init__(self):
print('i am inside init')
self.stagename = "post"
self.stateids = (c_int * 1) (2934)
self.stateidcount = 1
self.entityidcount = 1
self.entityid = (c_int * 1) (1)
self.entitytype = "FPM"
self.variablecount = 1
self.ores = [1.0]
self.filename ='allinone_RESULT.erfh5'
#This is how char** is treted in python for variablegroup
self.variablegroup = ["FPM_Mach_Number"]
self.string_length = len(self.variablegroup)
self.select_type = (c_wchar_p * self.string_length)
self.select = self.select_type()
for key, item in enumerate(self.variablegroup):
self.select[key] = item
#This is how char** is treated infor variable
self.variable = ["FPM_Mach_Number"]
self.var_len = len(self.variable)
self.var_type = (c_wchar_p * self.var_len)
self.variable_list = self.var_type()
for key, item in enumerate(self.variable):
self.variable_list[key] = item
def run(self):
getContourResults.argtypes = (POINTER(c_char_p), POINTER(c_char_p), c_int, POINTER(c_int),
c_int, POINTER(c_int), POINTER(c_char), c_int, self.select_type ,
self.var_type, POINTER(c_float))
getContourResults.restype = (c_int)
err = getContourResults(self.filename, self.stagename, self.stateidcount,
self.stateids, self.entityidcount,self.entityid, self.entitytype, self.variablecount, self.select, self.variable_list, self.ores)
reader = Utility()
reader.run()
code.cpp looks like this
extern "C"
{
#endif
__declspec(dllexport) int getContourResults(char* iFilename, char* iStagename, int iStateidCnt, int* Stateids,
int iEntityIdCount, int* iEntityids, char* iEntityType,
int iVariablecnt, char** iVariablegroup, char** ivariable,
float* oResults);
}
Please let me know how to pass arguments from python script to the method getContourResults() present in io.c
There was no indication of implementation of the function so I made one with some assumptions. You should be able to adapt to your real use case with this example. Main fixes were .argtypes corrections and using c_char_p instead of c_wchar_p and a simple helper function for turning a list into a ctypes array.
Here's some clarification on types:
c_char_p == char* in C. Pass a byte string, e.g. b'string'.
c_wchar_p == wchar_t* in C. Pass a Unicode string, e.g. 'string'.
POINTER(c_char_p) == char** in C.
POINTER(c_float) == float* in C. Create storage (c_float()) and pass byref. Access the returned float as a Python integer with .value member.
test.cpp
#include <stdio.h>
extern "C" __declspec(dllexport) int getContourResults(char* iFilename, char* iStagename, int iStateidCnt, int* Stateids,
int iEntityIdCount, int* iEntityids, char* iEntityType,
int iVariablecnt, char** iVariablegroup, char** ivariable,
float* oResults)
{
printf("iFilename = %s\n"
"iStagename = %s\n"
,iFilename,iStagename);
for(int i = 0; i < iStateidCnt; ++i)
printf("Stateids[%d] = %d\n",i,Stateids[i]);
for(int i = 0; i < iEntityIdCount; ++i)
printf("iEntityids[%d] = %d\n",i,iEntityids[i]);
printf("iEntityType = %s\n",iEntityType);
for(int i = 0; i < iVariablecnt; ++i) {
printf("iVariablegroup[%d] = %s\n",i,iVariablegroup[i]);
printf("ivariable[%d] = %s\n",i,ivariable[i]);
}
*oResults = 1.2f;
return 5;
}
test.py
from ctypes import *
dll = CDLL('./test')
dll.getContourResults = dll.getContourResults
dll.getContourResults.argtypes = (c_char_p,c_char_p,c_int,POINTER(c_int),c_int,POINTER(c_int),c_char_p,
c_int,POINTER(c_char_p),POINTER(c_char_p),POINTER(c_float))
dll.getContourResults.restype = c_int
def make_array(ctype,arr):
return len(arr),(ctype * len(arr))(*arr)
def getContourResults(filename,stagename,sids,eids,entitytype,groups,variables):
stateidcount,stateids = make_array(c_int,sids)
entityidcount,entityid = make_array(c_int,eids)
groups = [b'group1',b'group2']
variables = [b'var1',b'var2']
if len(groups) != len(variables):
raise ValueError('assuming groups and variables same length')
_,variablegroup = make_array(c_char_p,groups)
variablecount,variable = make_array(c_char_p,variables)
ores = c_float()
err = dll.getContourResults(filename,stagename,stateidcount,stateids,entityidcount,entityid,
entitytype,variablecount,variablegroup,variable,byref(ores))
return err,ores.value
sids = [1,2,3]
eids = [4,5,6]
groups = [b'group1',b'group2']
variables = [b'var1',b'var2']
err,ores = getContourResults(b'filename',b'stagename',sids,eids,b'entitytype',groups,variables)
print(f'err = {err}')
print(f'ores = {ores:.2f}')
Output:
iFilename = filename
iStagename = stagename
Stateids[0] = 1
Stateids[1] = 2
Stateids[2] = 3
iEntityids[0] = 4
iEntityids[1] = 5
iEntityids[2] = 6
iEntityType = entitytype
iVariablegroup[0] = group1
ivariable[0] = var1
iVariablegroup[1] = group2
ivariable[1] = var2
err = 5
ores = 1.20
I got a system where I would send a command from my host computer using Python Socket (the computer is the server) and the MKR1000 (client) would send back information depends on the command sent.
Unfortunately, the bidirectional communication is unstable. I can guarantee the MKR1000 received the command and (maybe) sending information back, but for some reason, my host computer would not receive the command.
Anyway, this is my first time trying out socket, so I would like some guru to review my code and maybe spot the mistake in here? Thanks a lot.
Python:
import socket
import time
def coor2bytes(coor_fnc):
coorByte = [0, 0, 0, 0, 0, 0]
if (coor_fnc[0] >= 0):
coorByte[0] = (coor_fnc[0] >> 8) & 0xFF # High byte of X
coorByte[1] = coor_fnc[0] & 0xFF # Low byte of X
else:
coor_fnc[0] = coor_fnc[0]*(-1)
coorByte[0] = (coor_fnc[0] >> 8) & 0xFF # High byte of X
coorByte[0] = coorByte[0] ^ 0x80
coorByte[1] = coor_fnc[0] & 0xFF # Low byte of X
if (coor_fnc[1] >= 0):
coorByte[2] = (coor_fnc[1] >> 8) & 0xFF # High byte of Y
coorByte[3] = coor_fnc[1] & 0xFF # Low byte of Y
else:
coor_fnc[1] = coor_fnc[1]*(-1)
coorByte[2] = (coor_fnc[1] >> 8) & 0xFF # High byte of X
coorByte[2] = coorByte[2] ^ 0x80
coorByte[3] = coor_fnc[1] & 0xFF # Low byte of X
if (coor_fnc[2] >= 0):
coorByte[4] = (coor_fnc[2] >> 8) & 0xFF # High byte of Phi
coorByte[5] = coor_fnc[2] & 0xFF # Low byte of Phi
else:
coor_fnc[2] = coor_fnc[2]*(-1)
coorByte[4] = (coor_fnc[2] >> 8) & 0xFF # High byte of Phi
coorByte[4] = coorByte[4] ^ 0x80
coorByte[5] = coor_fnc[2] & 0xFF # Low byte of Phi
return coorByte
def bytes2coor(byte_fnc):
receivedCoor_fnc = [0, 0, 0]
receivedCoor_fnc[0] = ((-1)**(byte_fnc[0]>>7)) * ((byte_fnc[1]) | (((byte_fnc[0]&0x7f)<<8)))
receivedCoor_fnc[1] = ((-1)**(byte_fnc[2]>>7)) * ((byte_fnc[3]) | (((byte_fnc[2]&0x7f)<<8)))
receivedCoor_fnc[2] = ((-1)**(byte_fnc[4]>>7)) * ((byte_fnc[5]) | (((byte_fnc[4]&0x7f)<<8)))
return receivedCoor_fnc
if __name__ == '__main__':
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((socket.gethostname(), 1234)) # bind(ip, port)
print("Done binding.")
s.listen(2)
clientsocket, address = s.accept()
print(f"Connection from {address} has been established!")
clientsocket.settimeout(1)
while True:
print();
print("What you want to do?")
print("0. Send target")
print("1. Get current coordinate")
print("2. Set current coordinate (not yet implement)")
try:
a = int(input("I choose: "))
except Exception:
print("Error.")
a = -1;
if (a == 0):
coor = [0, 0, 0]
try:
coor[0] = int(input("X: "))
coor[1] = -int(input("y: "))
coor[2] = int(input("phi: "))
coorByte = coor2bytes(coor)
clientsocket.send(bytes([0]))
clientsocket.send(bytes(coorByte))
print("I already sent the target.")
except Exception:
print("Error.")
elif (a == 1):
receive = 0
while (not receive):
try:
clientsocket.send(bytes([1]))
bytesReceived = []
full_msg = []
while (len(full_msg) < 8):
bytesReceived = clientsocket.recv(8)
for x in range(len(bytesReceived)):
full_msg.append(bytesReceived[x])
receivedCoor = bytes2coor(full_msg)
print("coordinate received: " + str(receivedCoor))
receive = 1
except socket.timeout:
print("Time out. Will try again.")
elif (a == 2):
setCoor = [0, 0, 0]
try:
setCoor[0] = int(input("X: "))
setCoor[1] = -int(input("y: "))
setCoor[2] = int(input("phi: "))
setcoorByte = coor2bytes(setCoor)
clientsocket.send(bytes([2]))
clientsocket.send(bytes(setcoorByte))
print("I already sent the new coordinate.")
except Exception:
print("Error.")
else:
print("Not yet implement.")
Arduino:
#include <WiFi101.h>
#include <SPI.h>
// To connect to the server on laptop
char ssid[] = "iPhone";
char pass[] = "00000000";
int status = WL_IDLE_STATUS;
IPAddress server(172,20,10,3);
WiFiClient client;
// Random variable
int a, i, j, k, m;
byte buf0[7];
byte buf1[7];
byte buf2[7];
long start = millis();
int elapsedTime = 0;
int timeout = 0;
void setup() {
// put your setup code here, to run once:
// Serial.begin(115200);
Serial.begin(115200);
Serial1.begin(115200);
// status = WiFi.begin(ssid, pass);
while (status != WL_CONNECTED) {
status = WiFi.begin(ssid, pass);
}
j = client.connect(server, 1234);
while (j != 1) {
j = client.connect(server, 1234);
}
}
void loop()
{
if (client.available()) {
a = client.read();
Serial.print("I got: ");
Serial.println(a);
if (a == 0) { // Send new target to Due
Serial.println("I send target.");
j = 0;
start = millis();
while(j<6) {
elapsedTime = millis() - start;
if (elapsedTime > 1000) {
timeout = 1;
break;
}
if (client.available()>0) {
buf0[j] = client.read();
Serial.println(buf0[j]);
j++;
}
}
if (timeout != 1) {
Serial1.write((byte) 0);
// Send coordinate back to Due
for (i = 0; i<6; i++) {
Serial1.write(buf0[i]);
}
} else {
timeout = 0;
}
} else if (a == 1) {
// Get the coordinate from the Due
Serial.println("I receive coordinate.");
Serial1.write((byte) 1);
k = 0;
start = millis();
while(k < 6) {
elapsedTime = millis() - start;
if (elapsedTime > 1000) {
timeout = 1;
break;
}
if (Serial1.available() > 0) {
buf1[k] = Serial1.read();
Serial.println(buf1[k]);
k++;
}
}
if (timeout != 1) {
for (i=0;i<6;i++) {
client.write(buf1[i]);
delay(10);
}
client.write((byte) 0); // fill in the blank size
delay(10);
client.write((byte) 0);
} else {
timeout = 0;
}
// for (int i = 0; i<8; i++) {
// client.write((byte) 0);
// }
} else if (a == 2) { // set the current coordinnate to be something else.
Serial.println("I set coordinate.");
m = 0;
while(m<6) {
if (client.available()>0) {
buf2[m] = client.read();
Serial.println(buf2[m]);
m++;
}
}
Serial1.write((byte) 2);
// Send coordinate back to Due
for (i = 0; i<6; i++) {
Serial1.write(buf2[i]);
}
} else if (a == 3) { // identify yourself
Serial.println("Identify myself.");
client.write((byte) 1);
}
}
}
If you have time to read through the Arduino code, then you would see that I actually have serial communication between my MKR and Due too. I also can guarantee that the MKR can receive all those data from the Due and not stuck in some infinite loop.
Thank you!
Ok so for some reason if I added a delay right after the MKR connecting to WiFi, before connecting to the server, then everything just works.
I have the following code in c++:
for(const char *x = r.ptr, *end = r.ptr + r.len; x != end; ++x) {
switch(*x) {
case 0x5c:
case 0x22:
pc->output[0] = '\\'; pc->output[1] = *x; pc->output += 2;
break;
case 0xa:
pc->output[0] = '\\'; pc->output[1] = 'n'; pc->output += 2;
break;
case 0xd:
pc->output[0] = '\\'; pc->output[1] = 'r'; pc->output += 2;
break;
default:
if(str_escape_2_hex(*x)) {
impl::escape_char_hex(pc->output, *x);
} else {
*pc->output = *x; pc->output++;
}
}
}
And I want to rewrite it to python 2 because I need the same encoder there. I tried with this:
def encode_akv_fields(data):
hexlify = codecs.getencoder('hex')
for i, el in enumerate(str(data)):
if hexlify(el)[0] in ('5c', '22'): # \\ or "
data[i].encode('hex') = '\\' + hexlify(el)[0]
elif hexlify(el)[0] == '0a': # \n
data[i].encode('hex') = '\\n'
elif hexlify(el)[0] == '0d': # \r
data[i].encode('hex') = '\\r'
elif '1f' >= hexlify(el)[0] >= '7f':
tmp3 = (hexlify(el)[0] >> 4) & '0f'.decode('hex')
data[i].encode('hex') = '\\x'
return data
but it doesn't work - I got
SyntaxError: can't assign to function call
Data is a string or dict with values that I want to log. Those logs needs to be in AKV format (Apache key value). And for this to work I need some hex values to be encoded like it is in c++ (the code in c++ works).
How should I create the same encoder in python as I did in c++?
We would really appreciate any kind of help, because we are driving crazy with our program making it faster using C language.
The values obtained don't change, always are 0,0,0,0
Here is the code, running in Linux:
from scipy import weave
pasa = 0
coorX = -11.8
coorY = -7.9
INC=0.01296
##def weave_update():
code="""
int i,j, pasa;
double coorX, coorY,INC;
for (i=0; i < 1296;i++){
yminf = coorY + INC*(i);
ymaxf = yminf + INC;
for (j=0; j < 1936;j++){
xminc = coorX + INC*(j);
xmaxc = xminc + INC;
pasa = 1;
break;
}
if (pasa == 1){
break;
}
}
"""
weave.inline(code,['yminf','xminc','xmaxc','ymaxf'],type_converters=weave.converters.blitz,compiler='gcc')
print yminf,xminc,xmaxc,ymaxf
Looks like two issues. First, you need to pass in all of the variables that the C code needs access to from python. So, your inline call needs to be:
weave.inline(code, ['coorX','coorY','INC'])
Secondly, you need to return the values you want from the weave code, because modifying them in C doesn't affect their value in Python. Here's one way to do it:
py::tuple ret(4);
ret[0] = yminf;
ret[1] = xminc;
ret[2] = xmaxc;
ret[3] = ymaxf;
return_val = ret;
With these modifications, the following file seems to work correctly:
from scipy import weave
coorX = -11.8
coorY = -7.9
INC = 0.01296
code="""
int i,j, pasa = 0;
double yminf,xminc,xmaxc,ymaxf;
for (i=0; i < 1296;i++){
yminf = coorY + INC*(i);
ymaxf = yminf + INC;
for (j=0; j < 1936;j++){
xminc = coorX + INC*(j);
xmaxc = xminc + INC;
pasa = 1;
break;
}
if (pasa == 1){
break;
}
}
py::tuple ret(4);
ret[0] = yminf;
ret[1] = xminc;
ret[2] = xmaxc;
ret[3] = ymaxf;
return_val = ret;
"""
yminf,xminc,xmaxc,ymaxf = weave.inline(code,['coorX','coorY','INC'])
print yminf,xminc,xmaxc,ymaxf
I am trying to convert the Java Code to Python Code and i have done it so far. Java Code works but Python Code doesn't work. Please help me.
Python Code
import random
class QLearning():
alpha = 0.1
gamma = 0.9
state_a = 0
state_b = 1
state_c = 2
state_d = 3
state_e = 4
state_f = 5
states_count = 6
states = [state_a, state_b, state_c, state_d, state_e, state_f]
R = [[0 for x in range(states_count)] for x in range(states_count)]
Q = [[0 for x in range(states_count)] for x in range(states_count)]
action_from_a = [state_b, state_d]
action_from_b = [state_a, state_c, state_e]
action_from_c = [state_c]
action_from_d = [state_a, state_e]
action_from_e = [state_b, state_d, state_f]
action_from_f = [state_c, state_e]
actions = [action_from_a, action_from_b, action_from_c, action_from_d, action_from_e, action_from_f]
state_names = ["A","B","C","D","E","F"]
def __init__(self):
self.R[self.state_b][self.state_c] = 100
self.R[self.state_f][self.state_c] = 100
def run(self):
for i in range(1000):
state = random.randrange(self.states_count)
while(state != self.state_c):
actions_from_state = self.actions[state]
index = random.randrange(len(actions_from_state))
action = actions_from_state[index]
next_state = action
q = self.Q_Value(state, action)
max_Q = self.max_q(next_state)
r = self.R_Value(state, action)
value = q + self.alpha * (r + self.gamma * max_Q - q)
self.set_q(state, action, value)
state = next_state
def max_q(self, s):
self.run().actions_from_state = self.actions[s]
max_value = 5
for i in range(len(self.run().actions_from_state)):
self.run().next_state = self.run().actions_from_state[i]
self.run().value = self.Q[s][self.run().next_state]
if self.run().value > max_value:
max_value = self.run().value
return max_value
def policy(self, state):
self.run().actions_from_state = self.actions[state]
max_value = 5
policy_goto_state = state
for i in range(len(self.run().actions_from_state)):
self.run().next_state = self.run().actions_from_state[i]
self.run().value = self.Q[state][self.run().next_state]
if self.run().value > max_value:
max_value = self.run().value
policy_goto_state = self.run().next_state
return policy_goto_state
def Q_Value(self, s,a):
return self.Q[s][a]
def set_q(self, s, a, value):
self.Q[s][a] = value
def R_Value(self, s, a):
return self.R[s][a]
def print_result(self):
print("Print Result")
for i in range(len(self.Q)):
print("Out From (0)".format(self.state_names[i]))
for j in range(len(self.Q[i])):
print(self.Q[i][j])
def show_policy(self):
print("Show Policy")
for i in range(len(self.states)):
fro = self.states[i]
to = self.policy(fro)
print("From {0} goto {1}".format(self.state_names[fro], self.state_names[to]))
obj = QLearning()
obj.run()
obj.print_result()
obj.show_policy()
Java Code
import java.text.DecimalFormat;
import java.util.Random;
public class Qlearning {
final DecimalFormat df = new DecimalFormat("#.##");
// path finding
final double alpha = 0.1;
final double gamma = 0.9;
// states A,B,C,D,E,F
// e.g. from A we can go to B or D
// from C we can only go to C
// C is goal state, reward 100 when B->C or F->C
//
// _______
// |A|B|C|
// |_____|
// |D|E|F|
// |_____|
//
final int stateA = 0;
final int stateB = 1;
final int stateC = 2;
final int stateD = 3;
final int stateE = 4;
final int stateF = 5;
final int statesCount = 6;
final int[] states = new int[]{stateA,stateB,stateC,stateD,stateE,stateF};
// http://en.wikipedia.org/wiki/Q-learning
// http://people.revoledu.com/kardi/tutorial/ReinforcementLearning/Q-Learning.htm
// Q(s,a)= Q(s,a) + alpha * (R(s,a) + gamma * Max(next state, all actions) - Q(s,a))
int[][] R = new int[statesCount][statesCount]; // reward lookup
double[][] Q = new double[statesCount][statesCount]; // Q learning
int[] actionsFromA = new int[] { stateB, stateD };
int[] actionsFromB = new int[] { stateA, stateC, stateE };
int[] actionsFromC = new int[] { stateC };
int[] actionsFromD = new int[] { stateA, stateE };
int[] actionsFromE = new int[] { stateB, stateD, stateF };
int[] actionsFromF = new int[] { stateC, stateE };
int[][] actions = new int[][] { actionsFromA, actionsFromB, actionsFromC,
actionsFromD, actionsFromE, actionsFromF };
String[] stateNames = new String[] { "A", "B", "C", "D", "E", "F" };
public Qlearning() {
init();
}
public void init() {
R[stateB][stateC] = 100; // from b to c
R[stateF][stateC] = 100; // from f to c
}
public static void main(String[] args) {
long BEGIN = System.currentTimeMillis();
Qlearning obj = new Qlearning();
obj.run();
obj.printResult();
obj.showPolicy();
long END = System.currentTimeMillis();
System.out.println("Time: " + (END - BEGIN) / 1000.0 + " sec.");
}
void run() {
/*
1. Set parameter , and environment reward matrix R
2. Initialize matrix Q as zero matrix
3. For each episode: Select random initial state
Do while not reach goal state o
Select one among all possible actions for the current state o
Using this possible action, consider to go to the next state o
Get maximum Q value of this next state based on all possible actions o
Compute o Set the next state as the current state
*/
// For each episode
Random rand = new Random();
for (int i = 0; i < 1000; i++) { // train episodes
// Select random initial state
int state = rand.nextInt(statesCount);
while (state != stateC) // goal state
{
// Select one among all possible actions for the current state
int[] actionsFromState = actions[state];
// Selection strategy is random in this example
int index = rand.nextInt(actionsFromState.length);
int action = actionsFromState[index];
// Action outcome is set to deterministic in this example
// Transition probability is 1
int nextState = action; // data structure
// Using this possible action, consider to go to the next state
double q = Q(state, action);
double maxQ = maxQ(nextState);
int r = R(state, action);
double value = q + alpha * (r + gamma * maxQ - q);
setQ(state, action, value);
// Set the next state as the current state
state = nextState;
}
}
}
double maxQ(int s) {
int[] actionsFromState = actions[s];
double maxValue = Double.MIN_VALUE;
for (int i = 0; i < actionsFromState.length; i++) {
int nextState = actionsFromState[i];
double value = Q[s][nextState];
if (value > maxValue)
maxValue = value;
}
return maxValue;
}
// get policy from state
int policy(int state) {
int[] actionsFromState = actions[state];
double maxValue = Double.MIN_VALUE;
int policyGotoState = state; // default goto self if not found
for (int i = 0; i < actionsFromState.length; i++) {
int nextState = actionsFromState[i];
double value = Q[state][nextState];
if (value > maxValue) {
maxValue = value;
policyGotoState = nextState;
}
}
return policyGotoState;
}
double Q(int s, int a) {
return Q[s][a];
}
void setQ(int s, int a, double value) {
Q[s][a] = value;
}
int R(int s, int a) {
return R[s][a];
}
void printResult() {
System.out.println("Print result");
for (int i = 0; i < Q.length; i++) {
System.out.print("out from " + stateNames[i] + ": ");
for (int j = 0; j < Q[i].length; j++) {
System.out.print(df.format(Q[i][j]) + " ");
}
System.out.println();
}
}
// policy is maxQ(states)
void showPolicy() {
System.out.println("\nshowPolicy");
for (int i = 0; i < states.length; i++) {
int from = states[i];
int to = policy(from);
System.out.println("from "+stateNames[from]+" goto "+stateNames[to]);
}
}
}
Traceback
C:\Python33\python.exe "C:/Users/Ajay/Documents/Python Scripts/RL/QLearning.py"
Traceback (most recent call last):
File "C:/Users/Ajay/Documents/Python Scripts/RL/QLearning.py", line 4, in <module>
class QLearning():
File "C:/Users/Ajay/Documents/Python Scripts/RL/QLearning.py", line 19, in QLearning
R = [[0 for x in range(states_count)] for x in range(states_count)]
File "C:/Users/Ajay/Documents/Python Scripts/RL/QLearning.py", line 19, in <listcomp>
R = [[0 for x in range(states_count)] for x in range(states_count)]
NameError: global name 'states_count' is not defined
To access all of the class attributes you define (i.e. everything between class QLearning and def __init__), you need to use self or the class name:
self.states_count
or
QLearning.states_count
I don't know the algorithm, but it is possible that these class attributes should be instance attributes (i.e. separate for each instance of the class, rather than shared amongst all instances) and therefore defined in __init__ (or other instance methods) using self anyway.