The problem.
I'm trying to make a metropolis simulation of the 2D Ising model. My code reproduces some of the expected behaviour: i.e. there's a critical transition at vaguely the right temperature, but:
Energy has a smoothed-out transition. Magnetisation is a sharp step down from 1 to 0, while energy continues to rise, and doesn't show any change of gradient.
The heat capacity shows a peak, but instead of rising when I increase the lattice size, the peak gets smaller.
The heat capacity is noisy: the peak is often doubled, and fitting it to a Lorentzian is a nightmare. Worse yet, running the simulation multiple times gets me the same data (as evidenced by the small errorbar.
My project is a c++ program that simulates a single lattice, and a python script that analyses the output. As of now the script pipes the program stdout and assigns it to internal variables. It's inefficient, but that's for debugging purposes.
the full source code can be found here. For convenience, see the main routines of the c++ program and the python script below.
The code:
lattice.h
...
/* Converts given pair of indices to those with periodic boundary conditions. */
int inline to_periodic(int row, int col, int size) {
...
}
class lattice {
private:
unsigned int size_;
std::vector<short> *spins_;
float J_;
float H_;
public:
lattice() noexcept : size_(0), spins_(NULL), J_(1.0), H_(0.0) {}
lattice(int new_size, double new_J, double new_H) noexcept
: size_(new_size), spins_(new std::vector<short>(size_ * size_, 1)),
J_(new_J), H_(new_H) {}
lattice(const lattice &other) noexcept
: lattice(other.size_, other.J_, other.H_) {
...
float compute_point_energy(int row, int col);
};
#endif
simulation.h
#ifndef simulation_h
#define simulation_h
#include "lattice.h"
#include "rng.h"
#include <gsl/gsl_rng.h>
class simulation {
private:
unsigned int time_ = 0; // Current time of the simulation.
rng r_ = rng();
lattice spin_lattice_;
double temperature_;
double mean_magnetisation_ = 1;
double mean_energy_;
double total_magnetisation_;
double total_energy_;
unsigned int print_interval_ = 1;
void advance();
public:
void set_print_interval(unsigned int new_print_interval) { print_interval_ = new_print_interval; }
simulation(int new_size, double new_temp, double new_J, double new_H)
: time_(0), spin_lattice_(lattice(new_size, new_J, new_H)), temperature_(new_temp),
mean_energy_(new_J * (-4)), total_magnetisation_(new_size * new_size),
total_energy_(compute_energy(spin_lattice_)) {}
void print_status(FILE *f) {
f = f==NULL? stdout : f;
fprintf(f, "%4d\t%e \t%e\t%e\n", time_, mean_magnetisation_,
mean_energy_, temperature_);
}
void advance(unsigned int time_steps, FILE *output);
double compute_energy(lattice &other);
double compute_dE(int row, int col) {
return -2*spin_lattice_.compute_point_energy(row, col);
}
void set_to_chequerboard(int step);
void print_lattice(){
spin_lattice_.print();
};
// void load_custom(const lattice& custom);
};
#endif
simulation.cpp
...
void simulation::advance(unsigned int time_steps, FILE *output) {
unsigned int area = spin_lattice_.get_size() * spin_lattice_.get_size();
for (unsigned int i = 0; i < time_steps; i++) {
if (time_ % print_interval_ == 0) {
total_magnetisation_ = spin_lattice_.total_magnetisation();
mean_magnetisation_ = total_magnetisation_ / area;
total_energy_ = compute_energy(spin_lattice_);
mean_energy_ = total_energy_ / area;
print_status(output);
}
advance();
}
}
void simulation::advance() {
// #pragma omp parallel for collapse(2)
for (unsigned int row = 0; row < spin_lattice_.get_size(); row++) {
for (unsigned int col = 0; col < spin_lattice_.get_size(); col++) {
double dE = compute_dE(row, col);
double p = r_.random_uniform();
if (dE <0 || exp(-dE / temperature_) > p) {
spin_lattice_.flip(row, col);
}
}
}
time_++;
}
double simulation::compute_energy(lattice &other) {
double energy_sum = 0;
unsigned int max = other.get_size();
#pragma omp parallel for reduction(+ : energy_sum)
for (unsigned int i = 0; i < max; i++) {
for (unsigned int j = 0; j < max; j++) {
energy_sum += other.compute_point_energy(i, j);
}
}
return energy_sum/2;
}
lattice.cpp
.....
void lattice::print() {
...
}
float lattice::compute_point_energy(int row, int col) {
int accumulator = get(row + 1, col) + get(row - 1, col) + get(row, col - 1) +
get(row, col + 1);
return -get(row, col) * (accumulator * J_ + H_);
}
int lattice::total_magnetisation() {
...
}
investigator.py
.....
# -------------------------------------------------------------------------
def smart_duration(temperature, multiplier=1.):
return int(((10**3)*base_duration*multiplier)/(
(temperature - t_c)**2 + breadth))
def investigate_temperature_dependence(temps=None, lattice_sizes=None,
**kwargs):
if temps is None:
temps = append(linspace(1.5, 2.2, 8), linspace(2.2, 2.3, 8))
temps = append(temps, linspace(2.3, 4, 8))
if lattice_sizes is None:
lattice_sizes = [50, 250, 300]
fig, ax = plt.subplots(nrows=2, sharex='col')
ax[0].set_ylabel('mean magnetisation / arb. u.')
ax[1].set_ylabel('mean energy / arb. u.')
ax[0].axvline(x=t_c, color='k', ls='--')
ax[1].axvline(x=t_c, color='k', ls='--')
for l in lattice_sizes:
print(l)
simulations = [Simulation(lattice_size=l, temperature=t,
duration=smart_duration(l, t)) for t in
temps]
data = [multi_run(s, **kwargs) for s in simulations]
terminal_magnetisations = array(data).T[0]
sigma_magnetisations = array(data).T[1]
terminal_energies = array(data).T[2]
ax[0].plot(temps, terminal_magnetisations, '-',
label='N = ' + str(l))
ax[1].plot(temps, terminal_energies, '-', label='N =' + str(l))
plt.xlabel('temperature / arb. u.')
plt.xticks(
append(linspace(min(temps), t_c, 3), linspace(t_c, max(temps), 3)))
save_plot('Critical temperature')
# -------------------------------------------------------------------------
def theoretical_capacity(x, a, b, c, d):
return c + (b*x)/((x - a)**2 + d)
def investigate_heat_capacity(lattice_sizes=None, temps=None, **kwargs):
if temps is None:
temps = linspace(1.5, 3, 60)
if lattice_sizes is None:
lattice_sizes = [16,32, 34, 36]
fig, axes = plt.subplots(nrows=len(lattice_sizes), sharex='col')
crit_temps = []
for l, ax in zip(lattice_sizes, axes):
crit_temps.append(fit_and_plot_capacity(ax, l, temps, **kwargs))
fig.set_size_inches(10.5, 10.5)
plt.xlabel('temperature / arb. u.')
save_plot('Heat capacity')
return crit_temps
def fit_and_plot_capacity(ax, l, temps, **kwargs):
"""
Plot the heat capacity of simulations at given temperature and lattice size.
Afterwards fit a lorentzian and plot.
Parameters:
-----------
ax : pyplot.axis
what to plot to.
l : int
lattice size
temps: numpy.array
Array of temperatures where to evaluate heat capacity.
Returns:
-------
popt[0]: float
most likely critical temperature.
"""
global use_disk
use_disk = False
simulations = [Simulation(lattice_size=l, temperature=t, duration=2)
for t in temps]
# sigmas = [stdev(s.mean_energies[:]) for s in simulations]
sigmas = array([multi_run(s, **kwargs) for s in simulations]).T[3]
meta_sigmas = array([multi_run(s, **kwargs) for s in simulations]).T[4]
# print(meta_sigmas)
Cs = [sigma**2/(temp**2)*10**3 for temp, sigma in zip(temps, sigmas)]
C_errs = Cs[:]*meta_sigmas[:]
try:
popt, pcov = curve_fit(theoretical_capacity, temps, Cs,
sigma=meta_sigmas, bounds=(
[min(temps) - .2, 0, 0, 0],
[max(temps) + .2, inf, inf, .7]))
except RuntimeError:
popt = [temps[argmax(Cs)], (max(Cs) - min(Cs))/4, min(Cs),
(max(temps) - min(temps))/4]
print('I\'m too dumb to fit')
ax.axvline(x=popt[0], ls='--', color='g')
ax.plot(temps, theoretical_capacity(temps, *popt), 'g-',
label='N = ' + str(l) + 'd = ' + str(popt[3]) + ' fit')
ax.errorbar(temps*10, Cs, fmt='b.', yerr=C_errs, label='N = ' + str(l))
ax.set_ylabel(r'C $\cdot 10^3$/ arb. u.')
ax.axvline(x=t_c, ls='-.', color='k')
ax.set_xticks(
append(linspace(min(temps), t_c, 6), linspace(t_c, max(temps), 6)))
ax.legend(loc='best')
return popt[0]
def multi_run(sim, re_runs:int=2, take_last:int=300):
"""
Re run the Ising model simulation multiple times, and gather statistics.
Parameters:
----------
sim: simulation
re_runs: int
number of times to repeat simulation
take_last: int
How many of the final points to take statistics over.
Returns:
list:
"""
global use_disk
use_disk = False
sim.duration = smart_duration(sim.temperature)
print(sim.duration)
magnetizations = []
sigma_magnetizations = []
energies = []
sigma_energies = []
for i in range(re_runs):
sim.data = sim.run()
# Make each run take 50% longer, so that we
# can see if a system is still settling
sim.duration *= 2
last_magnetizations = sim.mean_magnetizations()[-take_last:]
magnetizations.append(abs(mean(last_magnetizations)))
sigma_magnetizations.append(std(last_magnetizations))
last_energies = sim.mean_energies()[-take_last:]
energies.append(mean(last_energies))
sigma_energies.append(std(last_energies))
return [mean(magnetizations), mean(sigma_magnetizations),
mean(energies), mean(sigma_energies),
std(sigma_energies)/mean(sigma_energies)]
# -------------------------------------------------------------------------
def finite_size_scale(N, t_inf, a, v):
return t_inf + a*(N**(-1/v))
def investigate_finite_size_scaling(critical_temperatures, lattice_sizes,
**kwargs):
if critical_temperatures is None:
critical_temperatures = investigate_heat_capacity(lattice_sizes,
**kwargs)
args, cov = curve_fit(finite_size_scale, lattice_sizes,
critical_temperatures)
plt.plot(lattice_sizes, critical_temperatures, 'b+', label='data')
plt.plot(lattice_sizes, finite_size_scale(lattice_sizes, *args), 'r-',
label='fit')
plt.ylabel('critical temperature / arb. u.')
plt.xlabel('Lattice size')
save_plot('Finite size scaling')
return args[0], sqrt(cov[0, 0])
# -------------------------------------------------------------------------
t_c = 2/log(1 + sqrt(2))
use_disk = False
breadth = 1
base_duration = 50
sizes = [16, 32, 40, 50, 70]
# investigate_time_evolution()
# investigate_temperature_dependence(lattice_sizes=sizes, re_runs=4)
critical_temps = investigate_heat_capacity(lattice_sizes=sizes,
take_last=300)
# temp_inf = investigate_finite_size_scaling(critical_temps, sizes)
# print(temp_inf)
# print((t_c - temp_inf[0])/ temp_inf[1], ' Standard errors away')
Pictures
To illustrate the problem take a look here:
As I said, magnetisation is fine, but energy isn't. There isn't a sharp transition.
Heat capacity is even worse. It starts to plateau for
My thoughts.
I might simply not be running the simulation over a long enough time tens of thousands of MCS, and averaging over too few datapoints (the last 300), but then the errorbars would be bigger, or at least visible.
Related
I have the same error as many of the questions here: OSError: exception: access violation reading 0x000002369EF14000. However, the situation is different and I couldn't figure out why this happens. Moreover, the address appears to be different every time I run the code.
I ran the following python code and get the error above. How do I fix this issue? (Or what is the general cause for the OSError: exception: access violation reading error?)
from ctypes import *
import numpy as np
splines = CDLL('splines.dll')
splines.spline_basis.argtypes = [POINTER(c_double), POINTER(c_int), POINTER(c_int), POINTER(c_double), POINTER(c_int),
POINTER(c_int), POINTER(c_double), POINTER(c_int)]
x = np.random.rand(10)
ord = 3
knots = np.linspace(0, 1, 5)
nk = len(knots)
nx = len(x)
ind = sorted(range(x.shape[0]), key=lambda k: x[k])
sortx = x[ind]
derivs = nx
derivs = [derivs if i == 0 else np.nan for i in ind]
ncoef = nk - ord
temp = splines.spline_basis(np.array(knots).ctypes.data_as(POINTER(c_double)), POINTER(c_int)(c_int(ncoef)),
POINTER(c_int)(c_int(ord)),
sortx.ctypes.data_as(POINTER(c_double)), np.array(derivs).ctypes.data_as(POINTER(c_int)),
POINTER(c_int)(c_int(nx)), np.zeros([ord, nx]).ctypes.data_as(POINTER(c_double)),
POINTER(c_int)(c_int(nx)))
print(temp)
The dll file is compiled from the c code below, which builds a spline basis. The main function used is the spline_basis function
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "splines.h"
double* ldel, * rdel;
long orderm1;
void
diff_table(double* ti, double x, int ndiff)
{
register double* r = rdel, * l = ldel, * dpt = ti;
while (ndiff--) {
*r++ = *dpt++ - x;
*l++ = x - *--ti;
}
}
void
basis_funcs(double* ti, double x, double* b)
{
int j, r;
double saved, term;
diff_table(ti, x, orderm1);
b[0] = 1.;
for (j = 1; j <= orderm1; j++) {
saved = 0.;
for (r = 0; r < j; r++) {
term = b[r] / (rdel[r] + ldel[j - 1 - r]);
b[r] = saved + rdel[r] * term;
saved = ldel[j - 1 - r] * term;
}
b[j] = saved;
}
}
double
evaluate(double* ti, double x, double* a, int nder)
{
register double* lpt, * rpt, * apt;
register int inner;
int outer = orderm1;
while (nder--) {
for (inner = outer, apt = a, lpt = ti - outer; inner--; apt++, lpt++)
*apt = outer * (*(apt + 1) - *apt) / (*(lpt + outer) - *lpt);
outer--;
}
diff_table(ti, x, (long)outer);
while (outer--)
for (apt = a, lpt = ldel + outer, rpt = rdel, inner = outer + 1;
inner--; lpt--, rpt++, apt++)
*apt = (*(apt + 1) * *lpt + *apt * *rpt) / (*rpt + *lpt);
return(*a);
}
void
spline_value(double* knots, double* coeff, int * ncoeff,
int * order, double* x, int * nx, int * deriv,
double* y)
{
long n = *nx;
double* a, * last = knots + *ncoeff;
a = calloc(*order, sizeof(double));
orderm1 = *order - 1L; /* allocate difference tables */
rdel = calloc(orderm1, sizeof(double));
ldel = calloc(orderm1, sizeof(double));
knots += *order; /* First *order knots must be <= all x's */
while (n--) {
while (knots <= last && *knots <= *x) { knots++; coeff++; }
memcpy(a, coeff, *order);
*y++ = evaluate(knots, *x++, a, (int)*deriv);
}
free(ldel); free(rdel); free(a);
}
void spline_basis(double* knots, int * ncoeff, int * order,
double* xvals, int * derivs, int * nx,
double* basis, int * offsets)
{ /* evaluate the non-zero B-spline basis */
/* functions (or their derivatives) at */
/* xvals. */
int n = *nx, i, j;
double* dpt, * coeff, * last = knots + *ncoeff;
orderm1 = *order - 1L;
rdel = calloc(orderm1, sizeof(double));
ldel = calloc(orderm1, sizeof(double));
coeff = calloc(*order, sizeof(double));
dpt = (knots += *order); /* first *order knots must be <= all xvals */
for (; n--; xvals++, derivs++) {
while (dpt < last && *dpt <= *xvals) dpt++;
if (*derivs) { /* slow method for derivatives */
for (i = 0; i < *order; i++) {
for (j = 0; j < *order; j++) coeff[j] = 0;
coeff[i] = 1;
*basis++ = evaluate(dpt, *xvals, coeff, (int)*derivs);
}
}
else {
basis_funcs(dpt, *xvals, basis); /* fast method for value */
basis += *order;
}
*offsets++ = (long)(dpt - knots);
}
free(ldel); free(rdel); free(coeff);
}
void lin_interp(double* x, double* y, double* x0, double* y0, int * nvals)
{
int n = *nvals;
double* firstx = x;
while (n--) {
while (*x < *x0) { x++; y++; }
if (x > firstx) { x--; y--; }
if (*x > * x0) *y0++ = *y + (*(y + 1) - *y) * (*x0 - *x) / (*(x + 1) - *x);
else *y0++ = *y;
x0++;
}
}
The header file includes declaration of functions only.
void diff_table(double* ti, double x, int ndiff);
void basis_funcs(double* ti, double x, double* b);
double evaluate(double* ti, double x, double* a, int nder);
void spline_value(double* knots, double* coeff, int* ncoeff, int* order, double* x, int* nx, int* deriv, double* y);
__declspec(dllexport) void spline_basis(double* knots, int* ncoeff, int* order, double* xvals, int* derivs, int* nx, double* basis, int* offsets);
void lin_interp(double* x, double* y, double* x0, double* y0, int* nvals);
I'm pretty much a layman in C and I'm learning Python. I need to write the routine described below (in C) for Python:
#include <stdio.h>
#include <math.h>
main()
{
float hold[26], hnew[26];
float dt, dx;
float t, s;
float ho;
float time;
float f1, d2h;
int i;
int nx, nlx;
int n, nend;
int kount, kprint;
dt = 5.0;
dx = 10.0;
t = 0.02;
s = 0.002;
nx = 11;
nlx = nx-1;
ho = 16.0;
for( i = 1; i <= nx; i++ )
{
hold[i] = ho;
hnew[i] = ho;
}
hold[nx] = 11.0;
printf("\t\t\t\thead\t\t\t\t time\n\n");
kount = 1;
kprint = 2;
time = dt;
nend = 100;
for( n = 1; n <= nend; n++ )
{
/* update solution */
for( i = 2; i <= nlx; i++ )
{
f1 = dt*t/s;
d2h = ( hold[i+1] - 2.0*hold[i] + hold[i-1])/(dx*dx);
hnew[i] = hold[i] + (f1*d2h);
}
for( i = 1; i <= nlx; i++ )
{
hold[i] = hnew[i];
}
if( kount == kprint )
{
for( i = 1; i <= nx; i++ )
{
printf(" %.2f",hold[i]);
}
printf(" %6.2f\n",time);
kount = 0;
}
time = time + dt;
kount = kount + 1;
}
}
This is my attempt at Python:
import numpy as np
dt = 5.0
dx = 10.0
t = 0.02
s = 0.002
nx = 11
nlx = nx - 1
ho = 16.0
hold = np.zeros(nx+1)
hnew = np.zeros(nx+1)
for i in range(nx):
hold[i] = ho
hnew[i] = ho
hold[nx] = 11.0
However, I can't get over this because I don't know the Python correspondent of the printf function. What would be the correct form of this function in Python? What does it reffer to?
Just print() in Python with .format.
For example:
x, y = 1, 2
print("x = {0}, y = {1}".format(x, y))
Here's the doc
To print similar to C's printf, the following is an example:
f = 3.25645
g = 3.14159265358979
for fl in (f,g):
print(f'{fl:.2f}')
3.26
3.14
The first f in the print is the format specifier. The f in the braces says to consider the number as a float.
it just print() (see a small program below)
squares = []
for x in range(14):
squares.append(x**2)
squares
squares2 = [x**2 for x in range(100)]
print (squares2)
I am writing some code that takes binary data from Python, Pipes it to C++, does some processing on the data, (in this case calculating a mutual information metric) and then pipes the results back to python. While testing I have found that everything works fine if the data I send is a set of 2 arrays with dimensions less than 1500 X 1500, but if I send 2 arrays that are 2K X 2K I get back a lot of corrupted nonsense.
I currently believe the algorithmic portion of the code is fine because it provides the expected answers during testing with small (<=1500 X1500) arrays. That leads me to believe that this is an issue with either the stdin or stdout piping. That maybe I’m passing some intrinsic limit somewhere.
The Python Code and C++ code are below.
Python Code:
import subprocess
import struct
import sys
import numpy as np
#set up the variables needed
bytesPerDouble = 8
sizeX = 2000
sizeY = 2000
offset = sizeX*sizeY
totalBytesPerArray = sizeX*sizeY*bytesPerDouble
totalBytes = totalBytesPerArray*2 #the 2 is because we pass 2 different versions of the 2D array
#setup the testing data array
a = np.zeros(sizeX*sizeY*2, dtype='d')
for i in range(sizeX):
for j in range(sizeY):
a[j+i*sizeY] = i
a[j+i*sizeY+offset] = i
if i % 10 == 0:
a[j+i*sizeY+offset] = j
data = a.tobytes('C')
strTotalBytes = str(totalBytes)
strLineBytes = str(sizeY*bytesPerDouble)
#communicate with c++ code
print("starting C++ code")
command = "C:\Python27\PythonPipes.exe"
proc = subprocess.Popen([command, strTotalBytes, strLineBytes, str(sizeY), str(sizeX)], stdin=subprocess.PIPE,stderr=subprocess.PIPE,stdout=subprocess.PIPE)
ByteBuffer = (data)
proc.stdin.write(ByteBuffer)
print("Reading results back from C++")
for i in range(sizeX):
returnvalues = proc.stdout.read(sizeY*bytesPerDouble)
a = buffer(returnvalues)
b = struct.unpack_from(str(sizeY)+'d', a)
print str(b) + " " + str(i)
print('done')
C++ Code:
Main function:
int main(int argc, char **argv) {
int count = 0;
long totalbytes = stoi(argv[argc-4], nullptr,10); //bytes being transfered
long bytechunk = stoi(argv[argc - 3], nullptr, 10); //bytes being transfered at a time
long height = stoi(argv[argc-2], nullptr, 10); //bytes being transfered at a time
long width = stoi(argv[argc-1], nullptr, 10); //bytes being transfered at a time
long offset = totalbytes / sizeof(double) / 2;
data = new double[totalbytes/sizeof(double)];
int columnindex = 0;
//read in data from pipe
while (count<totalbytes) {
fread(&(data[columnindex]), 1, bytechunk, stdin);
columnindex += bytechunk / sizeof(double);
count += bytechunk;
}
//calculate the data transform
MutualInformation MI = MutualInformation();
MI.Initialize(data, height, width, offset);
MI.calcMI();
count = 0;
//*
//write out data to pipe
columnindex = 0;
while (count<totalbytes/2) {
fwrite(&(MI.getOutput()[columnindex]), 1, bytechunk, stdout);
fflush(stdout);
count += bytechunk;
columnindex += bytechunk/sizeof(double);
}
//*/
delete [] data;
return 0;
}
and in case you need it the actual processing code:
double MutualInformation::calcMI(){
double rvalue = 0.0;
std::map<int, map<int, double>> lHistXY = map<int, map<int, double>>();
std::map<int, double> lHistX = map<int, double>();
std::map<int, double> lHistY = map<int, double>();
typedef std::map<int, std::map<int, double>>::iterator HistXY_iter;
typedef std::map<int, double>::iterator HistY_iter;
//calculate Entropys and MI
double MI = 0.0;
double Hx = 0.0;
double Hy = 0.0;
double Px = 0.0;
double Py = 0.0;
double Pxy = 0.0;
//scan through the image
int ip = 0;
int jp = 0;
int chipsize = 3;
//setup zero array
double * zeros = new double[this->mHeight];
for (int j = 0; j < this->mHeight; j++){
zeros[j] = 0.0;
}
//zero out Output array
for (int i = 0; i < this->mWidth; i++){
memcpy(&(this->mOutput[i*this->mHeight]), zeros, this->mHeight*8);
}
double index = 0.0;
for (int ioutter = chipsize; ioutter < (this->mWidth - chipsize); ioutter++){
//write out processing status
//index = (double)ioutter;
//fwrite(&index, 8, 1, stdout);
//fflush(stdout);
//*
for (int j = chipsize; j < (this->mHeight - chipsize); j++){
//clear the histograms
lHistX.clear();
lHistY.clear();
lHistXY.clear();
//chip out a section of the image
for (int k = -chipsize; k <= chipsize; k++){
for (int l = -chipsize; l <= chipsize; l++){
ip = ioutter + k;
jp = j + l;
//update X histogram
if (lHistX.count(int(this->mData[ip*this->mHeight + jp]))){
lHistX[int(this->mData[ip*this->mHeight + jp])] += 1.0;
}else{
lHistX[int(this->mData[ip*this->mHeight + jp])] = 1.0;
}
//update Y histogram
if (lHistY.count(int(this->mData[ip*this->mHeight + jp+this->mOffset]))){
lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] += 1.0;
}
else{
lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] = 1.0;
}
//update X and Y Histogram
if (lHistXY.count(int(this->mData[ip*this->mHeight + jp]))){
//X Key exists check if Y key exists
if (lHistXY[int(this->mData[ip*this->mHeight + jp])].count(int(this->mData[ip*this->mHeight + jp + this->mOffset]))){
//X & Y keys exist
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] += 1;
}else{
//X exist but Y doesn't
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
}
}else{
//X Key Didn't exist
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
};
}
}
//calculate PMI, Hx, Hy
// iterator->first = key
// iterator->second = value
MI = 0.0;
Hx = 0.0;
Hy = 0.0;
for (HistXY_iter Hist2D_iter = lHistXY.begin(); Hist2D_iter != lHistXY.end(); Hist2D_iter++) {
Px = lHistX[Hist2D_iter->first] / ((double) this->mOffset);
Hx -= Px*log(Px);
for (HistY_iter HistY_iter = Hist2D_iter->second.begin(); HistY_iter != Hist2D_iter->second.end(); HistY_iter++) {
Py = lHistY[HistY_iter->first] / ((double) this->mOffset);
Hy -= Py*log(Py);
Pxy = HistY_iter->second / ((double) this->mOffset);
MI += Pxy*log(Pxy / Py / Px);
}
}
//normalize PMI to max(Hx,Hy) so that the PMI value runs from 0 to 1
if (Hx >= Hy && Hx > 0.0){
MI /= Hx;
}else if(Hy > Hx && Hy > 0.0){
MI /= Hy;
}
else{
MI = 0.0;
}
//write PMI to data output array
if (MI < 1.1){
this->mOutput[ioutter*this->mHeight + j] = MI;
}
else{
this->mOutput[ioutter*this->mHeight + j] = 0.0;
}
}
}
return rvalue;
}
with arrays that return something that makes sense I get output bounded between 0 and 1 like this:
(0.0, 0.0, 0.0, 0.7160627908692593, 0.6376472316395495, 0.5728801401524277,...
with the 2Kx2K or higher arrays I get nonesense like this (even though the code clamps the values between 0 and 1):
(-2.2491400820412374e+228, -2.2491400820412374e+228, -2.2491400820412374e+228, -2.2491400820412374e+228, -2.2491400820412374e+228,...
I would like to know why this code is corrupting the data set after it is assigned between 0.0 and 1, and whether or not it is a piping issue, a stdin/stdout issue, a buffer issue of some sort, or a coding issue I am simply not seeing.
Update I tried passing the data in smaller chunks using the code that Chris suggested with no luck. also of note is that I added a catch for ferror on stdout and it never got tripped so I am pretty sure that the bytes are at least making it to stdout. Is it possible that something else is writing to stdout somehow? maybe an extra byte making its way into stdout while my program is running? I find this doubtful as the errors are appearing consistently on the 4th fwrite read in the 10th entry.
Per Craig's request here is the full C++ code (the full Python Code is already posted): it is sitting in 3 files:
main.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <iostream>
#include "./MutualInformation.h"
double * data;
using namespace std;
void
xxwrite(unsigned char *buf, size_t wlen, FILE *fo)
{
size_t xlen;
for (; wlen > 0; wlen -= xlen, buf += xlen) {
xlen = wlen;
if (xlen > 1024)
xlen = 1024;
xlen = fwrite(buf, 1, xlen, fo);
fflush(fo);
}
}
int main(int argc, char **argv) {
int count = 0;
long totalbytes = stoi(argv[argc-4], nullptr,10); //bytes being transfered
long bytechunk = stoi(argv[argc - 3], nullptr, 10); //bytes being transfered at a time
long height = stoi(argv[argc-2], nullptr, 10); //bytes being transfered at a time
long width = stoi(argv[argc-1], nullptr, 10); //bytes being transfered at a time
long offset = totalbytes / sizeof(double) / 2;
data = new double[totalbytes/sizeof(double)];
int columnindex = 0;
//read in data from pipe
while (count<totalbytes) {
fread(&(data[columnindex]), 1, bytechunk, stdin);
columnindex += bytechunk / sizeof(double);
count += bytechunk;
}
//calculate the data transform
MutualInformation MI = MutualInformation();
MI.Initialize(data, height, width, offset);
MI.calcMI();
count = 0;
columnindex = 0;
while (count<totalbytes/2) {
xxwrite((unsigned char*)&(MI.getOutput()[columnindex]), bytechunk, stdout);
count += bytechunk;
columnindex += bytechunk/sizeof(double);
}
delete [] data;
return 0;
}
MutualInformation.h
#include <map>
using namespace std;
class MutualInformation
{
private:
double * mData;
double * mOutput;
long mHeight;
long mWidth;
long mOffset;
public:
MutualInformation();
~MutualInformation();
bool Initialize(double * data, long Height, long Width, long Offset);
const double * getOutput();
double calcMI();
};
MutualInformation.cpp
#include "MutualInformation.h"
MutualInformation::MutualInformation()
{
this->mData = nullptr;
this->mOutput = nullptr;
this->mHeight = 0;
this->mWidth = 0;
}
MutualInformation::~MutualInformation()
{
delete[] this->mOutput;
}
bool MutualInformation::Initialize(double * data, long Height, long Width, long Offset){
bool rvalue = false;
this->mData = data;
this->mHeight = Height;
this->mWidth = Width;
this->mOffset = Offset;
//allocate output data
this->mOutput = new double[this->mHeight*this->mWidth];
return rvalue;
}
const double * MutualInformation::getOutput(){
return this->mOutput;
}
double MutualInformation::calcMI(){
double rvalue = 0.0;
std::map<int, map<int, double>> lHistXY = map<int, map<int, double>>();
std::map<int, double> lHistX = map<int, double>();
std::map<int, double> lHistY = map<int, double>();
typedef std::map<int, std::map<int, double>>::iterator HistXY_iter;
typedef std::map<int, double>::iterator HistY_iter;
//calculate Entropys and MI
double MI = 0.0;
double Hx = 0.0;
double Hy = 0.0;
double Px = 0.0;
double Py = 0.0;
double Pxy = 0.0;
//scan through the image
int ip = 0;
int jp = 0;
int chipsize = 3;
//setup zero array
double * zeros = new double[this->mHeight];
for (int j = 0; j < this->mHeight; j++){
zeros[j] = 0.0;
}
//zero out Output array
for (int i = 0; i < this->mWidth; i++){
memcpy(&(this->mOutput[i*this->mHeight]), zeros, this->mHeight*8);
}
double index = 0.0;
for (int ioutter = chipsize; ioutter < (this->mWidth - chipsize); ioutter++){
for (int j = chipsize; j < (this->mHeight - chipsize); j++){
//clear the histograms
lHistX.clear();
lHistY.clear();
lHistXY.clear();
//chip out a section of the image
for (int k = -chipsize; k <= chipsize; k++){
for (int l = -chipsize; l <= chipsize; l++){
ip = ioutter + k;
jp = j + l;
//update X histogram
if (lHistX.count(int(this->mData[ip*this->mHeight + jp]))){
lHistX[int(this->mData[ip*this->mHeight + jp])] += 1.0;
}else{
lHistX[int(this->mData[ip*this->mHeight + jp])] = 1.0;
}
//update Y histogram
if (lHistY.count(int(this->mData[ip*this->mHeight + jp+this->mOffset]))){
lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] += 1.0;
}
else{
lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] = 1.0;
}
//update X and Y Histogram
if (lHistXY.count(int(this->mData[ip*this->mHeight + jp]))){
//X Key exists check if Y key exists
if (lHistXY[int(this->mData[ip*this->mHeight + jp])].count(int(this->mData[ip*this->mHeight + jp + this->mOffset]))){
//X & Y keys exist
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] += 1;
}else{
//X exist but Y doesn't
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
}
}else{
//X Key Didn't exist
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
};
}
}
//calculate PMI, Hx, Hy
// iterator->first = key
// iterator->second = value
MI = 0.0;
Hx = 0.0;
Hy = 0.0;
for (HistXY_iter Hist2D_iter = lHistXY.begin(); Hist2D_iter != lHistXY.end(); Hist2D_iter++) {
Px = lHistX[Hist2D_iter->first] / ((double) this->mOffset);
Hx -= Px*log(Px);
for (HistY_iter HistY_iter = Hist2D_iter->second.begin(); HistY_iter != Hist2D_iter->second.end(); HistY_iter++) {
Py = lHistY[HistY_iter->first] / ((double) this->mOffset);
Hy -= Py*log(Py);
Pxy = HistY_iter->second / ((double) this->mOffset);
MI += Pxy*log(Pxy / Py / Px);
}
}
//normalize PMI to max(Hx,Hy) so that the PMI value runs from 0 to 1
if (Hx >= Hy && Hx > 0.0){
MI /= Hx;
}else if(Hy > Hx && Hy > 0.0){
MI /= Hy;
}
else{
MI = 0.0;
}
//write PMI to data output array
if (MI < 1.1){
this->mOutput[ioutter*this->mHeight + j] = MI;
}
else{
this->mOutput[ioutter*this->mHeight + j] = 0.0;
//cout << "problem with output";
}
}
}
//*/
return rvalue;
}
SOLVED By 6502
6502's answer below solved my problem. I needed to explicitly tell Windows to use a binary mode for stdin / stdout. to do that I had to include 2 new header files in my main cpp file.
#include <fcntl.h>
#include <io.h>
add the following lines of code (modified away from 6502's POSIX versions because Visual Studio complained) to the beginning of my main function
_setmode(_fileno(stdout), O_BINARY);
_setmode(_fileno(stdin), O_BINARY);
and then add these lines to my Python code:
import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
The problem is that stdin/stdout in windows are opened in text mode, not in binary mode and therefore will mess up when the character 13 (\r) is sent.
You can set for example binary mode in Python with
import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
and in C++ with
_setmode(fileno(stdout), O_BINARY);
_setmode(fileno(stdin), O_BINARY);
See https://msdn.microsoft.com/en-us/library/tw4k6df8.aspx
Your C++ fwrite code does not account for getting a "short" transfer.
Here's a slight tweak:
//write out data to pipe
columnindex = 0;
while (count < totalbytes / 2) {
wlen = fwrite(&(MI.getOutput()[columnindex]), 1, bytechunk, stdout);
fflush(stdout);
count += wlen;
columnindex += wlen / sizeof(double);
}
Note: You still need to be careful as this would still have issues if wlen returns and it's not a multiple of sizeof(double). For example, if bytechunk were 16 and wlen came back with 14, you'd need an additional fwrite with length 2 before continuing the loop. A generalization of this is just to treat the entire data matrix as a giant byte buffer and loop on that.
Actually, you'll get about the same efficiency with many much smaller transfers that are capped by a fixed (i.e. "known safe amount") of [say] 1024 bytes. This works because the output is a byte stream.
Here's a slightly more general solution that I've often used:
void
xxwrite(void *buf,size_t wlen,FILE *fo)
{
size_t xlen;
for (; wlen > 0; wlen -= xlen, buf += xlen) {
xlen = wlen;
if (xlen > 1024)
xlen = 1024;
xlen = fwrite(buf,1,xlen,fo);
fflush(fo);
}
}
//write out data to pipe
columnindex = 0;
while (count < totalbytes / 2) {
xxwrite(&(MI.getOutput()[columnindex]), bytechunk, stdout);
count += bytechunk;
columnindex += bytechunk / sizeof(double);
}
UPDATE:
I've downloaded all your code and run it. I've got good news and bad news: The code runs fine here, even for a matrix size above 3000. I ran it both using xxwrite and without and the results were the same.
Using my limited python skills, I added some pretty print to your python script (e.g. some line wrap) and had it check every value for range and annotate any bad values. There were none found by the script. Also, visual inspection of the values turned up nothing [this was true before the pretty print, so it hasn't introduced anything]. Just lots of zeros and then blocks in the 0.9 range.
The only difference I can see is that I'm using gcc [and, of course, python] on linux. But, from your script it seems your using Windows [based on the C:\... path for your C++ executable. This shouldn't matter for this application, but I mention it anyway.
So, pipes work here. One thing you might try is to direct the C++ output to a file. Then, have the script read back from the file (i.e. no pipe) and see if that makes a difference. I tend to think not, but ...
Also, I don't know what compiler and python implementation you're using under Windows. Whenever I have to do this, I usually have Cygwin installed as it gives one of the closest implementations of linux/Unix-like environment (i.e. pipes are more likely to work as advertised).
Anyway, here's the modified script. Also note that I added os.getenv to grab alternate matrix sizes and an alternate place for the C++ executable, so that it would work for both of us with minimal pain
#!/usr/bin/python
import subprocess
import struct
import sys
import os
import numpy as np
val = os.getenv("MTX","2000")
sizeX = int(val)
sizeY = sizeX
print "sizeX=%d sizeY=%d" % (sizeX,sizeY)
#set up the variables needed
bytesPerDouble = 8
offset = sizeX*sizeY
totalBytesPerArray = sizeX*sizeY*bytesPerDouble
totalBytes = totalBytesPerArray*2 #the 2 is because we pass 2 different versions of the 2D array
#setup the testing data array
a = np.zeros(sizeX*sizeY*2, dtype='d')
for i in range(sizeX):
for j in range(sizeY):
a[j+i*sizeY] = i
a[j+i*sizeY+offset] = i
if i % 10 == 0:
a[j+i*sizeY+offset] = j
data = a.tobytes('C')
strTotalBytes = str(totalBytes)
strLineBytes = str(sizeY*bytesPerDouble)
#communicate with c++ code
print("starting C++ code")
command = os.getenv("CPGM",None);
if command is None:
command = "C:\Python27\PythonPipes.exe"
proc = subprocess.Popen([command, strTotalBytes, strLineBytes, str(sizeY), str(sizeX)], stdin=subprocess.PIPE,stderr=subprocess.PIPE,stdout=subprocess.PIPE)
ByteBuffer = (data)
proc.stdin.write(ByteBuffer)
def prt(i,b):
hangflg = 0
per = 8
for j in range(0,len(b)):
if ((j % per) == 0):
print("[%d,%d]" % (i,j)),
q = b[j]
print(q),
hangflg = 1
if (q < 0.0) or (q > 1.0):
print("=WTF"),
if ((j % per) == (per - 1)):
print("")
hangflg = 0
if (hangflg):
print("")
print("Reading results back from C++")
for i in range(sizeX):
returnvalues = proc.stdout.read(sizeY*bytesPerDouble)
a = buffer(returnvalues)
b = struct.unpack_from(str(sizeY)+'d', a)
prt(i,b)
###print str(b) + " " + str(i)
###print str(i) + ": " + str(b)
print('done')
somehow I get different results using numpy's linalg.inv and OpenCv's Mat::inv, e.g. given a matrix S:
[747.8561839552103, 359.9317804054358, 204.6165451419812, 241.7376332155144, 126.132733370211, 81.57610562583466;
359.9317804054358, 204.6165451419812, 140.4788277943211, 126.132733370211, 80.55127348381332, 47.7506303038364;
204.6165451419812, 140.4788277943211, 116.7472083846913, 80.55127348381332, 63.86033402962857, 35.56970826526131;
241.7376332155144, 126.132733370211, 80.55127348381332, 81.57610562583466, 47.7506303038364, 29.55106507912703;
126.132733370211, 80.55127348381332, 63.86033402962857, 47.7506303038364, 35.56970826526131, 20.31193086803655;
81.57610562583466, 47.7506303038364, 35.56970826526131, 29.55106507912703, 20.31193086803655, 12]
OpenCV S.inv() results in
[-19562262532715.65, 137.3094415699439, -44015090698406.93, 78249050130618.84, 88030181396160.73, -78249050129617.47;
124.2797501878658, 19.14093204142484, 301.2737811201993, -531.0713785028685, -686.3332686269306, 655.5356524828615;
-44015090698424.45, 330.6065529853437, -99033954070961.39, 176060362793110.9, 198067908140346.2, -176060362790691.7;
78249050130642.06, -583.2397919514979, 176060362793093.2, -312996200521532.5, -352120725583424.9, 312996200517305.3;
88030181396256.19, -744.9706630355842, 198067908140482.2, -352120725583702.1, -396135816277430.2, 352120725578288.9;
-78249050129732.52, 707.7008280168318, -176060362790880.5, 312996200517672.6, 352120725578424.8, -312996200512573.6]
numpy's linalg.inv results in:
[[2685109332201.37, -23.46, 6041495997420.42, -10740437328763.82, -12082991994731.51, 10740437328597.41]
[-32.56, 19.14, -51.60, 96.23, 19.43, 28.24]
[6041495997407.60, -31.13, 13593365994129.85, -24165983989574.97, -27186731988120.73, 24165983989366.80]
[-10740437328747.65, 59.84, -24165983989589.86, 42961749314884.59, 48331967978891.43, -42961749314440.71]
[-12082991994663.29, -21.50, -27186731988024.93, 48331967978691.32, 54373463976152.90, -48331967978849.60]
[10740437328516.33, 64.62, 24165983989235.66, -42961749314181.06, -48331967978757.62, 42961749314608.99]]
Okay, now some detail of how I stumbled across this:
I am working on a routine to fit an ellipse given a point set; I have transferred code from: http://nicky.vanforeest.com/misc/fitEllipse/fitEllipse.html to C++:
template <typename InputIt>
std::array<double, 6> fit_ellipse(InputIt first, InputIt last) {
typedef double value_type;
auto num_points = std::distance(first, last);
cv::Mat_<value_type> D(num_points, 6);
size_t row(0);
for (auto it(first); it != last; ++it, ++row) {
auto &point = *it;
auto &x = std::get<0>(point);
auto &y = std::get<1>(point);
D(row, 0) = x * x;
D(row, 1) = x * y;
D(row, 2) = y * y;
D(row, 3) = x;
D(row, 4) = y;
D(row, 5) = 1.f;
}
auto S = D.t() * D;
cv::Mat_<value_type> C = cv::Mat_<value_type>::zeros(6, 6);
C(0, 2) = C(2, 0) = 2; C(1, 1) = -1;
cv::Mat Sinv = S.inv();
std::cout << "Sinv:" << Sinv << std::endl;
cv::Mat_<value_type> E, V; cv::eigen(Sinv * C, E, V);
int n;
cv::minMaxIdx(cv::abs(E), nullptr, nullptr, nullptr, &n);
return {{V(n, 0), V(n, 1), V(n, 2), V(n, 3), V(n, 4), V(n, 5)}};
}
To comparison the Python code:
import numpy as np
from numpy.linalg import eig, inv
def fitEllipse(x,y):
x = x[:,np.newaxis]
y = y[:,np.newaxis]
D = np.hstack((x*x, x*y, y*y, x, y, np.ones_like(x)))
S = np.dot(D.T,D)
C = np.zeros([6,6])
C[0,2] = C[2,0] = 2; C[1,1] = -1
Sinv = inv(S)
print("Sinv: %s" % Sinv)
E, V = eig(np.dot(Sinv, C))
n = np.argmax(np.abs(E))
a = V[:,n]
return a
The code is run initializing following points:
TEST(fit_ellipse, test_example) {
namespace bmc = boost::math::constants;
typedef std::array<double, 2> point_type;
std::vector<point_type> points;
auto arc = .8;
for (size_t i(0); i < 12; ++i) {
auto r = i * arc * bmc::pi<double>() / 12;
points.push_back({1.5 * std::cos(r) + 2, std::sin(r) + 1});
}
auto a = fit_ellipse(points.begin(), points.end());
std::cout << "Parameters: "
for (auto &p : a)
std::cout << p << ' ';
std::cout << std::endl;
}
Same in Python:
R = np.arange(0, arc * np.pi, arc * np.pi / 12)
x = 1.5 * np.cos(R) + 2
y = np.sin(R) + 1.
print("Parameters: %s" % fitEllipse(x,y))
Any Idea what I am missing out?
I am trying to convert the Java Code to Python Code and i have done it so far. Java Code works but Python Code doesn't work. Please help me.
Python Code
import random
class QLearning():
alpha = 0.1
gamma = 0.9
state_a = 0
state_b = 1
state_c = 2
state_d = 3
state_e = 4
state_f = 5
states_count = 6
states = [state_a, state_b, state_c, state_d, state_e, state_f]
R = [[0 for x in range(states_count)] for x in range(states_count)]
Q = [[0 for x in range(states_count)] for x in range(states_count)]
action_from_a = [state_b, state_d]
action_from_b = [state_a, state_c, state_e]
action_from_c = [state_c]
action_from_d = [state_a, state_e]
action_from_e = [state_b, state_d, state_f]
action_from_f = [state_c, state_e]
actions = [action_from_a, action_from_b, action_from_c, action_from_d, action_from_e, action_from_f]
state_names = ["A","B","C","D","E","F"]
def __init__(self):
self.R[self.state_b][self.state_c] = 100
self.R[self.state_f][self.state_c] = 100
def run(self):
for i in range(1000):
state = random.randrange(self.states_count)
while(state != self.state_c):
actions_from_state = self.actions[state]
index = random.randrange(len(actions_from_state))
action = actions_from_state[index]
next_state = action
q = self.Q_Value(state, action)
max_Q = self.max_q(next_state)
r = self.R_Value(state, action)
value = q + self.alpha * (r + self.gamma * max_Q - q)
self.set_q(state, action, value)
state = next_state
def max_q(self, s):
self.run().actions_from_state = self.actions[s]
max_value = 5
for i in range(len(self.run().actions_from_state)):
self.run().next_state = self.run().actions_from_state[i]
self.run().value = self.Q[s][self.run().next_state]
if self.run().value > max_value:
max_value = self.run().value
return max_value
def policy(self, state):
self.run().actions_from_state = self.actions[state]
max_value = 5
policy_goto_state = state
for i in range(len(self.run().actions_from_state)):
self.run().next_state = self.run().actions_from_state[i]
self.run().value = self.Q[state][self.run().next_state]
if self.run().value > max_value:
max_value = self.run().value
policy_goto_state = self.run().next_state
return policy_goto_state
def Q_Value(self, s,a):
return self.Q[s][a]
def set_q(self, s, a, value):
self.Q[s][a] = value
def R_Value(self, s, a):
return self.R[s][a]
def print_result(self):
print("Print Result")
for i in range(len(self.Q)):
print("Out From (0)".format(self.state_names[i]))
for j in range(len(self.Q[i])):
print(self.Q[i][j])
def show_policy(self):
print("Show Policy")
for i in range(len(self.states)):
fro = self.states[i]
to = self.policy(fro)
print("From {0} goto {1}".format(self.state_names[fro], self.state_names[to]))
obj = QLearning()
obj.run()
obj.print_result()
obj.show_policy()
Java Code
import java.text.DecimalFormat;
import java.util.Random;
public class Qlearning {
final DecimalFormat df = new DecimalFormat("#.##");
// path finding
final double alpha = 0.1;
final double gamma = 0.9;
// states A,B,C,D,E,F
// e.g. from A we can go to B or D
// from C we can only go to C
// C is goal state, reward 100 when B->C or F->C
//
// _______
// |A|B|C|
// |_____|
// |D|E|F|
// |_____|
//
final int stateA = 0;
final int stateB = 1;
final int stateC = 2;
final int stateD = 3;
final int stateE = 4;
final int stateF = 5;
final int statesCount = 6;
final int[] states = new int[]{stateA,stateB,stateC,stateD,stateE,stateF};
// http://en.wikipedia.org/wiki/Q-learning
// http://people.revoledu.com/kardi/tutorial/ReinforcementLearning/Q-Learning.htm
// Q(s,a)= Q(s,a) + alpha * (R(s,a) + gamma * Max(next state, all actions) - Q(s,a))
int[][] R = new int[statesCount][statesCount]; // reward lookup
double[][] Q = new double[statesCount][statesCount]; // Q learning
int[] actionsFromA = new int[] { stateB, stateD };
int[] actionsFromB = new int[] { stateA, stateC, stateE };
int[] actionsFromC = new int[] { stateC };
int[] actionsFromD = new int[] { stateA, stateE };
int[] actionsFromE = new int[] { stateB, stateD, stateF };
int[] actionsFromF = new int[] { stateC, stateE };
int[][] actions = new int[][] { actionsFromA, actionsFromB, actionsFromC,
actionsFromD, actionsFromE, actionsFromF };
String[] stateNames = new String[] { "A", "B", "C", "D", "E", "F" };
public Qlearning() {
init();
}
public void init() {
R[stateB][stateC] = 100; // from b to c
R[stateF][stateC] = 100; // from f to c
}
public static void main(String[] args) {
long BEGIN = System.currentTimeMillis();
Qlearning obj = new Qlearning();
obj.run();
obj.printResult();
obj.showPolicy();
long END = System.currentTimeMillis();
System.out.println("Time: " + (END - BEGIN) / 1000.0 + " sec.");
}
void run() {
/*
1. Set parameter , and environment reward matrix R
2. Initialize matrix Q as zero matrix
3. For each episode: Select random initial state
Do while not reach goal state o
Select one among all possible actions for the current state o
Using this possible action, consider to go to the next state o
Get maximum Q value of this next state based on all possible actions o
Compute o Set the next state as the current state
*/
// For each episode
Random rand = new Random();
for (int i = 0; i < 1000; i++) { // train episodes
// Select random initial state
int state = rand.nextInt(statesCount);
while (state != stateC) // goal state
{
// Select one among all possible actions for the current state
int[] actionsFromState = actions[state];
// Selection strategy is random in this example
int index = rand.nextInt(actionsFromState.length);
int action = actionsFromState[index];
// Action outcome is set to deterministic in this example
// Transition probability is 1
int nextState = action; // data structure
// Using this possible action, consider to go to the next state
double q = Q(state, action);
double maxQ = maxQ(nextState);
int r = R(state, action);
double value = q + alpha * (r + gamma * maxQ - q);
setQ(state, action, value);
// Set the next state as the current state
state = nextState;
}
}
}
double maxQ(int s) {
int[] actionsFromState = actions[s];
double maxValue = Double.MIN_VALUE;
for (int i = 0; i < actionsFromState.length; i++) {
int nextState = actionsFromState[i];
double value = Q[s][nextState];
if (value > maxValue)
maxValue = value;
}
return maxValue;
}
// get policy from state
int policy(int state) {
int[] actionsFromState = actions[state];
double maxValue = Double.MIN_VALUE;
int policyGotoState = state; // default goto self if not found
for (int i = 0; i < actionsFromState.length; i++) {
int nextState = actionsFromState[i];
double value = Q[state][nextState];
if (value > maxValue) {
maxValue = value;
policyGotoState = nextState;
}
}
return policyGotoState;
}
double Q(int s, int a) {
return Q[s][a];
}
void setQ(int s, int a, double value) {
Q[s][a] = value;
}
int R(int s, int a) {
return R[s][a];
}
void printResult() {
System.out.println("Print result");
for (int i = 0; i < Q.length; i++) {
System.out.print("out from " + stateNames[i] + ": ");
for (int j = 0; j < Q[i].length; j++) {
System.out.print(df.format(Q[i][j]) + " ");
}
System.out.println();
}
}
// policy is maxQ(states)
void showPolicy() {
System.out.println("\nshowPolicy");
for (int i = 0; i < states.length; i++) {
int from = states[i];
int to = policy(from);
System.out.println("from "+stateNames[from]+" goto "+stateNames[to]);
}
}
}
Traceback
C:\Python33\python.exe "C:/Users/Ajay/Documents/Python Scripts/RL/QLearning.py"
Traceback (most recent call last):
File "C:/Users/Ajay/Documents/Python Scripts/RL/QLearning.py", line 4, in <module>
class QLearning():
File "C:/Users/Ajay/Documents/Python Scripts/RL/QLearning.py", line 19, in QLearning
R = [[0 for x in range(states_count)] for x in range(states_count)]
File "C:/Users/Ajay/Documents/Python Scripts/RL/QLearning.py", line 19, in <listcomp>
R = [[0 for x in range(states_count)] for x in range(states_count)]
NameError: global name 'states_count' is not defined
To access all of the class attributes you define (i.e. everything between class QLearning and def __init__), you need to use self or the class name:
self.states_count
or
QLearning.states_count
I don't know the algorithm, but it is possible that these class attributes should be instance attributes (i.e. separate for each instance of the class, rather than shared amongst all instances) and therefore defined in __init__ (or other instance methods) using self anyway.