Pain in the Nash (C++)
So called because the fact that I had to write my own Nash equilibrium solver was a real pain. I'm amazed that there aren't any readily-available Nash-solving libraries!
#include <fstream>
#include <iostream>
#include <vector>
#include <array>
#include <random>
#include <utility>
typedef double NumT;
static const NumT EPSILON = 1e-5;
struct Index {
int me;
int them;
Index(int me, int them) : me(me), them(them) {}
};
struct Value {
NumT me;
NumT them;
Value(void) : me(0), them(0) {}
Value(NumT me, NumT them) : me(me), them(them) {}
};
template <int subDimMe, int subDimThem>
struct Game {
const std::array<NumT, 9> *valuesMe;
const std::array<NumT, 9> *valuesThemT;
std::array<int, subDimMe> coordsMe;
std::array<int, subDimThem> coordsThem;
Game(
const std::array<NumT, 9> *valuesMe,
const std::array<NumT, 9> *valuesThemT
)
: valuesMe(valuesMe)
, valuesThemT(valuesThemT)
, coordsMe{}
, coordsThem{}
{}
Index baseIndex(Index i) const {
return Index(coordsMe[i.me], coordsThem[i.them]);
}
Value at(Index i) const {
Index i2 = baseIndex(i);
return Value(
(*valuesMe)[i2.me * 3 + i2.them],
(*valuesThemT)[i2.me + i2.them * 3]
);
}
Game<2, 2> subgame22(int me0, int me1, int them0, int them1) const {
Game<2, 2> b(valuesMe, valuesThemT);
b.coordsMe[0] = coordsMe[me0];
b.coordsMe[1] = coordsMe[me1];
b.coordsThem[0] = coordsThem[them0];
b.coordsThem[1] = coordsThem[them1];
return b;
}
};
struct Strategy {
std::array<NumT, 3> probMe;
std::array<NumT, 3> probThem;
Value expectedValue;
bool valid;
Strategy(void)
: probMe{}
, probThem{}
, expectedValue()
, valid(false)
{}
void findBestMe(const Strategy &b) {
if(b.valid && (!valid || b.expectedValue.me > expectedValue.me)) {
*this = b;
}
}
};
template <int dimMe, int dimThem>
Strategy nash_pure(const Game<dimMe, dimThem> &g) {
Strategy s;
int choiceMe = -1;
int choiceThem = 0;
for(int me = 0; me < dimMe; ++ me) {
for(int them = 0; them < dimThem; ++ them) {
const Value &v = g.at(Index(me, them));
bool valid = true;
for(int me2 = 0; me2 < dimMe; ++ me2) {
if(g.at(Index(me2, them)).me > v.me) {
valid = false;
}
}
for(int them2 = 0; them2 < dimThem; ++ them2) {
if(g.at(Index(me, them2)).them > v.them) {
valid = false;
}
}
if(valid) {
if(choiceMe == -1 || v.me > s.expectedValue.me) {
s.expectedValue = v;
choiceMe = me;
choiceThem = them;
}
}
}
}
if(choiceMe != -1) {
Index iBase = g.baseIndex(Index(choiceMe, choiceThem));
s.probMe[iBase.me] = 1;
s.probThem[iBase.them] = 1;
s.valid = true;
}
return s;
}
Strategy nash_mixed(const Game<2, 2> &g) {
// P Q
// p a A b B
// q c C d D
Value A = g.at(Index(0, 0));
Value B = g.at(Index(0, 1));
Value C = g.at(Index(1, 0));
Value D = g.at(Index(1, 1));
// q = 1-p, Q = 1-P
// Pick p such that choice of P,Q is arbitrary
// p*A+(1-p)*C = p*B+(1-p)*D
// p*A+C-p*C = p*B+D-p*D
// p*(A+D-B-C) = D-C
// p = (D-C) / (A+D-B-C)
NumT p = (D.them - C.them) / (A.them + D.them - B.them - C.them);
// P*a+(1-P)*b = P*c+(1-P)*d
// P*a+b-P*b = P*c+d-P*d
// P*(a+d-b-c) = d-b
// P = (d-b) / (a+d-b-c)
NumT P = (D.me - B.me) / (A.me + D.me - B.me - C.me);
Strategy s;
if(p >= -EPSILON && p <= 1 + EPSILON && P >= -EPSILON && P <= 1 + EPSILON) {
if(p <= 0) {
p = 0;
} else if(p >= 1) {
p = 1;
}
if(P <= 0) {
P = 0;
} else if(P >= 1) {
P = 1;
}
Index iBase0 = g.baseIndex(Index(0, 0));
Index iBase1 = g.baseIndex(Index(1, 1));
s.probMe[iBase0.me] = p;
s.probMe[iBase1.me] = 1 - p;
s.probThem[iBase0.them] = P;
s.probThem[iBase1.them] = 1 - P;
s.expectedValue = Value(
P * A.me + (1 - P) * B.me,
p * A.them + (1 - p) * C.them
);
s.valid = true;
}
return s;
}
Strategy nash_mixed(const Game<3, 3> &g) {
// P Q R
// p a A b B c C
// q d D e E f F
// r g G h H i I
Value A = g.at(Index(0, 0));
Value B = g.at(Index(0, 1));
Value C = g.at(Index(0, 2));
Value D = g.at(Index(1, 0));
Value E = g.at(Index(1, 1));
Value F = g.at(Index(1, 2));
Value G = g.at(Index(2, 0));
Value H = g.at(Index(2, 1));
Value I = g.at(Index(2, 2));
// r = 1-p-q, R = 1-P-Q
// Pick p,q such that choice of P,Q,R is arbitrary
NumT q = ((
+ A.them * (I.them-H.them)
+ G.them * (B.them-C.them)
- B.them*I.them
+ H.them*C.them
) / (
(G.them+E.them-D.them-H.them) * (B.them+I.them-H.them-C.them) -
(H.them+F.them-E.them-I.them) * (A.them+H.them-G.them-B.them)
));
NumT p = (
((G.them+E.them-D.them-H.them) * q + (H.them-G.them)) /
(A.them+H.them-G.them-B.them)
);
NumT Q = ((
+ A.me * (I.me-F.me)
+ C.me * (D.me-G.me)
- D.me*I.me
+ F.me*G.me
) / (
(C.me+E.me-B.me-F.me) * (D.me+I.me-F.me-G.me) -
(F.me+H.me-E.me-I.me) * (A.me+F.me-C.me-D.me)
));
NumT P = (
((C.me+E.me-B.me-F.me) * Q + (F.me-C.me)) /
(A.me+F.me-C.me-D.me)
);
Strategy s;
if(
p >= -EPSILON && q >= -EPSILON && p + q <= 1 + EPSILON &&
P >= -EPSILON && Q >= -EPSILON && P + Q <= 1 + EPSILON
) {
if(p <= 0) { p = 0; }
if(q <= 0) { q = 0; }
if(P <= 0) { P = 0; }
if(Q <= 0) { Q = 0; }
if(p + q >= 1) {
if(p > q) {
p = 1 - q;
} else {
q = 1 - p;
}
}
if(P + Q >= 1) {
if(P > Q) {
P = 1 - Q;
} else {
Q = 1 - P;
}
}
Index iBase0 = g.baseIndex(Index(0, 0));
s.probMe[iBase0.me] = p;
s.probThem[iBase0.them] = P;
Index iBase1 = g.baseIndex(Index(1, 1));
s.probMe[iBase1.me] = q;
s.probThem[iBase1.them] = Q;
Index iBase2 = g.baseIndex(Index(2, 2));
s.probMe[iBase2.me] = 1 - p - q;
s.probThem[iBase2.them] = 1 - P - Q;
s.expectedValue = Value(
A.me * P + B.me * Q + C.me * (1 - P - Q),
A.them * p + D.them * q + G.them * (1 - p - q)
);
s.valid = true;
}
return s;
}
template <int dimMe, int dimThem>
Strategy nash_validate(Strategy &&s, const Game<dimMe, dimThem> &g, Index unused) {
if(!s.valid) {
return s;
}
NumT exp;
exp = 0;
for(int them = 0; them < dimThem; ++ them) {
exp += s.probThem[them] * g.at(Index(unused.me, them)).me;
}
if(exp > s.expectedValue.me) {
s.valid = false;
return s;
}
exp = 0;
for(int me = 0; me < dimMe; ++ me) {
exp += s.probMe[me] * g.at(Index(me, unused.them)).them;
}
if(exp > s.expectedValue.them) {
s.valid = false;
return s;
}
return s;
}
Strategy nash(const Game<2, 2> &g, bool verbose) {
Strategy s = nash_mixed(g);
s.findBestMe(nash_pure(g));
if(!s.valid && verbose) {
std::cerr << "No nash equilibrium found!" << std::endl;
}
return s;
}
Strategy nash(const Game<3, 3> &g, bool verbose) {
Strategy s = nash_mixed(g);
s.findBestMe(nash_validate(nash_mixed(g.subgame22(1, 2, 1, 2)), g, Index(0, 0)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(1, 2, 0, 2)), g, Index(0, 1)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(1, 2, 0, 1)), g, Index(0, 2)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 2, 1, 2)), g, Index(1, 0)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 2, 0, 2)), g, Index(1, 1)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 2, 0, 1)), g, Index(1, 2)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 1, 1, 2)), g, Index(2, 0)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 1, 0, 2)), g, Index(2, 1)));
s.findBestMe(nash_validate(nash_mixed(g.subgame22(0, 1, 0, 1)), g, Index(2, 2)));
s.findBestMe(nash_pure(g));
if(!s.valid && verbose) {
// theory says this should never happen, but fp precision makes it possible
std::cerr << "No nash equilibrium found!" << std::endl;
}
return s;
}
struct PlayerState {
int balls;
int ducks;
PlayerState(int balls, int ducks) : balls(balls), ducks(ducks) {}
PlayerState doReload(int maxBalls) const {
return PlayerState(std::min(balls + 1, maxBalls), ducks);
}
PlayerState doThrow(void) const {
return PlayerState(std::max(balls - 1, 0), ducks);
}
PlayerState doDuck(void) const {
return PlayerState(balls, std::max(ducks - 1, 0));
}
std::array<double,3> flail(int maxBalls) const {
// opponent has obvious win;
// try stuff at random and hope the opponent is bad
(void) ducks;
int options = 0;
if(balls > 0) {
++ options;
}
if(balls < maxBalls) {
++ options;
}
if(ducks > 0) {
++ options;
}
std::array<double,3> p{};
if(balls < balls) {
p[0] = 1.0f / options;
}
if(balls > 0) {
p[1] = 1.0f / options;
}
return p;
}
};
class GameStore {
protected:
const int balls;
const int ducks;
const std::size_t playerStates;
const std::size_t gameStates;
public:
static std::string filename(int turn) {
return "nashdata_" + std::to_string(turn) + ".dat";
}
GameStore(int maxBalls, int maxDucks)
: balls(maxBalls)
, ducks(maxDucks)
, playerStates((balls + 1) * (ducks + 1))
, gameStates(playerStates * playerStates)
{}
std::size_t playerIndex(const PlayerState &p) const {
return p.balls * (ducks + 1) + p.ducks;
}
std::size_t gameIndex(const PlayerState &me, const PlayerState &them) const {
return playerIndex(me) * playerStates + playerIndex(them);
}
std::size_t fileIndex(const PlayerState &me, const PlayerState &them) const {
return 2 + gameIndex(me, them) * 2;
}
PlayerState stateFromPlayerIndex(std::size_t i) const {
return PlayerState(i / (ducks + 1), i % (ducks + 1));
}
std::pair<PlayerState, PlayerState> stateFromGameIndex(std::size_t i) const {
return std::make_pair(
stateFromPlayerIndex(i / playerStates),
stateFromPlayerIndex(i % playerStates)
);
}
std::pair<PlayerState, PlayerState> stateFromFileIndex(std::size_t i) const {
return stateFromGameIndex((i - 2) / 2);
}
};
class Generator : public GameStore {
static char toDat(NumT v) {
int iv = int(v * 256.0);
return char(std::max(std::min(iv, 255), 0));
}
std::vector<Value> next;
public:
Generator(int maxBalls, int maxDucks)
: GameStore(maxBalls, maxDucks)
, next()
{}
const Value &nextGame(const PlayerState &me, const PlayerState &them) const {
return next[gameIndex(me, them)];
}
void make_probabilities(
std::array<NumT, 9> &g,
const PlayerState &me,
const PlayerState &them
) const {
const int RELOAD = 0;
const int THROW = 1;
const int DUCK = 2;
g[RELOAD * 3 + RELOAD] =
nextGame(me.doReload(balls), them.doReload(balls)).me;
g[RELOAD * 3 + THROW] =
(them.balls > 0) ? -1
: nextGame(me.doReload(balls), them.doThrow()).me;
g[RELOAD * 3 + DUCK] =
nextGame(me.doReload(balls), them.doDuck()).me;
g[THROW * 3 + RELOAD] =
(me.balls > 0) ? 1
: nextGame(me.doThrow(), them.doReload(balls)).me;
g[THROW * 3 + THROW] =
((me.balls > 0) == (them.balls > 0))
? nextGame(me.doThrow(), them.doThrow()).me
: (me.balls > 0) ? 1 : -1;
g[THROW * 3 + DUCK] =
(me.balls > 0 && them.ducks == 0) ? 1
: nextGame(me.doThrow(), them.doDuck()).me;
g[DUCK * 3 + RELOAD] =
nextGame(me.doDuck(), them.doReload(balls)).me;
g[DUCK * 3 + THROW] =
(them.balls > 0 && me.ducks == 0) ? -1
: nextGame(me.doDuck(), them.doThrow()).me;
g[DUCK * 3 + DUCK] =
nextGame(me.doDuck(), them.doDuck()).me;
}
Game<3, 3> make_game(const PlayerState &me, const PlayerState &them) const {
static std::array<NumT, 9> globalValuesMe;
static std::array<NumT, 9> globalValuesThemT;
#pragma omp threadprivate(globalValuesMe)
#pragma omp threadprivate(globalValuesThemT)
make_probabilities(globalValuesMe, me, them);
make_probabilities(globalValuesThemT, them, me);
Game<3, 3> g(&globalValuesMe, &globalValuesThemT);
for(int i = 0; i < 3; ++ i) {
g.coordsMe[i] = i;
g.coordsThem[i] = i;
}
return g;
}
Strategy solve(const PlayerState &me, const PlayerState &them, bool verbose) const {
if(me.balls > them.balls + them.ducks) { // obvious answer
Strategy s;
s.probMe[1] = 1;
s.probThem = them.flail(balls);
s.expectedValue = Value(1, -1);
return s;
} else if(them.balls > me.balls + me.ducks) { // uh-oh
Strategy s;
s.probThem[1] = 1;
s.probMe = me.flail(balls);
s.expectedValue = Value(-1, 1);
return s;
} else if(me.balls == 0 && them.balls == 0) { // obvious answer
Strategy s;
s.probMe[0] = 1;
s.probThem[0] = 1;
s.expectedValue = nextGame(me.doReload(balls), them.doReload(balls));
return s;
} else {
return nash(make_game(me, them), verbose);
}
}
void generate(int turns, bool saveAll, bool verbose) {
next.clear();
next.resize(gameStates);
std::vector<Value> current(gameStates);
std::vector<char> data(2 + gameStates * 2);
for(std::size_t turn = turns; (turn --) > 0;) {
if(verbose) {
std::cerr << "Generating for turn " << turn << "..." << std::endl;
}
NumT maxDiff = 0;
NumT msd = 0;
data[0] = balls;
data[1] = ducks;
#pragma omp parallel for reduction(+:msd), reduction(max:maxDiff)
for(std::size_t meBalls = 0; meBalls < balls + 1; ++ meBalls) {
for(std::size_t meDucks = 0; meDucks < ducks + 1; ++ meDucks) {
const PlayerState me(meBalls, meDucks);
for(std::size_t themBalls = 0; themBalls < balls + 1; ++ themBalls) {
for(std::size_t themDucks = 0; themDucks < ducks + 1; ++ themDucks) {
const PlayerState them(themBalls, themDucks);
const std::size_t p1 = gameIndex(me, them);
Strategy s = solve(me, them, verbose);
NumT diff;
data[2+p1*2 ] = toDat(s.probMe[0]);
data[2+p1*2+1] = toDat(s.probMe[0] + s.probMe[1]);
current[p1] = s.expectedValue;
diff = current[p1].me - next[p1].me;
msd += diff * diff;
maxDiff = std::max(maxDiff, std::abs(diff));
}
}
}
}
if(saveAll) {
std::ofstream fs(filename(turn).c_str(), std::ios_base::binary);
fs.write(&data[0], data.size());
fs.close();
}
if(verbose) {
std::cerr
<< "Expectations changed by at most " << maxDiff
<< " (RMSD: " << std::sqrt(msd / gameStates) << ")" << std::endl;
}
if(maxDiff < 0.0001f) {
if(verbose) {
std::cerr << "Expectations have converged. Stopping." << std::endl;
}
break;
}
std::swap(next, current);
}
// Always save turn 0 with the final converged expectations
std::ofstream fs(filename(0).c_str(), std::ios_base::binary);
fs.write(&data[0], data.size());
fs.close();
}
};
void open_file(std::ifstream &target, int turn, int maxDucks, int maxBalls) {
target.open(GameStore::filename(turn).c_str(), std::ios::binary);
if(target.is_open()) {
return;
}
target.open(GameStore::filename(0).c_str(), std::ios::binary);
if(target.is_open()) {
return;
}
Generator(maxBalls, maxDucks).generate(200, false, false);
target.open(GameStore::filename(0).c_str(), std::ios::binary);
}
int choose(int turn, const PlayerState &me, const PlayerState &them, int maxBalls) {
std::ifstream fs;
open_file(fs, turn, std::max(me.ducks, them.ducks), maxBalls);
unsigned char balls = fs.get();
unsigned char ducks = fs.get();
fs.seekg(GameStore(balls, ducks).fileIndex(me, them));
unsigned char p0 = fs.get();
unsigned char p1 = fs.get();
fs.close();
// only 1 random number per execution; no need to seed a PRNG
std::random_device rand;
int v = std::uniform_int_distribution<int>(0, 254)(rand);
if(v < p0) {
return 0;
} else if(v < p1) {
return 1;
} else {
return 2;
}
}
int main(int argc, const char *const *argv) {
if(argc == 4) { // maxTurns, maxBalls, maxDucks
Generator(atoi(argv[2]), atoi(argv[3])).generate(atoi(argv[1]), true, true);
return 0;
}
if(argc == 7) { // turn, meBalls, themBalls, meDucks, themDucks, maxBalls
std::cout << choose(
atoi(argv[1]),
PlayerState(atoi(argv[2]), atoi(argv[4])),
PlayerState(atoi(argv[3]), atoi(argv[5])),
atoi(argv[6])
) << std::endl;
return 0;
}
return 1;
}
Compile as C++11 or better. For performance, it's good to compile with OpenMP support (but this is just for speed; it's not required)
g++ -std=c++11 -fopenmp pain_in_the_nash.cpp -o pain_in_the_nash
This uses Nash equilibria to decide what to do on each turn, which means that in theory it will always win or draw in the long run (over many games), no matter what strategy the opponent uses. Whether that's the case in practice depends on whether I made any mistakes in the implementation. However, since this KoTH competition only has a single round against each opponent, it probably won't do very well on the leaderboard.
My original idea was to have a simple valuation function for each game state (e.g. each ball is worth +b, each duck is +d), but this leads to obvious problems figuring out what those valuations should be, and means it can't act on diminishing returns of gathering more and more balls, etc. So instead, this will analyse the entire game tree, working backwards from turn 1000, and fill in the actual valuations based on how each game could pan out.
The result is that I have absolutely no idea what strategy this uses, except for a couple of hard-coded "obvious" behaviours (throw snowballs if you have more balls than your opponent has balls+ducks, and reload if you're both out of snowballs). If anybody wants to analyse the dataset it produces I imagine there's some interesting behaviour to discover!
Testing this against "Save One" shows that it does indeed win in the long-run, but only by a small margin (514 wins, 486 losses, 0 draws in the first batch of 1000 games, and 509 wins, 491 losses, 0 draws in the second).
Important!
This will work out-of-the-box, but that's not a great idea. It takes about 9 minutes on my moderately-developer-spec laptop to generate the full game tree. But it will save the final probabilities into a file once they're generated, and after that each turn is just generating a random number and comparing it against 2 bytes, so it's super-fast.
To shortcut all that, just download this file (3.5MB) and put it in the directory with the executable.
Or you can generate it yourself by running:
./pain_in_the_nash 1000 50 25
Which will save one file per turn, until convergence. Note that each file is 3.5MB and it will converge at turn 720 (i.e. 280 files, ~1GB), and since most games don't get anywhere near turn 720, the pre-convergence files have very low importance.
Comments are not for extended discussion; this conversation has been moved to chat.
– Dennis – 2017-05-16T21:47:27.030@HyperNeutrino More questions: I thought the "judging standard" would be 50 snowballs and 25 ducks? And why is there a draw sometimes after ~18 rounds? – CommonGuy – 2017-05-17T14:25:18.243
@Manu Ehh crap I forgot to change the settings in my VM arguments. And also, that's because if they go into an endless loop of snowball collisions, it ends it after 10 rounds of repeating a period-1 or period-2 loop. – HyperNeutrino – 2017-05-17T14:28:04.453
1So, will there be another round? Cause i wanna upload my bot and would be curious how well he would work. – erbsenhirn – 2017-05-18T12:00:00.960
@erbsenhirn If you upload a bot and ping me in chat or on The Nineteenth Byte, and I'll run another run.
– HyperNeutrino – 2017-05-18T12:06:36.577