Hi guys, i'm following the CFRM tutorial found here :
http://modelai.gettysburg.edu/2013/cfr/cfr.pdf for calculating NE with chance sampling. I've tried to adapt the tutorial for a head up push fold game, My results are reasonable but ultimately incorrect/too tight compared to results from other software like Holdem Resources and ICMIzer.
I was hoping someone could give any advice about where I potentially may be going wrong as I've been stuck on this for a fairly long time now:
Trainer:
Code:
function train_CFR_HUPOKER(iterations,stacks,bigblind) {
var cards = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52];
var util = 0;
nodeMap = {};
for (var i = 0; i < iterations; i++) {
// Card shuffling, Pretty sure this is correct
for (var c1 = cards.length - 1; c1 > 0; c1--) {
var c2 = Math.floor(Math.random() * (c1 + 1));
var tmp = cards[c1];
cards[c1] = cards[c2];
cards[c2] = tmp;
}
util += CFR_HUPOKER(cards,stacks,bigblind, "", 1, 1);
}
console.log("Average game value: " + util / iterations);
PrintResults(nodeMap);
CFR function:
Code:
function CFR_HUPOKER(cards,stacks,bigblind,history, p0, p1) {
var plays = history.length;
var player = (plays % 2);
var opponent = 1 - player;
if (plays >= 1) {
var terminalPass = history.slice(-1) == 'F';
var varBet = history.slice(-2) == ("AA");
if (terminalPass)
if (player==1) {
return 0.5*bigblind
} else {
return 1.0*bigblind
}
else if (varBet) {
return utilityFunctionHU(cards[player*2], cards[(player*2) + 1],cards[opponent*2], cards[(opponent*2) + 1], stacks)
}
}
var infoSet = cardstrat + " " + history;
var node = nodeMap[infoSet];
if (node == null) {
node = new Node(infoSet);
nodeMap[infoSet] = node;
}
var strategy = node.getStrategy(player == 0 ? p0 : p1);
var util = new Array(NUM_ACTIONS);
var nodeUtil = 0;
for (var a = 0; a < NUM_ACTIONS; a++) {
var nextHistory = history + (a == 0 ? "A" : "F");
util[a] = player == 0
? - CFR_HUPOKER(cards,stacks,bigblind, nextHistory, p0 * strategy[a], p1)
: - CFR_HUPOKER(cards,stacks,bigblind, nextHistory, p0, p1 * strategy[a]);
nodeUtil += strategy[a] * util[a];
}
for (var a = 0; a < NUM_ACTIONS; a++) {
var regret = util[a] - nodeUtil;
node.regretSum[a] += (player == 0 ? p1 : p0) * regret;
}
return nodeUtil;
}
and my node object:
Code:
function Node(infoSet) {
this.infoSet = infoSet;
this.regretSum = new Array(NUM_ACTIONS).fill(0);
this.strategy = new Array(NUM_ACTIONS).fill(0);
this.regretSum = new Array(NUM_ACTIONS).fill(0);
this.strategySum = new Array(NUM_ACTIONS).fill(0);
this.getStrategy = function(realizationWeight) {
var normalizingSum = 0;
for (var a = 0; a < NUMBER_OF_ACTIONS; a++) {
this.strategy[a] = this.regretSum[a] > 0 ? this.regretSum[a] : 0;
normalizingSum += this.strategy[a];
}
for (var a = 0; a < NUMBER_OF_ACTIONS; a++) {
if (normalizingSum > 0)
this.strategy[a] /= normalizingSum;
else
this.strategy[a] = 1.0 / 2;
this.strategySum[a] += this.strategy[a] * realizationWeight;
}
return this.strategy;
};
this.getAverageStrategy = function() {
var avgStrategy = new Array(NUM_ACTIONS);
var normalizingSum = 0;
for (var a = 0; a < NUM_ACTIONS; a++) {
normalizingSum += this.strategySum[a];
}
for (var a = 0; a < NUM_ACTIONS; a++) {
if (normalizingSum > 0) {
avgStrategy[a] = this.strategySum[a] / normalizingSum
} else {
avgStrategy[a] = 1.0 / NUM_ACTIONS;
}
}
return avgStrategy
};
this.toString = function() {
return this.getAverageStrategy().toString();
}
}
This utility function:
Quote:
function utilityFunctionHU(c0,c1,c2,c3,stacks) {
var ind_1 = GetIndex(c0,c1,c0%4==c1%4);
var ind_2 = GetIndex(c2,c3,c2%4==c3%4);
var count = PWin[ind_1][ind_2]+ PWin[ind_2][ind_1] + PTie[ind_2][ind_1]
return (2.0 * (PWin[ind_1][ind_2]/count - (PWin[ind_2][ind_1]/count)) * stacks) + ((PTie[ind_1][ind_2]/count) * stacks);
}
This is all mostly from the above tutorial link and so i really can't see why my results aren't the same as other results. For example for 10 BB's player 0 arrives at the following strategy:
Code:
A K Q J T 9 8 7 6 5 4 3 2
1 1 1 1 1 1 1 1 1 1 1 1 1 A
1 1 1 1 1 1 1 1 1 1 1 1 1 K
1 1 1 1 1 1 1 1 1 1 1 1 0 Q
1 1 1 1 1 1 1 1 1 1 0 0 0 J
1 1 1 1 1 1 1 1 1 0 0 0 0 T
1 1 1 1 1 1 1 1 1 1 0 0 0 9
1 1 0 0 1 1 1 1 1 1 0 0 0 8
1 0 0 0 0 0 0 1 1 1 0 0 0 7
1 0 0 0 0 0 0 0 1 1 1 0 0 6
1 0 0 0 0 0 0 0 0 1 1 0 0 5
1 0 0 0 0 0 0 0 0 0 1 0 0 4
1 0 0 0 0 0 0 0 0 0 0 1 0 3
1 0 0 0 0 0 0 0 0 0 0 0 1 2
Which is close but incorrect, according to other sources its a (58.4%) 22+,A2+,K2+,Q2s+,Q7o+,J3s+,J8o+,T4s+,T8o+,95s+,97o +,84s+,87o,74s+,76o,64s+,53s+,43s solution.
Any help is greatly appreciated if any game theory experts notice anything. What doesn't make sense to me is when our strategy is always fold, and the utility of shoving is say +800 chips. We have no summed regret for not shoving because the probability p0 is 0 which seems intuitively incorrect but who knows. I think the issue is not my utility function, i did some testing, the probability matrices i have seems to be correct...
Also I do realise it's fairly absurd to ask someone else to debug code they didn't write but it's sort of background to the what I'm really asking in a general sense if it makes sense for the summed regrets at this line:
Code:
node.regretSum[a] += (player == 0 ? p1 : p0) * regret;
to be 0 when our regret value is large, It's something I don't quite understand and someone with a better GT background could probably answer easier.