Joint inference of biases and preferences II

Restaurant Choice: Time-inconsistent vs. optimal MDP agents

Returning to the MDP Restaurant Choice problem, we compare a model that assumes an optimal, non-discounting MDP agent to a model that includes both time-inconsistent and optimal agents. We also consider models that expand the set of preferences the agent can have.

Assume discounting, infer “Naive” or “Sophisticated”

Before making a direct comparison, we demonstrate that we can infer the preferences of time-inconsistent agents from observations of their behavior.

First we condition on the path where the agent moves to Donut North. We call this the Naive path because it is distinctive to the Naive hyperbolic discounter (who is tempted by Donut North on the way to Veg):

///fold: restaurant choice MDP, naiveTrajectory
// Cell markers used to lay out the grid below: ___ is the single-space string
// placed in ordinary cells, and each restaurant is an object carrying its name.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

// Layout handed to makeGridWorldMDP. The '#' cells are presumably walls --
// confirm against makeGridWorldMDP.
var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the Restaurant Choice MDP from the grid and the options below.
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});

// Observed path as a list of [state, action] pairs. The surrounding text
// describes it as the path on which the agent moves to Donut North.
var naiveTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"u"],
  [{"loc":[3,4],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"u"],
  [{"loc":[3,5],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[3,4]},"l"],
  [{"loc":[2,5],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[3,5],"timeAtRestaurant":0},"l"],
  [{"loc":[2,5],"terminateAfterAction":true,"timeLeft":6,"previousLoc":[2,5],"timeAtRestaurant":1},"l"]
];
///
// Visualize the world together with the observed trajectory.
viz.gridworld(mdp.world, { trajectory: naiveTrajectory });

For inference, we specialize the approach in the previous chapter for agents in MDPs that are potentially time inconsistent. So we infer $\nu$ and $k$ (the hyperbolic discounting parameters) but not the initial belief state $b_0$. The function exampleGetPosterior is a slightly simplified version of the library function we use below.

// Joint posterior over an MDP agent's utilities, softmax noise and
// hyperbolic-discounting parameters, given observed state-action pairs.
//
// mdp: object providing `world` and `makeUtilityFunction`.
// prior: object of samplers (functions of no arguments):
//   - `utility` (legacy key `priorUtility`) returns a utility table with
//     'Veg' and 'Donut N' entries,
//   - `discounting` returns { discount, sophisticatedOrNaive },
//   - `alpha` (legacy key `priorAlpha`) returns the softmax parameter.
// observedStateAction: list of [state, action] pairs to condition on.
//
// Returns the result of Infer over objects of the form
//   { utility, sophisticatedOrNaive, discount, alpha, vegMinusDonut }.
var exampleGetPosterior = function(mdp, prior, observedStateAction){
  var world = mdp.world;
  var makeUtilityFunction = mdp.makeUtilityFunction;
  return Infer({ model() {

    // Sample every parameter from the prior exactly once, so that the values
    // used to build the agent are the same values reported in the result.
    var priorUtility = prior.priorUtility || prior.utility;
    var utilityTable = priorUtility();
    var priorDiscounting = prior.discounting;
    var discounting = priorDiscounting();
    var priorAlpha = prior.priorAlpha || prior.alpha;
    var alpha = priorAlpha();

    // Create agent with those parameters
    var agent = makeMDPAgent({
      utility: makeUtilityFunction(utilityTable),
      alpha: alpha,
      discount: discounting.discount,
      sophisticatedOrNaive: discounting.sophisticatedOrNaive
    }, world);

    var agentAction = agent.act;

    // Condition on observed actions (the action distribution is queried with
    // second argument 0 for every observed state)
    map(function(stateAction) {
      var state = stateAction[0];
      var action = stateAction[1];
      observe(agentAction(state, 0), action);
    }, observedStateAction);

    // Summary statistic: summed Veg utilities minus summed Donut N utilities
    var vegMinusDonut = sum(utilityTable['Veg']) - sum(utilityTable['Donut N']);

    return {
      utility: utilityTable,
      sophisticatedOrNaive: discounting.sophisticatedOrNaive,
      discount: discounting.discount,
      alpha,
      vegMinusDonut,
    };
  }});
};

This inference function allows for inference over the softmax parameter ($\alpha$ or alpha) and the discount constant ($k$ or discount). For this example, we fix these values so that the agent has low noise ($\alpha=1000$) and the discount constant is $k=1$. We also fix the timeCost utility to be small and negative and Noodle’s utility to be negative. We infer only the agent’s utilities and whether they are Naive or Sophisticated.

///fold: Call to hyperbolic library function and helper display function
// Cell markers used to lay out the grid below: ___ is the single-space string
// placed in ordinary cells, and each restaurant is an object carrying its name.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

// Layout handed to makeGridWorldMDP. The '#' cells are presumably walls --
// confirm against makeGridWorldMDP.
var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the Restaurant Choice MDP from the grid and the options below.
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});

// Observed path as a list of [state, action] pairs (the "Naive" path that
// this code box conditions on).
var naiveTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"u"],
  [{"loc":[3,4],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"u"],
  [{"loc":[3,5],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[3,4]},"l"],
  [{"loc":[2,5],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[3,5],"timeAtRestaurant":0},"l"],
  [{"loc":[2,5],"terminateAfterAction":true,"timeLeft":6,"previousLoc":[2,5],"timeAtRestaurant":1},"l"]
];

// Fetch the library's inference helpers and keep a handle on its getPosterior.
var restaurantHyperbolicInfer = getRestaurantHyperbolicInfer();
var getPosterior = restaurantHyperbolicInfer.getPosterior;

// Prints the highest-probability (MAP) utilities for Veg and Donut N under
// the prior and the posterior, then draws one bar chart per summary variable
// (sophisticatedOrNaive, vegMinusDonut, donutTempting).
//
// priorDist, posteriorDist: distributions that support `.MAP()` (with a
//   `utility` field on the MAP value) and can be passed to getMarginalObject.
//
// Every chart lists the prior rows first and the posterior rows second, so
// the three charts are built the same way.
var displayResults = function(priorDist, posteriorDist) {

  var priorUtility = priorDist.MAP().val.utility;
  print('Prior highest-probability utility for Veg: ' + priorUtility['Veg']
        + '. Donut: ' + priorUtility['Donut N'] + ' \n');

  var posteriorUtility = posteriorDist.MAP().val.utility;
  print('Posterior highest-probability utility for Veg: '
        + posteriorUtility['Veg'] + '. Donut: ' + posteriorUtility['Donut N']
        + ' \n');

  // Probability that `dist` assigns to the one-key object `x`, read off the
  // marginal for that key.
  var marginalProb = function(dist, x) {
    var label = _.keys(x)[0];
    var marginal = getMarginalObject(dist, label);
    return Math.exp(marginal.score(x));
  };

  // Rows for the sophisticatedOrNaive chart, tagged with `distribution`.
  var sophisticationRows = function(dist, distribution) {
    return map(function(x) {
      return {
        sophisticatedOrNaive: x,
        probability: marginalProb(dist, { sophisticatedOrNaive: x }),
        distribution: distribution
      };
    }, ['naive', 'sophisticated']);
  };

  var sophisticatedOrNaiveDataTable = append(
    sophisticationRows(priorDist, 'prior'),
    sophisticationRows(posteriorDist, 'posterior'));

  viz.bar(sophisticatedOrNaiveDataTable, { groupBy: 'distribution' });

  // Rows for the vegMinusDonut chart over the fixed list of differences.
  var vegMinusDonutRows = function(dist, distribution) {
    return map(function(x) {
      return {
        vegMinusDonut: x,
        probability: marginalProb(dist, { vegMinusDonut: x }),
        distribution: distribution
      };
    }, [-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60]);
  };

  var vegMinusDonutDataTable = append(
    vegMinusDonutRows(priorDist, 'prior'),
    vegMinusDonutRows(posteriorDist, 'posterior'));

  viz.bar(vegMinusDonutDataTable, { groupBy: 'distribution' });

  // Rows for the donutTempting chart.
  var donutTemptingRows = function(dist, distribution) {
    return map(function(x) {
      return {
        donutTempting: x,
        probability: marginalProb(dist, { donutTempting: x }),
        distribution: distribution
      };
    }, [true, false]);
  };

  var donutTemptingDataTable = append(
    donutTemptingRows(priorDist, 'prior'),
    donutTemptingRows(posteriorDist, 'posterior'));

  viz.bar(donutTemptingDataTable, { groupBy: 'distribution' });
};
///

// Prior over the agent's utility table. Every restaurant maps to a pair of
// utilities: an *immediate* one and a *delayed* one (received after a delay
// of 1). Donut N and Donut S share one pair; Noodle and timeCost are fixed.
var priorUtility = function(){
  var candidateUtilities = [-10, 0, 10, 20];

  // Four independent uniform draws: two for Donut, then two for Veg.
  var donutFirst = uniformDraw(candidateUtilities);
  var donutSecond = uniformDraw(candidateUtilities);
  var donutPair = [donutFirst, donutSecond];

  var vegFirst = uniformDraw(candidateUtilities);
  var vegSecond = uniformDraw(candidateUtilities);
  var vegPair = [vegFirst, vegSecond];

  return {
    'Donut N': donutPair,
    'Donut S': donutPair,
    'Veg': vegPair,
    'Noodle': [-10, -10],
    'timeCost': -.01
  };
};

// Prior over the discounting parameters: the discount constant is fixed at 1
// and the agent type is drawn uniformly from 'naive' / 'sophisticated'.
var priorDiscounting = function(){
  var agentType = uniformDraw(['naive', 'sophisticated']);
  return { discount: 1, sophisticatedOrNaive: agentType };
};
// Softmax parameter is fixed at 1000 for this example.
var priorAlpha = function(){ return 1000; };
// Bundle the three samplers under the keys passed to getPosterior.
var prior = {
  utility: priorUtility,
  discounting: priorDiscounting,
  alpha: priorAlpha
};

// Get world and observations
var posterior = getPosterior(mdp.world, prior, naiveTrajectory);

// To get the prior, we condition on the empty list of observations
displayResults(getPosterior(mdp.world, prior, []), posterior);

We display maximum values and marginal distributions for both the prior and the posterior conditioned on the path shown above. To compute the prior, we simply condition on the empty list of observations.

The first graph shows the distribution over whether the agent is Sophisticated or Naive (labeled sophisticatedOrNaive). For the other graphs, we compute summary statistics of the agent’s parameters and display the distribution over them. The variable vegMinusDonut is the difference in total utility between Veg and Donut, ignoring the fact that each restaurant has an immediate and delayed utility. Inference rules out cases where the total utility is equal (which is most likely in the prior), since the agent would simply go to Donut South in that case. Finally, we introduce a variable donutTempting, which is true if the agent prefers Veg to Donut North at the start but reverses this preference when adjacent to Donut North. The prior probability of donutTempting is less than $0.1$, since it depends on a relatively delicate balance of utilities and the discounting behavior. The posterior is closer to $0.9$, suggesting (along with the posterior on sophisticatedOrNaive) that this is the explanation of the data favored by the model.

Using the same prior, we condition on the “Sophisticated” path (i.e. the path distinctive to the Sophisticated agent who avoids the temptation of Donut North and takes the long route to Veg):

///fold:
// Cell markers used to lay out the grid below: ___ is the single-space string
// placed in ordinary cells, and each restaurant is an object carrying its name.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

// Layout handed to makeGridWorldMDP. The '#' cells are presumably walls --
// confirm against makeGridWorldMDP.
var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the Restaurant Choice MDP from the grid and the options below.
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});

// Observed path as a list of [state, action] pairs. The surrounding text
// describes it as the long route to Veg that avoids Donut North.
var sophisticatedTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"r"],
  [{"loc":[4,3],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"r"],
  [{"loc":[5,3],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[4,3]},"u"],
  [{"loc":[5,4],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[5,3]},"u"],
  [{"loc":[5,5],"terminateAfterAction":false,"timeLeft":5,"previousLoc":[5,4]},"u"],
  [{"loc":[5,6],"terminateAfterAction":false,"timeLeft":4,"previousLoc":[5,5]},"l"],
  [{"loc":[4,6],"terminateAfterAction":false,"timeLeft":3,"previousLoc":[5,6]},"u"],
  [{"loc":[4,7],"terminateAfterAction":false,"timeLeft":2,"previousLoc":[4,6],"timeAtRestaurant":0},"l"],
  [{"loc":[4,7],"terminateAfterAction":true,"timeLeft":2,"previousLoc":[4,7],"timeAtRestaurant":1},"l"]
];
///
// Visualize the world together with the observed trajectory.
viz.gridworld(mdp.world, { trajectory: sophisticatedTrajectory });

Here are the results of inference:

///fold: Definition of world, prior and inference function is same as above codebox
// Cell markers used to lay out the grid below: ___ is the single-space string
// placed in ordinary cells, and each restaurant is an object carrying its name.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

// Layout handed to makeGridWorldMDP. The '#' cells are presumably walls --
// confirm against makeGridWorldMDP.
var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the Restaurant Choice MDP from the grid and the options below.
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});

// Fetch the library's inference helpers and keep a handle on its getPosterior.
var restaurantHyperbolicInfer = getRestaurantHyperbolicInfer();
var getPosterior = restaurantHyperbolicInfer.getPosterior;

// Prints the highest-probability (MAP) utilities for Veg and Donut N under
// the prior and the posterior, then draws three bar charts comparing the two
// distributions on sophisticatedOrNaive, vegMinusDonut and donutTempting.
// In each chart the prior rows come first, followed by the posterior rows.
var displayResults = function(priorDist, posteriorDist) {

  var mapUtilityBefore = priorDist.MAP().val.utility;
  print('Prior highest-probability utility for Veg: ' + mapUtilityBefore['Veg']
        + '. Donut: ' + mapUtilityBefore['Donut N'] + ' \n');

  var mapUtilityAfter = posteriorDist.MAP().val.utility;
  print('Posterior highest-probability utility for Veg: '
        + mapUtilityAfter['Veg'] + '. Donut: ' + mapUtilityAfter['Donut N']
        + ' \n');

  // Probability that `dist` assigns to the one-key object `query`, looked up
  // in the marginal for that key.
  var probabilityOf = function(dist, query) {
    var key = _.keys(query)[0];
    var marginal = getMarginalObject(dist, key);
    return Math.exp(marginal.score(query));
  };

  var agentTypeRows = function(dist, tag) {
    return map(function(agentType) {
      return {
        sophisticatedOrNaive: agentType,
        probability: probabilityOf(dist, { sophisticatedOrNaive: agentType }),
        distribution: tag
      };
    }, ['naive', 'sophisticated']);
  };

  viz.bar(append(agentTypeRows(priorDist, 'prior'),
                 agentTypeRows(posteriorDist, 'posterior')),
          { groupBy: 'distribution' });

  var utilityGapRows = function(dist, tag) {
    return map(function(gap) {
      return {
        vegMinusDonut: gap,
        probability: probabilityOf(dist, { vegMinusDonut: gap }),
        distribution: tag
      };
    }, [-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60]);
  };

  viz.bar(append(utilityGapRows(priorDist, 'prior'),
                 utilityGapRows(posteriorDist, 'posterior')),
          { groupBy: 'distribution' });

  var temptationRows = function(dist, tag) {
    return map(function(isTempting) {
      return {
        donutTempting: isTempting,
        probability: probabilityOf(dist, { donutTempting: isTempting }),
        distribution: tag
      };
    }, [true, false]);
  };

  viz.bar(append(temptationRows(priorDist, 'prior'),
                 temptationRows(posteriorDist, 'posterior')),
          { groupBy: 'distribution' });
};

// Prior over the agent's utility table: a pair of utilities per restaurant.
// Donut N and Donut S share one pair; Noodle and timeCost are fixed.
var priorUtility = function() {
  var candidateUtilities = [-10, 0, 10, 20];

  // Four independent uniform draws: two for Donut, then two for Veg.
  var donutFirst = uniformDraw(candidateUtilities);
  var donutSecond = uniformDraw(candidateUtilities);
  var donutPair = [donutFirst, donutSecond];

  var vegFirst = uniformDraw(candidateUtilities);
  var vegSecond = uniformDraw(candidateUtilities);
  var vegPair = [vegFirst, vegSecond];

  return {
    'Donut N': donutPair,
    'Donut S': donutPair,
    'Veg': vegPair,
    'Noodle': [-10, -10],
    'timeCost': -.01
  };
};

// Prior over the discounting parameters: the discount constant is fixed at 1
// and the agent type is drawn uniformly from 'naive' / 'sophisticated'.
var priorDiscounting = function(){
  var agentType = uniformDraw(['naive','sophisticated']);
  return { discount: 1, sophisticatedOrNaive: agentType };
};
// Softmax parameter is fixed at 1000 for this example.
var priorAlpha = function(){ return 1000; };
// Bundle the three samplers under the keys passed to getPosterior.
var prior = {
  utility: priorUtility,
  discounting: priorDiscounting,
  alpha: priorAlpha
};

// Observed path as a list of [state, action] pairs (the "Sophisticated" path
// that this code box conditions on).
var sophisticatedTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"r"],
  [{"loc":[4,3],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"r"],
  [{"loc":[5,3],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[4,3]},"u"],
  [{"loc":[5,4],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[5,3]},"u"],
  [{"loc":[5,5],"terminateAfterAction":false,"timeLeft":5,"previousLoc":[5,4]},"u"],
  [{"loc":[5,6],"terminateAfterAction":false,"timeLeft":4,"previousLoc":[5,5]},"l"],
  [{"loc":[4,6],"terminateAfterAction":false,"timeLeft":3,"previousLoc":[5,6]},"u"],
  [{"loc":[4,7],"terminateAfterAction":false,"timeLeft":2,"previousLoc":[4,6],"timeAtRestaurant":0},"l"],
  [{"loc":[4,7],"terminateAfterAction":true,"timeLeft":2,"previousLoc":[4,7],"timeAtRestaurant":1},"l"]
];
///

// Get world and observations
var posterior = getPosterior(mdp.world, prior, sophisticatedTrajectory);
// The first argument plays the role of the prior: the same inference run on
// an empty list of observations.
displayResults(getPosterior(mdp.world, prior, []), posterior);

If the agent goes directly to Veg, then they don’t provide information about whether they are Naive or Sophisticated. Using the same prior again, we do inference on this path:

///fold:
// Cell markers used to lay out the grid below: ___ is the single-space string
// placed in ordinary cells, and each restaurant is an object carrying its name.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

// Layout handed to makeGridWorldMDP. The '#' cells are presumably walls --
// confirm against makeGridWorldMDP.
var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the Restaurant Choice MDP from the grid and the options below.
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});

// Observed path as a list of [state, action] pairs. The surrounding text
// describes it as the path on which the agent goes directly to Veg.
var vegDirectTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"u"],
  [{"loc":[3,4],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"u"],
  [{"loc":[3,5],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[3,4]},"u"],
  [{"loc":[3,6],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[3,5]},"r"],
  [{"loc":[4,6],"terminateAfterAction":false,"timeLeft":5,"previousLoc":[3,6]},"u"],
  [{"loc":[4,7],"terminateAfterAction":false,"timeLeft":4,"previousLoc":[4,6],"timeAtRestaurant":0},"l"],
  [{"loc":[4,7],"terminateAfterAction":true,"timeLeft":4,"previousLoc":[4,7],"timeAtRestaurant":1},"l"]
];
///
// Visualize the world together with the observed trajectory.
viz.gridworld(mdp.world, { trajectory: vegDirectTrajectory });

Here are the results of inference:

// Definition of world, prior and inference function is same as above codebox

///fold:
// Fetch the library's inference helpers and keep a handle on its getPosterior.
var restaurantHyperbolicInfer = getRestaurantHyperbolicInfer();
var getPosterior = restaurantHyperbolicInfer.getPosterior;

// Prints the highest-probability (MAP) utilities for Veg and Donut N under
// the prior and the posterior, then draws three bar charts comparing the two
// distributions on sophisticatedOrNaive, vegMinusDonut and donutTempting.
// In each chart the prior rows come first, followed by the posterior rows.
var displayResults = function(priorDist, posteriorDist) {

  var mapUtilityBefore = priorDist.MAP().val.utility;
  print('Prior highest-probability utility for Veg: ' + mapUtilityBefore['Veg']
    + '. Donut: ' + mapUtilityBefore['Donut N'] + ' \n');

  var mapUtilityAfter = posteriorDist.MAP().val.utility;
  print('Posterior highest-probability utility for Veg: '
    + mapUtilityAfter['Veg'] + '. Donut: ' + mapUtilityAfter['Donut N']
    + ' \n');

  // Probability that `dist` assigns to the one-key object `query`, looked up
  // in the marginal for that key.
  var probabilityOf = function(dist, query) {
    var key = _.keys(query)[0];
    var marginal = getMarginalObject(dist, key);
    return Math.exp(marginal.score(query));
  };

  var agentTypeRows = function(dist, tag) {
    return map(function(agentType) {
      return {
        sophisticatedOrNaive: agentType,
        probability: probabilityOf(dist, { sophisticatedOrNaive: agentType }),
        distribution: tag
      };
    }, ['naive', 'sophisticated']);
  };

  viz.bar(append(agentTypeRows(priorDist, 'prior'),
                 agentTypeRows(posteriorDist, 'posterior')),
          { groupBy: 'distribution' });

  var utilityGapRows = function(dist, tag) {
    return map(function(gap) {
      return {
        vegMinusDonut: gap,
        probability: probabilityOf(dist, { vegMinusDonut: gap }),
        distribution: tag
      };
    }, [-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60]);
  };

  viz.bar(append(utilityGapRows(priorDist, 'prior'),
                 utilityGapRows(posteriorDist, 'posterior')),
          {groupBy: 'distribution'});

  var temptationRows = function(dist, tag) {
    return map(function(isTempting) {
      return {
        donutTempting: isTempting,
        probability: probabilityOf(dist, { donutTempting: isTempting }),
        distribution: tag
      };
    }, [true, false]);
  };

  viz.bar(append(temptationRows(priorDist, 'prior'),
                 temptationRows(posteriorDist, 'posterior')),
          { groupBy: 'distribution' });
};

// Prior over the agent's utility table: a pair of utilities per restaurant.
// Donut N and Donut S share one pair; Noodle and timeCost are fixed.
var priorUtility = function() {
  var candidateUtilities = [-10, 0, 10, 20];

  // Four independent uniform draws: two for Donut, then two for Veg.
  var donutFirst = uniformDraw(candidateUtilities);
  var donutSecond = uniformDraw(candidateUtilities);
  var donutPair = [donutFirst, donutSecond];

  var vegFirst = uniformDraw(candidateUtilities);
  var vegSecond = uniformDraw(candidateUtilities);
  var vegPair = [vegFirst, vegSecond];

  return {
    'Donut N': donutPair,
    'Donut S': donutPair,
    'Veg': vegPair,
    'Noodle': [-10, -10],
    'timeCost': -.01
  };
};

// Prior over the discounting parameters: the discount constant is fixed at 1
// and the agent type is drawn uniformly from 'naive' / 'sophisticated'.
var priorDiscounting = function() {
  var agentType = uniformDraw(['naive','sophisticated']);
  return { discount: 1, sophisticatedOrNaive: agentType };
};
// Softmax parameter is fixed at 1000 for this example.
var priorAlpha = function(){return 1000;};
// Bundle the three samplers under the keys passed to getPosterior.
var prior = {
  utility: priorUtility,
  discounting: priorDiscounting,
  alpha: priorAlpha
};

// Cell markers used to lay out the grid below: ___ is the single-space string
// placed in ordinary cells, and each restaurant is an object carrying its name.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

// Layout handed to makeGridWorldMDP. The '#' cells are presumably walls --
// confirm against makeGridWorldMDP.
var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the Restaurant Choice MDP from the grid and the options below.
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});

// Observed path as a list of [state, action] pairs (the direct-to-Veg path
// that this code box conditions on).
var vegDirectTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"u"],
  [{"loc":[3,4],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"u"],
  [{"loc":[3,5],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[3,4]},"u"],
  [{"loc":[3,6],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[3,5]},"r"],
  [{"loc":[4,6],"terminateAfterAction":false,"timeLeft":5,"previousLoc":[3,6]},"u"],
  [{"loc":[4,7],"terminateAfterAction":false,"timeLeft":4,"previousLoc":[4,6],"timeAtRestaurant":0},"l"],
  [{"loc":[4,7],"terminateAfterAction":true,"timeLeft":4,"previousLoc":[4,7],"timeAtRestaurant":1},"l"]
];
///

// Posterior given the observed path; the first argument to displayResults is
// the same inference run on an empty list of observations (the prior).
var posterior = getPosterior(mdp.world, prior, vegDirectTrajectory);
displayResults(getPosterior(mdp.world, prior, []), posterior);

Assume non-discounting, infer preferences and softmax

We want to compare a model that assumes an optimal MDP agent with one that allows for time-inconsistency. We first show the inferences by the model that assumes optimality. This model can only explain the anomalous Naive and Sophisticated paths in terms of softmax noise (lower values for $\alpha$ ). We display the prior and posteriors for both the Naive and Sophisticated paths.

///fold:
// Fetch the library's inference helpers and keep a handle on its getPosterior.
var restaurantHyperbolicInfer = getRestaurantHyperbolicInfer();
var getPosterior = restaurantHyperbolicInfer.getPosterior;

// Prints the highest-probability (MAP) utilities for Veg and Donut N under
// the prior and the posterior, then draws two bar charts comparing the two
// distributions on vegMinusDonut and on the softmax parameter alpha.
// In each chart the prior rows come first, followed by the posterior rows.
var displayResults = function(priorDist, posteriorDist) {

  var mapUtilityBefore = priorDist.MAP().val.utility;
  print('Prior highest-probability utility for Veg: ' + mapUtilityBefore['Veg']
        + '. Donut: ' + mapUtilityBefore['Donut N'] + ' \n');

  var mapUtilityAfter = posteriorDist.MAP().val.utility;
  print('Posterior highest-probability utility for Veg: '
        + mapUtilityAfter['Veg'] + '. Donut: ' + mapUtilityAfter['Donut N']
        + ' \n');

  // Probability that `dist` assigns to the one-key object `query`, looked up
  // in the marginal for that key.
  var probabilityOf = function(dist, query) {
    var key = _.keys(query)[0];
    var marginal = getMarginalObject(dist, key);
    return Math.exp(marginal.score(query));
  };

  var utilityGapRows = function(dist, tag) {
    return map(function(gap) {
      return {
        vegMinusDonut: gap,
        probability: probabilityOf(dist, { vegMinusDonut: gap }),
        distribution: tag
      };
    }, [-50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50]);
  };

  viz.bar(append(utilityGapRows(priorDist, 'prior'),
                 utilityGapRows(posteriorDist, 'posterior')),
          { groupBy: 'distribution' });

  var alphaRows = function(dist, tag) {
    return map(function(noiseLevel) {
      return {
        alpha: noiseLevel,
        probability: probabilityOf(dist, { alpha: noiseLevel }),
        distribution: tag
      };
    }, [0.1, 10, 100, 1000]);
  };

  viz.bar(append(alphaRows(priorDist, 'prior'),
                 alphaRows(posteriorDist, 'posterior')),
          { groupBy: 'distribution' });
};

// Cell markers used to lay out the grid below: ___ is the single-space string
// placed in ordinary cells, and each restaurant is an object carrying its name.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

// Layout handed to makeGridWorldMDP. The '#' cells are presumably walls --
// confirm against makeGridWorldMDP.
var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the Restaurant Choice MDP from the grid and the options below.
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});

// The two observed paths used in this code box, each a list of
// [state, action] pairs: first the "Naive" path, then the "Sophisticated" one.
var naiveTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"u"],
  [{"loc":[3,4],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"u"],
  [{"loc":[3,5],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[3,4]},"l"],
  [{"loc":[2,5],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[3,5],"timeAtRestaurant":0},"l"],
  [{"loc":[2,5],"terminateAfterAction":true,"timeLeft":6,"previousLoc":[2,5],"timeAtRestaurant":1},"l"]
];

var sophisticatedTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"r"],
  [{"loc":[4,3],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"r"],
  [{"loc":[5,3],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[4,3]},"u"],
  [{"loc":[5,4],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[5,3]},"u"],
  [{"loc":[5,5],"terminateAfterAction":false,"timeLeft":5,"previousLoc":[5,4]},"u"],
  [{"loc":[5,6],"terminateAfterAction":false,"timeLeft":4,"previousLoc":[5,5]},"l"],
  [{"loc":[4,6],"terminateAfterAction":false,"timeLeft":3,"previousLoc":[5,6]},"u"],
  [{"loc":[4,7],"terminateAfterAction":false,"timeLeft":2,"previousLoc":[4,6],"timeAtRestaurant":0},"l"],
  [{"loc":[4,7],"terminateAfterAction":true,"timeLeft":2,"previousLoc":[4,7],"timeAtRestaurant":1},"l"]
];
///

// Prior over the agent's utility table for the non-discounting model.
// Because there is no discounting, delayed utilities are left out: the second
// entry of the Donut and Veg pairs is always 0 and only the first is sampled.
var priorUtility = function() {
  var candidateUtilities = [-10, 0, 10, 20, 30, 40];

  // Two independent uniform draws: Donut first, then Veg.
  var donutSampled = uniformDraw(candidateUtilities);
  var donutPair = [donutSampled, 0];

  var vegSampled = uniformDraw(candidateUtilities);
  var vegPair = [vegSampled, 0];

  return {
    'Donut N': donutPair,
    'Donut S': donutPair,
    'Veg': vegPair,
    'Noodle': [-10, -10],
    'timeCost': -.01
  };
};

// No discounting is assumed in this model (so the *sophisticated* label has
// no effect here); the sampler always returns the same constant parameters.
var priorDiscounting = function() {
  var fixedParameters = { discount: 0, sophisticatedOrNaive: 'sophisticated' };
  return fixedParameters;
};

// Softmax parameter is drawn uniformly from four candidate values.
var priorAlpha = function(){ return uniformDraw([0.1, 10, 100, 1000]); };
// Bundle the three samplers under the keys passed to getPosterior.
var prior = {
  utility: priorUtility,
  discounting: priorDiscounting,
  alpha: priorAlpha
};

// Get world and observations
var world = mdp.world;

// For each path, the first argument to displayResults is the same inference
// run on an empty list of observations (the prior).
print('Prior and posterior after observing Naive path');
var posteriorNaive = getPosterior(world, prior, naiveTrajectory);
displayResults(getPosterior(world, prior, []), posteriorNaive);

print('Prior and posterior after observing Sophisticated path');
var posteriorSophisticated = getPosterior(world, prior, sophisticatedTrajectory);
displayResults(getPosterior(world, prior, []), posteriorSophisticated);

The graphs show two important results:

For the Naive path, the agent is inferred to prefer Donut, while for the Sophisticated path, Veg is inferred. In both cases, the inference fits with where the agent ends up.
High values for $\alpha$ are ruled out in each case, showing that the model explains the behavior in terms of noise.

What happens if we observe the agent taking the Naive path repeatedly? While noise is needed to explain the agent’s path, too much noise is inconsistent with taking an identical path repeatedly. This is confirmed in the results below:

///fold: Prior is same as above
// Fetch the library's inference helpers and keep a handle on its getPosterior.
var restaurantHyperbolicInfer = getRestaurantHyperbolicInfer();
var getPosterior = restaurantHyperbolicInfer.getPosterior;

// Summarise inference for the optimal-agent model: print the MAP utilities of
// the prior and the posterior, then chart prior vs. posterior marginals for
// vegMinusDonut and alpha as grouped bar charts.
var displayResults = function(priorDist, posteriorDist) {

  var mapUtilityPrior = priorDist.MAP().val.utility;
  print('Prior highest-probability utility for Veg: ' + mapUtilityPrior['Veg']
        + '. Donut: ' + mapUtilityPrior['Donut N'] + ' \n');

  var mapUtilityPosterior = posteriorDist.MAP().val.utility;
  print('Posterior highest-probability utility for Veg: '
        + mapUtilityPosterior['Veg'] + '. Donut: '
        + mapUtilityPosterior['Donut N'] + ' \n');

  // Returns a scorer for `dist`. The scorer takes a one-key object such as
  // { alpha: 10 }, fetches the marginal named by that key and returns the
  // probability that marginal assigns to the object.
  var probabilityUnder = function(dist) {
    return function(assignment) {
      var marginal = getMarginalObject(dist, _.keys(assignment)[0]);
      return Math.exp(marginal.score(assignment));
    };
  };
  var priorProb = probabilityUnder(priorDist);
  var posteriorProb = probabilityUnder(posteriorDist);

  // Each table stacks the prior rows on top of the posterior rows.
  var utilityGaps = [-50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50];
  var utilityGapRows = append(
    map(function(gap) {
      return {
        vegMinusDonut: gap,
        probability: priorProb({ vegMinusDonut: gap }),
        distribution: 'prior'
      };
    }, utilityGaps),
    map(function(gap) {
      return {
        vegMinusDonut: gap,
        probability: posteriorProb({ vegMinusDonut: gap }),
        distribution: 'posterior'
      };
    }, utilityGaps));
  viz.bar(utilityGapRows, { groupBy: 'distribution' });

  var alphaLevels = [0.1, 10, 100, 1000];
  var alphaRows = append(
    map(function(level) {
      return {
        alpha: level,
        probability: priorProb({ alpha: level }),
        distribution: 'prior'
      };
    }, alphaLevels),
    map(function(level) {
      return {
        alpha: level,
        probability: posteriorProb({ alpha: level }),
        distribution: 'posterior'
      };
    }, alphaLevels));
  viz.bar(alphaRows, { groupBy: 'distribution' });
};

// Prior over the agent's utility table. Each restaurant maps to a pair
// [immediate utility, delayed utility]; only the immediate utilities of
// Donut and Veg are uncertain.
var priorUtility = function() {
  var candidates = [-10, 0, 10, 20, 30, 40];
  // Draw order: Donut first, then Veg.
  var donutNow = uniformDraw(candidates);
  var vegNow = uniformDraw(candidates);
  // With no discounting, delayed utilities are omitted (pinned to 0).
  // Both Donut branches share the very same pair.
  var donutPair = [donutNow, 0];
  return {
    'Donut N': donutPair,
    'Donut S': donutPair,
    'Veg': [vegNow, 0],
    'Noodle': [-10, -10],
    'timeCost': -0.01
  };
};

// Discounting prior for the optimal-agent model: the discount is pinned to 0
// (no discounting), so the *sophisticated* label has no effect here.
var priorDiscounting = function() {
  var noDiscounting = 0;
  var agentType = 'sophisticated';
  return { discount: noDiscounting, sophisticatedOrNaive: agentType };
};

// Prior over the softmax parameter alpha: uniform over four candidate values.
var priorAlpha = function() {
  var alphaLevels = [0.1, 10, 100, 1000];
  return uniformDraw(alphaLevels);
};
// Bundle of the three prior samplers defined above; this object is what is
// passed to getPosterior as its `prior` argument.
var prior = {
  utility: priorUtility,
  discounting: priorDiscounting,
  alpha: priorAlpha
};

// Restaurant gridworld layout. The named objects mark restaurant cells; their
// names are the keys used in the utility table. '#' and ' ' presumably mark
// blocked and open cells — confirm against makeGridWorldMDP.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the MDP from the grid. `start` and `totalTime` match the first state
// of the observed trajectory below (loc [3,1], timeLeft 11).
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});

// Observed "Naive" path as a list of [state, action] pairs. It starts at
// loc [3,1] with timeLeft 11 and ends at loc [2,5]; reading loc as [x, y]
// with y counted from the bottom grid row, that cell is Donut N — TODO
// confirm the coordinate convention.
var naiveTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"u"],
  [{"loc":[3,4],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"u"],
  [{"loc":[3,5],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[3,4]},"l"],
  [{"loc":[2,5],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[3,5],"timeAtRestaurant":0},"l"],
  [{"loc":[2,5],"terminateAfterAction":true,"timeLeft":6,"previousLoc":[2,5],"timeAtRestaurant":1},"l"]
];
///

// Condition on the same Naive trajectory several times over.
var numberRepeats = 2; // with 2 repeats, we condition a total of 3 times
var posteriorNaive = getPosterior(mdp.world, prior, naiveTrajectory, numberRepeats);
print('Prior and posterior after conditioning 3 times on Naive path');
// Inference with an empty observation list is used as the prior for comparison.
displayResults(getPosterior(mdp.world, prior, []), posteriorNaive);

Model that includes discounting: jointly infer discounting, preferences, softmax noise

Our inference model now has the optimal agent as a special case but also includes time-inconsistent agents. This model jointly infers the discounting behavior, the agent’s utilities and the softmax noise.

We show two different posteriors. The first is after conditioning on the Naive path (as above). In the second, we imagine that we have observed the agent taking the same path on multiple occasions (three times) and we condition on this.

///fold:
// Pull the inference entry point out of the restaurant-inference helper.
// Below it is called as
// getPosterior(world, prior, observedTrajectory, numberRepeats), with the
// last argument optional.
var restaurantHyperbolicInfer = getRestaurantHyperbolicInfer();
var getPosterior = restaurantHyperbolicInfer.getPosterior;

// Summarise inference for the discounting model: print the MAP utilities of
// the prior and the posterior, then chart prior vs. posterior marginals for
// sophisticatedOrNaive, vegMinusDonut, donutTempting and alpha.
//
// priorDist / posteriorDist: distributions as returned by getPosterior.
// Every table lists the prior rows first and the posterior rows second, so
// that all four charts group the two distributions in the same order.
var displayResults = function(priorDist, posteriorDist) {

  var mapUtilityPrior = priorDist.MAP().val.utility;
  print('Prior highest-probability utility for Veg: ' + mapUtilityPrior['Veg']
        + '. Donut: ' + mapUtilityPrior['Donut N'] + ' \n');

  var mapUtilityPosterior = posteriorDist.MAP().val.utility;
  print('Posterior highest-probability utility for Veg: '
        + mapUtilityPosterior['Veg'] + '. Donut: '
        + mapUtilityPosterior['Donut N'] + ' \n');

  // Returns a scorer for `dist`. The scorer takes a one-key object such as
  // { alpha: 10 }, fetches the marginal named by that key and returns the
  // probability that marginal assigns to the object.
  var probabilityUnder = function(dist) {
    return function(assignment) {
      var marginal = getMarginalObject(dist, _.keys(assignment)[0]);
      return Math.exp(marginal.score(assignment));
    };
  };
  var priorProb = probabilityUnder(priorDist);
  var posteriorProb = probabilityUnder(posteriorDist);

  var agentTypes = ['naive', 'sophisticated'];
  // Prior rows first, matching the other tables (the original appended the
  // posterior rows first for this table only).
  var agentTypeRows = append(
    map(function(agentType) {
      return {
        sophisticatedOrNaive: agentType,
        probability: priorProb({ sophisticatedOrNaive: agentType }),
        distribution: 'prior'
      };
    }, agentTypes),
    map(function(agentType) {
      return {
        sophisticatedOrNaive: agentType,
        probability: posteriorProb({ sophisticatedOrNaive: agentType }),
        distribution: 'posterior'
      };
    }, agentTypes));
  viz.bar(agentTypeRows, { groupBy: 'distribution' });

  var utilityGaps = [-10, 0, 10, 20, 30, 40, 50, 60, 70];
  var utilityGapRows = append(
    map(function(gap) {
      return {
        vegMinusDonut: gap,
        probability: priorProb({ vegMinusDonut: gap }),
        distribution: 'prior'
      };
    }, utilityGaps),
    map(function(gap) {
      return {
        vegMinusDonut: gap,
        probability: posteriorProb({ vegMinusDonut: gap }),
        distribution: 'posterior'
      };
    }, utilityGaps));
  viz.bar(utilityGapRows, { groupBy: 'distribution' });

  var truthValues = [true, false];
  var donutTemptingRows = append(
    map(function(flag) {
      return {
        donutTempting: flag,
        probability: priorProb({ donutTempting: flag }),
        distribution: 'prior'
      };
    }, truthValues),
    map(function(flag) {
      return {
        donutTempting: flag,
        probability: posteriorProb({ donutTempting: flag }),
        distribution: 'posterior'
      };
    }, truthValues));
  viz.bar(donutTemptingRows, { groupBy: 'distribution' });

  var alphaLevels = [0.1, 10, 1000];
  var alphaRows = append(
    map(function(level) {
      return {
        alpha: level,
        probability: priorProb({ alpha: level }),
        distribution: 'prior'
      };
    }, alphaLevels),
    map(function(level) {
      return {
        alpha: level,
        probability: posteriorProb({ alpha: level }),
        distribution: 'posterior'
      };
    }, alphaLevels));
  viz.bar(alphaRows, { groupBy: 'distribution' });
};

// Observed "Naive" path as a list of [state, action] pairs. It starts at
// loc [3,1] with timeLeft 11 and ends at loc [2,5]; reading loc as [x, y]
// with y counted from the bottom grid row, that cell is Donut N — TODO
// confirm the coordinate convention.
var naiveTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"u"],
  [{"loc":[3,4],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"u"],
  [{"loc":[3,5],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[3,4]},"l"],
  [{"loc":[2,5],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[3,5],"timeAtRestaurant":0},"l"],
  [{"loc":[2,5],"terminateAfterAction":true,"timeLeft":6,"previousLoc":[2,5],"timeAtRestaurant":1},"l"]
];

// Restaurant gridworld layout. The named objects mark restaurant cells; their
// names are the keys used in the utility table. '#' and ' ' presumably mark
// blocked and open cells — confirm against makeGridWorldMDP.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the MDP from the grid. `start` and `totalTime` match the first state
// of the observed trajectory above (loc [3,1], timeLeft 11).
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});
///

// Prior over the agent's utility table. Each restaurant maps to a pair
// [immediate utility, delayed utility]. The delayed utilities are fixed
// (-10 for Donut, 20 for Veg) to make inference faster; only the immediate
// utilities are uncertain.
var priorUtility = function() {
  var candidates = [-10, 0, 10, 20, 30];
  // Draw order: Donut first, then Veg.
  var donutNow = uniformDraw(candidates);
  var vegNow = uniformDraw(candidates);
  // Both Donut branches share the very same pair.
  var donutPair = [donutNow, -10];
  return {
    'Donut N': donutPair,
    'Donut S': donutPair,
    'Veg': [vegNow, 20],
    'Noodle': [-10, -10],
    'timeCost': -0.01
  };
};

// Joint prior over discounting: the discount parameter is 0 or 1 and the
// agent type is 'naive' or 'sophisticated', each drawn uniformly.
var priorDiscounting = function() {
  // Draw the discount first, then the agent type.
  var discountLevel = uniformDraw([0, 1]);
  var agentType = uniformDraw(['naive','sophisticated']);
  return { discount: discountLevel, sophisticatedOrNaive: agentType };
};
// Prior over the softmax parameter alpha: uniform over three candidate values.
var priorAlpha = function() {
  var alphaLevels = [0.1, 10, 1000];
  return uniformDraw(alphaLevels);
};
// Bundle of the three prior samplers defined above; this object is what is
// passed to getPosterior as its `prior` argument.
var prior = {
  utility: priorUtility,
  discounting: priorDiscounting,
  alpha: priorAlpha
};

// Get world and observations
var world = mdp.world;

var posterior = getPosterior(world, prior, naiveTrajectory);
print('Prior and posterior after observing Naive path');
// Inference with an empty observation list gives the prior. It does not
// depend on the observations, so it is computed once and shared by both
// comparisons (assumes getPosterior returns an equivalent distribution for
// identical arguments).
var priorDistribution = getPosterior(world, prior, []);
displayResults(priorDistribution, posterior);

print('Prior and posterior after observing Naive path three times');
var numberRepeats = 2; // with 2 repeats, we condition a total of 3 times
displayResults(priorDistribution,
               getPosterior(world, prior, naiveTrajectory, numberRepeats));

Conditioning on the Naive path once, the probabilities of the agent being Naive and of donutTempting both go up. However, the probability of high softmax noise also goes up. In terms of preferences, we rule out a strong preference for Veg and slightly reduce a preference for Donut. So if the agent were Naive, tempted by Donut and with very low noise, our inference would not place most of the posterior on this explanation. There are two reasons for this. First, this agent is unlikely in the prior. Second, the explanation of the behavior in terms of noise is plausible. (In our Gridworld setup, we don’t allow the agent to backtrack to the previous state. This means there are few cases where a softmax noisy agent would behave differently than a low noise one.) Conditioning on the same Naive path three times makes the explanation in terms of noise much less plausible: the agent would have to make the same “mistake” three times and make no other mistakes. (The results for the Sophisticated path are similar.)

In summary, if we observe the agent repeatedly take the Naive path, the “Optimal Model” explains this in terms of a preference for Donut and significant softmax noise (explaining why the agent takes Donut North over Donut South). The “Discounting Model” is similar to the Optimal Model when it observes the Naive path once. However, observing it multiple times, it infers that the agent has low noise and an overall preference for Veg.

Preferences for the two Donut Store branches can vary

Another explanation of the Naive path is that the agent has a preference for the “Donut N” branch of the Donut Store over the “Donut S” branch. Maybe this branch is better run or has more space. If we add this to our set of possible preferences, inference changes significantly.

To speed up inference, we use a fixed assumption that the agent is Naive. There are three explanations of the agent’s path:

Softmax noise: measured by $\alpha$
The agent is Naive and tempted by Donut: measured by discount and donutTempting
The agent prefers Donut N to Donut S: measured by donutNGreaterDonutS (i.e. Donut N’s utility is greater than Donut S’s).

These three can also be combined to explain the behavior.

///fold:
// Pull the inference entry point out of the restaurant-inference helper.
// Below it is called as getPosterior(world, prior, observedTrajectory).
var restaurantHyperbolicInfer = getRestaurantHyperbolicInfer();
var getPosterior = restaurantHyperbolicInfer.getPosterior;

// Observed "Naive" path as a list of [state, action] pairs. It starts at
// loc [3,1] with timeLeft 11 and ends at loc [2,5]; reading loc as [x, y]
// with y counted from the bottom grid row, that cell is Donut N — TODO
// confirm the coordinate convention.
var naiveTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"u"],
  [{"loc":[3,4],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"u"],
  [{"loc":[3,5],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[3,4]},"l"],
  [{"loc":[2,5],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[3,5],"timeAtRestaurant":0},"l"],
  [{"loc":[2,5],"terminateAfterAction":true,"timeLeft":6,"previousLoc":[2,5],"timeAtRestaurant":1},"l"]
];

// Summarise inference when the two Donut branches may differ: print the MAP
// utilities of the prior and the posterior, then chart prior vs. posterior
// marginals for alpha, donutTempting, discount and donutNGreaterDonutS.
var displayResults = function(priorDist, posteriorDist) {

  var mapUtilityPrior = priorDist.MAP().val.utility;
  print('Prior highest-probability utility for Veg: ' + mapUtilityPrior['Veg']
        + '. Donut: ' + mapUtilityPrior['Donut N'] + ' \n');

  var mapUtilityPosterior = posteriorDist.MAP().val.utility;
  print('Posterior highest-probability utility for Veg: '
        + mapUtilityPosterior['Veg'] + '. Donut: '
        + mapUtilityPosterior['Donut N'] + ' \n');

  // Returns a scorer for `dist`. The scorer takes a one-key object such as
  // { alpha: 100 }, fetches the marginal named by that key and returns the
  // probability that marginal assigns to the object.
  var probabilityUnder = function(dist) {
    return function(assignment) {
      var marginal = getMarginalObject(dist, _.keys(assignment)[0]);
      return Math.exp(marginal.score(assignment));
    };
  };
  var priorProb = probabilityUnder(priorDist);
  var posteriorProb = probabilityUnder(posteriorDist);

  // Each table stacks the prior rows on top of the posterior rows.
  var alphaLevels = [0.1, 100, 1000];
  var alphaRows = append(
    map(function(level) {
      return {
        alpha: level,
        probability: priorProb({ alpha: level }),
        distribution: 'prior'
      };
    }, alphaLevels),
    map(function(level) {
      return {
        alpha: level,
        probability: posteriorProb({ alpha: level }),
        distribution: 'posterior'
      };
    }, alphaLevels));
  viz.bar(alphaRows, { groupBy: 'distribution' });

  var temptingValues = [true, false];
  var donutTemptingRows = append(
    map(function(flag) {
      return {
        donutTempting: flag,
        probability: priorProb({ donutTempting: flag }),
        distribution: 'prior'
      };
    }, temptingValues),
    map(function(flag) {
      return {
        donutTempting: flag,
        probability: posteriorProb({ donutTempting: flag }),
        distribution: 'posterior'
      };
    }, temptingValues));
  viz.bar(donutTemptingRows, { groupBy: 'distribution' });

  var discountLevels = [0, 1];
  var discountRows = append(
    map(function(level) {
      return {
        discount: level,
        probability: priorProb({ discount: level }),
        distribution: 'prior'
      };
    }, discountLevels),
    map(function(level) {
      return {
        discount: level,
        probability: posteriorProb({ discount: level }),
        distribution: 'posterior'
      };
    }, discountLevels));
  viz.bar(discountRows, { groupBy: 'distribution' });

  var branchValues = [false, true];
  var donutBranchRows = append(
    map(function(flag) {
      return {
        donutNGreaterDonutS: flag,
        probability: priorProb({ donutNGreaterDonutS: flag }),
        distribution: 'prior'
      };
    }, branchValues),
    map(function(flag) {
      return {
        donutNGreaterDonutS: flag,
        probability: posteriorProb({ donutNGreaterDonutS: flag }),
        distribution: 'posterior'
      };
    }, branchValues));
  viz.bar(donutBranchRows, { groupBy: 'distribution' });
};

// Restaurant gridworld layout. The named objects mark restaurant cells; their
// names are the keys used in the utility table. '#' and ' ' presumably mark
// blocked and open cells — confirm against makeGridWorldMDP.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the MDP from the grid. `start` and `totalTime` match the first state
// of the observed trajectory above (loc [3,1], timeLeft 11).
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});
///

// Prior over the agent's utility table, with the two Donut branches drawn
// independently so that one branch can be preferred to the other.
// NOTE(review): Veg fixes its first entry at 20 and draws its second entry,
// the reverse of the Donut pairs — confirm this is intended.
var priorUtility = function() {
  var candidates = [-10, 0, 10, 20];
  // Draw order: Donut N, Donut S, then Veg.
  var donutNorth = uniformDraw(candidates);
  var donutSouth = uniformDraw(candidates);
  var vegSecond = uniformDraw(candidates);
  return {
    'Donut N': [donutNorth, -10],
    'Donut S': [donutSouth, -10],
    'Veg': [20, vegSecond],
    'Noodle': [-10, -10],
    'timeCost': -0.01
  };
};

// Discounting prior with the agent type fixed to 'naive'; only the discount
// parameter (0 or 1, drawn uniformly) is uncertain.
var priorDiscounting = function() {
  var discountLevel = uniformDraw([0, 1]);
  var agentType = 'naive';
  return { discount: discountLevel, sophisticatedOrNaive: agentType };
};
// Prior over the softmax parameter alpha: uniform over three candidate values.
var priorAlpha = function() {
  var alphaLevels = [0.1, 100, 1000];
  return uniformDraw(alphaLevels);
};
// Bundle of the three prior samplers defined above; this object is what is
// passed to getPosterior as its `prior` argument.
var prior = {
  utility: priorUtility,
  discounting: priorDiscounting,
  alpha: priorAlpha
};

// Condition on the Naive path, then display it next to the prior (inference
// with an empty observation list).
var posterior = getPosterior(mdp.world, prior, naiveTrajectory);
displayResults(getPosterior(mdp.world, prior, []), posterior);

The explanation in terms of Donut North being preferred does well in the posterior. This is because the discounting explanation (even assuming the agent is Naive) is unlikely a priori (due to our simple uniform priors on utilities and discounting). While high noise is more plausible a priori, the noise explanation still needs to posit a low-probability series of events.

We see a similar result if we enrich the set of possible utilities for the Sophisticated path. This time, we allow the timeCost, i.e. the cost for taking a single timestep, to be positive. This means the agent prefers to spend as much time as possible moving around before reaching a restaurant. Here are the results:

Observe the sophisticated path with possibly positive timeCost:

///fold:
// Pull the inference entry point out of the restaurant-inference helper.
// Below it is called as getPosterior(world, prior, observedTrajectory).
var restaurantHyperbolicInfer = getRestaurantHyperbolicInfer();
var getPosterior = restaurantHyperbolicInfer.getPosterior;

// Summarise inference when timeCost may be positive: print the MAP utilities
// of the prior and the posterior, then chart prior vs. posterior marginals
// for alpha, donutTempting, discount and timeCost.
var displayResults = function(priorDist, posteriorDist) {

  var mapUtilityPrior = priorDist.MAP().val.utility;
  print('Prior highest-probability utility for Veg: ' + mapUtilityPrior['Veg']
        + '. Donut: ' + mapUtilityPrior['Donut N'] + ' \n');

  var mapUtilityPosterior = posteriorDist.MAP().val.utility;
  print('Posterior highest-probability utility for Veg: '
        + mapUtilityPosterior['Veg'] + '. Donut: '
        + mapUtilityPosterior['Donut N'] + ' \n');

  // Returns a scorer for `dist`. The scorer takes a one-key object such as
  // { alpha: 100 }, fetches the marginal named by that key and returns the
  // probability that marginal assigns to the object.
  var probabilityUnder = function(dist) {
    return function(assignment) {
      var marginal = getMarginalObject(dist, _.keys(assignment)[0]);
      return Math.exp(marginal.score(assignment));
    };
  };
  var priorProb = probabilityUnder(priorDist);
  var posteriorProb = probabilityUnder(posteriorDist);

  // Each table stacks the prior rows on top of the posterior rows.
  var alphaLevels = [0.1, 100, 1000];
  var alphaRows = append(
    map(function(level) {
      return {
        alpha: level,
        probability: priorProb({ alpha: level }),
        distribution: 'prior'
      };
    }, alphaLevels),
    map(function(level) {
      return {
        alpha: level,
        probability: posteriorProb({ alpha: level }),
        distribution: 'posterior'
      };
    }, alphaLevels));
  viz.bar(alphaRows, { groupBy: 'distribution' });

  var temptingValues = [true, false];
  var donutTemptingRows = append(
    map(function(flag) {
      return {
        donutTempting: flag,
        probability: priorProb({ donutTempting: flag }),
        distribution: 'prior'
      };
    }, temptingValues),
    map(function(flag) {
      return {
        donutTempting: flag,
        probability: posteriorProb({ donutTempting: flag }),
        distribution: 'posterior'
      };
    }, temptingValues));
  viz.bar(donutTemptingRows, { groupBy: 'distribution' });

  var discountLevels = [0, 1];
  var discountRows = append(
    map(function(level) {
      return {
        discount: level,
        probability: priorProb({ discount: level }),
        distribution: 'prior'
      };
    }, discountLevels),
    map(function(level) {
      return {
        discount: level,
        probability: posteriorProb({ discount: level }),
        distribution: 'posterior'
      };
    }, discountLevels));
  viz.bar(discountRows, { groupBy: 'distribution' });

  var stepCosts = [-0.01, 0.1, 1];
  var timeCostRows = append(
    map(function(cost) {
      return {
        timeCost: cost,
        probability: priorProb({ timeCost: cost }),
        distribution: 'prior'
      };
    }, stepCosts),
    map(function(cost) {
      return {
        timeCost: cost,
        probability: posteriorProb({ timeCost: cost }),
        distribution: 'posterior'
      };
    }, stepCosts));
  viz.bar(timeCostRows, { groupBy: 'distribution' });
};

// Observed "Sophisticated" path as a list of [state, action] pairs. It starts
// at loc [3,1] with timeLeft 11, goes right at [3,3] instead of continuing
// up, and ends at loc [4,7]; reading loc as [x, y] with y counted from the
// bottom grid row, that cell is Veg — TODO confirm the coordinate convention.
var sophisticatedTrajectory = [
  [{"loc":[3,1],"terminateAfterAction":false,"timeLeft":11},"u"],
  [{"loc":[3,2],"terminateAfterAction":false,"timeLeft":10,"previousLoc":[3,1]},"u"],
  [{"loc":[3,3],"terminateAfterAction":false,"timeLeft":9,"previousLoc":[3,2]},"r"],
  [{"loc":[4,3],"terminateAfterAction":false,"timeLeft":8,"previousLoc":[3,3]},"r"],
  [{"loc":[5,3],"terminateAfterAction":false,"timeLeft":7,"previousLoc":[4,3]},"u"],
  [{"loc":[5,4],"terminateAfterAction":false,"timeLeft":6,"previousLoc":[5,3]},"u"],
  [{"loc":[5,5],"terminateAfterAction":false,"timeLeft":5,"previousLoc":[5,4]},"u"],
  [{"loc":[5,6],"terminateAfterAction":false,"timeLeft":4,"previousLoc":[5,5]},"l"],
  [{"loc":[4,6],"terminateAfterAction":false,"timeLeft":3,"previousLoc":[5,6]},"u"],
  [{"loc":[4,7],"terminateAfterAction":false,"timeLeft":2,"previousLoc":[4,6],"timeAtRestaurant":0},"l"],
  [{"loc":[4,7],"terminateAfterAction":true,"timeLeft":2,"previousLoc":[4,7],"timeAtRestaurant":1},"l"]
];

// Restaurant gridworld layout. The named objects mark restaurant cells; their
// names are the keys used in the utility table. '#' and ' ' presumably mark
// blocked and open cells — confirm against makeGridWorldMDP.
var ___ = ' ';
var DN = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };

var grid = [
  ['#', '#', '#', '#',  V , '#'],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', DN , ___, '#', ___],
  ['#', '#', '#', ___, '#', ___],
  ['#', '#', '#', ___, ___, ___],
  ['#', '#', '#', ___, '#',  N ],
  [___, ___, ___, ___, '#', '#'],
  [DS , '#', '#', ___, '#', '#']
];

// Build the MDP from the grid. `start` and `totalTime` match the first state
// of the observed trajectory above (loc [3,1], timeLeft 11).
var mdp = makeGridWorldMDP({
  grid,
  noReverse: true,
  maxTimeAtRestaurant: 2,
  start: [3, 1],
  totalTime: 11
});
///


// Prior on agent's utility function. Each restaurant maps to a pair
// [immediate utility, delayed utility]; the delayed utilities are fixed
// (-10 for Donut, 20 for Veg) and the immediate ones are drawn uniformly.
// Unlike the earlier priors, timeCost is also uncertain and may be positive.
var priorUtility = function() {
  var utilityValues =  [-10, 0, 10, 20, 30];
  // Draw order: Donut, Veg, then timeCost. Both Donut branches share the
  // same pair. (The statement below was missing its terminating semicolon.)
  var donut = [uniformDraw(utilityValues), -10];
  var veg = [uniformDraw(utilityValues), 20];
  return {
    'Donut N': donut,
    'Donut S': donut,
    'Veg': veg,
    'Noodle': [-10, -10],
    'timeCost': uniformDraw([-0.01, 0.1, 1])
  };
};

// Discounting prior with the agent type fixed to 'sophisticated'; only the
// discount parameter (0 or 1, drawn uniformly) is uncertain.
var priorDiscounting = function() {
  var discountLevel = uniformDraw([0, 1]);
  var agentType = 'sophisticated';
  return { discount: discountLevel, sophisticatedOrNaive: agentType };
};
// Prior over the softmax parameter alpha: uniform over three candidate values.
var priorAlpha = function() {
  var alphaLevels = [0.1, 100, 1000];
  return uniformDraw(alphaLevels);
};
// Bundle of the three prior samplers defined above; this object is what is
// passed to getPosterior as its `prior` argument.
var prior = {
  utility: priorUtility,
  discounting: priorDiscounting,
  alpha: priorAlpha
};

// Condition on the Sophisticated path, then display it next to the prior
// (inference with an empty observation list).
var posterior = getPosterior(mdp.world, prior, sophisticatedTrajectory);
displayResults(getPosterior(mdp.world, prior, []), posterior);

Next chapter: Multi-agent models