Dynamic Programming¶
- action at each date
- reward (possibly) at each date depending on the action
- state variable that evolves depending on the action and that determines future possible rewards
Example: Cake-Eating Problem¶
- cake (of size 1) that can be eaten at any time
- utility from eating cake
- state variable is the amount of cake left (written recursively below)
maximize $\sum_{t=0}^\infty \delta^t u(c_t)$ subject to $\sum_{t=0}^\infty c_t \leq 1$
- we'll take $\delta = 0.9$ and $u(c) = \sqrt{c}$ for illustration
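Equivalently (a restatement of the budget constraint in terms of the state variable, added for reference): writing $x_t$ for the cake remaining at date $t$,
$$x_0 = 1, \qquad x_{t+1} = x_t - c_t, \qquad 0 \le c_t \le x_t,$$
so the amount of cake left is exactly the state variable described above.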
Value Function¶
- maximum discounted future utility achievable starting from a given amount of cake $x$
$$V(x) = \max_{\sum c_t \le x} \sum_{t=0}^\infty \delta^t u(c_t)$$
- satisfies Bellman equation
$$V(x) = \max_{0 \le c \le x}\,\bigl[u(c) + \delta V(x-c)\bigr]$$
Value Iteration¶
- start with an arbitrary guess $V_0$ for $V$, for example, $V_0(x)=0$ for all $x$.
- define $V_1(x) = \max_{0 \le c \le x}\,[u(c) + \delta V_0(x-c)]$.
- define $V_2(x) = \max_{0 \le c \le x}\,[u(c) + \delta V_1(x-c)]$, etc.
- the Bellman operator is a contraction mapping (see the bound below), so the iterates $V_n$ converge to the true value function $V$.
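A standard bound (stated here without proof; not spelled out in the original notes) explains why: for any two bounded candidate value functions $V$ and $W$, the Bellman operator $T$ defined by $(TV)(x) = \max_{0 \le c \le x}\,[u(c) + \delta V(x-c)]$ satisfies
$$\|TV - TW\|_\infty \le \delta\,\|V - W\|_\infty,$$
so each iteration shrinks the sup-norm distance to the fixed point by at least the factor $\delta = 0.9$.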
In [5]:
import numpy as np
from scipy.optimize import minimize
from scipy.interpolate import interp1d

# utility function, discount factor, and grid of cake sizes for interpolation
u = np.sqrt
delta = 0.9
cake_grid = np.linspace(0, 1, 100)
In [ ]:
# solve optimization in Bellman equation
# return value of right-hand side and optimal action
def bellman(V, cake):
    if cake == 0:
        return 0, 0
    else:
        # maximize u(c) + delta * V(cake - c) over 0 <= c <= cake by minimizing its negative
        rhs = lambda consume: - u(consume) - delta * V(cake - consume)
        result = minimize(rhs, 0.5 * cake, bounds=[(0, cake)])
        return - result.fun, result.x.item()
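As a quick sanity check of `bellman` (added here; not part of the original notebook): with the zero guess $V_0 \equiv 0$, the best move is to eat all remaining cake, so the returned value should be approximately $\sqrt{x}$ and the action approximately $x$.
In [ ]:
# sanity check (added): with a zero continuation value, eating everything is optimal,
# so bellman should return roughly (sqrt(cake), cake)
value, action = bellman(lambda x: 0.0, 0.5)
print(value, np.sqrt(0.5))   # these should nearly agree
print(action, 0.5)           # the chosen action should be close to the full 0.5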
In [ ]:
# execute value iteration step V -> Vnew
def value_iteration(V):
    value_arr = np.empty_like(cake_grid)
    for i, c in enumerate(cake_grid):
        value, _ = bellman(V, c)
        value_arr[i] = value
    Vnew = interp1d(cake_grid, value_arr)
    return Vnew
In [ ]:
# iterate until convergence starting from V = 0
def fixed_point(tol=1e-6, max_iter=2000):
    err = 1
    n_iter = 0
    V = lambda c: 0
    while (err > tol) and (n_iter < max_iter):
        Vnew = value_iteration(V)
        # measure convergence by the sum of squared differences on the grid
        err = ((V(cake_grid) - Vnew(cake_grid))**2).sum()
        V = Vnew
        n_iter += 1
    return V, err, n_iter
In [ ]:
# get optimal policy from value function
def get_policy(V):
    action_arr = np.empty_like(cake_grid)
    for i, c in enumerate(cake_grid):
        _, action = bellman(V, c)
        action_arr[i] = action
    return interp1d(cake_grid, action_arr)
In [6]:
# run, get value and policy functions, and print data
V, err, n_iters = fixed_point()
policy = get_policy(V)
print("Converged after", n_iters, "iterations")
print("Error is", err)
Converged after 34 iterations
Error is 8.022033503718986e-07
Compare Value Functions¶
The true value function is $$V(x) = \sqrt{\frac{x}{1-\delta^2}}$$
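One way to verify this (a guess-and-verify sketch, not worked out in the original): try $V(x) = A\sqrt{x}$ in the Bellman equation,
$$A\sqrt{x} = \max_{0 \le c \le x}\,\bigl[\sqrt{c} + \delta A\sqrt{x-c}\bigr].$$
The first-order condition gives $x - c = \delta^2 A^2 c$, so the maximizer is $c = x/(1+\delta^2 A^2)$; substituting back gives $A\sqrt{x} = \sqrt{x}\,\sqrt{1+\delta^2 A^2}$, hence $A^2 = 1/(1-\delta^2)$ and
$$V(x) = \sqrt{\frac{x}{1-\delta^2}}, \qquad c(x) = (1-\delta^2)\,x,$$
which is also the analytic policy plotted below.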
In [7]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
# numeric solution
plt.plot(cake_grid, [V(c) for c in cake_grid], label="numeric")
# analytic solution
plt.plot(cake_grid, [np.sqrt(x/(1-delta**2)) for x in cake_grid], label="analytic")
plt.xlabel("Remaining Cake")
plt.ylabel("Value Function")
plt.legend()
plt.show()
In [8]:
# numeric solution
plt.plot(cake_grid, [policy(c) for c in cake_grid], label="numeric")
# analytic solution
plt.plot(cake_grid, [(1-delta**2)*x for x in cake_grid], label="analytic")
plt.xlabel("Remaining Cake")
plt.ylabel("Amount to Eat")
plt.legend()
plt.show()
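As a final illustration (added; not part of the original notebook), we can iterate the computed policy forward from a full cake and compare the resulting consumption path with the closed-form path $c_t = (1-\delta^2)\,\delta^{2t}$ implied by the analytic policy:
In [ ]:
# simulate the optimal consumption path from a full cake using the numeric policy
# and compare with the analytic path (1 - delta^2) * delta^(2t)
x = 1.0
numeric_path, analytic_path = [], []
for t in range(20):
    c = float(policy(x))               # numeric policy from the value iteration above
    numeric_path.append(c)
    analytic_path.append((1 - delta**2) * delta**(2 * t))
    x -= c                             # remaining cake evolves as x_{t+1} = x_t - c_t
plt.plot(numeric_path, label="numeric")
plt.plot(analytic_path, label="analytic")
plt.xlabel("Period")
plt.ylabel("Consumption")
plt.legend()
plt.show()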