
Commit e6748dc

Author: ddbourgin
Commit message: fix typos
1 parent 8dbe385 · commit e6748dc

2 files changed: +77, -77 lines changed


numpy_ml/bandits/bandits.py

Lines changed: 61 additions & 61 deletions
@@ -21,6 +21,29 @@ def __repr__(self):
         params = ", ".join(["{}={}".format(k, v) for (k, v) in HP.items() if k != "id"])
         return "{}({})".format(HP["id"], params)
 
+    @property
+    def hyperparameters(self):
+        """A dictionary of the bandit hyperparameters"""
+        return {}
+
+    @abstractmethod
+    def oracle_payoff(self, context=None):
+        """
+        Return the expected reward for an optimal agent.
+
+        Parameters
+        ----------
+        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
+            The current context matrix for each of the bandit arms, if
+            applicable. Default is None.
+
+        Returns
+        -------
+        optimal_rwd : float
+            The expected reward under an optimal policy.
+        """
+        pass
+
     def pull(self, arm_id, context=None):
         """
         "Pull" (i.e., sample from) a given arm's payoff distribution.
@@ -43,24 +66,6 @@ def pull(self, arm_id, context=None):
         self.step += 1
         return self._pull(arm_id, context)
 
-    @abstractmethod
-    def oracle_payoff(self, context=None):
-        """
-        Return the expected reward for an optimal agent.
-
-        Parameters
-        ----------
-        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
-            The current context matrix for each of the bandit arms, if
-            applicable. Default is None.
-
-        Returns
-        -------
-        optimal_rwd : float
-            The expected reward under an optimal policy.
-        """
-        pass
-
     def reset(self):
         """Reset the bandit step and action counters to zero."""
         self.step = 0
@@ -69,11 +74,6 @@ def reset(self):
     def _pull(self, arm_id):
         pass
 
-    @property
-    def hyperparameters(self):
-        """A dictionary of the bandit hyperparameters"""
-        return {}
-
 
 class MultinomialBandit(Bandit):
     def __init__(self, payoffs, payoff_probs):
@@ -114,11 +114,6 @@ def hyperparameters(self):
             "payoff_probs": self.payoff_probs,
         }
 
-    def _pull(self, arm_id, context):
-        payoffs = self.payoffs[arm_id]
-        probs = self.payoff_probs[arm_id]
-        return np.random.choice(payoffs, p=probs)
-
     def oracle_payoff(self, context=None):
         """
         Return the expected reward for an optimal agent.
@@ -135,6 +130,11 @@ def oracle_payoff(self, context=None):
         """
         return self.best_ev
 
+    def _pull(self, arm_id, context):
+        payoffs = self.payoffs[arm_id]
+        probs = self.payoff_probs[arm_id]
+        return np.random.choice(payoffs, p=probs)
+
 
 class BernoulliBandit(Bandit):
     def __init__(self, payoff_probs):
@@ -168,9 +168,6 @@ def hyperparameters(self):
             "payoff_probs": self.payoff_probs,
         }
 
-    def _pull(self, arm_id, context):
-        return int(np.random.rand() <= self.payoff_probs[arm_id])
-
     def oracle_payoff(self, context=None):
         """
         Return the expected reward for an optimal agent.
@@ -187,6 +184,9 @@ def oracle_payoff(self, context=None):
         """
         return self.best_ev
 
+    def _pull(self, arm_id, context):
+        return int(np.random.rand() <= self.payoff_probs[arm_id])
+
 
 class GaussianBandit(Bandit):
     def __init__(self, payoff_dists, payoff_probs):
@@ -286,15 +286,6 @@ def __init__(self, G, start_vertex, end_vertex):
         placeholder = [None] * len(self.paths)
         super().__init__(placeholder, placeholder)
 
-    def _calc_arm_evs(self):
-        I2V = self.G.get_vertex
-        evs = np.zeros(len(self.paths))
-        for p_ix, path in enumerate(self.paths):
-            for ix, v_i in enumerate(path[:-1]):
-                e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
-                evs[p_ix] -= e.weight
-        return evs
-
     @property
     def hyperparameters(self):
         """A dictionary of the bandit hyperparameters"""
@@ -305,15 +296,6 @@ def hyperparameters(self):
             "start_vertex": self.start_vertex,
         }
 
-    def _pull(self, arm_id, context):
-        reward = 0
-        I2V = self.G.get_vertex
-        path = self.paths[arm_id]
-        for ix, v_i in enumerate(path[:-1]):
-            e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
-            reward -= e.weight
-        return reward
-
     def oracle_payoff(self, context=None):
         """
         Return the expected reward for an optimal agent.
@@ -330,6 +312,24 @@ def oracle_payoff(self, context=None):
         """
         return self.best_ev
 
+    def _calc_arm_evs(self):
+        I2V = self.G.get_vertex
+        evs = np.zeros(len(self.paths))
+        for p_ix, path in enumerate(self.paths):
+            for ix, v_i in enumerate(path[:-1]):
+                e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
+                evs[p_ix] -= e.weight
+        return evs
+
+    def _pull(self, arm_id, context):
+        reward = 0
+        I2V = self.G.get_vertex
+        path = self.paths[arm_id]
+        for ix, v_i in enumerate(path[:-1]):
+            e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
+            reward -= e.weight
+        return reward
+
 
 class ContextualBernoulliBandit(Bandit):
     def __init__(self, context_probs):
@@ -379,12 +379,6 @@ def get_context(self):
         context[np.random.choice(D), :] = 1
         return random_one_hot_matrix(1, D).ravel()
 
-    def _pull(self, arm_id, context):
-        D, K = self.context_probs.shape
-        arm_probs = context[:, arm_id] @ self.context_probs
-        arm_rwds = (np.random.rand(K) <= arm_probs).astype(int)
-        return arm_rwds[arm_id]
-
     def oracle_payoff(self, context):
         """
         Return the expected reward for an optimal agent.
@@ -402,6 +396,12 @@ def oracle_payoff(self, context):
         """
         return context[:, 0] @ self.best_ev
 
+    def _pull(self, arm_id, context):
+        D, K = self.context_probs.shape
+        arm_probs = context[:, arm_id] @ self.context_probs
+        arm_rwds = (np.random.rand(K) <= arm_probs).astype(int)
+        return arm_rwds[arm_id]
+
 
 class ContextualLinearBandit(Bandit):
     def __init__(self, K, D, payoff_variance=1):
@@ -484,12 +484,6 @@ def get_context(self):
         """
         return np.random.normal(size=(self.D, self.K))
 
-    def _pull(self, arm_id, context):
-        K, thetas = self.K, self.thetas
-        self._noise = np.random.normal(scale=self.payoff_variance, size=self.K)
-        self.arm_evs = np.array([context[:, k] @ thetas[:, k] for k in range(K)])
-        return (self.arm_evs + self._noise)[arm_id]
-
     def oracle_payoff(self, context):
         """
         Return the expected reward for an optimal agent.
@@ -507,3 +501,9 @@ def oracle_payoff(self, context):
         """
         best_arm = np.argmax(self.arm_evs)
         return self.arm_evs[best_arm]
+
+    def _pull(self, arm_id, context):
+        K, thetas = self.K, self.thetas
+        self._noise = np.random.normal(scale=self.payoff_variance, size=self.K)
+        self.arm_evs = np.array([context[:, k] @ thetas[:, k] for k in range(K)])
+        return (self.arm_evs + self._noise)[arm_id]
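
Note (not part of the commit): a minimal usage sketch of the Bandit interface reordered above. It assumes BernoulliBandit is importable from numpy_ml.bandits, as the corrected cross-reference in policies.py below suggests, and that oracle_payoff is usable immediately after construction; treat it as illustrative only.

import numpy as np
from numpy_ml.bandits import BernoulliBandit

np.random.seed(12345)

# Three Bernoulli arms with payoff probabilities 0.1, 0.5, and 0.8.
bandit = BernoulliBandit(payoff_probs=[0.1, 0.5, 0.8])

# `pull` advances the bandit's step counter and samples a 0/1 reward
# via the `_pull` implementation shown in the diff above.
reward = bandit.pull(arm_id=2)

# `oracle_payoff` returns the expected reward of an optimal agent,
# i.e. the best arm's expected value (0.8 here).
print(reward, bandit.oracle_payoff())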

numpy_ml/bandits/policies.py

Lines changed: 16 additions & 16 deletions
@@ -32,14 +32,6 @@ def parameters(self):
         """A dictionary containing the current policy parameters"""
         pass
 
-    @abstractmethod
-    def _initialize_params(self, bandit):
-        """
-        Initialize any policy-specific parameters that depend on information
-        from the bandit environment.
-        """
-        pass
-
     def act(self, bandit, context=None):
         """
         Select an arm and sample from its payoff distribution.
@@ -68,17 +60,17 @@ def act(self, bandit, context=None):
         self._update_params(arm_id, rwd, context)
         return rwd, arm_id
 
-    def _pull_arm(self, bandit, arm_id, context):
-        """Execute a bandit action and return the received reward."""
-        self.step += 1
-        return bandit.pull(arm_id, context)
-
     def reset(self):
         """Reset the policy parameters and counters to their initial states."""
         self.step = 0
         self._reset_params()
         self.is_initialized = False
 
+    def _pull_arm(self, bandit, arm_id, context):
+        """Execute a bandit action and return the received reward."""
+        self.step += 1
+        return bandit.pull(arm_id, context)
+
     @abstractmethod
     def _select_arm(self, bandit, context):
         """Select an arm based on the current context"""
@@ -89,6 +81,14 @@ def _update_params(self, bandit, context):
         """Update the policy parameters after an interaction"""
         pass
 
+    @abstractmethod
+    def _initialize_params(self, bandit):
+        """
+        Initialize any policy-specific parameters that depend on information
+        from the bandit environment.
+        """
+        pass
+
     @abstractmethod
     def _reset_params(self):
         """
@@ -267,7 +267,7 @@ def _update_params(self, arm_id, reward, context=None):
     def _reset_params(self):
         """
         Reset any model-specific parameters. This gets called within the
-        public `self.reset()` method.
+        public :method:`reset` method.
         """
         self.ev_estimates = {}
         self.pull_counts = defaultdict(lambda: 0)
@@ -282,7 +282,7 @@ def __init__(self, alpha=1, beta=1):
         Notes
         -----
         The policy assumes independent Beta priors on the Bernoulli arm payoff
-        probabilities, :math:`\\theta`:
+        probabilities, :math:`\theta`:
 
         .. math::
 
@@ -414,7 +414,7 @@ def __init__(self, alpha=1):
 
         Notes
         -----
-        LinUCB is only defined for :class:`ContextualLinearBandit <numpy_ml.bandits.bandits.ContextualLinearBandit>` environments.
+        LinUCB is only defined for :class:`ContextualLinearBandit <numpy_ml.bandits.ContextualLinearBandit>` environments.
 
         References
         ----------