@@ -21,6 +21,29 @@ def __repr__(self):
         params = ", ".join(["{}={}".format(k, v) for (k, v) in HP.items() if k != "id"])
         return "{}({})".format(HP["id"], params)
 
+    @property
+    def hyperparameters(self):
+        """A dictionary of the bandit hyperparameters"""
+        return {}
+
+    @abstractmethod
+    def oracle_payoff(self, context=None):
+        """
+        Return the expected reward for an optimal agent.
+
+        Parameters
+        ----------
+        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
+            The current context matrix for each of the bandit arms, if
+            applicable. Default is None.
+
+        Returns
+        -------
+        optimal_rwd : float
+            The expected reward under an optimal policy.
+        """
+        pass
+
     def pull(self, arm_id, context=None):
         """
         "Pull" (i.e., sample from) a given arm's payoff distribution.
@@ -43,24 +66,6 @@ def pull(self, arm_id, context=None):
         self.step += 1
         return self._pull(arm_id, context)
 
-    @abstractmethod
-    def oracle_payoff(self, context=None):
-        """
-        Return the expected reward for an optimal agent.
-
-        Parameters
-        ----------
-        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
-            The current context matrix for each of the bandit arms, if
-            applicable. Default is None.
-
-        Returns
-        -------
-        optimal_rwd : float
-            The expected reward under an optimal policy.
-        """
-        pass
-
     def reset(self):
         """Reset the bandit step and action counters to zero."""
         self.step = 0
@@ -69,11 +74,6 @@ def reset(self):
     def _pull(self, arm_id):
         pass
 
-    @property
-    def hyperparameters(self):
-        """A dictionary of the bandit hyperparameters"""
-        return {}
-
 
 class MultinomialBandit(Bandit):
     def __init__(self, payoffs, payoff_probs):
@@ -114,11 +114,6 @@ def hyperparameters(self):
             "payoff_probs": self.payoff_probs,
         }
 
-    def _pull(self, arm_id, context):
-        payoffs = self.payoffs[arm_id]
-        probs = self.payoff_probs[arm_id]
-        return np.random.choice(payoffs, p=probs)
-
     def oracle_payoff(self, context=None):
         """
         Return the expected reward for an optimal agent.
@@ -135,6 +130,11 @@ def oracle_payoff(self, context=None):
         """
         return self.best_ev
 
+    def _pull(self, arm_id, context):
+        payoffs = self.payoffs[arm_id]
+        probs = self.payoff_probs[arm_id]
+        return np.random.choice(payoffs, p=probs)
+
 
 class BernoulliBandit(Bandit):
     def __init__(self, payoff_probs):
@@ -168,9 +168,6 @@ def hyperparameters(self):
             "payoff_probs": self.payoff_probs,
         }
 
-    def _pull(self, arm_id, context):
-        return int(np.random.rand() <= self.payoff_probs[arm_id])
-
     def oracle_payoff(self, context=None):
         """
         Return the expected reward for an optimal agent.
@@ -187,6 +184,9 @@ def oracle_payoff(self, context=None):
         """
         return self.best_ev
 
+    def _pull(self, arm_id, context):
+        return int(np.random.rand() <= self.payoff_probs[arm_id])
+
 
 class GaussianBandit(Bandit):
     def __init__(self, payoff_dists, payoff_probs):
@@ -286,15 +286,6 @@ def __init__(self, G, start_vertex, end_vertex):
         placeholder = [None] * len(self.paths)
         super().__init__(placeholder, placeholder)
 
-    def _calc_arm_evs(self):
-        I2V = self.G.get_vertex
-        evs = np.zeros(len(self.paths))
-        for p_ix, path in enumerate(self.paths):
-            for ix, v_i in enumerate(path[:-1]):
-                e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
-                evs[p_ix] -= e.weight
-        return evs
-
     @property
     def hyperparameters(self):
         """A dictionary of the bandit hyperparameters"""
@@ -305,15 +296,6 @@ def hyperparameters(self):
             "start_vertex": self.start_vertex,
         }
 
-    def _pull(self, arm_id, context):
-        reward = 0
-        I2V = self.G.get_vertex
-        path = self.paths[arm_id]
-        for ix, v_i in enumerate(path[:-1]):
-            e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
-            reward -= e.weight
-        return reward
-
     def oracle_payoff(self, context=None):
         """
         Return the expected reward for an optimal agent.
@@ -330,6 +312,24 @@ def oracle_payoff(self, context=None):
         """
         return self.best_ev
 
+    def _calc_arm_evs(self):
+        I2V = self.G.get_vertex
+        evs = np.zeros(len(self.paths))
+        for p_ix, path in enumerate(self.paths):
+            for ix, v_i in enumerate(path[:-1]):
+                e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
+                evs[p_ix] -= e.weight
+        return evs
+
+    def _pull(self, arm_id, context):
+        reward = 0
+        I2V = self.G.get_vertex
+        path = self.paths[arm_id]
+        for ix, v_i in enumerate(path[:-1]):
+            e = [e for e in self.adj_dict[v_i] if e.to == I2V(path[ix + 1])][0]
+            reward -= e.weight
+        return reward
+
 
 class ContextualBernoulliBandit(Bandit):
     def __init__(self, context_probs):
@@ -379,12 +379,6 @@ def get_context(self):
         context[np.random.choice(D), :] = 1
         return random_one_hot_matrix(1, D).ravel()
 
-    def _pull(self, arm_id, context):
-        D, K = self.context_probs.shape
-        arm_probs = context[:, arm_id] @ self.context_probs
-        arm_rwds = (np.random.rand(K) <= arm_probs).astype(int)
-        return arm_rwds[arm_id]
-
     def oracle_payoff(self, context):
         """
         Return the expected reward for an optimal agent.
@@ -402,6 +396,12 @@ def oracle_payoff(self, context):
         """
         return context[:, 0] @ self.best_ev
 
+    def _pull(self, arm_id, context):
+        D, K = self.context_probs.shape
+        arm_probs = context[:, arm_id] @ self.context_probs
+        arm_rwds = (np.random.rand(K) <= arm_probs).astype(int)
+        return arm_rwds[arm_id]
+
 
 class ContextualLinearBandit(Bandit):
     def __init__(self, K, D, payoff_variance=1):
@@ -484,12 +484,6 @@ def get_context(self):
         """
         return np.random.normal(size=(self.D, self.K))
 
-    def _pull(self, arm_id, context):
-        K, thetas = self.K, self.thetas
-        self._noise = np.random.normal(scale=self.payoff_variance, size=self.K)
-        self.arm_evs = np.array([context[:, k] @ thetas[:, k] for k in range(K)])
-        return (self.arm_evs + self._noise)[arm_id]
-
     def oracle_payoff(self, context):
         """
         Return the expected reward for an optimal agent.
@@ -507,3 +501,9 @@ def oracle_payoff(self, context):
         """
         best_arm = np.argmax(self.arm_evs)
         return self.arm_evs[best_arm]
+
+    def _pull(self, arm_id, context):
+        K, thetas = self.K, self.thetas
+        self._noise = np.random.normal(scale=self.payoff_variance, size=self.K)
+        self.arm_evs = np.array([context[:, k] @ thetas[:, k] for k in range(K)])
+        return (self.arm_evs + self._noise)[arm_id]
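
This diff only reorders methods; the public interface (pull, oracle_payoff, hyperparameters, reset) is unchanged. Below is a minimal usage sketch of that interface, not part of the patch. The import path (a "bandits" module) and the random policy are assumptions for illustration only.

# Illustrative sketch only -- the `bandits` import path is hypothetical, not taken from this diff.
import numpy as np

from bandits import BernoulliBandit  # assumed module path

bandit = BernoulliBandit(payoff_probs=[0.1, 0.5, 0.8])
bandit.reset()

rewards = []
for _ in range(1000):
    arm = np.random.randint(3)        # random policy, purely for illustration
    rewards.append(bandit.pull(arm))  # `context` defaults to None for non-contextual bandits

# Average per-step regret of the random policy relative to an oracle
# that always plays the best arm (bandit.oracle_payoff() == best_ev here).
regret = bandit.oracle_payoff() - np.mean(rewards)
print(bandit.hyperparameters, regret)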