9 |
\newcommand{\bx}{\mathbf{x}} |
\newcommand{\bx}{\mathbf{x}} |
10 |
\newcommand{\bH}{\mathbf{H}} |
\newcommand{\bH}{\mathbf{H}} |
11 |
\newcommand{\ie}{{\em i.e.}} |
\newcommand{\ie}{{\em i.e.}} |
12 |
|
\newcommand{\eg}{{\em e.g.}} |
13 |
|
|
14 |
\begin{document} |
\begin{document} |
15 |
|
|
19 |
\begin{itemize} |
\begin{itemize} |
20 |
\item $\bx$ is a vector (an element of some vector space $W$) |
\item $\bx$ is a vector (an element of some vector space $W$) |
21 |
\item $\alpha$ is a constant scalar |
\item $\alpha$ is a constant scalar |
22 |
|
\item $\phi$ is a scalar function of a scalar (\eg\ $\phi(x) = x^2$) |
23 |
\item $f$ and $g$ are scalar functions of $W$ |
\item $f$ and $g$ are scalar functions of $W$ |
24 |
\item $\mathbf{u} \otimes \mathbf{v}$ is the tensor product of two vectors, |
\item $\mathbf{u} \otimes \mathbf{v}$ is the tensor product of two vectors, |
25 |
computed as the outer product of their vectors of coefficients in |
computed as the outer product of their vectors of coefficients in |
38 |
\frac{\partial f}{\partial z} |
\frac{\partial f}{\partial z} |
39 |
\end{bmatrix} |
\end{bmatrix} |
40 |
\end{equation} |
\end{equation} |
41 |
where $x$, $y$, $z$ are the coordinates in $W$ (\ie{} some basis |
where $x$, $y$, $z$ are the coordinates in $W$ (\ie\ some basis |
42 |
is assumed). The formulae below don't assume a particular dimension. |
is assumed). The formulae below don't assume a particular dimension. |
43 |
Note that $\nabla$ can also be used to define divergence and curl |
Note that $\nabla$ can also be used to define divergence and curl |
44 |
of vector fields, but for the time being these are not Diderot's concern. |
of vector fields, but for the time being these are not Diderot's concern. |
67 |
\nabla (f + g) &= \nabla f + \nabla g \\ |
\nabla (f + g) &= \nabla f + \nabla g \\ |
68 |
\nabla (f g) &= f \nabla g + g \nabla f \label{eq:grad-prod} \\ |
\nabla (f g) &= f \nabla g + g \nabla f \label{eq:grad-prod} \\ |
69 |
\nabla (\alpha f) &= \alpha \nabla f \label{eq:grad-scale} \\ |
\nabla (\alpha f) &= \alpha \nabla f \label{eq:grad-scale} \\ |
70 |
|
\nabla (\phi(f)) &= \phi'(f) \nabla f \label{eq:grad-chain} \\ |
71 |
\nabla (f^n) &= n f^{n-1} \nabla f \label{eq:grad-pow} \\ |
\nabla (f^n) &= n f^{n-1} \nabla f \label{eq:grad-pow} \\ |
72 |
\nabla \left(\frac{f}{g}\right) |
\nabla \left(\frac{f}{g}\right) |
73 |
&= \frac{\nabla f}{g} - \frac{f \nabla g}{g^2} \label{eq:grad-frac} \\ |
&= \frac{\nabla f}{g} - \frac{f \nabla g}{g^2} \label{eq:grad-frac} \\ |
88 |
\bH (f^n) &= n f^{n-2} \left( (n-1) \nabla f \otimes \nabla f + f \bH f \right) \label{eq:hess-pow} |
\bH (f^n) &= n f^{n-2} \left( (n-1) \nabla f \otimes \nabla f + f \bH f \right) \label{eq:hess-pow} |
89 |
\end{align} |
\end{align} |
90 |
|
|
91 |
All of these can actually be derived with $\bH f = \nabla \otimes \nabla f$ |
All of these can actually be derived with $\bH f = \nabla \otimes |
92 |
and the rules above. Someone may want to double-check (\ref{eq:hess-quot}). |
\nabla f$ and the rules above. Someone may want to double-check |
93 |
|
(\ref{eq:hess-quot}). I didn't include the Hessian of the chain rule |
94 |
|
(analogous to (\ref{eq:grad-chain})) because the process of starting |
95 |
|
to derive it inspired me to start learning enough ML to write a program |
96 |
|
that would derive it for me\dots |
97 |
|
|
98 |
\end{document} |
\end{document} |