\documentclass[11pt]{article}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{array}
\usepackage{amssymb}
\usepackage{bm}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\bH}{\mathbf{H}}
\newcommand{\ie}{{\em i.e.}}
\newcommand{\eg}{{\em e.g.}}
\begin{document}
\title{Math for Diderot}
\maketitle
In the following,
\begin{itemize}
  \item $\bx$ is a vector (an element of some vector space $W$)
  \item $\alpha$ is a constant scalar
  \item $\phi$ is a scalar function of a scalar (\eg\ $\phi(x) = x^2$)
  \item $f$ and $g$ are scalar functions of $W$
  \item $\mathbf{u} \otimes \mathbf{v}$ is the tensor product of two vectors, computed as the outer product of their vectors of coefficients in some basis.
  \item $\nabla f$ is the gradient (first derivative) of $f$, computed in 3-D as:
    \begin{equation}
      \nabla f =
      \begin{bmatrix}
        \frac{\partial}{\partial x} \\
        \frac{\partial}{\partial y} \\
        \frac{\partial}{\partial z}
      \end{bmatrix} f
      =
      \begin{bmatrix}
        \frac{\partial f}{\partial x} \\
        \frac{\partial f}{\partial y} \\
        \frac{\partial f}{\partial z}
      \end{bmatrix}
    \end{equation}
    where $x$, $y$, $z$ are the coordinates in $W$ (\ie\ some basis is assumed). The formulae below don't assume a particular dimension. Note that $\nabla$ can also be used to define divergence and curl of vector fields, but for the time being these are not Diderot's concern.
  \item $\bH f = \nabla \otimes \nabla f$ is the Hessian (second derivative) of $f$, computed in 3-D as:
    \begin{equation}
      \bH f =
      \begin{bmatrix}
        \frac{\partial}{\partial x} \\
        \frac{\partial}{\partial y} \\
        \frac{\partial}{\partial z}
      \end{bmatrix}
      \begin{bmatrix}
        \frac{\partial}{\partial x} & \frac{\partial}{\partial y} & \frac{\partial}{\partial z}
      \end{bmatrix} f
      =
      \begin{bmatrix}
        \frac{\partial^2 f}{\partial x^2} & \frac{\partial^2 f}{\partial x \partial y} & \frac{\partial^2 f}{\partial x \partial z} \\
        \frac{\partial^2 f}{\partial x \partial y} & \frac{\partial^2 f}{\partial y^2} & \frac{\partial^2 f}{\partial y \partial z} \\
        \frac{\partial^2 f}{\partial x \partial z} & \frac{\partial^2 f}{\partial y \partial z} & \frac{\partial^2 f}{\partial z^2}
      \end{bmatrix}
    \end{equation}
\end{itemize}
Basic rules for the gradient:
\begin{align}
  \nabla (f + g) &= \nabla f + \nabla g \\
  \nabla (f g) &= f \nabla g + g \nabla f \label{eq:grad-prod} \\
  \nabla (\alpha f) &= \alpha \nabla f \label{eq:grad-scale} \\
  \nabla (\phi(f)) &= \phi'(f) \nabla f \label{eq:grad-chain} \\
  \nabla (f^n) &= n f^{n-1} \nabla f \label{eq:grad-pow} \\
  \nabla \left(\frac{f}{g}\right) &= \frac{\nabla f}{g} - \frac{f \nabla g}{g^2} \label{eq:grad-frac}
\end{align}
Rule~\eqref{eq:grad-scale} follows from~\eqref{eq:grad-prod} with $\nabla \alpha = 0$. Rule~\eqref{eq:grad-frac} follows from~\eqref{eq:grad-prod} and~\eqref{eq:grad-pow} by writing $f/g = f g^{-1}$ and taking $n = -1$.
Basic rules for the Hessian:
\begin{align}
  \bH (f + g) &= \bH f + \bH g \\
  \bH (\alpha f) &= \alpha \bH f \\
  \bH (f g) &= f \bH g + \nabla f \otimes \nabla g + \nabla g \otimes \nabla f + g \bH f \\
  \bH \left(\frac{f}{g}\right) &= \frac{\bH f}{g} - \frac{\nabla f \otimes \nabla g + \nabla g \otimes \nabla f + f \bH g}{g^2} + \frac{2 f \nabla g \otimes \nabla g}{g^3} \label{eq:hess-quot} \\
  \bH (f^n) &= n f^{n-2} \left( (n-1) \nabla f \otimes \nabla f + f \bH f \right) \label{eq:hess-pow}
\end{align}
All of these can actually be derived with $\bH f = \nabla \otimes \nabla f$ and the rules above. Someone may want to double-check~\eqref{eq:hess-quot}. I didn't include the Hessian of the chain rule (analogous to~\eqref{eq:grad-chain}) because the process of starting to derive it inspired me to start learning enough ML to write a program that would derive it for me\dots
\end{document}
% End of document.