\documentclass[12pt,titlepage]{article} \usepackage{amsmath} \usepackage{amsfonts} \usepackage{amssymb} \usepackage{amsthm} \usepackage{mathtools} \usepackage{graphicx} \usepackage{color} \usepackage{ucs} \usepackage[utf8x]{inputenc} \usepackage{xparse} \usepackage{hyperref} %----Macros---------- % % Unresolved issues: % % \righttoleftarrow % \lefttorightarrow % % \color{} with HTML colorspec % \bgcolor % \array with options (without options, it's equivalent to the matrix environment) % Of the standard HTML named colors, white, black, red, green, blue and yellow % are predefined in the color package. Here are the rest. \definecolor{aqua}{rgb}{0, 1.0, 1.0} \definecolor{fuschia}{rgb}{1.0, 0, 1.0} \definecolor{gray}{rgb}{0.502, 0.502, 0.502} \definecolor{lime}{rgb}{0, 1.0, 0} \definecolor{maroon}{rgb}{0.502, 0, 0} \definecolor{navy}{rgb}{0, 0, 0.502} \definecolor{olive}{rgb}{0.502, 0.502, 0} \definecolor{purple}{rgb}{0.502, 0, 0.502} \definecolor{silver}{rgb}{0.753, 0.753, 0.753} \definecolor{teal}{rgb}{0, 0.502, 0.502} % Because of conflicts, \space and \mathop are converted to % \itexspace and \operatorname during preprocessing. % itex: \space{ht}{dp}{wd} % % Height and baseline depth measurements are in units of tenths of an ex while % the width is measured in tenths of an em. \makeatletter \newdimen\itex@wd% \newdimen\itex@dp% \newdimen\itex@thd% \def\itexspace#1#2#3{\itex@wd=#3em% \itex@wd=0.1\itex@wd% \itex@dp=#2ex% \itex@dp=0.1\itex@dp% \itex@thd=#1ex% \itex@thd=0.1\itex@thd% \advance\itex@thd\the\itex@dp% \makebox[\the\itex@wd]{\rule[-\the\itex@dp]{0cm}{\the\itex@thd}}} \makeatother % \tensor and \multiscript \makeatletter \newif\if@sup \newtoks\@sups \def\append@sup#1{\edef\act{\noexpand\@sups={\the\@sups #1}}\act}% \def\reset@sup{\@supfalse\@sups={}}% \def\mk@scripts#1#2{\if #2/ \if@sup ^{\the\@sups}\fi \else% \ifx #1_ \if@sup ^{\the\@sups}\reset@sup \fi {}_{#2}% \else \append@sup#2 \@suptrue \fi% \expandafter\mk@scripts\fi} \def\tensor#1#2{\reset@sup#1\mk@scripts#2_/} \def\multiscripts#1#2#3{\reset@sup{}\mk@scripts#1_/#2% \reset@sup\mk@scripts#3_/} \makeatother % \slash \makeatletter \newbox\slashbox \setbox\slashbox=\hbox{$/$} \def\itex@pslash#1{\setbox\@tempboxa=\hbox{$#1$} \@tempdima=0.5\wd\slashbox \advance\@tempdima 0.5\wd\@tempboxa \copy\slashbox \kern-\@tempdima \box\@tempboxa} \def\slash{\protect\itex@pslash} \makeatother % math-mode versions of \rlap, etc % from Alexander Perlis, "A complement to \smash, \llap, and lap" % http://math.arizona.edu/~aprl/publications/mathclap/ \def\clap#1{\hbox to 0pt{\hss#1\hss}} \def\mathllap{\mathpalette\mathllapinternal} \def\mathrlap{\mathpalette\mathrlapinternal} \def\mathclap{\mathpalette\mathclapinternal} \def\mathllapinternal#1#2{\llap{$\mathsurround=0pt#1{#2}$}} \def\mathrlapinternal#1#2{\rlap{$\mathsurround=0pt#1{#2}$}} \def\mathclapinternal#1#2{\clap{$\mathsurround=0pt#1{#2}$}} % Renames \sqrt as \oldsqrt and redefine root to result in \sqrt[#1]{#2} \let\oldroot\root \def\root#1#2{\oldroot #1 \of{#2}} \renewcommand{\sqrt}[2][]{\oldroot #1 \of{#2}} % Manually declare the txfonts symbolsC font \DeclareSymbolFont{symbolsC}{U}{txsyc}{m}{n} \SetSymbolFont{symbolsC}{bold}{U}{txsyc}{bx}{n} \DeclareFontSubstitution{U}{txsyc}{m}{n} % Manually declare the stmaryrd font \DeclareSymbolFont{stmry}{U}{stmry}{m}{n} \SetSymbolFont{stmry}{bold}{U}{stmry}{b}{n} % Manually declare the MnSymbolE font \DeclareFontFamily{OMX}{MnSymbolE}{} \DeclareSymbolFont{mnomx}{OMX}{MnSymbolE}{m}{n} \SetSymbolFont{mnomx}{bold}{OMX}{MnSymbolE}{b}{n} \DeclareFontShape{OMX}{MnSymbolE}{m}{n}{ <-6> MnSymbolE5 <6-7> MnSymbolE6 <7-8> MnSymbolE7 <8-9> MnSymbolE8 <9-10> MnSymbolE9 <10-12> MnSymbolE10 <12-> MnSymbolE12}{} % Declare specific arrows from txfonts without loading the full package \makeatletter \def\re@DeclareMathSymbol#1#2#3#4{% \let#1=\undefined \DeclareMathSymbol{#1}{#2}{#3}{#4}} \re@DeclareMathSymbol{\neArrow}{\mathrel}{symbolsC}{116} \re@DeclareMathSymbol{\neArr}{\mathrel}{symbolsC}{116} \re@DeclareMathSymbol{\seArrow}{\mathrel}{symbolsC}{117} \re@DeclareMathSymbol{\seArr}{\mathrel}{symbolsC}{117} \re@DeclareMathSymbol{\nwArrow}{\mathrel}{symbolsC}{118} \re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118} \re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119} \re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119} \re@DeclareMathSymbol{\nequiv}{\mathrel}{symbolsC}{46} \re@DeclareMathSymbol{\Perp}{\mathrel}{symbolsC}{121} \re@DeclareMathSymbol{\Vbar}{\mathrel}{symbolsC}{121} \re@DeclareMathSymbol{\sslash}{\mathrel}{stmry}{12} \re@DeclareMathSymbol{\bigsqcap}{\mathop}{stmry}{"64} \re@DeclareMathSymbol{\biginterleave}{\mathop}{stmry}{"6} \re@DeclareMathSymbol{\invamp}{\mathrel}{symbolsC}{77} \re@DeclareMathSymbol{\parr}{\mathrel}{symbolsC}{77} \makeatother % \llangle, \rrangle, \lmoustache and \rmoustache from MnSymbolE \makeatletter \def\Decl@Mn@Delim#1#2#3#4{% \if\relax\noexpand#1% \let#1\undefined \fi \DeclareMathDelimiter{#1}{#2}{#3}{#4}{#3}{#4}} \def\Decl@Mn@Open#1#2#3{\Decl@Mn@Delim{#1}{\mathopen}{#2}{#3}} \def\Decl@Mn@Close#1#2#3{\Decl@Mn@Delim{#1}{\mathclose}{#2}{#3}} \Decl@Mn@Open{\llangle}{mnomx}{'164} \Decl@Mn@Close{\rrangle}{mnomx}{'171} \Decl@Mn@Open{\lmoustache}{mnomx}{'245} \Decl@Mn@Close{\rmoustache}{mnomx}{'244} \makeatother % Widecheck \makeatletter \DeclareRobustCommand\widecheck[1]{{\mathpalette\@widecheck{#1}}} \def\@widecheck#1#2{% \setbox\z@\hbox{\m@th$#1#2$}% \setbox\tw@\hbox{\m@th$#1% \widehat{% \vrule\@width\z@\@height\ht\z@ \vrule\@height\z@\@width\wd\z@}$}% \dp\tw@-\ht\z@ \@tempdima\ht\z@ \advance\@tempdima2\ht\tw@ \divide\@tempdima\thr@@ \setbox\tw@\hbox{% \raise\@tempdima\hbox{\scalebox{1}[-1]{\lower\@tempdima\box \tw@}}}% {\ooalign{\box\tw@ \cr \box\z@}}} \makeatother % \mathraisebox{voffset}[height][depth]{something} \makeatletter \NewDocumentCommand\mathraisebox{moom}{% \IfNoValueTF{#2}{\def\@temp##1##2{\raisebox{#1}{$\m@th##1##2$}}}{% \IfNoValueTF{#3}{\def\@temp##1##2{\raisebox{#1}[#2]{$\m@th##1##2$}}% }{\def\@temp##1##2{\raisebox{#1}[#2][#3]{$\m@th##1##2$}}}}% \mathpalette\@temp{#4}} \makeatletter % udots (taken from yhmath) \makeatletter \def\udots{\mathinner{\mkern2mu\raise\p@\hbox{.} \mkern2mu\raise4\p@\hbox{.}\mkern1mu \raise7\p@\vbox{\kern7\p@\hbox{.}}\mkern1mu}} \makeatother %% Fix array \newcommand{\itexarray}[1]{\begin{matrix}#1\end{matrix}} %% \itexnum is a noop \newcommand{\itexnum}[1]{#1} %% Renaming existing commands \newcommand{\underoverset}[3]{\underset{#1}{\overset{#2}{#3}}} \newcommand{\widevec}{\overrightarrow} \newcommand{\darr}{\downarrow} \newcommand{\nearr}{\nearrow} \newcommand{\nwarr}{\nwarrow} \newcommand{\searr}{\searrow} \newcommand{\swarr}{\swarrow} \newcommand{\curvearrowbotright}{\curvearrowright} \newcommand{\uparr}{\uparrow} \newcommand{\downuparrow}{\updownarrow} \newcommand{\duparr}{\updownarrow} \newcommand{\updarr}{\updownarrow} \newcommand{\gt}{>} \newcommand{\lt}{<} \newcommand{\map}{\mapsto} \newcommand{\embedsin}{\hookrightarrow} \newcommand{\Alpha}{A} \newcommand{\Beta}{B} \newcommand{\Zeta}{Z} \newcommand{\Eta}{H} \newcommand{\Iota}{I} \newcommand{\Kappa}{K} \newcommand{\Mu}{M} \newcommand{\Nu}{N} \newcommand{\Rho}{P} \newcommand{\Tau}{T} \newcommand{\Upsi}{\Upsilon} \newcommand{\omicron}{o} \newcommand{\lang}{\langle} \newcommand{\rang}{\rangle} \newcommand{\Union}{\bigcup} \newcommand{\Intersection}{\bigcap} \newcommand{\Oplus}{\bigoplus} \newcommand{\Otimes}{\bigotimes} \newcommand{\Wedge}{\bigwedge} \newcommand{\Vee}{\bigvee} \newcommand{\coproduct}{\coprod} \newcommand{\product}{\prod} \newcommand{\closure}{\overline} \newcommand{\integral}{\int} \newcommand{\doubleintegral}{\iint} \newcommand{\tripleintegral}{\iiint} \newcommand{\quadrupleintegral}{\iiiint} \newcommand{\conint}{\oint} \newcommand{\contourintegral}{\oint} \newcommand{\infinity}{\infty} \newcommand{\bottom}{\bot} \newcommand{\minusb}{\boxminus} \newcommand{\plusb}{\boxplus} \newcommand{\timesb}{\boxtimes} \newcommand{\intersection}{\cap} \newcommand{\union}{\cup} \newcommand{\Del}{\nabla} \newcommand{\odash}{\circleddash} \newcommand{\negspace}{\!} \newcommand{\widebar}{\overline} \newcommand{\textsize}{\normalsize} \renewcommand{\scriptsize}{\scriptstyle} \newcommand{\scriptscriptsize}{\scriptscriptstyle} \newcommand{\mathfr}{\mathfrak} \newcommand{\statusline}[2]{#2} \newcommand{\tooltip}[2]{#2} \newcommand{\toggle}[2]{#2} % Theorem Environments \theoremstyle{plain} \newtheorem{theorem}{Theorem} \newtheorem{lemma}{Lemma} \newtheorem{prop}{Proposition} \newtheorem{cor}{Corollary} \newtheorem*{utheorem}{Theorem} \newtheorem*{ulemma}{Lemma} \newtheorem*{uprop}{Proposition} \newtheorem*{ucor}{Corollary} \theoremstyle{definition} \newtheorem{defn}{Definition} \newtheorem{example}{Example} \newtheorem*{udefn}{Definition} \newtheorem*{uexample}{Example} \theoremstyle{remark} \newtheorem{remark}{Remark} \newtheorem{note}{Note} \newtheorem*{uremark}{Remark} \newtheorem*{unote}{Note} %------------------------------------------------------------------- \begin{document} %------------------------------------------------------------------- \section*{Principal component analysis} \hypertarget{contents}{}\section*{{Contents}}\label{contents} \noindent\hyperlink{idea}{Idea}\dotfill \pageref*{idea} \linebreak \noindent\hyperlink{centering}{Centering}\dotfill \pageref*{centering} \linebreak \noindent\hyperlink{basic_mathematical_formulation}{Basic mathematical formulation}\dotfill \pageref*{basic_mathematical_formulation} \linebreak \noindent\hyperlink{sparse_pca}{Sparse PCA}\dotfill \pageref*{sparse_pca} \linebreak \noindent\hyperlink{references}{References}\dotfill \pageref*{references} \linebreak \hypertarget{idea}{}\subsection*{{Idea}}\label{idea} According to Wikipedia: \begin{quote}% \textbf{Principal component analysis (PCA)} is a mathematical procedure that uses an orthogonal transformation to convert a set of observations of possibly correlated variables into a set of values of uncorrelated variables called principal components. The number of principal components is less than or equal to the number of original variables. This transformation is defined in such a way that the first principal component has as high a variance as possible (that is, accounts for as much of the variability in the data as possible), and each succeeding component in turn has the highest variance possible under the constraint that it be orthogonal to (uncorrelated with) the preceding components. Principal components are guaranteed to be independent only if the data set is jointly normally distributed. PCA is sensitive to the relative scaling of the original variables. \end{quote} PCA has a variety of alternative names: \begin{itemize}% \item discrete Karhunen–Loève transform (KLT), \item Hotelling transform, \item proper orthogonal decomposition (POD). \end{itemize} PCA with $n$ basis vectors can also be viewed as a form of ``compression''/data reduction, namely the selection of an $n$ dimensional subspace $S$ such that projection of the dataset vectors to $S$ has the minimal summed $L_2$ error relative to the original dataset. (This $L_2$ minimisation interpretation is one ``explanation'' of the dependence of the PCA decomposition on variable scaling.) PCA is also often used for visualising ``dominant'' effects in a data-set and for creating predictive models (reducing the dimensionality of the data set for either computational or machine learning/statistical regularisation considerations). One obvious point is that PCA is only effective at finding structure which is approximately orthogonal vectors, so that other preprocessing such as \emph{detrending} may be required for PCA to be informative. \hypertarget{centering}{}\subsubsection*{{Centering}}\label{centering} The orthogonal transformation used by PCA is implicitly assuming that the dataset is generated by process with a zero ``expected value'', so that generally a ``mean vector'' is subtracted from all the data points or, equivalently, a constant mean vector is added to all reconstructions. There are two approaches to acheiving this: \begin{enumerate}% \item Using additional knowledge about the process generating the dataset to deteremine the mean, e.g., that the mean should actually be exactly zero. \item Using the computed average of the data set as the ``mean vector''. \end{enumerate} The compression viewpoint is relatively insensitive to the choice of mean, but techniques which use the vector of PCA components as random variates can be significantly affected by the mean vector. \hypertarget{basic_mathematical_formulation}{}\subsubsection*{{Basic mathematical formulation}}\label{basic_mathematical_formulation} Given an $m$-dimensional dataset $\{y^{i}\}_{i=0}^K$ the $n$ dimensional PCA is a set of $n$ orthonormal $m$-dimensional vectors $\{\mathbf{v}^j\}_{j=1}^n$ along with an ``artificial'' mean vector $\mathbf{v}^0$ (often referred to as $\mu$) along with a set of $n$-dimensional ``coefficient'' vectors $\{\lambda^{i}\}_{i=0}^K$ which can be used to represent approximations $\hat{y}^i$ to the original $y^i$ using \begin{displaymath} \hat{y}^i=\mu+\sum_{j=1}^n \lambda^i_j \mathbf{v}^j=\sum_{j=0}^n \lambda^i_j \mathbf{v}^j \end{displaymath} where the second expression absorbs the mean vector by using adding a coefficient $\lambda^i_0$ which is always $1$. The crucial property is that, other than the mean vector, all the $\mathbf{v}$s are orthonormal, i.e., $\mathbf{v}^i \cdot \mathbf{v}^j=\delta_{i j}$. In addition to optimising the ``reconstruction error'' for a given number of PCA basis vectors, this orthogonality gives rise to other useful calculational properties of PCA. In the above formulation the $\lambda^i_j$s may naturally have very different magnitudes, so in some applications there is a further decomposition as \begin{displaymath} \hat{y}^i=\mu+\sum_{j=1}^n w^i_j (\nu^j \mathbf{v}^j)=\mu+\sum_{j=1}^n w^i_j {\mathbf{v}'}^j \end{displaymath} (Again the mean $\mu$ can be absorbed into an ``artificial'' principal component if desired.) Here the $\nu^j$ value is the ``length scale'' of the $\mathbf{v}^j$ principal component, so that it can generally be taken as a reasonable assumption that the \emph{loadings} $w^i_j$ come from the same probability distribution, often further assumed to be the $N(0,1)$ normal distribution. (Technically the optimal $\mathbf{v}^j$ vectors turn out to be the \emph{eigenvectors} of covariance matrix of the sample set, and the $\nu^j$s are the square roots of the corresponding \emph{eigenvalues}.) \hypertarget{sparse_pca}{}\subsubsection*{{Sparse PCA}}\label{sparse_pca} Sparse PCA attempts to create a new set of orthogonal vectors which balance the fidelity of the reconstruction with sparseness of the vectors. \hypertarget{references}{}\subsection*{{References}}\label{references} \begin{itemize}% \item \href{http://en.wikipedia.org/wiki/Principal_component_analysis}{Principal component analysis},Wikipedia \end{itemize} category: mathematical methods [[!redirects principal component analysis]] [[!redirects PCA]] [[!redirects Karhunen–Loève transform]] [[!redirects KLT]] [[!redirects Hotelling transform]] [[!redirects Proper orthogonal decomposition]] [[!redirects proper orthogonal decomposition]] \end{document}