%% Author: Jun He
%% Date: 2017.05.11
% designed by Jun He on 11 May 2015
% This file may be distributed and/or modified
%
% 1. under the LaTeX Project Public License and/or
% 2. under the GNU General Public License.
\documentclass[usenames,dvipsnames]{beamer}
%\usepackage[orientation=portrait,scale=1.1,size=a0]{beamerposter}
\usepackage[orientation=portrait,scale=0.875,size=a1]{beamerposter}
\usepackage{aber} %my own defined style
\usetheme{Frankfurt}
\usecolortheme{whale}
%\usecolortheme{beaver}
%\usecolortheme{crane}
\usepackage{pgfplots}
\pgfplotsset{compat=1.5}
\pgfplotsset{every axis/.append style={thick}}
\usepackage{graphicx}
\usepackage{booktabs}
\usebackgroundtemplate{\includegraphics[height=\paperheight]{aber1.jpg}} %insert background photo
\setbeamercolor{author}{fg=white}
\setbeamercolor{institute}{fg=white}
\setbeamercolor{date}{fg=white}
\title{{\Huge An Initial Analysis of Approximation Error for Evolutionary Algorithms}} % Poster title
\author{\Large Jun He \inst{1} \and Yuren Zhou \inst{2} \and Guangming Li \inst{3}}
\institute{\normalsize \inst{1}Aberystwyth University, UK \,\, \inst{2} Sun Yat-sen University, China\,\, \inst{3} Shenzhen Institute of Information Technology, China}
%% own commands
\newcommand{\opt}{\mathrm{max}}
\newcommand*{\defeq }{:=}
\begin{document}
\begin{frame}[t]
\maketitle
\begin{columns}[t]
\begin{column}{0.45\paperwidth}
%%%%%%%%%%% Start of Column 1 %%%%%%%%%%
\begin{greenblock}{Motivation}
This work aims to rigorously analyze the approximation error of evolutionary algorithms (EAs).
\end{greenblock}
\begin{greenblock}{Background}
Consider an EA for solving a maximization problem:
\begin{align}
\max f(x), \mbox{ subject to } x \in \mathcal{S}.
\end{align}
Let $f_{\opt}$ denote the fitness of the optimal solution and $F_t$ the expected fitness of the best solution found in the $t$th generation.
\begin{definition}{Definitions}
\begin{itemize}
\item The \textbf{approximation error} of the EA in the $t$th generation is
\begin{equation}
E_t:=f_{\opt}-F_t.
\end{equation}
\item If there exist positive constants
$\alpha$ and $\beta$ such that
\begin{align}
\lim_{t \to +\infty} \frac{E_t}{(E_{t-1})^{\alpha}} =\beta,
\end{align} then $\{E_t; t=0,1, \cdots\}$ is said to \textbf{converge to $0$ of order $\alpha$}, with \textbf{asymptotic error constant $\beta$}~\cite{burden2015numerical,gautschi2011numerical}.
\end{itemize}
\end{definition}
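For instance (an illustrative sketch, not a result of this work): if the error decayed geometrically, say $E_t = c\gamma^t$ with constants $c>0$ and $0<\gamma<1$, then
\begin{align}
\frac{E_t}{E_{t-1}} = \frac{c\gamma^{t}}{c\gamma^{t-1}} = \gamma,
\end{align}
so $\{E_t\}$ converges to $0$ of order $\alpha=1$ with asymptotic error constant $\beta=\gamma$.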
\end{greenblock}
\begin{greenblock}{Research Questions}
\begin{enumerate}
\item What is the order of convergence $\alpha$?
\item What is the asymptotic error constant $\beta$?
\end{enumerate}
\begin{example}{An experimental study}
\begin{description}
\item [EA-I:] $(1+1)$ EA using one-bit mutation and elitist selection
\item [EA-II:] $(1+1)$ EA using bitwise mutation and elitist selection
\item [$f(x)$:] the OneMax function
\item [$\alpha$:] fixed at $1$, so the plotted ratio is $E_t/E_{t-1}$
\end{description}
\begin{figure}[ht]
\begin{center}
\begin{tikzpicture}
\begin{axis}[domain=0:100, width=0.6\textwidth, height=0.1\textheight,
scaled ticks=false,
legend style={draw=none},
legend pos=outer north east,
xlabel={$t$},
ylabel={$E_t/E_{t-1}$},
%ymode=log,
]
\addplot[red ] table {Const-EAIf1n.dat};
\addplot [blue ] table {Const-EAIIf1n.dat};
\legend{EA-I, EA-II}
\end{axis}
\end{tikzpicture}
\end{center}
\caption{For both EA-I and EA-II, ${E_t}/{E_{t-1}}$ converges to a constant $\beta$, although the ratio for EA-II shows stochastic fluctuation.}
\end{figure}
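For EA-I the limiting ratio can be verified by hand (a sketch based on the standard behaviour of one-bit mutation on OneMax, independent of the plotted data): if the current solution contains $i$ zero bits, mutation flips a uniformly chosen bit, so elitist selection decreases the error by one with probability $i/n$ and leaves it unchanged otherwise. Taking expectations,
\begin{align}
E_t = \left(1-\frac{1}{n}\right) E_{t-1}, \quad \mbox{that is, } \beta = 1-\frac{1}{n}.
\end{align}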
\end{example}
\end{greenblock}
\begin{greenblock}{Analysis Tool}
The analysis tool is Markov chain theory~\cite{he2003towards,he2016average}.
Label all populations with indices $\{ 0, 1, \cdots, L\}$, sorted from high to low by the fitness of the best individual in each population:
\begin{align}
f_{\max} =f_0 > f_1 \ge \cdots \ge f_L = f_{\min},
\end{align}
where $f_i$ denotes the fitness of the best individual in the $i$-th population.
\begin{itemize}
\item $r_{i,j}$ denotes the transition probability of an EA from state $j$ to state $i$.
\item Matrix $\mathbf{R}$ collects the transition probabilities within the set of non-optimal states $\{1, \cdots, L \}$:
\begin{align}
\mathbf{R}: = &\begin{pmatrix}
r_{1,1} & r_{1,2}& r_{1,3}& \cdots & r_{1,L-1} &r_{1,L} \\
r_{2,1} & r_{2,2} & r_{2,3}& \cdots & r_{2,L-1} &r_{2,L}\\
r_{3,1} & r_{3,2} & r_{3,3} & \cdots & r_{3,L-1} &r_{3,L}\\
\vdots & \vdots & \vdots & \ddots & \vdots & \vdots \\
r_{L,1} & r_{L,2} & r_{L,3} &\cdots & r_{L,L-1} & r_{L,L} \\
\end{pmatrix}.
\end{align}
\item Vector
$
\mathbf{q}_0 \defeq (\Pr(1), \Pr(2), \cdots, \Pr(L) )^T
$ represents the probability distribution of the initial population over the set $\{1, \cdots, L \}$.
\end{itemize}
Suppose that the EA can be modelled by a homogeneous Markov chain and is convergent, i.e., the approximation error $E_t \to 0$ as $t \to +\infty$.
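A minimal worked instance (assuming EA-I on OneMax with $n=2$ bits, with state $i$ meaning $i$ zero bits): from state $1$, mutation flips the single zero bit with probability $1/2$; from state $2$, every one-bit flip is an accepted improvement. Hence
\begin{align}
\mathbf{R} = \begin{pmatrix}
1/2 & 1 \\
0 & 0 \\
\end{pmatrix},
\end{align}
an upper triangular matrix with spectral radius $\rho(\mathbf{R}) = 1/2$.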
\end{greenblock}
\end{column}
%%%%%%%%%% End of Column 1 %%%%%%%%%%
%%%%%%%%%% Start of Column 2 %%%%%%%%%%
\begin{column}{0.45\paperwidth}
\begin{greenblock}{Main Theoretical Result}
\begin{enumerate}
\item In many cases, the order of convergence is $\alpha=1$.
\item The asymptotic error constant equals the spectral radius: $\beta=\rho(\mathbf{R})$ (defined below).
\end{enumerate}
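Here $\rho(\mathbf{R})$ denotes the spectral radius of $\mathbf{R}$:
\begin{align}
\rho(\mathbf{R}) \defeq \max \{ |\lambda| : \lambda \mbox{ is an eigenvalue of } \mathbf{R} \}.
\end{align}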
\end{greenblock}
\begin{greenblock}{General EAs}
Consider a particular initialization: set $\mathbf{q}_0= \mathbf{v}/|\mathbf{v}|$, where $\mathbf{v}$ is an eigenvector of $\mathbf{R}$ corresponding to the eigenvalue $\rho(\mathbf{R})$~\cite{he2016average}.
\begin{theorem}{Theorem 1}
Let $\mathbf{R}$ be the transition submatrix with $\rho(\mathbf{R})<1$.
Under this particular initialization, it holds for all $t\ge 1$ that
\begin{align}
\frac{E_t}{E_{t-1}}
= \rho(\mathbf{R}).
\end{align}
That is, $\alpha=1$ and $\beta= \rho(\mathbf{R})$.
\end{theorem}
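A sketch of why Theorem 1 holds, under the Markov chain model above (writing $X_t$ for the population state in generation $t$, a notation introduced here only for this sketch, and assuming $\mathbf{v}$ is chosen nonnegative, as Perron--Frobenius theory permits for the nonnegative matrix $\mathbf{R}$): since $\mathbf{q}_t = \mathbf{R}^t \mathbf{q}_0$ and $\mathbf{R}\mathbf{q}_0 = \rho(\mathbf{R})\, \mathbf{q}_0$,
\begin{align}
E_t = \sum_{i=1}^{L} (f_0 - f_i) \Pr(X_t = i) = (\rho(\mathbf{R}))^t \, E_0,
\end{align}
so the ratio $E_t/E_{t-1}$ equals $\rho(\mathbf{R})$ for every $t \ge 1$.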
\end{greenblock}
\begin{greenblock}{EAs with Primitive Transition Matrices}
Case 1: transition matrix $\mathbf{R}$ is primitive.
\begin{definition}{Primitive matrix}
A matrix $\mathbf{R}$ is called primitive if there exists a positive integer $m$ such that $\mathbf{R}^m > \mathbf{O}$.
This condition means that, starting from any state $i$, an EA can reach any other state $j$ in $m$ generations.
\end{definition}
Consider random initialization: the initial population can be any non-optimal state with a positive probability; equivalently, $\mathbf{q}_0 > \mathbf{0}$.
\begin{theorem}{Theorem 2}
If $\mathbf{R} $ is primitive, then under random initialization, it holds that
\begin{align}
\lim_{t \to +\infty}\frac{E_t}{E_{t-1}}
= \rho(\mathbf{R}).
\end{align}
That is, $\alpha=1$ and $\beta= \rho(\mathbf{R})$.
\end{theorem}
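The intuition behind Theorem 2 (a sketch via the standard Perron--Frobenius theorem for primitive matrices): $\rho(\mathbf{R})$ is then a simple, strictly dominant eigenvalue with positive right and left eigenvectors $\mathbf{v}, \mathbf{w}$, normalised so that $\mathbf{w}^T\mathbf{v}=1$, and
\begin{align}
\mathbf{R}^t = (\rho(\mathbf{R}))^t \, \mathbf{v}\mathbf{w}^T + o\big( (\rho(\mathbf{R}))^t \big).
\end{align}
Since $\mathbf{q}_0 > \mathbf{0}$, the leading coefficient $\mathbf{w}^T \mathbf{q}_0$ is positive, so $E_t/E_{t-1} \to \rho(\mathbf{R})$.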
\end{greenblock}
\begin{greenblock}{EAs with Reducible Transition Matrices}
Case 2: transition matrix $\mathbf{R}$ is reducible.
\begin{definition}{Reducible matrix}
$\mathbf{R}$ is called reducible if, after simultaneously permuting its rows and columns, it can be written in the block form
\begin{equation}
\mathbf{R} = \begin{pmatrix}
\mathbf{R}_{11} & \mathbf{R}_{12} \\
\mathbf{O} & \mathbf{R}_{22} \\
\end{pmatrix}
\end{equation}
where $\mathbf{O}$ is a zero submatrix and $\mathbf{R}_{11}$, $\mathbf{R}_{22}$ are square.
\end{definition}
Consider a special reducible case in which $\mathbf{R}$ is an upper triangular matrix:
\begin{align}
\mathbf{R} = \begin{pmatrix}
r_{1,1} & r_{1,2}& r_{1,3}& \cdots & r_{1,L-1} &r_{1,L} \\
0& r_{2,2} & r_{2,3}& \cdots & r_{2,L-1} &r_{2,L}\\
0 & 0 & r_{3,3} & \cdots & r_{3,L-1} &r_{3,L}\\
\vdots & \vdots & \vdots & \ddots & \vdots & \vdots \\
0& 0& 0&\cdots & 0& r_{L,L} \\
\end{pmatrix}.
\end{align}
\begin{theorem}{Theorem 3}
If $\mathbf{R} $ is upper triangular with distinct diagonal entries $r_{1,1}, \cdots, r_{L,L}$, then under random initialization, it holds that
\begin{align}
\lim_{t \to +\infty}\frac{E_t}{E_{t-1}}
= \rho(\mathbf{R}).
\end{align}
That is, $\alpha=1$ and $\beta= \rho(\mathbf{R})$.
\end{theorem}
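A toy numerical instance of Theorem 3 (entries chosen purely for illustration): take
\begin{align}
\mathbf{R} = \begin{pmatrix}
0.9 & 0.5 \\
0 & 0.6 \\
\end{pmatrix}, \qquad \rho(\mathbf{R}) = 0.9.
\end{align}
Then $E_t = a\,(0.9)^t + b\,(0.6)^t$ for constants $a$ and $b$ determined by $\mathbf{q}_0$; under random initialization the leading coefficient $a$ is positive, so $E_t/E_{t-1} \to 0.9 = \rho(\mathbf{R})$.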
\end{greenblock}
\begin{greenblock}{References}
\setbeamertemplate{bibliography item}{\insertbiblabel}
\begin{footnotesize}
\begin{thebibliography}{1}
\bibitem{burden2015numerical}
R.~Burden, J.~Faires, and A.~Burden, \emph{Numerical Analysis}.\hskip 1em plus
0.5em minus 0.4em\relax Cengage Learning, 2015.
\bibitem{gautschi2011numerical}
W.~Gautschi, \emph{Numerical Analysis}.\hskip 1em plus 0.5em minus 0.4em\relax
Birkh{\"a}user Boston, 2011.
\bibitem{he2003towards}
J.~He and X.~Yao, ``Towards an analytic framework for analysing the computation
time of evolutionary algorithms,'' \emph{Artificial Intelligence}, vol. 145,
no. 1-2, pp. 59--97, 2003.
\bibitem{he2016average}
J.~He and G.~Lin, ``Average convergence rate of evolutionary algorithms,''
\emph{IEEE Transactions on Evolutionary Computation}, vol.~20, no.~2, pp.
316--321, 2016.
\end{thebibliography}
\end{footnotesize}
\end{greenblock}
\end{column}
%%%%%%%%%% End of Column 2 %%%%%%%%%%
\end{columns}
\contact{\color{white} Contact: Jun He,
Department of Computer Science, Aberystwyth University,
Aberystwyth SY23 3DB, UK. Email: jun.he@aber.ac.uk}
\end{frame}
\end{document}