    \magnification=\magstep1
\input amstex
%\input kratt

% --- Tabbing machinery (plain TeX / AMS-TeX) ---------------------------
% `@' is made a letter so the private control sequences below
% (\tab@, \hline@, \Strich@, ...) can be defined and used.
 \catcode`\@=11 \newif\iftab@\tab@false \newif\ifvtab@\vtab@false
% \tab ... \endtab: text alignment built on \ialign.  The row separator
% \\ also transfers a pending \hline@ flag (set by \hlinefor) into
% \hline@@, so the row after a rule gets a taller strut in \format's
% column templates.
\def\tab{\bgroup\tab@true\vtab@false\vst@bfalse\Strich@false%
   \def\\{\global\hline@@false%
     \ifhline@\global\hline@false\global\hline@@true\fi\cr}
   \edef\l@{\the\leftskip}\ialign\bgroup\hskip\l@##\hfil&&##\hfil\cr}
\def\endtab{\cr\egroup\egroup}
% \vtab ... \endvtab: like \tab but inside a \vtop so the first row sits
% on the current baseline; here \\ is a plain row separator (no rule
% bookkeeping).
\def\vtab{\vtop\bgroup\vst@bfalse\vtab@true\tab@true\Strich@false%
   \bgroup\def\\{\cr}\ialign\bgroup&##\hfil\cr}
\def\endvtab{\cr\egroup\egroup\egroup}
% \stab ... \endstab: "Strich" (rule) variant of \tab.  Vertical rules of
% thickness \D@cke may be requested in \format, so interline glue is
% switched off and the struts defined below keep the baselines apart.
\def\stab{\D@cke0.5pt\null 
 \bgroup\tab@true\vtab@false\vst@bfalse\Strich@true\Let@@\vspace@
 \normalbaselines\offinterlineskip
  \openup\spreadmlines@
 \edef\l@{\the\leftskip}\ialign
 \bgroup\hskip\l@##\hfil&&##\hfil\crcr}
\def\endstab{\crcr\egroup
 \egroup}
% \vstab ... \endvstab: ruled variant inside a \vtop.  The alignment
% itself is opened later by \format (hence the extra \bgroup here,
% closed by \format's \egroup).
\newif\ifvst@b\vst@bfalse
\def\vstab{\D@cke0.5pt\null
 \vtop\bgroup\tab@true\vtab@false\vst@btrue\Strich@true\bgroup\Let@@\vspace@
 \normalbaselines\offinterlineskip
  \openup\spreadmlines@\bgroup}
\def\endvstab{\crcr\egroup\egroup
 \egroup\tab@false\Strich@false}

% Strut dimensions used by the column templates in \format: ordinary
% ruled rows get \htstrut@/\dpstrut@; a row immediately below an
% \hlinefor rule gets the taller \htStrut@ so text clears the rule.
% (\dpStrut@ is kept in step by \@penup but appears unused here.)
\newdimen\htstrut@
\htstrut@8.5\p@
\newdimen\htStrut@
\htStrut@12\p@
\newdimen\dpstrut@
\dpstrut@3.5\p@
\newdimen\dpStrut@
\dpStrut@3.5\p@
% \openup<dimen>: open up the line spacing (as in plain TeX) and also
% grow the struts by half the amount, so rules stay centred between rows.
\def\openup{\afterassignment\@penup\dimen@=}
\def\@penup{\advance\lineskip\dimen@
  \advance\baselineskip\dimen@
  \advance\lineskiplimit\dimen@
  \divide\dimen@ by2
  \advance\htstrut@\dimen@
  \advance\htStrut@\dimen@
  \advance\dpstrut@\dimen@
  \advance\dpStrut@\dimen@}
% \Let@@: install the rule-aware row separator \\ (same as in \tab).
% The \iffalse{\fi ... \iffalse}\fi trick hides an unmatched brace from
% TeX's scanner while keeping the alignment's template braces balanced.
\def\Let@@{\relax\iffalse{\fi%
    \def\\{\global\hline@@false%
     \ifhline@\global\hline@false\global\hline@@true\fi\cr}%
    \iffalse}\fi}
% \matrix ... \endmatrix: centred math matrix (AMS-TeX style).  A math
% strut row is added above and below, then backed out by -\baselineskip
% to even out the first and last row spacing.
\def\matrix{\null\,\vcenter\bgroup
 \tab@false\vtab@false\vst@bfalse\Strich@false\Let@@\vspace@
 \normalbaselines\openup\spreadmlines@\ialign
 \bgroup\hfil$\m@th##$\hfil&&\quad\hfil$\m@th##$\hfil\crcr
 \Mathstrut@\crcr\noalign{\kern-\baselineskip}}
\def\endmatrix{\crcr\Mathstrut@\crcr\noalign{\kern-\baselineskip}\egroup
 \egroup\,}
% \smatrix ... \endsmatrix: matrix variant that may carry rules
% (\Strich@true); interline glue is off, struts do the vertical spacing.
% Used for the tables in this document.
\def\smatrix{\D@cke0.5pt\null\,
 \vcenter\bgroup\tab@false\vtab@false\vst@bfalse\Strich@true\Let@@\vspace@
 \normalbaselines\offinterlineskip
  \openup\spreadmlines@\ialign
 \bgroup\hfil$\m@th##$\hfil&&\quad\hfil$\m@th##$\hfil\crcr}
\def\endsmatrix{\crcr\egroup
 \egroup\,\Strich@false}
% \D@cke ("thickness") is the current rule width; \Dicke sets it
% globally (e.g. \Dicke7pt in the commented example above).
\newdimen\D@cke
\def\Dicke#1{\global\D@cke#1}
% \tabs@ holds the column separator token inserted by \s/\sa/\se.
\newtoks\tabs@\tabs@{&}
\newif\ifStrich@\Strich@false
% \iff@rst appears to be declared but never used in this file.
\newif\iff@rst

% \Stricherr@: error raised when a rule column \s appears in a format
% spec although the current environment cannot carry rules; the message
% names the ruled variant (\vstab, \stab or \smatrix) to use instead.
\def\Stricherr@{\iftab@\ifvtab@\errmessage{\noexpand\s not allowed
     here. Use \noexpand\vstab!}%
  \else\errmessage{\noexpand\s not allowed here. Use \noexpand\stab!}%
  \fi\else\errmessage{\noexpand\s not allowed
     here. Use \noexpand\smatrix!}\fi}
% \format <spec> \\ : build the \ialign preamble for the ruled
% alignments.  <spec> is a list of column tokens: \c/\r/\l (centred,
% right, left), \cd/\rd/\ld (the same in \dsize display math), and the
% rule separators \s (between columns), \sa (before the first column),
% \se (after the last).  The \ifnum`}=0 / \iffalse{\fi gymnastics keep
% braces balanced across the group boundaries of \stab/\vstab.
\def\format{\ifvst@b\else\crcr\fi\egroup\iffalse{\fi\ifnum`}=0 \fi\format@}
\def\format@#1\\{\def\preamble@{#1}%
% \Str@chfehlt: scan the spec and complain via \Stricherr@ if \s is
% used while the current environment has no rules (\Strich@false).
 \def\Str@chfehlt##1{\ifx##1\s\Stricherr@\fi\ifx##1\\\let\Next\relax%
   \else\let\Next\Str@chfehlt\fi\Next}%
% Column templates.  Each inserts: a tall zero-width strut if the
% previous row ended with \hlinefor (\ifhline@@, tested at use time --
% hence \noexpand), a normal strut in ruled mode, and the column text,
% wrapped in $\m@th...$ unless we are in a text \tab.
 \def\c{\hfil\noexpand\ifhline@@\hbox{\vrule height\htStrut@%
   depth\dpstrut@ width\z@}\noexpand\fi%
   \ifStrich@\hbox{\vrule height\htstrut@ depth\dpstrut@ width\z@}%
   \fi\iftab@\else$\m@th\fi\the\hashtoks@\iftab@\else$\fi\hfil}%
 \def\r{\hfil\noexpand\ifhline@@\hbox{\vrule height\htStrut@%
   depth\dpstrut@ width\z@}\noexpand\fi%
   \ifStrich@\hbox{\vrule height\htstrut@ depth\dpstrut@ width\z@}%
   \fi\iftab@\else$\m@th\fi\the\hashtoks@\iftab@\else$\fi}%
 \def\l{\noexpand\ifhline@@\hbox{\vrule height\htStrut@%
   depth\dpstrut@ width\z@}\noexpand\fi%
   \ifStrich@\hbox{\vrule height\htstrut@ depth\dpstrut@ width\z@}%
   \fi\iftab@\else$\m@th\fi\the\hashtoks@\iftab@\else$\fi\hfil}%
% Rule separators: in ruled mode they contribute a vertical rule of
% width \D@cke in its own column; otherwise just the tab character(s).
 \def\s{\ifStrich@\ \the\tabs@\vrule width\D@cke\the\hashtoks@%
          \fi\the\tabs@\ }%
 \def\sa{\ifStrich@\vrule width\D@cke\the\hashtoks@%
            \the\tabs@\ %
            \fi}%
 \def\se{\ifStrich@\ \the\tabs@\vrule width\D@cke\the\hashtoks@\fi}%
% Display-math (\dsize) versions of \c, \r, \l.
 \def\cd{\hfil\noexpand\ifhline@@\hbox{\vrule height\htStrut@%
   depth\dpstrut@ width\z@}\noexpand\fi%
   \ifStrich@\hbox{\vrule height\htstrut@ depth\dpstrut@ width\z@}%
   \fi$\dsize\m@th\the\hashtoks@$\hfil}%
 \def\rd{\hfil\noexpand\ifhline@@\hbox{\vrule height\htStrut@%
   depth\dpstrut@ width\z@}\noexpand\fi%
   \ifStrich@\hbox{\vrule height\htstrut@ depth\dpstrut@ width\z@}%
   \fi$\dsize\m@th\the\hashtoks@$}%
 \def\ld{\noexpand\ifhline@@\hbox{\vrule height\htStrut@%
   depth\dpstrut@ width\z@}\noexpand\fi%
   \ifStrich@\hbox{\vrule height\htstrut@ depth\dpstrut@ width\z@}%
   \fi$\dsize\m@th\the\hashtoks@$\hfil}%
% Validate the spec, expand it once into \Preamble@ (the \hbox merely
% provides a throw-away group for the \xdef), then start the alignment.
 \ifStrich@\else\Str@chfehlt#1\\\fi%
 \setbox\z@\hbox{\xdef\Preamble@{\preamble@}}\ifnum`{=0 \fi\iffalse}\fi
 \ialign\bgroup\span\Preamble@\crcr}
% \hline@ is set by \hlinefor in the current row; \\ moves it into
% \hline@@, which the column templates test for the taller strut.
\newif\ifhline@\hline@false
\newif\ifhline@@\hline@@false
% \hlinefor{n}: draw a horizontal rule of thickness \D@cke spanning n
% columns, and flag the following row.
\def\hlinefor#1{\multispan@{\strip@#1 }\leaders\hrule height\D@cke\hfill%
    \global\hline@true\ignorespaces}
% \htextfor{n} {text}: centre <text> across n columns instead of a rule.
\def\htextfor#1 #2{\multispan@{\strip@#1 }\hfil\hbox{#2}\hfill%
    \global\hline@true\ignorespaces}
\catcode`\@=13

%$$\smatrix \format \sa \c\s\r\s\l\se\\
%\omit&x&&2&&1&\\
%\hlinefor{5}\\
%&yy&&44444444444&\omit&33333333333&\\
%\omit&\omit&\hlinefor{5}\\
%\omit&1&& &&\endsmatrix$$
% 
%{\stab \Dicke7pt\format \sa \c\s\r\s\l\se\\
%\omit&x&&2&&1&\\
%\hlinefor{5}\\
%&yy&&44444444444&\omit&33333333333&\\
%\omit&\omit&\hlinefor{5}\\
%\omit&1&& &&\endstab}

\def\br#1#2{\thickfracwithdelims[]\thickness0{#1}{#2}}






\TagsAsMath
\documentstyle{amsppt}


% Running head: italic title at the left, page number at the right,
% separated from the text by a rule; suppressed on page 1.
\headline=               
{\ifnum\pageno=1\hfil
 \else\vbox{\line {\tenit 
Knuth's first analysis of an algorithm
 \hfil\tenrm\number\pageno}
\vskip 2pt\hrule}\fi}
\footline={\hfil}
% Suppress the amsppt "AMS-TeX" logo and the special first-page layout.
\catcode`\@=11
\redefine\logo@{}
\firstpage@false
\catcode`\@=13

% Notation shortcuts for the text.
% NOTE(review): \cc, \C and \eps appear unused below; \r overrides any
% existing meaning of \r and is itself shadowed locally inside \format
% (where \r is a column specifier) -- confirm no accent use is intended.
\define\cc{\overline{c}}
\define\C{\overline{C}}
\define\r{\overline{\rho}}
\define\eps{\varepsilon}

\NoBlackBoxes
\TagsOnRight
\topmatter
\noindent
{\tt [Typeset by Helmut Prodinger, November 1997]}
\vskip 3cm

\title 
Notes on ``open'' addressing\\
{\fiverm [My first analysis of an algorithm, originally done during summer 1962 in Madison.]}
\endtitle
\author Don Knuth 7/22/63 \endauthor
\endtopmatter
\document



\subhead{1. Introduction and Definitions}
\endsubhead
Open addressing is a widely--used technique
for keeping ``symbol tables.'' The method was first used in 1954 by Samuel, Amdahl,
and Boehme in an assembly program for the IBM 701. An extensive discussion of
the method was given by Peterson in  1957 \cite{1}, and frequent references have been
made to it ever since (e.g. Schay and Spruth \cite{2}, Iverson \cite{3}). However, the
timing characteristics have apparently never been exactly established, and indeed
the author has heard reports of several reputable mathematicians who failed to
find the solutions after some trial. Therefore it is the purpose of this note to
indicate one way by which the solution can be obtained.


We will use the following abstract model to describe the method: $N $ is 
a positive integer, and we have an array of $N$  variables $x_1,x_2,\dots,x_N$.
At the beginning, $x_i=0$, for $1\le i\le N$. 

To ``enter the $k$--th item in the table,'' we mean that an integer $a_k$  is
calculated, $1\le a_k\le N$, depending only on the item, and the following process is
carried out: 

\roster
\item Set $j=a_k$.
\item ``The comparison step.'' If $x_j=0$, set $x_j=1$ and stop; we say ``the
$k$--th item has fallen into position $x_j$.''
\item If $j=N$, go to step 5.
\item Increase $j$        by 1 and return to step 2.
\item ``The overflow step.'' If this step is entered twice, the table is full, 
i.e. $x_i=1$ for $1\le i \le N$. Otherwise set $j$  to 1 and return to step 2.
\endroster

Observe the cyclic character of this algorithm.

We are concerned with the statistics of this method, with respect to the number
of times the comparison step must be executed. More precisely, we consider all of
the $N^k$ possible sequences $a_1,a_2,\dots,a_k$ to be equally probable, and we ask,
{\bf ``What is the probability that the comparison step is used precisely
$\bold m$ times when the $\bold k$--th item is placed?''  }

\subhead{2. Non--overflow (self--contained) sequences}
\endsubhead
Let $\br nk$ denote the number of sequences $a_1,a_2,\dots,a_k$ ($1\le a_i\le n$)
in which no overflow step occurs during the entire process of placing $k$ items, if
the algorithm is used for $N=n$. (By convention, we set $\br n0=1$.)

\proclaim{Lemma 1} If $0\le k\le n+1$, then
$\br nk=(n+1)^k-k(n+1)^{k-1}$.  
\endproclaim  
\demo{Proof} This proof is based on the fact that $\br nk$ is precisely the number of
sequences $b_1,b_2,\dots,b_k$ ($1\le b_i \le n+1$) in which, if the algorithm is carried out
for $N=n+1$, then $x_{n+1}=0$ at the end of the operation. This follows because every
sequence of the former type is one of the latter, and conversely the condition implies
in particular that $1\le b_i \le n$, and that no overflow step occurs.

But sequences of the latter type are easily enumerated, because the algorithm has
circular symmetry; of the $(n+1)^k$ possible sequences $b_1,b_2,\dots,b_k$,
exactly the fraction $k/(n+1)$  of these leave $x_{n+1}\neq 0$. This shows that
$$
\br nk=(n+1)^k\left(1-\frac{k}{n+1}\right)\;.
$$ 
\enddemo

\subhead{3. Sequential pile--up}\endsubhead
 How many sequences $a_1,\dots,a_{k-1}$ 
($1\le a_i\le N$) leave
$$
x_{N-t-1}=0, \ x_{N-t}=\dots=x_{N-1}=1,\  x_N=0\ ?
$$
Let this number be denoted by $Q(N,k,t)$.
 
\proclaim{Lemma 2} If $0<k\le N$, $0\le t<N-1$, then
$Q(N,k,t)=\binom{k-1}{t}\br tt\,\br{N-t-2}{k-t-1} $.
\endproclaim  
\demo{Proof}
In order to construct such a sequence, we have a subsequence of $t$ items
which fall into the range $x_{N-t}$ through $x_{N-1}$; there are
$\br tt$ such sequences. The remaining terms form a subsequence of $k-t-1$ 
items which all land in the range $x_1$ through $x_{N-t-2}$; there are
$\br{N-t-2}{k-t-1}$ such sequences. Finally, there are $\binom{k-1}{t}$ ways
to put these two subsequences together. This completes the proof.
\enddemo

Notice that the stated formula for $Q(N,k,t)$ is valid also for the excluded
case $t=N-1$, if we adopt the convention that
$$
\br{-1}0=1.
$$

\subhead{4. The probability $P(N,k,m)$ }\endsubhead
Let $P(N,k,m)$ be the probability that $m$ comparison steps are required to
place the $k$--th item, i.e. step 2 of the algorithm is entered $m$  times.

\proclaim{Lemma 3} The number of sequences $a_1,\dots,a_k$ {\rm (}$1\le a_i\le N${\rm ) }
in which the $k$--th item falls in position  $x_N$  after precisely $m$ 
comparisons, is
$$
\sum_{i=m}^NQ(N,k,i-1)=\br{N-1}{k-1}-\sum_{i=1}^{m-1}Q(N,k,i-1), 
\quad\text{{\rm for $1\le k\le N$.}}
$$ 
\endproclaim  
\demo{Proof} We must have $a_k=N-m+1$; and after the first $k-1$  steps, we must 
have $x_i=1$ for $N-m+1\le i\le N$, and also $x_N=0$. Therefore by Lemma 2, the
stated formula is obvious, in view of the fact that
$$
\sum_{i=1}^NQ(N,k,i-1)=\br{N-1}{k-1}
$$ 
(the number of sequences which leave $x_N=0$).   
\enddemo


\proclaim{Lemma 4} 
$$
P(N,k,m)=\frac{1}{N^{k-1}}\sum_{i=m}^NQ(N,k,i-1)=1-\frac{k-1}{N}
-\frac{1}{N^{k-1}}\sum_{i=1}^{m-1}Q(N,k,i-1).
$$
\endproclaim  
\demo{Proof}The position $x_N$ in Lemma 3 can be changed to $x_i$
 for  any other $i$ 
without affecting the result, by symmetry. There are $N^k$ possible sequences
$a_1,\dots,a_k$ ($1\le a_i\le N$), each assumed to be equally probable; hence
$P(N,k,m)$  is the appropriate fraction of these sequences, and the result is
immediate from Lemma 3.

Now we let $R(N,k,t)=Q(N,k,t-1)/\big(N^{k-3}(N-k)\big)$. By Lemmas 1 and 2, we find that for
$1\le t<N$, $1\le k<N$,
$$
R(N,k,t)=\binom{k-1}{t-1}(N-t)^{k-t-1}(N-k)t^{t-2}\Big/ \Big(N^{k-3}(N-k)\Big)
$$
so that
$$
R(N,k,t)=\binom{k-1}{t-1}\left(\frac{t}{N}\right)^{t-2}\left(1-\frac{t}{N}\right)^
{k-t-1}\,.
$$
\enddemo

\proclaim{Theorem 1}  If $1\le k,m\le N$, then 
$$
P(N,k,m)=1-\frac{k-1}{N}-\frac{N-k}{N^2}\sum_{t=1}^{m-1}R(N,k,t).
$$
\endproclaim  
\demo{Proof} This formula is immediate from Lemma 4, except in the case $k=N$.
But since $P(N,N,m)$  is clearly $1/N$, the above formula holds in all cases.
\enddemo 

\subhead{5. Calculation of the mean}\endsubhead
We are interested in the expected number of comparison steps to place the $k$--th item.
This is
$$
M(N,k)=\sum_{m=1}^NmP(N,k,m).
$$ 
To evaluate $M(N,k)$ in ``closed'' form, we first observe that $\sum_{m=1}^NP(N,k,m)=1$ 
which implies (by Theorem 1)
$$
1=N-k+1-\frac{N-k}{N^2}\sum_{t=1}^N(N-t)R(N,k,t);
$$
hence, if $k<N$, $\sum_{t=1}^N(1-\frac{t}{N})R(N,k,t)=N$. This implies the identity
$$
\sum_{t=1}^k\binom{k-1}{t-1}\left(\frac{t}{N}\right)^{t-2}
\left(1-\frac{t}{N}\right)^{k-t}=N,\tag*
$$
which is rather difficult to derive directly. Actually the latter formula
can be derived independently using Abel's generalized binomial theorem:
$$
(x+z)^n=\sum_{r=0}^n\binom{n}{r}x(x-ry)^{r-1}(z+ry)^{n-r}
$$
which is valid for $x\neq0$ and arbitrary $y,z$. Put $x=1/N$, $y=-1/N$, 
$z=1-x$   and $n=k-1$, to obtain
$$
1=\sum_{r=0}^{k-1}\binom{k-1}{r}\frac{1}{N}\left(\frac{r+1}{N}\right)^{r-1}
\left(1-\frac{r+1}{N}\right)^{k-r-1}
$$ 
and the required formula ($*$) follows with $t=r+1$. It is therefore valid for
all $k,N>0$ and in particular for $k=N$.

Now let $N$    be fixed, and let
$$
b_k=\sum_{t=1}^N\left(1-\frac{t}{N}\right)^2R(N,k,t).
$$
We have the identity 
$$
\frac{k-t}{k-1}R(N,k,t)=
 \left(1-\frac{t}{N}\right)R(N,k-1,t),
$$
which can be rewritten in the form
$$
\left(1-\frac{t}{N}\right)R(N,k,t)=
\left(1-\frac{k}{N}\right)R(N,k,t)
+\frac{k-1}{N}\left(1-\frac{t}{N}\right)R(N,k-1,t).
$$
Hence, by ($*$), 
$$
b_k=N-k+\frac{k-1}{N}b_{k-1}
$$
and using the value $b_1=N-1$, we find
$$
b_{k+1}=N-\frac{k!}{N^k}
\left(1+N+\frac{N^2}{2!}+\dots+\frac{N^k}{k!}\right), 
\quad\text{for $k\ge0$.}
$$
Let us write, for convenience,
$$
E_r(N,k)=1+\binom{r+1}{r}\frac{k-1}{N}+
\binom{r+2}{r}\frac{(k-1)(k-2)}{N^2}+\dots+
\binom{r+k-1}{r}\frac{(k-1)!}{N^{k-1}}.
$$
Then
$$
b_k=N-1-\frac{k-1}{N}E_0(N,k-1).
$$
We have 
$$\align
M&(N,k)=\sum_{m=1}^NmP(N,k,m)\\
&=\frac{N(N+1)}{2}\left(1-\frac{k-1}{N}\right)
-\frac{N-k}{N^2}\sum_{t=1}^N
\left(\frac{N(N+1)}{2}-\frac{t(t+1)}{2}\right)R(N,k,t)\\
&=\frac{1}{2}\Bigg\{
N(N+1)\left(1-\frac{k-1}{N}\right)\\&\qquad-
\left(1-\frac{k}{N}\right)\sum_{t=1}^N\left[
(2N+1)\left(1-\frac{t}{N}\right)-N\left(1-\frac{t}{N}\right)^2
\right]R(N,k,t)\Bigg\}\;.
\endalign
$$


Putting everything together, we therefore obtain
\proclaim{Theorem 2} 
$$
\align
M(N,k)&=\frac{1}{2}\left(1+k-(k-1)\left(1-\frac{k}{N}\right)E_0(N,k-1)\right)\\
&=\frac{1}{2}\Big(1+kE_0(N,k)-(k-1)E_0(N,k-1)\Big)\\
&=\frac{1}{2}\Big(1+E_1(N,k)\Big)
\endalign
$$
\endproclaim

As an example, let $N=5$. We find the following values for $P(N,k,m)$:


$$
\smatrix \format \sa \c\s\c\s\c\s\c\s\c\s\c\s\c\s\c\se\\
\hlinefor{15}\\
&&&&\omit&&\omit&m&\omit&&\omit&&&&
\\
&P(5,k,m)&&1&\omit&2&\omit&3&\omit&4&\omit&5&&M(5,k)&
\\
\hlinefor{15}\\
&1&&1&\omit&0&\omit&0&\omit&0&\omit&0&&1&
\\
\hlinefor{15}\\
&2&&\frac{4}{5}&\omit&\frac{1}{5}&\omit&0&\omit&0&\omit&0&&\frac{6}{5}&
\\
\hlinefor{15}\\
&3&&\frac{3}{5}&\omit&\frac{7}{25}&\omit&\frac{3}{25}&\omit&0&\omit&0&&\frac{38}{25}&
\\
\hlinefor{15}\\
&4&&\frac{2}{5}&\omit&\frac{34}{125}&\omit&\frac{1}{5}&\omit&\frac{16}{125}&\omit&0&&\frac{257}{125}&
\\
\hlinefor{15}\\
&5&&\frac{1}{5}&\omit&\frac{1}{5}&\omit&\frac{1}{5}&\omit&\frac{1}{5}&\omit&\frac{1}{5}&&3&
\\
\hlinefor{15}
\endsmatrix$$
  
\subhead{6. The standard deviation }\endsubhead
Let $\sigma(N,k)$  be the standard deviation of the distribution $P(N,k,m)$. Using methods similar
to those of the preceding section, and somewhat more pain, the standard deviation can be
obtained. We will merely quote the results here.


\proclaim{Theorem 3} 
$$
\sum_{t=1}^N\left(1-\frac{t}{N}\right)^3R(N,k,t)=N+(k-1)!\sum_{i=0}^k
\left(\binom{i+3}{2}-2k-3\right)\frac{1}{(k-i)!N^i}
$$
$$
\sum_{m=1}^Nm^2P(N,k,m)=\frac{1}{6}+E_3(N,k)-\frac{2}{3}E_2(N,k)+\frac{1}{2}E_1(N,k)
$$
\endproclaim
The reader may at least verify this theorem in the case $N=5$. The 
standard deviation may now be obtained from the latter formula. 

\subhead{7. Searching the table}\endsubhead
Once the table has been constructed, let $k$ be the number of items in it.
A similar algorithm can be used to search the table, and the number of comparisons
needed to find a given item is the same as the number of comparisons which were 
needed to 
enter it in the first place.

If all items in the table are referred to with equal probability, the average number of
comparisons needed when searching the table is
$$
T(N,k)=\frac{1}{k}\sum_{t=1}^kM(N,t)=\frac{1}{2}
\Big(1+E_0(N,k)\Big),
$$
by Theorem 2. 

\subhead{8. Approximations}\endsubhead
Here we give simple upper bounds for $M(N,k)$ and $T(N,k)$. Since all quantities
depend on $E_r(N,k)$  for various $r$, the natural approximation is derived by
writing
$$ 
E_r(N,k)\approx 1+\binom{r+1}{r}\frac{k-1}{N}+
\binom{r+2}{r}\left(\frac{k-1}{N}\right)^2+\dots=\left(\frac{1}{1-\rho}\right)^{r+1}.
$$
Here $\rho=(k-1)/N$ is the approximate density of the items in the table. We then
have the approximation
$$\align
M(N,k)&\approx \frac{1}{2}\left(1+\frac{1}{(1-\rho)^2}\right)\\
T(N,k)&\approx \frac{1}{2}\left(1+\frac{1}{1-\rho}\right)\\
\sigma^2(N,k)&\approx \frac{3}{4}\frac{1}{(1-\rho)^4}-\frac{2}{3}\frac{1}{(1-\rho)^3}
-\frac{1}{12}
\endalign
$$

The estimates for $M(N,k)$ and $T(N,k)$ are upper bounds; we see that the estimates
are dependent upon $N$ and $k$ only with respect to their ratio. The estimates
are best for small values of $\rho$. As  $\rho$ approaches 1, these estimates become
less and less accurate. The worst case is when $k=N$, i.e. $\rho=1-1/N$.
Then the approximations give
$$
(1+N^2)/2, \qquad (9N^4-8N^3-1)/12
$$  
respectively for $M(N,N)$, $\sigma^2(N,N)$; the true values are
$$
(1+N)/2, \qquad (N^2-1)/12
$$  
 respectively. As $N$ becomes large, however, the approximation is good
for fixed $\rho$; in fact, it is not hard to show that
$$
E_r(N,\rho N)\uparrow \frac{1}{(1-\rho)^{r+1}}\quad\text{as }N\to\infty.
$$ 

\subhead{9. Relations to the paper of Schay and Spruth}\endsubhead
In the article by 
Schay and Spruth \cite2, a method is described which the authors claim is superior
to open addressing: the elements are shifted as they are stored, so that the
elements whose starting address is $x_j$ are stored before (in the cyclic sense)
those whose starting address is $x_{j+1}$. However, the method described there,
although more complicated than the open addressing method, actually 
{\bf has exactly the same performance characteristics}. For it was shown by
Peterson \cite1 that $T(N,k)$ is independent of the order in which the elements
enter the table; and there is an order for which the table takes the same form as
stipulated by Schay and Spruth.

Since the more complex method of \cite2 is therefore exactly the same as the
one we have described, with respect to its performance, it is interesting to compare
the timing estimates derived in that paper with the ones derived here.
The formula
$$
T(N,k)\approx 1+\frac{\rho}{2(1-\rho)}
$$
was derived in \cite2, and this coincides with
$$
\frac{1}{2}\left(1+\frac{1}{1-\rho}\right),
$$
the approximation we derived above, except the value $\rho=k/N$, not
$(k-1)/N$, was used in \cite2. In that paper, the approximation occurred when the
binomial probability was replaced by a Poisson distribution. The derivation is completely
different from the one described here, and the authors were not concerned with the
quantities $M(N,k)$ or $P(N,k,m)$.


\subhead{10. Selected values of \ $M(N,k)$,\  $\sigma(N,k)$,\  $T(N,k)$}\endsubhead  
Precise calculations of $E_r(N,k)$ may be rapidly performed by computer, and hence the
exact values of the mean, standard deviation, and average search time. The tables on the 
following page, for example, were computed by the B--5000
in approximately five seconds, which is the time needed to print that amount of
information. The table indicates to what extent the quantities depend on the ratio
$k/N$; all values are exact, except of course for rounding.

\Refs

\ref \no1\by W. W. Peterson\paper Addressing for Random--Access Storage
\jour IBM J. \vol 1\yr 1957\pages130--146
\endref


\ref \no2\by G. Schay, Jr., and W. G. Spruth
\paper Analysis of a File Addressing Method
\jour Comm. ACM \vol 5  \yr 1962\pages459--461
\endref


\ref \no3\by K. E. Iverson
\book A Programming Language\publ Wiley
\yr 1962\pages153--154
\endref




\endRefs












\newpage

{\bf Table 1. $M(N,k)$}


$$
\smatrix \format \sa \r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\se\\
\omit&N&\omit&0.1&\omit&0.2&\omit&0.3&\omit&0.4&\omit&0.5&\omit&0.6&\omit
&0.7&\omit&0.8&\omit&0.9&\omit&1.0&\omit\\
\hlinefor{23}\\
&10&&1.000&&1.100&&1.230&&1.402&&1.634&&1.954&&2.404&&3.055&&4.022&&5.500&\\
\hlinefor{23}\\
&100&&1.102&&1.256&&1.476&&1.805&&  2.327&&3.218&&4.899&&8.532&&18.029&&50.500&\\
\hlinefor{23}\\
&1000&&1.116&&1.279&&1.516&&1.880&&2.480&&3.576&&5.897&&12.213&&40.357&&500.50&\\
\hlinefor{23}\\
&10000&&1.117&&1.281&&1.520&&1.888&&2.498&&3.620&&6.039&&12.914&&49.116&&5000.5&\\
\hlinefor{23}\\
&100000&&1.117&&1.281&&1.520&&1.889&&2.500&&3.624&&6.054&&12.991&&50.356&&50000.5&\\
\hlinefor{23}\\
&1000000&&1.117&&1.281&&1.520&&1.889&&2.500&&3.625&&6.055&&13.000&&50.486&&500001&\\
\hlinefor{23}
\endsmatrix$$

{\bf Table 2. $\sigma(N,k)$}


$$
\smatrix \format \sa \r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\se\\
\omit&N&\omit&0.1&\omit&0.2&\omit&0.3&\omit&0.4&\omit&0.5&\omit&0.6&\omit
&0.7&\omit&0.8&\omit&0.9&\omit&1.0&\omit\\
\hlinefor{23}\\
&10&&0.000&&0.300&&0.487&&0.690&&0.930&&1.220&&1.573&&1.994&&2.463&&2.873&\\
\hlinefor{23}\\
&100&&0.344&&0.610&&0.945&&1.420&&  2.146&&3.338&&5.444&&9.455&&17.458&&28.866&\\
\hlinefor{23}\\
&1000&&0.377&&0.661&&1.036&&1.595&&2.514&&4.200&&7.787&&17.390&&55.931&&288.68&\\
\hlinefor{23}\\
&10000&&0.381&&0.667&&1.046&&1.615&&2.561&&4.321&&8.187&&19.369&&78.421&&2886.8&\\
\hlinefor{23}\\
&100000&&0.381&&0.667&&1.047&&1.618&&2.565&&4.334&&8.230&&19.603&&82.209&&28868.&\\
\hlinefor{23}\\
&1000000&&0.381&&0.668&&1.047&&1.618&&2.566&&4.335&&8.235&&19.627&&82.618&&288675&\\
\hlinefor{23}
\endsmatrix$$

{\bf Table 3. $T(N,k)$}


$$
\smatrix \format \sa \r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\s\r\se\\
\omit&N&\omit&0.1&\omit&0.2&\omit&0.3&\omit&0.4&\omit&0.5&\omit&0.6&\omit
&0.7&\omit&0.8&\omit&0.9&\omit&1.0&\omit\\
\hlinefor{23}\\
&10&&1.000&&1.050&&1.110&&1.183&&1.273&&1.387&&1.532&&1.722&&1.978&&2.330&\\
\hlinefor{23}\\
&100&&1.049&&1.116&&1.200&&1.312&&1.463&&1.682&&2.020&&2.598&&3.747&& 6.605&\\
\hlinefor{23}\\
&1000&&1.055&&1.124&&1.213&&1.331&&1.496&&1.742&&2.149&& 2.941&& 5.101&&20.151&\\
\hlinefor{23}\\
&10000&&1.055&&1.125&&1.214&&1.333&&1.500&&1.749&&2.165&& 2.994&& 5.451&&63.000&\\
\hlinefor{23}\\
&100000&&1.056&&1.125&&1.214&&1.333&&1.500&&1.750&&2.166&& 2.999&& 5.495&&198.50&\\
\hlinefor{23}\\
&1000000&&1.056&&1.125&&1.214&&1.333&&1.500&&1.750&&2.167&& 3.000&& 5.500&&626.99&\\
\hlinefor{23}
\endsmatrix$$


Observe that the entries in the lower row of each table agree well with the approximations
we have obtained. Observe also that the standard deviation tends to be large compared
with the mean, especially for $k\ge N/2$. Such instability is familiar to anyone who
has watched the open addressing method in operation, as the table begins to get full.

This table gives strong reason to believe that $T(N,N)=\sqrt{\frac{\pi N}{8}}+O(1)$,
but I have not been able to prove this. {\tt Proved 5/20/65}  







\enddocument


