\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage{setspace}
\usepackage{parskip}
\usepackage{titlesec}
\usepackage[section]{placeins}
\usepackage{xcolor}
\usepackage{breakcites}
\usepackage{lineno}
\usepackage{hyphenat}
\PassOptionsToPackage{hyphens}{url}
\usepackage[colorlinks = true,
linkcolor = blue,
urlcolor = blue,
citecolor = blue,
anchorcolor = blue]{hyperref}
\usepackage{etoolbox}
\makeatletter
\patchcmd\@combinedblfloats{\box\@outputbox}{\unvbox\@outputbox}{}{%
\errmessage{\noexpand\@combinedblfloats could not be patched}%
}%
\makeatother
\usepackage{natbib}
\renewenvironment{abstract}
{{\bfseries\noindent{\abstractname}\par\nobreak}\footnotesize}
{\bigskip}
\titlespacing{\section}{0pt}{*3}{*1}
\titlespacing{\subsection}{0pt}{*2}{*0.5}
\titlespacing{\subsubsection}{0pt}{*1.5}{0pt}
\usepackage{authblk}
\usepackage{graphicx}
\usepackage[space]{grffile}
\usepackage{latexsym}
\usepackage{textcomp}
\usepackage{longtable}
\usepackage{tabulary}
\usepackage{booktabs,array,multirow}
\usepackage{amsfonts,amsmath,amssymb}
\providecommand\citet{\cite}
\providecommand\citep{\cite}
\providecommand\citealt{\cite}
% You can conditionalize code for latexml or normal latex using this.
\newif\iflatexml\latexmlfalse
\providecommand{\tightlist}{\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}%
\AtBeginDocument{\DeclareGraphicsExtensions{.pdf,.PDF,.eps,.EPS,.png,.PNG,.tif,.TIF,.jpg,.JPG,.jpeg,.JPEG}}
\usepackage[utf8]{inputenc}
\usepackage[greek,english]{babel}
\begin{document}
\title{Algorithms \& Data Structures}
\author[1]{James S. Wang}%
\affil[1]{Johns Hopkins University}%
\vspace{-1em}
\date{\today}
\begingroup
\let\center\flushleft
\let\endcenter\endflushleft
\maketitle
\endgroup
\sloppy
Comments: //
Blocks: +++++
Separator: \textasciitilde{}
Heading Character: \#
Number of Headings: 1
Heading Names: Topic
Number of Categories: 2
Category Prefixes and Names: (!, Important), (\^{}, Algorithm)
Number of Extensions: 6
Extensions: (\&\&, \&\&, Additional Information), (Simple Definition,
\textless{}e\textgreater{}, \textless{}z\textgreater{}),
(\textbackslash{}{[}, \textbackslash{}{]}, LaTeX), (!{[}{]}(, ), Image),
(!{[}, {]}, Image Explanation), (```java, ```, Code)
\section*{Data Structures}\label{data-structures}
\textbf{Data Structures} \textasciitilde{} These are organized
mechanisms for efficiently storing, accessing, and manipulating data.
They are reusable building blocks for solving complex problems. They
should be clearly designed, generalized, implemented with reusability,
extensibility, and scalability.
\textbf{Type} \textasciitilde{} This is a collection of values. Simple
versions of these include int and bool. Aggregate and more complex
versions of these are known as \textbf{classes}. \%\%A boolean is either
true or false\%\%. \textbf{Data Type} \textasciitilde{} This is a type
with operations on it. \textbf{Abstract Data Types} \textasciitilde{}
These are data types that are independent of, and must not have,
implementation. They are similar to Java interfaces, where all methods
are abstract.
\textsuperscript{\^{}} \textbf{Algebraic Specification}
\textasciitilde{} This is an outline of an abstract data type. It
includes what other abstract data types it uses, what it defines, its
operations, its axioms, its preconditions, and more.
\&\&\textbf{Operations} specify transformations of the data\&\&
\&\&\textbf{Axioms} specify rules that must always be satisfied by the
implementations of our operations\&\& \&\&\textbf{Partial
Operations}:When an operation is limited by pre-conditions, then we say
it is a partial operation and use the -/-\textgreater{} notation.\&\&
\%\%
\begin{verbatim}
adt Variable
uses Any
defines Variable
operations:
new: T ---> Variable
put: Variable x T ---> Variable
get: Variable --> T
axioms:
get(new(t)) = t
get(put(v, t)) = t
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Array} \textasciitilde{} This is a data
structure that stores a list of data. The size of the list cannot
change, and elements can be modified. \%\%
\begin{verbatim}
adt Array
uses Any, Integer
defines Array
operations:
new: Integer x T -/-> Array
get: Array x Integer -/-> T
put: Array x Integer x T -/-> Array
length: Array ---> Integer
preconditions:
new(l, t): 0 < l
get(a, i): 0 <= i < length(a)
put(a, i, t): 0 <= i < length(a)
axioms:
get(new(n, t), i) = t
get(put(a, i, t), j) = (if i = j then t else get(a, j))
length(new(n, t)) = n
length(put(a, i, t)) = length(a)
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Linked List} \textasciitilde{} This is a
data structure consisting of Nodes which store a value and a pointer to
a next Node.
\textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Stack} \textasciitilde{} This is a data
structure that adds to and deletes from the same end. It embodies the
Last In, First Out (LIFO) concept, and consists of four methods: 1. push
2. pop 3. top/peek 4. empty. \%\%
\begin{verbatim}
adt Stack
uses Any, Boolean
defines Stack
operations:
new: ---> Stack
push: Stack x T ---> Stack
pop: Stack -/-> Stack
top: Stack -/-> T
empty: Stack ---> Boolean
preconditions:
pop(s): not empty(s)
top(s): not empty(s)
axioms:
empty(new()
not empty(push(s, t))
top(push(s, t)) = t
pop(push(s, t)) =
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textbf{Amortized Analsis} \textasciitilde{} This is the calculation of
the \_\_\_ cost per operation for a sequence of n operations, which is
equal to the total cost of the operations divided by n.
\textsuperscript{\^{}} \textbf{Queue} \textasciitilde{} This is a data
structure that adds to one end and deletes from the other. \%\%
\begin{verbatim}
adt Queue
uses
defines
operations:
new:
enqueue:
dequeue:
front:
empty:
preconditions:
axioms:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Deque} \textasciitilde{} This is a data
structure that adds to or deletes from either end. \&\&It stands for
double-ended queue. It is both a stack and a queue.\&\& \%
\begin{verbatim}
adt Deque
uses Any
defines Deque
operations:
new:
length:
empty:
front:
back:
insertFront:
removeFront:
insertBack:
removeBack:
preconditions:
axioms:
\end{verbatim}
\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{(Positional) List} \textasciitilde{} This
is a collection of positions with relative ordering. The relative
position is what matters, not the value itself. \%\%
\begin{verbatim}
adt Position
uses Any
defines Position
operations:
get:
put:
preconditions:
axioms:
\end{verbatim}
\%\%
\%\%
\begin{verbatim}
adt List
uses Position, Boolean
defines List
operations:
new:
length:
empty:
insertFront:
insertBack:
insertBefore:
insertAfter:
remove:
removeFront:
removeBack:
front:
back:
previous:
next:
first:
last:
iterator:
preconditions:
axioms:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Set} \textasciitilde{} This is a
collection of objects with position relative to values. The values of
the data being stored determine where and how it gets stored. \&\&A
version of the Set called OrderedSet exists and it has the same
operations as Set, but the elements are expected to be iterated over in
order based on their value.\&\&
\begin{verbatim}
adt Set
uses: Boolean, Integer, Any
defines Set
operations:
new: ---> Set
has: Set x T ---> Boolean
insert: Set x T ---> Set
remove: Set x T ---> Set
size: Set ---> Integer
iterator:
union:
intersect:
subtract:
axioms:
not has(new(), t)
has(insert(s, t), x) = if t=x then true else has(s, x)
has(remove(s, t), x) = if t=x then
\end{verbatim}
\textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{(Rooted) Tree} \textasciitilde{} This is
a position based data structure in which positions have parents and each
position has an unlimited number of children. Connections between
positions (nodes) are called \textbf{edges}. The parent of all nodes is
called the \textbf{root}. A \_\_\_ of a child is called a
\textbf{sub\_\_\_}. Nodes without any children are called
\textbf{leaves}. The \textbf{depth} of the root is 0 and counts how many
``ancestors'' (parents \& parents of parents) a node has. The
\textbf{height} of any leaf is 0 and counts the maximum number of
descendents (children and children of children) a node has. The
\textbf{path} is the number of edges between two nodes. \%\%
\begin{verbatim}
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textbf{Tree Traversals} \textasciitilde{} These are different ways to
iterate through a tree. They include breadth-first (level order) search,
and depth-first search (includes: pre-order, post-order, in-order).
Breadth-first starts with the root, then the children of the root, then
the children of the children. Depth-first pre-order visits the root of a
subtree, then recursively do a pre-order traversal of each child.
Depth-first post-order does a post-order traversal of each child, then
visit root of subtree. Depth-first in-order (for binary trees only)
recursively does in-order traversal of the left child, then visits the
subtree root, and then does in-order traversal of the right child.
\textsuperscript{\^{}} \textbf{Priority Queue} \textasciitilde{} This is
a variation of a standard queue - values go in but only the ``best''
come out, one at a time. These are usually minimum or maximum based.
\%\%
\begin{verbatim}
adt PriorityQueue
uses:
defines: PriorityQueue
operations:
new:
best:
remove:
insert:
empty:
preconditions:
axioms:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Binary Heap} \textasciitilde{} This is a
complete binary tree that is filled on all levels, except the last, and
is filled from left to right. The structure is ordered in that each node
value is greater than or equal to its children's values, so that the
minimum/maximum is always at the root. \%\%
\begin{verbatim}
adt BinaryHeap
uses:
defines: BinaryHeap
operations:
insert:
remove:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Map / Associated Arrays}
\textasciitilde{} This is a collection of key, value pairs called
entries/entities. Keys must be unique and immutable. \&\&Maps that have
duplicated keys are called \textbf{dictionaries} (hashmaps). If you sort
your keys (are comparable), then you have an \textbf{ordered map}
(binary search tree (BST), balanced BST, AVL tree, Red/black, 2-3,
B-tree, Treap, etc.). Likewise, you can have an \textbf{ordered
dictionary}.\&\& \%\%
\begin{verbatim}
adt Map
uses:
defines: Map
operations:
size:
put:
get:
has:
insert:
remove:
(Dictionary only)
findAll:
removeAll:
iterators:
keys:
values:
entries:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Binary Search Tree} \textasciitilde{}
This is a binary tree where each node has 0, 1, or 2 children only. It
has complete ordering - everything in the left of the subtree of the
parent is less than than parent, everything in the right of the subtree
of the parent is greater than the parent, and if duplicates are allowed,
they must be put consistently on the same side. This allows for an
in-order traversal that gives the nodes in ascending order. It's design
is inspired by binary search, and that is what it uses to find a node.
\%\%
\begin{verbatim}
adt BST:
uses:
defines: BST
operations:
find:
insert:
remove:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{AVL Tree / Balanced Binary Search Tree}
\textasciitilde{} This tree not only has an order property, but also a
balance property. For every internal position p of this tree, the
heights of the children of p differ by at most 1. The height of this
binary tree that satisfies the balance property is O(log N) for N nodes
total. This data structure stores the height of each node and adds
post-processing to a binary tree in order to maintain the balance
property.
Insertion: (1) Insert item as in binary search tree. (2) Starting with
new node, go up to update heights. (3) If find a node that has become
unbalanced, do rotation to rebalance. Rebalance reduces height of
subtree by 1, so no need to propegate up - only one adjustment needed at
most because it was previously balanced before the insert. O(log(n)) to
insert and O(1) rotation to rebalance.
Removing: (1) Remove as in binary search tree. (2) Ancestor of actual
deleted leaf node (not value replaced node) may become unbalanced. (3)
Go up from deleted node to find first unbalanced ancestor. (4) Do
rotation : may reduce the height of the sub-tree, causing further
unbalances for ancestors. (5) O(log(N)) to balance and remove.
Rotating: (1) Singles (same direction) and doubles (opposite directions)
(2) How to rotate depends on the balance factor of the (sub)root and the
child on the longer side. (3) Remember that we rebalance from the bottom
up, so both children of an unbalanced node must have -1 \textless{}=
balance factor \textless{}= 1. \%\%
\begin{verbatim}
adt AVLTree:
uses: TreeNode
defines: AVLTree
operations:
insert:
remove:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Treap} \textasciitilde{} This is an
ordered map in which each key-value pair is assigned a random priority.
The binary search order property is based on keys and the priorities are
used to create a heap. This is a \textbf{probabilistic} map
implementation, and there is a good \emph{expected} time complexity
O(log(n)), but not a guaranteed one. O(N) worst case, but unlikely.
Find: O(height) Find as if in BST Insertion: O(height) Insert each node
as if in BST Balance according to Heap priorities \& order should be
maintained automatically Remove: O(height) Remove as if in BST Balance
according to Heap priorities \& order should be maintained automatically
Benefits: Easier rotations than AVLTree (only singles, directions easy
to determine) No need to track heights Good performance in practice \%\%
\begin{verbatim}
adt Treap:
uses:
defines: Treap
operations:
has:
find:
insert:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Hash Map} \textasciitilde{} This is a
data structure used for unordered maps and dictionaries. This has a
worst case O(N) access/item, but the average case is O(1) (constant)
time. It is an implementation of a Map and converts keys to ais an
implementation of a Map and converts keys to array indices through
hashing.
\texttt{hashCode()} is defined in the Object class and by default
returns the integer representation of the memory address. This is not
ideal because if you have two objects that you consider identical, Java
may not consider them identical because they are two separate objects.
The three C's of hashing: Code Compress Collisions
\%\%
\begin{verbatim}
adt HashMap:
uses:
defines: HashMap
operations:
hash:
has:
get:
find:
insert:
put:
remove:
\end{verbatim}
\%\% \textsuperscript{\^{}}
\textsuperscript{\^{}} \textbf{Graph} \textasciitilde{} This is a
mathematical structure for representing relationships. It consists of a
set of \textbf{nodes/vertices} connected by
\textbf{edges/arcs}.\textbf{edges/arcs}. There are two types of these -
directed and undirected. The \textbf{directed} ones have direction
associated with edges (uni/bi-directional), where as \textbf{undirected}
only have bidirectional edges. Each edge has one or two vertices
associated with it, called its endpoints (an edge is said to connect
endpoints). Two vertices are \textbf{adjacent} if they are attached
directly by at least one edge. They are \textbf{connected} if they are
connected by a sequence of edges. The \textbf{neighborhood} of a vertex
is the set of all vertices in G that are adjacent to a vertex v, denoted
N(v). An \textbf{adjacency matrix} is a representation of this as a
2-dimensional arrayas a 2-dimensional array, using boolean 0 and 1's to
depict what vertices are connected. The \textbf{degree} of a vertex is
the number of edges incident with v, and loops are counted as two. The
\textbf{in-degree} of a vertex v is the number of edges that terminate
at v. The \textbf{out-degree} is the number coming from v. A This
structure is \textbf{complete} if it connects all nodes together
directly. The \textbf{path} from one node to another is the sequence of
edges, and the \textbf{length} is the number of edges. This is called
\textbf{connected} if there is a path between every pair of vertices. A
\textbf{cycle} is a path that starts and ends at the same node. A
\textbf{simple} path or cycle does not repeat any nodes or edges.
\&\&A tree is a connected acyclic graph. A rooted tree is a tree with a
special node that is connected to all other nodes. \&\&
\&\&General graph search: Input : Graph G = (V, E) and a starting vertex
s in V Goal : Identify the vertices of V reachable from s in G
\begin{verbatim}
dfs:
mark s as explored, all other vertices as unexplored
S := stack
while S is not empty:
pop top vertex v
if v is unexplored
mark v as explored
for each edge (v, w) in v's adjacency list:
if w is unexplored:
push w to front of S
\end{verbatim}
\&\&
\%\% For a graph G = (V, E) with vertex set V and edge set E. N =
\textbar{}V\textbar{} and M = \textbar{}E\textbar{}. In a simple,
connected, undirected graph, M is bounded by O(N) and O(N\^{}2).
\begin{verbatim}
adt Graph:
uses: :
defines: Graph
operations:
insertVertex(v)
removeVertex(v)
inse
removeVertex(v)
insertEdge(u, v)
removeEdge(e)
numVertices()
numEdges()
vertices()
edges()
areAdjacent(u, v)
isIncident(v, e)
degree(v)
neighbors(v)
\end{verbatim}
\%\% \textsuperscript{\^{}}
\section*{Algorithms}\label{algorithms}
\subsection*{Analysis of Algorithms}\label{analysis-of-algorithms}
\textbf{Algorithms} \textasciitilde{} These are sequences of
steps/actions that produce desired outputs, potentially given specific
inputs. \textbf{Scientific Method} \textasciitilde{} This is our
framework for predicting performance and comparing algorithms. (1)
Observe our algorithm. (2) Hypothesize a model that is consistent with
our observations. (3) Predict events using the hypothesis. (4) Verify
the predictions by making further observations. (5) Validate by
repeating until the hypothesis and observations agree. -- Experiments
must be reproducible and Hypotheses must be falsifiable.
\textbf{Empirical Analysis} \textasciitilde{} This is an analysis of an
algorithm by executing the program to perform experiments. Creating a
model by assuming the time it takes for an algorithm to run, this model
enables us to make predictions. \textbf{Mathematical Analysis}
\textasciitilde{} This is an analysis of an algorithm by counting the
frequency of operations. We often use tilde notations to simplify
analysis, allowing us to explain the behavior through our model.
\textbf{Cost Model} \textasciitilde{} This is a way of finding out the
total running time of an algorithm. It involves using some basic
operation (usually the one with highest cost and frequency) as a proxy
for the running time.
!\textbf{Order-of-growth Classifications} \textasciitilde{} These are
the set of functions that describe the time efficiency of typical
algorithms. They include: 1, log N, N, N log N, N\^{}2, N\^{}3, and
2\^{}N. Generally, only linearithmic and more efficient algorithms are
of practical use. \includegraphics{https://i.imgur.com/NirOO08.png}
!\textbf{Types of Algorithm Analyses} \textasciitilde{} These include
(1) Best case: The lower bound on the cost of the algorithm, determined
by the ``easiest'' input. This is the goal cost for all inputs. (2)
Worst case: The upper bound on the cost, determined by the ``most
difficult'' input. This is a guaranteed cost for all inputs. (3) Average
case: The expected cost for random input. This requires a model for
``random'' input, but provides a way to predict performance.
\textbf{Theory of Algorithms} \textasciitilde{} The goals of this theory
is to establish the ``difficulty'' of problems and develop ``optimal''
algorithms. The approach includes suppression of details in analysis and
elimination of variability in the input model by focusing on the worst
case.
\textbf{Common Notations in Theory of Algorithms} \textasciitilde{}
These include (1) \textbf{Tilde Notation} \textasciitilde{}n: This
involves ignoring lower order terms because as the size of input gets
high, these lower order terms become negligible, providing an
approximate bound on an algorithm. (2) \textbf{Big Theta} \selectlanguage{greek}Θ\selectlanguage{english}(n), which
provides the asymptotic order of growth and is used to classify
algorithms. (3) \textbf{Big O} O(n), which provides the upper bounds of
an algorithm. (4) \textbf{Big Omega} \selectlanguage{greek}Ω\selectlanguage{english}(n), which is used to develop
lower bounds.
\textbf{Shallow Memory Usage} \textasciitilde{} This refers to the total
memory usage for a data type without referenced objects. \textbf{Deep
Memory Usage} \textasciitilde{} This refers to the total memory usage
for a data type including references, adding memory recursively for each
referenced object.
\subsection*{Pure Algorithms}\label{pure-algorithms}
\subsubsection*{Sorting Algorithms}\label{sorting-algorithms}
\textbf{Bubble Sort} \textasciitilde{} \textbf{Insertion Sort}
\textasciitilde{} \textbf{Selection Sort} \textasciitilde{}
\textbf{Selection Sort} \textasciitilde{} \textbf{Heap Sort}
\textasciitilde{} \textbf{Quick Sort} \textasciitilde{} This is a sort
that picks a pivot element from alongthe values apicks a pivot element
from alongthe values and then splits the collection by comparing each
value to pivot (put those \textless{} pivot on left, those
\textgreater{}= pivot on right). Then, recursively sort each group. Best
case: O(n log n) if splits close to half. Worst case: O(n\^{}2) if
splits only one each time. Average case: O(n log n) \textbf{Merge Sort}
\textasciitilde{} \textbf{Bucket Sort} \textasciitilde{} \textbf{Radix
Sort} \textasciitilde{} This is a sort using d-tuples, with M possible
values for each element in the tuple. You start with the back-most
(right-most) dimension and do bucket sort on each dimension, pulling
values out in FIFO order. Runtime: O() \textbf{Stable Sorting}
\textasciitilde{} This is a concept in sorting that maintains the order
of elements with duplicate values. This is important if the sort order
is based on a subset of data members from a larger object. Bucket and
radix sort are stable if you us
\subsection*{Applied Algorithms}\label{applied-algorithms}
\subsubsection*{Union Find}\label{union-find}
+++++ \textbf{The Problem of Dynamic Connectivity} \textasciitilde{} *
Given a set of N objects, a union command means to connect two objects.
Is there a path connecting the two objects? Objects are connected if
they are reflexive, symmetric, and transitive. * Goal: Design a data
structure for union-find: * Number of objects N can be huge * Number of
operations M can be huge * Find queries and union commands may be
intermixed.
+++++
+++++ !\^{}\textbf{Union Find Eager Approach} * Data Structure: * Using
an integer array id{[}{]} of size N, p and q are connected if and only
if they have the same id.
\begin{itemize}
\tightlist
\item
Find:
\begin{itemize}
\tightlist
\item
Check if p and q have the same id to find out if two objects are
connected.
\end{itemize}
\item
Union:
\begin{itemize}
\tightlist
\item
To merge components containing p and q, change all entries whose id
equals id{[}p{]} to id{[}q{]}
\includegraphics{https://i.imgur.com/dQs6143.png}. {[}The elements
at index 0, 1, 2, 5, 6, and 7 all have the same id, and are thus
connected.{]} \textasciitilde{} ```java public class QuickFindUF \{
private int{[}{]} id;
\end{itemize}
public QuickFindUF(int N) \{ id = new int{[}N{]}; for (int i = 0; i
\textless{} N.length; i++) id{[}i{]} = i; \}
public boolean connected(int p, int q) \{ return id{[}p{]} ==
id{[}q{]}; \}
public void union(int p, int q) \{ int pid = id{[}p{]}; int qid =
id{[}q{]}; for (int i = 0; i \textless{} id.length; i++) if (id{[}i{]}
== pid) id{[}i{]} = qid; \} \} ```
\item
Cost Model:
\begin{itemize}
\tightlist
\item
Initialize is O(n)
\item
Find is O(1)
\item
The union method is O(n) and we have to use it n times (for each
element in id). This is too slow because n can be huge and n\^{}2
does not scale well
\end{itemize}
\end{itemize}
+++++
+++++ \^{}\textbf{Union Find Quick Union Approach} * Data Structure: *
Using an integer array id{[}{]} of size N, let id{[}i{]} be the parent
of i. Thus the root of i is
id{[}id{[}id{[}\ldots{}id{[}i{]}\ldots{}{]}{]}{]}.
\begin{itemize}
\tightlist
\item
Find:
\begin{itemize}
\tightlist
\item
Check if p and q have the same root.
\end{itemize}
\item
Union:
\begin{itemize}
\tightlist
\item
To merge components containing p and q, set the id of p's root to
the id of q's root.
\includegraphics{https://i.imgur.com/9Qn6EP5.png} {[}Without the
connection, the root of 3, 2, and 4 is 9, and the root of 5 is 6.
After the union beween p = 3 and q = 5, the roots 9 and 6 are
connected. This is done by setting the id at 9 equal to the 6.{]}
\textasciitilde{} ```java public class QuickUnionUF \{ private
int{[}{]} id;
\end{itemize}
public QuickUnionUF(int N) \{ id = new int{[}N{]}; for (int i = 0; i
\textless{} N; i++) id{[}i{]} = i; \}
private int root(int i) \{ while (i != id{[}i{]}) i = id{[}i{]};
return i; \}
public boolean connected(int p, int q) \{ return root(p) == root(q);
\}
public void union(int p, int q) \{ int i = root(p); int j = root(q);
id{[}i{]} = j; \} \} ```
\item
Cost Model:
\begin{itemize}
\tightlist
\item
Trees can get too tall.
\item
Find is expensive because it is O(n).
\item
Union depends on the find method and worst case it is O(n).
\end{itemize}
\end{itemize}
+++++ \^{}\textbf{Union Find Weighted Quick-Union Approach} * Data
Structure: * Using an integer array id{[}{]} of size N, let id{[}i{]} be
the parent of i. Thus the root of i is
id{[}id{[}id{[}\ldots{}id{[}i{]}\ldots{}{]}{]}{]}. Maintain an extra
array sz{[}i{]} to count the number of objects in the tree rooted at i.
\begin{itemize}
\tightlist
\item
Find:
\begin{itemize}
\tightlist
\item
Check if p and q have the same root.
\end{itemize}
\item
Union:
\begin{itemize}
\tightlist
\item
To merge components containing p and q, set the id of p's root to
the id of q's root. \textasciitilde{} ```java public class
WeightedQuickUnionUF \{ private int{[}{]} id; private int{[}{]} sz;
\end{itemize}
public WeightedQuickUnionUF(int N) \{ id = new int{[}N{]}; sz = new
int{[}N{]}; for (int i = 0; i \textless{} N; i++) id{[}i{]} = i; \}
private int root(int i) \{ while (i != id{[}i{]}) i = id{[}i{]};
return i; \}
public boolean connected(int p, int q) \{ return root(p) == root(q);
\}
public void union(int p, int q) \{ int i = root(p); int j = root(q);
if (i == j) return; if (sz{[}i{]} \textless{} sz{[}j{]}) \{ id{[}i{]}
= j; sz{[}j{]} += sz{[}i{]}; \} else \{ id{[}j{]} = i; sz{[}i{]} +=
sz{[}j{]}; \} \} \} ```
\item
Running time:
\begin{itemize}
\tightlist
\item
The depth of any node x is at most lg N. (base 2)
\begin{itemize}
\tightlist
\item
Why? When does the depth of x increase?
\begin{itemize}
\tightlist
\item
The size of the tree containing x at least doubles each time.
\item
This is because if we have two trees T1 and T2, these two trees
will only merge when \textbar{}T2\textbar{} \textgreater{}=
\textbar{}T1\textbar{}.
\item
The size of the tree containing x can only double at most lg(N)
times.
\item
Otherwise the smaller tree T1 will just go under the bigger tree
T2 and the depth of x will not increase.
\end{itemize}
\end{itemize}
\item
Find: Takes time proportional to the depth of p and q. O(lg(n))
\item
Union: Takes constant time, given roots. O(lg(n))
\end{itemize}
\end{itemize}
+++++
+++++ \^{}\textbf{Union Find Weighted Quick-Union Path Compression
Approach} * Data Structure: * Using an integer array id{[}{]} of size N,
let id{[}i{]} be the parent of i. Thus the root of i is
id{[}id{[}id{[}\ldots{}id{[}i{]}\ldots{}{]}{]}{]}. Maintain an extra
array sz{[}i{]} to count the number of objects in the tree rooted at i.
\begin{itemize}
\tightlist
\item
Find:
\begin{itemize}
\tightlist
\item
Check if p and q have the same root. On the way, make every other
node in the path point to its grandparent (thereby halving the path
length).
\end{itemize}
\item
Union:
\begin{itemize}
\tightlist
\item
To merge components containing p and q, set the id of p's root to
the id of q's root. Before doing so, identify the smaller tree
because we want to link the root of the smaller tree to the root of
the largest tree. Update the sz{[}{]} array. \textasciitilde{}
```java public class WeightedQuickUnionPCUF \{ private int{[}{]} id;
private int{[}{]} sz;
\end{itemize}
public WeightedQuickUnionPCUF(int N) \{ id = new int{[}N{]}; sz = new
int{[}N{]}; for (int i = 0; i \textless{} N; i++) id{[}i{]} = i; \}
private int root(int i) \{ while (i != id{[}i{]}) \{ i = id{[}i{]};
id{[}i{]} = id{[}id{[}i{]}{]}; \} return i; \}
public boolean connected(int p, int q) \{ return root(p) == root(q);
\}
public void union(int p, int q) \{ int i = root(p); int j = root(q);
if (i == j) return; if (sz{[}i{]} \textless{} sz{[}j{]}) \{ id{[}i{]}
= j; sz{[}j{]} += sz{[}i{]}; \} else \{ id{[}j{]} = i; sz{[}i{]} +=
sz{[}j{]}; \} \} \} ```
\item
Running time:
\begin{itemize}
\tightlist
\item
Starting from an empty data structure, any sequence of M union-find
ops on N objects makes \textless{}= c(N + M lg* N) array accesses.
(lg* is the iterative star log function, meaning the number of times
you need to take lg to get to 1, which is practically less than or
equal to 5.)
\begin{itemize}
\tightlist
\item
Analysis can be improved to N + M \(\alpha\)(M, N).
\end{itemize}
\end{itemize}
\end{itemize}
+++++
\textbf{Percolation} \textasciitilde{} This a state of a system that is
an N by N grid of sites where each site is open with probability p (or
blocked with probability 1 - p) such that the top and bottom are
connected by open sites. In other words, if an open site is modeled by a
white square and a blocked site is modeled by a black square, this is
whether there is a path of white squares from the top to the bottom.
\textbf{Monte Carlo Simulation} \textasciitilde{} This is a simulation
where you initialize an N by N grid as completely blocked. Then we
declare random sites open until the top is connected to the bottom and
the system is percolated. This simulation is used to estimate the value
for p where the chance of a system percolating shifts from likely to
percolate to unlikely to percolate.
\selectlanguage{english}
\FloatBarrier
\end{document}