## loading python libraries
# necessary to display plots inline:
%matplotlib inline
# load the libraries
import matplotlib.pyplot as plt # 2D plotting library
import numpy as np # package for scientific computing
from math import * # package for mathematics (pi, arctan, sqrt, factorial ...)
import sympy as sympy # package for symbolic computation
from sympy import *
This is a math puzzle that used to be asked in job interviews for positions such as quantitative analysts or traders.
Here is the puzzle: a frog starts at the bank of a river located at $x=0$. The opposite bank is at $x=10$. The frog's first jump is chosen uniformly at random from ${1, 2, \dots, 10}$ (if it lands on $10$, the frog crosses the river and the process ends). Afterward, at each time step, if the frog is at some position $y<10$, it jumps to a uniform random location in ${y+1, \dots, 10}$.
# Question 1
def MatrixFrog(L):
# L = length of the river (L=10 in the exercise)
# returns the transition matrix of the location of the frog
Matrix=np.zeros([L+1,L+1])
Matrix[L,L]=1 # absorbing state
for i in range(L):
for j in range(i+1,L+1):
Matrix[i,j]=1/(L-i)
return Matrix
print('---- Question 1 ---')
print('The transition matrix Q = ')
print(np.round(MatrixFrog(6),3))
# Question 2
def DistributionFrog(L,i,t):
# L = length of the river (L=10 in the exercise)
# 0 <= i <= L : location of the frog
# t : time step
Power_of_M=np.linalg.matrix_power(MatrixFrog(L),t)
return Power_of_M[0,i]
print('---- Question 2 ---')
t=3
print('Distribution of X at time t =',t,' is ',[np.round(DistributionFrog(10,i,t),2) for i in range(11)])
# Question 3
def DistributionT(L,t):
if t==0:
return 0
return DistributionFrog(L,L,t)-DistributionFrog(L,L,t-1)
print('---- Question 3 ---')
L=10
plt.plot(range(1,L+1),[DistributionT(L,t) for t in range(1,L+1)],'o-',label='t -> P(T=t)')
plt.title('Distribution of T')
plt.xticks(range(1,L+1))
plt.legend()
plt.show()
---- Question 1 --- The transition matrix Q = [[0. 0.167 0.167 0.167 0.167 0.167 0.167] [0. 0. 0.2 0.2 0.2 0.2 0.2 ] [0. 0. 0. 0.25 0.25 0.25 0.25 ] [0. 0. 0. 0. 0.333 0.333 0.333] [0. 0. 0. 0. 0. 0.5 0.5 ] [0. 0. 0. 0. 0. 0. 1. ] [0. 0. 0. 0. 0. 0. 1. ]] ---- Question 2 --- Distribution of X at time t = 3 is [0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.02, 0.04, 0.07, 0.14, 0.71] ---- Question 3 ---
Initially, $0\leq m\leq N$ sheep are pro-Mac. At times $t= 0,1,2,...,$ a randomly and uniformly chosen sheep bleats its opinion and instantly one sheep of the other camp switches its opinion. The process ends when unanimity is reached.
We will ask:
def MatrixSheep(N):
# returns the transition matrix of the pro-Mac sheep
Matrix=np.zeros([N+1,N+1])
Matrix[N,N]=1 # absorbing state
Matrix[0,0]=1 # absorbing state
for k in range(1,N):
Matrix[k,k+1]=k/N
Matrix[k,k-1]=(N-k)/N
return Matrix
def ProbaSheep(N,m,i,t):
# returns the probability that there are i pro-Max sheep (among N) at time t
# when the process starts with m pro-Mac sheep
MatrixQ=MatrixSheep(N)
Power=np.linalg.matrix_power(MatrixQ,t)
return Power[m,i]
N=100
m=70
t=30
Probabilities=[ProbaSheep(N,m,i,t) for i in range(N+1)]
top=max(Probabilities)
plt.plot(Probabilities,'o-')
plt.plot([m,m],[0,top],'r--',label='initial m='+str(m)+' ')
plt.title('Probability distribution at time t ='+str(t)+', with N= '+str(N)+' sheep')
plt.xlabel('Total number of sheep'),plt.ylabel('Probability'),plt.legend()
plt.legend()
plt.show()
#print(Probabilities[-1])
As $0$ and $N$ are absorbing states the processus is eventually absorbed at $0$ or $N$: there is unanimity pro-Mac or pro-Windows among sheep.
For fixed $N$ let $p_{m}$ denote the probability that the unanimity is achieved for Mac, starting from $m$ pro-Mac and $N-m$ pro-Windows sheep.
# Question 2
N=20
Probabilities_proMac=[ProbaSheep(N,m,N,N**2) for m in range(N+1)]
plt.plot(Probabilities_proMac,'o-')
plt.xlabel('Initial number of pro-Mac'),plt.ylabel('Probability')
plt.title('Probability that Mac wins')
plt.xticks(range(0,N+1,2))
plt.yticks(np.arange(0,1.1,0.25))
plt.show()
#print(Probabilities_proMac)
We consider the following probabilistic model. (Its name refers to this historical event.)
Initially there is a population of $N$ gangsters, $a$ of them belong to gang $A$ and $b=N-a$ belong to gang $B$. At times $t= 0,1,2,...,$ a randomly and uniformly chosen gangster (among survivors) kills a member of the other gang. The process ends when one of the gangs is wiped out.
We want to find:
def GangA_wins_aux(starting_point,Probabilities={}):
# auxiliary function which returns the probability that gang A wins starting from a,b individuals
# using memoization
a,b=starting_point
if starting_point not in Probabilities:
if b==0:
Probabilities[(a,b)]=1
elif a==0:
Probabilities[(a,b)]=0
else:
Probabilities[(a,b)]=GangA_wins_aux((a,b-1))*a/(a+b) + GangA_wins_aux((a-1,b))*b/(a+b)
return Probabilities[starting_point]
def GangA_wins(a,b):
# returns the probability that gang A wins starting from a,b individuals
return GangA_wins_aux((a,b))
N=20
Results=[GangA_wins(n,N) for n in range(2*N)]
plt.plot(Results,'o-',label='Prob. that gang A wins')
plt.xlabel('Initial size of gang A')
plt.legend()
plt.show()
def ExpectedSurvivorsA(a,b):
# return the number of survivals of gang A, starting from a,b individuals
if b==0:
return a
elif a==0:
return 0
else:
return ExpectedSurvivorsA(a,b-1)*a/(a+b) + ExpectedSurvivorsA(a-1,b)*b/(a+b)
N=8
b_max=20
plt.plot([ExpectedSurvivorsA(i,N) for i in range(b_max)],'o-',label='i -> e_{i,8}')
plt.legend()
plt.show()
We consider the following problem. Consider a group of $n\geq 2$ people, we assume that their birthdays $X_1,\dots,X_n$ are uniformly distributed and independent in $\{1,2,\dots,k\}$, with $k=365$. The birthday paradox asks for the probability of the event
$$ E_{n,k} =\{ \text{ there exist }i\neq j, 1\leq i,j \leq n; X_i=X_j\}. $$Obviously we have that $\mathbb{P}(E_{n,365})=1$ as soon as $n\geq 365$. The so-called paradox is that a high probability is reached for quite small values of $n$. </div>
def TwoIdenticalBirthdays(n,k):
# returns the probability P(E_{n,k})
Vector=np.arange(k-n+1,k+1) # computes [k-n+1,...,k]
Quotient=Vector/(k+0.0) # '+0.0' forces the float division
Product= np.prod(Quotient)
return 1-Product
# Test : (for n=8,k=365 this should return 0.0743...)
print('For n=8, two identical birthdays with probability '+str(TwoIdenticalBirthdays(8,365)))
For n=8, two identical birthdays with probability 0.07433529235166902
# Question 1
BirthdayParadox = [TwoIdenticalBirthdays(n,365) for n in range(2,100,3)]
plt.plot(range(2,100,3),BirthdayParadox,'o')
plt.xlabel('Size $n$ of the group'),plt.ylabel('Probability')
plt.title('Probability in the birthday paradox')
plt.show()
# Question 2
BirthdayParadox = [TwoIdenticalBirthdays(n,365) for n in range(1,365)]
i=1
while BirthdayParadox[i]<0.75:
i=i+1
print('-----------------')
print('Question 2')
print('For n = '+str(i)+', we have '+str(TwoIdenticalBirthdays(i,365))+' chance of 2 identical birthdays')
print('For n = '+str(i+1)+', we have '+str(TwoIdenticalBirthdays(i+1,365))+' chance of 2 identical birthdays')
print('-----------------')
----------------- Question 2 For n = 31, we have 0.7304546337286439 chance of 2 identical birthdays For n = 32, we have 0.7533475278503206 chance of 2 identical birthdays -----------------