Select Page
```# Copyright (c) 2020 Brit Nicholson.
# the terms of the Simple Non Code License (SNCL) v2.3.0,
# which is available at

import numpy as np
import random
import fnmatch
import time

def recomb(anc_1, anc_2, rand_fish, fun_spots):

rand = random.random()
if rand < 0.25:
out_a = recomb_a_first(anc_1, rand_fish, fun_spots)
out_b = recomb_a_first(anc_2, rand_fish, fun_spots)
elif rand < 0.5:
out_a = recomb_a_first(anc_1, rand_fish, fun_spots)
out_b = recomb_b_first(anc_2, rand_fish, fun_spots)
elif rand < 0.75:
out_a = recomb_b_first(anc_1, rand_fish, fun_spots)
out_b = recomb_a_first(anc_2, rand_fish, fun_spots)
else:
out_a = recomb_b_first(anc_1, rand_fish, fun_spots)
out_b = recomb_b_first(anc_2, rand_fish, fun_spots)

out = np.column_stack((out_a, out_b))
return out

def recomb_a_first(anc, fun_rand_fish, fun_spots):

random.shuffle(fun_spots)
num_of_recombs = 22 + np.random.poisson(fun_rand_fish)
cut_spots = sorted(fun_spots[:num_of_recombs])

a = anc[:,0].tolist()
b = anc[:,1].tolist()

if len(cut_spots) == 0:
recomb_out = a
return recomb_out

recomb_out = []
for i in range(0, len(cut_spots)):
if i == 0:
recomb_out = a[0:cut_spots]
if i % 2 == 1:
recomb_out.extend(b[cut_spots[i-1]:cut_spots[i]])
last_a = False
else:
recomb_out.extend(a[cut_spots[i-1]:cut_spots[i]])
last_a = True
if last_a:
recomb_out.extend(b[cut_spots[i]:len(b)])
else:
recomb_out.extend(a[cut_spots[i]:len(a)])
return recomb_out

def recomb_b_first(anc, fun_rand_fish, fun_spots):

random.shuffle(fun_spots)
num_of_recombs = 22 + np.random.poisson(fun_rand_fish)
cut_spots = sorted(fun_spots[:num_of_recombs])

a = anc[:,0].tolist()
b = anc[:,1].tolist()

if len(cut_spots) == 0:
recomb_out = b
return recomb_out

recomb_out = []
for i in range(0, len(cut_spots)):
if i == 0:
recomb_out = b[0:cut_spots]
if i % 2 == 1:
recomb_out.extend(a[cut_spots[i-1]:cut_spots[i]])
last_a = True
else:
recomb_out.extend(b[cut_spots[i-1]:cut_spots[i]])
last_a = False
if last_a:
recomb_out.extend(b[cut_spots[i]:len(b)])
else:
recomb_out.extend(a[cut_spots[i]:len(a)])
return recomb_out

# Out of 1B Poisson values, the smallest I got was 28 and the largest was 94, so 99 (or 97+) genes should be fine.
genes_num = 99
genes_list = list(range(0, genes_num))
spots = list(range(1, genes_num))

rand_fish = 33 # λ

# Gen. = 2 simulates grandparents, 3 simulates great-grandparents, etc.
gen = 2

trials = 500000

# Only enter a "1" for one of the following to see what % of DNA one shares w/ that relative only.
aunt_bool = 0
cous_1st_bool = 0
great_aunt_bool = 0
par_of_cous_bool = 0
nth_cous_bool = 0
nth_anc_bool = 1

# Use this to change an nth relative to a half-relative
half = 0

# Create the farthest back ancestors. Label homolouges; otherwise, aunts share 50%.
for num in range(1, int((2**gen)/2) + 1):
vars()['first_anc_' + str(num)] = np.column_stack(([str(num) + 'a' for i in genes_list], [str(num) + 'b' for i in genes_list]))

if nth_cous_bool == 1 or par_of_cous_bool == 1:
fill = np.column_stack(([0 for i in genes_list], [0 for i in genes_list]))

start_time = time.time()

if aunt_bool + cous_1st_bool + great_aunt_bool + par_of_cous_bool + nth_cous_bool + nth_anc_bool != 1:
raise ValueError('One and only one relative must be compared.')

if great_aunt_bool == 1 and gen != 3:
raise ValueError('Great Aunts should only be compared when gen = 3.')

percent_same = []

for i_trials in range(0, trials):

if gen > 2:
for num in range(1, int((2**gen)/4) + 1):
vars()['new_' + str(num)] = recomb(vars()['first_anc_' + str(2*num-1)], vars()['first_anc_' + str(2*num)], rand_fish, spots)

# Recombine for close relatives. These 2 loops get it down to two grandparents.
# Each anc. in the tree is created, but some are eventually overwritten.
for g in range(gen-1, 2, -1):
for num in range(1, int((2**g)/4) + 1):
vars()['new_' + str(num)] = recomb(vars()['new_' + str(2*num-1)], vars()['new_' + str(2*num)], rand_fish, spots)

# Create a 'mother'
if gen > 2:
mom = recomb(new_1, new_2, rand_fish, spots)
else:
mom = recomb(first_anc_1, first_anc_2, rand_fish, spots)

# Create oneself (only the applicable homologue, which is arbitrarily the maternal one)
rand = random.random()
if rand < 0.5:
me = recomb_a_first(mom, rand_fish, spots)
else:
me = recomb_b_first(mom, rand_fish, spots)

me = np.asarray(me)

if nth_anc_bool == 1:
percent_same.append(np.sum((me == first_anc_2[:,0]) | (me == first_anc_2[:,1]))/(2*genes_num))

elif aunt_bool == 1 or cous_1st_bool == 1:
# Create an 'aunt'
if gen > 2:
aunt = recomb(new_1, new_2, rand_fish, spots)
else:
aunt = recomb(first_anc_1, first_anc_2, rand_fish, spots)

if aunt_bool == 1:
percent_same.append(np.sum((me == aunt[:,0]) | (me == aunt[:,1]))/(2*genes_num))

else:
rand = random.random()
if rand < 0.5:
cous_1st = recomb_a_first(aunt, rand_fish, spots)
else:
cous_1st = recomb_b_first(aunt, rand_fish, spots)
cous_1st = np.asarray(cous_1st)
percent_same.append(np.sum((me == cous_1st))/(2*genes_num))

elif gen == 3 and great_aunt_bool == 1:
# To keep the programming simple and limit run-time, only allow calculations for great-aunts
# and great-uncles (siblings of one's grandparents) when gen == 3
great_aunt = recomb(first_anc_1, first_anc_2, rand_fish, spots)
percent_same.append(np.sum((me == great_aunt[:,0]) | (me == great_aunt[:,1]))/(2*genes_num))

# Recombine for nth cousin or their parent
elif nth_cous_bool == 1 or par_of_cous_bool == 1:

# One only shares 2 close ancestors with a full nth cousin (unless they share more /:)
if half == 0:
new_1 = recomb(first_anc_1, first_anc_2, rand_fish, spots)
else:
new_1 = recomb(first_anc_1, fill, rand_fish, spots)

# If gen > 3, dilute half of the genes with 'fill' for each extra generation
for g in range(gen - 1, 2, -1):

new_1 = recomb(new_1, fill, rand_fish, spots)

# Create a parent of nth cousin (where n = gen - 1)
if gen > 2:
par_of_cous = recomb(new_1, fill, rand_fish, spots)
else:
par_of_cous = new_1
#par_of_cous = recomb(new_1, fill, rand_fish, spots)

if par_of_cous_bool == 1:
percent_same.append(np.sum((me == par_of_cous[:,0]) | (me == par_of_cous[:,1]))/(2*genes_num))

else: # Create an nth cousin (where n = gen - 1)
rand = random.random()
if rand < 0.5:
nth_cous = recomb_a_first(par_of_cous, rand_fish, spots)
else:
nth_cous = recomb_b_first(par_of_cous, rand_fish, spots)
percent_same.append(np.sum((me == nth_cous))/(2*genes_num))

# Calculate the fraction of DNA shared
#percent_same.append(np.sum((me == mom[:,0]) | (me == mom[:,1]))/(2*genes_num))

if half == 1:
print('This is for a half-relative!', "\n")

mean_out = np.mean(percent_same)
std_dev_out = np.std(percent_same)
print(mean_out)
print(std_dev_out)

print("\n")

print("Time to complete: %s seconds." % (time.time() - start_time))

import statistics

def print_results(pv):
print(':', str(round(100*statistics.mean(pv),2)) + '%,',\
'min:', str(round(min(pv),5)) + ',', '95%-Conf.:', str(round(np.percentile(pv, lo),4)) + '-' + str(round(np.percentile(pv, hi),4)) + ',',\
'max:', str(round(max(pv),5)) + ',', round(statistics.stdev(pv),5), 'std,',\
pv.count(0) / len(pv), '0%', pv.count(100) / len(pv), '100%')

confid = 95
lo = (100 - confid) / 2
hi = 100 - lo

print_results(percent_same)```