Select Page
```# Copyright (c) 2020 Brit Nicholson.
# the terms of the Simple Non Code License (SNCL) v2.3.0,
# which is available at

# This is the code used for a calculator that finds the average
# amount and percentiles of an ancestor's DNA when various relatives
# have their DNA genotyped.

import numpy as np
import random
import math
import fnmatch
import time

start_time = time.time()

def recomb(fun_gen, fun_half_segs, first_anc, second_anc=None):

out = first_anc

for gen_num in range(1, fun_gen + 1):

out = random.sample(out, fun_half_segs)
if second_anc is not None:
out.extend(random.sample(second_anc, fun_half_segs))

return out

segs = 97.5

seg_decimal = segs/2 - int(segs/2)
percent_reproduced = []

# Enter 1 for mother, 2 for grandma, 3 for great-grandma, etc. (n+2 for ng grandma):
gen = 2
# Enter one for yourself:
sib_num = 1
aunt_num = 0
par_of_cous_num = 0
cus_num = 1
cus_from_same_aunt_num = 0

trials = 20000
for i in range(0, trials):

if np.random.random() > seg_decimal:
half_segs = math.floor(segs/2)
else:
half_segs = math.ceil(segs/2)

m = list(range(0, half_segs*2))
p = list(m)

m = ['m' + str(i) for i in m]
p = ['p' + str(i) for i in p]

#grandma = recomb(half_segs, m, p)
# The following aunt variable is only used for cousins from the same aunt/uncle:
aunt = recomb(gen-1, half_segs, m, p)

unique_segs = []

# DNA contributed by siblings
for s_num in range(1, sib_num + 1):
unique_segs.extend(recomb(gen, half_segs, m, p))

# DNA contributed by aunts/uncles
for a_num in range(1, aunt_num + 1):
unique_segs.extend(recomb(gen-1, half_segs, m, p))

# DNA contributed by parents of cousins of order “gen” (will be same as aunt or uncle if gen = 2)
for p_of_c_num in range(1, par_of_cous_num + 1):
unique_segs.extend(recomb(gen-1, half_segs, m, p))

# DNA contributed by cousins who are NOT children of the above aunts/uncles:
# Also, each of these cousins is from a different aunt or uncle.
for c_num in range(1, cus_num + 1):
unique_segs.extend(recomb(gen, half_segs, m, p))

# This is for several cousins from the same aunt or uncle.
# It would be burdensome to do this for multiple aunts or uncles
for cfsa_num in range(1, cus_from_same_aunt_num + 1):
unique_segs.extend(recomb(1, half_segs, aunt))

unique_segs = fnmatch.filter(list(set(unique_segs)), 'm*')

percent_reproduced.append(len(unique_segs)/(half_segs*2))

#df = pd.DataFrame(percent_reproduced)

print(np.min(percent_reproduced))

print(np.percentile(percent_reproduced, 2.5))
#print(df.quantile(0.025))

print(np.mean(percent_reproduced))

print(np.percentile(percent_reproduced, 97.5))
#print(df.quantile(0.975))

print(np.max(percent_reproduced))

print("\n")

print("Time to complete: %s seconds." % (time.time() - start_time))```