# Select Page
# Copyright (c) 2020 Brit Nicholson.
# All rights reserved. This program is made available under
# the terms of the Simple Non Code License (SNCL) v2.3.0,
# which is available at
# https://github.com/MysteryDash/Simple-Non-Code-License/blob/master/License.txt

# This is the code for a calculator that estimates the average fraction,
# and percentiles of the fraction, of an ancestor's DNA that is recovered
# when various relatives have their DNA genotyped.

import numpy as np
import random
import math
import fnmatch
import time

start_time = time.time()

def recomb(fun_gen, fun_half_segs, first_anc, second_anc=None):
    """Pass a list of segments down ``fun_gen`` generations.

    In every generation a random ``fun_half_segs`` of the current segments
    are kept (sampling without replacement).  When ``second_anc`` is given,
    a random ``fun_half_segs`` of its segments are appended as well, so the
    result holds ``2 * fun_half_segs`` segments after each generation.

    Note: without ``second_anc`` the result already has only
    ``fun_half_segs`` segments after the first generation, so further
    generations merely reshuffle it rather than halving it again.

    Args:
        fun_gen: Number of generations to simulate; 0 returns a copy of
            ``first_anc`` unchanged.
        fun_half_segs: Number of segments drawn from each side per
            generation.
        first_anc: Sequence of segments of the ancestor being followed.
        second_anc: Optional sequence of segments mixed in at every
            generation.

    Returns:
        A new list with the segments present after ``fun_gen`` generations.

    Raises:
        ValueError: Propagated from ``random.sample`` when
            ``fun_half_segs`` exceeds the number of available segments.
    """
    # Copy up front so that fun_gen == 0 never hands the caller's own list
    # back; otherwise mutating the result would silently alter the ancestor.
    out = list(first_anc)

    for _ in range(fun_gen):
        out = random.sample(out, fun_half_segs)
        if second_anc is not None:
            out.extend(random.sample(second_anc, fun_half_segs))

    return out

# Number of Monte Carlo trials to run.
trials = 20000

# Segment count for the simulation.  It may be fractional: every trial
# rounds segs/2 either down or up at random.
segs = 97.5

# Fractional part of segs/2; it sets how often a trial rounds up.
seg_decimal = math.modf(segs / 2)[0]

# One entry per trial is collected here.
percent_reproduced = []

# Enter 1 for mother, 2 for grandma, 3 for great-grandma, etc. (n+2 for ng grandma):
gen = 2

# Numbers of genotyped relatives of each kind.
# Enter one for yourself:
sib_num = 1
# Aunts/uncles:
aunt_num = 0
# Parents of cousins of order "gen":
par_of_cous_num = 0
# Cousins, each from a different aunt or uncle:
cus_num = 1
# Cousins who all share the same aunt or uncle:
cus_from_same_aunt_num = 0
# Monte Carlo loop: every trial simulates the genotyped relatives and records
# which fraction of the ancestor's ('m') segments shows up in at least one
# of them.
for i in range(trials):

    # segs/2 may be fractional, so round it down or up at random; rounding
    # up happens with probability seg_decimal, which keeps the long-run
    # average equal to segs/2.
    if np.random.random() > seg_decimal:
        half_segs = math.floor(segs / 2)
    else:
        half_segs = math.ceil(segs / 2)

    # Labelled segments: 'm<k>' belong to the ancestor of interest, 'p<k>'
    # are the segments mixed in at every generation.
    m = ['m' + str(k) for k in range(half_segs * 2)]
    p = ['p' + str(k) for k in range(half_segs * 2)]

    # The aunt/uncle shared by the "cousins from the same aunt" group.  It is
    # only simulated when such cousins exist, so no work is wasted otherwise.
    if cus_from_same_aunt_num > 0:
        aunt = recomb(gen - 1, half_segs, m, p)

    unique_segs = []

    # DNA contributed by siblings
    for _ in range(sib_num):
        unique_segs.extend(recomb(gen, half_segs, m, p))

    # DNA contributed by aunts/uncles
    for _ in range(aunt_num):
        unique_segs.extend(recomb(gen - 1, half_segs, m, p))

    # DNA contributed by parents of cousins of order "gen"
    # (will be same as aunt or uncle if gen = 2)
    for _ in range(par_of_cous_num):
        unique_segs.extend(recomb(gen - 1, half_segs, m, p))

    # DNA contributed by cousins who are NOT children of the above
    # aunts/uncles.  Each of these cousins is from a different aunt or uncle.
    for _ in range(cus_num):
        unique_segs.extend(recomb(gen, half_segs, m, p))

    # Several cousins from the same aunt or uncle.
    # It would be burdensome to do this for multiple aunts or uncles.
    for _ in range(cus_from_same_aunt_num):
        unique_segs.extend(recomb(1, half_segs, aunt))

    # Keep each of the ancestor's segments once, however many relatives
    # carry it; 'p' segments are discarded.
    unique_segs = [seg for seg in set(unique_segs) if seg.startswith('m')]

    # Fraction of the ancestor's segments recovered in this trial.
    percent_reproduced.append(len(unique_segs) / (half_segs * 2))

# Summary over all trials, printed one value per line in this order:
# minimum, 2.5th percentile, mean, 97.5th percentile, maximum.
summary = (
    np.min(percent_reproduced),
    np.percentile(percent_reproduced, 2.5),
    np.mean(percent_reproduced),
    np.percentile(percent_reproduced, 97.5),
    np.max(percent_reproduced),
)
for value in summary:
    print(value)

print("\n")

# Wall-clock time since the script started.
elapsed = time.time() - start_time
print("Time to complete: %s seconds." % elapsed)