# Copyright (c) 2020 Brit Nicholson. # All rights reserved. This program is made available under # the terms of the Simple Non Code License (SNCL) v2.3.0, # which is available at # https://github.com/MysteryDash/Simple-Non-Code-License/blob/master/License.txt # This is the code used for a calculator that finds the average # amount and percentiles of an ancestor's DNA when various relatives # have their DNA genotyped. import numpy as np import random import math import fnmatch import time start_time = time.time() def recomb(fun_gen, fun_half_segs, first_anc, second_anc=None): out = first_anc for gen_num in range(1, fun_gen + 1): out = random.sample(out, fun_half_segs) if second_anc is not None: out.extend(random.sample(second_anc, fun_half_segs)) return out segs = 97.5 seg_decimal = segs/2 - int(segs/2) percent_reproduced = [] # Enter 1 for mother, 2 for grandma, 3 for great-grandma, etc. (n+2 for ng grandma): gen = 2 # Enter one for yourself: sib_num = 1 aunt_num = 0 par_of_cous_num = 0 cus_num = 1 cus_from_same_aunt_num = 0 trials = 20000 for i in range(0, trials): if np.random.random() > seg_decimal: half_segs = math.floor(segs/2) else: half_segs = math.ceil(segs/2) m = list(range(0, half_segs*2)) p = list(m) m = ['m' + str(i) for i in m] p = ['p' + str(i) for i in p] #grandma = recomb(half_segs, m, p) # The following aunt variable is only used for cousins from the same aunt/uncle: aunt = recomb(gen-1, half_segs, m, p) unique_segs = [] # DNA contributed by siblings for s_num in range(1, sib_num + 1): unique_segs.extend(recomb(gen, half_segs, m, p)) # DNA contributed by aunts/uncles for a_num in range(1, aunt_num + 1): unique_segs.extend(recomb(gen-1, half_segs, m, p)) # DNA contributed by parents of cousins of order “gen” (will be same as aunt or uncle if gen = 2) for p_of_c_num in range(1, par_of_cous_num + 1): unique_segs.extend(recomb(gen-1, half_segs, m, p)) # DNA contributed by cousins who are NOT children of the above aunts/uncles: # Also, each of these cousins is from a different aunt or uncle. for c_num in range(1, cus_num + 1): unique_segs.extend(recomb(gen, half_segs, m, p)) # This is for several cousins from the same aunt or uncle. # It would be burdensome to do this for multiple aunts or uncles for cfsa_num in range(1, cus_from_same_aunt_num + 1): unique_segs.extend(recomb(1, half_segs, aunt)) unique_segs = fnmatch.filter(list(set(unique_segs)), 'm*') percent_reproduced.append(len(unique_segs)/(half_segs*2)) #df = pd.DataFrame(percent_reproduced) print(np.min(percent_reproduced)) print(np.percentile(percent_reproduced, 2.5)) #print(df.quantile(0.025)) print(np.mean(percent_reproduced)) print(np.percentile(percent_reproduced, 97.5)) #print(df.quantile(0.975)) print(np.max(percent_reproduced)) print("\n") print("Time to complete: %s seconds." % (time.time() - start_time))
Recent Comments