# Copyright (c) 2020 Brit Nicholson. # All rights reserved. This program is made available under # the terms of the Simple Non Code License (SNCL) v2.3.0, # which is available at # https://github.com/MysteryDash/Simple-Non-Code-License/blob/master/License.txt import numpy as np import random import fnmatch import time def recomb(anc_1, anc_2, rand_fish, fun_spots): rand = random.random() if rand < 0.25: out_a = recomb_a_first(anc_1, rand_fish, fun_spots) out_b = recomb_a_first(anc_2, rand_fish, fun_spots) elif rand < 0.5: out_a = recomb_a_first(anc_1, rand_fish, fun_spots) out_b = recomb_b_first(anc_2, rand_fish, fun_spots) elif rand < 0.75: out_a = recomb_b_first(anc_1, rand_fish, fun_spots) out_b = recomb_a_first(anc_2, rand_fish, fun_spots) else: out_a = recomb_b_first(anc_1, rand_fish, fun_spots) out_b = recomb_b_first(anc_2, rand_fish, fun_spots) out = np.column_stack((out_a, out_b)) return out def recomb_a_first(anc, fun_rand_fish, fun_spots): random.shuffle(fun_spots) num_of_recombs = 22 + np.random.poisson(fun_rand_fish) cut_spots = sorted(fun_spots[:num_of_recombs]) a = anc[:,0].tolist() b = anc[:,1].tolist() if len(cut_spots) == 0: recomb_out = a return recomb_out recomb_out = [] for i in range(0, len(cut_spots)): if i == 0: recomb_out = a[0:cut_spots[0]] if i % 2 == 1: recomb_out.extend(b[cut_spots[i-1]:cut_spots[i]]) last_a = False else: recomb_out.extend(a[cut_spots[i-1]:cut_spots[i]]) last_a = True if last_a: recomb_out.extend(b[cut_spots[i]:len(b)]) else: recomb_out.extend(a[cut_spots[i]:len(a)]) return recomb_out def recomb_b_first(anc, fun_rand_fish, fun_spots): random.shuffle(fun_spots) num_of_recombs = 22 + np.random.poisson(fun_rand_fish) cut_spots = sorted(fun_spots[:num_of_recombs]) a = anc[:,0].tolist() b = anc[:,1].tolist() if len(cut_spots) == 0: recomb_out = b return recomb_out recomb_out = [] for i in range(0, len(cut_spots)): if i == 0: recomb_out = b[0:cut_spots[0]] if i % 2 == 1: recomb_out.extend(a[cut_spots[i-1]:cut_spots[i]]) last_a = True else: recomb_out.extend(b[cut_spots[i-1]:cut_spots[i]]) last_a = False if last_a: recomb_out.extend(b[cut_spots[i]:len(b)]) else: recomb_out.extend(a[cut_spots[i]:len(a)]) return recomb_out # Out of 1B Poisson values, the smallest I got was 28 and the largest was 94, so 99 (or 97+) genes should be fine. genes_num = 99 genes_list = list(range(0, genes_num)) spots = list(range(1, genes_num)) rand_fish = 33 # λ # Gen. = 2 simulates grandparents, 3 simulates great-grandparents, etc. gen = 2 trials = 500000 # Only enter a "1" for one of the following to see what % of DNA one shares w/ that relative only. aunt_bool = 0 cous_1st_bool = 0 great_aunt_bool = 0 par_of_cous_bool = 0 nth_cous_bool = 0 nth_anc_bool = 1 # Use this to change an nth relative to a half-relative half = 0 # Create the farthest back ancestors. Label homolouges; otherwise, aunts share 50%. for num in range(1, int((2**gen)/2) + 1): vars()['first_anc_' + str(num)] = np.column_stack(([str(num) + 'a' for i in genes_list], [str(num) + 'b' for i in genes_list])) if nth_cous_bool == 1 or par_of_cous_bool == 1: fill = np.column_stack(([0 for i in genes_list], [0 for i in genes_list])) start_time = time.time() if aunt_bool + cous_1st_bool + great_aunt_bool + par_of_cous_bool + nth_cous_bool + nth_anc_bool != 1: raise ValueError('One and only one relative must be compared.') if great_aunt_bool == 1 and gen != 3: raise ValueError('Great Aunts should only be compared when gen = 3.') percent_same = [] for i_trials in range(0, trials): if gen > 2: for num in range(1, int((2**gen)/4) + 1): vars()['new_' + str(num)] = recomb(vars()['first_anc_' + str(2*num-1)], vars()['first_anc_' + str(2*num)], rand_fish, spots) # Recombine for close relatives. These 2 loops get it down to two grandparents. # Each anc. in the tree is created, but some are eventually overwritten. for g in range(gen-1, 2, -1): for num in range(1, int((2**g)/4) + 1): vars()['new_' + str(num)] = recomb(vars()['new_' + str(2*num-1)], vars()['new_' + str(2*num)], rand_fish, spots) # Create a 'mother' if gen > 2: mom = recomb(new_1, new_2, rand_fish, spots) else: mom = recomb(first_anc_1, first_anc_2, rand_fish, spots) # Create oneself (only the applicable homologue, which is arbitrarily the maternal one) rand = random.random() if rand < 0.5: me = recomb_a_first(mom, rand_fish, spots) else: me = recomb_b_first(mom, rand_fish, spots) me = np.asarray(me) if nth_anc_bool == 1: percent_same.append(np.sum((me == first_anc_2[:,0]) | (me == first_anc_2[:,1]))/(2*genes_num)) elif aunt_bool == 1 or cous_1st_bool == 1: # Create an 'aunt' if gen > 2: aunt = recomb(new_1, new_2, rand_fish, spots) else: aunt = recomb(first_anc_1, first_anc_2, rand_fish, spots) if aunt_bool == 1: percent_same.append(np.sum((me == aunt[:,0]) | (me == aunt[:,1]))/(2*genes_num)) else: rand = random.random() if rand < 0.5: cous_1st = recomb_a_first(aunt, rand_fish, spots) else: cous_1st = recomb_b_first(aunt, rand_fish, spots) cous_1st = np.asarray(cous_1st) percent_same.append(np.sum((me == cous_1st))/(2*genes_num)) elif gen == 3 and great_aunt_bool == 1: # To keep the programming simple and limit run-time, only allow calculations for great-aunts # and great-uncles (siblings of one's grandparents) when gen == 3 great_aunt = recomb(first_anc_1, first_anc_2, rand_fish, spots) percent_same.append(np.sum((me == great_aunt[:,0]) | (me == great_aunt[:,1]))/(2*genes_num)) # Recombine for nth cousin or their parent elif nth_cous_bool == 1 or par_of_cous_bool == 1: # One only shares 2 close ancestors with a full nth cousin (unless they share more /:) if half == 0: new_1 = recomb(first_anc_1, first_anc_2, rand_fish, spots) else: new_1 = recomb(first_anc_1, fill, rand_fish, spots) # If gen > 3, dilute half of the genes with 'fill' for each extra generation for g in range(gen - 1, 2, -1): new_1 = recomb(new_1, fill, rand_fish, spots) # Create a parent of nth cousin (where n = gen - 1) if gen > 2: par_of_cous = recomb(new_1, fill, rand_fish, spots) else: par_of_cous = new_1 #par_of_cous = recomb(new_1, fill, rand_fish, spots) if par_of_cous_bool == 1: percent_same.append(np.sum((me == par_of_cous[:,0]) | (me == par_of_cous[:,1]))/(2*genes_num)) else: # Create an nth cousin (where n = gen - 1) rand = random.random() if rand < 0.5: nth_cous = recomb_a_first(par_of_cous, rand_fish, spots) else: nth_cous = recomb_b_first(par_of_cous, rand_fish, spots) percent_same.append(np.sum((me == nth_cous))/(2*genes_num)) # Calculate the fraction of DNA shared #percent_same.append(np.sum((me == mom[:,0]) | (me == mom[:,1]))/(2*genes_num)) if half == 1: print('This is for a half-relative!', "\n") mean_out = np.mean(percent_same) std_dev_out = np.std(percent_same) print(mean_out) print(std_dev_out) print("\n") print("Time to complete: %s seconds." % (time.time() - start_time)) import statistics def print_results(pv): print(':', str(round(100*statistics.mean(pv),2)) + '%,',\ 'min:', str(round(min(pv),5)) + ',', '95%-Conf.:', str(round(np.percentile(pv, lo),4)) + '-' + str(round(np.percentile(pv, hi),4)) + ',',\ 'max:', str(round(max(pv),5)) + ',', round(statistics.stdev(pv),5), 'std,',\ pv.count(0) / len(pv), '0%', pv.count(100) / len(pv), '100%') confid = 95 lo = (100 - confid) / 2 hi = 100 - lo print_results(percent_same)
Recent Comments