Select Page
# Copyright (c) 2020 Brit Nicholson.
# All rights reserved. This program is made available under
# the terms of the Simple Non Code License (SNCL) v2.3.0,
# which is available at
# https://github.com/MysteryDash/Simple-Non-Code-License/blob/master/License.txt

import numpy as np
import random
import fnmatch
import time

def recomb(anc_1, anc_2, rand_fish, fun_spots):
    
    rand = random.random()
    if rand < 0.25:
        out_a = recomb_a_first(anc_1, rand_fish, fun_spots)
        out_b = recomb_a_first(anc_2, rand_fish, fun_spots)
    elif rand < 0.5:
        out_a = recomb_a_first(anc_1, rand_fish, fun_spots)
        out_b = recomb_b_first(anc_2, rand_fish, fun_spots)
    elif rand < 0.75:
        out_a = recomb_b_first(anc_1, rand_fish, fun_spots)
        out_b = recomb_a_first(anc_2, rand_fish, fun_spots)
    else:
        out_a = recomb_b_first(anc_1, rand_fish, fun_spots)
        out_b = recomb_b_first(anc_2, rand_fish, fun_spots)
        
    out = np.column_stack((out_a, out_b))
    return out


def recomb_a_first(anc, fun_rand_fish, fun_spots):
    
    random.shuffle(fun_spots)
    num_of_recombs = 22 + np.random.poisson(fun_rand_fish)
    cut_spots = sorted(fun_spots[:num_of_recombs])
    
    a = anc[:,0].tolist()
    b = anc[:,1].tolist()
    
    if len(cut_spots) == 0:
        recomb_out = a
        return recomb_out

    recomb_out = []
    for i in range(0, len(cut_spots)):
        if i == 0:
            recomb_out = a[0:cut_spots[0]]
        if i % 2 == 1:
            recomb_out.extend(b[cut_spots[i-1]:cut_spots[i]])
            last_a = False
        else:
            recomb_out.extend(a[cut_spots[i-1]:cut_spots[i]])
            last_a = True
    if last_a:
        recomb_out.extend(b[cut_spots[i]:len(b)])
    else:
        recomb_out.extend(a[cut_spots[i]:len(a)])
    return recomb_out


def recomb_b_first(anc, fun_rand_fish, fun_spots):
    
    random.shuffle(fun_spots)
    num_of_recombs = 22 + np.random.poisson(fun_rand_fish)
    cut_spots = sorted(fun_spots[:num_of_recombs])
    
    a = anc[:,0].tolist()
    b = anc[:,1].tolist()
    
    if len(cut_spots) == 0:
        recomb_out = b
        return recomb_out
    
    recomb_out = []
    for i in range(0, len(cut_spots)):
        if i == 0:
            recomb_out = b[0:cut_spots[0]]
        if i % 2 == 1:
            recomb_out.extend(a[cut_spots[i-1]:cut_spots[i]])
            last_a = True
        else:
            recomb_out.extend(b[cut_spots[i-1]:cut_spots[i]])
            last_a = False
    if last_a:
        recomb_out.extend(b[cut_spots[i]:len(b)])
    else:
        recomb_out.extend(a[cut_spots[i]:len(a)])
    return recomb_out

# Out of 1B Poisson values, the smallest I got was 28 and the largest was 94, so 99 (or 97+) genes should be fine.
genes_num = 99
genes_list = list(range(0, genes_num))
spots = list(range(1, genes_num))

rand_fish = 33 # λ

# Gen. = 2 simulates grandparents, 3 simulates great-grandparents, etc.
gen = 2

trials = 500000

# Only enter a "1" for one of the following to see what % of DNA one shares w/ that relative only.
aunt_bool = 0
cous_1st_bool = 0
great_aunt_bool = 0
par_of_cous_bool = 0
nth_cous_bool = 0
nth_anc_bool = 1

# Use this to change an nth relative to a half-relative
half = 0

# Create the farthest back ancestors. Label homolouges; otherwise, aunts share 50%.
for num in range(1, int((2**gen)/2) + 1):
    vars()['first_anc_' + str(num)] = np.column_stack(([str(num) + 'a' for i in genes_list], [str(num) + 'b' for i in genes_list]))
    
if nth_cous_bool == 1 or par_of_cous_bool == 1:
    fill = np.column_stack(([0 for i in genes_list], [0 for i in genes_list]))

start_time = time.time()

if aunt_bool + cous_1st_bool + great_aunt_bool + par_of_cous_bool + nth_cous_bool + nth_anc_bool != 1:
    raise ValueError('One and only one relative must be compared.')
    
if great_aunt_bool == 1 and gen != 3:
    raise ValueError('Great Aunts should only be compared when gen = 3.')

percent_same = []

for i_trials in range(0, trials):
    
    if gen > 2:
        for num in range(1, int((2**gen)/4) + 1):
            vars()['new_' + str(num)] = recomb(vars()['first_anc_' + str(2*num-1)], vars()['first_anc_' + str(2*num)], rand_fish, spots)
    
    # Recombine for close relatives. These 2 loops get it down to two grandparents.
    # Each anc. in the tree is created, but some are eventually overwritten.
    for g in range(gen-1, 2, -1):
        for num in range(1, int((2**g)/4) + 1):
            vars()['new_' + str(num)] = recomb(vars()['new_' + str(2*num-1)], vars()['new_' + str(2*num)], rand_fish, spots)
            
    # Create a 'mother'
    if gen > 2:
        mom = recomb(new_1, new_2, rand_fish, spots)
    else:
        mom = recomb(first_anc_1, first_anc_2, rand_fish, spots)

    # Create oneself (only the applicable homologue, which is arbitrarily the maternal one)
    rand = random.random()
    if rand < 0.5:
        me = recomb_a_first(mom, rand_fish, spots)
    else:
        me = recomb_b_first(mom, rand_fish, spots)
                
    me = np.asarray(me)
    
    if nth_anc_bool == 1:
        percent_same.append(np.sum((me == first_anc_2[:,0]) | (me == first_anc_2[:,1]))/(2*genes_num))
    
    elif aunt_bool == 1 or cous_1st_bool == 1:
    # Create an 'aunt'
        if gen > 2:
            aunt = recomb(new_1, new_2, rand_fish, spots)
        else:
            aunt = recomb(first_anc_1, first_anc_2, rand_fish, spots)
        
        if aunt_bool == 1:
            percent_same.append(np.sum((me == aunt[:,0]) | (me == aunt[:,1]))/(2*genes_num))
        
        else:
            rand = random.random()
            if rand < 0.5:
                cous_1st = recomb_a_first(aunt, rand_fish, spots)
            else:
                cous_1st = recomb_b_first(aunt, rand_fish, spots)
            cous_1st = np.asarray(cous_1st)
            percent_same.append(np.sum((me == cous_1st))/(2*genes_num))
            
    elif gen == 3 and great_aunt_bool == 1:
        # To keep the programming simple and limit run-time, only allow calculations for great-aunts
        # and great-uncles (siblings of one's grandparents) when gen == 3
        great_aunt = recomb(first_anc_1, first_anc_2, rand_fish, spots)
        percent_same.append(np.sum((me == great_aunt[:,0]) | (me == great_aunt[:,1]))/(2*genes_num))
    
    # Recombine for nth cousin or their parent
    elif nth_cous_bool == 1 or par_of_cous_bool == 1:
        
        # One only shares 2 close ancestors with a full nth cousin (unless they share more /:)
        if half == 0:
            new_1 = recomb(first_anc_1, first_anc_2, rand_fish, spots)
        else:
            new_1 = recomb(first_anc_1, fill, rand_fish, spots)
        
        # If gen > 3, dilute half of the genes with 'fill' for each extra generation
        for g in range(gen - 1, 2, -1):
        
            new_1 = recomb(new_1, fill, rand_fish, spots)
        
        # Create a parent of nth cousin (where n = gen - 1)
        if gen > 2:
            par_of_cous = recomb(new_1, fill, rand_fish, spots)
        else:
            par_of_cous = new_1
            #par_of_cous = recomb(new_1, fill, rand_fish, spots)
        
        if par_of_cous_bool == 1:
            percent_same.append(np.sum((me == par_of_cous[:,0]) | (me == par_of_cous[:,1]))/(2*genes_num))
        
        else: # Create an nth cousin (where n = gen - 1)
            rand = random.random()
            if rand < 0.5:
                nth_cous = recomb_a_first(par_of_cous, rand_fish, spots)
            else:
                nth_cous = recomb_b_first(par_of_cous, rand_fish, spots)
            percent_same.append(np.sum((me == nth_cous))/(2*genes_num))
    
    # Calculate the fraction of DNA shared
    #percent_same.append(np.sum((me == mom[:,0]) | (me == mom[:,1]))/(2*genes_num))

if half == 1:
    print('This is for a half-relative!', "\n")
    
mean_out = np.mean(percent_same)
std_dev_out = np.std(percent_same)
print(mean_out)
print(std_dev_out)

print("\n")

print("Time to complete: %s seconds." % (time.time() - start_time))

import statistics

def print_results(pv):
    print(':', str(round(100*statistics.mean(pv),2)) + '%,',\
          'min:', str(round(min(pv),5)) + ',', '95%-Conf.:', str(round(np.percentile(pv, lo),4)) + '-' + str(round(np.percentile(pv, hi),4)) + ',',\
          'max:', str(round(max(pv),5)) + ',', round(statistics.stdev(pv),5), 'std,',\
          pv.count(0) / len(pv), '0%', pv.count(100) / len(pv), '100%')
    
confid = 95
lo = (100 - confid) / 2
hi = 100 - lo
    
print_results(percent_same)