From abc6765e536f8172d9b5e6c64ca9a1c66e7c37bd Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Wed, 25 Sep 2019 12:10:32 -0500 Subject: [PATCH] model_from_basepair_stack_3prime detects circular DNA strands; needs testing --- mrdna/readers/segmentmodel_from_lists.py | 52 ++++++++++++++++++++---- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/mrdna/readers/segmentmodel_from_lists.py b/mrdna/readers/segmentmodel_from_lists.py index 1e85d0c..fef1e65 100644 --- a/mrdna/readers/segmentmodel_from_lists.py +++ b/mrdna/readers/segmentmodel_from_lists.py @@ -17,14 +17,35 @@ def _three_prime_list_to_five_prime(three_prime): def _primes_list_to_strands(three_prime, five_prime): five_prime_ends = np.where(five_prime < 0)[0] strands = [] + strand_is_circular = [] - for nt_idx in five_prime_ends: + idx_to_strand = -np.ones(three_prime.shape, dtype=np.int) + + def build_strand(nt_idx, conditional): strand = [nt_idx] - while three_prime[nt_idx] >= 0: + idx_to_strand[nt_idx] = len(strands) + while conditional(nt_idx): nt_idx = three_prime[nt_idx] strand.append(nt_idx) + idx_to_strand[nt_idx] = len(strands) strands.append( np.array(strand, dtype=np.int) ) - return strands + + for nt_idx in five_prime_ends: + build_strand(nt_idx, + lambda nt: three_prime[nt] >= 0) + strand_is_circular.append(False) + + while True: + print("WARNING: working on circular strand {}".format(len(strands))) + ids = np.where(idx_to_strand < 0)[0] + if len(ids) == 0: break + build_strand(ids[0], + lambda nt: three_prime[nt] >= 0 and \ + idx_to_strand[three_prime[nt]] < 0) + strand_is_circular.append(True) + + return strands, strand_is_circular + def basepairs_and_stacks_to_helixmap(basepairs,stacks_above): @@ -228,16 +249,18 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime, """ Build map of dsDNA helices and strands """ hmap,hrank,fwd = basepairs_and_stacks_to_helixmap(bps,stack) double_stranded_helices = np.unique(hmap[hmap >= 0]) - strands = _primes_list_to_strands(three_prime, five_prime) + strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime) """ Add ssDNA to hmap """ hid = double_stranded_helices[-1]+1 ss_residues = hmap < 0 assert( np.all(bps[ss_residues] == -1) ) - for s in strands: + for s,c in zip(strands, strand_is_circular): ss_residues = s[np.where(hmap[s] < 0)[0]] if len(ss_residues) == 0: continue + if c: + print("WARNING: Circular ssDNA strands may not be modeled correctly") resid_diff = np.diff(ss_residues) tmp = np.where( resid_diff != 1 )[0] @@ -279,12 +302,16 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime, ## Find crossovers and 5prime/3prime ends crossovers,prime5,prime3 = [[],[],[]] - for s in strands: + for s,c in zip(strands,strand_is_circular): tmp = np.where(np.diff(hmap[s]) != 0)[0] for i in tmp: crossovers.append( (s[i],s[i+1]) ) - prime5.append(s[0]) - prime3.append(s[-1]) + if c: + if hmap[s[-1]] != hmap[s[0]]: + crossovers.append( (s[-1],s[0]) ) + else: + prime5.append(s[0]) + prime3.append(s[-1]) ## Add connections allSegments = doubleSegments+singleSegments @@ -297,6 +324,15 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime, is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1)) is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0)) if is_terminal1 or is_terminal2: + """ Re-evaluate condition for being terminal so that both + strands must be connected at end to have an 'intrahelical' + connection """ + if is_terminal1 and (bps[r1] >= 0): + is_terminal1 = (three_prime[r1] == five_prime[bps[r1]]) + if is_terminal2 and (bps[r2] >= 0): + is_terminal2 = (five_prime[r2] == three_prime[bps[r2]]) + + """ Place connection """ if is_terminal1 and is_terminal2: end1 = seg1.end3 if f1 else seg1.start3 end2 = seg2.start5 if f2 else seg2.end5 -- GitLab