From abc6765e536f8172d9b5e6c64ca9a1c66e7c37bd Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Wed, 25 Sep 2019 12:10:32 -0500
Subject: [PATCH] model_from_basepair_stack_3prime detects circular DNA
 strands; needs testing

---
 mrdna/readers/segmentmodel_from_lists.py | 52 ++++++++++++++++++++----
 1 file changed, 44 insertions(+), 8 deletions(-)

diff --git a/mrdna/readers/segmentmodel_from_lists.py b/mrdna/readers/segmentmodel_from_lists.py
index 1e85d0c..fef1e65 100644
--- a/mrdna/readers/segmentmodel_from_lists.py
+++ b/mrdna/readers/segmentmodel_from_lists.py
@@ -17,14 +17,35 @@ def _three_prime_list_to_five_prime(three_prime):
 def _primes_list_to_strands(three_prime, five_prime):
     five_prime_ends = np.where(five_prime < 0)[0]
     strands = []
+    strand_is_circular = []
     
-    for nt_idx in five_prime_ends:
+    idx_to_strand = -np.ones(three_prime.shape, dtype=np.int)
+
+    def build_strand(nt_idx, conditional):
         strand = [nt_idx]
-        while three_prime[nt_idx] >= 0:
+        idx_to_strand[nt_idx] = len(strands)
+        while conditional(nt_idx):
             nt_idx = three_prime[nt_idx]
             strand.append(nt_idx)
+            idx_to_strand[nt_idx] = len(strands)
         strands.append( np.array(strand, dtype=np.int) )
-    return strands
+
+    for nt_idx in five_prime_ends:
+        build_strand(nt_idx,
+                     lambda nt: three_prime[nt] >= 0)
+        strand_is_circular.append(False)
+
+    while True:
+        print("WARNING: working on circular strand {}".format(len(strands)))
+        ids = np.where(idx_to_strand < 0)[0]
+        if len(ids) == 0: break
+        build_strand(ids[0],
+                     lambda nt: three_prime[nt] >= 0 and \
+                     idx_to_strand[three_prime[nt]] < 0)
+        strand_is_circular.append(True)
+
+    return strands, strand_is_circular
+
 
 def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):
 
@@ -228,16 +249,18 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
     """ Build map of dsDNA helices and strands """
     hmap,hrank,fwd = basepairs_and_stacks_to_helixmap(bps,stack)
     double_stranded_helices = np.unique(hmap[hmap >= 0])    
-    strands = _primes_list_to_strands(three_prime, five_prime)
+    strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime)
 
     """ Add ssDNA to hmap """
     hid = double_stranded_helices[-1]+1
     ss_residues = hmap < 0
     assert( np.all(bps[ss_residues] == -1) )
     
-    for s in strands:
+    for s,c in zip(strands, strand_is_circular):
         ss_residues = s[np.where(hmap[s] < 0)[0]]
         if len(ss_residues) == 0: continue
+        if c:
+            print("WARNING: Circular ssDNA strands may not be modeled correctly")
         resid_diff = np.diff(ss_residues)
         
         tmp = np.where( resid_diff != 1 )[0]
@@ -279,12 +302,16 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
 
     ## Find crossovers and 5prime/3prime ends
     crossovers,prime5,prime3 = [[],[],[]]
-    for s in strands:
+    for s,c in zip(strands,strand_is_circular):
         tmp = np.where(np.diff(hmap[s]) != 0)[0]
         for i in tmp:
             crossovers.append( (s[i],s[i+1]) )
-        prime5.append(s[0])
-        prime3.append(s[-1])
+        if c:
+            if hmap[s[-1]] != hmap[s[0]]:
+                crossovers.append( (s[-1],s[0]) )
+        else:
+            prime5.append(s[0])
+            prime3.append(s[-1])
 
     ## Add connections
     allSegments = doubleSegments+singleSegments
@@ -297,6 +324,15 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
         is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1))
         is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0))
         if is_terminal1 or is_terminal2:
+            """ Re-evaluate condition for being terminal so that both
+            strands must be connected at end to have an 'intrahelical'
+            connection """
+            if is_terminal1 and (bps[r1] >= 0):
+                is_terminal1 = (three_prime[r1] == five_prime[bps[r1]])
+            if is_terminal2 and (bps[r2] >= 0):
+                is_terminal2 = (five_prime[r2] == three_prime[bps[r2]])
+
+            """ Place connection """
             if is_terminal1 and is_terminal2:
                 end1 = seg1.end3 if f1 else seg1.start3
                 end2 = seg2.start5 if f2 else seg2.end5
-- 
GitLab