From 0d1ed546f7cfa527d0d9e57ee243e6e723c3dbc4 Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Tue, 4 Sep 2018 13:34:22 -0500
Subject: [PATCH] Updated polygon_mesh reader to consider helix direction and
 to add 3/5-prime ends

---
 dnarbd/readers/polygon_mesh.py | 101 ++++++++++++++++++++++-----------
 1 file changed, 67 insertions(+), 34 deletions(-)

diff --git a/dnarbd/readers/polygon_mesh.py b/dnarbd/readers/polygon_mesh.py
index f7bcc3a..2ab0a00 100644
--- a/dnarbd/readers/polygon_mesh.py
+++ b/dnarbd/readers/polygon_mesh.py
@@ -209,7 +209,7 @@ def parse_maya_file(maya_file):
         if conn is not None:
             new_connections.append(conn)
     connections = new_connections
-        
+
     ## Assign base connectivity
     for c in connections:
         h1 = c.helix1
@@ -274,21 +274,61 @@ def convert_maya_to_segments(helices):
         # for b in upaired:
         #     print(hn, b.name, b.end3, b.end5)
         
-        z = [b.position[2] for b in paired_bases]
-        bot = paired_bases[ np.argmin(z) ]
-        top = paired_bases[ np.argmax(z) ]
-
-        if bot.name[0] == "f": bot = bot.basepair
-        if top.name[0] == "b": top = top.basepair
+        ## Find helical axis
+        # if len(paired_bases) < 6:
+        #     raise Exception("Helices shorter than 3 bp are not supported")
+        # coords = np.array([b.position for b in paired_bases])
+        # u,s,vh = np.linalg.svd(coords)
+        # axis = vh[0] / np.linalg.norm(vh[0])
+
+        fwd_bases = list(filter(lambda b:b.name[0] == 'f', paired_bases))
+        # rank = [np.dot(b.position,axis) for b in fwd_bases]
+        rank = [b.position[2] for b in fwd_bases]
+        order = np.argsort(rank)
+        ordered_fwd_bases = [fwd_bases[o] for o in order]
+        next_fwd_is_3end = [b1.end3==b2 for b1,b2 in
+                            zip(ordered_fwd_bases[:-1],ordered_fwd_bases[1:])]
+
+        ## Check if axis points the wrong way
+        assert( np.mean(next_fwd_is_3end) > 0.5 )
+        # if np.mean( next_fwd_is_3end ) < 0.5:
+        #     # axis = axis * -1
+        #     rank = rank[order[-1]]-rank
+        #     order = reversed(order)
+        #     ordered_fwd_bases = reversed(ordered_fwd_bases)
+
+        top = ordered_fwd_bases[-1]
+        bot = ordered_fwd_bases[0].basepair
+
+        assert( bot.name[0] == 'b' )
+        assert( top.name[0] == 'f' )
 
         botPos = 0.5 * (bot.get_position() + bot.basepair.get_position())
         topPos = 0.5 * (top.get_position() + top.basepair.get_position())
 
-        segments[hn] =  DoubleStrandedSegment(hn, num_bp = len(z)//2,
+        segments[hn] =  DoubleStrandedSegment(hn, num_bp = len(fwd_bases),
                                               start_position = botPos,
                                               end_position = topPos)
+
+        ## Add 3prime/5prime ends
+        nt = 0
+        def add_primes(b,nt,fwd):
+            if b.end3 is None:
+                if not fwd: print("3prime at {}[{},{}]".format(hn,nt,fwd))
+                segments[hn].add_3prime(nt,fwd)
+            if b.end5 is None:
+                if not fwd: print("5prime at {}[{},{}]".format(hn,nt,fwd))
+                segments[hn].add_5prime(nt,fwd)
+
+        for b in ordered_fwd_bases:
+            add_primes(b,nt,True)
+            add_primes(b.basepair,nt,False)
+            nt+=1
+
         segmentsBot[hn] = bot
         segmentsTop[hn] = top
+        assert( bot.end3 not in paired_bases )
+        assert( top.end3 not in paired_bases )
 
     """ Sanity check """
     def end_crosses(end,direction):
@@ -322,20 +362,28 @@ def convert_maya_to_segments(helices):
         #     print(bot.parent.name,bot.name, bot.end3.parent.name, bot.end3.name)
 
         ## TODO: clean this up
+        def get_end_positions(ssSeg, is_fwd):
+            start_idx, end_idx = (0,-1)
+            if not is_fwd:
+                start_idx, end_idx = (-1,0)
+
+            r0 = ssSeg[0].get_position()
+            if ssSeg[0].end5 is not None:
+                r0 = 0.75*r0 + 0.25*ssSeg[0].end5.get_position()
+            r1 = ssSeg[-1].get_position()
+            if ssSeg[-1].end3 is not None:
+                r1 = 0.75*r1 + 0.25*ssSeg[-1].end3.get_position()
+            return r0,r1
 
         if top not in connectionToSingleStrand:
             ssSeg = get_end3_ssDNA(top)
-            if len(ssSeg) > 0:
-                r0 = ssSeg[0].get_position()
-                if ssSeg[0].end5 is not None:
-                    r0 = 0.75*r0 + 0.25*ssSeg[0].end5.get_position()
-                r1 = ssSeg[-1].get_position()
-                if ssSeg[-1].end3 is not None:
-                    r1 = 0.75*r1 + 0.25*ssSeg[-1].end3.get_position()
+            num_nt = len(ssSeg)
+            if num_nt > 0:
+                r0,r1 = get_end_positions(ssSeg,is_fwd=True)
                 seg2 = SingleStrandedSegment("S{:03d}".format(len(ss_segments)), 
                                               start_position = r0,
                                               end_position = r1,
-                                              num_nt = len(ssSeg))
+                                              num_nt = num_nt)
                 seg.connect_end3(seg2)
                 connectionToSingleStrand[top] = seg2
                 other = ssSeg[-1].end3
@@ -360,12 +408,7 @@ def convert_maya_to_segments(helices):
         if top.basepair not in connectionToSingleStrand:
             ssSeg = get_end5_ssDNA(top.basepair)[::-1]
             if len(ssSeg) > 0:
-                r0 = ssSeg[-1].get_position()
-                if ssSeg[-1].end5 is not None:
-                    r0 = 0.75*r0 + 0.25*ssSeg[-1].end5.get_position()
-                r1 = ssSeg[0].get_position()
-                if ssSeg[0].end3 is not None:
-                    r1 = 0.75*r1 + 0.25*ssSeg[0].end3.get_position()
+                r0,r1 = get_end_positions(ssSeg,is_fwd=False)
                 seg2 = SingleStrandedSegment("S{:03d}".format(len(ss_segments)),
                                               start_position = r0,
                                               end_position = r1,
@@ -394,12 +437,7 @@ def convert_maya_to_segments(helices):
         if bot not in connectionToSingleStrand:
             ssSeg = get_end3_ssDNA(bot)
             if len(ssSeg) > 0:
-                r0 = ssSeg[0].get_position()
-                if ssSeg[0].end5 is not None:
-                    r0 = 0.75*r0 + 0.25*ssSeg[0].end5.get_position()
-                r1 = ssSeg[-1].get_position()
-                if ssSeg[-1].end3 is not None:
-                    r1 = 0.75*r1 + 0.25*ssSeg[-1].end3.get_position()
+                r0,r1 = get_end_positions(ssSeg,is_fwd=True)
                 seg2 = SingleStrandedSegment("S{:03d}".format(len(ss_segments)),
                                               start_position = r0,
                                               end_position = r1,
@@ -424,12 +462,7 @@ def convert_maya_to_segments(helices):
         if bot.basepair not in connectionToSingleStrand:
             ssSeg = get_end5_ssDNA(bot.basepair)[::-1]
             if len(ssSeg) > 0:
-                r0 = ssSeg[-1].get_position()
-                if ssSeg[-1].end5 is not None:
-                    r0 = 0.75*r0 + 0.25*ssSeg[-1].end5.get_position()
-                r1 = ssSeg[0].get_position()
-                if ssSeg[0].end3 is not None:
-                    r1 = 0.75*r1 + 0.25*ssSeg[0].end3.get_position()
+                r0,r1 = get_end_positions(ssSeg,is_fwd=False)
                 seg2 = SingleStrandedSegment("S{:03d}".format(len(ss_segments)),
                                               start_position = r0,
                                               end_position = r1,
-- 
GitLab