From 0d1ed546f7cfa527d0d9e57ee243e6e723c3dbc4 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Tue, 4 Sep 2018 13:34:22 -0500 Subject: [PATCH] Updated polygon_mesh reader to consider helix direction and to add 3/5-prime ends --- dnarbd/readers/polygon_mesh.py | 101 ++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 34 deletions(-) diff --git a/dnarbd/readers/polygon_mesh.py b/dnarbd/readers/polygon_mesh.py index f7bcc3a..2ab0a00 100644 --- a/dnarbd/readers/polygon_mesh.py +++ b/dnarbd/readers/polygon_mesh.py @@ -209,7 +209,7 @@ def parse_maya_file(maya_file): if conn is not None: new_connections.append(conn) connections = new_connections - + ## Assign base connectivity for c in connections: h1 = c.helix1 @@ -274,21 +274,61 @@ def convert_maya_to_segments(helices): # for b in upaired: # print(hn, b.name, b.end3, b.end5) - z = [b.position[2] for b in paired_bases] - bot = paired_bases[ np.argmin(z) ] - top = paired_bases[ np.argmax(z) ] - - if bot.name[0] == "f": bot = bot.basepair - if top.name[0] == "b": top = top.basepair + ## Find helical axis + # if len(paired_bases) < 6: + # raise Exception("Helices shorter than 3 bp are not supported") + # coords = np.array([b.position for b in paired_bases]) + # u,s,vh = np.linalg.svd(coords) + # axis = vh[0] / np.linalg.norm(vh[0]) + + fwd_bases = list(filter(lambda b:b.name[0] == 'f', paired_bases)) + # rank = [np.dot(b.position,axis) for b in fwd_bases] + rank = [b.position[2] for b in fwd_bases] + order = np.argsort(rank) + ordered_fwd_bases = [fwd_bases[o] for o in order] + next_fwd_is_3end = [b1.end3==b2 for b1,b2 in + zip(ordered_fwd_bases[:-1],ordered_fwd_bases[1:])] + + ## Check if axis points the wrong way + assert( np.mean(next_fwd_is_3end) > 0.5 ) + # if np.mean( next_fwd_is_3end ) < 0.5: + # # axis = axis * -1 + # rank = rank[order[-1]]-rank + # order = reversed(order) + # ordered_fwd_bases = reversed(ordered_fwd_bases) + + top = ordered_fwd_bases[-1] + bot = ordered_fwd_bases[0].basepair + + assert( bot.name[0] == 'b' ) + assert( top.name[0] == 'f' ) botPos = 0.5 * (bot.get_position() + bot.basepair.get_position()) topPos = 0.5 * (top.get_position() + top.basepair.get_position()) - segments[hn] = DoubleStrandedSegment(hn, num_bp = len(z)//2, + segments[hn] = DoubleStrandedSegment(hn, num_bp = len(fwd_bases), start_position = botPos, end_position = topPos) + + ## Add 3prime/5prime ends + nt = 0 + def add_primes(b,nt,fwd): + if b.end3 is None: + if not fwd: print("3prime at {}[{},{}]".format(hn,nt,fwd)) + segments[hn].add_3prime(nt,fwd) + if b.end5 is None: + if not fwd: print("5prime at {}[{},{}]".format(hn,nt,fwd)) + segments[hn].add_5prime(nt,fwd) + + for b in ordered_fwd_bases: + add_primes(b,nt,True) + add_primes(b.basepair,nt,False) + nt+=1 + segmentsBot[hn] = bot segmentsTop[hn] = top + assert( bot.end3 not in paired_bases ) + assert( top.end3 not in paired_bases ) """ Sanity check """ def end_crosses(end,direction): @@ -322,20 +362,28 @@ def convert_maya_to_segments(helices): # print(bot.parent.name,bot.name, bot.end3.parent.name, bot.end3.name) ## TODO: clean this up + def get_end_positions(ssSeg, is_fwd): + start_idx, end_idx = (0,-1) + if not is_fwd: + start_idx, end_idx = (-1,0) + + r0 = ssSeg[0].get_position() + if ssSeg[0].end5 is not None: + r0 = 0.75*r0 + 0.25*ssSeg[0].end5.get_position() + r1 = ssSeg[-1].get_position() + if ssSeg[-1].end3 is not None: + r1 = 0.75*r1 + 0.25*ssSeg[-1].end3.get_position() + return r0,r1 if top not in connectionToSingleStrand: ssSeg = get_end3_ssDNA(top) - if len(ssSeg) > 0: - r0 = ssSeg[0].get_position() - if ssSeg[0].end5 is not None: - r0 = 0.75*r0 + 0.25*ssSeg[0].end5.get_position() - r1 = ssSeg[-1].get_position() - if ssSeg[-1].end3 is not None: - r1 = 0.75*r1 + 0.25*ssSeg[-1].end3.get_position() + num_nt = len(ssSeg) + if num_nt > 0: + r0,r1 = get_end_positions(ssSeg,is_fwd=True) seg2 = SingleStrandedSegment("S{:03d}".format(len(ss_segments)), start_position = r0, end_position = r1, - num_nt = len(ssSeg)) + num_nt = num_nt) seg.connect_end3(seg2) connectionToSingleStrand[top] = seg2 other = ssSeg[-1].end3 @@ -360,12 +408,7 @@ def convert_maya_to_segments(helices): if top.basepair not in connectionToSingleStrand: ssSeg = get_end5_ssDNA(top.basepair)[::-1] if len(ssSeg) > 0: - r0 = ssSeg[-1].get_position() - if ssSeg[-1].end5 is not None: - r0 = 0.75*r0 + 0.25*ssSeg[-1].end5.get_position() - r1 = ssSeg[0].get_position() - if ssSeg[0].end3 is not None: - r1 = 0.75*r1 + 0.25*ssSeg[0].end3.get_position() + r0,r1 = get_end_positions(ssSeg,is_fwd=False) seg2 = SingleStrandedSegment("S{:03d}".format(len(ss_segments)), start_position = r0, end_position = r1, @@ -394,12 +437,7 @@ def convert_maya_to_segments(helices): if bot not in connectionToSingleStrand: ssSeg = get_end3_ssDNA(bot) if len(ssSeg) > 0: - r0 = ssSeg[0].get_position() - if ssSeg[0].end5 is not None: - r0 = 0.75*r0 + 0.25*ssSeg[0].end5.get_position() - r1 = ssSeg[-1].get_position() - if ssSeg[-1].end3 is not None: - r1 = 0.75*r1 + 0.25*ssSeg[-1].end3.get_position() + r0,r1 = get_end_positions(ssSeg,is_fwd=True) seg2 = SingleStrandedSegment("S{:03d}".format(len(ss_segments)), start_position = r0, end_position = r1, @@ -424,12 +462,7 @@ def convert_maya_to_segments(helices): if bot.basepair not in connectionToSingleStrand: ssSeg = get_end5_ssDNA(bot.basepair)[::-1] if len(ssSeg) > 0: - r0 = ssSeg[-1].get_position() - if ssSeg[-1].end5 is not None: - r0 = 0.75*r0 + 0.25*ssSeg[-1].end5.get_position() - r1 = ssSeg[0].get_position() - if ssSeg[0].end3 is not None: - r1 = 0.75*r1 + 0.25*ssSeg[0].end3.get_position() + r0,r1 = get_end_positions(ssSeg,is_fwd=False) seg2 = SingleStrandedSegment("S{:03d}".format(len(ss_segments)), start_position = r0, end_position = r1, -- GitLab