From 6f40a20cd9f6511a8fefad822cf3caad651d3eba Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Thu, 27 Feb 2020 11:10:44 -0600 Subject: [PATCH] Fixed edge case with model_from_lists when circular helices have crossovers near their 'ends' --- mrdna/readers/segmentmodel_from_lists.py | 66 +++++++++++++++--------- mrdna/segmentmodel.py | 22 ++++++-- 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/mrdna/readers/segmentmodel_from_lists.py b/mrdna/readers/segmentmodel_from_lists.py index 4759889..51b2877 100644 --- a/mrdna/readers/segmentmodel_from_lists.py +++ b/mrdna/readers/segmentmodel_from_lists.py @@ -117,45 +117,49 @@ def basepairs_and_stacks_to_helixmap(basepairs,stacks_above): hid += 1 ## Create "helix" for each circular segment + intrahelical = [] processed = set() unclaimed_bases = np.where( (basepairs >= 0)*(helixmap == -1) )[0] for nt0 in unclaimed_bases: if nt0 in processed: continue + nt = nt0 all_nts = [nt] + rank = 0 + nt = nt0 + bp = basepairs[nt] + assert(helixmap[nt] == -1) + assert(helixmap[bp] == -1) + helixmap[nt] = helixmap[bp] = hid + helixrank[nt] = helixrank[bp] = rank + is_fwd[bp] = 0 + rank +=1 + processed.add(nt) + processed.add(bp) + counter = 0 while stacks_above[nt] >= 0: + lastnt = nt nt = stacks_above[nt] bp = basepairs[nt] + if nt == nt0 or nt == basepairs[nt0]: + intrahelical.append((lastnt,nt0)) + break + assert( bp >= 0 ) assert(helixmap[nt] == -1) assert(helixmap[bp] == -1) - if nt == nt0: - break - - all_nts.append(nt) - - if counter > 1e6: - raise Exception("DNA is apparently too long; probably there is something wrong with the structure") - counter += 1 - - ## Split circular helix into two groups (TODO: fix segmentmodel so that circular helices work and this is not needed) - for group in (all_nts[:len(all_nts)//2], all_nts[len(all_nts)//2:]): - rank = 0 - for nt in group: - bp = basepairs[nt] - is_fwd[bp] = 0 - helixmap[nt] = helixmap[bp] = hid - helixrank[nt] = helixrank[bp] = rank - processed.add(nt) - processed.add(bp) - rank +=1 - hid += 1 - + helixmap[nt] = helixmap[bp] = hid + helixrank[nt] = helixrank[bp] = rank + is_fwd[bp] = 0 + processed.add(nt) + processed.add(bp) + rank +=1 + hid += 1 - return helixmap, helixrank, is_fwd + return helixmap, helixrank, is_fwd, intrahelical def set_splines(seg, coordinate, hid, hmap, hrank, fwd, orientation=None): @@ -282,7 +286,7 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime, five_prime = _three_prime_list_to_five_prime(three_prime) """ Build map of dsDNA helices and strands """ - hmap,hrank,fwd = basepairs_and_stacks_to_helixmap(bps,stack) + hmap,hrank,fwd,intrahelical = basepairs_and_stacks_to_helixmap(bps,stack) double_stranded_helices = np.unique(hmap[hmap >= 0]) strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime) @@ -356,6 +360,7 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime, ## Handle connections at the ends is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1)) is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0)) + if is_terminal1 or is_terminal2: """ Re-evaluate condition for being terminal so that both strands must be connected at end to have an 'intrahelical' @@ -383,6 +388,19 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime, seg = allSegments[hmap[r]] seg.add_3prime(hrank[r],fwd[r]) + ## Add intrahelical connections to circular helical sections + for nt0,nt1 in intrahelical: + seg = allSegments[hmap[nt0]] + assert( seg is allSegments[hmap[nt1]] ) + if three_prime[nt0] >= 0: + if hmap[nt0] == hmap[three_prime[nt0]]: + seg.connect_end3(seg.start5) + + bp0,bp1 = [bps[nt] for nt in (nt0,nt1)] + if three_prime[bp1] >= 0: + if hmap[bp1] == hmap[three_prime[bp1]]: + seg.connect_start3(seg.end5) + ## Assign sequence if sequence is not None: for hid in range(len(allSegments)): diff --git a/mrdna/segmentmodel.py b/mrdna/segmentmodel.py index 10a5fe9..03984be 100644 --- a/mrdna/segmentmodel.py +++ b/mrdna/segmentmodel.py @@ -98,7 +98,10 @@ class Location(): on_fwd = "on_fwd_strand" else: on_fwd = "on_rev_strand" - return "<Location {}.{}[{:.2f},{:d}]>".format( self.container.name, self.type_, self.address, self.on_fwd_strand) + try: + return "<Location {}.{}[{:d},{}]>".format( self.container.name, self.type_, self.get_nt_pos(), on_fwd ) + except: + return "<Location {}.{}[{:.2f},{:d}]>".format( self.container.name, self.type_, self.address, self.on_fwd_strand) class Connection(): """ Abstract base class for connection between two elements """ @@ -1155,12 +1158,25 @@ class DoubleStrandedSegment(Segment): c = self.nt_pos_to_contour(nt) assert(c >= 0 and c <= 1) - loc = self.get_location_at(c, strands_fwd[0]) + + if nt == 0 and not strands_fwd[0]: + loc = self.start3 + elif nt == self.num_nt-1 and strands_fwd[0]: + loc = self.end3 + else: + loc = self.get_location_at(c, strands_fwd[0]) c = other.nt_pos_to_contour(other_nt) # TODOTODO: may need to subtract or add a little depending on 3prime/5prime assert(c >= 0 and c <= 1) - other_loc = other.get_location_at(c, strands_fwd[1]) + + if other_nt == 0 and strands_fwd[1]: + other_loc = other.start5 + elif other_nt == other.num_nt-1 and not strands_fwd[1]: + other_loc = other.end5 + else: + other_loc = other.get_location_at(c, strands_fwd[1]) + self._connect(other, Connection( loc, other_loc, type_=type_ )) if nt_on_5prime: loc.is_3prime_side_of_connection = False -- GitLab