Fixed how atoms are indexed to allow interhelical bonds when...

Fixed how atoms are indexed to allow interhelical bonds when insertions/deletions are staggered; generalized algorithms for finding indices between crossovers

Fixed how atoms are indexed to allow interhelical bonds when...
501eb452 · cmaffeo2 · f584d0d1 · 501eb452
Commit 501eb452 authored 7 years ago by cmaffeo2
--- a/atomicModel.py
+++ b/atomicModel.py
@@ -132,7 +132,7 @@ class Segment():
            self.nodes[helixId][strandIdx] = []

        n = abstractNucleotide(sequence, position, angle, ntAt5prime, 
-                               isFwd=isFwd, helixId=helixId, zIdx=zIdx)
+                               isFwd=isFwd, helixId=helixId, zIdx=zIdx, idx=strandIdx)

        if ntAt5prime is not None:
            ntAt5prime.set3primeNt(n)
@@ -202,9 +202,12 @@ class Segment():
 class atomicModel():

    def __init__(self, part, ntScale=1.0, randomSeed=1):
+        self.chunks = dict()
+
        self.numNodes = 0
        self.segments = []
-        self.nodes = None
+        self.helixNtsFwd = dict()
+        self.helixNtsRev = dict()
        self.nodeTypeCounts = None
        self.ntScale = ntScale
        self.part = part
@@ -213,7 +216,7 @@ class atomicModel():
        self.bonds = set()
        self.angles = set()
        self.dihedrals = set()
-        
+
        # self._bondParams = set()
        # self._angleParams = set()
        # self._dihedralParams = set()
@@ -227,12 +230,26 @@ class atomicModel():

        self.latticeType = atomicModel._determineLatticeType(part)
        self._buildModel(part)
-        
-    def _helixStrandsToEnds(self, helixStrands):
+
+    def _strandOccupancies(self, helixId):
+        try:
+            ends1,ends2 = self._getStrandEnds(helixId)
+        except:
+            ## hid not in strand_list
+            return []
+
+        strandOccupancies = [ [x for i in range(0,len(e),2)
+                               for x in range(e[i],e[i+1]+1)]
+                              for e in (ends1,ends2) ]
+        return sorted(list(set( strandOccupancies[0] + strandOccupancies[1] )))
+
+    def _getStrandEnds(self, helixId):

        """Utility method to convert cadnano strand lists into list of
        indices of terminal points"""

+        helixStrands = self.strand_list[helixId]
+
        endLists = [[],[]]
        for endList, strandList in zip(endLists,helixStrands):
            lastStrand = None
@@ -271,7 +288,7 @@ class atomicModel():
        ## TODO: compartmentalize following
        ## Loop over virtual helices and build lists of strands 
        vh_list = []
-        strand_list = []
+        self.strand_list = []
        xover_list = []

        numHID = part.getIdNumMax() + 1
@@ -279,7 +296,7 @@ class atomicModel():
            offset_and_size = part.getOffsetAndSize(id_num)
            if offset_and_size is None:
                # add a placeholder
-                strand_list.append(None)
+                self.strand_list.append(None)
                # prop_list.append(None)
            else:
                offset, size = offset_and_size
@@ -289,7 +306,7 @@ class atomicModel():
                #     print(' VHELIX %d fwd_ss:' % id_num, s)
                fwd_idxs, fwd_colors  = fwd_ss.dump(xover_list)
                rev_idxs, rev_colors  = rev_ss.dump(xover_list)
-                strand_list.append((fwd_idxs, rev_idxs))
+                self.strand_list.append((fwd_idxs, rev_idxs))
                
                ## prop_list.append((fwd_colors, rev_colors))
                # for s in strand_list:
@@ -303,11 +320,10 @@ class atomicModel():
        3) For all basepairs, find stacks 5'/3' stacks
        """

-        baseToSegFwd = dict()
-        baseToSegRev = dict()
        for hid in range(numHID):
-            baseToSegFwd[hid] = dict()
-            baseToSegRev[hid] = dict()
+            self.helixNtsFwd[hid] = dict()
+            self.helixNtsRev[hid] = dict()
+            self.chunks[hid] = []

        ## Loop over oligos/segments
        segments = []
@@ -358,55 +374,41 @@ class atomicModel():

                ## We place nts as pairs; spacing is not always uniform along strand.
                ## Here we find the locations between which spacing will be uniform.
-                # compStrandIdxs =  [i for i in comp.idxs() for comp in strand.getComplementStrands()]
-
                compIdxs = [comp.idxs() for comp in strand.getComplementStrands()]
-                assert( compIdxs == sorted(compIdxs) )
-                
-                ## make a list of indeces that are the end-points of "segmentes"
-                splitAfter = [-1]
-                for l,h in compIdxs:
-                    if l > splitAfter[-1]: splitAfter.append(l)
-                    splitAfter.append(h)
-                    
-                ## Restrict the list to sensible values, no segments of 1 nt
-                splitAfter = [s for s in splitAfter if s in range(lo+2,hi-2)]
-                split = []
-                last = -2
-                for s in splitAfter:
-                    if s > last+1:
-                        split.append(s)
-                        last = s
-                strandSegments = [(i+1,j) for i,j in zip([lo-1]+split,split+[hi])]
-
+                chunks = self.combineCompactRegionLists(
+                    [[lo,hi]],
+                    compIdxs,
+                    intersect=True
+                )
+                self.chunks[hid].extend( chunks ) # Add running list of chunks for push bonds

                ## Find zIdx for each nucleotide
                zIdxs = []
-                for l,h in strandSegments:
+                for l,h in chunks:
                    ## Find the number of nts between l,h
                    nts = h-l+1
                    for ins in strand.insertionsOnStrand(l,h):
                        nts += ins.length()
-                    
+
                    for i in range(nts):
                        zIdxs.append( l + (h-l) * float(i)/(nts-1) )

                assert(len(zIdxs) == numNt)

                if isFwd:
-                    baseToSeg = baseToSegFwd[hid]
+                    helixNts = self.helixNtsFwd[hid]
                else:
-                    baseToSeg = baseToSegRev[hid]
+                    helixNts = self.helixNtsRev[hid]
                    zIdxs = reversed(zIdxs)

                ## Find strandOccupancy index for each nucleotide
                strandOccIdx = []
-                for l,h in strandSegments:
+                for l,h in chunks:
                    for i in range(l,h+1):
                        nts = 1
                        for ins in strand.insertionsOnStrand(i,i):
                            nts += ins.length()
-                        baseToSeg[i] = []
+                        helixNts[i] = []
                        for j in range(nts):
                            strandOccIdx.append(i)
                if not isFwd:
@@ -416,33 +418,26 @@ class atomicModel():
                for s,zIdx,idx in zip(seqs,zIdxs,strandOccIdx):
                    p = zIdxToPos(zIdx) 
                    a = zIdxToAngle(zIdx)
-
                    lastNt = seg.addNucleotide(hid, idx, zIdx, s, isFwd,
                                               p, a, lastNt)

-                    baseToSeg[idx].append(lastNt)
+                    helixNts[idx].append(lastNt)
+
+        ## Correct the order of nts in helixNtsRev
+        for hid,ntsAtIdx in self.helixNtsRev.items():
+            for i,nts in ntsAtIdx.items():
+                self.helixNtsRev[hid][i] = reversed(nts)

        ## 2) Find basepairs and stacks
        for hid in range(numHID):
-            helixStrands = strand_list[hid]
-            if helixStrands is None: 
-                continue
-
-            ends1,ends2 = self._helixStrandsToEnds(helixStrands)
-            strandOccupancies = [ [x for i in range(0,len(e),2) 
-                                   for x in range(e[i],e[i+1]+1)] 
-                                  for e in (ends1,ends2) ]
            prevBasepair = None
-            for i in sorted(list(set( strandOccupancies[0] + strandOccupancies[1] ))):
+            for i in self._strandOccupancies(hid):
                ## Check that strand index is dsDNA 
-                if not (i in baseToSegFwd[hid] and i in baseToSegRev[hid]):
+                if not (i in self.helixNtsFwd[hid] and i in self.helixNtsRev[hid]):
                    continue

-                ## TODO Handle insertions (DONE?)
-                # nt1 = baseToSegFwd[hid][i][0]
-                # nt2 = baseToSegRev[hid][i][0]
-                for nt1,nt2 in zip(baseToSegFwd[hid][i], 
-                                   reversed(baseToSegRev[hid][i])):
+                for nt1,nt2 in zip(self.helixNtsFwd[hid][i],
+                                   self.helixNtsRev[hid][i]):
                    self._pairBases(nt1,nt2)
                
                    if prevBasepair is not None and i - prevBasepair[0] <= 3: # TODO: find better way of locating stacks
@@ -650,6 +645,177 @@ class atomicModel():
                nt.firstAtomicIndex = natoms
                natoms += len(nt.atoms(transform=False))

+    def combineRegionLists(self,loHi1,loHi2):
+
+        """Combines two lists of (lo,hi) pairs specifying integer
+        regions a single list of regions.  """
+
+        ## Validate input
+        for l in (loHi1,loHi2):
+            ## Assert each region in lists is sorted
+            for pair in l:
+                assert(len(pair) == 2)
+                assert(pair[0] <= pair[1])
+
+        ## Break input into lists of compact regions
+        compactRegions1,compactRegions2 = [[],[]]
+        for compactRegions,loHi in zip(
+                [compactRegions1,compactRegions2],
+                [loHi1,loHi2]):
+            tmp = []
+            lastHi = loHi[0][0]-1
+            for lo,hi in loHi:
+                if lo-1 != lastHi:
+                    compactRegions.append(tmp)
+                    tmp = []
+                tmp.append((lo,hi))
+                lastHi = hi
+            if len(tmp) > 0:
+                compactRegions.append(tmp)
+
+        ## Build result
+        result = []
+        region = []
+        i,j = [0,0]
+        compactRegions1.append([[1e10]])
+        compactRegions2.append([[1e10]])
+        while i < len(compactRegions1)-1 or j < len(compactRegions2)-1:
+            cr1 = compactRegions1[i]
+            cr2 = compactRegions2[j]
+
+            ## initialize region
+            if len(region) == 0:
+                if cr1[0][0] <= cr2[0][0]:
+                    region = cr1
+                    i += 1
+                    continue
+                else:
+                    region = cr2
+                    j += 1
+                    continue
+
+            if region[-1][-1] >= cr1[0][0]:
+                region = self.combineCompactRegionLists(region, cr1, intersect=False)
+                i+=1
+            elif region[-1][-1] >= cr2[0][0]:
+                region = self.combineCompactRegionLists(region, cr2, intersect=False)
+                j+=1
+            else:
+                result.extend(region)
+                region = []
+
+        assert( len(region) > 0 )
+        result.extend(region)
+        result = sorted(result)
+
+        # print("loHi1:",loHi1)
+        # print("loHi2:",loHi2)
+        # print(result,"\n")
+
+        return result
+
+    def combineCompactRegionLists(self,loHi1,loHi2,intersect=False):
+
+        """Combines two lists of (lo,hi) pairs specifying regions within a
+        compact integer set into a single list of regions.
+
+        examples:
+        loHi1 = [[0,4],[5,7]]
+        loHi2 = [[2,4],[5,9]]
+        out = [(0, 1), (2, 4), (5, 7), (8, 9)]
+
+        loHi1 = [[0,3],[5,7]]
+        loHi2 = [[2,4],[5,9]]
+        out = [(0, 1), (2, 3), (4, 4), (5, 7), (8, 9)]
+        """
+
+        ## Validate input
+        for l in (loHi1,loHi2):
+            ## Assert each region in lists is sorted
+            for pair in l:
+                assert(len(pair) == 2)
+                assert(pair[0] <= pair[1])
+            ## Assert lists are compact
+            for pair1,pair2 in zip(l[::2],l[1::2]):
+                assert(pair1[1]+1 == pair2[0])
+
+
+        ## Find the ends of the region
+        lo = min( [loHi1[0][0], loHi2[0][0]] )
+        hi = max( [loHi1[-1][1], loHi2[-1][1]] )
+
+        ## Make a list of indices where each region will be split
+        splitAfter = []
+        for l,h in loHi2:
+            if l != lo:
+                splitAfter.append(l-1)
+            if h != hi:
+                splitAfter.append(h)
+
+        for l,h in loHi1:
+            if l != lo:
+                splitAfter.append(l-1)
+            if h != hi:
+                splitAfter.append(h)
+        splitAfter = sorted(list(set(splitAfter)))
+
+        # print("splitAfter:",splitAfter)
+
+        split=[]
+        last = -2
+        for s in splitAfter:
+            split.append(s)
+            last = s
+
+        # print("split:",split)
+        returnList = [(i+1,j) if i != j else (i,j) for i,j in zip([lo-1]+split,split+[hi])]
+
+        if intersect:
+            lo = max( [loHi1[0][0], loHi2[0][0]] )
+            hi = min( [loHi1[-1][1], loHi2[-1][1]] )
+            returnList = [r for r in returnList if r[0] >= lo and r[1] <= hi]
+
+        # print("loHi1:",loHi1)
+        # print("loHi2:",loHi2)
+        # print(returnList,"\n")
+        return returnList
+
+    def _getChunksForHelixPair(self,hid1,hid2):
+        chunks = []
+        chunks1,chunks2 = [sorted(list(set(self.chunks[hid]))) for hid in [hid1,hid2]]
+        return self.combineRegionLists(chunks1,chunks2)
+
+        i,j = [0,0]
+        lastHi = -100000
+        # pdb.set_trace()
+        while i < len(chunks1) or j < len(chunks2):
+            if i >= len(chunks1):
+                tmp
+            tmp = [x for x in sorted(list(set(chunks1[i]+chunks2[j]))) if x > lastHi]
+
+            # if len(tmp) > 1:
+            #     chunks.append(tmp[:2])
+            # if len(tmp) > 0:
+            #     lastHi = tmp[min(1,len(tmp))]
+            chunks.append(tmp[:2])
+            lastHi = tmp[min(1,len(tmp))]
+
+            if chunks1[i][1] <= lastHi: i+=1
+            if chunks2[j][1] <= lastHi: j+=1
+        return chunks
+
+    def getNtsBetweenCadnanoIdxInclusive(self,hid,lo,hi,isFwd=True):
+        if isFwd:
+            helixNts = self.helixNtsFwd[hid]
+        else:
+            helixNts = self.helixNtsRev[hid]
+
+        nts = []
+        for i in range(lo,hi+1):
+            if i in helixNts:
+                nts.extend(helixNts[i])
+        return nts
+
    # -------------------------- #
    # Methods for querying model #
    # -------------------------- #
@@ -877,6 +1043,7 @@ class atomicModel():


    def writeENM(self, prefix):
+        pushCount = 0
        if self.latticeType == "square":
            enmTemplate = enmTemplateSQ
        elif self.latticeType == "honeycomb":
@@ -935,41 +1102,49 @@ class atomicModel():
                            fh.write("bond %d %d %f %.2f\n" % (i,j,k,d))


-            print("NO STACKS found for : %d" % noStackPrime)
-            print("NO BPs: %d" % noBasepair)
+            print("NO STACKS found for:", noStackPrime)
+            print("NO BPs:", noBasepair)
            props, origins = self.part.helixPropertiesAndOrigins()
            origins = [np.array(o) for o in origins]
            
            ## Push bonds
-            ntDict = dict()
-            xo = dict()
+            fwdDict = dict()    # dictionaries of nts with keys [hid][zidx]
+            revDict = dict()
+            xo = dict()         # dictionary of crossovers between [hid1][hid2]
+
            helixIds = [hid for s in self.segments for hid in s.nodes.keys()]
            helixIds = sorted(list(set(helixIds)))

+            #- initialize dictionaries
            for h1 in helixIds:
+                fwdDict[h1] = dict()
+                revDict[h1] = dict()
                xo[h1] = dict()
                for h2 in helixIds:
                    xo[h1][h2] = []

+            #- fill dictionaries
            for seg in self.segments:
                for nt in seg:
                    hid = nt.prop['helixId']
-                    # if hid not in ntDict:
-                    #     ntDict[hid] = dict()
-                    if hid not in xo:
-                        xo[hid] = dict()

                    ## Add nt to ntDict
+                    idx = nt.prop['idx']
                    zidx = nt.prop['zIdx']
                    isFwd = nt.prop['isFwd']
-                    ntDict[(hid,zidx,isFwd)] = nt
+                    if isFwd:
+                        fwdDict[hid][idx] = nt
+                    else:
+                        revDict[hid][idx] = nt

-                    ## Find crossovers
+                    ## Find crossovers and ends
                    for nt2 in (nt.ntAt5prime, nt.ntAt3prime):
                        if nt2 is not None:
                            hid2 = nt2.prop['helixId']
                            if hid2 != hid:
-                                xo[hid][hid2].append(zidx)
+                                # xo[hid][hid2].append(zidx)
+                                xo[hid][hid2].append(idx)
+                                xo[hid2][hid].append(idx)

            props = self.part.getModelProperties()
            if props.get('point_type') == PointType.ARBITRARY:
@@ -980,39 +1155,76 @@ class atomicModel():

                if self.latticeType == 'honeycomb':
                    # minDist = 8 # TODO: test against server
-                    minDist = 7 # TODO: test against server
+                    minDist = 8 # TODO: test against server
                elif self.latticeType == 'square':
                    minDist = 11

-                for seg in self.segments:
-                    for nt in seg:
-                        if nt.basepair is None: continue
-                        hid = nt.prop['helixId']
-                        if hid not in xo:
-                            # print("WARNING: writeENM: helix %d not in xo dict" % hid)
+                for hid1 in helixIds:
+                    for hid2 in neighborHelices[hid1]:
+                        if hid2 not in helixIds:
+                            # print("WARNING: writeENM: helix %d not in helixIds" % hid2) # xo[%d] dict" % (hid2,hid1))
                            continue
-                        zidx = nt.prop['zIdx']
-                        isFwd = nt.prop['isFwd']
-                        ai = nt.atoms(transform=False)
-                        for hid2 in neighborHelices[hid]:
-                            # if hid2 not in xo[hid]:
-                            if hid2 not in helixIds:
-                                # print("WARNING: writeENM: helix %d not in helixIds" % hid2) # xo[%d] dict" % (hid2,hid))
-                                continue
-                            dists = [minDist,] + [np.abs(z-zidx) for z in xo[hid][hid2]]
-                            dist = min(dists)
-                            key = (hid2,zidx,isFwd)
-                            if dist >= minDist and key in ntDict:
-                                nt2  = ntDict[key]
-                                if nt2.basepair is None: continue
-                                i = nt.firstAtomicIndex
-                                j = nt2.firstAtomicIndex
-                                if j > i:
-                                    aj = nt2.atoms(transform=False)
+
+                        ## Build chunk for helix pair
+                        chunks = self._getChunksForHelixPair(hid1,hid2)
+
+                        for lo,hi in chunks:
+                            # pdb.set_trace()
+                            nts1 = self.getNtsBetweenCadnanoIdxInclusive(hid1,lo,hi)
+                            nts2 = self.getNtsBetweenCadnanoIdxInclusive(hid2,lo,hi)
+
+                            if len(nts1) <= len(nts2):
+                                iVals = list(range(len(nts1)))
+                                if len(nts1) > 1:
+                                    jVals = [int(round(j*(len(nts2)-1)/(len(nts1)-1))) for j in range(len(nts1))]
+                                else:
+                                    jVals = [0]
+                            else:
+                                if len(nts2) > 1:
+                                    iVals = [int(round(i*(len(nts1)-1)/(len(nts2)-1))) for i in range(len(nts2))]
+                                else:
+                                    iVals = [0]
+                                jVals = list(range(len(nts2)))
+
+                            ntPairs = [[nts1[i],nts2[j]] for i,j in zip(iVals,jVals)]
+
+                            ## Skip pairs close to nearest crossover on lo side
+                            xoIdx = [idx for idx in xo[hid1][hid2] if idx <= lo]
+                            if len(xoIdx) > 0:
+                                xoIdx = max(xoIdx)
+                                assert( type(lo) is int )
+                                xoDist = min([len(self.getNtsBetweenCadnanoIdxInclusive(hid,xoIdx,lo-1)) for hid in (hid1,hid2)])
+
+                                skip=minDist-xoDist
+                                if skip > 0:
+                                    ntPairs = ntPairs[skip:]
+
+                            ## Skip pairs close to nearest crossover on hi side
+                            xoIdx = [idx for idx in xo[hid1][hid2] if idx >= hi]
+                            if len(xoIdx) > 0:
+                                xoIdx = min(xoIdx)
+                                xoDist = min([len(self.getNtsBetweenCadnanoIdxInclusive(hid,hi+1,xoIdx)) for hid in (hid1,hid2)])
+
+                                skip=minDist-xoDist
+                                if skip > 0:
+                                    ntPairs = ntPairs[:-skip]
+
+
+                            for ntPair in ntPairs:
+                                bps = [nt.basepair for nt in ntPair]
+                                if None in bps: continue
+                                for nt1,nt2 in [ntPair,bps]:
+                                    i,j = [nt.firstAtomicIndex for nt in (nt1,nt2)]
+                                    if j <= i: continue
+
+                                    ai,aj = [nt.atoms(transform=False) for nt in (nt1,nt2)]
                                    i += ai.index('P')-1
                                    j += aj.index('P')-1
                                    fh.write("bond %d %d %f %.2f\n" % (i,j,1,31))    
+                                    pushCount += 1
+

+        print("PUSH BONDS:", pushCount)

    def writeNamdFile(self,prefix,numSteps=4800000):
        with open("%s.namd" % prefix,'w') as fh:
@@ -1147,8 +1359,12 @@ if {$nLast == 0} {

        ## $bindir/charmrun +p$procs $bindir/namd2 +netpoll +idlepoll +devices $gpus $config &> $log
        gpus = ','.join([str(gpu) for gpu in gpus])
-        cmd = (charmrun, "+p%d" % numprocs, namd, '+netpoll',
-               '+idlepoll', '+devices', gpus, "%s.namd" % outputPrefix)
+        # cmd = (charmrun, "+p%d" % numprocs, namd, '+netpoll',
+        cmd = (namd,
+               "+ppn", numprocs,
+               '+netpoll','+idlepoll',
+               '+devices', gpus,
+               "%s.namd" % outputPrefix)
        cmd = tuple(str(x) for x in cmd)

        logfile = "%s/%s.log" % (outputDirectory, outputPrefix)