Skip to content
Snippets Groups Projects
Commit 501eb452 authored by cmaffeo2's avatar cmaffeo2
Browse files

Fixed how atoms are indexed to allow interhelical bonds when...

Fixed how atoms are indexed to allow interhelical bonds when insertions/deletions are staggered; generalized algorithms for finding indices between crossovers
parent f584d0d1
No related branches found
No related tags found
No related merge requests found
......@@ -132,7 +132,7 @@ class Segment():
self.nodes[helixId][strandIdx] = []
n = abstractNucleotide(sequence, position, angle, ntAt5prime,
isFwd=isFwd, helixId=helixId, zIdx=zIdx)
isFwd=isFwd, helixId=helixId, zIdx=zIdx, idx=strandIdx)
if ntAt5prime is not None:
ntAt5prime.set3primeNt(n)
......@@ -202,9 +202,12 @@ class Segment():
class atomicModel():
def __init__(self, part, ntScale=1.0, randomSeed=1):
self.chunks = dict()
self.numNodes = 0
self.segments = []
self.nodes = None
self.helixNtsFwd = dict()
self.helixNtsRev = dict()
self.nodeTypeCounts = None
self.ntScale = ntScale
self.part = part
......@@ -213,7 +216,7 @@ class atomicModel():
self.bonds = set()
self.angles = set()
self.dihedrals = set()
# self._bondParams = set()
# self._angleParams = set()
# self._dihedralParams = set()
......@@ -227,12 +230,26 @@ class atomicModel():
self.latticeType = atomicModel._determineLatticeType(part)
self._buildModel(part)
def _helixStrandsToEnds(self, helixStrands):
def _strandOccupancies(self, helixId):
try:
ends1,ends2 = self._getStrandEnds(helixId)
except:
## hid not in strand_list
return []
strandOccupancies = [ [x for i in range(0,len(e),2)
for x in range(e[i],e[i+1]+1)]
for e in (ends1,ends2) ]
return sorted(list(set( strandOccupancies[0] + strandOccupancies[1] )))
def _getStrandEnds(self, helixId):
"""Utility method to convert cadnano strand lists into list of
indices of terminal points"""
helixStrands = self.strand_list[helixId]
endLists = [[],[]]
for endList, strandList in zip(endLists,helixStrands):
lastStrand = None
......@@ -271,7 +288,7 @@ class atomicModel():
## TODO: compartmentalize following
## Loop over virtual helices and build lists of strands
vh_list = []
strand_list = []
self.strand_list = []
xover_list = []
numHID = part.getIdNumMax() + 1
......@@ -279,7 +296,7 @@ class atomicModel():
offset_and_size = part.getOffsetAndSize(id_num)
if offset_and_size is None:
# add a placeholder
strand_list.append(None)
self.strand_list.append(None)
# prop_list.append(None)
else:
offset, size = offset_and_size
......@@ -289,7 +306,7 @@ class atomicModel():
# print(' VHELIX %d fwd_ss:' % id_num, s)
fwd_idxs, fwd_colors = fwd_ss.dump(xover_list)
rev_idxs, rev_colors = rev_ss.dump(xover_list)
strand_list.append((fwd_idxs, rev_idxs))
self.strand_list.append((fwd_idxs, rev_idxs))
## prop_list.append((fwd_colors, rev_colors))
# for s in strand_list:
......@@ -303,11 +320,10 @@ class atomicModel():
3) For all basepairs, find stacks 5'/3' stacks
"""
baseToSegFwd = dict()
baseToSegRev = dict()
for hid in range(numHID):
baseToSegFwd[hid] = dict()
baseToSegRev[hid] = dict()
self.helixNtsFwd[hid] = dict()
self.helixNtsRev[hid] = dict()
self.chunks[hid] = []
## Loop over oligos/segments
segments = []
......@@ -358,55 +374,41 @@ class atomicModel():
## We place nts as pairs; spacing is not always uniform along strand.
## Here we find the locations between which spacing will be uniform.
# compStrandIdxs = [i for i in comp.idxs() for comp in strand.getComplementStrands()]
compIdxs = [comp.idxs() for comp in strand.getComplementStrands()]
assert( compIdxs == sorted(compIdxs) )
## make a list of indeces that are the end-points of "segmentes"
splitAfter = [-1]
for l,h in compIdxs:
if l > splitAfter[-1]: splitAfter.append(l)
splitAfter.append(h)
## Restrict the list to sensible values, no segments of 1 nt
splitAfter = [s for s in splitAfter if s in range(lo+2,hi-2)]
split = []
last = -2
for s in splitAfter:
if s > last+1:
split.append(s)
last = s
strandSegments = [(i+1,j) for i,j in zip([lo-1]+split,split+[hi])]
chunks = self.combineCompactRegionLists(
[[lo,hi]],
compIdxs,
intersect=True
)
self.chunks[hid].extend( chunks ) # Add running list of chunks for push bonds
## Find zIdx for each nucleotide
zIdxs = []
for l,h in strandSegments:
for l,h in chunks:
## Find the number of nts between l,h
nts = h-l+1
for ins in strand.insertionsOnStrand(l,h):
nts += ins.length()
for i in range(nts):
zIdxs.append( l + (h-l) * float(i)/(nts-1) )
assert(len(zIdxs) == numNt)
if isFwd:
baseToSeg = baseToSegFwd[hid]
helixNts = self.helixNtsFwd[hid]
else:
baseToSeg = baseToSegRev[hid]
helixNts = self.helixNtsRev[hid]
zIdxs = reversed(zIdxs)
## Find strandOccupancy index for each nucleotide
strandOccIdx = []
for l,h in strandSegments:
for l,h in chunks:
for i in range(l,h+1):
nts = 1
for ins in strand.insertionsOnStrand(i,i):
nts += ins.length()
baseToSeg[i] = []
helixNts[i] = []
for j in range(nts):
strandOccIdx.append(i)
if not isFwd:
......@@ -416,33 +418,26 @@ class atomicModel():
for s,zIdx,idx in zip(seqs,zIdxs,strandOccIdx):
p = zIdxToPos(zIdx)
a = zIdxToAngle(zIdx)
lastNt = seg.addNucleotide(hid, idx, zIdx, s, isFwd,
p, a, lastNt)
baseToSeg[idx].append(lastNt)
helixNts[idx].append(lastNt)
## Correct the order of nts in helixNtsRev
for hid,ntsAtIdx in self.helixNtsRev.items():
for i,nts in ntsAtIdx.items():
self.helixNtsRev[hid][i] = reversed(nts)
## 2) Find basepairs and stacks
for hid in range(numHID):
helixStrands = strand_list[hid]
if helixStrands is None:
continue
ends1,ends2 = self._helixStrandsToEnds(helixStrands)
strandOccupancies = [ [x for i in range(0,len(e),2)
for x in range(e[i],e[i+1]+1)]
for e in (ends1,ends2) ]
prevBasepair = None
for i in sorted(list(set( strandOccupancies[0] + strandOccupancies[1] ))):
for i in self._strandOccupancies(hid):
## Check that strand index is dsDNA
if not (i in baseToSegFwd[hid] and i in baseToSegRev[hid]):
if not (i in self.helixNtsFwd[hid] and i in self.helixNtsRev[hid]):
continue
## TODO Handle insertions (DONE?)
# nt1 = baseToSegFwd[hid][i][0]
# nt2 = baseToSegRev[hid][i][0]
for nt1,nt2 in zip(baseToSegFwd[hid][i],
reversed(baseToSegRev[hid][i])):
for nt1,nt2 in zip(self.helixNtsFwd[hid][i],
self.helixNtsRev[hid][i]):
self._pairBases(nt1,nt2)
if prevBasepair is not None and i - prevBasepair[0] <= 3: # TODO: find better way of locating stacks
......@@ -650,6 +645,177 @@ class atomicModel():
nt.firstAtomicIndex = natoms
natoms += len(nt.atoms(transform=False))
def combineRegionLists(self,loHi1,loHi2):
"""Combines two lists of (lo,hi) pairs specifying integer
regions a single list of regions. """
## Validate input
for l in (loHi1,loHi2):
## Assert each region in lists is sorted
for pair in l:
assert(len(pair) == 2)
assert(pair[0] <= pair[1])
## Break input into lists of compact regions
compactRegions1,compactRegions2 = [[],[]]
for compactRegions,loHi in zip(
[compactRegions1,compactRegions2],
[loHi1,loHi2]):
tmp = []
lastHi = loHi[0][0]-1
for lo,hi in loHi:
if lo-1 != lastHi:
compactRegions.append(tmp)
tmp = []
tmp.append((lo,hi))
lastHi = hi
if len(tmp) > 0:
compactRegions.append(tmp)
## Build result
result = []
region = []
i,j = [0,0]
compactRegions1.append([[1e10]])
compactRegions2.append([[1e10]])
while i < len(compactRegions1)-1 or j < len(compactRegions2)-1:
cr1 = compactRegions1[i]
cr2 = compactRegions2[j]
## initialize region
if len(region) == 0:
if cr1[0][0] <= cr2[0][0]:
region = cr1
i += 1
continue
else:
region = cr2
j += 1
continue
if region[-1][-1] >= cr1[0][0]:
region = self.combineCompactRegionLists(region, cr1, intersect=False)
i+=1
elif region[-1][-1] >= cr2[0][0]:
region = self.combineCompactRegionLists(region, cr2, intersect=False)
j+=1
else:
result.extend(region)
region = []
assert( len(region) > 0 )
result.extend(region)
result = sorted(result)
# print("loHi1:",loHi1)
# print("loHi2:",loHi2)
# print(result,"\n")
return result
def combineCompactRegionLists(self,loHi1,loHi2,intersect=False):
"""Combines two lists of (lo,hi) pairs specifying regions within a
compact integer set into a single list of regions.
examples:
loHi1 = [[0,4],[5,7]]
loHi2 = [[2,4],[5,9]]
out = [(0, 1), (2, 4), (5, 7), (8, 9)]
loHi1 = [[0,3],[5,7]]
loHi2 = [[2,4],[5,9]]
out = [(0, 1), (2, 3), (4, 4), (5, 7), (8, 9)]
"""
## Validate input
for l in (loHi1,loHi2):
## Assert each region in lists is sorted
for pair in l:
assert(len(pair) == 2)
assert(pair[0] <= pair[1])
## Assert lists are compact
for pair1,pair2 in zip(l[::2],l[1::2]):
assert(pair1[1]+1 == pair2[0])
## Find the ends of the region
lo = min( [loHi1[0][0], loHi2[0][0]] )
hi = max( [loHi1[-1][1], loHi2[-1][1]] )
## Make a list of indices where each region will be split
splitAfter = []
for l,h in loHi2:
if l != lo:
splitAfter.append(l-1)
if h != hi:
splitAfter.append(h)
for l,h in loHi1:
if l != lo:
splitAfter.append(l-1)
if h != hi:
splitAfter.append(h)
splitAfter = sorted(list(set(splitAfter)))
# print("splitAfter:",splitAfter)
split=[]
last = -2
for s in splitAfter:
split.append(s)
last = s
# print("split:",split)
returnList = [(i+1,j) if i != j else (i,j) for i,j in zip([lo-1]+split,split+[hi])]
if intersect:
lo = max( [loHi1[0][0], loHi2[0][0]] )
hi = min( [loHi1[-1][1], loHi2[-1][1]] )
returnList = [r for r in returnList if r[0] >= lo and r[1] <= hi]
# print("loHi1:",loHi1)
# print("loHi2:",loHi2)
# print(returnList,"\n")
return returnList
def _getChunksForHelixPair(self,hid1,hid2):
chunks = []
chunks1,chunks2 = [sorted(list(set(self.chunks[hid]))) for hid in [hid1,hid2]]
return self.combineRegionLists(chunks1,chunks2)
i,j = [0,0]
lastHi = -100000
# pdb.set_trace()
while i < len(chunks1) or j < len(chunks2):
if i >= len(chunks1):
tmp
tmp = [x for x in sorted(list(set(chunks1[i]+chunks2[j]))) if x > lastHi]
# if len(tmp) > 1:
# chunks.append(tmp[:2])
# if len(tmp) > 0:
# lastHi = tmp[min(1,len(tmp))]
chunks.append(tmp[:2])
lastHi = tmp[min(1,len(tmp))]
if chunks1[i][1] <= lastHi: i+=1
if chunks2[j][1] <= lastHi: j+=1
return chunks
def getNtsBetweenCadnanoIdxInclusive(self,hid,lo,hi,isFwd=True):
if isFwd:
helixNts = self.helixNtsFwd[hid]
else:
helixNts = self.helixNtsRev[hid]
nts = []
for i in range(lo,hi+1):
if i in helixNts:
nts.extend(helixNts[i])
return nts
# -------------------------- #
# Methods for querying model #
# -------------------------- #
......@@ -877,6 +1043,7 @@ class atomicModel():
def writeENM(self, prefix):
pushCount = 0
if self.latticeType == "square":
enmTemplate = enmTemplateSQ
elif self.latticeType == "honeycomb":
......@@ -935,41 +1102,49 @@ class atomicModel():
fh.write("bond %d %d %f %.2f\n" % (i,j,k,d))
print("NO STACKS found for : %d" % noStackPrime)
print("NO BPs: %d" % noBasepair)
print("NO STACKS found for:", noStackPrime)
print("NO BPs:", noBasepair)
props, origins = self.part.helixPropertiesAndOrigins()
origins = [np.array(o) for o in origins]
## Push bonds
ntDict = dict()
xo = dict()
fwdDict = dict() # dictionaries of nts with keys [hid][zidx]
revDict = dict()
xo = dict() # dictionary of crossovers between [hid1][hid2]
helixIds = [hid for s in self.segments for hid in s.nodes.keys()]
helixIds = sorted(list(set(helixIds)))
#- initialize dictionaries
for h1 in helixIds:
fwdDict[h1] = dict()
revDict[h1] = dict()
xo[h1] = dict()
for h2 in helixIds:
xo[h1][h2] = []
#- fill dictionaries
for seg in self.segments:
for nt in seg:
hid = nt.prop['helixId']
# if hid not in ntDict:
# ntDict[hid] = dict()
if hid not in xo:
xo[hid] = dict()
## Add nt to ntDict
idx = nt.prop['idx']
zidx = nt.prop['zIdx']
isFwd = nt.prop['isFwd']
ntDict[(hid,zidx,isFwd)] = nt
if isFwd:
fwdDict[hid][idx] = nt
else:
revDict[hid][idx] = nt
## Find crossovers
## Find crossovers and ends
for nt2 in (nt.ntAt5prime, nt.ntAt3prime):
if nt2 is not None:
hid2 = nt2.prop['helixId']
if hid2 != hid:
xo[hid][hid2].append(zidx)
# xo[hid][hid2].append(zidx)
xo[hid][hid2].append(idx)
xo[hid2][hid].append(idx)
props = self.part.getModelProperties()
if props.get('point_type') == PointType.ARBITRARY:
......@@ -980,39 +1155,76 @@ class atomicModel():
if self.latticeType == 'honeycomb':
# minDist = 8 # TODO: test against server
minDist = 7 # TODO: test against server
minDist = 8 # TODO: test against server
elif self.latticeType == 'square':
minDist = 11
for seg in self.segments:
for nt in seg:
if nt.basepair is None: continue
hid = nt.prop['helixId']
if hid not in xo:
# print("WARNING: writeENM: helix %d not in xo dict" % hid)
for hid1 in helixIds:
for hid2 in neighborHelices[hid1]:
if hid2 not in helixIds:
# print("WARNING: writeENM: helix %d not in helixIds" % hid2) # xo[%d] dict" % (hid2,hid1))
continue
zidx = nt.prop['zIdx']
isFwd = nt.prop['isFwd']
ai = nt.atoms(transform=False)
for hid2 in neighborHelices[hid]:
# if hid2 not in xo[hid]:
if hid2 not in helixIds:
# print("WARNING: writeENM: helix %d not in helixIds" % hid2) # xo[%d] dict" % (hid2,hid))
continue
dists = [minDist,] + [np.abs(z-zidx) for z in xo[hid][hid2]]
dist = min(dists)
key = (hid2,zidx,isFwd)
if dist >= minDist and key in ntDict:
nt2 = ntDict[key]
if nt2.basepair is None: continue
i = nt.firstAtomicIndex
j = nt2.firstAtomicIndex
if j > i:
aj = nt2.atoms(transform=False)
## Build chunk for helix pair
chunks = self._getChunksForHelixPair(hid1,hid2)
for lo,hi in chunks:
# pdb.set_trace()
nts1 = self.getNtsBetweenCadnanoIdxInclusive(hid1,lo,hi)
nts2 = self.getNtsBetweenCadnanoIdxInclusive(hid2,lo,hi)
if len(nts1) <= len(nts2):
iVals = list(range(len(nts1)))
if len(nts1) > 1:
jVals = [int(round(j*(len(nts2)-1)/(len(nts1)-1))) for j in range(len(nts1))]
else:
jVals = [0]
else:
if len(nts2) > 1:
iVals = [int(round(i*(len(nts1)-1)/(len(nts2)-1))) for i in range(len(nts2))]
else:
iVals = [0]
jVals = list(range(len(nts2)))
ntPairs = [[nts1[i],nts2[j]] for i,j in zip(iVals,jVals)]
## Skip pairs close to nearest crossover on lo side
xoIdx = [idx for idx in xo[hid1][hid2] if idx <= lo]
if len(xoIdx) > 0:
xoIdx = max(xoIdx)
assert( type(lo) is int )
xoDist = min([len(self.getNtsBetweenCadnanoIdxInclusive(hid,xoIdx,lo-1)) for hid in (hid1,hid2)])
skip=minDist-xoDist
if skip > 0:
ntPairs = ntPairs[skip:]
## Skip pairs close to nearest crossover on hi side
xoIdx = [idx for idx in xo[hid1][hid2] if idx >= hi]
if len(xoIdx) > 0:
xoIdx = min(xoIdx)
xoDist = min([len(self.getNtsBetweenCadnanoIdxInclusive(hid,hi+1,xoIdx)) for hid in (hid1,hid2)])
skip=minDist-xoDist
if skip > 0:
ntPairs = ntPairs[:-skip]
for ntPair in ntPairs:
bps = [nt.basepair for nt in ntPair]
if None in bps: continue
for nt1,nt2 in [ntPair,bps]:
i,j = [nt.firstAtomicIndex for nt in (nt1,nt2)]
if j <= i: continue
ai,aj = [nt.atoms(transform=False) for nt in (nt1,nt2)]
i += ai.index('P')-1
j += aj.index('P')-1
fh.write("bond %d %d %f %.2f\n" % (i,j,1,31))
pushCount += 1
print("PUSH BONDS:", pushCount)
def writeNamdFile(self,prefix,numSteps=4800000):
with open("%s.namd" % prefix,'w') as fh:
......@@ -1147,8 +1359,12 @@ if {$nLast == 0} {
## $bindir/charmrun +p$procs $bindir/namd2 +netpoll +idlepoll +devices $gpus $config &> $log
gpus = ','.join([str(gpu) for gpu in gpus])
cmd = (charmrun, "+p%d" % numprocs, namd, '+netpoll',
'+idlepoll', '+devices', gpus, "%s.namd" % outputPrefix)
# cmd = (charmrun, "+p%d" % numprocs, namd, '+netpoll',
cmd = (namd,
"+ppn", numprocs,
'+netpoll','+idlepoll',
'+devices', gpus,
"%s.namd" % outputPrefix)
cmd = tuple(str(x) for x in cmd)
logfile = "%s/%s.log" % (outputDirectory, outputPrefix)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment