run.py 8.62 KiB
# -*- coding: utf-8 -*-
import json
import cadnano
from cadnano.document import Document
import numpy as np
import glob,re
from beadModel import beadModel
from beadModelTwist import beadModelTwist
from atomicModel import atomicModel
import mdtraj as md
from coords import minimizeRmsd # mdtraj.superpose not working
import multiprocessing as mp
import sys
# twistLp = 1e-6
twistLp = 90
# arbd="/home/cmaffeo2/development/cuda/arbd.dev/src/arbd"
arbd="/home/cmaffeo2/development/cuda/arbd.dbg/src/arbd" # reduced the mem footprint cause vmd
namd="/home/cmaffeo2/development/namd-bin/NAMD_Git-2017-07-06_Linux-x86_64-multicore-CUDA/namd2"
# namd="/home/wilsja/namd/NAMD_CVS-2016-11-09_Linux-x86_64-multicore-CUDA/namd2"
# namd="/home/cmaffeo2/development/namd-bin/NAMD_2.12_Linux-x86_64-multicore-CUDA/namd2"
def readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'):
seq = []
with open(sequenceFile) as ch:
for l in ch:
l = l.strip().replace(" ", "")
if l[0] in (";","#"): continue
seq.extend([c.upper() for c in l])
return seq
def readArbdCoords(fname):
coords = []
with open(fname) as fh:
for line in fh:
coords.append([float(x) for x in line.split()[1:]])
return np.array(coords)
def readAvgArbdCoords(psf,pdb,dcd,rmsdThreshold=3.5):
## align trajectory to pdb
ref = md.load(pdb, top=pdb)
sel = md.load(dcd, top=pdb)
# ref = sel[-1]
ids = ref.topology.select("name =~ 'd.*'")
assert(len(ids) > 3)
# r0 = ref.xyz[0,ids,:]
r0 = sel.xyz[-1,ids,:]
for t in range(len(sel.xyz)-2,-1,-1):
# print(t)
R,comA,comB = minimizeRmsd(sel.xyz[t,ids,:],r0)
sel.xyz[t,:,:] = np.array( [(r-comA).dot(R)+comB for r in sel.xyz[t]] )
rmsd = np.mean( (sel.xyz[t,ids,:]-r0)**2 )
if rmsd > (0.1*rmsdThreshold)**2:
break
t0=t+1
print( "Averaging coordinates in %s after frame %d" % (dcd, t0) )
pos = sel.xyz[t0:,:,:]
pos = np.mean(pos, axis=0)
return 10*pos # convert to Angstroms
def simulateDesign(inFile, outPrefix, numCoarsestSteps=5000000, gpu=0, shiftEulerZ=0,
sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'):
# ----------------- #
# Read cadnano file #
# ----------------- #
try:
with open(inFile) as ch:
data = json.load(ch)
except:
with open(inFile) as ch:
## TODO: han
content = ""
for l in ch:
l = re.sub(r"'", r'"', l)
# https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
# l = re.sub(r"{\s*(\w)", r'{"\1', l)
# l = re.sub(r",\s*(\w)", r',"\1', l)
# l = re.sub(r"(\w):", r'\1":', l)
content += l+"\n"
data = json.loads(content)
try:
doc = Document()
cadnano.fileio.v3decode.decode(doc, data)
except:
doc = Document()
cadnano.fileio.v2decode.decode(doc, data)
parts = [p for p in doc.getParts()]
if len(parts) != 1:
raise Exception("Only documents containing a single 'part' are implemented at this time.")
part = parts[0]
if shiftEulerZ != 0:
## Manually fix eulerZ, which is improperly set by cadnano2.5 for some structures
props, origins = part.helixPropertiesAndOrigins()
for t,i in zip( props['eulerZ'], range(part.getIdNumMax()+1) ):
part.setVirtualHelixProperties(i,'eulerZ',t+shiftEulerZ)
# ------------- #
# Create models #
# ------------- #
coarsestModelNoLongBonds = beadModel(part, twistPersistenceLength=twistLp,
maxBpsPerDNode=7, maxNtsPerSNode=4)
coarsestModel = beadModel(part, twistPersistenceLength=twistLp,
maxBpsPerDNode=7, maxNtsPerSNode=4)
coarseModel = beadModel(part, twistPersistenceLength=twistLp,
maxBpsPerDNode=4, maxNtsPerSNode=2)
fineModel = beadModel(part, twistPersistenceLength=twistLp,
maxBpsPerDNode=1, maxNtsPerSNode=1)
finerModel = beadModelTwist(part, twistPersistenceLength=twistLp,
maxBpsPerDNode=1, maxNtsPerSNode=1)
finestModel = atomicModel(part, ntScale=0.25)
coarsestModelNoLongBonds._removeIntrahelicalConnectionsBeyond(100)
coarsestModelNoLongBonds._removeCrossoversBeyond(100)
models = [coarsestModelNoLongBonds, coarsestModel, coarseModel, fineModel, finerModel, finestModel]
# for m in models:
# m._removeIntrahelicalConnectionsBeyond(800)
# for m in models[:3]:
# m._removeCrossoversBeyond(800)
## Custom adjust the z coordinates
# com1 = np.array([-36.7, 42.2, -431.5])
# M1 = np.array(((0,-1,0),(1,0,0),(0,0,1)))
# com2 = np.array([-36.7, 177.0, -429.0])
# M2 = np.array(((0,1,0),(-1,0,0),(0,0,1)))
# for model in models[:-1]:
# beads = [b for h in model for i,b in h[1].nodes.items()]
# for h in model:
# for i,b in h[1].nodes.items():
# if np.abs(b.position[1]-50) < 80:
# r = b.position
# r = np.inner(M1,r-com1) + com1
# r[0] -= 200
# b.position = r
# b.initialPosition = r
# elif b.position[1] > 120:
# r = b.position
# r = np.inner(M2,r-com2) + com2
# b.position = r
# b.initialPosition = r
# # coarsestModel, coarseModel, fineModel = [moveBeads(m) for m in models[:3]]
# # finestModel = models[3]
# for seg in finestModel.segments:
# for nt in seg:
# if nt.position[1] < 0:
# nt.position[0] -= 100
# ------------------------------- #
# Simulate and backmap each model #
# ------------------------------- #
stepId = [i for i in range(5)]
numSteps = [int(0.05*numCoarsestSteps), int(0.95*numCoarsestSteps)]
timestep = [200e-6, 200e-6]
lastModel = None
for i in stepId:
args = dict(arbd=arbd, gpu=gpu, numSteps=10000,
outputDirectory = 'output')
if i < len(numSteps): args['numSteps'] = numSteps[i]
if i < len(numSteps): args['timestep'] = timestep[i]
if i == 4:
args['numSteps'] = 1000000
args['timestep'] = timestep=50e-6
out = "%s-%d" % (outPrefix,i)
lastOut = "%s-%d" % (outPrefix,i-1)
model = models[i]
model.writePsf(out + '.ideal.psf')
model.writePdb(out + '.ideal.pdb')
if lastModel is not None:
if i == 1:
coords = readArbdCoords( "output/%s.0.restart" % lastOut )
else:
coords = readAvgArbdCoords("%s.psf" % lastOut, "%s.ideal.pdb" % lastOut,
"output/%s.0.dcd" % lastOut )
model.backmap( lastModel, coords,
dsDnaHelixNeighborDist=100, dsDnaAllNeighborDist=50,
ssDnaHelixNeighborDist=50)
model.simulate(out, **args)
lastModel = model
coords = readAvgArbdCoords(
"%s.psf" % out, "%s.pdb" % out,
"output/%s.0.dcd" % out )
out = outPrefix + "-5"
try:
finestModel.setScaffoldSeq( readSequenceFile(sequenceFile) )
except Exception as err:
print("WARNING:", err)
finestModel.randomizeUnassignedNtSeqs()
# finestModel.setUnassignedNtSeqs("T")
finestModel.writePdbPsf( out + ".ideal" )
finestModel.backmap( lastModel, coords )
finestModel.simulate( out, numSteps=480000, gpus=gpu, numprocs=4, namd=namd )
if len(sys.argv) > 1:
files = sys.argv[1:]
else:
files = sorted(glob.glob('json/*.json'))
def run(f):
print("Working on %s with gpu %d" % (f,gpu))
out = re.match('json/(.*).json',f).group(1)
oldOut = sys.stdout
oldErr = sys.stderr
with open("%s.log" % out, "w") as fh:
sys.stdout = fh
sys.stderr = fh
try:
simulateDesign(f, out, numCoarsestSteps=10000000, gpu=gpu)
except Exception as err:
print("WARNING: %s: failed to simulate design" % f)
print("ERROR:", err)
sys.stdout.flush()
sys.stdout = oldOut
sys.stderr = oldErr
gpus = [0,1,2]
# gpus = [2]
if __name__ == '__main__':
def init(queue):
global gpu
gpu = queue.get()
idQueue = mp.Manager().Queue()
for gpu in gpus:
idQueue.put(gpu)
pool = mp.Pool(len(gpus), init, (idQueue,))
pool.map(run, files)