Commit 51d8dde5 authored by cmaffeo2's avatar cmaffeo2
Browse files

Added cadnano_segments

parent c73289a3
# -*- coding: utf-8 -*-
import pdb
import numpy as np
import os,sys
from coords import readArbdCoords, readAvgArbdCoords
from segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
arbd="/home/cmaffeo2/development/cuda/arbd.dbg/src/arbd" # reduced the mem footprint cause vmd
namd="/home/cmaffeo2/development/namd-bin/NAMD_Git-2017-07-06_Linux-x86_64-multicore-CUDA/namd2"
def readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'):
seq = []
with open(sequenceFile) as ch:
for l in ch:
l = l.strip().replace(" ", "")
if l[0] in (";","#"): continue
seq.extend([c.upper() for c in l])
return seq
m13seq = readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat')
## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
## TODO: catch circular strands in "get_5prime" cadnano calls
## TODO: handle special motifs
## - doubly-nicked helices
## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
## - circular constructs
class cadnano_part(SegmentModel):
def __init__(self, part,
max_basepairs_per_bead = 7,
max_nucleotides_per_bead = 5,
local_twist = False
):
self.part = part
self._cadnano_part_to_segments(part)
# SegmentModel.__init__(self,...)
self.segments = [seg for hid,segs in self.helices.items() for seg in segs]
self._add_intrahelical_connections()
self._add_crossovers()
self._add_prime_ends()
SegmentModel.__init__(self, self.segments,
local_twist = local_twist,
max_basepairs_per_bead = max_basepairs_per_bead,
max_nucleotides_per_bead = max_nucleotides_per_bead,
dimensions=(5000,5000,5000))
def _cadnano_part_to_segments(self,part):
from cadnano.cnenum import PointType
segments = dict()
self.helices = helices = dict()
self.helix_ranges = helix_ranges = dict()
props = part.getModelProperties().copy()
if props.get('point_type') == PointType.ARBITRARY:
# TODO add code to encode Parts with ARBITRARY point configurations
raise NotImplementedError("Not implemented")
else:
vh_props, origins = part.helixPropertiesAndOrigins()
self.origins = origins
vh_list = []
strand_list = []
self.xover_list = xover_list = []
numHID = part.getIdNumMax() + 1
for id_num in range(numHID):
offset_and_size = part.getOffsetAndSize(id_num)
if offset_and_size is None:
## Add a placeholder for empty helix
vh_list.append((id_num, 0))
strand_list.append(None)
else:
offset, size = offset_and_size
vh_list.append((id_num, size))
fwd_ss, rev_ss = part.getStrandSets(id_num)
fwd_idxs, fwd_colors = fwd_ss.dump(xover_list)
rev_idxs, rev_colors = rev_ss.dump(xover_list)
strand_list.append((fwd_idxs, rev_idxs))
## Get lists of 5/3prime ends
strands5 = [o.strand5p() for o in part.oligos()]
strands3 = [o.strand3p() for o in part.oligos()]
self._5prime_list = [(s.idNum(),s.isForward(),s.idx5Prime()) for s in strands5]
self._3prime_list = [(s.idNum(),s.isForward(),s.idx3Prime()) for s in strands3]
## Get dictionary of insertions
self.insertions = allInsertions = part.insertions()
## Build helices
for hid in range(numHID):
# print("Working on helix",hid)
helices[hid] = []
helix_ranges[hid] = []
helixStrands = strand_list[hid]
if helixStrands is None: continue
## Build list of tuples containing (idx,length) of insertions/skips
insertions = sorted( [(i[0],i[1].length()) for i in allInsertions[hid].items()],
key=lambda x: x[0] )
## Build list of strand ends and list of mandatory node locations
ends1,ends2 = self._helixStrandsToEnds(helixStrands)
## Find crossovers for this helix
reqNodeZids = sorted(list(set( ends1 + ends2 ) ) )
## Build lists of which nt sites are occupied in the helix
strandOccupancies = [ [x for i in range(0,len(e),2)
for x in range(e[i],e[i+1]+1)]
for e in (ends1,ends2) ]
for i in range( len(reqNodeZids)-1 ):
zid1,zid2 = reqNodeZids[i:i+2]
## Check that there are nts between zid1 and zid2 before adding nodes
zMid = int(0.5*(zid1+zid2))
if zMid not in strandOccupancies[0] and zMid not in strandOccupancies[1]:
continue
numBps = zid2-zid1+1
for ins_idx,length in insertions:
## TODO: ensure placement of insertions is correct
## (e.g. are insertions at the ends handled correctly?)
if ins_idx < zid1:
continue
if ins_idx >= zid2:
break
numBps += length
print("Adding helix with length",numBps,zid1,zid2)
kwargs = dict(name="%d-%d" % (hid,len(helices[hid])),
num_nts = numBps,
start_position = self._get_cadnano_position(hid,zid1),
end_position = self._get_cadnano_position(hid,zid2) )
## TODO get sequence from cadnano api
if zMid in strandOccupancies[0] and zMid in strandOccupancies[1]:
seg = DoubleStrandedSegment(**kwargs)
elif zMid in strandOccupancies[0] or zMid in strandOccupancies[1]:
seg = SingleStrandedSegment(**kwargs)
helices[hid].append( seg )
helix_ranges[hid].append( (zid1,zid2) )
def _get_cadnano_position(self, hid, zid):
return [10*a for a in self.origins[hid]] + [-3.4*zid]
def _helixStrandsToEnds(self, helixStrands):
"""Utility method to convert cadnano strand lists into list of
indices of terminal points"""
endLists = [[],[]]
for endList, strandList in zip(endLists,helixStrands):
lastStrand = None
for s in strandList:
if lastStrand is None:
## first strand
endList.append(s[0])
elif lastStrand[1] != s[0]-1:
assert( s[0] > lastStrand[1] )
endList.extend( [lastStrand[1], s[0]] )
lastStrand = s
if lastStrand is not None:
endList.append(lastStrand[1])
return endLists
def _get_segment(self, hid, zid):
## TODO: rename these variables to segments
segs = self.helices[hid]
ranges = self.helix_ranges[hid]
for i in range(len(ranges)):
zmin,zmax = ranges[i]
if zmin <= zid and zid <= zmax:
return segs[i]
raise Exception("Could not find segment in helix %d at position %d" % (hid,zid))
def _get_nucleotide(self, hid, zid):
seg = self._get_segment(hid,zid)
sid = self.helices[hid].index(seg)
zmin,zmax = self.helix_ranges[hid][sid]
nt = zid-zmin
## Find insertions
# TODO: for i in range(zmin,zid+1): ?
for i in range(zmin,zid):
if i in self.insertions[hid]:
nt += self.insertions[hid][i].length()
return nt
""" Routines to add connnections between helices """
def _add_intrahelical_connections(self):
for hid,segs in self.helices.items():
for i in range(len(segs)-1):
seg1,seg2 = [segs[j] for j in (i,i+1)]
r1,r2 = [self.helix_ranges[hid][j] for j in (i,i+1)]
if r1[1]+1 == r2[0]:
## TODO: connect correct ends
seg1.connect_end3(seg2.start5)
def _add_crossovers(self):
for h1,f1,z1,h2,f2,z2 in self.xover_list:
seg1 = self._get_segment(h1,z1)
seg2 = self._get_segment(h2,z2)
nt1 = self._get_nucleotide(h1,z1)
nt2 = self._get_nucleotide(h2,z2)
## TODO: use different types of crossovers
seg1.add_crossover(nt1,seg2,nt2,[f1,f2])
def _add_prime_ends(self):
for h,fwd,z in self._5prime_list:
seg = self._get_segment(h,z)
nt = self._get_nucleotide(h,z)
print(seg.name,nt,fwd)
seg.add_5prime(nt,fwd)
for h,fwd,z in self._3prime_list:
seg = self._get_segment(h,z)
nt = self._get_nucleotide(h,z)
seg.add_3prime(nt,fwd)
def get_bead(self, hid, zid):
# get segment, get nucleotide,
seg = self._get_segment(hid,zid)
nt = self._get_nucleotide(hid,zid)
# return seg.get_nearest_bead(seg,nt / seg.num_nts)
return seg.get_nearest_bead(seg,nt / (seg.num_nts-1))
def read_json_file(filename):
import json
import re
try:
with open(filename) as ch:
data = json.load(ch)
except:
with open(filename) as ch:
content = ""
for l in ch:
l = re.sub(r"'", r'"', l)
# https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
# l = re.sub(r"{\s*(\w)", r'{"\1', l)
# l = re.sub(r",\s*(\w)", r',"\1', l)
# l = re.sub(r"(\w):", r'\1":', l)
content += l+"\n"
data = json.loads(content)
return data
def decode_cadnano_part(json_data):
import cadnano
from cadnano.document import Document
try:
doc = Document()
cadnano.fileio.v3decode.decode(doc, json_data)
except:
doc = Document()
cadnano.fileio.v2decode.decode(doc, json_data)
parts = [p for p in doc.getParts()]
if len(parts) != 1:
raise Exception("Only documents containing a single cadnano part are implemented at this time.")
part = parts[0]
return part
def package_archive( name, directory ):
...
def run_simulation_protocol( output_name, json_data,
sequence=None,
remove_long_bonds=False,
gpu = 0,
directory=None
):
coarseSteps = 1e7
fineSteps = 1e6
ret = None
d_orig = os.getcwd()
try:
if directory is None:
import tempfile
directory = tempfile.mkdtemp(prefix='origami-'+output_name, dir='./')
elif not os.path.exists(directory):
os.makedirs(directory)
os.chdir(directory)
output_directory = "output"
""" Read in data """
part = decode_cadnano_part(json_data)
model = cadnano_part(part,
max_basepairs_per_bead = 7,
max_nucleotides_per_bead = 4,
local_twist=False)
model._generate_strands() # TODO: move into model creation
# TODO
# try:
# model.set_cadnano_sequence()
# finally:
# ...
# if sequence is not None and len() :
# model.strands[0].set_sequence(seq)
if sequence is None or len(sequence) == 0:
## default m13mp18
sequence = list(m13seq)
# print(sequence)
# for i in sequence:
# print( i, i in ('A','T','C','G') )
# model.strands[0].set_sequence(sequence)
for s in model.segments:
s.randomize_unset_sequence()
# TODO: add the output directory in to the readArbdCoords functions or make it an attribute of the model object
""" Coarse simulation """
# simargs = dict(timestep=200e-6, outputPeriod=1e5, gpu=gpu )
simargs = dict(timestep=200e-6, outputPeriod=1e5, gpu=gpu, arbd=arbd )
if remove_long_bonds:
output_prefix = "%s-0" % output_name
full_output_prefix = "%s/%s" % (output_directory,output_prefix)
## TODO Remove long bonds
model.simulate( outputPrefix = output_prefix, numSteps=0.05*coarseSteps, **simargs )
coordinates = readArbdCoords('%s.0.restart' % full_output_prefix)
output_prefix = "%s-1" % output_name
model._update_segment_positions(coordinates)
# model._clear_beads()
# # model._generate_bead_model( 7, 4, False )
# model._generate_bead_model( 7, 4, False )
model.simulate( outputPrefix = output_prefix, numSteps=0.95*coarseSteps, **simargs )
else:
output_prefix = "%s-1" % output_name
full_output_prefix = "%s/%s" % (output_directory,output_prefix)
model.simulate( outputPrefix = output_prefix, numSteps=coarseSteps, **simargs )
coordinates = readArbdCoords('%s.0.restart' % full_output_prefix)
""" Fine simulation """
output_prefix = "%s-2" % output_name
full_output_prefix = "%s/%s" % (output_directory,output_prefix)
simargs['timestep'] = 50e-6
model._update_segment_positions(coordinates)
model._clear_beads()
model._generate_bead_model( 1, 1, local_twist=True, escapable_twist=True )
model.simulate( outputPrefix = output_prefix, numSteps=fineSteps, **simargs )
coordinates = readAvgArbdCoords('%s.psf' % output_prefix,'%s.pdb' % output_prefix, '%s.0.dcd' % full_output_prefix, rmsdThreshold=1)
""" Freeze twist """
output_prefix = "%s-3" % output_name
full_output_prefix = "%s/%s" % (output_directory,output_prefix)
model._update_segment_positions(coordinates)
model._clear_beads()
model._generate_bead_model( 1, 1, local_twist=True, escapable_twist=False )
model.simulate( outputPrefix = output_prefix, numSteps=fineSteps, **simargs )
coordinates = readAvgArbdCoords('%s.psf' % output_name,'%s.pdb' % output_prefix, '%s.0.dcd' % full_output_prefix )
""" Atomic simulation """
output_prefix = "%s-4" % output_name
full_output_prefix = "%s/%s" % (output_directory,output_prefix)
model._update_segment_positions(coordinates)
model._clear_beads()
model._generate_atomic_model(scale=0.25)
model.atomic_simulate( outputPrefix = full_output_prefix )
# if not os.path.exists(output_directory):
# os.makedirs(output_directory)
# elif not os.path.isdir(output_directory):
# raise Exception("output_directory '%s' is not a directory!" % output_directory)
""" Package result """
ret = "{}@{}:{}".format( os.environ["USER"],
os.environ["HOSTNAME"],
"/path/to/archive.tar.gz" )
except:
raise
finally:
os.chdir(d_orig)
return ret
# pynvml.nvmlInit()
# gpus = range(pynvml.nvmlDeviceGetCount())
# pynvml.nvmlShutdown()
# gpus = [0,1,2]
# print(gpus)
if __name__ == '__main__':
from glob import glob
import re
for f in glob('json/*'):
print("Working on {}".format(f))
out = re.match('json/(.*).json',f).group(1)
data = read_json_file(f)
run_simulation_protocol( out, data, gpu=0 )
# -*- coding: utf-8 -*-
import json
import cadnano
from cadnano.document import Document
import numpy as np
import glob,re
from beadModel import beadModel
from beadModelTwist import beadModelTwist
from atomicModel import atomicModel
import mdtraj as md
from coords import minimizeRmsd # mdtraj.superpose not working
import multiprocessing as mp
import pynvml as pynvml
import sys
# twistLp = 1e-6
twistLp = 90
# arbd="/home/cmaffeo2/development/cuda/arbd.dev/src/arbd"
arbd="/home/cmaffeo2/development/cuda/arbd.dbg/src/arbd" # reduced the mem footprint cause vmd
namd="/home/cmaffeo2/development/namd-bin/NAMD_Git-2017-07-06_Linux-x86_64-multicore-CUDA/namd2"
# namd="/home/wilsja/namd/NAMD_CVS-2016-11-09_Linux-x86_64-multicore-CUDA/namd2"
# namd="/home/cmaffeo2/development/namd-bin/NAMD_2.12_Linux-x86_64-multicore-CUDA/namd2"
def readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'):
seq = []
with open(sequenceFile) as ch:
for l in ch:
l = l.strip().replace(" ", "")
if l[0] in (";","#"): continue
seq.extend([c.upper() for c in l])
return seq
def readArbdCoords(fname):
coords = []
with open(fname) as fh:
for line in fh:
coords.append([float(x) for x in line.split()[1:]])
return np.array(coords)
def readAvgArbdCoords(psf,pdb,dcd,rmsdThreshold=3.5):
## align trajectory to pdb
ref = md.load(pdb, top=pdb)
sel = md.load(dcd, top=pdb)
# ref = sel[-1]
ids = ref.topology.select("name =~ 'd.*'")
assert(len(ids) > 3)
# r0 = ref.xyz[0,ids,:]
r0 = sel.xyz[-1,ids,:]
t = -1 # in case dcd_frames < 3
for t in range(len(sel.xyz)-2,-1,-1):
# print(t)
R,comA,comB = minimizeRmsd(sel.xyz[t,ids,:],r0)
sel.xyz[t,:,:] = np.array( [(r-comA).dot(R)+comB for r in sel.xyz[t]] )
rmsd = np.mean( (sel.xyz[t,ids,:]-r0)**2 )
if rmsd > (0.1*rmsdThreshold)**2:
break
t0=t+1
print( "Averaging coordinates in %s after frame %d" % (dcd, t0) )
pos = sel.xyz[t0:,:,:]
pos = np.mean(pos, axis=0)
return 10*pos # convert to Angstroms
def simulateDesign(inFile, outPrefix, coarseSteps=1e7, fineSteps=1e7,
gpu=0, shiftEulerZ=0,
sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'):
""" Read cadnano file """
try:
with open(inFile) as ch:
data = json.load(ch)
except:
with open(inFile) as ch:
## TODO: han
content = ""
for l in ch:
l = re.sub(r"'", r'"', l)
# https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
# l = re.sub(r"{\s*(\w)", r'{"\1', l)
# l = re.sub(r",\s*(\w)", r',"\1', l)
# l = re.sub(r"(\w):", r'\1":', l)
content += l+"\n"
data = json.loads(content)
try:
doc = Document()
cadnano.fileio.v3decode.decode(doc, data)
except:
doc = Document()
cadnano.fileio.v2decode.decode(doc, data)
parts = [p for p in doc.getParts()]
if len(parts) != 1:
raise Exception("Only documents containing a single 'part' are implemented at this time.")
part = parts[0]
if shiftEulerZ != 0:
## Manually fix eulerZ, which is improperly set by cadnano2.5 for some structures
props, origins = part.helixPropertiesAndOrigins()
for t,i in zip( props['eulerZ'], range(part.getIdNumMax()+1) ):
part.setVirtualHelixProperties(i,'eulerZ',t+shiftEulerZ)
""" Create models """
coarsestModelNoLongBonds = beadModel(part, twistPersistenceLength=twistLp,
maxBpsPerDNode=7, maxNtsPerSNode=4)
coarsestModel = beadModel(part, twistPersistenceLength=twistLp,
maxBpsPerDNode=7, maxNtsPerSNode=4)
finerModel = beadModelTwist(part, twistPersistenceLength=twistLp,
maxBpsPerDNode=1, maxNtsPerSNode=1)
finestModel = atomicModel(part, ntScale=0.25)
coarsestModelNoLongBonds._removeIntrahelicalConnectionsBeyond(100)
coarsestModelNoLongBonds._removeCrossoversBeyond(100)
models = [coarsestModelNoLongBonds, coarsestModel, finerModel, finestModel]
""" Examples of how models can be transformed """
## Custom adjust the z coordinates
# com1 = np.array([-36.7, 42.2, -431.5])
# M1 = np.array(((0,-1,0),(1,0,0),(0,0,1)))
# com2 = np.array([-36.7, 177.0, -429.0])
# M2 = np.array(((0,1,0),(-1,0,0),(0,0,1)))
# for model in models[:-1]:
# beads = [b for h in model for i,b in h[1].nodes.items()]
# for h in model:
# for i,b in h[1].nodes.items():
# if np.abs(b.position[1]-50) < 80:
# r = b.position
# r = np.inner(M1,r-com1) + com1
# r[0] -= 200
# b.position = r
# b.initialPosition = r
# elif b.position[1] > 120:
# r = b.position
# r = np.inner(M2,r-com2) + com2
# b.position = r
# b.initialPosition = r
# for seg in finestModel.segments:
# for nt in seg:
# if nt.position[1] < 0:
# nt.position[0] -= 100
""" Simulate and backmap each model """
numSteps = [int(0.05*coarseSteps), int(0.95*coarseSteps), fineSteps]
timestep = [200e-6, 200e-6, 50e-6]
lastModel = None
i = 0
for i in range(len(models)-1):
model = models[i]
out = "%s-%d" % (outPrefix,i)
lastOut = "%s-%d" % (outPrefix,i-1)
model.writePsf(out + '.ideal.psf')
model.writePdb(out + '.ideal.pdb')
if lastModel is not None:
if i == 1:
coords = readArbdCoords( "output/%s.0.restart" % lastOut )
else:
coords = readAvgArbdCoords("%s.psf" % lastOut, "%s.ideal.pdb" % lastOut,
"output/%s.0.dcd" % lastOut )
model.backmap( lastModel, coords,
dsDnaHelixNeighborDist=100, dsDnaAllNeighborDist=50,
ssDnaHelixNeighborDist=50)
model.simulate(out, outputDirectory = 'output',
arbd=arbd, gpu=gpu,
numSteps = numSteps[i],
timestep = timestep[i])
lastModel = model
coords = readAvgArbdCoords(
"%s.psf" % out, "%s.pdb" % out,
"output/%s.0.dcd" % out )
out = outPrefix + "-%d" % (i+1)
try:
finestModel.setScaffoldSeq( readSequenceFile(sequenceFile) )
except Exception as err:
print("WARNING:", err)
finestModel.randomizeUnassignedNtSeqs()
# finestModel.setUnassignedNtSeqs("T")
finestModel.writePdbPsf( out + ".ideal" )
finestModel.backmap( lastModel, coords )
finestModel.simulate( out, numSteps=480000, gpus=gpu, numprocs=8, namd=namd )
if len(sys.argv) > 1: