Commit 56790814 authored by cmaffeo2's avatar cmaffeo2
Browse files

Added script for building model from pdb

parent e138ec52
# -*- coding: utf-8 -*-
import pdb
import numpy as np
import os,sys
from glob import glob
import re
import MDAnalysis as mda
import MDAnalysis.analysis.align as align
from MDAnalysis.analysis.distances import distance_array
from segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
arbd="/home/cmaffeo2/development/cuda/arbd.dbg/src/arbd" # reduced the mem footprint cause vmd
namd="/home/cmaffeo2/development/namd-bin/NAMD_Git-2017-07-06_Linux-x86_64-multicore-CUDA/namd2"
def readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'):
seq = []
with open(sequenceFile) as ch:
for l in ch:
l = l.strip().replace(" ", "")
if l[0] in (";","#"): continue
seq.extend([c.upper() for c in l])
return seq
m13seq = readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat')
resnames = dict(A='ADE DA DA5 DA3', T='THY DT DT5 DT3',
C='CYT DC DC5 DC3', G='GUA DG DG5 DG3')
bp_bond_atoms = dict( A='N1', T='H3',
C='N3', G='H1' )
bases = resnames.keys()
resname_to_key = {n:k for k,v in resnames.items() for n in v.split()}
complement = dict(A='T', C='G', T='A', G='C')
refUnis = {b:mda.Universe("resources/generate-nts/1-w3dna/{}{}.pdb".format(b,complement[b]))
for b in bases}
stack_text = "not name O5' P O1P O2P OP1 OP2 O3' H*"
ref_name_ids_inv = dict()
for key,val in refUnis.items():
ref_res = val.residues[0]
names = ref_res.atoms.select_atoms( stack_text ).atoms.names
ref_name_ids_inv[key] = np.argsort(np.argsort(names))
## TODO: possibly improve ref_stack_position
# avg_stack_position = [ru.residues[0].atoms.select_atoms(stack_text).center_of_mass()
# for k,ru in refUnis.items()]
# avg_stack_position = np.mean(avg_stack_position,axis=0)
# print(avg_stack_position)
## Obtained by apply stack transform to base
ref_stack_position = np.array((-2.41851735, -0.259761333, 3.39999978))
ref_bp_position = np.array((0, -8.7434375, -0.326354))
def residues_within( cutoff, coords1, coords2, sel1, sel2, universe, selection_text ):
distance_matrix = distance_array( coords1, coords2 )
if sel1 == sel2:
## ignore diagonal
distance_matrix = distance_matrix + np.eye(len(distance_matrix))*10*cutoff
# arr_idxs = np.where((distance_matrix > 0) * (distance_matrix < cutoff))
arr_idxs = np.where((distance_matrix < cutoff))
arr_idx1,arr_idx2 = arr_idxs
atoms = universe.select_atoms(selection_text)
within = -np.ones( atoms.resindices[-1]+1, dtype=np.int )
if len(arr_idx1) == len(np.unique(arr_idx1)) and len(arr_idx2) == len(np.unique(arr_idx2)):
within[sel1.resindices[arr_idx1]] = sel2.resindices[arr_idx2]
else:
unique,ids,counts = np.unique(arr_idx1, return_index=True, return_counts=True)
res1 = sel1.resindices[unique[counts == 1]]
res2 = sel2.resindices[arr_idx2[ids[counts == 1]]]
within[res1] = res2
for i in np.where(counts > 1)[0]:
ids2 = arr_idx1 == unique[i]
ij = np.argmin(distance_matrix[arr_idx1[ids2],arr_idx2[ids2]])
res1 = sel1.resindices[arr_idx1[ids2][ij]]
res2 = sel2.resindices[arr_idx2[ids2][ij]]
within[res1] = res2
# within[sel1.resindices[arr_idx1]] = sel2.resindices[arr_idx2]
return within
def find_base_position_orientation( u, selection_text='all' ):
## Find orientation and center of each nucleotide
transforms,centers = [[],[]]
for r in u.select_atoms(selection_text).residues:
key = resname_to_key[r.resname]
ref_res = refUnis[key].residues[0]
ref,sel = [res.atoms.select_atoms( stack_text ) for res in (ref_res,r)]
assert(len(ref) == len(sel))
## Find order for sel
inv = ref_name_ids_inv[key]
# sel_order = inv[np.argsort(sel.atoms.names)]
sel_order = np.argsort(sel.atoms.names)[inv]
assert( (ref.atoms.names == sel.atoms.names[sel_order]).all() )
## Reorder coordinates in sel so atom names match
c = sel.atoms.center_of_mass()
R,rmsd = align.rotation_matrix(sel.positions[sel_order] - c,
ref.positions - ref.atoms.center_of_mass())
R = np.array(R) # convert from mda matrix to np array
transforms.append(R)
centers.append(c)
centers = np.array(centers, dtype=np.float32)
transforms = np.array(transforms)
return centers,transforms
def find_basepairs( u, centers, transforms, selection_text='all' ):
bonds = { b:"resname {} and name {}".format(resnames[b],bp_bond_atoms[b])
for b in bases }
sel1 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
bonds['A'],bonds['C']))
sel2 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
bonds['T'],bonds['G']))
possible_basepairs = residues_within( cutoff = 2.2,
coords1=sel1.positions,
coords2=sel2.positions,
sel1 = sel1, sel2 = sel2,
universe = u,
selection_text = selection_text )
## Filter by expected position
ids = possible_basepairs >= 0
possible_resI = np.where( ids )[0]
possible_resJ = possible_basepairs[ ids ].astype(int)
resI,resJ = [[],[]]
for i,j,R1,c1,c2 in zip(possible_resI,possible_resJ,
transforms[possible_resI],
centers[possible_resI],
centers[possible_resJ]):
c2_expected = c1 + ref_bp_position.dot(R1)
if np.linalg.norm(c2_expected-c2) < 1:
resI.append(i)
resJ.append(j)
resI = np.array(resI, dtype=np.int)
resJ = np.array(resJ, dtype=np.int)
## Add reciprocal basepairs
assert( (possible_basepairs[resJ] == -1).all() )
basepairs = -np.ones(possible_basepairs.shape, dtype=np.int)
basepairs[resI] = resJ
basepairs[resJ] = resI
return basepairs
def find_stacks( u, centers, transforms, selection_text='all' ):
## Find orientation and center of each nucleotide
expected_stack_positions = []
for R,c in zip(transforms,centers):
expected_stack_positions.append( c + ref_stack_position.dot(R) )
expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)
sel = u.select_atoms("({}) and name C1'".format( selection_text ))
stacks_above = residues_within( cutoff = 3.5,
coords1=centers,
coords2=expected_stack_positions,
sel1 = sel, sel2 = sel,
universe = u, selection_text = selection_text )
return stacks_above
def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):
helixmap = -np.ones(basepairs.shape, dtype=np.int)
helixrank = -np.ones(basepairs.shape)
is_fwd = np.ones(basepairs.shape, dtype=np.int)
## Remove stacks with nts lacking a basepairs
nobp = np.where(basepairs < 0)[0]
stacks_above[nobp] = -1
stacks_with_nobp = np.in1d(stacks_above, nobp)
stacks_above[stacks_with_nobp] = -1
end_ids = np.where( (stacks_above < 0)*(basepairs >= 0) )[0]
hid = 0
for end in end_ids:
if helixmap[end] >= 0:
continue
rank = 0
nt = basepairs[end]
bp = basepairs[nt]
assert(helixmap[nt] == -1)
assert(helixmap[bp] == -1)
helixmap[nt] = helixmap[bp] = hid
helixrank[nt] = helixrank[bp] = rank
is_fwd[bp] = 0
rank +=1
while stacks_above[nt] >= 0:
nt = stacks_above[nt]
if basepairs[nt] < 0: break
bp = basepairs[nt]
assert(helixmap[nt] == -1)
assert(helixmap[bp] == -1)
helixmap[nt] = helixmap[bp] = hid
helixrank[nt] = helixrank[bp] = rank
is_fwd[bp] = 0
rank +=1
hid += 1
return helixmap, helixrank, is_fwd
def get_crossovers(helixmap,helixrank,stacks_above):
...
def update_helixmap_with_ssdna(helixmap,helixrank,stacks_above):
...
ss_residues = hmap < 0
helixmap
def break_into_contiguous_groups(sequence):
from operator import itemgetter
from itertools import groupby
return [map(itemgetter(1),g)
for i,g in groupby(enumerate(sequence), lambda i,x:i-x)]
def SegmentModelFromPdb(*args,**kwargs):
u = mda.Universe(*args,**kwargs)
for a in u.select_atoms("resname DT* and name C7").atoms:
a.name = "C5M"
## Find basepairs and base stacks
centers,transforms = find_base_position_orientation(u)
bps = find_basepairs(u, centers, transforms)
stacks = find_stacks(u, centers, transforms)
## Build map from residue index to helix index
hmap,hrank,fwd = basepairs_and_stacks_to_helixmap(bps,stacks)
double_stranded_helices = np.unique(hmap[hmap >= 0])
## Add ssDNA to hmap
hid = double_stranded_helices[-1]+1
ss_residues = hmap < 0
assert( np.all(bps[ss_residues] == -1) )
for s in u.segments:
r = s.atoms.select_atoms("name C1'").resindices
ss_residues = r[np.where(hmap[r] < 0)[0]]
if len(ss_residues) == 0: continue
resid_diff = np.diff(ss_residues)
tmp = np.where( resid_diff != 1 )[0]
first_res = ss_residues[0]
for i in tmp:
ids = np.arange(first_res, ss_residues[i]+1)
assert( np.all(hmap[ids] == -1) )
hmap[ids] = hid
hrank[ids] = ids-first_res
first_res = ss_residues[i+1]
hid += 1
ids = np.arange(first_res, ss_residues[-1]+1)
assert( np.all(hmap[ids] == -1) )
hmap[ids] = hid
hrank[ids] = ids-first_res
hid+=1
print(hmap)
single_stranded_helices = np.arange(double_stranded_helices[-1]+1,hid)
## Create double-stranded segments
doubleSegments = []
for hid in double_stranded_helices:
maxrank = np.max( hrank[hmap==hid] )
start = np.where((hmap == hid) * (hrank == 0))[0]
end = np.where((hmap == hid) * (hrank == maxrank))[0]
start_pos = np.mean( [u.atoms.residues[r].atoms.center_of_mass() for r in start], axis=0 )
end_pos = np.mean( [u.atoms.residues[r].atoms.center_of_mass() for r in end], axis=0 )
seg = DoubleStrandedSegment(name=str(hid),
num_bp = np.sum(hmap==hid)//2,
start_position = start_pos,
end_position = end_pos)
assert(hid == len(doubleSegments))
doubleSegments.append(seg)
## Create single-stranded segments
singleSegments = []
for hid in single_stranded_helices:
maxrank = np.max( hrank[hmap==hid] )
start = np.where((hmap == hid) * (hrank == 0))[0]
end = np.where((hmap == hid) * (hrank == maxrank))[0]
start_pos = np.mean( [u.atoms.residues[r].atoms.center_of_mass() for r in start], axis=0 )
end_pos = np.mean( [u.atoms.residues[r].atoms.center_of_mass() for r in end], axis=0 )
seg = SingleStrandedSegment(name=str(hid),
num_nt = np.sum(hmap==hid),
start_position = start_pos,
end_position = end_pos)
assert(hid == len(doubleSegments) + len(singleSegments))
singleSegments.append(seg)
## Find crossovers and 5prime/3prime ends
crossovers,prime5,prime3 = [[],[],[]]
for s in u.segments:
r = s.atoms.select_atoms("name C1'").resindices
tmp = np.where(np.diff(hmap[r]) != 0)[0]
crossovers.extend( r[tmp] )
prime5.append(r[0])
prime3.append(r[-1])
## Add connections
allSegments = doubleSegments+singleSegments
for r in crossovers:
seg1,seg2 = [allSegments[hmap[i]] for i in (r,r+1)]
nt1,nt2 = [hrank[i] for i in (r,r+1)]
f1,f2 = [fwd[i] for i in (r,r+1)]
if nt1 in (0,seg1.num_nt) or nt2 in (0,seg2.num_nt):
seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_="terminal_crossover")
else:
seg1.add_crossover(nt1,seg2,nt2,[f1,f2])
## Add 5prime/3prime ends
for r in prime5:
seg = allSegments[hmap[r]]
seg.add_5prime(hrank[r],fwd[r])
for r in prime3:
seg = allSegments[hmap[r]]
seg.add_3prime(hrank[r],fwd[r])
## Assign sequence
for hid in range(len(allSegments)):
resids = np.sort(np.where( (hmap==hid)*(fwd==1) )[0])
s = allSegments[hid]
s.sequence = [resname_to_key[u.residues[r].resname]
for r in resids]
## Build model
model = SegmentModel( allSegments,
max_basepairs_per_bead = 5,
max_nucleotides_per_bead = 5,
local_twist = False,
dimensions=(5000,5000,5000))
return model
if __name__ == "__main__":
# u = mda.Universe("test-ssdna.psf", "test-ssdna.pdb")
# u = mda.Universe("test-ssdna.pdb")
# model = SegmentModelFromPdb("test-ssdna.pdb")
# model = SegmentModelFromPdb("nanoengineer2pdb/nanoengineer2pdb.pdb")
model = SegmentModelFromPdb("test-twist.pdb")
# model = SegmentModelFromPdb("/data/server7/6hb.pdb")
model._clear_beads()
model._generate_atomic_model(scale=1.0)
model._write_atomic_ENM( 'test-from-pdb', lattice_type="honeycomb" )
model.atomic_simulate( outputPrefix = 'test-from-pdb' )
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment