Skip to content
Snippets Groups Projects
Commit 56790814 authored by cmaffeo2's avatar cmaffeo2
Browse files

Added script for building model from pdb

parent e138ec52
No related branches found
No related tags found
No related merge requests found
# -*- coding: utf-8 -*-
import pdb
import numpy as np
import os,sys
from glob import glob
import re
import MDAnalysis as mda
import MDAnalysis.analysis.align as align
from MDAnalysis.analysis.distances import distance_array
from segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
arbd="/home/cmaffeo2/development/cuda/arbd.dbg/src/arbd" # reduced the mem footprint cause vmd
namd="/home/cmaffeo2/development/namd-bin/NAMD_Git-2017-07-06_Linux-x86_64-multicore-CUDA/namd2"
def readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'):
seq = []
with open(sequenceFile) as ch:
for l in ch:
l = l.strip().replace(" ", "")
if l[0] in (";","#"): continue
seq.extend([c.upper() for c in l])
return seq
m13seq = readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat')
resnames = dict(A='ADE DA DA5 DA3', T='THY DT DT5 DT3',
C='CYT DC DC5 DC3', G='GUA DG DG5 DG3')
bp_bond_atoms = dict( A='N1', T='H3',
C='N3', G='H1' )
bases = resnames.keys()
resname_to_key = {n:k for k,v in resnames.items() for n in v.split()}
complement = dict(A='T', C='G', T='A', G='C')
refUnis = {b:mda.Universe("resources/generate-nts/1-w3dna/{}{}.pdb".format(b,complement[b]))
for b in bases}
stack_text = "not name O5' P O1P O2P OP1 OP2 O3' H*"
ref_name_ids_inv = dict()
for key,val in refUnis.items():
ref_res = val.residues[0]
names = ref_res.atoms.select_atoms( stack_text ).atoms.names
ref_name_ids_inv[key] = np.argsort(np.argsort(names))
## TODO: possibly improve ref_stack_position
# avg_stack_position = [ru.residues[0].atoms.select_atoms(stack_text).center_of_mass()
# for k,ru in refUnis.items()]
# avg_stack_position = np.mean(avg_stack_position,axis=0)
# print(avg_stack_position)
## Obtained by apply stack transform to base
ref_stack_position = np.array((-2.41851735, -0.259761333, 3.39999978))
ref_bp_position = np.array((0, -8.7434375, -0.326354))
def residues_within( cutoff, coords1, coords2, sel1, sel2, universe, selection_text ):
distance_matrix = distance_array( coords1, coords2 )
if sel1 == sel2:
## ignore diagonal
distance_matrix = distance_matrix + np.eye(len(distance_matrix))*10*cutoff
# arr_idxs = np.where((distance_matrix > 0) * (distance_matrix < cutoff))
arr_idxs = np.where((distance_matrix < cutoff))
arr_idx1,arr_idx2 = arr_idxs
atoms = universe.select_atoms(selection_text)
within = -np.ones( atoms.resindices[-1]+1, dtype=np.int )
if len(arr_idx1) == len(np.unique(arr_idx1)) and len(arr_idx2) == len(np.unique(arr_idx2)):
within[sel1.resindices[arr_idx1]] = sel2.resindices[arr_idx2]
else:
unique,ids,counts = np.unique(arr_idx1, return_index=True, return_counts=True)
res1 = sel1.resindices[unique[counts == 1]]
res2 = sel2.resindices[arr_idx2[ids[counts == 1]]]
within[res1] = res2
for i in np.where(counts > 1)[0]:
ids2 = arr_idx1 == unique[i]
ij = np.argmin(distance_matrix[arr_idx1[ids2],arr_idx2[ids2]])
res1 = sel1.resindices[arr_idx1[ids2][ij]]
res2 = sel2.resindices[arr_idx2[ids2][ij]]
within[res1] = res2
# within[sel1.resindices[arr_idx1]] = sel2.resindices[arr_idx2]
return within
def find_base_position_orientation( u, selection_text='all' ):
## Find orientation and center of each nucleotide
transforms,centers = [[],[]]
for r in u.select_atoms(selection_text).residues:
key = resname_to_key[r.resname]
ref_res = refUnis[key].residues[0]
ref,sel = [res.atoms.select_atoms( stack_text ) for res in (ref_res,r)]
assert(len(ref) == len(sel))
## Find order for sel
inv = ref_name_ids_inv[key]
# sel_order = inv[np.argsort(sel.atoms.names)]
sel_order = np.argsort(sel.atoms.names)[inv]
assert( (ref.atoms.names == sel.atoms.names[sel_order]).all() )
## Reorder coordinates in sel so atom names match
c = sel.atoms.center_of_mass()
R,rmsd = align.rotation_matrix(sel.positions[sel_order] - c,
ref.positions - ref.atoms.center_of_mass())
R = np.array(R) # convert from mda matrix to np array
transforms.append(R)
centers.append(c)
centers = np.array(centers, dtype=np.float32)
transforms = np.array(transforms)
return centers,transforms
def find_basepairs( u, centers, transforms, selection_text='all' ):
bonds = { b:"resname {} and name {}".format(resnames[b],bp_bond_atoms[b])
for b in bases }
sel1 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
bonds['A'],bonds['C']))
sel2 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
bonds['T'],bonds['G']))
possible_basepairs = residues_within( cutoff = 2.2,
coords1=sel1.positions,
coords2=sel2.positions,
sel1 = sel1, sel2 = sel2,
universe = u,
selection_text = selection_text )
## Filter by expected position
ids = possible_basepairs >= 0
possible_resI = np.where( ids )[0]
possible_resJ = possible_basepairs[ ids ].astype(int)
resI,resJ = [[],[]]
for i,j,R1,c1,c2 in zip(possible_resI,possible_resJ,
transforms[possible_resI],
centers[possible_resI],
centers[possible_resJ]):
c2_expected = c1 + ref_bp_position.dot(R1)
if np.linalg.norm(c2_expected-c2) < 1:
resI.append(i)
resJ.append(j)
resI = np.array(resI, dtype=np.int)
resJ = np.array(resJ, dtype=np.int)
## Add reciprocal basepairs
assert( (possible_basepairs[resJ] == -1).all() )
basepairs = -np.ones(possible_basepairs.shape, dtype=np.int)
basepairs[resI] = resJ
basepairs[resJ] = resI
return basepairs
def find_stacks( u, centers, transforms, selection_text='all' ):
## Find orientation and center of each nucleotide
expected_stack_positions = []
for R,c in zip(transforms,centers):
expected_stack_positions.append( c + ref_stack_position.dot(R) )
expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)
sel = u.select_atoms("({}) and name C1'".format( selection_text ))
stacks_above = residues_within( cutoff = 3.5,
coords1=centers,
coords2=expected_stack_positions,
sel1 = sel, sel2 = sel,
universe = u, selection_text = selection_text )
return stacks_above
def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):
helixmap = -np.ones(basepairs.shape, dtype=np.int)
helixrank = -np.ones(basepairs.shape)
is_fwd = np.ones(basepairs.shape, dtype=np.int)
## Remove stacks with nts lacking a basepairs
nobp = np.where(basepairs < 0)[0]
stacks_above[nobp] = -1
stacks_with_nobp = np.in1d(stacks_above, nobp)
stacks_above[stacks_with_nobp] = -1
end_ids = np.where( (stacks_above < 0)*(basepairs >= 0) )[0]
hid = 0
for end in end_ids:
if helixmap[end] >= 0:
continue
rank = 0
nt = basepairs[end]
bp = basepairs[nt]
assert(helixmap[nt] == -1)
assert(helixmap[bp] == -1)
helixmap[nt] = helixmap[bp] = hid
helixrank[nt] = helixrank[bp] = rank
is_fwd[bp] = 0
rank +=1
while stacks_above[nt] >= 0:
nt = stacks_above[nt]
if basepairs[nt] < 0: break
bp = basepairs[nt]
assert(helixmap[nt] == -1)
assert(helixmap[bp] == -1)
helixmap[nt] = helixmap[bp] = hid
helixrank[nt] = helixrank[bp] = rank
is_fwd[bp] = 0
rank +=1
hid += 1
return helixmap, helixrank, is_fwd
def get_crossovers(helixmap,helixrank,stacks_above):
...
def update_helixmap_with_ssdna(helixmap,helixrank,stacks_above):
...
ss_residues = hmap < 0
helixmap
def break_into_contiguous_groups(sequence):
from operator import itemgetter
from itertools import groupby
return [map(itemgetter(1),g)
for i,g in groupby(enumerate(sequence), lambda i,x:i-x)]
def SegmentModelFromPdb(*args,**kwargs):
u = mda.Universe(*args,**kwargs)
for a in u.select_atoms("resname DT* and name C7").atoms:
a.name = "C5M"
## Find basepairs and base stacks
centers,transforms = find_base_position_orientation(u)
bps = find_basepairs(u, centers, transforms)
stacks = find_stacks(u, centers, transforms)
## Build map from residue index to helix index
hmap,hrank,fwd = basepairs_and_stacks_to_helixmap(bps,stacks)
double_stranded_helices = np.unique(hmap[hmap >= 0])
## Add ssDNA to hmap
hid = double_stranded_helices[-1]+1
ss_residues = hmap < 0
assert( np.all(bps[ss_residues] == -1) )
for s in u.segments:
r = s.atoms.select_atoms("name C1'").resindices
ss_residues = r[np.where(hmap[r] < 0)[0]]
if len(ss_residues) == 0: continue
resid_diff = np.diff(ss_residues)
tmp = np.where( resid_diff != 1 )[0]
first_res = ss_residues[0]
for i in tmp:
ids = np.arange(first_res, ss_residues[i]+1)
assert( np.all(hmap[ids] == -1) )
hmap[ids] = hid
hrank[ids] = ids-first_res
first_res = ss_residues[i+1]
hid += 1
ids = np.arange(first_res, ss_residues[-1]+1)
assert( np.all(hmap[ids] == -1) )
hmap[ids] = hid
hrank[ids] = ids-first_res
hid+=1
print(hmap)
single_stranded_helices = np.arange(double_stranded_helices[-1]+1,hid)
## Create double-stranded segments
doubleSegments = []
for hid in double_stranded_helices:
maxrank = np.max( hrank[hmap==hid] )
start = np.where((hmap == hid) * (hrank == 0))[0]
end = np.where((hmap == hid) * (hrank == maxrank))[0]
start_pos = np.mean( [u.atoms.residues[r].atoms.center_of_mass() for r in start], axis=0 )
end_pos = np.mean( [u.atoms.residues[r].atoms.center_of_mass() for r in end], axis=0 )
seg = DoubleStrandedSegment(name=str(hid),
num_bp = np.sum(hmap==hid)//2,
start_position = start_pos,
end_position = end_pos)
assert(hid == len(doubleSegments))
doubleSegments.append(seg)
## Create single-stranded segments
singleSegments = []
for hid in single_stranded_helices:
maxrank = np.max( hrank[hmap==hid] )
start = np.where((hmap == hid) * (hrank == 0))[0]
end = np.where((hmap == hid) * (hrank == maxrank))[0]
start_pos = np.mean( [u.atoms.residues[r].atoms.center_of_mass() for r in start], axis=0 )
end_pos = np.mean( [u.atoms.residues[r].atoms.center_of_mass() for r in end], axis=0 )
seg = SingleStrandedSegment(name=str(hid),
num_nt = np.sum(hmap==hid),
start_position = start_pos,
end_position = end_pos)
assert(hid == len(doubleSegments) + len(singleSegments))
singleSegments.append(seg)
## Find crossovers and 5prime/3prime ends
crossovers,prime5,prime3 = [[],[],[]]
for s in u.segments:
r = s.atoms.select_atoms("name C1'").resindices
tmp = np.where(np.diff(hmap[r]) != 0)[0]
crossovers.extend( r[tmp] )
prime5.append(r[0])
prime3.append(r[-1])
## Add connections
allSegments = doubleSegments+singleSegments
for r in crossovers:
seg1,seg2 = [allSegments[hmap[i]] for i in (r,r+1)]
nt1,nt2 = [hrank[i] for i in (r,r+1)]
f1,f2 = [fwd[i] for i in (r,r+1)]
if nt1 in (0,seg1.num_nt) or nt2 in (0,seg2.num_nt):
seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_="terminal_crossover")
else:
seg1.add_crossover(nt1,seg2,nt2,[f1,f2])
## Add 5prime/3prime ends
for r in prime5:
seg = allSegments[hmap[r]]
seg.add_5prime(hrank[r],fwd[r])
for r in prime3:
seg = allSegments[hmap[r]]
seg.add_3prime(hrank[r],fwd[r])
## Assign sequence
for hid in range(len(allSegments)):
resids = np.sort(np.where( (hmap==hid)*(fwd==1) )[0])
s = allSegments[hid]
s.sequence = [resname_to_key[u.residues[r].resname]
for r in resids]
## Build model
model = SegmentModel( allSegments,
max_basepairs_per_bead = 5,
max_nucleotides_per_bead = 5,
local_twist = False,
dimensions=(5000,5000,5000))
return model
if __name__ == "__main__":
# u = mda.Universe("test-ssdna.psf", "test-ssdna.pdb")
# u = mda.Universe("test-ssdna.pdb")
# model = SegmentModelFromPdb("test-ssdna.pdb")
# model = SegmentModelFromPdb("nanoengineer2pdb/nanoengineer2pdb.pdb")
model = SegmentModelFromPdb("test-twist.pdb")
# model = SegmentModelFromPdb("/data/server7/6hb.pdb")
model._clear_beads()
model._generate_atomic_model(scale=1.0)
model._write_atomic_ENM( 'test-from-pdb', lattice_type="honeycomb" )
model.atomic_simulate( outputPrefix = 'test-from-pdb' )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment