Skip to content
Snippets Groups Projects

Pinyili2

Merged pinyili2 requested to merge pinyili2 into master
+ 225
0
from mrdna import logger, devlogger
from .segmentmodel_from_lists import model_from_basepair_stack_3prime
from ..arbdmodel.coords import rotationAboutAxis
import pickle
import pandas as pd
import sys
import mrdna.readers.libs as libs
import numpy as np
from scipy.spatial import distance_matrix
pd.options.mode.chained_assignment = None # default='warn'
_seq_to_int_dict = dict(A=0,T=1,C=2,G=3)
_seq_to_int_dict = {k:str(v) for k,v in _seq_to_int_dict.items()}
_yrot = rotationAboutAxis(axis=(0,1,0), angle=180).dot(rotationAboutAxis(axis=(0,0,1),angle=-40))
def mrdna_model_from_oxdna(coordinate_file, topology_file,virt2nuc=None,get_nt_prop=False, **model_parameters):
""" Construct an mrdna model from oxDNA coordinate and topology files """
top_data = np.loadtxt(topology_file, skiprows=1,
unpack=True,
dtype=np.dtype('i4,U1,i4,i4')
)
conf_data = np.loadtxt(coordinate_file, skiprows=3)
def _get_bp(sequence=None):
dists = distance_matrix(r,basepair_pos) + np.eye(len(r))*1000
dists = 0.5*(dists + dists.T)
bp = np.array([np.argmin(da) for da in dists])
for i,j in enumerate(bp):
if j == -1: continue
# devlogger.info(f'bp {i} {j} {dists[i,j]}')
if dists[i,j] > 2:
bp[i] = -1
elif bp[j] != i:
bpj = bp[j]
logger.warning( " ".join([str(_x) for _x in ["Bad pair", i, j, bp[i], bp[j], dists[i,j], dists[j,i], dists[bpj,j], dists[j,bpj]]]) )
for i,j in enumerate(bp):
if j == -1: continue
if bp[j] != i:
bpj = bp[j]
logger.warning( " ".join([str(_x) for _x in ["Bad pair2", i, j, bp[i], bp[j], dists[i,j], dists[j,i], dists[bpj,j], dists[j,bpj]]]) )
raise Exception
if sequence is not None:
seq = sequence
bp_seq = sequence[bp]
bp_seq[bp==-1] = 'X'
bad_bps = np.where( (bp >= 0) &
(((seq == 'C') & (bp_seq != 'G')) |
((seq == 'G') & (bp_seq != 'C')) |
((seq == 'T') & (bp_seq != 'A')) |
((seq == 'U') & (bp_seq != 'A')) |
((seq == 'A') & ((bp_seq != 'T') | (bp_seq != 'U')))
) )[0]
bp[bp[bad_bps]] = -1
bp[bad_bps] = -1
return bp
def _get_stack():
dists = distance_matrix( r + 3.5*normal_dir + 2.1*perp_dir -1*base_dir, r ) + np.eye(len(r))*1000
stack = np.array([np.argmin(da) for da in dists])
for i,j in enumerate(stack):
if dists[i,j] > 8:
stack[i] = -1
elif i < 10:
## development info
# devlogger.info([i,j,dists[i,j]])
# dr = r[j] - (r[i] - normal_dir[i]*3.4 + perp_dir[i]*1 + base_dir[i]*1)
dr = r[j] - r[i]
# devlogger.info([normal_dir[i].dot(dr), perp_dir[i].dot(dr), base_dir[i].dot(dr)])
return np.array(stack)
def _find_vh_vb_table(s,is_scaf):
L=[]
for i in list(s.keys()):
vh,zid=i
strand,indices=s[i]
if len(indices)==0:
continue
else:
if len(indices)==1:
zids=[str(zid)]
else:
zids=[str(zid)+"."+str(j) for j in range(len(indices))]
for index,z in zip(indices,zids):
L.append(pd.Series({"index":index,"vh":vh,"zid":z,"strand":strand,"is_scaf":bool(is_scaf)}))
return L
def get_virt2nuc(virt2nuc,top_data):
sys.modules["libs"]=libs
logger.info("""You are using a feature that depends on the tacoxdna library: A. Suma et al., 2019, "TacoxDNA: A user‐friendly web server for simulations of complex DNA structures, from single strands to origami", J. Comput. Chem. 40, 2586""")
virt_pickle=open(virt2nuc,"rb")
vh_vb,pattern=pickle.load(virt_pickle)
L1=_find_vh_vb_table(vh_vb._scaf,1)
L2=_find_vh_vb_table(vh_vb._stap,0)
nt_prop=pd.DataFrame(L1+L2,dtype=object)
nt_prop["index"]=nt_prop["index"].astype(int)
nt_prop.set_index("index",inplace=True)
nt_prop.sort_index(inplace=True)
nt_prop["threeprime"]=top_data[2]
nt_prop["seq"]=top_data[1]
nt_prop["stack"]=top_data[2]
vh_bool=1-(nt_prop["vh"]%2)*2
is_scaf_bool=nt_prop["is_scaf"]*2-1
nt_prop["fwd"]=np.array((is_scaf_bool.T*vh_bool+1)/2,dtype=bool)
bp_map=dict(zip(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]),nt_prop.index))
for i in nt_prop.index:
if nt_prop.loc[i]["threeprime"] in nt_prop.index:
if nt_prop.loc[nt_prop.loc[i]["threeprime"]]["vh"]!=nt_prop.loc[i]["vh"]:
nt_prop["stack"][i]=-1
bp=-np.ones(len(nt_prop.index),dtype=int)
counter=0
for i,j,k in zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]):
try:
bp[counter]=bp_map[(i,j,not(k))]
except:
pass
counter+=1
nt_prop["bp"]=bp
non_stack_ind,=np.where(nt_prop["stack"]==-1)
for i in non_stack_ind:
zid=int(nt_prop.loc[i]["zid"])+int(nt_prop.loc[i]["fwd"])*2-1
try:
nt_prop["stack"][i]=bp_map[(nt_prop.loc[i]["vh"],str(zid),nt_prop.loc[i]["fwd"])]
except:
continue
return nt_prop
try:
nt_prop=get_virt2nuc(virt2nuc,top_data)
r=conf_data[:,:3] * 8.518
base_dir = conf_data[:,3:6]
# basepair_pos = r + base_dir*6.0
basepair_pos = r + base_dir*10.0
normal_dir = -conf_data[:,6:9]
perp_dir = np.cross(base_dir, normal_dir)
orientation = np.array([np.array(o).T.dot(_yrot) for o in zip(perp_dir,-base_dir,-normal_dir)])
seq=nt_prop["seq"]
bp=nt_prop["bp"]
stack=nt_prop["stack"]
three_prime=nt_prop["threeprime"]
nt_prop["orientation"]=orientation
nt_prop["r"]=list(r)
except:
if virt2nuc is not None:
logger.info("warning: virt2nuc not read. guessing structure...")
## Reverse direction so indices run 5'-to-3'
top_data = [a[::-1] for a in top_data]
conf_data = conf_data[::-1,:]
r = conf_data[:,:3] * 8.518
base_dir = conf_data[:,3:6]
# basepair_pos = r + base_dir*6.0
basepair_pos = r + base_dir*10.0
normal_dir = -conf_data[:,6:9]
perp_dir = np.cross(base_dir, normal_dir)
orientation = np.array([np.array(o).T.dot(_yrot) for o in zip(perp_dir,-base_dir,-normal_dir)])
seq = top_data[1]
bp = _get_bp(seq)
stack = _get_stack()
three_prime = len(r) - top_data[2] -1
five_prime = len(r) - top_data[3] -1
three_prime[three_prime >= len(r)] = -1
five_prime[five_prime >= len(r)] = -1
nt_prop=pd.DataFrame({"r":list(r),"bp":list(bp),"stack":list(stack),"threeprime":list(three_prime), "seq":list(seq),"orientation":list(orientation)})
def _debug_write_bonds():
from ..arbdmodel import ParticleType, PointParticle, ArbdModel, Group
bond = tuple()
b_t = ParticleType('BASE')
p_t = ParticleType('PHOS')
parts = []
for i,(r0,r_bp,three_prime0,bp0,stack0,seq0) in enumerate(zip(r,basepair_pos, three_prime, bp, stack, seq)):
p = PointParticle(p_t, name='PHOS', position = r0, resid=i)
b = PointParticle(b_t, name=seq0, position = 0.5*(r0+r_bp), resid=i)
parts.extend((p,b))
model = ArbdModel(parts)
model.writePdb('test.pdb')
for i,(r0,r_bp,three_prime0,bp0,stack0) in enumerate(zip(r,basepair_pos, three_prime, bp, stack)):
model.add_bond(parts[2*i],parts[2*i+1],bond)
j = three_prime0
if j >= 0:
model.add_bond(parts[2*i],parts[2*j],bond)
j = bp0
if j >= 0:
model.add_bond(parts[2*i+1],parts[2*j+1],bond)
model.writePsf('test.psf')
model.bonds = []
for i,(r0,r_bp,three_prime0,bp0,stack0) in enumerate(zip(r,basepair_pos, three_prime, bp, stack)):
j = stack0
if j >= 0:
model.add_bond(parts[2*i],parts[2*j],bond)
model.writePsf('test.stack.psf')
## _debug_write_bonds()
logger.info(f'mrdna_model_from_oxdna: num_bp, num_ss_nt, num_stacked: {np.sum(bp>=0)//2} {np.sum(bp<0)} {np.sum(stack >= 0)}')
model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
"""
model.DEBUG = True
model.generate_bead_model(1,1,False,True,one_bead_per_monomer=True)
for seg in model.segments:
for bead in seg.beads:
bead.position = bead.position + np.random.standard_normal(3)
simulate( model, output_name='test', directory='test4' )
"""
model._dataframe=nt_prop
return model
if __name__ == "__main__":
mrdna_model_from_oxdna("0-from-collab/nanopore.oxdna","0-from-collab/nanopore.top")
# mrdna_model_from_oxdna("2-oxdna.manual/output/from_mrdna-oxdna-min.last.conf","0-from-collab/nanopore.top")
Loading