Skip to content
Snippets Groups Projects
Commit 08f835d2 authored by pinyili2's avatar pinyili2
Browse files

add

parent 47aef9dc
No related branches found
No related tags found
1 merge request!1Pinyili2
File added
# -*- coding: utf-8 -*-
import numpy as np
import os,sys
from glob import glob
import re
import pandas as pd
pd.options.mode.chained_assignment = None # default='warn'
from .segmentmodel_from_lists import model_from_basepair_stack_3prime
from ..arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis
from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
from ..model.dna_sequence import m13 as m13seq
import json
import re
import pdb
## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
## TODO: catch circular strands in "get_5prime" cadnano calls
## TODO: handle special motifs
## - doubly-nicked helices
## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
## - circular constructs
def get_lattice(part):
lattice_type = None
_gt = part.getGridType()
try:
lattice_type = _gt.name.lower()
except:
if _gt == 1:
lattice_type = 'square'
elif _gt == 2:
lattice_type = 'honeycomb'
else:
print(lattice_type)
return lattice_type
def read_json_file(filename):
import cadnano
from cadnano.document import Document
try:
with open(filename) as ch:
json_data = json.load(ch)
except:
with open(filename) as ch:
content = ""
for l in ch:
l = re.sub(r"'", r'"', l)
# https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
# l = re.sub(r"{\s*(\w)", r'{"\1', l)
# l = re.sub(r",\s*(\w)", r',"\1', l)
# l = re.sub(r"(\w):", r'\1":', l)
content += l+"\n"
json_data = json.loads(content)
try:
doc = Document()
cadnano.fileio.v3decode.decode(doc, json_data)
decoder = 3
except:
doc = Document()
cadnano.fileio.v2decode.decode(doc, json_data)
decoder = 2
parts = [p for p in doc.getParts()]
if len(parts) != 1:
raise Exception("Only documents containing a single cadnano part are implemented at this time.")
part = parts[0]
if decoder == 2:
""" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here """
l = get_lattice(part)
if l == 'square':
for id_num in part.getIdNums():
if part.vh_properties.loc[id_num,'eulerZ'] == 0:
part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)
df=pd.DataFrame(json_data["vstrands"])
n_df=df.set_index("num")
return part
def get_helix_angle(part, helix_id, indices):
""" Get "start_orientation" for helix """
# import ipdb
# ipdb.set_trace()
""" FROM CADNANO2.5
+ angle is CCW
- angle is CW
Right handed DNA rotates clockwise from 5' to 3'
we use the convention the 5' end starts at 0 degrees
and it's pair is minor_groove_angle degrees away
direction, hence the minus signs. eulerZ
"""
hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,
['helical_pitch',
'bases_per_repeat',
'turns_per_repeat',
'eulerZ',
'minor_groove_angle']]
twist_per_base = tpr*360./bpr
# angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180
angle = eulerZ + twist_per_base*indices - 0.5*mgroove
return rotationAboutAxis(np.array((0,0,1)),angle)
def gen_id_series(strand,part):
df=pd.DataFrame(columns=["vh","zid","fwd","stack_tuple","threeprime_tuple","x","y","z"],index=range(strand.totalLength()),dtype=object)
df["vh"]=strand._id_num
df["fwd"]=strand.isForward()
df["x"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10
df["y"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10
id_lo,id_hi=strand.idxs()
zids=[str(i) for i in range(id_lo,id_hi+1)]
insert_dict={}
insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])
z=np.arange(id_lo,id_hi+1)
zids=[str(i) for i in range(id_lo,id_hi+1)]
z=list(np.arange(id_lo,id_hi+1))
zids=[str(i) for i in range(id_lo,id_hi+1)]
for insert_base in insert_dict:
z_ind=zids.index(str(insert_base))
z_val=insert_dict[insert_base]
z_pos_ind=z.index(insert_base)
zids.pop(z_ind)
z.pop(z_pos_ind)
if z_val!=-1:
#l=[str(insert_base)+"."+str(i) for i in range(z_val+1)]
l=list(range(z_val+1))
l.reverse()
for k in l:
zids.insert(z_ind,str(insert_base)+"."+str(k))
z.insert(z_pos_ind,insert_base+k/(z_val+1))
df["zid"]=zids
df["z"]=np.array(z)*3.4
L=[(df["vh"][i],df["zid"][i],df["fwd"][i]) for i in df.index]
if strand.isForward()==True:
df["stack_tuple"]=L[1:]+[-1]
if strand.connection3p() is None:
df["threeprime_tuple"]=L[1:]+[-1]
else:
df["threeprime_tuple"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]
else:
df["stack_tuple"]=[-1]+L[0:-1]
if strand.connection3p() is None:
df["threeprime_tuple"]=[-1]+L[0:-1]
else:
df["threeprime_tuple"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1]
## cadnano 3.1 sequence assign is wrong if there is insertion or deletion.
df["r"]=[np.array([df["x"][i],df["y"][i],df["z"][i]],dtype=np.float32) for i in df.index]
return [pd.Series(df.loc[i]) for i in df.index]
def gen_prop_table(part):
strand_set=[]
for i in part.getidNums():
fwd,rev=part.getStrandSets(i)
[strand_set.append(i) for i in fwd.strands()]
[strand_set.append(i) for i in rev.strands()]
id_series=[]
for i in strand_set:
id_series=id_series+gen_id_series(i,part)
nt_prop=pd.DataFrame(id_series)
nt_prop.reset_index(inplace=True)
nt_prop["seq"]=-1
ind_tuple=list(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]))
stacks=[]
for i in list(nt_prop["stack_tuple"]):
if i ==-1:
stacks.append(i)
else:
stacks.append(ind_tuple.index(i))
nt_prop["stack"]=stacks
tprime=[]
for i in list(nt_prop["threeprime_tuple"]):
if i ==-1:
tprime.append(i)
else:
tprime.append(ind_tuple.index(i))
nt_prop["threeprime"]=tprime
vhzid=list(zip(nt_prop["vh"],nt_prop["zid"]))
nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid]
nt_prop=nt_prop.fillna(-1)
counter=-1
bp=-np.ones(len(nt_prop.index),dtype=int)
bp_map=dict(zip(ind_tuple,nt_prop.index))
for i,j,k in ind_tuple:
counter+=1
try:
bp[counter]=bp_map[(i,j,not(k))]
except:
pass
nt_prop["bp"]=bp
return nt_prop
def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters):
part=read_json_file(json_file)
nt_prop=gen_prop_table(part)
if seq is None:
if nt_prop["seq"][0]==-1:
seq=None
else:
seq=nt_prop["seq"]
r=np.array(list(nt_prop['r']))
bp=np.array(list(nt_prop['bp']))
three_prime=np.array((list(nt_prop["threeprime"])))
stack=np.array((list(nt_prop["stack"])))
orientation=np.array(list(nt_prop["orientation"]))
model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
return model
......@@ -85,9 +85,12 @@ def gen_strand(strand,design,group_index=0):
strand_df["threeprime"]=list(strand_df.index[1:]+group_index)+[-1]
stack=np.where(strand_df["stacked"],strand_df["threeprime"],-1)
strand_df["stack"]=stack
if len(strand.dna_sequence)==len(strand_df.index):
strand_df["seq"]=list(strand.dna_sequence)
else:
try:
if len(strand.dna_sequence)==len(strand_df.index):
strand_df["seq"]=list(strand.dna_sequence)
else:
strand_df["seq"]=-1
except:
strand_df["seq"]=-1
strand_df["index"]=strand_df.index+group_index
#strand_df["seg_index"]=index
......@@ -128,10 +131,12 @@ def mrdna_model_from_scadnano(sc_file,seq=None,**model_parameters):
bp=np.array(list(nt_prop['bp']))
three_prime=np.array((list(nt_prop["threeprime"])))
stack=np.array((list(nt_prop["stack"])))
if nt_prop["seq"][0]==-1:
seq=None
else:
seq=nt_prop["seq"]
orientation=[rotationAboutAxis(np.array((0,0,1)),i) for i in list(nt_prop["orientation_angle"])]
model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
model._dataframe=nt_prop
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment