Skip to content
Snippets Groups Projects
Commit 219c598c authored by pinyili2's avatar pinyili2
Browse files

update

parent 534a875f
No related branches found
No related tags found
1 merge request!1Pinyili2
...@@ -13,34 +13,17 @@ from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSe ...@@ -13,34 +13,17 @@ from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSe
from ..model.dna_sequence import m13 as m13seq from ..model.dna_sequence import m13 as m13seq
import json import json
import re import re
import cadnano
from cadnano.document import Document
## Only testing on cadnano2.5
## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined ## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
## TODO: catch circular strands in "get_5prime" cadnano calls ## TODO: catch circular strands in "get_5prime" cadnano calls
## TODO: handle special motifs ## TODO: handle special motifs
## - doubly-nicked helices ## - doubly-nicked helices
## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix) ## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
## - circular constructs ## - circular constructs
def get_lattice(part):
lattice_type = None
_gt = part.getGridType()
try:
lattice_type = _gt.name.lower()
except:
if _gt == 1:
lattice_type = 'square'
elif _gt == 2:
lattice_type = 'honeycomb'
else:
print("WARNING: unable to determine cadnano part lattice type")
return lattice_type
def read_json_file(filename): def read_json_file(filename):
try: try:
with open(filename) as ch: with open(filename) as ch:
json_data = json.load(ch) json_data = json.load(ch)
...@@ -55,7 +38,11 @@ def read_json_file(filename): ...@@ -55,7 +38,11 @@ def read_json_file(filename):
# l = re.sub(r"(\w):", r'\1":', l) # l = re.sub(r"(\w):", r'\1":', l)
content += l+"\n" content += l+"\n"
json_data = json.loads(content) json_data = json.loads(content)
return json_data
def cadnano_parts(json_data):
import cadnano
from cadnano.document import Document
try: try:
doc = Document() doc = Document()
cadnano.fileio.v3decode.decode(doc, json_data) cadnano.fileio.v3decode.decode(doc, json_data)
...@@ -77,9 +64,23 @@ def read_json_file(filename): ...@@ -77,9 +64,23 @@ def read_json_file(filename):
for id_num in part.getIdNums(): for id_num in part.getIdNums():
if part.vh_properties.loc[id_num,'eulerZ'] == 0: if part.vh_properties.loc[id_num,'eulerZ'] == 0:
part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5) part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)
df=pd.DataFrame(json_data["vstrands"]) df=pd.DataFrame(json_data["vstrands"])
n_df=df.set_index("num") n_df=df.set_index("num")
return part,n_df return part
def get_lattice(part):
lattice_type = None
_gt = part.getGridType()
try:
lattice_type = _gt.name.lower()
except:
if _gt == 1:
lattice_type = 'square'
elif _gt == 2:
lattice_type = 'honeycomb'
else:
print("WARNING: unable to determine cadnano part lattice type")
return lattice_type
def get_helix_angle(part, helix_id, indices): def get_helix_angle(part, helix_id, indices):
""" Get "start_orientation" for helix """ """ Get "start_orientation" for helix """
...@@ -106,81 +107,117 @@ def get_helix_angle(part, helix_id, indices): ...@@ -106,81 +107,117 @@ def get_helix_angle(part, helix_id, indices):
angle = eulerZ + twist_per_base*indices - 0.5*mgroove angle = eulerZ + twist_per_base*indices - 0.5*mgroove
return rotationAboutAxis(np.array((0,0,1)),angle) return rotationAboutAxis(np.array((0,0,1)),angle)
def nttype(scafs): def gen_id_series(strand,part):
def judge(i): df=pd.DataFrame(columns=["vh","zid","fwd","stack_tuple","threeprime_tuple","x","y","z"],index=range(strand.totalLength()),dtype=object)
if i ==[-1,-1,-1,-1]: df["vh"]=strand._id_num
return 0 df["fwd"]=strand.isForward()
else: return 1 df["x"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10
n=np.array([judge(i) for i in scafs]) df["y"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10
return n id_lo,id_hi=strand.idxs()
zids=[str(i) for i in range(id_lo,id_hi+1)]
insert_dict={}
def gen_prop_table(json_file): insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])
part,vslist=read_json_file(json_file) z=np.arange(id_lo,id_hi+1)
props = part.getModelProperties().copy() zids=[str(i) for i in range(id_lo,id_hi+1)]
try: z=list(np.arange(id_lo,id_hi+1))
if props.get('point_type') == PointType.ARBITRARY: zids=[str(i) for i in range(id_lo,id_hi+1)]
# TODO add code to encode Parts with ARBITRARY point configurations for insert_base in insert_dict:
raise NotImplementedError("Not implemented") z_ind=zids.index(str(insert_base))
except: z_val=insert_dict[insert_base]
try: z_pos_ind=z.index(insert_base)
vh_props, origins = part.helixPropertiesAndOrigins() zids.pop(z_ind)
except: z.pop(z_pos_ind)
origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()} if z_val!=-1:
scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index] #l=[str(insert_base)+"."+str(i) for i in range(z_val+1)]
stap_id=[nttype(vslist['stap'][i]) for i in vslist.index] l=list(range(z_val+1))
cad_bps=part.getIndices(0) l.reverse()
vslist["scafnt"]=np.sum(np.array(scaf_id),axis=1) for k in l:
vslist["stapnt"]=np.sum(np.array(stap_id),axis=1) zids.insert(z_ind,str(insert_base)+"."+str(k))
totnt=np.sum(vslist["scafnt"])+np.sum(vslist["stapnt"]) z.insert(z_pos_ind,insert_base+k/(z_val+1))
is_scaf=np.zeros(totnt,dtype=bool) df["zid"]=zids
is_scaf[0:np.sum(vslist["scafnt"])]=1 df["z"]=np.array(z)*3.4
nt_prop=pd.DataFrame(index=range(totnt),columns=["vh","zid","is_scaf","r","bp","stack","threeprime","seq","orientation"])
nt_prop["is_scaf"]=is_scaf
tot_id=scaf_id+stap_id L=[(df["vh"][i],df["zid"][i],df["fwd"][i]) for i in df.index]
vhi,zidi=np.where(np.array(scaf_id)==1) if strand.isForward()==True:
vhj,zidj=np.where(np.array(stap_id)==1) df["stack_tuple"]=L[1:]+[-1]
vhi=vslist.index[vhi] if strand.connection3p() is None:
vhj=vslist.index[vhj] df["threeprime_tuple"]=L[1:]+[-1]
nt_prop["vh"]=list(vhi)+list(vhj) else:
nt_prop["zid"]=list(zidi)+list(zidj) df["threeprime_tuple"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]
else:
df["stack_tuple"]=[-1]+L[0:-1]
if strand.connection3p() is None:
df["threeprime_tuple"]=[-1]+L[0:-1]
else:
df["threeprime_tuple"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1]
## cadnano 3.1 sequence assign is wrong if there is insertion or deletion.
df["r"]=[np.array([df["x"][i],df["y"][i],df["z"][i]]) for i in df.index]
return [pd.Series(df.loc[i]) for i in df.index]
def gen_prop_table(part,seq_file=None,fill_seq="T"):
strand_set=[]
for i in part.getidNums():
fwd,rev=part.getStrandSets(i)
[strand_set.append(i) for i in fwd.strands()]
[strand_set.append(i) for i in rev.strands()]
id_series=[]
for i in strand_set:
id_series=id_series+gen_id_series(i,part)
nt_prop=pd.DataFrame(id_series)
nt_prop.reset_index(names=list(range(len(nt_prop.index))),inplace=True)
ind_tuple=[(nt_prop["vh"][i],nt_prop["zid"][i],nt_prop["fwd"][i]) for i in nt_prop.index]
stacks=[]
for i in list(nt_prop["stack_tuple"]):
if i ==-1:
stacks.append(i)
else:
stacks.append(ind_tuple.index(i))
nt_prop["stack"]=stacks
tprime=[]
for i in list(nt_prop["threeprime_tuple"]):
if i ==-1:
tprime.append(i)
else:
tprime.append(ind_tuple.index(i))
nt_prop["threeprime"]=tprime
vhzid=list(zip(nt_prop["vh"],nt_prop["zid"])) vhzid=list(zip(nt_prop["vh"],nt_prop["zid"]))
nt_prop["r"]=[part.getCoordinate(helix_id, indices) for helix_id, indices in zip(nt_prop["vh"],nt_prop["zid"])]
nt_prop["orientation"]=[get_helix_angle(part, helix_id, indices) for helix_id,indices in zip(nt_prop["vh"],nt_prop["zid"])] nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid]
nt_prop=nt_prop.fillna(-1) nt_prop=nt_prop.fillna(-1)
for i in range(int(len(vhzid)/2)): for i in range(int(len(nt_prop.index)/2)):
try: try:
bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i])) bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))
nt_prop["bp"][bp1]=bp2 nt_prop["bp"][bp1]=bp2
nt_prop["bp"][bp2]=bp1 nt_prop["bp"][bp2]=bp1
except: except:
pass pass
tprime_list=-np.ones(len(nt_prop.index),dtype=int) if seq_file is not None:
index2=list(zip(vhzid,nt_prop["is_scaf"])) #under construction
for i in range(len(nt_prop.index)): seq=open(seq_file,"r")
((m,n),p)=list(zip(vhzid,nt_prop["is_scaf"]))[i] for seqline in seq:
if p==True: try:
k,l=(vslist["scaf"][m])[n][2:] int(seqline[0])
if k!=-1 and l!=-1: except:
n=index2.index(((k,l),True)) continue
tprime_list[i]=int(n)
else:
k,l=(vslist["stap"][m])[n][2:]
if k!=-1 and l!=-1:
n=index2.index(((k,l),False))
tprime_list[i]=int(n)
nt_prop["threeprime"]=tprime_list
(n,)=np.where(nt_prop["threeprime"]==-1)
stackid=nt_prop["bp"][[list(nt_prop["threeprime"]).index(i) for i in n]]
nt_prop["stack"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop["threeprime"][stackid.index[np.where(np.array(stackid)!=-1)]]
## Todo: sequence
return nt_prop return nt_prop
def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters): def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters):
nt_prop=gen_prop_table(json_file) json_data=read_json_file(json_file)
part,decoder=cadnano_parts(json_data)
if decoder==2:
nt_prop=gen_prop_table_cad2(json_data,part)
elif decoder==3:
nt_prop=gen_prop_table_cad3(json_data,part)
if seq is None: if seq is None:
if nt_prop["seq"][0]==-1: if nt_prop["seq"][0]==-1:
seq=None seq=None
......
# -*- coding: utf-8 -*-
import pdb
import numpy as np
import os,sys
from glob import glob
import re
import pandas as pd
pd.options.mode.chained_assignment = None # default='warn'
from .segmentmodel_from_lists import model_from_basepair_stack_3prime
from ..arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis
from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
from ..model.dna_sequence import m13 as m13seq
import json
import re
## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
## TODO: catch circular strands in "get_5prime" cadnano calls
## TODO: handle special motifs
## - doubly-nicked helices
## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
## - circular constructs
def read_json_file(filename):
try:
with open(filename) as ch:
json_data = json.load(ch)
except:
with open(filename) as ch:
content = ""
for l in ch:
l = re.sub(r"'", r'"', l)
# https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
# l = re.sub(r"{\s*(\w)", r'{"\1', l)
# l = re.sub(r",\s*(\w)", r',"\1', l)
# l = re.sub(r"(\w):", r'\1":', l)
content += l+"\n"
json_data = json.loads(content)
return json_data
def get_lattice(part):
lattice_type = None
_gt = part.getGridType()
try:
lattice_type = _gt.name.lower()
except:
if _gt == 1:
lattice_type = 'square'
elif _gt == 2:
lattice_type = 'honeycomb'
else:
print("WARNING: unable to determine cadnano part lattice type")
return lattice_type
def cadnano_parts(json_data):
import cadnano
from cadnano.document import Document
try:
doc = Document()
cadnano.fileio.v3decode.decode(doc, json_data)
decoder = 3
except:
doc = Document()
cadnano.fileio.v2decode.decode(doc, json_data)
decoder = 2
parts = [p for p in doc.getParts()]
if len(parts) != 1:
raise Exception("Only documents containing a single cadnano part are implemented at this time.")
part = parts[0]
if decoder == 2:
""" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here """
l = get_lattice(part)
if l == 'square':
for id_num in part.getIdNums():
if part.vh_properties.loc[id_num,'eulerZ'] == 0:
part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)
return part,decoder
def get_helix_angle(part, helix_id, indices):
""" Get "start_orientation" for helix """
# import ipdb
# ipdb.set_trace()
""" FROM CADNANO2.5
+ angle is CCW
- angle is CW
Right handed DNA rotates clockwise from 5' to 3'
we use the convention the 5' end starts at 0 degrees
and it's pair is minor_groove_angle degrees away
direction, hence the minus signs. eulerZ
"""
hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,
['helical_pitch',
'bases_per_repeat',
'turns_per_repeat',
'eulerZ',
'minor_groove_angle']]
twist_per_base = tpr*360./bpr
# angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180
angle = eulerZ + twist_per_base*indices - 0.5*mgroove
return rotationAboutAxis(np.array((0,0,1)),angle)
def nttype_v2(scafs):
def judge(i):
if i ==[-1,-1,-1,-1]:
return 0
else: return 1
n=np.array([judge(i) for i in scafs])
return n
def gen_prop_table_cad2(json_data,part):
df=pd.DataFrame(json_data["vstrands"])
vslist=df.set_index("num")
props = part.getModelProperties().copy()
try:
if props.get('point_type') == PointType.ARBITRARY:
# TODO add code to encode Parts with ARBITRARY point configurations
raise NotImplementedError("Not implemented")
except:
try:
vh_props, origins = part.helixPropertiesAndOrigins()
except:
origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}
scaf_id=[nttype_v2(vslist['scaf'][i]) for i in vslist.index]
stap_id=[nttype_v2(vslist['stap'][i]) for i in vslist.index]
cad_bps=part.getIndices(0)
vslist["scafnt"]=np.sum(np.array(scaf_id),axis=1)
vslist["stapnt"]=np.sum(np.array(stap_id),axis=1)
totnt=np.sum(vslist["scafnt"])+np.sum(vslist["stapnt"])
is_scaf=np.zeros(totnt,dtype=bool)
is_scaf[0:np.sum(vslist["scafnt"])]=1
nt_prop=pd.DataFrame(index=range(totnt),columns=["vh","zid","is_scaf","r","bp","stack","threeprime","seq","orientation"])
nt_prop["is_scaf"]=is_scaf
tot_id=scaf_id+stap_id
vhi,zidi=np.where(np.array(scaf_id)==1)
vhj,zidj=np.where(np.array(stap_id)==1)
vhi=vslist.index[vhi]
vhj=vslist.index[vhj]
nt_prop["vh"]=list(vhi)+list(vhj)
nt_prop["zid"]=list(zidi)+list(zidj)
vhzid=list(zip(nt_prop["vh"],nt_prop["zid"]))
nt_prop["r"]=[part.getCoordinate(helix_id, indices) for helix_id, indices in zip(nt_prop["vh"],nt_prop["zid"])]
nt_prop["orientation"]=[get_helix_angle(part, helix_id, indices) for helix_id,indices in zip(nt_prop["vh"],nt_prop["zid"])]
nt_prop=nt_prop.fillna(-1)
for i in range(int(len(vhzid)/2)):
try:
bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))
nt_prop["bp"][bp1]=bp2
nt_prop["bp"][bp2]=bp1
except:
pass
tprime_list=-np.ones(len(nt_prop.index),dtype=int)
index2=list(zip(vhzid,nt_prop["is_scaf"]))
for i in range(len(nt_prop.index)):
((m,n),p)=list(zip(vhzid,nt_prop["is_scaf"]))[i]
if p==True:
k,l=(vslist["scaf"][m])[n][2:]
if k!=-1 and l!=-1:
n=index2.index(((k,l),True))
tprime_list[i]=int(n)
else:
k,l=(vslist["stap"][m])[n][2:]
if k!=-1 and l!=-1:
n=index2.index(((k,l),False))
tprime_list[i]=int(n)
nt_prop["threeprime"]=tprime_list
#(n,)=np.where(nt_prop["threeprime"]==-1)
#stackid=nt_prop["bp"][[list(nt_prop["threeprime"]).index(i) for i in n]]
index3=dict(zip(nt_prop.index,zip(nt_prop["vh"],nt_prop["is_scaf"])))
index3[-1]=(-1,-1)
nt_prop["stack"]=np.where([index3[i]==index3[nt_prop["threeprime"][i]] for i in (nt_prop.index)],nt_prop["threeprime"],-1)
return nt_prop
def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters):
json_data=read_json_file(json_file)
part,decoder=cadnano_parts(json_data)
if decoder==2:
nt_prop=gen_prop_table_cad2(json_data,part)
elif decoder==3:
nt_prop=gen_prop_table_cad3(json_data,part)
if seq is None:
if nt_prop["seq"][0]==-1:
seq=None
else:
seq=nt_prop["seq"]
r=np.array(list(nt_prop['r']))
bp=np.array(list(nt_prop['bp']))
three_prime=np.array((list(nt_prop["threeprime"])))
stack=np.array((list(nt_prop["stack"])))
orientation=np.array(list(nt_prop["orientation"]))
model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation=None, **model_parameters )
return model
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment