Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
mrdna
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tbgl
tools
mrdna
Commits
219c598c
Commit
219c598c
authored
7 months ago
by
pinyili2
Browse files
Options
Downloads
Patches
Plain Diff
update
parent
534a875f
No related branches found
No related tags found
1 merge request
!1
Pinyili2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
mrdna/readers/segmentmodel_from_cadnano.py
+121
-84
121 additions, 84 deletions
mrdna/readers/segmentmodel_from_cadnano.py
mrdna/readers/segmentmodel_from_cadnano_old.py
+207
-0
207 additions, 0 deletions
mrdna/readers/segmentmodel_from_cadnano_old.py
with
328 additions
and
84 deletions
mrdna/readers/segmentmodel_from_cadnano.py
+
121
−
84
View file @
219c598c
...
@@ -13,34 +13,17 @@ from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSe
...
@@ -13,34 +13,17 @@ from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSe
from
..model.dna_sequence
import
m13
as
m13seq
from
..model.dna_sequence
import
m13
as
m13seq
import
json
import
json
import
re
import
re
import
cadnano
from
cadnano.document
import
Document
## Only testing on cadnano2.5
## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
## TODO: catch circular strands in "get_5prime" cadnano calls
## TODO: catch circular strands in "get_5prime" cadnano calls
## TODO: handle special motifs
## TODO: handle special motifs
## - doubly-nicked helices
## - doubly-nicked helices
## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
## - circular constructs
## - circular constructs
def
get_lattice
(
part
):
lattice_type
=
None
_gt
=
part
.
getGridType
()
try
:
lattice_type
=
_gt
.
name
.
lower
()
except
:
if
_gt
==
1
:
lattice_type
=
'
square
'
elif
_gt
==
2
:
lattice_type
=
'
honeycomb
'
else
:
print
(
"
WARNING: unable to determine cadnano part lattice type
"
)
return
lattice_type
def
read_json_file
(
filename
):
def
read_json_file
(
filename
):
try
:
try
:
with
open
(
filename
)
as
ch
:
with
open
(
filename
)
as
ch
:
json_data
=
json
.
load
(
ch
)
json_data
=
json
.
load
(
ch
)
...
@@ -55,7 +38,11 @@ def read_json_file(filename):
...
@@ -55,7 +38,11 @@ def read_json_file(filename):
# l = re.sub(r"(\w):", r'\1":', l)
# l = re.sub(r"(\w):", r'\1":', l)
content
+=
l
+
"
\n
"
content
+=
l
+
"
\n
"
json_data
=
json
.
loads
(
content
)
json_data
=
json
.
loads
(
content
)
return
json_data
def
cadnano_parts
(
json_data
):
import
cadnano
from
cadnano.document
import
Document
try
:
try
:
doc
=
Document
()
doc
=
Document
()
cadnano
.
fileio
.
v3decode
.
decode
(
doc
,
json_data
)
cadnano
.
fileio
.
v3decode
.
decode
(
doc
,
json_data
)
...
@@ -77,9 +64,23 @@ def read_json_file(filename):
...
@@ -77,9 +64,23 @@ def read_json_file(filename):
for
id_num
in
part
.
getIdNums
():
for
id_num
in
part
.
getIdNums
():
if
part
.
vh_properties
.
loc
[
id_num
,
'
eulerZ
'
]
==
0
:
if
part
.
vh_properties
.
loc
[
id_num
,
'
eulerZ
'
]
==
0
:
part
.
vh_properties
.
loc
[
id_num
,
'
eulerZ
'
]
=
360
*
(
6
/
10.5
)
part
.
vh_properties
.
loc
[
id_num
,
'
eulerZ
'
]
=
360
*
(
6
/
10.5
)
df
=
pd
.
DataFrame
(
json_data
[
"
vstrands
"
])
df
=
pd
.
DataFrame
(
json_data
[
"
vstrands
"
])
n_df
=
df
.
set_index
(
"
num
"
)
n_df
=
df
.
set_index
(
"
num
"
)
return
part
,
n_df
return
part
def
get_lattice
(
part
):
lattice_type
=
None
_gt
=
part
.
getGridType
()
try
:
lattice_type
=
_gt
.
name
.
lower
()
except
:
if
_gt
==
1
:
lattice_type
=
'
square
'
elif
_gt
==
2
:
lattice_type
=
'
honeycomb
'
else
:
print
(
"
WARNING: unable to determine cadnano part lattice type
"
)
return
lattice_type
def
get_helix_angle
(
part
,
helix_id
,
indices
):
def
get_helix_angle
(
part
,
helix_id
,
indices
):
"""
Get
"
start_orientation
"
for helix
"""
"""
Get
"
start_orientation
"
for helix
"""
...
@@ -106,81 +107,117 @@ def get_helix_angle(part, helix_id, indices):
...
@@ -106,81 +107,117 @@ def get_helix_angle(part, helix_id, indices):
angle
=
eulerZ
+
twist_per_base
*
indices
-
0.5
*
mgroove
angle
=
eulerZ
+
twist_per_base
*
indices
-
0.5
*
mgroove
return
rotationAboutAxis
(
np
.
array
((
0
,
0
,
1
)),
angle
)
return
rotationAboutAxis
(
np
.
array
((
0
,
0
,
1
)),
angle
)
def
nttype
(
scafs
):
def
gen_id_series
(
strand
,
part
):
def
judge
(
i
):
df
=
pd
.
DataFrame
(
columns
=
[
"
vh
"
,
"
zid
"
,
"
fwd
"
,
"
stack_tuple
"
,
"
threeprime_tuple
"
,
"
x
"
,
"
y
"
,
"
z
"
],
index
=
range
(
strand
.
totalLength
()),
dtype
=
object
)
if
i
==
[
-
1
,
-
1
,
-
1
,
-
1
]:
df
[
"
vh
"
]
=
strand
.
_id_num
return
0
df
[
"
fwd
"
]
=
strand
.
isForward
()
else
:
return
1
df
[
"
x
"
]
=
part
.
getVirtualHelixOrigin
(
strand
.
_id_num
)[
0
]
*
10
n
=
np
.
array
([
judge
(
i
)
for
i
in
scafs
])
df
[
"
y
"
]
=
part
.
getVirtualHelixOrigin
(
strand
.
_id_num
)[
1
]
*
10
return
n
id_lo
,
id_hi
=
strand
.
idxs
()
zids
=
[
str
(
i
)
for
i
in
range
(
id_lo
,
id_hi
+
1
)]
insert_dict
=
{}
def
gen_prop_table
(
json_file
):
insert_dict
=
dict
([(
j
.
idx
(),
j
.
length
())
for
j
in
strand
.
insertionsOnStrand
()])
part
,
vslist
=
read_json_file
(
json_file
)
z
=
np
.
arange
(
id_lo
,
id_hi
+
1
)
props
=
part
.
getModelProperties
().
copy
()
zids
=
[
str
(
i
)
for
i
in
range
(
id_lo
,
id_hi
+
1
)]
try
:
z
=
list
(
np
.
arange
(
id_lo
,
id_hi
+
1
))
if
props
.
get
(
'
point_type
'
)
==
PointType
.
ARBITRARY
:
zids
=
[
str
(
i
)
for
i
in
range
(
id_lo
,
id_hi
+
1
)]
# TODO add code to encode Parts with ARBITRARY point configurations
for
insert_base
in
insert_dict
:
raise
NotImplementedError
(
"
Not implemented
"
)
z_ind
=
zids
.
index
(
str
(
insert_base
))
except
:
z_val
=
insert_dict
[
insert_base
]
try
:
z_pos_ind
=
z
.
index
(
insert_base
)
vh_props
,
origins
=
part
.
helixPropertiesAndOrigins
()
zids
.
pop
(
z_ind
)
except
:
z
.
pop
(
z_pos_ind
)
origins
=
{
hid
:
part
.
getVirtualHelixOrigin
(
hid
)[:
2
]
for
hid
in
part
.
getidNums
()}
if
z_val
!=-
1
:
scaf_id
=
[
nttype
(
vslist
[
'
scaf
'
][
i
])
for
i
in
vslist
.
index
]
#l=[str(insert_base)+"."+str(i) for i in range(z_val+1)]
stap_id
=
[
nttype
(
vslist
[
'
stap
'
][
i
])
for
i
in
vslist
.
index
]
l
=
list
(
range
(
z_val
+
1
))
cad_bps
=
part
.
getIndices
(
0
)
l
.
reverse
()
vslist
[
"
scafnt
"
]
=
np
.
sum
(
np
.
array
(
scaf_id
),
axis
=
1
)
for
k
in
l
:
vslist
[
"
stapnt
"
]
=
np
.
sum
(
np
.
array
(
stap_id
),
axis
=
1
)
zids
.
insert
(
z_ind
,
str
(
insert_base
)
+
"
.
"
+
str
(
k
))
totnt
=
np
.
sum
(
vslist
[
"
scafnt
"
])
+
np
.
sum
(
vslist
[
"
stapnt
"
])
z
.
insert
(
z_pos_ind
,
insert_base
+
k
/
(
z_val
+
1
))
is_scaf
=
np
.
zeros
(
totnt
,
dtype
=
bool
)
df
[
"
zid
"
]
=
zids
is_scaf
[
0
:
np
.
sum
(
vslist
[
"
scafnt
"
])]
=
1
df
[
"
z
"
]
=
np
.
array
(
z
)
*
3.4
nt_prop
=
pd
.
DataFrame
(
index
=
range
(
totnt
),
columns
=
[
"
vh
"
,
"
zid
"
,
"
is_scaf
"
,
"
r
"
,
"
bp
"
,
"
stack
"
,
"
threeprime
"
,
"
seq
"
,
"
orientation
"
])
nt_prop
[
"
is_scaf
"
]
=
is_scaf
tot_id
=
scaf_id
+
stap_id
L
=
[(
df
[
"
vh
"
][
i
],
df
[
"
zid
"
][
i
],
df
[
"
fwd
"
][
i
])
for
i
in
df
.
index
]
vhi
,
zidi
=
np
.
where
(
np
.
array
(
scaf_id
)
==
1
)
if
strand
.
isForward
()
==
True
:
vhj
,
zidj
=
np
.
where
(
np
.
array
(
stap_id
)
==
1
)
df
[
"
stack_tuple
"
]
=
L
[
1
:]
+
[
-
1
]
vhi
=
vslist
.
index
[
vhi
]
if
strand
.
connection3p
()
is
None
:
vhj
=
vslist
.
index
[
vhj
]
df
[
"
threeprime_tuple
"
]
=
L
[
1
:]
+
[
-
1
]
nt_prop
[
"
vh
"
]
=
list
(
vhi
)
+
list
(
vhj
)
else
:
nt_prop
[
"
zid
"
]
=
list
(
zidi
)
+
list
(
zidj
)
df
[
"
threeprime_tuple
"
]
=
L
[
1
:]
+
[(
strand
.
connection3p
().
idNum
(),
str
(
strand
.
connection3p
().
idx5Prime
()),
strand
.
connection3p
().
isForward
())]
else
:
df
[
"
stack_tuple
"
]
=
[
-
1
]
+
L
[
0
:
-
1
]
if
strand
.
connection3p
()
is
None
:
df
[
"
threeprime_tuple
"
]
=
[
-
1
]
+
L
[
0
:
-
1
]
else
:
df
[
"
threeprime_tuple
"
]
=
[(
strand
.
connection3p
().
idNum
(),
str
(
strand
.
connection3p
().
idx5Prime
()),
strand
.
connection3p
().
isForward
())]
+
L
[
0
:
-
1
]
## cadnano 3.1 sequence assign is wrong if there is insertion or deletion.
df
[
"
r
"
]
=
[
np
.
array
([
df
[
"
x
"
][
i
],
df
[
"
y
"
][
i
],
df
[
"
z
"
][
i
]])
for
i
in
df
.
index
]
return
[
pd
.
Series
(
df
.
loc
[
i
])
for
i
in
df
.
index
]
def
gen_prop_table
(
part
,
seq_file
=
None
,
fill_seq
=
"
T
"
):
strand_set
=
[]
for
i
in
part
.
getidNums
():
fwd
,
rev
=
part
.
getStrandSets
(
i
)
[
strand_set
.
append
(
i
)
for
i
in
fwd
.
strands
()]
[
strand_set
.
append
(
i
)
for
i
in
rev
.
strands
()]
id_series
=
[]
for
i
in
strand_set
:
id_series
=
id_series
+
gen_id_series
(
i
,
part
)
nt_prop
=
pd
.
DataFrame
(
id_series
)
nt_prop
.
reset_index
(
names
=
list
(
range
(
len
(
nt_prop
.
index
))),
inplace
=
True
)
ind_tuple
=
[(
nt_prop
[
"
vh
"
][
i
],
nt_prop
[
"
zid
"
][
i
],
nt_prop
[
"
fwd
"
][
i
])
for
i
in
nt_prop
.
index
]
stacks
=
[]
for
i
in
list
(
nt_prop
[
"
stack_tuple
"
]):
if
i
==-
1
:
stacks
.
append
(
i
)
else
:
stacks
.
append
(
ind_tuple
.
index
(
i
))
nt_prop
[
"
stack
"
]
=
stacks
tprime
=
[]
for
i
in
list
(
nt_prop
[
"
threeprime_tuple
"
]):
if
i
==-
1
:
tprime
.
append
(
i
)
else
:
tprime
.
append
(
ind_tuple
.
index
(
i
))
nt_prop
[
"
threeprime
"
]
=
tprime
vhzid
=
list
(
zip
(
nt_prop
[
"
vh
"
],
nt_prop
[
"
zid
"
]))
vhzid
=
list
(
zip
(
nt_prop
[
"
vh
"
],
nt_prop
[
"
zid
"
]))
nt_prop
[
"
r
"
]
=
[
part
.
getCoordinate
(
helix_id
,
indices
)
for
helix_id
,
indices
in
zip
(
nt_prop
[
"
vh
"
],
nt_prop
[
"
zid
"
])]
nt_prop
[
"
orientation
"
]
=
[
get_helix_angle
(
part
,
helix_id
,
indices
)
for
helix_id
,
indices
in
zip
(
nt_prop
[
"
vh
"
],
nt_prop
[
"
zid
"
])
]
nt_prop
[
"
orientation
"
]
=
[
get_helix_angle
(
part
,
helix_id
,
int
(
float
(
indices
)
))
for
helix_id
,
indices
in
vhzid
]
nt_prop
=
nt_prop
.
fillna
(
-
1
)
nt_prop
=
nt_prop
.
fillna
(
-
1
)
for
i
in
range
(
int
(
len
(
vhzid
)
/
2
)):
for
i
in
range
(
int
(
len
(
nt_prop
.
index
)
/
2
)):
try
:
try
:
bp1
,
bp2
=
(
i
,
1
+
i
+
vhzid
[
i
+
1
:].
index
(
vhzid
[
i
]))
bp1
,
bp2
=
(
i
,
1
+
i
+
vhzid
[
i
+
1
:].
index
(
vhzid
[
i
]))
nt_prop
[
"
bp
"
][
bp1
]
=
bp2
nt_prop
[
"
bp
"
][
bp1
]
=
bp2
nt_prop
[
"
bp
"
][
bp2
]
=
bp1
nt_prop
[
"
bp
"
][
bp2
]
=
bp1
except
:
except
:
pass
pass
tprime_list
=-
np
.
ones
(
len
(
nt_prop
.
index
),
dtype
=
int
)
if
seq_file
is
not
None
:
index2
=
list
(
zip
(
vhzid
,
nt_prop
[
"
is_scaf
"
]))
#under construction
for
i
in
range
(
len
(
nt_prop
.
index
)):
seq
=
open
(
seq_file
,
"
r
"
)
((
m
,
n
),
p
)
=
list
(
zip
(
vhzid
,
nt_prop
[
"
is_scaf
"
]))[
i
]
for
seqline
in
seq
:
if
p
==
True
:
try
:
k
,
l
=
(
vslist
[
"
scaf
"
][
m
])[
n
][
2
:]
int
(
seqline
[
0
])
if
k
!=-
1
and
l
!=-
1
:
except
:
n
=
index2
.
index
(((
k
,
l
),
True
))
continue
tprime_list
[
i
]
=
int
(
n
)
else
:
k
,
l
=
(
vslist
[
"
stap
"
][
m
])[
n
][
2
:]
if
k
!=-
1
and
l
!=-
1
:
n
=
index2
.
index
(((
k
,
l
),
False
))
tprime_list
[
i
]
=
int
(
n
)
nt_prop
[
"
threeprime
"
]
=
tprime_list
(
n
,)
=
np
.
where
(
nt_prop
[
"
threeprime
"
]
==-
1
)
stackid
=
nt_prop
[
"
bp
"
][[
list
(
nt_prop
[
"
threeprime
"
]).
index
(
i
)
for
i
in
n
]]
nt_prop
[
"
stack
"
][
stackid
.
index
[
np
.
where
(
np
.
array
(
stackid
)
!=-
1
)]]
=
nt_prop
[
"
threeprime
"
][
stackid
.
index
[
np
.
where
(
np
.
array
(
stackid
)
!=-
1
)]]
## Todo: sequence
return
nt_prop
return
nt_prop
def
mrdna_model_from_cadnano
(
json_file
,
seq
=
None
,
**
model_parameters
):
def
mrdna_model_from_cadnano
(
json_file
,
seq
=
None
,
**
model_parameters
):
nt_prop
=
gen_prop_table
(
json_file
)
json_data
=
read_json_file
(
json_file
)
part
,
decoder
=
cadnano_parts
(
json_data
)
if
decoder
==
2
:
nt_prop
=
gen_prop_table_cad2
(
json_data
,
part
)
elif
decoder
==
3
:
nt_prop
=
gen_prop_table_cad3
(
json_data
,
part
)
if
seq
is
None
:
if
seq
is
None
:
if
nt_prop
[
"
seq
"
][
0
]
==-
1
:
if
nt_prop
[
"
seq
"
][
0
]
==-
1
:
seq
=
None
seq
=
None
...
...
This diff is collapsed.
Click to expand it.
mrdna/readers/segmentmodel_from_cadnano_old.py
0 → 100644
+
207
−
0
View file @
219c598c
# -*- coding: utf-8 -*-
import
pdb
import
numpy
as
np
import
os
,
sys
from
glob
import
glob
import
re
import
pandas
as
pd
pd
.
options
.
mode
.
chained_assignment
=
None
# default='warn'
from
.segmentmodel_from_lists
import
model_from_basepair_stack_3prime
from
..arbdmodel.coords
import
readArbdCoords
,
readAvgArbdCoords
,
rotationAboutAxis
from
..segmentmodel
import
SegmentModel
,
SingleStrandedSegment
,
DoubleStrandedSegment
from
..model.dna_sequence
import
m13
as
m13seq
import
json
import
re
## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
## TODO: catch circular strands in "get_5prime" cadnano calls
## TODO: handle special motifs
## - doubly-nicked helices
## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
## - circular constructs
def
read_json_file
(
filename
):
try
:
with
open
(
filename
)
as
ch
:
json_data
=
json
.
load
(
ch
)
except
:
with
open
(
filename
)
as
ch
:
content
=
""
for
l
in
ch
:
l
=
re
.
sub
(
r
"'"
,
r
'"'
,
l
)
# https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
# l = re.sub(r"{\s*(\w)", r'{"\1', l)
# l = re.sub(r",\s*(\w)", r',"\1', l)
# l = re.sub(r"(\w):", r'\1":', l)
content
+=
l
+
"
\n
"
json_data
=
json
.
loads
(
content
)
return
json_data
def
get_lattice
(
part
):
lattice_type
=
None
_gt
=
part
.
getGridType
()
try
:
lattice_type
=
_gt
.
name
.
lower
()
except
:
if
_gt
==
1
:
lattice_type
=
'
square
'
elif
_gt
==
2
:
lattice_type
=
'
honeycomb
'
else
:
print
(
"
WARNING: unable to determine cadnano part lattice type
"
)
return
lattice_type
def
cadnano_parts
(
json_data
):
import
cadnano
from
cadnano.document
import
Document
try
:
doc
=
Document
()
cadnano
.
fileio
.
v3decode
.
decode
(
doc
,
json_data
)
decoder
=
3
except
:
doc
=
Document
()
cadnano
.
fileio
.
v2decode
.
decode
(
doc
,
json_data
)
decoder
=
2
parts
=
[
p
for
p
in
doc
.
getParts
()]
if
len
(
parts
)
!=
1
:
raise
Exception
(
"
Only documents containing a single cadnano part are implemented at this time.
"
)
part
=
parts
[
0
]
if
decoder
==
2
:
"""
It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here
"""
l
=
get_lattice
(
part
)
if
l
==
'
square
'
:
for
id_num
in
part
.
getIdNums
():
if
part
.
vh_properties
.
loc
[
id_num
,
'
eulerZ
'
]
==
0
:
part
.
vh_properties
.
loc
[
id_num
,
'
eulerZ
'
]
=
360
*
(
6
/
10.5
)
return
part
,
decoder
def
get_helix_angle
(
part
,
helix_id
,
indices
):
"""
Get
"
start_orientation
"
for helix
"""
# import ipdb
# ipdb.set_trace()
"""
FROM CADNANO2.5
+ angle is CCW
- angle is CW
Right handed DNA rotates clockwise from 5
'
to 3
'
we use the convention the 5
'
end starts at 0 degrees
and it
'
s pair is minor_groove_angle degrees away
direction, hence the minus signs. eulerZ
"""
hp
,
bpr
,
tpr
,
eulerZ
,
mgroove
=
part
.
vh_properties
.
loc
[
helix_id
,
[
'
helical_pitch
'
,
'
bases_per_repeat
'
,
'
turns_per_repeat
'
,
'
eulerZ
'
,
'
minor_groove_angle
'
]]
twist_per_base
=
tpr
*
360.
/
bpr
# angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180
angle
=
eulerZ
+
twist_per_base
*
indices
-
0.5
*
mgroove
return
rotationAboutAxis
(
np
.
array
((
0
,
0
,
1
)),
angle
)
def
nttype_v2
(
scafs
):
def
judge
(
i
):
if
i
==
[
-
1
,
-
1
,
-
1
,
-
1
]:
return
0
else
:
return
1
n
=
np
.
array
([
judge
(
i
)
for
i
in
scafs
])
return
n
def
gen_prop_table_cad2
(
json_data
,
part
):
df
=
pd
.
DataFrame
(
json_data
[
"
vstrands
"
])
vslist
=
df
.
set_index
(
"
num
"
)
props
=
part
.
getModelProperties
().
copy
()
try
:
if
props
.
get
(
'
point_type
'
)
==
PointType
.
ARBITRARY
:
# TODO add code to encode Parts with ARBITRARY point configurations
raise
NotImplementedError
(
"
Not implemented
"
)
except
:
try
:
vh_props
,
origins
=
part
.
helixPropertiesAndOrigins
()
except
:
origins
=
{
hid
:
part
.
getVirtualHelixOrigin
(
hid
)[:
2
]
for
hid
in
part
.
getidNums
()}
scaf_id
=
[
nttype_v2
(
vslist
[
'
scaf
'
][
i
])
for
i
in
vslist
.
index
]
stap_id
=
[
nttype_v2
(
vslist
[
'
stap
'
][
i
])
for
i
in
vslist
.
index
]
cad_bps
=
part
.
getIndices
(
0
)
vslist
[
"
scafnt
"
]
=
np
.
sum
(
np
.
array
(
scaf_id
),
axis
=
1
)
vslist
[
"
stapnt
"
]
=
np
.
sum
(
np
.
array
(
stap_id
),
axis
=
1
)
totnt
=
np
.
sum
(
vslist
[
"
scafnt
"
])
+
np
.
sum
(
vslist
[
"
stapnt
"
])
is_scaf
=
np
.
zeros
(
totnt
,
dtype
=
bool
)
is_scaf
[
0
:
np
.
sum
(
vslist
[
"
scafnt
"
])]
=
1
nt_prop
=
pd
.
DataFrame
(
index
=
range
(
totnt
),
columns
=
[
"
vh
"
,
"
zid
"
,
"
is_scaf
"
,
"
r
"
,
"
bp
"
,
"
stack
"
,
"
threeprime
"
,
"
seq
"
,
"
orientation
"
])
nt_prop
[
"
is_scaf
"
]
=
is_scaf
tot_id
=
scaf_id
+
stap_id
vhi
,
zidi
=
np
.
where
(
np
.
array
(
scaf_id
)
==
1
)
vhj
,
zidj
=
np
.
where
(
np
.
array
(
stap_id
)
==
1
)
vhi
=
vslist
.
index
[
vhi
]
vhj
=
vslist
.
index
[
vhj
]
nt_prop
[
"
vh
"
]
=
list
(
vhi
)
+
list
(
vhj
)
nt_prop
[
"
zid
"
]
=
list
(
zidi
)
+
list
(
zidj
)
vhzid
=
list
(
zip
(
nt_prop
[
"
vh
"
],
nt_prop
[
"
zid
"
]))
nt_prop
[
"
r
"
]
=
[
part
.
getCoordinate
(
helix_id
,
indices
)
for
helix_id
,
indices
in
zip
(
nt_prop
[
"
vh
"
],
nt_prop
[
"
zid
"
])]
nt_prop
[
"
orientation
"
]
=
[
get_helix_angle
(
part
,
helix_id
,
indices
)
for
helix_id
,
indices
in
zip
(
nt_prop
[
"
vh
"
],
nt_prop
[
"
zid
"
])]
nt_prop
=
nt_prop
.
fillna
(
-
1
)
for
i
in
range
(
int
(
len
(
vhzid
)
/
2
)):
try
:
bp1
,
bp2
=
(
i
,
1
+
i
+
vhzid
[
i
+
1
:].
index
(
vhzid
[
i
]))
nt_prop
[
"
bp
"
][
bp1
]
=
bp2
nt_prop
[
"
bp
"
][
bp2
]
=
bp1
except
:
pass
tprime_list
=-
np
.
ones
(
len
(
nt_prop
.
index
),
dtype
=
int
)
index2
=
list
(
zip
(
vhzid
,
nt_prop
[
"
is_scaf
"
]))
for
i
in
range
(
len
(
nt_prop
.
index
)):
((
m
,
n
),
p
)
=
list
(
zip
(
vhzid
,
nt_prop
[
"
is_scaf
"
]))[
i
]
if
p
==
True
:
k
,
l
=
(
vslist
[
"
scaf
"
][
m
])[
n
][
2
:]
if
k
!=-
1
and
l
!=-
1
:
n
=
index2
.
index
(((
k
,
l
),
True
))
tprime_list
[
i
]
=
int
(
n
)
else
:
k
,
l
=
(
vslist
[
"
stap
"
][
m
])[
n
][
2
:]
if
k
!=-
1
and
l
!=-
1
:
n
=
index2
.
index
(((
k
,
l
),
False
))
tprime_list
[
i
]
=
int
(
n
)
nt_prop
[
"
threeprime
"
]
=
tprime_list
#(n,)=np.where(nt_prop["threeprime"]==-1)
#stackid=nt_prop["bp"][[list(nt_prop["threeprime"]).index(i) for i in n]]
index3
=
dict
(
zip
(
nt_prop
.
index
,
zip
(
nt_prop
[
"
vh
"
],
nt_prop
[
"
is_scaf
"
])))
index3
[
-
1
]
=
(
-
1
,
-
1
)
nt_prop
[
"
stack
"
]
=
np
.
where
([
index3
[
i
]
==
index3
[
nt_prop
[
"
threeprime
"
][
i
]]
for
i
in
(
nt_prop
.
index
)],
nt_prop
[
"
threeprime
"
],
-
1
)
return
nt_prop
def
mrdna_model_from_cadnano
(
json_file
,
seq
=
None
,
**
model_parameters
):
json_data
=
read_json_file
(
json_file
)
part
,
decoder
=
cadnano_parts
(
json_data
)
if
decoder
==
2
:
nt_prop
=
gen_prop_table_cad2
(
json_data
,
part
)
elif
decoder
==
3
:
nt_prop
=
gen_prop_table_cad3
(
json_data
,
part
)
if
seq
is
None
:
if
nt_prop
[
"
seq
"
][
0
]
==-
1
:
seq
=
None
else
:
seq
=
nt_prop
[
"
seq
"
]
r
=
np
.
array
(
list
(
nt_prop
[
'
r
'
]))
bp
=
np
.
array
(
list
(
nt_prop
[
'
bp
'
]))
three_prime
=
np
.
array
((
list
(
nt_prop
[
"
threeprime
"
])))
stack
=
np
.
array
((
list
(
nt_prop
[
"
stack
"
])))
orientation
=
np
.
array
(
list
(
nt_prop
[
"
orientation
"
]))
model
=
model_from_basepair_stack_3prime
(
r
,
bp
,
stack
,
three_prime
,
seq
,
orientation
=
None
,
**
model_parameters
)
return
model
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment