Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tbgl
tools
mrdna
Commits
2acb5e5d
Commit
2acb5e5d
authored
Sep 04, 2018
by
cmaffeo2
Browse files
Refactored more general segment_from_list module from segment_from_pdb.py
parent
2191911b
Changes
3
Hide whitespace changes
Inline
Side-by-side
dnarbd/readers/segmentmodel_from_lists.py
0 → 100644
View file @
2acb5e5d
# -*- coding: utf-8 -*-
import
pdb
import
numpy
as
np
import
os
,
sys
from
..segmentmodel
import
SegmentModel
,
SingleStrandedSegment
,
DoubleStrandedSegment
from
..
import
get_resource_path
def
_three_prime_list_to_five_prime
(
three_prime
):
ids
=
np
.
arange
(
len
(
three_prime
))
five_prime
=
-
np
.
ones
(
three_prime
.
shape
)
has_three_prime
=
np
.
where
(
three_prime
>=
0
)[
0
]
five_prime
[
three_prime
[
has_three_prime
]]
=
has_three_prime
return
five_prime
def
_three_prime_list_to_strands
(
three_prime
):
five_prime
=
_three_prime_list_to_five_prime
(
three_prime
)
five_prime_ends
=
np
.
where
(
five_prime
<
0
)[
0
]
strands
=
[]
for
nt_idx
in
five_prime_ends
:
strand
=
[
nt_idx
]
while
three_prime
[
nt_idx
]
>=
0
:
nt_idx
=
three_prime
[
nt_idx
]
strand
.
append
(
nt_idx
)
strands
.
append
(
np
.
array
(
strand
,
dtype
=
np
.
int
)
)
return
strands
def
basepairs_and_stacks_to_helixmap
(
basepairs
,
stacks_above
):
helixmap
=
-
np
.
ones
(
basepairs
.
shape
,
dtype
=
np
.
int
)
helixrank
=
-
np
.
ones
(
basepairs
.
shape
)
is_fwd
=
np
.
ones
(
basepairs
.
shape
,
dtype
=
np
.
int
)
## Remove stacks with nts lacking a basepairs
nobp
=
np
.
where
(
basepairs
<
0
)[
0
]
stacks_above
[
nobp
]
=
-
1
stacks_with_nobp
=
np
.
in1d
(
stacks_above
,
nobp
)
stacks_above
[
stacks_with_nobp
]
=
-
1
end_ids
=
np
.
where
(
(
stacks_above
<
0
)
*
(
basepairs
>=
0
)
)[
0
]
hid
=
0
for
end
in
end_ids
:
if
helixmap
[
end
]
>=
0
:
continue
rank
=
0
nt
=
basepairs
[
end
]
bp
=
basepairs
[
nt
]
assert
(
helixmap
[
nt
]
==
-
1
)
assert
(
helixmap
[
bp
]
==
-
1
)
helixmap
[
nt
]
=
helixmap
[
bp
]
=
hid
helixrank
[
nt
]
=
helixrank
[
bp
]
=
rank
is_fwd
[
bp
]
=
0
rank
+=
1
while
stacks_above
[
nt
]
>=
0
:
nt
=
stacks_above
[
nt
]
if
basepairs
[
nt
]
<
0
:
break
bp
=
basepairs
[
nt
]
assert
(
helixmap
[
nt
]
==
-
1
)
assert
(
helixmap
[
bp
]
==
-
1
)
helixmap
[
nt
]
=
helixmap
[
bp
]
=
hid
helixrank
[
nt
]
=
helixrank
[
bp
]
=
rank
is_fwd
[
bp
]
=
0
rank
+=
1
hid
+=
1
return
helixmap
,
helixrank
,
is_fwd
def
SegmentModelFromPdb
(
*
args
,
**
kwargs
):
u
=
mda
.
Universe
(
*
args
,
**
kwargs
)
for
a
in
u
.
select_atoms
(
"resname DT* and name C7"
).
atoms
:
a
.
name
=
"C5M"
## Find basepairs and base stacks
centers
,
transforms
=
find_base_position_orientation
(
u
)
bps
=
find_basepairs
(
u
,
centers
,
transforms
)
stacks
=
find_stacks
(
u
,
centers
,
transforms
)
## Build map from residue index to helix index
def
model_from_basepair_stack_3prime
(
coordinates
,
basepair
,
stack
,
three_prime
,
sequence
=
None
):
"""
Creates a SegmentModel object from lists of each nucleotide's
basepair, its stack (on 3' side) and its 3'-connected nucleotide
The first argument should be an N-by-3 numpy array containing the
coordinates of each nucleotide, where N is the number of
nucleotides. The following three arguments should be integer lists
where the i-th element corresponds to the i-th nucleotide; the
list element should the integer index of the corresponding
basepaired / stacked / phosphodiester-bonded nucleotide. If there
is no such nucleotide, the value should be -1.
Args:
basepair: List of each nucleotide's basepair's index
stack: List containing index of the nucleotide stacked on the 3' of each nucleotide
three_prime: List of each nucleotide's the 3' end of each nucleotide
Returns:
SegmentModel
"""
""" Validate Input """
try
:
inputs
=
(
basepair
,
stack
,
three_prime
)
basepair
,
stack
,
three_prime
=
[
np
.
array
(
a
,
dtype
=
np
.
int
)
for
a
in
inputs
]
except
:
raise
TypeError
(
"One or more of the input lists could not be converted into a numpy array"
)
if
np
.
any
(
[
len
(
a
.
shape
)
>
1
for
a
in
inputs
]
):
raise
ValueError
(
"One or more the input lists has the wrong dimensionality"
)
inputs
=
(
basepair
,
stack
,
three_prime
)
if
not
np
.
all
(
np
.
diff
([
len
(
a
)
for
a
in
inputs
])
==
0
):
raise
ValueError
(
"Inputs are not the same length"
)
num_nt
=
len
(
basepair
)
if
sequence
is
not
None
and
len
(
sequence
)
!=
num_nt
:
raise
ValueError
(
"The 'sequence' parameter is the wrong length {} != {}"
.
format
(
len
(
sequence
),
num_nt
))
bps
=
basepair
# alias
""" Build map of dsDNA helices and strands """
hmap
,
hrank
,
fwd
=
basepairs_and_stacks_to_helixmap
(
bps
,
stack
)
double_stranded_helices
=
np
.
unique
(
hmap
[
hmap
>=
0
])
strands
=
_three_prime_list_to_strands
(
three_prime
)
""" Add ssDNA to hmap """
hid
=
double_stranded_helices
[
-
1
]
+
1
ss_residues
=
hmap
<
0
assert
(
np
.
all
(
bps
[
ss_residues
]
==
-
1
)
)
for
s
in
strands
:
ss_residues
=
s
[
np
.
where
(
hmap
[
s
]
<
0
)[
0
]]
if
len
(
ss_residues
)
==
0
:
continue
resid_diff
=
np
.
diff
(
ss_residues
)
tmp
=
np
.
where
(
resid_diff
!=
1
)[
0
]
first_res
=
ss_residues
[
0
]
for
i
in
tmp
:
ids
=
np
.
arange
(
first_res
,
ss_residues
[
i
]
+
1
)
assert
(
np
.
all
(
hmap
[
ids
]
==
-
1
)
)
hmap
[
ids
]
=
hid
hrank
[
ids
]
=
ids
-
first_res
first_res
=
ss_residues
[
i
+
1
]
hid
+=
1
ids
=
np
.
arange
(
first_res
,
ss_residues
[
-
1
]
+
1
)
assert
(
np
.
all
(
hmap
[
ids
]
==
-
1
)
)
hmap
[
ids
]
=
hid
hrank
[
ids
]
=
ids
-
first_res
hid
+=
1
single_stranded_helices
=
np
.
arange
(
double_stranded_helices
[
-
1
]
+
1
,
hid
)
## Create double-stranded segments
doubleSegments
=
[]
for
hid
in
double_stranded_helices
:
maxrank
=
np
.
max
(
hrank
[
hmap
==
hid
]
)
start
=
np
.
where
((
hmap
==
hid
)
*
(
hrank
==
0
))[
0
]
end
=
np
.
where
((
hmap
==
hid
)
*
(
hrank
==
maxrank
))[
0
]
start_pos
=
np
.
mean
(
[
coordinates
[
r
,:]
for
r
in
start
],
axis
=
0
)
end_pos
=
np
.
mean
(
[
coordinates
[
r
,:]
for
r
in
end
],
axis
=
0
)
seg
=
DoubleStrandedSegment
(
name
=
str
(
hid
),
num_bp
=
np
.
sum
(
hmap
==
hid
)
//
2
,
start_position
=
start_pos
,
end_position
=
end_pos
)
assert
(
hid
==
len
(
doubleSegments
))
doubleSegments
.
append
(
seg
)
## Create single-stranded segments
singleSegments
=
[]
for
hid
in
single_stranded_helices
:
maxrank
=
np
.
max
(
hrank
[
hmap
==
hid
]
)
start
=
np
.
where
((
hmap
==
hid
)
*
(
hrank
==
0
))[
0
]
end
=
np
.
where
((
hmap
==
hid
)
*
(
hrank
==
maxrank
))[
0
]
start_pos
=
np
.
mean
(
[
coordinates
[
r
,:]
for
r
in
start
],
axis
=
0
)
end_pos
=
np
.
mean
(
[
coordinates
[
r
,:]
for
r
in
end
],
axis
=
0
)
seg
=
SingleStrandedSegment
(
name
=
str
(
hid
),
num_nt
=
np
.
sum
(
hmap
==
hid
),
start_position
=
start_pos
,
end_position
=
end_pos
)
assert
(
hid
==
len
(
doubleSegments
)
+
len
(
singleSegments
))
singleSegments
.
append
(
seg
)
## Find crossovers and 5prime/3prime ends
crossovers
,
prime5
,
prime3
=
[[],[],[]]
for
s
in
strands
:
tmp
=
np
.
where
(
np
.
diff
(
hmap
[
s
])
!=
0
)[
0
]
crossovers
.
extend
(
s
[
tmp
]
)
prime5
.
append
(
s
[
0
])
prime3
.
append
(
s
[
-
1
])
## Add connections
allSegments
=
doubleSegments
+
singleSegments
for
r
in
crossovers
:
seg1
,
seg2
=
[
allSegments
[
hmap
[
i
]]
for
i
in
(
r
,
r
+
1
)]
nt1
,
nt2
=
[
hrank
[
i
]
for
i
in
(
r
,
r
+
1
)]
f1
,
f2
=
[
fwd
[
i
]
for
i
in
(
r
,
r
+
1
)]
if
nt1
in
(
0
,
seg1
.
num_nt
)
or
nt2
in
(
0
,
seg2
.
num_nt
):
seg1
.
add_crossover
(
nt1
,
seg2
,
nt2
,[
f1
,
f2
],
type_
=
"terminal_crossover"
)
else
:
seg1
.
add_crossover
(
nt1
,
seg2
,
nt2
,[
f1
,
f2
])
## Add 5prime/3prime ends
for
r
in
prime5
:
seg
=
allSegments
[
hmap
[
r
]]
seg
.
add_5prime
(
hrank
[
r
],
fwd
[
r
])
for
r
in
prime3
:
seg
=
allSegments
[
hmap
[
r
]]
seg
.
add_3prime
(
hrank
[
r
],
fwd
[
r
])
## Assign sequence
if
sequence
is
not
None
:
for
hid
in
range
(
len
(
allSegments
)):
resids
=
np
.
sort
(
np
.
where
(
(
hmap
==
hid
)
*
(
fwd
==
1
)
)[
0
])
s
=
allSegments
[
hid
]
s
.
sequence
=
[
sequence
[
r
]
for
r
in
resids
]
## Build model
model
=
SegmentModel
(
allSegments
,
max_basepairs_per_bead
=
5
,
max_nucleotides_per_bead
=
5
,
local_twist
=
False
,
dimensions
=
(
5000
,
5000
,
5000
))
if
sequence
is
None
:
model
.
randomize_unset_sequence
()
return
model
dnarbd/readers/segmentmodel_from_pdb.py
View file @
2acb5e5d
...
...
@@ -12,6 +12,7 @@ from MDAnalysis.analysis.distances import distance_array
from
..segmentmodel
import
SegmentModel
,
SingleStrandedSegment
,
DoubleStrandedSegment
from
..
import
get_resource_path
from
.segmentmodel_from_lists
import
model_from_basepair_stack_3prime
resnames
=
dict
(
A
=
'ADE DA DA5 DA3'
,
T
=
'THY DT DT5 DT3'
,
C
=
'CYT DC DC5 DC3'
,
G
=
'GUA DG DG5 DG3'
)
...
...
@@ -227,113 +228,21 @@ def SegmentModelFromPdb(*args,**kwargs):
bps
=
find_basepairs
(
u
,
centers
,
transforms
)
stacks
=
find_stacks
(
u
,
centers
,
transforms
)
## Build map from residue index to helix index
hmap
,
hrank
,
fwd
=
basepairs_and_stacks_to_helixmap
(
bps
,
stacks
)
double_stranded_helices
=
np
.
unique
(
hmap
[
hmap
>=
0
])
## Add ssDNA to hmap
hid
=
double_stranded_helices
[
-
1
]
+
1
ss_residues
=
hmap
<
0
assert
(
np
.
all
(
bps
[
ss_residues
]
==
-
1
)
)
## Find three-prime ends
three_prime
=
-
np
.
ones
(
bps
.
shape
,
dtype
=
np
.
int
)
for
s
in
u
.
segments
:
r
=
s
.
atoms
.
select_atoms
(
"name C1'"
).
resindices
ss_residues
=
r
[
np
.
where
(
hmap
[
r
]
<
0
)[
0
]]
if
len
(
ss_residues
)
==
0
:
continue
resid_diff
=
np
.
diff
(
ss_residues
)
tmp
=
np
.
where
(
resid_diff
!=
1
)[
0
]
first_res
=
ss_residues
[
0
]
for
i
in
tmp
:
ids
=
np
.
arange
(
first_res
,
ss_residues
[
i
]
+
1
)
assert
(
np
.
all
(
hmap
[
ids
]
==
-
1
)
)
hmap
[
ids
]
=
hid
hrank
[
ids
]
=
ids
-
first_res
first_res
=
ss_residues
[
i
+
1
]
hid
+=
1
ids
=
np
.
arange
(
first_res
,
ss_residues
[
-
1
]
+
1
)
assert
(
np
.
all
(
hmap
[
ids
]
==
-
1
)
)
hmap
[
ids
]
=
hid
hrank
[
ids
]
=
ids
-
first_res
hid
+=
1
print
(
hmap
)
single_stranded_helices
=
np
.
arange
(
double_stranded_helices
[
-
1
]
+
1
,
hid
)
## Create double-stranded segments
doubleSegments
=
[]
for
hid
in
double_stranded_helices
:
maxrank
=
np
.
max
(
hrank
[
hmap
==
hid
]
)
start
=
np
.
where
((
hmap
==
hid
)
*
(
hrank
==
0
))[
0
]
end
=
np
.
where
((
hmap
==
hid
)
*
(
hrank
==
maxrank
))[
0
]
start_pos
=
np
.
mean
(
[
u
.
atoms
.
residues
[
r
].
atoms
.
center_of_mass
()
for
r
in
start
],
axis
=
0
)
end_pos
=
np
.
mean
(
[
u
.
atoms
.
residues
[
r
].
atoms
.
center_of_mass
()
for
r
in
end
],
axis
=
0
)
seg
=
DoubleStrandedSegment
(
name
=
str
(
hid
),
num_bp
=
np
.
sum
(
hmap
==
hid
)
//
2
,
start_position
=
start_pos
,
end_position
=
end_pos
)
assert
(
hid
==
len
(
doubleSegments
))
doubleSegments
.
append
(
seg
)
## Create single-stranded segments
singleSegments
=
[]
for
hid
in
single_stranded_helices
:
maxrank
=
np
.
max
(
hrank
[
hmap
==
hid
]
)
start
=
np
.
where
((
hmap
==
hid
)
*
(
hrank
==
0
))[
0
]
end
=
np
.
where
((
hmap
==
hid
)
*
(
hrank
==
maxrank
))[
0
]
start_pos
=
np
.
mean
(
[
u
.
atoms
.
residues
[
r
].
atoms
.
center_of_mass
()
for
r
in
start
],
axis
=
0
)
end_pos
=
np
.
mean
(
[
u
.
atoms
.
residues
[
r
].
atoms
.
center_of_mass
()
for
r
in
end
],
axis
=
0
)
seg
=
SingleStrandedSegment
(
name
=
str
(
hid
),
num_nt
=
np
.
sum
(
hmap
==
hid
),
start_position
=
start_pos
,
end_position
=
end_pos
)
assert
(
hid
==
len
(
doubleSegments
)
+
len
(
singleSegments
))
singleSegments
.
append
(
seg
)
## Find crossovers and 5prime/3prime ends
crossovers
,
prime5
,
prime3
=
[[],[],[]]
for
s
in
u
.
segments
:
r
=
s
.
atoms
.
select_atoms
(
"name C1'"
).
resindices
tmp
=
np
.
where
(
np
.
diff
(
hmap
[
r
])
!=
0
)[
0
]
crossovers
.
extend
(
r
[
tmp
]
)
prime5
.
append
(
r
[
0
])
prime3
.
append
(
r
[
-
1
])
## Add connections
allSegments
=
doubleSegments
+
singleSegments
for
r
in
crossovers
:
seg1
,
seg2
=
[
allSegments
[
hmap
[
i
]]
for
i
in
(
r
,
r
+
1
)]
nt1
,
nt2
=
[
hrank
[
i
]
for
i
in
(
r
,
r
+
1
)]
f1
,
f2
=
[
fwd
[
i
]
for
i
in
(
r
,
r
+
1
)]
if
nt1
in
(
0
,
seg1
.
num_nt
)
or
nt2
in
(
0
,
seg2
.
num_nt
):
seg1
.
add_crossover
(
nt1
,
seg2
,
nt2
,[
f1
,
f2
],
type_
=
"terminal_crossover"
)
else
:
seg1
.
add_crossover
(
nt1
,
seg2
,
nt2
,[
f1
,
f2
])
## Add 5prime/3prime ends
for
r
in
prime5
:
seg
=
allSegments
[
hmap
[
r
]]
seg
.
add_5prime
(
hrank
[
r
],
fwd
[
r
])
for
r
in
prime3
:
seg
=
allSegments
[
hmap
[
r
]]
seg
.
add_3prime
(
hrank
[
r
],
fwd
[
r
])
## Assign sequence
for
hid
in
range
(
len
(
allSegments
)):
resids
=
np
.
sort
(
np
.
where
(
(
hmap
==
hid
)
*
(
fwd
==
1
)
)[
0
])
s
=
allSegments
[
hid
]
s
.
sequence
=
[
resname_to_key
[
u
.
residues
[
r
].
resname
]
for
r
in
resids
]
## Build model
model
=
SegmentModel
(
allSegments
,
max_basepairs_per_bead
=
5
,
max_nucleotides_per_bead
=
5
,
local_twist
=
False
,
dimensions
=
(
5000
,
5000
,
5000
))
return
model
three_prime
[
r
[:
-
1
]]
=
r
[
1
:]
## Find sequence
seq
=
[
resname_to_key
[
rn
]
for
rn
in
u
.
select_atoms
(
"name C1'"
).
resnames
]
return
model_from_basepair_stack_3prime
(
centers
,
bps
,
stacks
,
three_prime
,
seq
)
if
__name__
==
"__main__"
:
# u = mda.Universe("test-ssdna.psf", "test-ssdna.pdb")
...
...
dnarbd/segmentmodel.py
View file @
2acb5e5d
...
...
@@ -25,7 +25,7 @@ TODO:
+ shrink dsDNA backbone
+ make orientation continuous
+ sequence
-
handle circular dna
+
handle circular dna
+ ensure crossover bead potentials aren't applied twice
+ remove performance bottlenecks
- test for large systems
...
...
@@ -2590,3 +2590,25 @@ proc calcforces {} {
score
.
append
(
sum2
/
count
)
print
(
angles
[
np
.
argmin
(
score
)])
print
(
score
)
def
vmd_cylinder_tcl
(
self
,
file_handle
):
raise
NotImplementedError
def
draw_cylinder
(
segment
):
r0
=
segment
.
start_position
r1
=
segment
.
end_position
file_handle
.
write
(
""
)
file_handle
.
write
(
""
)
## material
for
s
in
self
.
segments
:
if
isinstance
(
s
,
DoubleStrandedSegment
):
...
radius
=
5
elif
isinstance
(
s
,
SingleStrandedSegment
):
...
else
:
raise
(
Exception
)
file_handle
.
write
(
""
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment