Commit 6f9552db authored by cmaffeo2's avatar cmaffeo2
Browse files

Multiple improvements to validation model_from_basepair_stack_3prime

1) Passthrough of mrdna.segmentmodel arguments
2) Improved input validation
3) Stacks are removed unless stack[bp[stack]] == stack
4) Cleaner code for assigning ends of terminally-connected "segments"
parent afa963d2
......@@ -9,14 +9,12 @@ from ..coords import quaternion_from_matrix
from .. import get_resource_path
def _three_prime_list_to_five_prime(three_prime):
ids = np.arange(len(three_prime))
five_prime = -np.ones(three_prime.shape)
five_prime = -np.ones(three_prime.shape, dtype=np.int)
has_three_prime = np.where(three_prime >= 0)[0]
five_prime[three_prime[has_three_prime]] = has_three_prime
return five_prime
def _three_prime_list_to_strands(three_prime):
five_prime = _three_prime_list_to_five_prime(three_prime)
def _primes_list_to_strands(three_prime, five_prime):
five_prime_ends = np.where(five_prime < 0)[0]
strands = []
......@@ -110,11 +108,11 @@ def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):
return helixmap, helixrank, is_fwd
def set_splines(seg, coordinates, hid, hmap, hrank, fwd, orientation=None):
def set_splines(seg, coordinate, hid, hmap, hrank, fwd, orientation=None):
maxrank = np.max( hrank[hmap==hid] )
if maxrank == 0:
ids = np.where((hmap == hid))[0]
pos = np.mean( [coordinates[r,:] for r in ids ], axis=0 )
pos = np.mean( [coordinate[r,:] for r in ids ], axis=0 )
coords = [pos,pos]
contours = [0,1]
if orientation is not None:
......@@ -128,7 +126,7 @@ def set_splines(seg, coordinates, hid, hmap, hrank, fwd, orientation=None):
coords,contours,quats = [[],[],[]]
for rank in range(int(maxrank)+1):
ids = np.where((hmap == hid) * (hrank == rank))[0]
coords.append(np.mean( [coordinates[r,:] for r in ids ], axis=0 ))
coords.append(np.mean( [coordinate[r,:] for r in ids ], axis=0 ))
contours.append( float(rank+0.5)/(maxrank+1) )
last_q = None
if orientation is not None:
......@@ -150,14 +148,19 @@ def set_splines(seg, coordinates, hid, hmap, hrank, fwd, orientation=None):
seg.end_position = coords[-1,:]
def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime,
sequence=None, orientation=None):
def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
sequence=None, orientation=None,
max_basepairs_per_bead = 5,
max_nucleotides_per_bead = 5,
local_twist = False,
dimensions=(5000,5000,5000),
**model_parameters):
"""
Creates a SegmentModel object from lists of each nucleotide's
basepair, its stack (on 3' side) and its 3'-connected nucleotide
The first argument should be an N-by-3 numpy array containing the
coordinates of each nucleotide, where N is the number of
coordinate of each nucleotide, where N is the number of
nucleotides. The following three arguments should be integer lists
where the i-th element corresponds to the i-th nucleotide; the
list element should the integer index of the corresponding
......@@ -174,17 +177,19 @@ def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime,
"""
""" Validate Input """
inputs = (basepair,three_prime)
try:
inputs = (basepair,stack,three_prime)
basepair,stack,three_prime = [np.array(a,dtype=np.int)
for a in inputs]
basepair,three_prime = [np.array(a,dtype=np.int) for a in inputs]
except:
raise TypeError("One or more of the input lists could not be converted into a numpy array")
if np.any( [len(a.shape) > 1 for a in inputs] ):
raise ValueError("One or more the input lists has the wrong dimensionality")
raise ValueError("One or more of the input lists has the wrong dimensionality")
if len(coordinate.shape) != 2:
raise ValueError("Coordinate array has the wrong dimensionality")
inputs = (basepair,stack,three_prime)
inputs = (coordinate,basepair,three_prime)
if not np.all(np.diff([len(a) for a in inputs]) == 0):
raise ValueError("Inputs are not the same length")
......@@ -192,12 +197,38 @@ def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime,
if sequence is not None and len(sequence) != num_nt:
raise ValueError("The 'sequence' parameter is the wrong length {} != {}".format(len(sequence),num_nt))
if orientation is not None:
if len(orientation.shape) != 3:
raise ValueError("The 'orientation' array has the wrong dimensionality (should be Nx3x3)")
if orientation.shape != (num_nt,3,3):
raise ValueError("The 'orientation' array is not properly formatted")
try:
stack = np.array(stack,dtype=np.int)
except:
raise TypeError("The 'stack' array could not be converted into a numpy integer array")
if len(stack.shape) != 1:
raise ValueError("The 'stack' array has the wrong dimensionality")
if len(stack) != num_nt:
raise ValueError("The length of the 'stack' array does not match other inputs")
bps = basepair # alias
""" Fix stacks: require that the stack of a bp of a base's stack is its bp """
_has_bp = (bps >= 0)
_has_stack = (stack >= 0)
_stack_has_basepair = (bps[stack] >= 0) * _has_stack
stack = np.where( (stack[bps[stack]] == bps) * _has_bp * _has_stack * _has_bp,
stack, -np.ones(len(stack),dtype=np.int) )
five_prime = _three_prime_list_to_five_prime(three_prime)
""" Build map of dsDNA helices and strands """
hmap,hrank,fwd = basepairs_and_stacks_to_helixmap(bps,stack)
double_stranded_helices = np.unique(hmap[hmap >= 0])
strands = _three_prime_list_to_strands(three_prime)
strands = _primes_list_to_strands(three_prime, five_prime)
""" Add ssDNA to hmap """
hid = double_stranded_helices[-1]+1
......@@ -231,7 +262,7 @@ def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime,
for hid in double_stranded_helices:
seg = DoubleStrandedSegment(name=str(hid),
num_bp = np.sum(hmap==hid)//2)
set_splines(seg, coordinates, hid, hmap, hrank, fwd, orientation)
set_splines(seg, coordinate, hid, hmap, hrank, fwd, orientation)
assert(hid == len(doubleSegments))
doubleSegments.append(seg)
......@@ -241,7 +272,7 @@ def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime,
for hid in single_stranded_helices:
seg = SingleStrandedSegment(name=str(hid),
num_nt = np.sum(hmap==hid))
set_splines(seg, coordinates, hid, hmap, hrank, fwd, orientation)
set_splines(seg, coordinate, hid, hmap, hrank, fwd, orientation)
assert(hid == len(doubleSegments) + len(singleSegments))
singleSegments.append(seg)
......@@ -262,18 +293,14 @@ def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime,
nt1,nt2 = [hrank[i] for i in (r1,r2)]
f1,f2 = [fwd[i] for i in (r1,r2)]
if nt1 in (0,seg1.num_nt-1) or nt2 in (0,seg2.num_nt-1):
if nt1 in (0,seg1.num_nt-1) and nt2 in (0,seg2.num_nt-1):
if nt1 == 0:
end1 = seg1.start5 if f1 else seg1.start3
else:
end1 = seg1.end3 if f1 else seg1.end5
if nt2 == 0:
end2 = seg2.start5 if f2 else seg2.start3
else:
end2 = seg2.end3 if f2 else seg2.end5
## Handle connections at the ends
is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1))
is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0))
if is_terminal1 or is_terminal2:
if is_terminal1 and is_terminal2:
end1 = seg1.end3 if f1 else seg1.start3
end2 = seg2.start5 if f2 else seg2.end5
seg1._connect_ends( end1, end2, type_='intrahelical')
else:
seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_="terminal_crossover")
else:
......@@ -296,10 +323,11 @@ def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime,
## Build model
model = SegmentModel( allSegments,
max_basepairs_per_bead = 5,
max_nucleotides_per_bead = 5,
local_twist = False,
dimensions=(5000,5000,5000))
max_basepairs_per_bead = max_basepairs_per_bead,
max_nucleotides_per_bead = max_nucleotides_per_bead,
local_twist = local_twist,
dimensions = dimensions,
**model_parameters )
if sequence is None:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment