Skip to content
Snippets Groups Projects
Commit abc6765e authored by cmaffeo2's avatar cmaffeo2
Browse files

model_from_basepair_stack_3prime detects circular DNA strands; needs testing

parent 6f9552db
No related branches found
No related tags found
No related merge requests found
......@@ -17,14 +17,35 @@ def _three_prime_list_to_five_prime(three_prime):
def _primes_list_to_strands(three_prime, five_prime):
five_prime_ends = np.where(five_prime < 0)[0]
strands = []
strand_is_circular = []
for nt_idx in five_prime_ends:
idx_to_strand = -np.ones(three_prime.shape, dtype=np.int)
def build_strand(nt_idx, conditional):
strand = [nt_idx]
while three_prime[nt_idx] >= 0:
idx_to_strand[nt_idx] = len(strands)
while conditional(nt_idx):
nt_idx = three_prime[nt_idx]
strand.append(nt_idx)
idx_to_strand[nt_idx] = len(strands)
strands.append( np.array(strand, dtype=np.int) )
return strands
for nt_idx in five_prime_ends:
build_strand(nt_idx,
lambda nt: three_prime[nt] >= 0)
strand_is_circular.append(False)
while True:
print("WARNING: working on circular strand {}".format(len(strands)))
ids = np.where(idx_to_strand < 0)[0]
if len(ids) == 0: break
build_strand(ids[0],
lambda nt: three_prime[nt] >= 0 and \
idx_to_strand[three_prime[nt]] < 0)
strand_is_circular.append(True)
return strands, strand_is_circular
def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):
......@@ -228,16 +249,18 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
""" Build map of dsDNA helices and strands """
hmap,hrank,fwd = basepairs_and_stacks_to_helixmap(bps,stack)
double_stranded_helices = np.unique(hmap[hmap >= 0])
strands = _primes_list_to_strands(three_prime, five_prime)
strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime)
""" Add ssDNA to hmap """
hid = double_stranded_helices[-1]+1
ss_residues = hmap < 0
assert( np.all(bps[ss_residues] == -1) )
for s in strands:
for s,c in zip(strands, strand_is_circular):
ss_residues = s[np.where(hmap[s] < 0)[0]]
if len(ss_residues) == 0: continue
if c:
print("WARNING: Circular ssDNA strands may not be modeled correctly")
resid_diff = np.diff(ss_residues)
tmp = np.where( resid_diff != 1 )[0]
......@@ -279,12 +302,16 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
## Find crossovers and 5prime/3prime ends
crossovers,prime5,prime3 = [[],[],[]]
for s in strands:
for s,c in zip(strands,strand_is_circular):
tmp = np.where(np.diff(hmap[s]) != 0)[0]
for i in tmp:
crossovers.append( (s[i],s[i+1]) )
prime5.append(s[0])
prime3.append(s[-1])
if c:
if hmap[s[-1]] != hmap[s[0]]:
crossovers.append( (s[-1],s[0]) )
else:
prime5.append(s[0])
prime3.append(s[-1])
## Add connections
allSegments = doubleSegments+singleSegments
......@@ -297,6 +324,15 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1))
is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0))
if is_terminal1 or is_terminal2:
""" Re-evaluate condition for being terminal so that both
strands must be connected at end to have an 'intrahelical'
connection """
if is_terminal1 and (bps[r1] >= 0):
is_terminal1 = (three_prime[r1] == five_prime[bps[r1]])
if is_terminal2 and (bps[r2] >= 0):
is_terminal2 = (five_prime[r2] == three_prime[bps[r2]])
""" Place connection """
if is_terminal1 and is_terminal2:
end1 = seg1.end3 if f1 else seg1.start3
end2 = seg2.start5 if f2 else seg2.end5
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment