Skip to content
Snippets Groups Projects
Commit 351b9b7f authored by pinyili2's avatar pinyili2
Browse files

a

parent 5b203296
No related branches found
No related tags found
1 merge request!1Pinyili2
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
%% Cell type:code id:ef4ed889-492f-4ba3-8c1d-a0fc607b1f57 tags:
``` python
import pandas as pd
df=pd.read_json("test.json")
df
```
%% Output
name vstrands
0 test.json {'row': 12, 'col': 16, 'num': 0, 'scaf': [[-1,...
1 test.json {'row': 12, 'col': 15, 'num': 1, 'scaf': [[-1,...
2 test.json {'row': 13, 'col': 15, 'num': 2, 'scaf': [[-1,...
3 test.json {'row': 13, 'col': 16, 'num': 3, 'scaf': [[-1,...
4 test.json {'row': 13, 'col': 17, 'num': 4, 'scaf': [[-1,...
5 test.json {'row': 12, 'col': 17, 'num': 5, 'scaf': [[5, ...
%% Cell type:code id:051eb3b8-2bf1-4482-9815-19937f66e09d tags:
``` python
class vstrands (object):
def __init__(self):
self.vhelices = []
def add_vhelix(self, toadd):
self.vhelices.append(toadd)
def bbox(self):
rows = []
cols = []
lens = []
for h in self.vhelices:
rows.append(h.row)
cols.append(h.col)
lens.append(len(h.stap))
dr = DIST_SQUARE * (max(rows) - min(rows) + 2)
dc = DIST_SQUARE * (max(cols) - min(cols) + 2)
dl = 0.34 * (max(lens) + 2)
return 2 * max([dr, dc, dl]) * BOX_FACTOR
def __str__(self):
a = '{\n"vstrands":[\n'
if len(self.vhelices) > 0:
for h in self.vhelices:
a = a + str(h) + ','
a = a[0:len(a) - 1]
a = a + '}\n'
return a
class vhelix (object):
def __init__(self):
self.stapLoop = []
self.scafLoop = []
self.skip = []
self.loop = []
self.stap_colors = []
self.row = 0
self.col = 0
self.num = 0
self.stap = []
self.scaf = []
self.cad_index = -1
self.skiploop_bases = 0
def get_length(self):
return max (len(self.scaf), len(self.stap))
len = property (get_length)
def add_square(self, toadd, which):
if which == 'stap':
self.stap.append(toadd)
elif which == 'scaf':
self.scaf.append (toadd)
else:
base.Logger.log("Cannot add square that is not scaf or stap. Dying now", base.Logger.CRITICAL)
sys.exit(1)
def __str__(self):
a = '{\n'
a = a + '"stapLoop":['
if len(self.stapLoop) > 0:
for i in self.stapLoop:
a = a + str(i) + ','
a = a[0:len(a) - 1] # remove last comma
a = a + '],\n'
a = a + '"skip":['
if len(self.skip) > 0:
for e in self.skip:
a = a + str(e) + ','
a = a[0:len(a) - 1] # remove last comma
a = a + '],\n'
a = a + '"loop":['
if len(self.loop) > 0:
for e in self.loop:
a = a + str(e) + ','
a = a[0:len(a) - 1] # remove last comma
a = a + '],\n'
a = a + '"stap_colors":['
if len (self.stap_colors) > 0:
for e in self.stap_colors:
a = a + str(e) + ','
a = a[0:len(a) - 1] # remove last comma
a = a + '],\n'
a = a + '"row":' + str(self.row) + ',\n'
a = a + '"col":' + str(self.col) + ',\n'
a = a + '"num":' + str(self.num) + ',\n'
a = a + '"scafLoop":['
if len(self.scafLoop) > 0:
for i in self.scafLoop:
a = a + str(i) + ','
a = a[0:len(a) - 1] # remove last comma
a = a + '],\n'
a = a + '"stap":['
if len(self.stap) > 0:
for i in self.stap:
a = a + str(i) + ','
a = a[0:len(a) - 1] # remove last comma
a = a + '],\n'
a = a + '"scaf":['
if len(self.scaf) > 0:
for i in self.scaf:
a = a + str(i) + ','
a = a[0:len(a) - 1] # remove last comma
a = a + ']\n}'
return a
```
%% Cell type:code id:6e88e214-c870-47d1-9a47-504caa411530 tags:
``` python
L=[]
for i in df["vstrands"]:
L.append(i)
cadsys = vstrands()
vh = vhelix()
for s in L:
vh.stap = [ i for i in s["scaf"]]
vh.scaf = [i for i in s["stap"]]
vh.skiploop_bases = len(s["skip"]) + sum(s["loop"]) - sum(s["skip"])
cadsys.add_vhelix(vh)
```
%% Cell type:code id:326f70c5-6a80-4802-847a-ae0de82f0eda tags:
``` python
s0=cadsys.vhelices[0]
s0.__str__()
```
%% Output
'{\n"stapLoop":[],\n"skip":[],\n"loop":[],\n"stap_colors":[],\n"row":0,\n"col":0,\n"num":0,\n"scafLoop":[],\n"stap":[[5, 1, -1, -1],[5, 2, 5, 0],[5, 3, 5, 1],[-1, -1, 5, 2],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[5, 10, 4, 9],[5, 11, 5, 9],[5, 12, 5, 10],[5, 13, 5, 11],[5, 14, 5, 12],[5, 15, 5, 13],[5, 16, 5, 14],[5, 17, 5, 15],[5, 18, 5, 16],[5, 19, 5, 17],[5, 20, 5, 18],[5, 21, 5, 19],[5, 22, 5, 20],[-1, -1, 5, 21],[5, 24, -1, -1],[5, 25, 5, 23],[5, 26, 5, 24],[5, 27, 5, 25],[5, 28, 5, 26],[5, 29, 5, 27],[5, 30, 5, 28],[5, 31, 5, 29],[5, 32, 5, 30],[5, 33, 5, 31],[5, 34, 5, 32],[5, 35, 5, 33],[5, 36, 5, 34],[5, 37, 5, 35],[5, 38, 5, 36],[5, 39, 5, 37],[4, 39, 5, 38],[-1, -1, -1, -1],[-1, -1, -1, -1]],\n"scaf":[[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, 5, 10],[5, 9, 5, 11],[5, 10, 5, 12],[5, 11, 5, 13],[5, 12, 5, 14],[5, 13, 5, 15],[5, 14, 5, 16],[5, 15, 5, 17],[5, 16, 5, 18],[5, 17, 5, 19],[5, 18, 5, 20],[5, 19, 5, 21],[5, 20, 5, 22],[5, 21, 5, 23],[5, 22, 5, 24],[5, 23, 5, 25],[5, 24, 5, 26],[5, 25, 5, 27],[5, 26, 0, 27],[0, 28, 5, 29],[5, 28, 5, 30],[5, 29, 5, 31],[5, 30, 5, 32],[5, 31, 5, 33],[5, 32, 5, 34],[5, 33, 5, 35],[5, 34, 5, 36],[5, 35, 5, 37],[5, 36, 5, 38],[5, 37, 5, 39],[5, 38, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1]]\n}'
%% Cell type:code id:7b552911-d7de-4079-ab4d-e859f2281f36 tags:
``` python
vh.stap
```
%% Output
[[5, 1, -1, -1],
[5, 2, 5, 0],
[5, 3, 5, 1],
[-1, -1, 5, 2],
[-1, -1, -1, -1],
[-1, -1, -1, -1],
[-1, -1, -1, -1],
[-1, -1, -1, -1],
[-1, -1, -1, -1],
[5, 10, 4, 9],
[5, 11, 5, 9],
[5, 12, 5, 10],
[5, 13, 5, 11],
[5, 14, 5, 12],
[5, 15, 5, 13],
[5, 16, 5, 14],
[5, 17, 5, 15],
[5, 18, 5, 16],
[5, 19, 5, 17],
[5, 20, 5, 18],
[5, 21, 5, 19],
[5, 22, 5, 20],
[-1, -1, 5, 21],
[5, 24, -1, -1],
[5, 25, 5, 23],
[5, 26, 5, 24],
[5, 27, 5, 25],
[5, 28, 5, 26],
[5, 29, 5, 27],
[5, 30, 5, 28],
[5, 31, 5, 29],
[5, 32, 5, 30],
[5, 33, 5, 31],
[5, 34, 5, 32],
[5, 35, 5, 33],
[5, 36, 5, 34],
[5, 37, 5, 35],
[5, 38, 5, 36],
[5, 39, 5, 37],
[4, 39, 5, 38],
[-1, -1, -1, -1],
[-1, -1, -1, -1]]
%% Cell type:code id:94113dfa tags:
``` python
from mrdna.readers.cadnano_segments import *
from cadnano.document import Document
import cadnano
```
%% Output
_
_____ ___ _| |___ ___
| | _| . | | .'|
|_|_|_|_| |___|_|_|__,| v1.0a.dev74
it/its
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 1
----> 1 from mrdna.readers.cadnano_segments import *
2 from cadnano.document import Document
3 import cadnano
ModuleNotFoundError: No module named 'mrdna'
%% Cell type:code id:ec7f1e22-6a95-41f9-8c23-7603cc165837 tags:
``` python
```
%% Cell type:code id:40d7cdcf tags:
``` python
json_data=read_json_file("test.json")
part=decode_cadnano_part(json_data)
model=cadnano_part(part)
```
%% Output
Found cadnano version 2 file
%% Cell type:code id:4b3748a8 tags:
``` python
doc=Document()
cadnano.fileio.v2decode.decode(doc, json_data)
parts = [p for p in doc.getParts()]
part=parts[0]
```
%% Output
Found cadnano version 2 file
%% Cell type:code id:aa8a8cd9 tags:
``` python
part.__dict__.keys()
oligos = part.oligos()
for oligo in oligos:
print("{0}\t{1}\t\'{2}".format(oligo,
oligo.length(),
oligo.sequence()))
vhs = list(part.getIdNums()) # convert set to list
for vh_id in vhs: # display first 3 vhs
fwd_ss, rev_ss = part.getStrandSets(vh_id)
print('VH{0}'.format(vh_id))
print('\t', fwd_ss, '\t', [s.idxs() for s in fwd_ss.strands()], '\n\t\t\t\t',
[s.getColor() for s in fwd_ss.strands()])
print('\t', rev_ss, '\t', [s.idxs() for s in rev_ss.strands()], '\n\t\t\t\t',
[s.getColor() for s in rev_ss.strands()])
```
%% Output
Oligo_(0.1[38])_1328 23 'None
Oligo_(2.1[34])_9584 35 'None
Oligo_(1.1[36])_7488 188 'None
Oligo_(4.1[39])_4384 33 'None
Oligo_(5.0[9])_0240 23 'None
Oligo_(1.0[3])_8256 37 'None
Oligo_(3.0[0])_3296 33 'None
Oligo_(0.1[23])_9088 21 'None
VH0
<fwd_StrandSet(0)> [(5, 36)]
['#0066cc']
<rev_StrandSet(0)> [(2, 20), (21, 23), (24, 27), (28, 38)]
['#cc0000', '#b8056c', '#f74308', '#1700de']
VH1
<fwd_StrandSet(1)> [(3, 20), (21, 38)]
['#cc0000', '#b8056c']
<rev_StrandSet(1)> [(5, 18), (19, 36)]
['#0066cc', '#0066cc']
VH2
<fwd_StrandSet(2)> [(2, 18), (19, 32)]
['#0066cc', '#0066cc']
<rev_StrandSet(2)> [(0, 34)]
['#888888']
VH3
<fwd_StrandSet(3)> [(0, 20), (21, 34)]
['#cc0000', '#888888']
<rev_StrandSet(3)> [(2, 15), (16, 32)]
['#0066cc', '#0066cc']
VH4
<fwd_StrandSet(4)> [(9, 15), (16, 39)]
['#0066cc', '#0066cc']
<rev_StrandSet(4)> [(9, 20), (21, 39)]
['#cc0000', '#888888']
VH5
<fwd_StrandSet(5)> [(9, 27), (28, 39)]
['#f74308', '#1700de']
<rev_StrandSet(5)> [(9, 39)]
['#0066cc']
%% Cell type:code id:e0cab15d tags:
``` python
strands5 = [o.strand5p() for o in part.oligos()]
strands3 = [o.strand3p() for o in part.oligos()]
```
%% Cell type:code id:9811bf9a tags:
``` python
L=[o for o in part.oligos()]
dir(L[2])
```
%% Output
['__class__',
'__delattr__',
'__dict__',
'__dir__',
'__doc__',
'__eq__',
'__format__',
'__ge__',
'__getattribute__',
'__gt__',
'__hash__',
'__init__',
'__init_subclass__',
'__le__',
'__lt__',
'__module__',
'__ne__',
'__new__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__setattr__',
'__sizeof__',
'__slots__',
'__str__',
'__subclasshook__',
'__weakref__',
'_decrementLength',
'_incrementLength',
'_is_circular',
'_parent',
'_part',
'_props',
'_setColor',
'_setLength',
'_setLoop',
'_setProperty',
'_signals',
'_strand5p',
'_strandMergeUpdate',
'_strandSplitUpdate',
'addToPart',
'applyAbstractSequences',
'applyColor',
'applySequence',
'applySequenceCMD',
'clearAbstractSequences',
'connect',
'deleteLater',
'destroy',
'disconnect',
'displayAbstractSequences',
'dump',
'editable_properties',
'getAbsolutePositionAtLength',
'getColor',
'getModelProperties',
'getName',
'getNumberOfBasesToEachXover',
'getOutlineProperties',
'getProperty',
'getStrandLengths',
'isCircular',
'length',
'locString',
'oligoPropertyChangedSignal',
'oligoRemovedSignal',
'oligoSelectedChangedSignal',
'oligoSequenceAddedSignal',
'oligoSequenceClearedSignal',
'parent',
'part',
'refreshLength',
'remove',
'removeFromPart',
'sequence',
'sequenceExport',
'setParent',
'setPart',
'setProperty',
'setStrand5p',
'shallowCopy',
'shouldHighlight',
'signals',
'splitAtAbsoluteLengths',
'strand3p',
'strand5p',
'undoStack']
%% Cell type:code id:86403b4b tags:
``` python
part.insertions()
```
%% Output
defaultdict(dict, {0: {}, 1: {}, 2: {}, 3: {}, 4: {}, 5: {}})
%% Cell type:code id:5c7cea80 tags:
``` python
part.getModelProperties()
```
%% Output
{'name': 'NaPart1',
'color': '#0066cc',
'is_visible': True,
'active_phos': None,
'crossover_span_angle': 45,
'max_vhelix_length': 42,
'neighbor_active_angle': '',
'grid_type': <GridEnum.HONEYCOMB: 2>,
'virtual_helix_order': [0, 1, 2, 3, 4, 5],
'is_lattice': True,
<GridEnum.HONEYCOMB: 2>: <GridEnum.NONE: 0>}
%% Cell type:code id:136dcf08 tags:
``` python
import pdb
import numpy as np
import os,sys
import scipy
from mrdna import logger, devlogger
from mrdna.segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
from mrdna.arbdmodel.coords import quaternion_from_matrix, rotationAboutAxis, quaternion_slerp
from mrdna import get_resource_path
ref_stack_position = np.array((-2.41851735, -0.259761333, 3.39999978))
def _three_prime_list_to_five_prime(three_prime):
five_prime = -np.ones(three_prime.shape, dtype=int)
has_three_prime = np.where(three_prime >= 0)[0]
five_prime[three_prime[has_three_prime]] = has_three_prime
return five_prime
def _primes_list_to_strands(three_prime, five_prime):
five_prime_ends = np.where(five_prime < 0)[0]
strands = []
strand_is_circular = []
idx_to_strand = -np.ones(three_prime.shape, dtype=int)
def build_strand(nt_idx, conditional):
strand = [nt_idx]
idx_to_strand[nt_idx] = len(strands)
while conditional(nt_idx):
nt_idx = three_prime[nt_idx]
strand.append(nt_idx)
idx_to_strand[nt_idx] = len(strands)
strands.append( np.array(strand, dtype=int) )
for nt_idx in five_prime_ends:
build_strand(nt_idx,
lambda nt: three_prime[nt] >= 0)
strand_is_circular.append(False)
while True:
## print("WARNING: working on circular strand {}".format(len(strands)))
ids = np.where(idx_to_strand < 0)[0]
if len(ids) == 0: break
build_strand(ids[0],
lambda nt: three_prime[nt] >= 0 and \
idx_to_strand[three_prime[nt]] < 0)
strand_is_circular.append(True)
return strands, strand_is_circular
def find_stacks(centers, transforms):
## Find orientation and center of each nucleotide
expected_stack_positions = []
for R,c in zip(transforms,centers):
expected_stack_positions.append( c + ref_stack_position.dot(R) )
expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)
dists = scipy.spatial.distance_matrix(expected_stack_positions, centers)
dists = dists + 5*np.eye(len(dists))
idx1, idx2 = np.where(dists < 3.5)
## Convert distances to stacks
stacks_above = -np.ones(len(centers), dtype=int)
_z = np.array((0,0,1))
for i in np.unique(idx1):
js = idx2[ idx1 == i ]
with np.errstate(divide='ignore',invalid='ignore'):
angles = [np.arccos( transforms[j].T.dot( transforms[i].dot(_z) ).dot( _z ) ) for j in js]
angles = np.array( angles )
tmp = np.argmin(dists[i][js] + 1.0*angles)
j = js[tmp]
stacks_above[i] = j
return stacks_above
def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):
helixmap = -np.ones(basepairs.shape, dtype=int)
helixrank = -np.ones(basepairs.shape)
is_fwd = np.ones(basepairs.shape, dtype=int)
## Remove stacks with nts lacking a basepairs
nobp = np.where(basepairs < 0)[0]
stacks_above[nobp] = -1
stacks_with_nobp = np.in1d(stacks_above, nobp)
stacks_above[stacks_with_nobp] = -1
end_ids = np.where( (stacks_above < 0)*(basepairs >= 0) )[0]
hid = 0
for end in end_ids:
if helixmap[end] >= 0:
continue
rank = 0
nt = basepairs[end]
bp = basepairs[nt]
assert( bp == end )
if helixmap[nt] >= 0 or helixmap[bp] >= 0:
logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')
continue
# assert(helixmap[nt] == -1)
# assert(helixmap[bp] == -1)
helixmap[nt] = helixmap[bp] = hid
helixrank[nt] = helixrank[bp] = rank
is_fwd[bp] = 0
rank +=1
_tmp = [(nt,bp)]
while stacks_above[nt] >= 0:
nt = stacks_above[nt]
if basepairs[nt] < 0: break
bp = basepairs[nt]
if helixmap[nt] >= 0 or helixmap[bp] >= 0:
logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')
break
helixmap[nt] = helixmap[bp] = hid
helixrank[nt] = helixrank[bp] = rank
is_fwd[bp] = 0
_tmp.append((nt,bp))
rank +=1
hid += 1
## Create "helix" for each circular segment
intrahelical = []
processed = set()
unclaimed_bases = np.where( (basepairs >= 0)*(helixmap == -1) )[0]
for nt0 in unclaimed_bases:
if nt0 in processed: continue
nt = nt0
all_nts = [nt]
rank = 0
nt = nt0
bp = basepairs[nt]
if helixmap[nt] >= 0 or helixmap[bp] >= 0:
logger.warning(f'Ill-formed cylic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')
continue
helixmap[nt] = helixmap[bp] = hid
helixrank[nt] = helixrank[bp] = rank
is_fwd[bp] = 0
rank +=1
processed.add(nt)
processed.add(bp)
counter = 0
while stacks_above[nt] >= 0:
lastnt = nt
nt = stacks_above[nt]
bp = basepairs[nt]
if nt == nt0 or nt == basepairs[nt0]:
intrahelical.append((lastnt,nt0))
break
assert( bp >= 0 )
if helixmap[nt] >= 0 or helixmap[bp] >= 0:
logger.warning(f'Ill-formed cyclic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')
break
helixmap[nt] = helixmap[bp] = hid
helixrank[nt] = helixrank[bp] = rank
is_fwd[bp] = 0
processed.add(nt)
processed.add(bp)
rank +=1
hid += 1
return helixmap, helixrank, is_fwd, intrahelical
def set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation=None):
maxrank = np.max( hrank[hmap==hid] )
if maxrank == 0:
ids = np.where((hmap == hid))[0]
pos = np.mean( [coordinate[r,:] for r in ids ], axis=0 )
coords = [pos,pos]
contours = [0,1]
if orientation is not None:
ids = np.where((hmap == hid) * fwd)[0]
assert( len(ids) == 1 )
q = quaternion_from_matrix( orientation[ids[0]] )
quats = [q, q]
coords[-1] = pos + orientation[ids[0]].dot(np.array((0,0,1)))
else:
coords,contours,quats = [[],[],[]]
last_q = None
for rank in range(int(maxrank)+1):
ids = np.where((hmap == hid) * (hrank == rank))[0]
coords.append(np.mean( [coordinate[r,:] for r in ids ], axis=0 ))
contours.append( float(rank+0.5)/(maxrank+1) )
if orientation is not None:
ids = np.where((hmap == hid) * (hrank == rank) * fwd)[0]
assert(len(ids) == 1)
q = quaternion_from_matrix( orientation[ids[0]] )
if last_q is not None and last_q.dot(q) < 0:
q = -q
## Average quaterion with reverse direction
bp = basepair[ids[0]]
if bp >= 0:
bp_o = orientation[bp].dot(rotationAboutAxis(np.array((1,0,0)),180))
q2 = quaternion_from_matrix( bp_o )
if q.dot(q2) < 0:
q2 = -q2
## probably good enough, but slerp is better: q = (q + q2)*0.5
q = quaternion_slerp(q,q2,0.5)
quats.append(q)
last_q = q
coords = np.array(coords)
seg.set_splines(contours,coords)
if orientation is not None:
quats = np.array(quats)
seg.set_orientation_splines(contours,quats)
seg.start_position = coords[0,:]
seg.end_position = coords[-1,:]
def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
sequence=None, orientation=None,
max_basepairs_per_bead = 5,
max_nucleotides_per_bead = 5,
local_twist = False,
dimensions=(5000,5000,5000),
**model_parameters):
"""
Creates a SegmentModel object from lists of each nucleotide's
basepair, its stack (on 3' side) and its 3'-connected nucleotide
The first argument should be an N-by-3 numpy array containing the
coordinate of each nucleotide, where N is the number of
nucleotides. The following three arguments should be integer lists
where the i-th element corresponds to the i-th nucleotide; the
list element should the integer index of the corresponding
basepaired / stacked / phosphodiester-bonded nucleotide. If there
is no such nucleotide, the value should be -1.
Args:
basepair: List of each nucleotide's basepair's index
stack: List containing index of the nucleotide stacked on the 3' of each nucleotide
three_prime: List of each nucleotide's the 3' end of each nucleotide
Returns:
SegmentModel
"""
""" Validate Input """
inputs = (basepair,three_prime)
try:
basepair,three_prime = [np.array(a,dtype=int) for a in inputs]
except:
raise TypeError("One or more of the input lists could not be converted into a numpy array")
inputs = (basepair,three_prime)
coordinate = np.array(coordinate)
if np.any( [len(a.shape) > 1 for a in inputs] ):
raise ValueError("One or more of the input lists has the wrong dimensionality")
if len(coordinate.shape) != 2:
raise ValueError("Coordinate array has the wrong dimensionality")
inputs = (coordinate,basepair,three_prime)
if not np.all(np.diff([len(a) for a in inputs]) == 0):
raise ValueError("Inputs are not the same length")
num_nt = len(basepair)
if sequence is not None and len(sequence) != num_nt:
raise ValueError("The 'sequence' parameter is the wrong length {} != {}".format(len(sequence),num_nt))
if orientation is not None:
orientation = np.array(orientation)
if len(orientation.shape) != 3:
raise ValueError("The 'orientation' array has the wrong dimensionality (should be Nx3x3)")
if orientation.shape != (num_nt,3,3):
raise ValueError("The 'orientation' array is not properly formatted")
if stack is None:
if orientation is not None:
stack = find_stacks(coordinate, orientation)
else:
## Guess stacking based on 3' connectivity
stack = np.array(three_prime,dtype=int) # Assume nts on 3' ends are stacked
_stack_below = _three_prime_list_to_five_prime(stack)
_has_bp = (basepair >= 0)
_nostack = np.where( (stack == -1)*_has_bp )[0]
_has_stack_below = _stack_below[basepair[_nostack]] >= 0
_nostack2 = _nostack[_has_stack_below]
stack[_nostack2] = basepair[_stack_below[basepair[_nostack2]]]
else:
try:
stack = np.array(stack,dtype=int)
except:
raise TypeError("The 'stack' array could not be converted into a numpy integer array")
if len(stack.shape) != 1:
raise ValueError("The 'stack' array has the wrong dimensionality")
if len(stack) != num_nt:
raise ValueError("The length of the 'stack' array does not match other inputs")
bps = basepair # alias
""" Fix stacks: require that the stack of a bp of a base's stack is its bp """
_has_bp = (bps >= 0)
_has_stack = (stack >= 0)
_stack_has_basepair = (bps[stack] >= 0) * _has_stack
stack = np.where( (stack[bps[stack]] == bps) * _has_bp * _has_stack * _has_bp,
stack, -np.ones(len(stack),dtype=int) )
five_prime = _three_prime_list_to_five_prime(three_prime)
""" Build map of dsDNA helices and strands """
hmap,hrank,fwd,intrahelical = basepairs_and_stacks_to_helixmap(bps,stack)
double_stranded_helices = np.unique(hmap[hmap >= 0])
strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime)
""" Add ssDNA to hmap """
if len(double_stranded_helices) > 0:
hid = double_stranded_helices[-1]+1
else:
hid = 0
ss_residues = hmap < 0
#
if np.any(bps[ss_residues] != -1):
logger.warning(f'{np.sum(bps[ss_residues] != -1)} ssDNA nucleotides appear to have basepairs... ignoring')
for s,c in zip(strands, strand_is_circular):
strand_segment_ends = [i for i in np.where( np.diff(hmap[s]) != 0 )[0]] + [len(s)-1]
seg_start = 0
for i in strand_segment_ends:
if hmap[s[i]] < 0:
## Found single-stranded segment
ids = s[seg_start:i+1]
assert( np.all(hmap[ids] == -1) )
hmap[ids] = hid
hrank[ids] = np.arange(i+1-seg_start)
hid+=1
seg_start = i+1
if len(double_stranded_helices) > 0:
single_stranded_helices = np.arange(double_stranded_helices[-1]+1,hid)
else:
single_stranded_helices = np.arange(hid)
## Create double-stranded segments
doubleSegments = []
for hid in double_stranded_helices:
seg = DoubleStrandedSegment(name=str(hid),
num_bp = np.sum(hmap==hid)//2)
set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)
assert(hid == len(doubleSegments))
doubleSegments.append(seg)
## Create single-stranded segments
singleSegments = []
for hid in single_stranded_helices:
seg = SingleStrandedSegment(name=str(hid),
num_nt = np.sum(hmap==hid))
set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)
assert(hid == len(doubleSegments) + len(singleSegments))
singleSegments.append(seg)
## Find crossovers and 5prime/3prime ends
crossovers,prime5,prime3 = [[],[],[]]
for s,c in zip(strands,strand_is_circular):
tmp = np.where(np.diff(hmap[s]) != 0)[0]
for i in tmp:
crossovers.append( (s[i],s[i+1]) )
if c:
if hmap[s[-1]] != hmap[s[0]]:
crossovers.append( (s[-1],s[0]) )
else:
prime5.append(s[0])
prime3.append(s[-1])
## Add connections
allSegments = doubleSegments+singleSegments
for r1,r2 in crossovers:
seg1,seg2 = [allSegments[hmap[i]] for i in (r1,r2)]
nt1,nt2 = [hrank[i] for i in (r1,r2)]
f1,f2 = [fwd[i] for i in (r1,r2)]
## Handle connections at the ends
is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1))
is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0))
print(seg1,seg2, r1, r2, is_terminal1, is_terminal2)
if is_terminal1 or is_terminal2:
""" Ensure that we don't have three-way dsDNA junctions """
if is_terminal1 and (bps[r1] >= 0) and (five_prime[bps[r1]] >= 0) and (three_prime[r1] >= 0):
if (bps[five_prime[bps[r1]]] >= 0) and (bps[three_prime[r1]] >= 0):
# is_terminal1 = (three_prime[r1] == bps[five_prime[bps[r1]]])
is_terminal1 = hmap[five_prime[bps[r1]]] == hmap[three_prime[r1]]
if is_terminal2 and (bps[r2] >= 0) and (three_prime[bps[r2]] >= 0) and (five_prime[r2] >= 0):
if (bps[three_prime[bps[r2]]] >= 0) and (bps[five_prime[r2]] >= 0):
# is_terminal2 = (five_prime[r2] == bps[three_prime[bps[r2]]])
is_terminal2 = hmap[three_prime[bps[r2]]] == hmap[five_prime[r2]]
""" Place connection """
if is_terminal1 and is_terminal2:
end1 = seg1.end3 if f1 else seg1.start3
end2 = seg2.start5 if f2 else seg2.end5
seg1._connect_ends( end1, end2, type_='intrahelical')
else:
seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_="terminal_crossover")
else:
seg1.add_crossover(nt1,seg2,nt2,[f1,f2])
## Add 5prime/3prime ends
for r in prime5:
seg = allSegments[hmap[r]]
seg.add_5prime(hrank[r],fwd[r])
for r in prime3:
seg = allSegments[hmap[r]]
seg.add_3prime(hrank[r],fwd[r])
## Add intrahelical connections to circular helical sections
for nt0,nt1 in intrahelical:
seg = allSegments[hmap[nt0]]
assert( seg is allSegments[hmap[nt1]] )
if three_prime[nt0] >= 0:
if hmap[nt0] == hmap[three_prime[nt0]]:
seg.connect_end3(seg.start5)
bp0,bp1 = [bps[nt] for nt in (nt0,nt1)]
if three_prime[bp1] >= 0:
if hmap[bp1] == hmap[three_prime[bp1]]:
seg.connect_start3(seg.end5)
## Assign sequence
if sequence is not None:
for hid in range(len(allSegments)):
resids = np.where( (hmap==hid)*(fwd==1) )[0]
s = allSegments[hid]
s.sequence = [sequence[r] for r in sorted(resids,key=lambda x: hrank[x])]
## Build model
model = SegmentModel( allSegments,
max_basepairs_per_bead = max_basepairs_per_bead,
max_nucleotides_per_bead = max_nucleotides_per_bead,
local_twist = local_twist,
dimensions = dimensions,
**model_parameters )
model._reader_list_coordinates = coordinate
model._reader_list_basepair = basepair
model._reader_list_stack = stack
model._reader_list_three_prime = three_prime
model._reader_list_five_prime = five_prime
model._reader_list_sequence = sequence
model._reader_list_orientation = orientation
model._reader_list_hmap = hmap
model._reader_list_fwd = fwd
model._reader_list_hrank = hrank
if sequence is None:
for s in model.segments:
s.randomize_unset_sequence()
return model
```
%% Output
_
_____ ___ _| |___ ___
| | _| . | | .'|
|_|_|_|_| |___|_|_|__,| v1.0a.dev74
it/its
%% Cell type:code id:88f7d20e tags:
``` python
coordinate = [(0,0,3.4*i) for i in range(7)]
three_prime = [ 1, 2, 4,-1, 6, 3, 0]
basepair = [-1,-1, 3, 2, 5, 4,-1]
stack = [-1,-1, -1,-1,-1, -1,-1]
for i in [3,5]:
coordinate[i] = (1,0,3.4*i)
model = model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
max_basepairs_per_bead=1,
max_nucleotides_per_bead=1,
local_twist=False)
model.writePsf("list.psf")
model.writePdb("list.pdb")
```
%% Output
<DoubleStrandedSegment'> 1[1]> <DoubleStrandedSegment'> 0[1]> 5 3 True True
<SingleStrandedSegment'> 2[2]> <DoubleStrandedSegment'> 0[1]> 1 2 True True
<DoubleStrandedSegment'> 0[1]> <DoubleStrandedSegment'> 1[1]> 2 4 True True
<DoubleStrandedSegment'> 1[1]> <SingleStrandedSegment'> 3[1]> 4 6 True True
<SingleStrandedSegment'> 3[1]> <SingleStrandedSegment'> 2[2]> 6 0 True True
%% Cell type:code id:69d29f1e tags:
``` python
print([i.children for i in model.children])
print([i.connections for i in model.children])
```
%% Output
[[<SegmentParticle DNA on <DoubleStrandedSegment'> 0[1]>[1.00]>], [<SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[0.00]>, <SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.00]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.50]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 3[1]>[0.50]>]]
[[<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>], [<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>, <Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>], [<Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>], [<Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>]]
%% Cell type:code id:e103e8b4 tags:
``` python
s=model.children[0]
```
%% Cell type:code id:a30b582d tags:
``` python
s.segname
```
%% Output
'0'
%% Cell type:code id:dcd8bf8a tags:
``` python
coordinate
```
%% Output
[(0, 0, 0.0),
(0, 0, 3.4),
(0, 0, 6.8),
(1, 0, 10.2),
(0, 0, 13.6),
(1, 0, 17.0),
(0, 0, 20.4)]
%% Cell type:code id:bf20c9ef tags:
``` python
```
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment