diff --git a/mrdna/readers/test/segmentmodel_from_oxdna_pinyi.py b/mrdna/readers/test/segmentmodel_from_oxdna_pinyi.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a5e49b2f09802a4c75f31ef54041b577e798b49
--- /dev/null
+++ b/mrdna/readers/test/segmentmodel_from_oxdna_pinyi.py
@@ -0,0 +1,205 @@
+from mrdna import logger, devlogger
+from .segmentmodel_from_lists import model_from_basepair_stack_3prime
+from ..arbdmodel.coords import rotationAboutAxis
+import pandas as pd
+
+from oxlibs import *
+import numpy as np
+from scipy.spatial import distance_matrix
+pd.options.mode.chained_assignment = None # silence pandas' chained-assignment warning process-wide (default='warn')
+
+_seq_to_int_dict = dict(A=0,T=1,C=2,G=3) # base letter -> integer code
+_seq_to_int_dict = {k:str(v) for k,v in _seq_to_int_dict.items()} # re-keyed so the codes are strings ('0'..'3')
+
+_yrot = rotationAboutAxis(axis=(0,1,0), angle=180).dot(rotationAboutAxis(axis=(0,0,1),angle=-40)) # y-axis rotation composed with a z-axis rotation (angles presumably in degrees -- TODO confirm); right-multiplied onto every nucleotide frame
+
+def mrdna_model_from_oxdna(coordinate_file, topology_file,virt2nuc=None,get_nt_prop=False, **model_parameters):
+    """Construct an mrdna model from oxDNA coordinate and topology files.
+
+    Parameters
+    ----------
+    coordinate_file
+        oxDNA configuration readable by ``np.loadtxt``; the first 3 lines are
+        skipped. Columns 0-2 are positions (multiplied by 8.518, presumably
+        oxDNA length units to Angstroms -- TODO confirm), columns 3-5 the base
+        direction and columns 6-8 the (negated) normal direction.
+    topology_file
+        oxDNA topology readable by ``np.loadtxt``; the first line is skipped and
+        each row is parsed as (int, 1-char str, int, int). Field 1 is used as
+        the sequence and field 2 as the 3' neighbour index.
+    virt2nuc
+        Optional pickle holding a ``(vh_vb, pattern)`` pair whose ``vh_vb`` has
+        ``_scaf`` and ``_stap`` mappings ``(vh, zid) -> (strand, indices)``.
+        When given and usable, basepairs and stacks are derived from it instead
+        of from geometry. Only load pickles from trusted sources.
+    get_nt_prop
+        Currently unused; kept for interface compatibility.
+    **model_parameters
+        Forwarded to ``model_from_basepair_stack_3prime``.
+
+    Returns
+    -------
+    The object returned by ``model_from_basepair_stack_3prime`` with the
+    per-nucleotide table attached as ``model._dataframe``.
+
+    Raises
+    ------
+    Exception
+        If the geometric basepair search yields a non-mutual pair.
+    """
+    top_data = np.loadtxt(topology_file, skiprows=1,
+                          unpack=True,
+                          dtype=np.dtype('i4,U1,i4,i4')
+                          )
+    conf_data = np.loadtxt(coordinate_file, skiprows=3)
+
+    def _geometry(conf):
+        """Return (r, base_dir, basepair_pos, normal_dir, perp_dir, orientation) for a configuration array."""
+        pos = conf[:,:3] * 8.518
+        b_dir = conf[:,3:6]
+        # Site used for the basepair search (an earlier revision used 6.0 instead of 10.0)
+        bp_pos = pos + b_dir*10.0
+        n_dir = -conf[:,6:9]
+        p_dir = np.cross(b_dir, n_dir)
+        orient = np.array([np.array(o).T.dot(_yrot) for o in zip(p_dir,-b_dir,-n_dir)])
+        return pos, b_dir, bp_pos, n_dir, p_dir, orient
+
+    def _get_bp(sequence=None):
+        """Assign each nucleotide its nearest partner by geometry; -1 marks unpaired."""
+        # Large diagonal keeps a nucleotide from pairing with itself
+        dists = distance_matrix(r,basepair_pos) + np.eye(len(r))*1000
+        dists = 0.5*(dists + dists.T)
+
+        bp = np.array([np.argmin(da) for da in dists])
+
+        def _report(tag, i, j):
+            bpj = bp[j]
+            logger.warning( " ".join([str(_x) for _x in [tag, i, j, bp[i], bp[j], dists[i,j], dists[j,i], dists[bpj,j], dists[j,bpj]]]) )
+
+        for i,j in enumerate(bp):
+            if j == -1: continue
+            if dists[i,j] > 2:
+                bp[i] = -1
+            elif bp[j] != i:
+                _report("Bad pair", i, j)
+
+        # Any pair that is still not mutual after the distance cut is fatal
+        for i,j in enumerate(bp):
+            if j == -1: continue
+            if bp[j] != i:
+                _report("Bad pair2", i, j)
+                raise Exception(f"Inconsistent basepair assignment between nucleotides {i} and {j}")
+
+        if sequence is not None:
+            seq = sequence
+            bp_seq = sequence[bp]
+            bp_seq[bp==-1] = 'X'
+            # A may pair with T or U, so it is only bad when the partner is neither
+            # (the previous `|` test was always true and unpaired every A)
+            bad_bps = np.where( (bp >= 0) &
+                                (((seq == 'C') & (bp_seq != 'G')) |
+                                 ((seq == 'G') & (bp_seq != 'C')) |
+                                 ((seq == 'T') & (bp_seq != 'A')) |
+                                 ((seq == 'U') & (bp_seq != 'A')) |
+                                 ((seq == 'A') & (bp_seq != 'T') & (bp_seq != 'U'))
+                                ) )[0]
+            # Clear both members of every non-complementary pair
+            bp[bp[bad_bps]] = -1
+            bp[bad_bps] = -1
+
+        return bp
+
+    def _get_stack():
+        """Assign each nucleotide the nucleotide nearest to its expected stacking site; -1 if farther than 8."""
+        dists = distance_matrix( r + 3.5*normal_dir + 2.1*perp_dir -1*base_dir, r ) + np.eye(len(r))*1000
+        stack = np.array([np.argmin(da) for da in dists])
+        for i,j in enumerate(stack):
+            if dists[i,j] > 8:
+                stack[i] = -1
+        return stack
+
+    def _find_vh_vb_table(s,is_scaf):
+        """Flatten a ``(vh, zid) -> (strand, indices)`` mapping into one pd.Series per nucleotide."""
+        rows = []
+        for (vh, zid), (strand, indices) in s.items():
+            # Several nucleotides at one (vh, zid) get sub-ids "zid.0", "zid.1", ...
+            if len(indices) == 1:
+                zids = [str(zid)]
+            else:
+                zids = [str(zid)+"."+str(j) for j in range(len(indices))]
+            for index,z in zip(indices,zids):
+                rows.append(pd.Series({"index":index,"vh":vh,"zid":z,"strand":strand,"is_scaf":bool(is_scaf)}))
+        return rows
+
+    def get_virt2nuc(virt2nuc,top_data):
+        """Build the per-nucleotide table (vh, zid, strand, threeprime, seq, stack, bp) from a virt2nuc pickle."""
+        # NOTE(review): unpickling executes arbitrary code; virt2nuc must come from a trusted source
+        vh_vb,pattern=pd.read_pickle(virt2nuc)
+        scaf_rows=_find_vh_vb_table(vh_vb._scaf,1)
+        stap_rows=_find_vh_vb_table(vh_vb._stap,0)
+        nt_prop=pd.DataFrame(scaf_rows+stap_rows)
+        nt_prop.set_index("index",inplace=True)
+        nt_prop.sort_index(inplace=True)
+        nt_prop["threeprime"]=top_data[2]
+        nt_prop["seq"]=top_data[1]
+        nt_prop["stack"]=top_data[2]
+        # A 3' neighbour on a different helix is bonded but not stacked
+        for i in nt_prop.index:
+            tp=nt_prop.loc[i,"threeprime"]
+            if tp in nt_prop.index:
+                if nt_prop.loc[tp,"vh"]!=nt_prop.loc[i,"vh"]:
+                    nt_prop.loc[i,"stack"]=-1
+        # The partner sits at the same (vh, zid) on the other strand type
+        keys=list(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["is_scaf"]))
+        bp_map=dict(zip(keys,nt_prop.index))
+        bp=-np.ones(len(nt_prop.index),dtype=int)
+        for counter,(vh,zid,is_scaf) in enumerate(keys):
+            bp[counter]=bp_map.get((vh,zid,not(is_scaf)),-1)
+        nt_prop["bp"]=bp
+        return nt_prop
+
+    nt_prop = None
+    if virt2nuc is not None:
+        try:
+            nt_prop = get_virt2nuc(virt2nuc,top_data)
+            r, base_dir, basepair_pos, normal_dir, perp_dir, orientation = _geometry(conf_data)
+            # Store one array per row; a DataFrame column cannot hold a 2D/3D array directly
+            nt_prop["r"] = list(r)
+            nt_prop["orientation"] = list(orientation)
+        except Exception as err:
+            logger.warning(f'mrdna_model_from_oxdna: could not use virt2nuc ({err!r}); inferring basepairs and stacks from geometry')
+            nt_prop = None
+
+    if nt_prop is not None:
+        seq = nt_prop["seq"]
+        bp = nt_prop["bp"]
+        stack = nt_prop["stack"]
+        three_prime = nt_prop["threeprime"]
+    else:
+        ## Reverse direction so indices run 5'-to-3'
+        top_data = [a[::-1] for a in top_data]
+        conf_data = conf_data[::-1,:]
+
+        r, base_dir, basepair_pos, normal_dir, perp_dir, orientation = _geometry(conf_data)
+        seq = top_data[1]
+        bp = _get_bp(seq)
+        stack = _get_stack()
+
+        # Remap neighbour indices into the reversed ordering; a missing neighbour (-1) maps to len(r)
+        three_prime = len(r) - top_data[2] -1
+        three_prime[three_prime >= len(r)] = -1
+        nt_prop = pd.DataFrame({"r":list(r),"bp":bp,"stack":stack,"threeprime":three_prime, "seq":seq,"orientation":list(orientation)})
+
+    def _debug_write_bonds():
+        """Debug aid (not called by default): write test.pdb, test.psf and test.stack.psf showing the detected bonds."""
+        from ..arbdmodel import ParticleType, PointParticle, ArbdModel, Group
+        bond = tuple()
+        b_t = ParticleType('BASE')
+        p_t = ParticleType('PHOS')
+
+        # Two particles per nucleotide: parts[2*i] is PHOS, parts[2*i+1] is the base
+        parts = []
+        for i,(r0,r_bp,seq0) in enumerate(zip(r,basepair_pos,seq)):
+            p = PointParticle(p_t, name='PHOS', position = r0, resid=i)
+            b = PointParticle(b_t, name=seq0, position = 0.5*(r0+r_bp), resid=i)
+            parts.extend((p,b))
+
+        model = ArbdModel(parts)
+        model.writePdb('test.pdb')
+
+        for i,(three_prime0,bp0) in enumerate(zip(three_prime, bp)):
+            model.add_bond(parts[2*i],parts[2*i+1],bond)
+            j = three_prime0
+            if j >= 0:
+                model.add_bond(parts[2*i],parts[2*j],bond)
+            j = bp0
+            if j >= 0:
+                model.add_bond(parts[2*i+1],parts[2*j+1],bond)
+        model.writePsf('test.psf')
+
+        model.bonds = []
+        for i,j in enumerate(stack):
+            if j >= 0:
+                model.add_bond(parts[2*i],parts[2*j],bond)
+        model.writePsf('test.stack.psf')
+    ## _debug_write_bonds()
+
+    logger.info(f'mrdna_model_from_oxdna: num_bp, num_ss_nt, num_stacked: {np.sum(bp>=0)//2} {np.sum(bp<0)} {np.sum(stack >= 0)}')
+
+    model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
+
+    ## Disabled debugging recipe:
+    ##   model.DEBUG = True
+    ##   model.generate_bead_model(1,1,False,True,one_bead_per_monomer=True)
+    ##   for seg in model.segments:
+    ##       for bead in seg.beads:
+    ##           bead.position = bead.position + np.random.standard_normal(3)
+    ##   simulate( model, output_name='test', directory='test4' )
+
+    # Expose the per-nucleotide table to callers
+    model._dataframe=nt_prop
+    return model
+
+if __name__ == "__main__":
+ mrdna_model_from_oxdna("0-from-collab/nanopore.oxdna","0-from-collab/nanopore.top") # ad-hoc smoke run on hard-coded sample files; the returned model is discarded
+ # mrdna_model_from_oxdna("2-oxdna.manual/output/from_mrdna-oxdna-min.last.conf","0-from-collab/nanopore.top")
diff --git a/mrdna/readers/test/test.ipynb b/mrdna/readers/test/test.ipynb
index 270b98c02dd96d0b2ed26defc0c0193c92b9de32..a3bd160b751ec4f98fab99afeb23a4076371dfd4 100644
--- a/mrdna/readers/test/test.ipynb
+++ b/mrdna/readers/test/test.ipynb
@@ -3,129 +3,55 @@
{
"cell_type": "code",
"execution_count": 1,
- "id": "1955acef",
+ "id": "03eb8540",
"metadata": {
"scrolled": true
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pip3 install termcolor\n"
+ ]
+ }
+ ],
"source": [
"import pandas as pd\n",
"import pickle\n",
"import numpy as np\n",
- "\n",
- "df=pd.read_json(\"test.json\")\n",
- "d=list(df[\"vstrands\"])"
+ "import json\n",
+ "import re\n",
+ "import cadnano\n",
+ "from cadnano.document import Document\n"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "5ebd0c89-5dd5-41a3-85e1-10b53cb34113",
+ "execution_count": 2,
+ "id": "cb40f6b8",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>c</th>\n",
- " <th>d</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>1</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>2</td>\n",
- " <td>3</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>3</td>\n",
- " <td>4</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>4</td>\n",
- " <td>5</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>5</td>\n",
- " <td>6</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>6</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " c d\n",
- "0 1 2\n",
- "1 2 3\n",
- "2 3 4\n",
- "3 4 5\n",
- "4 5 6\n",
- "5 6 1"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "c=[1,2,3,4,5,6]\n",
- "d=[2,3,4,5,6,1]\n",
- "pd.DataFrame({\"c\":c,\"d\":d})"
+ "from cadnano.views.pathview import pathstyles"
]
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "1a6b8cb2",
+ "execution_count": 1,
+ "id": "ea7e8da0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "pip3 install termcolor\n"
- ]
- },
- {
- "ename": "ModuleNotFoundError",
- "evalue": "No module named 'mrdna'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Document\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmrdna\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marbdmodel\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcoords\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m readArbdCoords, readAvgArbdCoords, rotationAboutAxis\n",
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mrdna'"
+ " _\n",
+ " _____ ___ _| |___ ___\n",
+ "| | _| . | | .'|\n",
+ "|_|_|_|_| |___|_|_|__,| v1.0a.dev74 \n",
+ "it/its\n",
+ "\n"
]
}
],
@@ -137,19 +63,204 @@
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "c7d2f43e",
+ "execution_count": 15,
+ "id": "3a134bd9-0d8c-40b2-bf71-7b2310e09802",
"metadata": {},
"outputs": [],
"source": [
- "df=pd.DataFrame(data=d)\n",
- "df=df.set_index(\"num\")"
+ "def get_lattice(part):\n",
+ " lattice_type = None\n",
+ " _gt = part.getGridType()\n",
+ " try:\n",
+ " lattice_type = _gt.name.lower()\n",
+ " except:\n",
+ " if _gt == 1:\n",
+ " lattice_type = 'square'\n",
+ " elif _gt == 2:\n",
+ " lattice_type = 'honeycomb'\n",
+ " else:\n",
+ " print(lattice_type)\n",
+ " return lattice_type\n",
+ "\n",
+ "\n",
+ "def read_json_file(filename):\n",
+ " import cadnano\n",
+ " from cadnano.document import Document\n",
+ "\n",
+ " try:\n",
+ " with open(filename) as ch:\n",
+ " json_data = json.load(ch)\n",
+ " except:\n",
+ " with open(filename) as ch:\n",
+ " content = \"\"\n",
+ " for l in ch:\n",
+ " l = re.sub(r\"'\", r'\"', l)\n",
+ " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n",
+ " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n",
+ " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n",
+ " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n",
+ " content += l+\"\\n\"\n",
+ " json_data = json.loads(content)\n",
+ "\n",
+ " try:\n",
+ " doc = Document()\n",
+ " cadnano.fileio.v3decode.decode(doc, json_data)\n",
+ " decoder = 3\n",
+ " except:\n",
+ " doc = Document()\n",
+ " cadnano.fileio.v2decode.decode(doc, json_data)\n",
+ " decoder = 2\n",
+ "\n",
+ " parts = [p for p in doc.getParts()]\n",
+ " if len(parts) != 1:\n",
+ " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n",
+ " part = parts[0]\n",
+ "\n",
+ " if decoder == 2:\n",
+ " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n",
+ " l = get_lattice(part)\n",
+ " if l == 'square':\n",
+ " for id_num in part.getIdNums():\n",
+ " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n",
+ " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n",
+ " df=pd.DataFrame(json_data[\"vstrands\"])\n",
+ " n_df=df.set_index(\"num\")\n",
+ " return part\n",
+ "\n",
+ "def get_helix_angle(part, helix_id, indices):\n",
+ " \"\"\" Get \"start_orientation\" for helix \"\"\"\n",
+ " # import ipdb\n",
+ " # ipdb.set_trace()\n",
+ "\n",
+ " \"\"\" FROM CADNANO2.5\n",
+ " + angle is CCW\n",
+ " - angle is CW\n",
+ " Right handed DNA rotates clockwise from 5' to 3'\n",
+ " we use the convention the 5' end starts at 0 degrees\n",
+ " and it's pair is minor_groove_angle degrees away\n",
+ " direction, hence the minus signs. eulerZ\n",
+ " \"\"\"\n",
+ "\n",
+ " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n",
+ " ['helical_pitch',\n",
+ " 'bases_per_repeat',\n",
+ " 'turns_per_repeat',\n",
+ " 'eulerZ',\n",
+ " 'minor_groove_angle']]\n",
+ " twist_per_base = tpr*360./bpr\n",
+ " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n",
+ " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n",
+ " return angle\n",
+ "\n",
+ "def gen_id_series(strand,part):\n",
+ " df=pd.DataFrame(columns=[\"vh\",\"zid\",\"fwd\",\"stack_tuple\",\"threeprime_tuple\",\"x\",\"y\",\"z\"],index=range(strand.totalLength()),dtype=object)\n",
+ " df[\"vh\"]=strand._id_num\n",
+ " df[\"fwd\"]=strand.isForward()\n",
+ " df[\"x\"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10\n",
+ " df[\"y\"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10\n",
+ " id_lo,id_hi=strand.idxs()\n",
+ " zids=[str(i) for i in range(id_lo,id_hi+1)]\n",
+ " insert_dict={}\n",
+ " insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])\n",
+ " z=np.arange(id_lo,id_hi+1)\n",
+ " zids=[str(i) for i in range(id_lo,id_hi+1)]\n",
+ " z=list(np.arange(id_lo,id_hi+1))\n",
+ " zids=[str(i) for i in range(id_lo,id_hi+1)]\n",
+ " for insert_base in insert_dict:\n",
+ " z_ind=zids.index(str(insert_base))\n",
+ " z_val=insert_dict[insert_base]\n",
+ " z_pos_ind=z.index(insert_base)\n",
+ " zids.pop(z_ind)\n",
+ " z.pop(z_pos_ind)\n",
+ " if z_val!=-1:\n",
+ " #l=[str(insert_base)+\".\"+str(i) for i in range(z_val+1)]\n",
+ " l=list(range(z_val+1))\n",
+ " l.reverse()\n",
+ " for k in l: \n",
+ " zids.insert(z_ind,str(insert_base)+\".\"+str(k))\n",
+ " z.insert(z_pos_ind,insert_base+k/(z_val+1))\n",
+ " df[\"zid\"]=zids\n",
+ " df[\"z\"]=np.array(z)*3.4\n",
+ " \n",
+ " \n",
+ " L=[(df[\"vh\"][i],df[\"zid\"][i],df[\"fwd\"][i]) for i in df.index]\n",
+ " if strand.isForward()==True:\n",
+ " df[\"stack_tuple\"]=L[1:]+[-1]\n",
+ " if strand.connection3p() is None:\n",
+ " df[\"threeprime_tuple\"]=L[1:]+[-1]\n",
+ " else:\n",
+ " df[\"threeprime_tuple\"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]\n",
+ " \n",
+ " \n",
+ " else:\n",
+ " df[\"stack_tuple\"]=[-1]+L[0:-1]\n",
+ " if strand.connection3p() is None:\n",
+ " df[\"threeprime_tuple\"]=[-1]+L[0:-1]\n",
+ " else:\n",
+ " df[\"threeprime_tuple\"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1]\n",
+ " ## cadnano 3.1 sequence assign is wrong if there is insertion or deletion. \n",
+ " df[\"r\"]=[np.array([df[\"x\"][i],df[\"y\"][i],df[\"z\"][i]],dtype=np.float32) for i in df.index]\n",
+ " \n",
+ " return [pd.Series(df.loc[i]) for i in df.index]\n",
+ "\n"
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "62f9b7f3",
+ "execution_count": 18,
+ "id": "04c497ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def gen_prop_table(part):\n",
+ " strand_set=[]\n",
+ " for i in part.getidNums():\n",
+ " fwd,rev=part.getStrandSets(i)\n",
+ " [strand_set.append(i) for i in fwd.strands()]\n",
+ " [strand_set.append(i) for i in rev.strands()]\n",
+ " id_series=[]\n",
+ " for i in strand_set:\n",
+ " id_series=id_series+gen_id_series(i,part)\n",
+ " \n",
+ " nt_prop=pd.DataFrame(id_series)\n",
+ " nt_prop.reset_index(inplace=True)\n",
+ " nt_prop[\"seq\"]=-1\n",
+ " ind_tuple=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"fwd\"]))\n",
+ " stacks=[]\n",
+ " for i in list(nt_prop[\"stack_tuple\"]):\n",
+ " if i ==-1:\n",
+ " stacks.append(i)\n",
+ " else:\n",
+ " stacks.append(ind_tuple.index(i))\n",
+ " nt_prop[\"stack\"]=stacks\n",
+ " tprime=[]\n",
+ " for i in list(nt_prop[\"threeprime_tuple\"]):\n",
+ " if i ==-1:\n",
+ " tprime.append(i)\n",
+ " else:\n",
+ " tprime.append(ind_tuple.index(i))\n",
+ " nt_prop[\"threeprime\"]=tprime\n",
+ " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n",
+ " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid]\n",
+ " nt_prop=nt_prop.fillna(-1)\n",
+ " counter=-1\n",
+ " bp=-np.ones(len(nt_prop.index),dtype=int)\n",
+ " bp_map=dict(zip(ind_tuple,nt_prop.index))\n",
+ " for i,j,k in ind_tuple:\n",
+ " counter+=1\n",
+ " try:\n",
+ " bp[counter]=bp_map[(i,j,not(k))]\n",
+ " except:\n",
+ " pass\n",
+ " nt_prop[\"bp\"]=bp\n",
+ "\n",
+ " return nt_prop"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "11b0f5de-80f6-4845-b97f-a9d31e7be90a",
"metadata": {},
"outputs": [
{
@@ -173,1125 +284,949 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
- " <th>row</th>\n",
- " <th>col</th>\n",
- " <th>scaf</th>\n",
- " <th>stap</th>\n",
- " <th>loop</th>\n",
- " <th>skip</th>\n",
- " <th>scafLoop</th>\n",
- " <th>stapLoop</th>\n",
- " <th>stap_colors</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>num</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
+ " <th>index</th>\n",
+ " <th>vh</th>\n",
+ " <th>zid</th>\n",
+ " <th>fwd</th>\n",
+ " <th>stack_tuple</th>\n",
+ " <th>threeprime_tuple</th>\n",
+ " <th>x</th>\n",
+ " <th>y</th>\n",
+ " <th>z</th>\n",
+ " <th>r</th>\n",
+ " <th>seq</th>\n",
+ " <th>stack</th>\n",
+ " <th>threeprime</th>\n",
+ " <th>orientation</th>\n",
+ " <th>bp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>12</td>\n",
- " <td>16</td>\n",
- " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
- " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[]</td>\n",
- " <td>[]</td>\n",
- " <td>[[23, 13369809], [38, 12060012]]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>12</td>\n",
- " <td>15</td>\n",
- " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
- " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[]</td>\n",
- " <td>[]</td>\n",
- " <td>[[3, 1501302]]</td>\n",
+ " <td>0</td>\n",
+ " <td>0</td>\n",
+ " <td>5</td>\n",
+ " <td>True</td>\n",
+ " <td>(0, 6, True)</td>\n",
+ " <td>(0, 6, True)</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>22.50</td>\n",
+ " <td>17.0</td>\n",
+ " <td>[0.0, 22.5, 17.0]</td>\n",
+ " <td>-1</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " <td>81.428571</td>\n",
+ " <td>38</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>6</td>\n",
+ " <td>True</td>\n",
+ " <td>(0, 7, True)</td>\n",
+ " <td>(0, 7, True)</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>22.50</td>\n",
+ " <td>20.4</td>\n",
+ " <td>[0.0, 22.5, 20.4]</td>\n",
+ " <td>-1</td>\n",
+ " <td>2</td>\n",
+ " <td>2</td>\n",
+ " <td>115.714286</td>\n",
+ " <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
- " <td>13</td>\n",
- " <td>15</td>\n",
- " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n",
- " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[]</td>\n",
- " <td>[]</td>\n",
- " <td>[[34, 8947848]]</td>\n",
+ " <td>2</td>\n",
+ " <td>0</td>\n",
+ " <td>7</td>\n",
+ " <td>True</td>\n",
+ " <td>(0, 8, True)</td>\n",
+ " <td>(0, 8, True)</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>22.50</td>\n",
+ " <td>23.8</td>\n",
+ " <td>[0.0, 22.5, 23.8]</td>\n",
+ " <td>-1</td>\n",
+ " <td>3</td>\n",
+ " <td>3</td>\n",
+ " <td>150.000000</td>\n",
+ " <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
- " <td>13</td>\n",
- " <td>16</td>\n",
- " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n",
- " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[]</td>\n",
- " <td>[]</td>\n",
- " <td>[[0, 13369344]]</td>\n",
+ " <td>3</td>\n",
+ " <td>0</td>\n",
+ " <td>8</td>\n",
+ " <td>True</td>\n",
+ " <td>(0, 9, True)</td>\n",
+ " <td>(0, 9, True)</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>22.50</td>\n",
+ " <td>27.2</td>\n",
+ " <td>[0.0, 22.5, 27.2]</td>\n",
+ " <td>-1</td>\n",
+ " <td>4</td>\n",
+ " <td>4</td>\n",
+ " <td>184.285714</td>\n",
+ " <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
- " <td>13</td>\n",
- " <td>17</td>\n",
- " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n",
- " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[]</td>\n",
- " <td>[]</td>\n",
- " <td>[[39, 8947848]]</td>\n",
+ " <td>4</td>\n",
+ " <td>0</td>\n",
+ " <td>9</td>\n",
+ " <td>True</td>\n",
+ " <td>(0, 10, True)</td>\n",
+ " <td>(0, 10, True)</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>22.50</td>\n",
+ " <td>30.6</td>\n",
+ " <td>[0.0, 22.5, 30.6]</td>\n",
+ " <td>-1</td>\n",
+ " <td>5</td>\n",
+ " <td>5</td>\n",
+ " <td>218.571429</td>\n",
+ " <td>42</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>5</th>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>410</th>\n",
" <td>12</td>\n",
- " <td>17</td>\n",
- " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n",
- " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
- " <td>[]</td>\n",
- " <td>[]</td>\n",
- " <td>[[9, 0]]</td>\n",
+ " <td>5</td>\n",
+ " <td>35</td>\n",
+ " <td>False</td>\n",
+ " <td>(5, 34, False)</td>\n",
+ " <td>(5, 34, False)</td>\n",
+ " <td>19.485574</td>\n",
+ " <td>11.25</td>\n",
+ " <td>119.0</td>\n",
+ " <td>[19.485573, 11.25, 119.0]</td>\n",
+ " <td>-1</td>\n",
+ " <td>409</td>\n",
+ " <td>409</td>\n",
+ " <td>1110.000000</td>\n",
+ " <td>375</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>411</th>\n",
+ " <td>13</td>\n",
+ " <td>5</td>\n",
+ " <td>36</td>\n",
+ " <td>False</td>\n",
+ " <td>(5, 35, False)</td>\n",
+ " <td>(5, 35, False)</td>\n",
+ " <td>19.485574</td>\n",
+ " <td>11.25</td>\n",
+ " <td>122.4</td>\n",
+ " <td>[19.485573, 11.25, 122.4]</td>\n",
+ " <td>-1</td>\n",
+ " <td>410</td>\n",
+ " <td>410</td>\n",
+ " <td>1144.285714</td>\n",
+ " <td>376</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>412</th>\n",
+ " <td>14</td>\n",
+ " <td>5</td>\n",
+ " <td>37</td>\n",
+ " <td>False</td>\n",
+ " <td>(5, 36, False)</td>\n",
+ " <td>(5, 36, False)</td>\n",
+ " <td>19.485574</td>\n",
+ " <td>11.25</td>\n",
+ " <td>125.8</td>\n",
+ " <td>[19.485573, 11.25, 125.8]</td>\n",
+ " <td>-1</td>\n",
+ " <td>411</td>\n",
+ " <td>411</td>\n",
+ " <td>1178.571429</td>\n",
+ " <td>377</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>413</th>\n",
+ " <td>15</td>\n",
+ " <td>5</td>\n",
+ " <td>38</td>\n",
+ " <td>False</td>\n",
+ " <td>(5, 37, False)</td>\n",
+ " <td>(5, 37, False)</td>\n",
+ " <td>19.485574</td>\n",
+ " <td>11.25</td>\n",
+ " <td>129.2</td>\n",
+ " <td>[19.485573, 11.25, 129.2]</td>\n",
+ " <td>-1</td>\n",
+ " <td>412</td>\n",
+ " <td>412</td>\n",
+ " <td>1212.857143</td>\n",
+ " <td>378</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>414</th>\n",
+ " <td>16</td>\n",
+ " <td>5</td>\n",
+ " <td>39</td>\n",
+ " <td>False</td>\n",
+ " <td>(5, 38, False)</td>\n",
+ " <td>(5, 38, False)</td>\n",
+ " <td>19.485574</td>\n",
+ " <td>11.25</td>\n",
+ " <td>132.6</td>\n",
+ " <td>[19.485573, 11.25, 132.6]</td>\n",
+ " <td>-1</td>\n",
+ " <td>413</td>\n",
+ " <td>413</td>\n",
+ " <td>1247.142857</td>\n",
+ " <td>379</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
+ "<p>415 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
- " row col scaf \\\n",
- "num \n",
- "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
- "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
- "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n",
- "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n",
- "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n",
- "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n",
- "\n",
- " stap \\\n",
- "num \n",
- "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n",
- "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
- "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n",
- "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n",
- "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
- "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
+ " index vh zid fwd stack_tuple threeprime_tuple x y \\\n",
+ "0 0 0 5 True (0, 6, True) (0, 6, True) 0.000000 22.50 \n",
+ "1 1 0 6 True (0, 7, True) (0, 7, True) 0.000000 22.50 \n",
+ "2 2 0 7 True (0, 8, True) (0, 8, True) 0.000000 22.50 \n",
+ "3 3 0 8 True (0, 9, True) (0, 9, True) 0.000000 22.50 \n",
+ "4 4 0 9 True (0, 10, True) (0, 10, True) 0.000000 22.50 \n",
+ ".. ... .. .. ... ... ... ... ... \n",
+ "410 12 5 35 False (5, 34, False) (5, 34, False) 19.485574 11.25 \n",
+ "411 13 5 36 False (5, 35, False) (5, 35, False) 19.485574 11.25 \n",
+ "412 14 5 37 False (5, 36, False) (5, 36, False) 19.485574 11.25 \n",
+ "413 15 5 38 False (5, 37, False) (5, 37, False) 19.485574 11.25 \n",
+ "414 16 5 39 False (5, 38, False) (5, 38, False) 19.485574 11.25 \n",
"\n",
- " loop \\\n",
- "num \n",
- "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
- "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
- "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
- "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
- "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
- "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
+ " z r seq stack threeprime orientation \\\n",
+ "0 17.0 [0.0, 22.5, 17.0] -1 1 1 81.428571 \n",
+ "1 20.4 [0.0, 22.5, 20.4] -1 2 2 115.714286 \n",
+ "2 23.8 [0.0, 22.5, 23.8] -1 3 3 150.000000 \n",
+ "3 27.2 [0.0, 22.5, 27.2] -1 4 4 184.285714 \n",
+ "4 30.6 [0.0, 22.5, 30.6] -1 5 5 218.571429 \n",
+ ".. ... ... ... ... ... ... \n",
+ "410 119.0 [19.485573, 11.25, 119.0] -1 409 409 1110.000000 \n",
+ "411 122.4 [19.485573, 11.25, 122.4] -1 410 410 1144.285714 \n",
+ "412 125.8 [19.485573, 11.25, 125.8] -1 411 411 1178.571429 \n",
+ "413 129.2 [19.485573, 11.25, 129.2] -1 412 412 1212.857143 \n",
+ "414 132.6 [19.485573, 11.25, 132.6] -1 413 413 1247.142857 \n",
"\n",
- " skip scafLoop stapLoop \\\n",
- "num \n",
- "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
- "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
- "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
- "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
- "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
- "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
+ " bp \n",
+ "0 38 \n",
+ "1 39 \n",
+ "2 40 \n",
+ "3 41 \n",
+ "4 42 \n",
+ ".. ... \n",
+ "410 375 \n",
+ "411 376 \n",
+ "412 377 \n",
+ "413 378 \n",
+ "414 379 \n",
"\n",
- " stap_colors \n",
- "num \n",
- "0 [[23, 13369809], [38, 12060012]] \n",
- "1 [[3, 1501302]] \n",
- "2 [[34, 8947848]] \n",
- "3 [[0, 13369344]] \n",
- "4 [[39, 8947848]] \n",
- "5 [[9, 0]] "
+ "[415 rows x 15 columns]"
]
},
- "execution_count": 5,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df"
+ "gen_prop_table(p)"
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "b317d21a",
+ "execution_count": null,
+ "id": "f2116b88",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mrdna\n",
+ "from mrdna.readers import read_list"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "4c954133",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Found cadnano version 2 file\n"
- ]
+ "data": {
+ "text/plain": [
+ "(415,)"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "doc = Document()\n",
- "def read_json_file(filename):\n",
- " import json\n",
- " import re\n",
- "\n",
- " try:\n",
- " with open(filename) as ch:\n",
- " data = json.load(ch)\n",
- " except:\n",
- " with open(filename) as ch:\n",
- " content = \"\"\n",
- " for l in ch:\n",
- " l = re.sub(r\"'\", r'\"', l)\n",
- " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n",
- " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n",
- " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n",
- " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n",
- " content += l+\"\\n\"\n",
- " data = json.loads(content)\n",
- " return data\n",
- "f=read_json_file(\"test.json\")\n",
- "cadnano.fileio.v2decode.decode(doc, f)\n",
- "\n"
+ "np.array(list(nt_prop['bp'])).shape"
]
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "dc7eb261",
+ "execution_count": 560,
+ "id": "5ee54071",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(array([ 12, 13, 14, ..., 13920, 13921, 13922]),)"
+ ]
+ },
+ "execution_count": 560,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "def get_lattice(part):\n",
- " lattice_type = None\n",
- " _gt = part.getGridType()\n",
- " try:\n",
- " lattice_type = _gt.name.lower()\n",
- " except:\n",
- " if _gt == 1:\n",
- " lattice_type = 'square'\n",
- " elif _gt == 2:\n",
- " lattice_type = 'honeycomb'\n",
- " else:\n",
- " print(\"WARNING: unable to determine cadnano part lattice type\")\n",
- " return lattice_type\n"
+ "np.where(np.array(nt_prop[\"bp\"])!=-1)"
]
},
{
"cell_type": "code",
- "execution_count": 35,
- "id": "1bf753c6",
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_json_file(filename):\n",
- " import json\n",
- " import re\n",
- " import cadnano\n",
- " from cadnano.document import Document\n",
- "\n",
- " try:\n",
- " with open(filename) as ch:\n",
- " json_data = json.load(ch)\n",
- " except:\n",
- " with open(filename) as ch:\n",
- " content = \"\"\n",
- " for l in ch:\n",
- " l = re.sub(r\"'\", r'\"', l)\n",
- " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n",
- " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n",
- " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n",
- " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n",
- " content += l+\"\\n\"\n",
- " json_data = json.loads(content)\n",
- "\n",
- " try:\n",
- " doc = Document()\n",
- " cadnano.fileio.v3decode.decode(doc, json_data)\n",
- " decoder = 3\n",
- " except:\n",
- " doc = Document()\n",
- " cadnano.fileio.v2decode.decode(doc, json_data)\n",
- " decoder = 2\n",
- "\n",
- " parts = [p for p in doc.getParts()]\n",
- " if len(parts) != 1:\n",
- " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n",
- " part = parts[0]\n",
- "\n",
- " if decoder == 2:\n",
- " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n",
- " l = get_lattice(part)\n",
- " if l == 'square':\n",
- " for id_num in part.getIdNums():\n",
- " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n",
- " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n",
- " df=pd.DataFrame(json_data[\"vstrands\"])\n",
- " n_df=df.set_index(\"num\")\n",
- " else:\n",
- " raise(\"Not yet implemented\")\n",
- " \n",
- " return part,df,decoder\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 72,
- "id": "17ac5a29-3c76-4b8b-9a98-6343bc91e9e8",
+ "execution_count": 21,
+ "id": "e1588c54",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1.json test.json.oxdna test2.ipynb\n",
- "Na_liu.json test.json.top test3.ipynb\n",
- "rest_scaf_col.json test.sc test_cad2.5.json\n",
- "test.ipynb test.seq.json test_insert.json\n",
- "test.json test.virt2nuc test_insert_2.5.json\n"
- ]
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>vh</th>\n",
+ " <th>zid</th>\n",
+ " <th>is_scaf</th>\n",
+ " <th>r</th>\n",
+ " <th>bp</th>\n",
+ " <th>stack</th>\n",
+ " <th>threeprime</th>\n",
+ " <th>seq</th>\n",
+ " <th>orientation</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>0</td>\n",
+ " <td>5</td>\n",
+ " <td>True</td>\n",
+ " <td>[0.0, 2.25, 1.7000000000000002]</td>\n",
+ " <td>213</td>\n",
+ " <td>-1</td>\n",
+ " <td>1</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[0.14904226617617466, -0.9888308262251284, 0....</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>0</td>\n",
+ " <td>6</td>\n",
+ " <td>True</td>\n",
+ " <td>[0.0, 2.25, 2.04]</td>\n",
+ " <td>214</td>\n",
+ " <td>-1</td>\n",
+ " <td>2</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[-0.4338837391175583, -0.900968867902419, 0.0...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>0</td>\n",
+ " <td>7</td>\n",
+ " <td>True</td>\n",
+ " <td>[0.0, 2.25, 2.3800000000000003]</td>\n",
+ " <td>215</td>\n",
+ " <td>-1</td>\n",
+ " <td>3</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[-0.8660254037844388, -0.49999999999999994, 0...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>0</td>\n",
+ " <td>8</td>\n",
+ " <td>True</td>\n",
+ " <td>[0.0, 2.25, 2.72]</td>\n",
+ " <td>216</td>\n",
+ " <td>-1</td>\n",
+ " <td>4</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[-0.9972037971811805, 0.07473009358642399, 0....</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>0</td>\n",
+ " <td>9</td>\n",
+ " <td>True</td>\n",
+ " <td>[0.0, 2.25, 3.06]</td>\n",
+ " <td>217</td>\n",
+ " <td>-1</td>\n",
+ " <td>5</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[-0.7818314824680299, 0.6234898018587334, 0.0...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>410</th>\n",
+ " <td>5</td>\n",
+ " <td>35</td>\n",
+ " <td>False</td>\n",
+ " <td>[1.948557375, 1.125, 11.9]</td>\n",
+ " <td>205</td>\n",
+ " <td>-1</td>\n",
+ " <td>411</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[0.8660254037844375, -0.5000000000000019, 0.0...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>411</th>\n",
+ " <td>5</td>\n",
+ " <td>36</td>\n",
+ " <td>False</td>\n",
+ " <td>[1.948557375, 1.125, 12.24]</td>\n",
+ " <td>206</td>\n",
+ " <td>-1</td>\n",
+ " <td>412</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[0.4338837391175605, -0.900968867902418, 0.0]...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>412</th>\n",
+ " <td>5</td>\n",
+ " <td>37</td>\n",
+ " <td>False</td>\n",
+ " <td>[1.948557375, 1.125, 12.58]</td>\n",
+ " <td>-1</td>\n",
+ " <td>-1</td>\n",
+ " <td>413</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[-0.14904226617617078, -0.9888308262251292, 0...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>413</th>\n",
+ " <td>5</td>\n",
+ " <td>38</td>\n",
+ " <td>False</td>\n",
+ " <td>[1.948557375, 1.125, 12.920000000000002]</td>\n",
+ " <td>-1</td>\n",
+ " <td>-1</td>\n",
+ " <td>414</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[-0.6801727377709186, -0.7330518718298275, 0....</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>414</th>\n",
+ " <td>5</td>\n",
+ " <td>39</td>\n",
+ " <td>False</td>\n",
+ " <td>[1.948557375, 1.125, 13.260000000000002]</td>\n",
+ " <td>-1</td>\n",
+ " <td>-1</td>\n",
+ " <td>-1</td>\n",
+ " <td>-1</td>\n",
+ " <td>[[-0.9749279121818233, -0.222520933956317, 0.0...</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>415 rows × 9 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " vh zid is_scaf r bp stack \\\n",
+ "0 0 5 True [0.0, 2.25, 1.7000000000000002] 213 -1 \n",
+ "1 0 6 True [0.0, 2.25, 2.04] 214 -1 \n",
+ "2 0 7 True [0.0, 2.25, 2.3800000000000003] 215 -1 \n",
+ "3 0 8 True [0.0, 2.25, 2.72] 216 -1 \n",
+ "4 0 9 True [0.0, 2.25, 3.06] 217 -1 \n",
+ ".. .. ... ... ... ... ... \n",
+ "410 5 35 False [1.948557375, 1.125, 11.9] 205 -1 \n",
+ "411 5 36 False [1.948557375, 1.125, 12.24] 206 -1 \n",
+ "412 5 37 False [1.948557375, 1.125, 12.58] -1 -1 \n",
+ "413 5 38 False [1.948557375, 1.125, 12.920000000000002] -1 -1 \n",
+ "414 5 39 False [1.948557375, 1.125, 13.260000000000002] -1 -1 \n",
+ "\n",
+ " threeprime seq orientation \n",
+ "0 1 -1 [[0.14904226617617466, -0.9888308262251284, 0.... \n",
+ "1 2 -1 [[-0.4338837391175583, -0.900968867902419, 0.0... \n",
+ "2 3 -1 [[-0.8660254037844388, -0.49999999999999994, 0... \n",
+ "3 4 -1 [[-0.9972037971811805, 0.07473009358642399, 0.... \n",
+ "4 5 -1 [[-0.7818314824680299, 0.6234898018587334, 0.0... \n",
+ ".. ... ... ... \n",
+ "410 411 -1 [[0.8660254037844375, -0.5000000000000019, 0.0... \n",
+ "411 412 -1 [[0.4338837391175605, -0.900968867902418, 0.0]... \n",
+ "412 413 -1 [[-0.14904226617617078, -0.9888308262251292, 0... \n",
+ "413 414 -1 [[-0.6801727377709186, -0.7330518718298275, 0.... \n",
+ "414 -1 -1 [[-0.9749279121818233, -0.222520933956317, 0.0... \n",
+ "\n",
+ "[415 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "!ls"
+ "nt_prop"
]
},
{
"cell_type": "code",
- "execution_count": 73,
- "id": "1c3104ac-787f-4d32-9917-1c7111427925",
+ "execution_count": 468,
+ "id": "156dcda2",
"metadata": {},
"outputs": [],
"source": [
- "with open(\"test_insert_2.5.json\") as ch:\n",
- " json_data = json.load(ch)"
+ "scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n",
+ "stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n"
]
},
{
"cell_type": "code",
- "execution_count": 77,
- "id": "9b534b7c",
+ "execution_count": 500,
+ "id": "6413d856",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{'name': 'NaPart1',\n",
- " 'color': '#0066cc',\n",
- " 'is_visible': True,\n",
- " 'active_phos': None,\n",
- " 'crossover_span_angle': 45,\n",
- " 'max_vhelix_length': 42,\n",
- " 'neighbor_active_angle': '',\n",
- " 'grid_type': 2,\n",
- " 'virtual_helix_order': [0, 1, 2, 3, 4, 5, 6],\n",
- " 'is_lattice': True,\n",
- " '2': 0,\n",
- " 'virtual_helices': {'name': ['vh0', 'vh1', 'vh2', 'vh3', 'vh4', 'vh5', 'vh6'],\n",
- " 'is_visible': [True, True, True, True, True, True, True],\n",
- " 'color': ['#0066cc',\n",
- " '#0066cc',\n",
- " '#0066cc',\n",
- " '#0066cc',\n",
- " '#0066cc',\n",
- " '#0066cc',\n",
- " '#0066cc'],\n",
- " 'eulerZ': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n",
- " 'neighbor_active_angle': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n",
- " 'neighbors': ['[1, 5]',\n",
- " '[0, 2]',\n",
- " '[1, 3]',\n",
- " '[2, 4]',\n",
- " '[3, 5]',\n",
- " '[0, 4]',\n",
- " '[]'],\n",
- " 'bases_per_repeat': [21, 21, 21, 21, 21, 21, 21],\n",
- " 'turns_per_repeat': [2, 2, 2, 2, 2, 2, 2],\n",
- " 'repeat_hint': [2, 2, 2, 2, 2, 2, 2],\n",
- " 'helical_pitch': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n",
- " 'minor_groove_angle': [180.0, 180.0, 180.0, 180.0, 180.0, 180.0, 180.0],\n",
- " 'length': [42, 42, 42, 42, 42, 42, 42],\n",
- " 'z': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]},\n",
- " 'origins': [[7.7942295, 9.0, 0.0],\n",
- " [5.845672125, 7.875, 0.0],\n",
- " [5.845672125, 5.625, 0.0],\n",
- " [7.7942295, 4.5, 0.0],\n",
- " [9.742786875, 5.625, 0.0],\n",
- " [9.742786875, 7.875, 0.0],\n",
- " [-13.639901625, -1.125, 0.0]],\n",
- " 'directions': [[0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0]],\n",
- " 'vh_list': [[0, 42], [1, 42], [2, 42], [3, 42], [4, 42], [5, 42], [6, 42]],\n",
- " 'strands': {'indices': [[[[5, 36], [39, 41]],\n",
- " [[2, 20], [21, 23], [24, 27], [28, 38]]],\n",
- " [[[3, 20], [21, 38]], [[5, 18], [19, 36], [39, 41]]],\n",
- " [[[2, 18], [19, 32], [39, 41]], [[0, 34]]],\n",
- " [[[0, 20], [21, 34]], [[2, 15], [16, 32], [37, 41]]],\n",
- " [[[0, 3], [9, 15], [16, 39]], [[9, 20], [21, 39]]],\n",
- " [[[9, 27], [28, 39]], [[0, 3], [9, 22], [23, 39]]],\n",
- " [[[1, 31]], [[1, 31]]]],\n",
- " 'properties': [[['#0066cc', '#0066cc'],\n",
- " ['#16e876', '#cc01d1', '#000000', '#b8056c']],\n",
- " [['#16e876', '#cc01d1'], ['#0066cc', '#0066cc', '#0066cc']],\n",
- " [['#0066cc', '#0066cc', '#0066cc'], ['#888888']],\n",
- " [['#cc0000', '#888888'], ['#0066cc', '#0066cc', '#0066cc']],\n",
- " [['#0066cc', '#0066cc', '#0066cc'], ['#cc0000', '#888888']],\n",
- " [['#000000', '#b8056c'], ['#0066cc', '#0066cc', '#0066cc']],\n",
- " [['#0066cc'], ['#0066cc']]]},\n",
- " 'insertions': [[0, 13, 10],\n",
- " [0, 29, 11],\n",
- " [1, 10, 1],\n",
- " [2, 11, -1],\n",
- " [3, 28, -1]],\n",
- " 'xovers': [[0, True, 36, 1, False, 36],\n",
- " [0, False, 21, 1, True, 21],\n",
- " [0, False, 28, 5, True, 28],\n",
- " [1, True, 20, 0, False, 20],\n",
- " [1, False, 5, 0, True, 5],\n",
- " [1, False, 19, 2, True, 19],\n",
- " [2, True, 18, 1, False, 18],\n",
- " [2, True, 32, 3, False, 32],\n",
- " [3, True, 20, 4, False, 20],\n",
- " [3, False, 2, 2, True, 2],\n",
- " [3, False, 16, 4, True, 16],\n",
- " [4, True, 15, 3, False, 15],\n",
- " [4, True, 39, 5, False, 39],\n",
- " [4, False, 21, 3, True, 21],\n",
- " [5, True, 27, 0, False, 27],\n",
- " [5, False, 9, 4, True, 9]],\n",
- " 'oligos': [{'id_num': 6,\n",
- " 'idx5p': 31,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo2304',\n",
- " 'color': '#0066cc',\n",
- " 'length': 31,\n",
- " 'is_visible': True},\n",
- " {'id_num': 0,\n",
- " 'idx5p': 23,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo8960',\n",
- " 'color': '#cc01d1',\n",
- " 'length': 21,\n",
- " 'is_visible': True},\n",
- " {'id_num': 5,\n",
- " 'idx5p': 3,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo9296',\n",
- " 'color': '#0066cc',\n",
- " 'length': 4,\n",
- " 'is_visible': True},\n",
- " {'id_num': 5,\n",
- " 'idx5p': 9,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo1120',\n",
- " 'color': '#000000',\n",
- " 'length': 23,\n",
- " 'is_visible': True},\n",
- " {'id_num': 6,\n",
- " 'idx5p': 1,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo7728',\n",
- " 'color': '#0066cc',\n",
- " 'length': 31,\n",
- " 'is_visible': True},\n",
- " {'id_num': 3,\n",
- " 'idx5p': 41,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo4848',\n",
- " 'color': '#0066cc',\n",
- " 'length': 5,\n",
- " 'is_visible': True},\n",
- " {'id_num': 2,\n",
- " 'idx5p': 39,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo3552',\n",
- " 'color': '#0066cc',\n",
- " 'length': 3,\n",
- " 'is_visible': True},\n",
- " {'id_num': 3,\n",
- " 'idx5p': 0,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo7008',\n",
- " 'color': '#cc0000',\n",
- " 'length': 33,\n",
- " 'is_visible': True},\n",
- " {'id_num': 4,\n",
- " 'idx5p': 0,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo5696',\n",
- " 'color': '#0066cc',\n",
- " 'length': 4,\n",
- " 'is_visible': True},\n",
- " {'id_num': 4,\n",
- " 'idx5p': 39,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo7856',\n",
- " 'color': '#888888',\n",
- " 'length': 32,\n",
- " 'is_visible': True},\n",
- " {'id_num': 1,\n",
- " 'idx5p': 41,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo6016',\n",
- " 'color': '#0066cc',\n",
- " 'length': 3,\n",
- " 'is_visible': True},\n",
- " {'id_num': 0,\n",
- " 'idx5p': 38,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo4560',\n",
- " 'color': '#b8056c',\n",
- " 'length': 34,\n",
- " 'is_visible': True},\n",
- " {'id_num': 0,\n",
- " 'idx5p': 39,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo5504',\n",
- " 'color': '#0066cc',\n",
- " 'length': 3,\n",
- " 'is_visible': True},\n",
- " {'id_num': 5,\n",
- " 'idx5p': 22,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo3488',\n",
- " 'color': '#0066cc',\n",
- " 'length': 208,\n",
- " 'is_visible': True},\n",
- " {'id_num': 1,\n",
- " 'idx5p': 3,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo1648',\n",
- " 'color': '#16e876',\n",
- " 'length': 48,\n",
- " 'is_visible': True},\n",
- " {'id_num': 2,\n",
- " 'idx5p': 34,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo1536',\n",
- " 'color': '#888888',\n",
- " 'length': 34,\n",
- " 'is_visible': True}],\n",
- " 'instance_properties': [{'slice:position': [0.0, 0.0],\n",
- " 'grid:position': [0.0, 0.0],\n",
- " 'path:position': [0.0, 0.0]}],\n",
- " 'uuid': '217c2ce287e943ca8d0e8cde1ffa3291'}"
+ "1"
]
},
- "execution_count": 77,
+ "execution_count": 500,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "s=json_data[\"parts\"][0]\n"
+ "nttype(vslist[\"scaf\"][30])[146]"
]
},
{
"cell_type": "code",
- "execution_count": 80,
- "id": "be6cf774-3ef7-464c-8f34-9877e73a16d0",
+ "execution_count": 498,
+ "id": "fe8797db",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[[[5, 36], [39, 41]], [[2, 20], [21, 23], [24, 27], [28, 38]]]"
+ "0"
]
},
- "execution_count": 80,
+ "execution_count": 498,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "s[\"strands\"][\"indices\"][0]"
+ "vhi,zidi=np.where(np.array(scaf_id)==1)\n",
+ "scaf_id[30][146]"
]
},
{
"cell_type": "code",
- "execution_count": 71,
- "id": "6ec811a1-297b-4782-827e-d7c8eebb5daa",
+ "execution_count": 480,
+ "id": "9f1b975f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "<fwd_StrandSet(2)>.<Strand(39, 41)>"
+ "True"
]
},
- "execution_count": 71,
+ "execution_count": 480,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "s=[i for i in part.oligos()]\n",
- "l=s[0]\n",
- "l.strand3p()"
+ "scaf_id[30][146]==np.array(scaf_id)[0][9]"
]
},
{
"cell_type": "code",
- "execution_count": 68,
- "id": "188d4ed0-1664-44f1-8318-f11f4fd0e0bd",
+ "execution_count": 549,
+ "id": "a16f3fd0",
"metadata": {},
"outputs": [
{
- "ename": "AttributeError",
- "evalue": "'NucleicAcidPart' object has no attribute 'strand3p'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[68], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpart\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrand3p\u001b[49m\n",
- "\u001b[0;31mAttributeError\u001b[0m: 'NucleicAcidPart' object has no attribute 'strand3p'"
- ]
+ "data": {
+ "text/plain": [
+ "Int64Index([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+ " ...\n",
+ " 39, 39, 39, 39, 39, 39, 39, 39, 39, 39],\n",
+ " dtype='int64', name='num', length=7560)"
+ ]
+ },
+ "execution_count": 549,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "part.strand3p"
+ "def nttype(scafs):\n",
+ " def judge(i):\n",
+ " if i ==[-1,-1,-1,-1]:\n",
+ " return 0\n",
+ " else: return 1\n",
+ " n=np.array([judge(i) for i in scafs])\n",
+ " return n\n",
+ "d={}\n",
+ "vslist.index[vhi]"
]
},
{
"cell_type": "code",
- "execution_count": 53,
- "id": "6e2bd586-1f68-44d1-982d-93837c64e616",
+ "execution_count": 544,
+ "id": "b9f25d41",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "([(5, 36), (39, 41)], ['#0066cc', '#0066cc'])"
+ "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n",
+ " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n",
+ " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n",
+ " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)"
]
},
- "execution_count": 53,
+ "execution_count": 544,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "fwd,rev=part.getStrandSets(0)\n",
- "x=[]\n",
- "fwd.dump(x)"
+ "np.where(vslist.index[vhi]!=vhi)"
]
},
{
"cell_type": "code",
- "execution_count": 27,
- "id": "cbb83c93",
+ "execution_count": 550,
+ "id": "976095ce",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "vh 29\n",
+ "zid 83\n",
+ "is_scaf True\n",
+ "r [-17.537016375, 28.125, 28.220000000000002]\n",
+ "bp -1\n",
+ "stack -1\n",
+ "threeprime -1\n",
+ "seq -1\n",
+ "orientation [[-0.5633200580636211, 0.8262387743159955, 0.0...\n",
+ "Name: 7394, dtype: object"
+ ]
+ },
+ "execution_count": 550,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "n=dict(json_data)"
+ "nt_prop.loc[7394]"
]
},
{
"cell_type": "code",
- "execution_count": 33,
- "id": "7700a744-858d-4f83-b3be-fd7abb67ae76",
+ "execution_count": 548,
+ "id": "ac8f5067",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{'name': 'NaPart1',\n",
- " 'color': '#0066cc',\n",
- " 'is_visible': True,\n",
- " 'active_phos': None,\n",
- " 'crossover_span_angle': 45,\n",
- " 'max_vhelix_length': 42,\n",
- " 'neighbor_active_angle': '',\n",
- " 'grid_type': 2,\n",
- " 'virtual_helix_order': [0, 1, 2, 3, 4, 5],\n",
- " 'is_lattice': True,\n",
- " 'virtual_helices': {'name': ['vh0', 'vh1', 'vh2', 'vh3', 'vh4', 'vh5'],\n",
- " 'is_visible': [True, True, True, True, True, True],\n",
- " 'color': ['#0066cc', '#0066cc', '#0066cc', '#0066cc', '#0066cc', '#0066cc'],\n",
- " 'eulerZ': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n",
- " 'neighbor_active_angle': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n",
- " 'neighbors': ['[1, 5]', '[0, 2]', '[1, 3]', '[2, 4]', '[3, 5]', '[0, 4]'],\n",
- " 'bases_per_repeat': [21, 21, 21, 21, 21, 21],\n",
- " 'turns_per_repeat': [2, 2, 2, 2, 2, 2],\n",
- " 'repeat_hint': [2, 2, 2, 2, 2, 2],\n",
- " 'helical_pitch': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n",
- " 'minor_groove_angle': [180.0, 180.0, 180.0, 180.0, 180.0, 180.0],\n",
- " 'length': [42, 42, 42, 42, 42, 42],\n",
- " 'z': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]},\n",
- " 'origins': [[-1.948557375, 1.125, 0.0],\n",
- " [0.0, 2.25, 0.0],\n",
- " [1.948557375, 1.125, 0.0],\n",
- " [1.948557375, -1.125, 0.0],\n",
- " [0.0, -2.25, 0.0],\n",
- " [-1.948557375, -1.125, 0.0]],\n",
- " 'directions': [[0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0],\n",
- " [0.0, 0.0, 1.0]],\n",
- " 'vh_list': [[0, 42], [1, 42], [2, 42], [3, 42], [4, 42], [5, 42]],\n",
- " 'strands': {'indices': [[[[0, 2], [3, 20], [21, 35]], []],\n",
- " [[], [[0, 20], [21, 41]]],\n",
- " [[[0, 23], [24, 41]], []],\n",
- " [[], [[0, 9], [10, 23], [24, 30], [31, 41]]],\n",
- " [[[0, 9], [10, 30], [31, 41]], []],\n",
- " [[], [[0, 2], [3, 35], [36, 41]]]],\n",
- " 'properties': [[['#0066cc', '#f7931e', '#f7931e'], []],\n",
- " [[], ['#f7931e', '#f7931e']],\n",
- " [['#f7931e', '#f7931e'], []],\n",
- " [[], ['#0066cc', '#f7931e', '#f7931e', '#0066cc']],\n",
- " [['#0066cc', '#f7931e', '#0066cc'], []],\n",
- " [[], ['#333333', '#f7931e', '#0066cc']]]},\n",
- " 'insertions': [],\n",
- " 'xovers': [[0, True, 20, 1, False, 20],\n",
- " [0, True, 35, 5, False, 35],\n",
- " [1, False, 0, 2, True, 0],\n",
- " [1, False, 21, 0, True, 21],\n",
- " [2, True, 23, 3, False, 23],\n",
- " [2, True, 41, 1, False, 41],\n",
- " [3, False, 10, 4, True, 10],\n",
- " [3, False, 24, 2, True, 24],\n",
- " [3, False, 31, 4, True, 31],\n",
- " [4, True, 9, 3, False, 9],\n",
- " [4, True, 30, 3, False, 30],\n",
- " [5, False, 3, 0, True, 3]],\n",
- " 'oligos': [{'id_num': 4,\n",
- " 'idx5p': 0,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo3632',\n",
- " 'color': '#0066cc',\n",
- " 'length': 20,\n",
- " 'is_visible': True},\n",
- " {'id_num': 3,\n",
- " 'idx5p': 30,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': True,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo8000',\n",
- " 'color': '#f7931e',\n",
- " 'length': 192,\n",
- " 'is_visible': True},\n",
- " {'id_num': 3,\n",
- " 'idx5p': 41,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo0160',\n",
- " 'color': '#0066cc',\n",
- " 'length': 22,\n",
- " 'is_visible': True},\n",
- " {'id_num': 5,\n",
- " 'idx5p': 41,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo7152',\n",
- " 'color': '#0066cc',\n",
- " 'length': 6,\n",
- " 'is_visible': True},\n",
- " {'id_num': 0,\n",
- " 'idx5p': 0,\n",
- " 'is_5p_fwd': True,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo8896',\n",
- " 'color': '#0066cc',\n",
- " 'length': 3,\n",
- " 'is_visible': True},\n",
- " {'id_num': 5,\n",
- " 'idx5p': 2,\n",
- " 'is_5p_fwd': False,\n",
- " 'is_circular': False,\n",
- " 'sequence': None,\n",
- " 'name': 'oligo1168',\n",
- " 'color': '#333333',\n",
- " 'length': 3,\n",
- " 'is_visible': True}],\n",
- " 'instance_properties': [{'slice:position': [0.0, 0.0],\n",
- " 'grid:position': [0.0, 0.0],\n",
- " 'path:position': [-12.994158258098764, -4.331386086032921]}],\n",
- " 'uuid': '7029b213616f4ab5a1adcbf0d3a59edb'}"
+ "Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
+ " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 29, 31, 32, 33,\n",
+ " 34, 35, 36, 37, 38, 39, 41, 40, 42, 44, 46, 48, 50],\n",
+ " dtype='int64', name='num')"
]
},
- "execution_count": 33,
+ "execution_count": 548,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "s=n[\"parts\"][0]\n",
- "s"
+ "vslist.index"
]
},
{
"cell_type": "code",
- "execution_count": 199,
- "id": "3bb28a94",
+ "execution_count": 527,
+ "id": "1006fc48",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([0. , 2.25, 3.4 ])"
+ "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n",
+ " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n",
+ " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n",
+ " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)"
]
},
- "execution_count": 199,
+ "execution_count": 527,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "p.getCoordinate(0,10)"
+ "n=list(nt_prop[\"zid\"])\n",
+ "np.where(np.array(list(nt_prop[\"vh\"]))==29)"
]
},
{
"cell_type": "code",
- "execution_count": 434,
- "id": "be894ade",
+ "execution_count": 503,
+ "id": "09c7e7d4",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "((30, 146), True) is not in list",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m<ipython-input-503-1b9956d4cdaf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnt_prop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list"
+ ]
+ }
+ ],
"source": [
- "def mrdna_model_from_cadnano(json_file,**model_parameters):\n",
- " part,vslist=read_json_file(json_file)\n",
- " props = part.getModelProperties().copy()\n",
- " try:\n",
- " if props.get('point_type') == PointType.ARBITRARY:\n",
- " # TODO add code to encode Parts with ARBITRARY point configurations\n",
- " raise NotImplementedError(\"Not implemented\")\n",
- " except:\n",
- " try:\n",
- " vh_props, origins = part.helixPropertiesAndOrigins()\n",
- " except:\n",
- " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n",
- " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n",
- " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n",
- " cad_bps=part.getIndices(0)\n",
- " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n",
- " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n",
- " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n",
- " is_scaf=np.zeros(totnt,dtype=bool)\n",
- " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n",
- " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n",
- " nt_prop[\"is_scaf\"]=is_scaf\n",
- " tot_id=scaf_id+stap_id\n",
- " vhi,zidi=np.where(np.array(scaf_id)==1)\n",
- " vhj,zidj=np.where(np.array(stap_id)==1)\n",
- " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n",
- " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n",
- " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n",
- " nt_prop[\"r\"]=[part.getCoordinate(i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n",
- " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n",
- " nt_prop=nt_prop.fillna(-1)\n",
- " for i in range(int(len(vhzid)/2)):\n",
- " try:\n",
- " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n",
- " nt_prop[\"bp\"][bp1]=bp2\n",
- " nt_prop[\"bp\"][bp2]=bp1\n",
- " except:\n",
- " pass\n",
- " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n",
- " for i in range(len(nt_prop.index)):\n",
- " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n",
- " if p==True:\n",
- " k,l=(vslist[\"scaf\"][m])[n][2:]\n",
- " if k!=-1 and l!=-1:\n",
- " n=index2.index(((k,l),True))\n",
- " tprime_list[i]=int(n)\n",
- "\n",
- " else:\n",
- " k,l=(vslist[\"stap\"][m])[n][2:]\n",
- " if k!=-1 and l!=-1:\n",
- " n=index2.index(((k,l),False))\n",
- " tprime_list[i]=int(n)\n",
- " nt_prop[\"threeprime\"]=tprime_list\n",
- " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n",
- " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n",
- " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n",
- "\n",
- "\n",
- " return nt_prop\n"
+ "vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n",
+ "index2=list(zip(vhzid,nt_prop[\"is_scaf\"]))\n",
+ "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n",
+ " \n",
+ "print(index2.index(((30,146),(True))))"
]
},
{
"cell_type": "code",
- "execution_count": 440,
- "id": "9a290811",
+ "execution_count": 537,
+ "id": "5f0c5266",
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,\n",
- " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n",
- " 27, 28, 29, 30, 31, 66, 33, 34, -1, 0, 35, 36, 37,\n",
- " 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 87, 49, 50,\n",
- " 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,\n",
- " 64, 65, -1, 67, 68, 71, 72, 73, 74, 75, 76, 77, 78,\n",
- " 79, 80, 81, 82, 83, 84, 85, 86, 48, 88, 89, 90, 91,\n",
- " 92, 93, 94, 95, 96, 97, 98, 99, 100, 134, 102, 103, -1,\n",
- " 70, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,\n",
- " 116, 151, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,\n",
- " 129, 130, 131, 132, 133, -1, 135, 136, 137, 138, 141, 142, 143,\n",
- " -1, 145, 146, 147, 148, 149, 150, 117, 152, 153, 154, 155, 156,\n",
- " 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,\n",
- " 170, 171, 172, 173, 174, 209, -1, 175, 176, 177, 144, 179, 180,\n",
- " 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, -1, 193,\n",
- " 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206,\n",
- " 207, 208, -1, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,\n",
- " 220, 221, 222, 223, 224, 225, 226, 227, 265, 229, 230, -1, 232,\n",
- " 233, 234, 403, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245,\n",
- " 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,\n",
- " 261, 262, 263, 264, 228, 266, 267, 268, 269, 270, 271, 272, 273,\n",
- " 274, 275, 276, 277, 278, 279, 280, 281, 282, -1, -1, 283, 284,\n",
- " 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297,\n",
- " 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310,\n",
- " 311, 312, 313, 314, 315, 316, 319, 320, 321, 322, 323, 324, 325,\n",
- " 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338,\n",
- " 364, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351,\n",
- " 352, -1, -1, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362,\n",
- " 363, 339, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375,\n",
- " 376, 377, 378, 379, 380, 381, 382, 385, 386, 387, 388, 389, 390,\n",
- " 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 235,\n",
- " 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, -1])"
- ]
- },
- "execution_count": 440,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "11 135 30 146 3010\n"
+ ]
+ },
+ {
+ "ename": "ValueError",
+ "evalue": "((30, 146), True) is not in list",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m<ipython-input-537-f07d5cbf0867>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list"
+ ]
}
],
"source": [
- "np.array(nt_prop[\"threeprime\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 302,
- "id": "551cc70f",
- "metadata": {},
- "outputs": [],
- "source": [
- "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n",
+ " \n",
"for i in range(len(nt_prop.index)):\n",
" ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n",
" if p==True:\n",
" k,l=(vslist[\"scaf\"][m])[n][2:]\n",
+ " if k==30 and l==146:\n",
+ " print(m,n,k,l,i)\n",
" if k!=-1 and l!=-1:\n",
" n=index2.index(((k,l),True))\n",
" tprime_list[i]=int(n)\n",
- " \n",
+ "\n",
" else:\n",
" k,l=(vslist[\"stap\"][m])[n][2:]\n",
" if k!=-1 and l!=-1:\n",
" n=index2.index(((k,l),False))\n",
" tprime_list[i]=int(n)\n",
- "nt_prop[\"threeprime\"]=tprime_list"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 368,
- "id": "1d40286a",
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_helix_angle(part, helix_id, indices):\n",
- " \"\"\" Get \"start_orientation\" for helix \"\"\"\n",
- " # import ipdb\n",
- " # ipdb.set_trace()\n",
- "\n",
- " \"\"\" FROM CADNANO2.5\n",
- " + angle is CCW\n",
- " - angle is CW\n",
- " Right handed DNA rotates clockwise from 5' to 3'\n",
- " we use the convention the 5' end starts at 0 degrees\n",
- " and it's pair is minor_groove_angle degrees away\n",
- " direction, hence the minus signs. eulerZ\n",
- " \"\"\"\n",
- "\n",
- " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n",
- " ['helical_pitch',\n",
- " 'bases_per_repeat',\n",
- " 'turns_per_repeat',\n",
- " 'eulerZ',\n",
- " 'minor_groove_angle']]\n",
- " twist_per_base = tpr*360./bpr\n",
- " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n",
- " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n",
- " return rotationAboutAxis(np.array((0,0,1)),angle)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 429,
- "id": "f45dd87c",
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
+ "nt_prop[\"threeprime\"]=tprime_list\n",
"(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n",
- "\n",
"stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n",
- "\n",
- "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n"
+ "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n",
+ " ## Todo: sequence "
]
},
{
"cell_type": "code",
- "execution_count": 430,
- "id": "ef29b662",
+ "execution_count": 491,
+ "id": "fec987da",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "33 -1\n",
- "68 -1\n",
- "102 -1\n",
- "136 -1\n",
- "142 -1\n",
- "176 -1\n",
- "194 399\n",
- "211 -1\n",
- "233 20\n",
- "281 -1\n",
- "284 -1\n",
- "351 -1\n",
- "354 145\n",
- "413 -1\n",
- "Name: bp, dtype: int64"
+ "[11, 135, 30, 147]"
]
},
- "execution_count": 430,
+ "execution_count": 491,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "stackid"
+ "list(vslist.loc[30][\"scaf\"])[146]"
]
},
{
"cell_type": "code",
- "execution_count": 431,
- "id": "6678f56c",
+ "execution_count": 493,
+ "id": "f332ad87",
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "232"
- ]
- },
- "execution_count": 431,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "ValueError",
+ "evalue": "(30, 146) is not in list",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m<ipython-input-493-d1dd239124c3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvhzid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m: (30, 146) is not in list"
+ ]
}
],
"source": [
- "nt_prop[\"stack\"][233]"
+ "vhzid.index((30,146))"
]
},
{
"cell_type": "code",
- "execution_count": 433,
- "id": "cb9dbf13",
+ "execution_count": 4,
+ "id": "dd3cd839",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "353"
- ]
- },
- "execution_count": 433,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "nt_prop[\"stack\"][354]"
+ "df=pd.DataFrame(data=d)\n",
+ "df=df.set_index(\"num\")"
]
},
{
"cell_type": "code",
- "execution_count": 167,
- "id": "165d3cc6",
+ "execution_count": null,
+ "id": "41b3d9af",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "9ec18edc",
"metadata": {},
"outputs": [],
"source": [
- "scaf_id=[nttype(vslist[\"scaf\"][i]) for i in vslist.index]\n",
- "stap_id=[nttype(vslist[\"stap\"][i]) for i in vslist.index]\n",
- "nts=scaf_id+stap_id"
+ "def get_lattice(part):\n",
+ " lattice_type = None\n",
+ " _gt = part.getGridType()\n",
+ " try:\n",
+ " lattice_type = _gt.name.lower()\n",
+ " except:\n",
+ " if _gt == 1:\n",
+ " lattice_type = 'square'\n",
+ " elif _gt == 2:\n",
+ " lattice_type = 'honeycomb'\n",
+ " else:\n",
+ " print(\"WARNING: unable to determine cadnano part lattice type\")\n",
+ " return lattice_type\n"
]
},
{
"cell_type": "code",
- "execution_count": 360,
- "id": "b2856178",
+ "execution_count": 13,
+ "id": "2fa31a78",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Found cadnano version 2 file\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "NucleicAcidPart_-1_2800"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "nt_prop[\"orientation\"]=[get_helix_angle(p,i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n"
+ "p=read_json_file(\"test/test.json\")\n",
+ "p"
]
},
{
"cell_type": "code",
- "execution_count": 190,
- "id": "07918f5c",
+ "execution_count": 441,
+ "id": "64eb309f",
"metadata": {},
"outputs": [
{
@@ -1462,157 +1397,324 @@
"5 [[9, 0]] "
]
},
- "execution_count": 190,
+ "execution_count": 441,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "vslist"
+ "f"
]
},
{
"cell_type": "code",
- "execution_count": 200,
- "id": "86293e8a",
+ "execution_count": 199,
+ "id": "bda3cddd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0. , 2.25, 3.4 ])"
+ ]
+ },
+ "execution_count": 199,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p.getCoordinate(0,10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 434,
+ "id": "a86cfa84",
"metadata": {},
"outputs": [],
"source": [
- "def mrdna_model_from_cadnano_v2(json_data,**model_parameters):\n",
- " part,vslist=decode_cadnano_part(json_data)\n",
+ "def mrdna_model_from_cadnano(json_file,**model_parameters):\n",
+ " part,vslist=read_json_file(json_file)\n",
" props = part.getModelProperties().copy()\n",
- "\n",
- " if props.get('point_type') == PointType.ARBITRARY:\n",
+ " try:\n",
+ " if props.get('point_type') == PointType.ARBITRARY:\n",
" # TODO add code to encode Parts with ARBITRARY point configurations\n",
- " raise NotImplementedError(\"Not implemented\")\n",
- " else:\n",
+ " raise NotImplementedError(\"Not implemented\")\n",
+ " except:\n",
" try:\n",
" vh_props, origins = part.helixPropertiesAndOrigins()\n",
" except:\n",
" origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n",
- " scaf_id=np.array([nttype(vslist['scaf'][i]) for i in vslist.index])\n",
- " stap_id=np.array([nttype(vslist['stap'][i]) for i in vslist.index])\n",
+ " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n",
+ " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n",
" cad_bps=part.getIndices(0)\n",
- " vslist[\"scafnt\"]=np.sum(scaf_id,axis=1)\n",
- " vslist[\"stapnt\"]=np.sum(stap_id,axis=1)\n",
+ " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n",
+ " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n",
" totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n",
- " is_scaf=np.zeros(totnt)\n",
+ " is_scaf=np.zeros(totnt,dtype=bool)\n",
" is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n",
" nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n",
" nt_prop[\"is_scaf\"]=is_scaf\n",
- " vhi,zids=np.where(np.array(scaf_id+stap_id)==1)\n",
- " nt_prop[\"vh\"]=vhi\n",
- " nt_prop[\"zid\"]=zids\n",
- " nt_prop[\"r\"] =part.getCoordinate(nt_prop[\"vh\"],nt_prop[\"zid\"])\n",
- " return nt_prop\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 201,
- "id": "b398277c",
- "metadata": {},
- "outputs": [
+ " tot_id=scaf_id+stap_id\n",
+ " vhi,zidi=np.where(np.array(scaf_id)==1)\n",
+ " vhj,zidj=np.where(np.array(stap_id)==1)\n",
+ " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n",
+ " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n",
+ " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n",
+ " nt_prop[\"r\"]=[part.getCoordinate(i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n",
+ " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n",
+ " nt_prop=nt_prop.fillna(-1)\n",
+ " for i in range(int(len(vhzid)/2)):\n",
+ " try:\n",
+ " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n",
+ " nt_prop[\"bp\"][bp1]=bp2\n",
+ " nt_prop[\"bp\"][bp2]=bp1\n",
+ " except:\n",
+ " pass\n",
+ " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n",
+ " for i in range(len(nt_prop.index)):\n",
+ " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n",
+ " if p==True:\n",
+ " k,l=(vslist[\"scaf\"][m])[n][2:]\n",
+ " if k!=-1 and l!=-1:\n",
+ " n=index2.index(((k,l),True))\n",
+ " tprime_list[i]=int(n)\n",
+ "\n",
+ " else:\n",
+ " k,l=(vslist[\"stap\"][m])[n][2:]\n",
+ " if k!=-1 and l!=-1:\n",
+ " n=index2.index(((k,l),False))\n",
+ " tprime_list[i]=int(n)\n",
+ " nt_prop[\"threeprime\"]=tprime_list\n",
+ " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n",
+ " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n",
+ " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n",
+ "\n",
+ "\n",
+ " return nt_prop\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 442,
+ "id": "be5de5ba",
+ "metadata": {},
+ "outputs": [
{
- "ename": "NameError",
- "evalue": "name 'decode_cadnano_part' is not defined",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-201-c5d589a8b80d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m<ipython-input-200-181a924488ad>\u001b[0m in \u001b[0;36mmrdna_model_from_cadnano\u001b[0;34m(json_data, **model_parameters)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mmodel_parameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpart\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvslist\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_cadnano_part\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprops\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetModelProperties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'point_type'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mPointType\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mARBITRARY\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mNameError\u001b[0m: name 'decode_cadnano_part' is not defined"
- ]
+ "data": {
+ "text/plain": [
+ "array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n",
+ " -1, -1, -1, -1, -1, -1, -1])"
+ ]
+ },
+ "execution_count": 442,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "mrdna_model_from_cadnano(\"test.json\")"
+ "np.array(nt_prop[\"seq\"])"
]
},
{
"cell_type": "code",
- "execution_count": 146,
- "id": "c0d9eb64",
+ "execution_count": 302,
+ "id": "0ce6701d",
"metadata": {},
"outputs": [],
"source": [
- "a,b=np.where(np.array(nts)==1)"
+ "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n",
+ "for i in range(len(nt_prop.index)):\n",
+ " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n",
+ " if p==True:\n",
+ " k,l=(vslist[\"scaf\"][m])[n][2:]\n",
+ " if k!=-1 and l!=-1:\n",
+ " n=index2.index(((k,l),True))\n",
+ " tprime_list[i]=int(n)\n",
+ " \n",
+ " else:\n",
+ " k,l=(vslist[\"stap\"][m])[n][2:]\n",
+ " if k!=-1 and l!=-1:\n",
+ " n=index2.index(((k,l),False))\n",
+ " tprime_list[i]=int(n)\n",
+ "nt_prop[\"threeprime\"]=tprime_list"
]
},
{
"cell_type": "code",
- "execution_count": 148,
- "id": "ab563ec9",
+ "execution_count": 368,
+ "id": "9d0e49cf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_helix_angle(part, helix_id, indices):\n",
+ " \"\"\" Get \"start_orientation\" for helix \"\"\"\n",
+ " # import ipdb\n",
+ " # ipdb.set_trace()\n",
+ "\n",
+ " \"\"\" FROM CADNANO2.5\n",
+ " + angle is CCW\n",
+ " - angle is CW\n",
+ " Right handed DNA rotates clockwise from 5' to 3'\n",
+ " we use the convention the 5' end starts at 0 degrees\n",
+ " and it's pair is minor_groove_angle degrees away\n",
+ " direction, hence the minus signs. eulerZ\n",
+ " \"\"\"\n",
+ "\n",
+ " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n",
+ " ['helical_pitch',\n",
+ " 'bases_per_repeat',\n",
+ " 'turns_per_repeat',\n",
+ " 'eulerZ',\n",
+ " 'minor_groove_angle']]\n",
+ " twist_per_base = tpr*360./bpr\n",
+ " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n",
+ " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n",
+ " return rotationAboutAxis(np.array((0,0,1)),angle)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 429,
+ "id": "c1d77642",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n",
+ "\n",
+ "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n",
+ "\n",
+ "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 430,
+ "id": "2c2d1227",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n",
- " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40,\n",
- " 41, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n",
- " 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39,\n",
- " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
- " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39,\n",
- " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
- " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 37,\n",
- " 38, 39, 40, 41, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
- " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n",
- " 35, 36, 37, 38, 39, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16,\n",
- " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
- " 34, 35, 36, 37, 38, 39])"
+ "33 -1\n",
+ "68 -1\n",
+ "102 -1\n",
+ "136 -1\n",
+ "142 -1\n",
+ "176 -1\n",
+ "194 399\n",
+ "211 -1\n",
+ "233 20\n",
+ "281 -1\n",
+ "284 -1\n",
+ "351 -1\n",
+ "354 145\n",
+ "413 -1\n",
+ "Name: bp, dtype: int64"
]
},
- "execution_count": 148,
+ "execution_count": 430,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "nt_prop=pd.DataFrame(index)"
+ "stackid"
]
},
{
"cell_type": "code",
- "execution_count": 137,
- "id": "3c45aeac",
+ "execution_count": 431,
+ "id": "e701d029",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "232"
+ ]
+ },
+ "execution_count": 431,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "def nttype(scafs):\n",
- " def judge(i):\n",
- " if i ==[-1,-1,-1,-1]:\n",
- " return 0\n",
- " else: return 1\n",
- " n=np.array([judge(i) for i in scafs])\n",
- " return n\n",
- "\n"
+ "nt_prop[\"stack\"][233]"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "803b3c67",
+ "execution_count": 433,
+ "id": "4a62f5d9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "353"
+ ]
+ },
+ "execution_count": 433,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nt_prop[\"stack\"][354]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 167,
+ "id": "d80ab792",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "scaf_id=[nttype(vslist[\"scaf\"][i]) for i in vslist.index]\n",
+ "stap_id=[nttype(vslist[\"stap\"][i]) for i in vslist.index]\n",
+ "nts=scaf_id+stap_id"
+ ]
},
{
"cell_type": "code",
- "execution_count": 84,
- "id": "adb6b347",
+ "execution_count": 360,
+ "id": "8e009bc9",
"metadata": {},
"outputs": [],
"source": [
- "b[\"scafnt\"]=[ntcount(b['scaf'][i]) for i in b.index]\n",
- "b[\"stapnt\"]=[ntcount(b['stap'][i]) for i in b.index]"
+ "nt_prop[\"orientation\"]=[get_helix_angle(p,i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n"
]
},
{
"cell_type": "code",
- "execution_count": 156,
- "id": "307e53ad",
+ "execution_count": 190,
+ "id": "3dc97f0d",
"metadata": {},
"outputs": [
{
@@ -1636,17 +1738,21 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
- " <th></th>\n",
- " <th>r</th>\n",
- " <th>bp</th>\n",
- " <th>stack</th>\n",
- " <th>threeprime</th>\n",
- " <th>seq</th>\n",
- " <th>orientation</th>\n",
+ " <th>row</th>\n",
+ " <th>col</th>\n",
+ " <th>scaf</th>\n",
+ " <th>stap</th>\n",
+ " <th>loop</th>\n",
+ " <th>skip</th>\n",
+ " <th>scafLoop</th>\n",
+ " <th>stapLoop</th>\n",
+ " <th>stap_colors</th>\n",
" </tr>\n",
" <tr>\n",
- " <th>vh</th>\n",
- " <th>zid</th>\n",
+ " <th>num</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
@@ -1658,2299 +1764,404 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <th>0</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
+ " <td>12</td>\n",
+ " <td>16</td>\n",
+ " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
+ " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[]</td>\n",
+ " <td>[]</td>\n",
+ " <td>[[23, 13369809], [38, 12060012]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <th>3</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <th>1</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <th>2</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <th>8</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " r bp stack threeprime seq orientation\n",
- "vh zid \n",
- "0 0 NaN NaN NaN NaN NaN NaN\n",
- "1 3 NaN NaN NaN NaN NaN NaN\n",
- "2 1 NaN NaN NaN NaN NaN NaN\n",
- "3 2 NaN NaN NaN NaN NaN NaN\n",
- "1 8 NaN NaN NaN NaN NaN NaN"
- ]
- },
- "execution_count": 156,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "i=range(5)\n",
- "col=[\"vh\",\"zid\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"]\n",
- "d=pd.DataFrame(index=i,columns=col)\n",
- "d['vh']=[0,1,2,3,1]\n",
- "d['zid']=[0,3,1,2,8]\n",
- "d.set_index([\"vh\",\"zid\"],inplace=True)\n",
- "d"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 157,
- "id": "d030974e",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>vh</th>\n",
- " <th>zid</th>\n",
- " <th>r</th>\n",
- " <th>bp</th>\n",
- " <th>stack</th>\n",
- " <th>threeprime</th>\n",
- " <th>seq</th>\n",
- " <th>orientation</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>1</td>\n",
- " <td>3</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
+ " <td>12</td>\n",
+ " <td>15</td>\n",
+ " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
+ " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[]</td>\n",
+ " <td>[]</td>\n",
+ " <td>[[3, 1501302]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>3</td>\n",
- " <td>2</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " vh zid r bp stack threeprime seq orientation\n",
- "0 0 0 NaN NaN NaN NaN NaN NaN\n",
- "1 1 3 NaN NaN NaN NaN NaN NaN\n",
- "2 2 1 NaN NaN NaN NaN NaN NaN\n",
- "3 3 2 NaN NaN NaN NaN NaN NaN\n",
- "4 1 8 NaN NaN NaN NaN NaN NaN"
- ]
- },
- "execution_count": 157,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "\n",
- "d=d.reset_index()\n",
- "d"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 128,
- "id": "6ddb4784",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(array([0, 2]),)"
- ]
- },
- "execution_count": 128,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "s=[True,False,True,False,False]\n",
- "np.where(np.array(s)==True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 127,
- "id": "28e3acea",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th>r</th>\n",
- " <th>bp</th>\n",
- " <th>stack</th>\n",
- " <th>threeprime</th>\n",
- " <th>seq</th>\n",
- " <th>orientation</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>vh</th>\n",
- " <th>zid</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <th>0</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <th>3</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <th>1</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <th>2</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <th>8</th>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " r bp stack threeprime seq orientation\n",
- "vh zid \n",
- "0 0 NaN NaN NaN NaN NaN NaN\n",
- "1 3 NaN NaN NaN NaN NaN NaN\n",
- "2 1 NaN NaN NaN NaN NaN NaN\n",
- "3 2 NaN NaN NaN NaN NaN NaN\n",
- "1 8 NaN NaN NaN NaN NaN NaN"
- ]
- },
- "execution_count": 127,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 100,
- "id": "545acf6d",
- "metadata": {},
- "outputs": [
- {
- "ename": "KeyError",
- "evalue": "\"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-100-a349feadc600>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3509\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_iterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3510\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3511\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_indexer_strict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3513\u001b[0m \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5780\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_indexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reindex_non_unique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5782\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_if_missing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5783\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5784\u001b[0m \u001b[0mkeyarr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5840\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muse_interval_msg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5841\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5842\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of [{key}] are in the [{axis_name}]\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5844\u001b[0m \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mKeyError\u001b[0m: \"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\""
- ]
- }
- ],
- "source": [
- "d[[0,0]]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "f6748d9c",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[[23, 13369809], [38, 12060012]]"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df[\"vstrands\"][0][\"stap_colors\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "5005611f",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "210"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n",
- "len(vh_vb._scaf)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "b6643a5e-d63e-452d-99d0-0f0ad460bdf3",
- "metadata": {},
- "outputs": [],
- "source": [
- "with open(\"test.virt2nuc\",\"rb\") as f:\n",
- " df=pickle.load(f)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "83fc4ec6",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(<libs.cadnano_utils.vhelix_vbase_to_nucleotide at 0x7f9b1b65c820>,\n",
- " {0: (12, 16),\n",
- " 1: (12, 15),\n",
- " 2: (13, 15),\n",
- " 3: (13, 16),\n",
- " 4: (13, 17),\n",
- " 5: (12, 17)})"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "1cd359b5",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{0: (12, 16), 1: (12, 15), 2: (13, 15), 3: (13, 16), 4: (13, 17), 5: (12, 17)}"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pattern"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 173,
- "id": "078656d6",
- "metadata": {},
- "outputs": [],
- "source": [
- "class strands():\n",
- " def __init__(self):\n",
- " self.row=0 \n",
- " self.col=0\n",
- " self.num=0\n",
- " self.scaf=[]\n",
- " self.stap=[]\n",
- " self.loop=[]\n",
- " self.skip=[]\n",
- " self.scafLoop=[]\n",
- " self.stapLoop=[]\n",
- " self.stap_colors=[]\n",
- " self.scaf_contact={}\n",
- " self.stap_connect={}\n",
- " def to_dict(self):\n",
- " d={}\n",
- " d['row']=self.row\n",
- " d['col']=self.col\n",
- " d['num']=self.num\n",
- " d['scaf']=self.scaf\n",
- " d['stap']=self.stap\n",
- " d['loop']=self.loop\n",
- " d['skip']=self.skip\n",
- " d['scafLoop']=self.scafLoop\n",
- " d['stapLoop']=self.stapLoop\n",
- " d['stap_colors']=self.stap_colors\n",
- " return d\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 177,
- "id": "914acd5d",
- "metadata": {},
- "outputs": [],
- "source": [
- "def find_segs(vir2nuc_scaf):\n",
- " oligos={}\n",
- " for i in range(len(vir2nuc_scaf)):\n",
- " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n",
- " if oligo not in oligos.keys():\n",
- " oligos[oligo]=[]\n",
- " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n",
- " return oligos\n",
- "\n",
- "#class\n",
- "def decode_vh_vb(virt2nuc):\n",
- " vh_list={}\n",
- " vh_vb,pattern=pd.read_pickle(virt2nuc)\n",
- " for i in pattern.keys():\n",
- " s=strands()\n",
- " s.row,s.col=pattern[i]\n",
- " s.num=i\n",
- " vh_list[s.num]=s\n",
- " scafs=vh_vb._scaf\n",
- " staps=vh_vb._stap\n",
- " scaf_strands=find_segs(scafs)\n",
- " scaf_oligos=list(scaf_strands.keys())\n",
- " for i in scaf_oligos:\n",
- " pass\n",
- " \n",
- " \n",
- " return vh_list"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 198,
- "id": "18132c9b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[[(2, 34),\n",
- " (2, 33),\n",
- " (2, 32),\n",
- " (2, 31),\n",
- " (2, 30),\n",
- " (2, 29),\n",
- " (2, 28),\n",
- " (2, 27),\n",
- " (2, 26),\n",
- " (2, 25),\n",
- " (2, 24),\n",
- " (2, 23),\n",
- " (2, 22),\n",
- " (2, 21),\n",
- " (2, 20),\n",
- " (2, 19),\n",
- " (2, 18),\n",
- " (2, 17),\n",
- " (2, 16),\n",
- " (2, 15),\n",
- " (2, 14),\n",
- " (2, 13),\n",
- " (2, 12),\n",
- " (2, 11),\n",
- " (2, 10),\n",
- " (2, 9),\n",
- " (2, 8),\n",
- " (2, 7),\n",
- " (2, 6),\n",
- " (2, 5),\n",
- " (2, 4),\n",
- " (2, 3),\n",
- " (2, 2),\n",
- " (2, 1),\n",
- " (2, 0)],\n",
- " [(1, 3),\n",
- " (1, 4),\n",
- " (1, 5),\n",
- " (1, 6),\n",
- " (1, 7),\n",
- " (1, 8),\n",
- " (1, 9),\n",
- " (1, 10),\n",
- " (1, 11),\n",
- " (1, 12),\n",
- " (1, 13),\n",
- " (1, 14),\n",
- " (1, 15),\n",
- " (1, 16),\n",
- " (1, 17),\n",
- " (1, 18),\n",
- " (1, 19),\n",
- " (1, 20),\n",
- " (0, 20),\n",
- " (0, 19),\n",
- " (0, 18),\n",
- " (0, 17),\n",
- " (0, 16),\n",
- " (0, 15),\n",
- " (0, 14),\n",
- " (0, 13),\n",
- " (0, 12),\n",
- " (0, 11),\n",
- " (0, 10),\n",
- " (0, 9),\n",
- " (0, 8),\n",
- " (0, 7),\n",
- " (0, 6),\n",
- " (0, 5),\n",
- " (0, 4),\n",
- " (0, 3),\n",
- " (0, 2)],\n",
- " [(0, 23),\n",
- " (0, 22),\n",
- " (0, 21),\n",
- " (1, 21),\n",
- " (1, 22),\n",
- " (1, 23),\n",
- " (1, 24),\n",
- " (1, 25),\n",
- " (1, 26),\n",
- " (1, 27),\n",
- " (1, 28),\n",
- " (1, 29),\n",
- " (1, 30),\n",
- " (1, 31),\n",
- " (1, 32),\n",
- " (1, 33),\n",
- " (1, 34),\n",
- " (1, 35),\n",
- " (1, 36),\n",
- " (1, 37),\n",
- " (1, 38)],\n",
- " [(5, 9),\n",
- " (5, 10),\n",
- " (5, 11),\n",
- " (5, 12),\n",
- " (5, 13),\n",
- " (5, 14),\n",
- " (5, 15),\n",
- " (5, 16),\n",
- " (5, 17),\n",
- " (5, 18),\n",
- " (5, 19),\n",
- " (5, 20),\n",
- " (5, 21),\n",
- " (5, 22),\n",
- " (5, 23),\n",
- " (5, 24),\n",
- " (5, 25),\n",
- " (5, 26),\n",
- " (5, 27),\n",
- " (0, 27),\n",
- " (0, 26),\n",
- " (0, 25),\n",
- " (0, 24)],\n",
- " [(0, 38),\n",
- " (0, 37),\n",
- " (0, 36),\n",
- " (0, 35),\n",
- " (0, 34),\n",
- " (0, 33),\n",
- " (0, 32),\n",
- " (0, 31),\n",
- " (0, 30),\n",
- " (0, 29),\n",
- " (0, 28),\n",
- " (5, 28),\n",
- " (5, 29),\n",
- " (5, 30),\n",
- " (5, 31),\n",
- " (5, 32),\n",
- " (5, 33),\n",
- " (5, 34),\n",
- " (5, 35),\n",
- " (5, 36),\n",
- " (5, 37),\n",
- " (5, 38),\n",
- " (5, 39)],\n",
- " [(3, 0),\n",
- " (3, 1),\n",
- " (3, 2),\n",
- " (3, 3),\n",
- " (3, 4),\n",
- " (3, 5),\n",
- " (3, 6),\n",
- " (3, 7),\n",
- " (3, 8),\n",
- " (3, 9),\n",
- " (3, 10),\n",
- " (3, 11),\n",
- " (3, 12),\n",
- " (3, 13),\n",
- " (3, 14),\n",
- " (3, 15),\n",
- " (3, 16),\n",
- " (3, 17),\n",
- " (3, 18),\n",
- " (3, 19),\n",
- " (3, 20),\n",
- " (4, 20),\n",
- " (4, 19),\n",
- " (4, 18),\n",
- " (4, 17),\n",
- " (4, 16),\n",
- " (4, 15),\n",
- " (4, 14),\n",
- " (4, 13),\n",
- " (4, 12),\n",
- " (4, 11),\n",
- " (4, 10),\n",
- " (4, 9)],\n",
- " [(4, 39),\n",
- " (4, 38),\n",
- " (4, 37),\n",
- " (4, 36),\n",
- " (4, 35),\n",
- " (4, 34),\n",
- " (4, 33),\n",
- " (4, 32),\n",
- " (4, 31),\n",
- " (4, 30),\n",
- " (4, 29),\n",
- " (4, 28),\n",
- " (4, 27),\n",
- " (4, 26),\n",
- " (4, 25),\n",
- " (4, 24),\n",
- " (4, 23),\n",
- " (4, 22),\n",
- " (4, 21),\n",
- " (3, 21),\n",
- " (3, 22),\n",
- " (3, 23),\n",
- " (3, 24),\n",
- " (3, 25),\n",
- " (3, 26),\n",
- " (3, 27),\n",
- " (3, 28),\n",
- " (3, 29),\n",
- " (3, 30),\n",
- " (3, 31),\n",
- " (3, 32),\n",
- " (3, 33),\n",
- " (3, 34)]]"
- ]
- },
- "execution_count": 198,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "s1=decode_vh_vb(\"virt2nuc\")\n",
- "vh_vb,pattern=pd.read_pickle(\"virt2nuc\")\n",
- "list(find_segs(vh_vb._stap).values())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "id": "dafaa4cf-47d9-4da3-8b63-1129c074c5ef",
- "metadata": {},
- "outputs": [
- {
- "data": {
+ " <td>13</td>\n",
+ " <td>15</td>\n",
+ " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n",
+ " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[]</td>\n",
+ " <td>[]</td>\n",
+ " <td>[[34, 8947848]]</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>13</td>\n",
+ " <td>16</td>\n",
+ " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n",
+ " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[]</td>\n",
+ " <td>[]</td>\n",
+ " <td>[[0, 13369344]]</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>13</td>\n",
+ " <td>17</td>\n",
+ " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n",
+ " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[]</td>\n",
+ " <td>[]</td>\n",
+ " <td>[[39, 8947848]]</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>5</th>\n",
+ " <td>12</td>\n",
+ " <td>17</td>\n",
+ " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n",
+ " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
+ " <td>[]</td>\n",
+ " <td>[]</td>\n",
+ " <td>[[9, 0]]</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
"text/plain": [
- "{0: (12, 16),\n",
- " 1: (12, 15),\n",
- " 2: (13, 15),\n",
- " 3: (13, 16),\n",
- " 4: (13, 17),\n",
- " 5: (12, 17),\n",
- " 6: (15, 5)}"
+ " row col scaf \\\n",
+ "num \n",
+ "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
+ "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
+ "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n",
+ "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n",
+ "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n",
+ "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n",
+ "\n",
+ " stap \\\n",
+ "num \n",
+ "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n",
+ "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
+ "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n",
+ "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n",
+ "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
+ "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n",
+ "\n",
+ " loop \\\n",
+ "num \n",
+ "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
+ "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
+ "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
+ "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
+ "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
+ "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
+ "\n",
+ " skip scafLoop stapLoop \\\n",
+ "num \n",
+ "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
+ "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
+ "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
+ "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
+ "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
+ "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n",
+ "\n",
+ " stap_colors \n",
+ "num \n",
+ "0 [[23, 13369809], [38, 12060012]] \n",
+ "1 [[3, 1501302]] \n",
+ "2 [[34, 8947848]] \n",
+ "3 [[0, 13369344]] \n",
+ "4 [[39, 8947848]] \n",
+ "5 [[9, 0]] "
]
},
- "execution_count": 28,
+ "execution_count": 190,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "pattern"
+ "vslist"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "29e7336d-5ff3-4be1-bd47-585236ee7bb2",
+ "execution_count": 200,
+ "id": "3d019b73",
"metadata": {},
"outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "id": "75e15cf7-b1a3-4133-8bc4-3eaab5922b96",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'_scaf': {(0, 39): (0, [2]),\n",
- " (0, 40): (0, [1]),\n",
- " (0, 41): (0, [0]),\n",
- " (1, 41): (1, [5]),\n",
- " (1, 40): (1, [4]),\n",
- " (1, 39): (1, [3]),\n",
- " (2, 39): (2, [8]),\n",
- " (2, 40): (2, [7]),\n",
- " (2, 41): (2, [6]),\n",
- " (3, 41): (4, [48]),\n",
- " (3, 40): (4, [47]),\n",
- " (3, 39): (4, [46]),\n",
- " (3, 38): (4, [45]),\n",
- " (3, 37): (4, [44]),\n",
- " (4, 0): (5, [52]),\n",
- " (4, 1): (5, [51]),\n",
- " (4, 2): (5, [50]),\n",
- " (4, 3): (5, [49]),\n",
- " (5, 3): (6, [56]),\n",
- " (5, 2): (6, [55]),\n",
- " (5, 1): (6, [54]),\n",
- " (5, 0): (6, [53]),\n",
- " (5, 22): (7, [244]),\n",
- " (5, 21): (7, [243]),\n",
- " (5, 20): (7, [242]),\n",
- " (5, 19): (7, [241]),\n",
- " (5, 18): (7, [240]),\n",
- " (5, 17): (7, [239]),\n",
- " (5, 16): (7, [238]),\n",
- " (5, 15): (7, [237]),\n",
- " (5, 14): (7, [236]),\n",
- " (5, 13): (7, [235]),\n",
- " (5, 12): (7, [234]),\n",
- " (5, 11): (7, [233]),\n",
- " (5, 10): (7, [232]),\n",
- " (5, 9): (7, [231]),\n",
- " (4, 9): (7, [230]),\n",
- " (4, 10): (7, [229]),\n",
- " (4, 11): (7, [228]),\n",
- " (4, 12): (7, [227]),\n",
- " (4, 13): (7, [226]),\n",
- " (4, 14): (7, [225]),\n",
- " (4, 15): (7, [224]),\n",
- " (3, 15): (7, [223]),\n",
- " (3, 14): (7, [222]),\n",
- " (3, 13): (7, [221]),\n",
- " (3, 12): (7, [220]),\n",
- " (3, 11): (7, [219]),\n",
- " (3, 10): (7, [218]),\n",
- " (3, 9): (7, [217]),\n",
- " (3, 8): (7, [216]),\n",
- " (3, 7): (7, [215]),\n",
- " (3, 6): (7, [214]),\n",
- " (3, 5): (7, [213]),\n",
- " (3, 4): (7, [212]),\n",
- " (3, 3): (7, [211]),\n",
- " (3, 2): (7, [210]),\n",
- " (2, 2): (7, [209]),\n",
- " (2, 3): (7, [208]),\n",
- " (2, 4): (7, [207]),\n",
- " (2, 5): (7, [206]),\n",
- " (2, 6): (7, [205]),\n",
- " (2, 7): (7, [204]),\n",
- " (2, 8): (7, [203]),\n",
- " (2, 9): (7, [202]),\n",
- " (2, 10): (7, [201]),\n",
- " (2, 11): (7, [200]),\n",
- " (2, 12): (7, [199]),\n",
- " (2, 13): (7, [198]),\n",
- " (2, 14): (7, [197]),\n",
- " (2, 15): (7, [196]),\n",
- " (2, 16): (7, [195]),\n",
- " (2, 17): (7, [194]),\n",
- " (2, 18): (7, [193]),\n",
- " (1, 18): (7, [192]),\n",
- " (1, 17): (7, [191]),\n",
- " (1, 16): (7, [190]),\n",
- " (1, 15): (7, [189]),\n",
- " (1, 14): (7, [188]),\n",
- " (1, 13): (7, [187]),\n",
- " (1, 12): (7, [186]),\n",
- " (1, 11): (7, [185]),\n",
- " (1, 10): (7, [184]),\n",
- " (1, 9): (7, [183]),\n",
- " (1, 8): (7, [182]),\n",
- " (1, 7): (7, [181]),\n",
- " (1, 6): (7, [180]),\n",
- " (1, 5): (7, [179]),\n",
- " (0, 5): (7, [178]),\n",
- " (0, 6): (7, [177]),\n",
- " (0, 7): (7, [176]),\n",
- " (0, 8): (7, [175]),\n",
- " (0, 9): (7, [174]),\n",
- " (0, 10): (7, [173]),\n",
- " (0, 11): (7, [172]),\n",
- " (0, 12): (7, [171]),\n",
- " (0, 13): (7, [170]),\n",
- " (0, 14): (7, [169]),\n",
- " (0, 15): (7, [168]),\n",
- " (0, 16): (7, [167]),\n",
- " (0, 17): (7, [166]),\n",
- " (0, 18): (7, [165]),\n",
- " (0, 19): (7, [164]),\n",
- " (0, 20): (7, [163]),\n",
- " (0, 21): (7, [162]),\n",
- " (0, 22): (7, [161]),\n",
- " (0, 23): (7, [160]),\n",
- " (0, 24): (7, [159]),\n",
- " (0, 25): (7, [158]),\n",
- " (0, 26): (7, [157]),\n",
- " (0, 27): (7, [156]),\n",
- " (0, 28): (7, [155]),\n",
- " (0, 29): (7, [154]),\n",
- " (0, 30): (7, [153]),\n",
- " (0, 31): (7, [152]),\n",
- " (0, 32): (7, [151]),\n",
- " (0, 33): (7, [150]),\n",
- " (0, 34): (7, [149]),\n",
- " (0, 35): (7, [148]),\n",
- " (0, 36): (7, [147]),\n",
- " (1, 36): (7, [146]),\n",
- " (1, 35): (7, [145]),\n",
- " (1, 34): (7, [144]),\n",
- " (1, 33): (7, [143]),\n",
- " (1, 32): (7, [142]),\n",
- " (1, 31): (7, [141]),\n",
- " (1, 30): (7, [140]),\n",
- " (1, 29): (7, [139]),\n",
- " (1, 28): (7, [138]),\n",
- " (1, 27): (7, [137]),\n",
- " (1, 26): (7, [136]),\n",
- " (1, 25): (7, [135]),\n",
- " (1, 24): (7, [134]),\n",
- " (1, 23): (7, [133]),\n",
- " (1, 22): (7, [132]),\n",
- " (1, 21): (7, [131]),\n",
- " (1, 20): (7, [130]),\n",
- " (1, 19): (7, [129]),\n",
- " (2, 19): (7, [128]),\n",
- " (2, 20): (7, [127]),\n",
- " (2, 21): (7, [126]),\n",
- " (2, 22): (7, [125]),\n",
- " (2, 23): (7, [124]),\n",
- " (2, 24): (7, [123]),\n",
- " (2, 25): (7, [122]),\n",
- " (2, 26): (7, [121]),\n",
- " (2, 27): (7, [120]),\n",
- " (2, 28): (7, [119]),\n",
- " (2, 29): (7, [118]),\n",
- " (2, 30): (7, [117]),\n",
- " (2, 31): (7, [116]),\n",
- " (2, 32): (7, [115]),\n",
- " (3, 32): (7, [114]),\n",
- " (3, 31): (7, [113]),\n",
- " (3, 30): (7, [112]),\n",
- " (3, 29): (7, [111]),\n",
- " (3, 28): (7, [110]),\n",
- " (3, 27): (7, [109]),\n",
- " (3, 26): (7, [108]),\n",
- " (3, 25): (7, [107]),\n",
- " (3, 24): (7, [106]),\n",
- " (3, 23): (7, [105]),\n",
- " (3, 22): (7, [104]),\n",
- " (3, 21): (7, [103]),\n",
- " (3, 20): (7, [102]),\n",
- " (3, 19): (7, [101]),\n",
- " (3, 18): (7, [100]),\n",
- " (3, 17): (7, [99]),\n",
- " (3, 16): (7, [98]),\n",
- " (4, 16): (7, [97]),\n",
- " (4, 17): (7, [96]),\n",
- " (4, 18): (7, [95]),\n",
- " (4, 19): (7, [94]),\n",
- " (4, 20): (7, [93]),\n",
- " (4, 21): (7, [92]),\n",
- " (4, 22): (7, [91]),\n",
- " (4, 23): (7, [90]),\n",
- " (4, 24): (7, [89]),\n",
- " (4, 25): (7, [88]),\n",
- " (4, 26): (7, [87]),\n",
- " (4, 27): (7, [86]),\n",
- " (4, 28): (7, [85]),\n",
- " (4, 29): (7, [84]),\n",
- " (4, 30): (7, [83]),\n",
- " (4, 31): (7, [82]),\n",
- " (4, 32): (7, [81]),\n",
- " (4, 33): (7, [80]),\n",
- " (4, 34): (7, [79]),\n",
- " (4, 35): (7, [78]),\n",
- " (4, 36): (7, [77]),\n",
- " (4, 37): (7, [76]),\n",
- " (4, 38): (7, [75]),\n",
- " (4, 39): (7, [74]),\n",
- " (5, 39): (7, [73]),\n",
- " (5, 38): (7, [72]),\n",
- " (5, 37): (7, [71]),\n",
- " (5, 36): (7, [70]),\n",
- " (5, 35): (7, [69]),\n",
- " (5, 34): (7, [68]),\n",
- " (5, 33): (7, [67]),\n",
- " (5, 32): (7, [66]),\n",
- " (5, 31): (7, [65]),\n",
- " (5, 30): (7, [64]),\n",
- " (5, 29): (7, [63]),\n",
- " (5, 28): (7, [62]),\n",
- " (5, 27): (7, [61]),\n",
- " (5, 26): (7, [60]),\n",
- " (5, 25): (7, [59]),\n",
- " (5, 24): (7, [58]),\n",
- " (5, 23): (7, [57])},\n",
- " '_stap': {(2, 34): (3, [43]),\n",
- " (2, 33): (3, [42]),\n",
- " (2, 32): (3, [41]),\n",
- " (2, 31): (3, [40]),\n",
- " (2, 30): (3, [39]),\n",
- " (2, 29): (3, [38]),\n",
- " (2, 28): (3, [37]),\n",
- " (2, 27): (3, [36]),\n",
- " (2, 26): (3, [35]),\n",
- " (2, 25): (3, [34]),\n",
- " (2, 24): (3, [33]),\n",
- " (2, 23): (3, [32]),\n",
- " (2, 22): (3, [31]),\n",
- " (2, 21): (3, [30]),\n",
- " (2, 20): (3, [29]),\n",
- " (2, 19): (3, [28]),\n",
- " (2, 18): (3, [27]),\n",
- " (2, 17): (3, [26]),\n",
- " (2, 16): (3, [25]),\n",
- " (2, 15): (3, [24]),\n",
- " (2, 14): (3, [23]),\n",
- " (2, 13): (3, [22]),\n",
- " (2, 12): (3, [21]),\n",
- " (2, 11): (3, [20]),\n",
- " (2, 10): (3, [19]),\n",
- " (2, 9): (3, [18]),\n",
- " (2, 8): (3, [17]),\n",
- " (2, 7): (3, [16]),\n",
- " (2, 6): (3, [15]),\n",
- " (2, 5): (3, [14]),\n",
- " (2, 4): (3, [13]),\n",
- " (2, 3): (3, [12]),\n",
- " (2, 2): (3, [11]),\n",
- " (2, 1): (3, [10]),\n",
- " (2, 0): (3, [9]),\n",
- " (1, 3): (8, [281]),\n",
- " (1, 4): (8, [280]),\n",
- " (1, 5): (8, [279]),\n",
- " (1, 6): (8, [278]),\n",
- " (1, 7): (8, [277]),\n",
- " (1, 8): (8, [276]),\n",
- " (1, 9): (8, [275]),\n",
- " (1, 10): (8, [274]),\n",
- " (1, 11): (8, [273]),\n",
- " (1, 12): (8, [272]),\n",
- " (1, 13): (8, [271]),\n",
- " (1, 14): (8, [270]),\n",
- " (1, 15): (8, [269]),\n",
- " (1, 16): (8, [268]),\n",
- " (1, 17): (8, [267]),\n",
- " (1, 18): (8, [266]),\n",
- " (1, 19): (8, [265]),\n",
- " (1, 20): (8, [264]),\n",
- " (0, 20): (8, [263]),\n",
- " (0, 19): (8, [262]),\n",
- " (0, 18): (8, [261]),\n",
- " (0, 17): (8, [260]),\n",
- " (0, 16): (8, [259]),\n",
- " (0, 15): (8, [258]),\n",
- " (0, 14): (8, [257]),\n",
- " (0, 13): (8, [256]),\n",
- " (0, 12): (8, [255]),\n",
- " (0, 11): (8, [254]),\n",
- " (0, 10): (8, [253]),\n",
- " (0, 9): (8, [252]),\n",
- " (0, 8): (8, [251]),\n",
- " (0, 7): (8, [250]),\n",
- " (0, 6): (8, [249]),\n",
- " (0, 5): (8, [248]),\n",
- " (0, 4): (8, [247]),\n",
- " (0, 3): (8, [246]),\n",
- " (0, 2): (8, [245]),\n",
- " (0, 23): (9, [302]),\n",
- " (0, 22): (9, [301]),\n",
- " (0, 21): (9, [300]),\n",
- " (1, 21): (9, [299]),\n",
- " (1, 22): (9, [298]),\n",
- " (1, 23): (9, [297]),\n",
- " (1, 24): (9, [296]),\n",
- " (1, 25): (9, [295]),\n",
- " (1, 26): (9, [294]),\n",
- " (1, 27): (9, [293]),\n",
- " (1, 28): (9, [292]),\n",
- " (1, 29): (9, [291]),\n",
- " (1, 30): (9, [290]),\n",
- " (1, 31): (9, [289]),\n",
- " (1, 32): (9, [288]),\n",
- " (1, 33): (9, [287]),\n",
- " (1, 34): (9, [286]),\n",
- " (1, 35): (9, [285]),\n",
- " (1, 36): (9, [284]),\n",
- " (1, 37): (9, [283]),\n",
- " (1, 38): (9, [282]),\n",
- " (5, 9): (10, [325]),\n",
- " (5, 10): (10, [324]),\n",
- " (5, 11): (10, [323]),\n",
- " (5, 12): (10, [322]),\n",
- " (5, 13): (10, [321]),\n",
- " (5, 14): (10, [320]),\n",
- " (5, 15): (10, [319]),\n",
- " (5, 16): (10, [318]),\n",
- " (5, 17): (10, [317]),\n",
- " (5, 18): (10, [316]),\n",
- " (5, 19): (10, [315]),\n",
- " (5, 20): (10, [314]),\n",
- " (5, 21): (10, [313]),\n",
- " (5, 22): (10, [312]),\n",
- " (5, 23): (10, [311]),\n",
- " (5, 24): (10, [310]),\n",
- " (5, 25): (10, [309]),\n",
- " (5, 26): (10, [308]),\n",
- " (5, 27): (10, [307]),\n",
- " (0, 27): (10, [306]),\n",
- " (0, 26): (10, [305]),\n",
- " (0, 25): (10, [304]),\n",
- " (0, 24): (10, [303]),\n",
- " (0, 38): (11, [348]),\n",
- " (0, 37): (11, [347]),\n",
- " (0, 36): (11, [346]),\n",
- " (0, 35): (11, [345]),\n",
- " (0, 34): (11, [344]),\n",
- " (0, 33): (11, [343]),\n",
- " (0, 32): (11, [342]),\n",
- " (0, 31): (11, [341]),\n",
- " (0, 30): (11, [340]),\n",
- " (0, 29): (11, [339]),\n",
- " (0, 28): (11, [338]),\n",
- " (5, 28): (11, [337]),\n",
- " (5, 29): (11, [336]),\n",
- " (5, 30): (11, [335]),\n",
- " (5, 31): (11, [334]),\n",
- " (5, 32): (11, [333]),\n",
- " (5, 33): (11, [332]),\n",
- " (5, 34): (11, [331]),\n",
- " (5, 35): (11, [330]),\n",
- " (5, 36): (11, [329]),\n",
- " (5, 37): (11, [328]),\n",
- " (5, 38): (11, [327]),\n",
- " (5, 39): (11, [326]),\n",
- " (3, 0): (12, [381]),\n",
- " (3, 1): (12, [380]),\n",
- " (3, 2): (12, [379]),\n",
- " (3, 3): (12, [378]),\n",
- " (3, 4): (12, [377]),\n",
- " (3, 5): (12, [376]),\n",
- " (3, 6): (12, [375]),\n",
- " (3, 7): (12, [374]),\n",
- " (3, 8): (12, [373]),\n",
- " (3, 9): (12, [372]),\n",
- " (3, 10): (12, [371]),\n",
- " (3, 11): (12, [370]),\n",
- " (3, 12): (12, [369]),\n",
- " (3, 13): (12, [368]),\n",
- " (3, 14): (12, [367]),\n",
- " (3, 15): (12, [366]),\n",
- " (3, 16): (12, [365]),\n",
- " (3, 17): (12, [364]),\n",
- " (3, 18): (12, [363]),\n",
- " (3, 19): (12, [362]),\n",
- " (3, 20): (12, [361]),\n",
- " (4, 20): (12, [360]),\n",
- " (4, 19): (12, [359]),\n",
- " (4, 18): (12, [358]),\n",
- " (4, 17): (12, [357]),\n",
- " (4, 16): (12, [356]),\n",
- " (4, 15): (12, [355]),\n",
- " (4, 14): (12, [354]),\n",
- " (4, 13): (12, [353]),\n",
- " (4, 12): (12, [352]),\n",
- " (4, 11): (12, [351]),\n",
- " (4, 10): (12, [350]),\n",
- " (4, 9): (12, [349]),\n",
- " (4, 39): (13, [414]),\n",
- " (4, 38): (13, [413]),\n",
- " (4, 37): (13, [412]),\n",
- " (4, 36): (13, [411]),\n",
- " (4, 35): (13, [410]),\n",
- " (4, 34): (13, [409]),\n",
- " (4, 33): (13, [408]),\n",
- " (4, 32): (13, [407]),\n",
- " (4, 31): (13, [406]),\n",
- " (4, 30): (13, [405]),\n",
- " (4, 29): (13, [404]),\n",
- " (4, 28): (13, [403]),\n",
- " (4, 27): (13, [402]),\n",
- " (4, 26): (13, [401]),\n",
- " (4, 25): (13, [400]),\n",
- " (4, 24): (13, [399]),\n",
- " (4, 23): (13, [398]),\n",
- " (4, 22): (13, [397]),\n",
- " (4, 21): (13, [396]),\n",
- " (3, 21): (13, [395]),\n",
- " (3, 22): (13, [394]),\n",
- " (3, 23): (13, [393]),\n",
- " (3, 24): (13, [392]),\n",
- " (3, 25): (13, [391]),\n",
- " (3, 26): (13, [390]),\n",
- " (3, 27): (13, [389]),\n",
- " (3, 28): (13, [388]),\n",
- " (3, 29): (13, [387]),\n",
- " (3, 30): (13, [386]),\n",
- " (3, 31): (13, [385]),\n",
- " (3, 32): (13, [384]),\n",
- " (3, 33): (13, [383]),\n",
- " (3, 34): (13, [382])},\n",
- " 'nuc_count': 0,\n",
- " 'strand_count': 0}"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "vh_vb.__dict__"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "id": "cef5068b-4858-44e2-9837-e95f1718e111",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[(0, [2]),\n",
- " (0, [1]),\n",
- " (0, [0]),\n",
- " (1, [5]),\n",
- " (1, [4]),\n",
- " (1, [3]),\n",
- " (2, [8]),\n",
- " (2, [7]),\n",
- " (2, [6]),\n",
- " (4, [47]),\n",
- " (4, [46]),\n",
- " (4, [45]),\n",
- " (4, [44]),\n",
- " (4, [43]),\n",
- " (5, [51]),\n",
- " (5, [50]),\n",
- " (5, [49]),\n",
- " (5, [48]),\n",
- " (6, [55]),\n",
- " (6, [54]),\n",
- " (6, [53]),\n",
- " (6, [52]),\n",
- " (7, [263]),\n",
- " (7, [262]),\n",
- " (7, [261]),\n",
- " (7, [260]),\n",
- " (7, [259]),\n",
- " (7, [258]),\n",
- " (7, [257]),\n",
- " (7, [256]),\n",
- " (7, [255]),\n",
- " (7, [254]),\n",
- " (7, [253]),\n",
- " (7, [252]),\n",
- " (7, [251]),\n",
- " (7, [250]),\n",
- " (7, [249]),\n",
- " (7, [248]),\n",
- " (7, [247]),\n",
- " (7, [246]),\n",
- " (7, [245]),\n",
- " (7, [244]),\n",
- " (7, [243]),\n",
- " (7, [242]),\n",
- " (7, [241]),\n",
- " (7, [240]),\n",
- " (7, [239]),\n",
- " (7, [238]),\n",
- " (7, [237]),\n",
- " (7, [236]),\n",
- " (7, [235]),\n",
- " (7, [234]),\n",
- " (7, [233]),\n",
- " (7, [232]),\n",
- " (7, [231]),\n",
- " (7, [230]),\n",
- " (7, [229]),\n",
- " (7, [228]),\n",
- " (7, [227]),\n",
- " (7, [226]),\n",
- " (7, [225]),\n",
- " (7, [224]),\n",
- " (7, [223]),\n",
- " (7, [222]),\n",
- " (7, [221]),\n",
- " (7, [220]),\n",
- " (7, []),\n",
- " (7, [219]),\n",
- " (7, [218]),\n",
- " (7, [217]),\n",
- " (7, [216]),\n",
- " (7, [215]),\n",
- " (7, [214]),\n",
- " (7, [213]),\n",
- " (7, [212]),\n",
- " (7, [211]),\n",
- " (7, [210]),\n",
- " (7, [209]),\n",
- " (7, [208]),\n",
- " (7, [207]),\n",
- " (7, [206]),\n",
- " (7, [205]),\n",
- " (7, [203, 204]),\n",
- " (7, [202]),\n",
- " (7, [201]),\n",
- " (7, [200]),\n",
- " (7, [199]),\n",
- " (7, [198]),\n",
- " (7, [197]),\n",
- " (7, [196]),\n",
- " (7, [195]),\n",
- " (7, [194]),\n",
- " (7, [193]),\n",
- " (7, [192]),\n",
- " (7, [191]),\n",
- " (7, [190]),\n",
- " (7, [189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179]),\n",
- " (7, [178]),\n",
- " (7, [177]),\n",
- " (7, [176]),\n",
- " (7, [175]),\n",
- " (7, [174]),\n",
- " (7, [173]),\n",
- " (7, [172]),\n",
- " (7, [171]),\n",
- " (7, [170]),\n",
- " (7, [169]),\n",
- " (7, [168]),\n",
- " (7, [167]),\n",
- " (7, [166]),\n",
- " (7, [165]),\n",
- " (7, [164]),\n",
- " (7, [163, 162, 161, 160, 159, 158, 157, 156, 155, 154, 153, 152]),\n",
- " (7, [151]),\n",
- " (7, [150]),\n",
- " (7, [149]),\n",
- " (7, [148]),\n",
- " (7, [147]),\n",
- " (7, [146]),\n",
- " (7, [145]),\n",
- " (7, [144]),\n",
- " (7, [143]),\n",
- " (7, [142]),\n",
- " (7, [141]),\n",
- " (7, [140]),\n",
- " (7, [139]),\n",
- " (7, [138]),\n",
- " (7, [137]),\n",
- " (7, [136]),\n",
- " (7, [135]),\n",
- " (7, [134]),\n",
- " (7, [133]),\n",
- " (7, [132]),\n",
- " (7, [131]),\n",
- " (7, [130]),\n",
- " (7, [129]),\n",
- " (7, [128]),\n",
- " (7, [127]),\n",
- " (7, [126]),\n",
- " (7, [125]),\n",
- " (7, [124]),\n",
- " (7, [123]),\n",
- " (7, [122]),\n",
- " (7, [121]),\n",
- " (7, [120]),\n",
- " (7, [119]),\n",
- " (7, [118]),\n",
- " (7, [117]),\n",
- " (7, [116]),\n",
- " (7, [115]),\n",
- " (7, [114]),\n",
- " (7, [113]),\n",
- " (7, [112]),\n",
- " (7, [111]),\n",
- " (7, [110]),\n",
- " (7, [109]),\n",
- " (7, []),\n",
- " (7, [108]),\n",
- " (7, [107]),\n",
- " (7, [106]),\n",
- " (7, [105]),\n",
- " (7, [104]),\n",
- " (7, [103]),\n",
- " (7, [102]),\n",
- " (7, [101]),\n",
- " (7, [100]),\n",
- " (7, [99]),\n",
- " (7, [98]),\n",
- " (7, [97]),\n",
- " (7, [96]),\n",
- " (7, [95]),\n",
- " (7, [94]),\n",
- " (7, [93]),\n",
- " (7, [92]),\n",
- " (7, [91]),\n",
- " (7, [90]),\n",
- " (7, [89]),\n",
- " (7, [88]),\n",
- " (7, [87]),\n",
- " (7, [86]),\n",
- " (7, [85]),\n",
- " (7, [84]),\n",
- " (7, [83]),\n",
- " (7, [82]),\n",
- " (7, [81]),\n",
- " (7, [80]),\n",
- " (7, [79]),\n",
- " (7, [78]),\n",
- " (7, [77]),\n",
- " (7, [76]),\n",
- " (7, [75]),\n",
- " (7, [74]),\n",
- " (7, [73]),\n",
- " (7, [72]),\n",
- " (7, [71]),\n",
- " (7, [70]),\n",
- " (7, [69]),\n",
- " (7, [68]),\n",
- " (7, [67]),\n",
- " (7, [66]),\n",
- " (7, [65]),\n",
- " (7, [64]),\n",
- " (7, [63]),\n",
- " (7, [62]),\n",
- " (7, [61]),\n",
- " (7, [60]),\n",
- " (7, [59]),\n",
- " (7, [58]),\n",
- " (7, [57]),\n",
- " (7, [56])]"
- ]
- },
- "execution_count": 31,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
"source": [
- "list(vh_vb._scaf.values())"
+ "def mrdna_model_from_cadnano(json_data,**model_parameters):\n",
+ " part,vslist=decode_cadnano_part(json_data)\n",
+ " props = part.getModelProperties().copy()\n",
+ "\n",
+ " if props.get('point_type') == PointType.ARBITRARY:\n",
+ " # TODO add code to encode Parts with ARBITRARY point configurations\n",
+ " raise NotImplementedError(\"Not implemented\")\n",
+ " else:\n",
+ " try:\n",
+ " vh_props, origins = part.helixPropertiesAndOrigins()\n",
+ " except:\n",
+ " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n",
+ " scaf_id=np.array([nttype(vslist['scaf'][i]) for i in vslist.index])\n",
+ " stap_id=np.array([nttype(vslist['stap'][i]) for i in vslist.index])\n",
+ " cad_bps=part.getIndices(0)\n",
+ " vslist[\"scafnt\"]=np.sum(scaf_id,axis=1)\n",
+ " vslist[\"stapnt\"]=np.sum(stap_id,axis=1)\n",
+ " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n",
+ " is_scaf=np.zeros(totnt)\n",
+ " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n",
+ " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n",
+ " nt_prop[\"is_scaf\"]=is_scaf\n",
+ " vhi,zids=np.where(np.array(scaf_id+stap_id)==1)\n",
+ " nt_prop[\"vh\"]=vhi\n",
+ " nt_prop[\"zid\"]=zids\n",
+ " nt_prop[\"r\"] =part.getCoordinate(nt_prop[\"vh\"],nt_prop[\"zid\"])\n",
+ " return nt_prop\n"
]
},
{
"cell_type": "code",
- "execution_count": 32,
- "id": "704b72d9-4745-4818-83b5-934ca486e1bd",
+ "execution_count": 201,
+ "id": "fb789ffb",
"metadata": {},
"outputs": [
{
- "ename": "KeyError",
- "evalue": "0",
+ "ename": "NameError",
+ "evalue": "name 'decode_cadnano_part' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mvh_vb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_scaf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n",
- "\u001b[0;31mKeyError\u001b[0m: 0"
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m<ipython-input-201-c5d589a8b80d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m<ipython-input-200-181a924488ad>\u001b[0m in \u001b[0;36mmrdna_model_from_cadnano\u001b[0;34m(json_data, **model_parameters)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mmodel_parameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpart\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvslist\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_cadnano_part\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprops\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetModelProperties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'point_type'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mPointType\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mARBITRARY\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mNameError\u001b[0m: name 'decode_cadnano_part' is not defined"
]
}
],
"source": [
- "vh_vb._scaf[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 199,
- "id": "5a920c32-cb2a-4a75-ac5a-d15fcff28aab",
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
- "def find_vh_vb_table(s,is_scaf):\n",
- " L=[]\n",
- " for i in list(s.keys()):\n",
- " vh,zid=i\n",
- " strand,indices=s[i]\n",
- " if len(indices)==0:\n",
- " continue\n",
- " else:\n",
- " if len(indices)==1:\n",
- " zids=[str(zid)]\n",
- " else:\n",
- " zids=[str(zid)+\".\"+str(j) for j in range(len(indices))]\n",
- " for index,z in zip(indices,zids):\n",
- " L.append(pd.Series({\"index\":index,\"vh\":vh,\"zid\":z,\"strand\":strand,\"is_scaf\":bool(is_scaf)}))\n",
- " return L\n",
- "L1=find_vh_vb_table(vh_vb._scaf,1)\n",
- "L2=find_vh_vb_table(vh_vb._stap,0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 219,
- "id": "8a6971b1-be0a-4546-90a7-918fa482f873",
- "metadata": {},
- "outputs": [],
- "source": [
- "df=pd.DataFrame(L1+L2)\n",
- "pd.options.mode.chained_assignment = None # default='warn'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1e8e7753-bb38-43a9-a33e-7882db3ace50",
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 255,
- "id": "74ba42d3-aa69-4dd6-9c26-0023095b2923",
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_virt2nuc(virt2nuc,top_data):\n",
- " vh_vb,pattern=pd.read_pickle(virt2nuc)\n",
- " L1=find_vh_vb_table(vh_vb._scaf,1)\n",
- " L2=find_vh_vb_table(vh_vb._stap,0)\n",
- " nt_prop=pd.DataFrame(L1+L2)\n",
- " nt_prop.set_index(\"index\",inplace=True)\n",
- " nt_prop.sort_index(inplace=True)\n",
- " nt_prop[\"threeprime\"]=top_data[2]\n",
- " nt_prop[\"seq\"]=top_data[1]\n",
- " nt_prop[\"stack\"]=top_data[2]\n",
- " for i in nt_prop.index:\n",
- " if nt_prop.loc[i][\"threeprime\"] in nt_prop.index:\n",
- " if nt_prop.loc[nt_prop.loc[i][\"threeprime\"]][\"vh\"]!=nt_prop.loc[i][\"vh\"]:\n",
- " nt_prop[\"stack\"][i]=-1\n",
- " bp_map=dict(zip(zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"is_scaf\"]),nt_prop.index))\n",
- " bp=-np.ones(len(nt_prop.index),dtype=int)\n",
- " counter=0\n",
- " for i,j,k in zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"is_scaf\"]):\n",
- " try:\n",
- " bp[counter]=bp_map[(i,j,not(k))]\n",
- " except:\n",
- " pass\n",
- " counter+=1\n",
- " nt_prop[\"bp\"]=bp\n",
- " return nt_prop"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 257,
- "id": "74afbfe6-5fd3-4f41-8a41-c0aa56ddd1be",
- "metadata": {},
- "outputs": [],
- "source": [
- "vh_vb,pattern=pd.read_pickle(\"virt2nuc\")\n",
- "s=get_virt2nuc(\"virt2nuc\",top_data)\n"
+ "mrdna_model_from_cadnano(\"test.json\")"
]
},
{
"cell_type": "code",
- "execution_count": 261,
- "id": "9a115dac-e6cc-408c-9bc2-f33cf437f8c2",
+ "execution_count": 146,
+ "id": "98703867",
"metadata": {},
"outputs": [],
"source": [
- "s.to_csv(\"s.csv\")"
+ "a,b=np.where(np.array(nts)==1)"
]
},
{
"cell_type": "code",
- "execution_count": 263,
- "id": "170a2918-f113-475c-b0a3-edbf33153e33",
+ "execution_count": 148,
+ "id": "c316fead",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "0 -1\n",
- "1 -1\n",
- "2 -1\n",
- "3 -1\n",
- "4 -1\n",
- " ..\n",
- "450 77\n",
- "451 76\n",
- "452 75\n",
- "453 74\n",
- "454 73\n",
- "Name: bp, Length: 455, dtype: int64"
+ "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n",
+ " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40,\n",
+ " 41, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n",
+ " 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39,\n",
+ " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
+ " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39,\n",
+ " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
+ " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 37,\n",
+ " 38, 39, 40, 41, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
+ " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n",
+ " 35, 36, 37, 38, 39, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16,\n",
+ " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
+ " 34, 35, 36, 37, 38, 39])"
]
},
- "execution_count": 263,
+ "execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "coordinate_col=\"bp\"\n",
- "dg[coordinate_col]"
+ "nt_prop=pd.DataFrame(index)"
]
},
{
"cell_type": "code",
- "execution_count": 176,
- "id": "675f9e19-e4d8-4dfa-bf89-56f628ba2a75",
+ "execution_count": 137,
+ "id": "0718e41e",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[-1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " 228,\n",
- " 227,\n",
- " 226,\n",
- " 225,\n",
- " 224,\n",
- " 223,\n",
- " 222,\n",
- " 221,\n",
- " 220,\n",
- " 219,\n",
- " 218,\n",
- " 217,\n",
- " 216,\n",
- " 215,\n",
- " 214,\n",
- " 213,\n",
- " 126,\n",
- " 125,\n",
- " 124,\n",
- " 123,\n",
- " 122,\n",
- " 121,\n",
- " 120,\n",
- " 119,\n",
- " 118,\n",
- " 117,\n",
- " 116,\n",
- " 115,\n",
- " 114,\n",
- " 113,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " 341,\n",
- " 340,\n",
- " 339,\n",
- " 338,\n",
- " 337,\n",
- " 367,\n",
- " 366,\n",
- " 365,\n",
- " 364,\n",
- " 363,\n",
- " 362,\n",
- " 361,\n",
- " 360,\n",
- " 359,\n",
- " 358,\n",
- " 357,\n",
- " 356,\n",
- " 454,\n",
- " 453,\n",
- " 452,\n",
- " 451,\n",
- " 450,\n",
- " 449,\n",
- " 448,\n",
- " 447,\n",
- " 446,\n",
- " 445,\n",
- " 444,\n",
- " 443,\n",
- " 442,\n",
- " 441,\n",
- " 440,\n",
- " 439,\n",
- " 438,\n",
- " 437,\n",
- " 436,\n",
- " 401,\n",
- " 400,\n",
- " 399,\n",
- " 398,\n",
- " 397,\n",
- " 406,\n",
- " 405,\n",
- " 404,\n",
- " 403,\n",
- " 402,\n",
- " 435,\n",
- " 434,\n",
- " 433,\n",
- " 432,\n",
- " 431,\n",
- " 430,\n",
- " 429,\n",
- " 428,\n",
- " 427,\n",
- " 426,\n",
- " 425,\n",
- " 40,\n",
- " 39,\n",
- " 38,\n",
- " 37,\n",
- " 36,\n",
- " 35,\n",
- " 34,\n",
- " 33,\n",
- " 32,\n",
- " 31,\n",
- " 30,\n",
- " 29,\n",
- " 28,\n",
- " 27,\n",
- " 294,\n",
- " 293,\n",
- " 329,\n",
- " 328,\n",
- " 327,\n",
- " 326,\n",
- " 325,\n",
- " 324,\n",
- " 323,\n",
- " 322,\n",
- " 321,\n",
- " 320,\n",
- " 319,\n",
- " 318,\n",
- " 317,\n",
- " 316,\n",
- " 315,\n",
- " 314,\n",
- " 387,\n",
- " 386,\n",
- " 385,\n",
- " 384,\n",
- " 383,\n",
- " 382,\n",
- " 381,\n",
- " 380,\n",
- " 379,\n",
- " 378,\n",
- " 377,\n",
- " 376,\n",
- " 375,\n",
- " 374,\n",
- " 373,\n",
- " 372,\n",
- " 371,\n",
- " 370,\n",
- " 369,\n",
- " 368,\n",
- " 336,\n",
- " 335,\n",
- " 334,\n",
- " 333,\n",
- " 332,\n",
- " 331,\n",
- " 330,\n",
- " 292,\n",
- " 291,\n",
- " 290,\n",
- " 289,\n",
- " 288,\n",
- " 287,\n",
- " 286,\n",
- " 285,\n",
- " 284,\n",
- " 283,\n",
- " 282,\n",
- " 281,\n",
- " 280,\n",
- " 279,\n",
- " 278,\n",
- " 277,\n",
- " 276,\n",
- " 275,\n",
- " 274,\n",
- " 273,\n",
- " 272,\n",
- " 271,\n",
- " 270,\n",
- " 269,\n",
- " 268,\n",
- " 267,\n",
- " 309,\n",
- " 308,\n",
- " 307,\n",
- " 306,\n",
- " 305,\n",
- " 304,\n",
- " 303,\n",
- " 302,\n",
- " 301,\n",
- " 300,\n",
- " 299,\n",
- " 298,\n",
- " 297,\n",
- " 296,\n",
- " 295,\n",
- " 26,\n",
- " 25,\n",
- " 24,\n",
- " 23,\n",
- " 22,\n",
- " 21,\n",
- " 20,\n",
- " 19,\n",
- " 18,\n",
- " 17,\n",
- " 16,\n",
- " 15,\n",
- " 14,\n",
- " 13,\n",
- " 12,\n",
- " 11,\n",
- " 420,\n",
- " 419,\n",
- " 418,\n",
- " 417,\n",
- " 416,\n",
- " 415,\n",
- " 414,\n",
- " 413,\n",
- " 412,\n",
- " 411,\n",
- " 410,\n",
- " 409,\n",
- " 408,\n",
- " 407,\n",
- " 396,\n",
- " 395,\n",
- " 394,\n",
- " 393,\n",
- " 392,\n",
- " 391,\n",
- " 390,\n",
- " 355,\n",
- " 354,\n",
- " 353,\n",
- " 352,\n",
- " 351,\n",
- " 350,\n",
- " 349,\n",
- " 348,\n",
- " 347,\n",
- " 346,\n",
- " 345,\n",
- " 344,\n",
- " 343,\n",
- " 342,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " 197,\n",
- " 196,\n",
- " 195,\n",
- " 194,\n",
- " 193,\n",
- " 192,\n",
- " 191,\n",
- " 190,\n",
- " 189,\n",
- " 188,\n",
- " 187,\n",
- " 186,\n",
- " 185,\n",
- " 184,\n",
- " 183,\n",
- " 182,\n",
- " 181,\n",
- " 180,\n",
- " 179,\n",
- " 178,\n",
- " 177,\n",
- " 176,\n",
- " 175,\n",
- " 174,\n",
- " 173,\n",
- " 172,\n",
- " 128,\n",
- " 127,\n",
- " 212,\n",
- " 211,\n",
- " 210,\n",
- " 209,\n",
- " 208,\n",
- " 207,\n",
- " 206,\n",
- " 205,\n",
- " 204,\n",
- " 203,\n",
- " 202,\n",
- " 201,\n",
- " 200,\n",
- " 199,\n",
- " 198,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " 144,\n",
- " 143,\n",
- " 142,\n",
- " 141,\n",
- " 140,\n",
- " 139,\n",
- " 138,\n",
- " 137,\n",
- " 136,\n",
- " 135,\n",
- " 134,\n",
- " 133,\n",
- " 132,\n",
- " 131,\n",
- " 130,\n",
- " 129,\n",
- " 171,\n",
- " 170,\n",
- " 169,\n",
- " 168,\n",
- " 167,\n",
- " 166,\n",
- " 165,\n",
- " 60,\n",
- " 59,\n",
- " 58,\n",
- " 57,\n",
- " 56,\n",
- " 263,\n",
- " 262,\n",
- " 261,\n",
- " 260,\n",
- " 259,\n",
- " 258,\n",
- " 257,\n",
- " 256,\n",
- " 255,\n",
- " 254,\n",
- " 253,\n",
- " 252,\n",
- " 251,\n",
- " 250,\n",
- " 72,\n",
- " 71,\n",
- " 70,\n",
- " 69,\n",
- " 68,\n",
- " 67,\n",
- " 66,\n",
- " 65,\n",
- " 64,\n",
- " 63,\n",
- " 62,\n",
- " 61,\n",
- " 164,\n",
- " 163,\n",
- " 162,\n",
- " 161,\n",
- " 160,\n",
- " 159,\n",
- " 158,\n",
- " 157,\n",
- " 156,\n",
- " 155,\n",
- " 154,\n",
- " 153,\n",
- " 152,\n",
- " 151,\n",
- " 150,\n",
- " 149,\n",
- " 148,\n",
- " 147,\n",
- " 146,\n",
- " 145,\n",
- " -1,\n",
- " -1,\n",
- " 249,\n",
- " 248,\n",
- " 247,\n",
- " 246,\n",
- " 245,\n",
- " 244,\n",
- " 243,\n",
- " 96,\n",
- " 95,\n",
- " 94,\n",
- " 93,\n",
- " 92,\n",
- " 101,\n",
- " 100,\n",
- " 99,\n",
- " 98,\n",
- " 97,\n",
- " 242,\n",
- " 241,\n",
- " 240,\n",
- " 239,\n",
- " 238,\n",
- " 237,\n",
- " 236,\n",
- " 235,\n",
- " 234,\n",
- " 233,\n",
- " 232,\n",
- " 231,\n",
- " 230,\n",
- " 229,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " 112,\n",
- " 111,\n",
- " 110,\n",
- " 109,\n",
- " 108,\n",
- " 107,\n",
- " 106,\n",
- " 105,\n",
- " 104,\n",
- " 103,\n",
- " 102,\n",
- " 91,\n",
- " 90,\n",
- " 89,\n",
- " 88,\n",
- " 87,\n",
- " 86,\n",
- " 85,\n",
- " 84,\n",
- " 83,\n",
- " 82,\n",
- " 81,\n",
- " 80,\n",
- " 79,\n",
- " 78,\n",
- " 77,\n",
- " 76,\n",
- " 75,\n",
- " 74,\n",
- " 73]"
- ]
- },
- "execution_count": 176,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "vhzid=list(zip(dg[\"vh\"],dg[\"zid\"],dg[\"is_scaf\"]))\n",
- "bp_list=dict(zip(vhzid,dg[\"index\"]))\n",
- "bps =[]\n",
- "for vh,zid,scaf in bp_list.keys():\n",
- " try:\n",
- " bps.append(bp_list[(vh,zid,not(scaf))])\n",
- " except:\n",
- " bps.append(-1)\n",
- "bps "
+ "def nttype(scafs):\n",
+ " def judge(i):\n",
+ " if i ==[-1,-1,-1,-1]:\n",
+ " return 0\n",
+ " else: return 1\n",
+ " n=np.array([judge(i) for i in scafs])\n",
+ " return n\n",
+ "\n"
]
},
{
"cell_type": "code",
- "execution_count": 126,
- "id": "9f960b0e-61d2-448e-bde9-a6595d733a11",
+ "execution_count": null,
+ "id": "1a61115e",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "id": "12198835",
"metadata": {},
"outputs": [],
"source": [
- "top_data = np.loadtxt(\"test_insert.json.top\", skiprows=1,\n",
- " unpack=True,\n",
- " dtype=np.dtype('i4,U1,i4,i4')\n",
- " )\n",
- "dg=df.sort_index()"
+ "b[\"scafnt\"]=[ntcount(b['scaf'][i]) for i in b.index]\n",
+ "b[\"stapnt\"]=[ntcount(b['stap'][i]) for i in b.index]"
]
},
{
"cell_type": "code",
- "execution_count": 128,
- "id": "6df902fa-6979-403f-bef3-0afe2c4dac5f",
+ "execution_count": 156,
+ "id": "1e5c9807",
"metadata": {},
"outputs": [
{
"data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th>r</th>\n",
+ " <th>bp</th>\n",
+ " <th>stack</th>\n",
+ " <th>threeprime</th>\n",
+ " <th>seq</th>\n",
+ " <th>orientation</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>vh</th>\n",
+ " <th>zid</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <th>0</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <th>3</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <th>1</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <th>2</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <th>8</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
"text/plain": [
- "[array([ 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,\n",
- " 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n",
- " 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6,\n",
- " 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,\n",
- " 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,\n",
- " 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,\n",
- " 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,\n",
- " 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11,\n",
- " 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,\n",
- " 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,\n",
- " 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,\n",
- " 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,\n",
- " 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14,\n",
- " 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,\n",
- " 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14], dtype=int32),\n",
- " array(['C', 'C', 'G', 'G', 'T', 'C', 'G', 'G', 'G', 'C', 'A', 'A', 'A',\n",
- " 'G', 'G', 'G', 'A', 'C', 'T', 'T', 'T', 'T', 'G', 'A', 'C', 'T',\n",
- " 'G', 'T', 'G', 'C', 'C', 'C', 'C', 'C', 'A', 'A', 'A', 'A', 'G',\n",
- " 'G', 'G', 'A', 'C', 'T', 'T', 'C', 'C', 'T', 'G', 'C', 'T', 'T',\n",
- " 'G', 'A', 'C', 'A', 'T', 'A', 'G', 'T', 'G', 'C', 'G', 'G', 'C',\n",
- " 'A', 'G', 'G', 'C', 'G', 'C', 'T', 'T', 'G', 'G', 'C', 'G', 'T',\n",
- " 'G', 'A', 'A', 'C', 'C', 'T', 'C', 'G', 'C', 'A', 'T', 'G', 'G',\n",
- " 'A', 'G', 'T', 'C', 'T', 'C', 'G', 'G', 'C', 'A', 'G', 'C', 'G',\n",
- " 'T', 'G', 'T', 'C', 'C', 'G', 'C', 'G', 'T', 'C', 'C', 'C', 'T',\n",
- " 'T', 'T', 'T', 'G', 'G', 'G', 'G', 'G', 'C', 'A', 'C', 'C', 'C',\n",
- " 'C', 'G', 'C', 'C', 'C', 'A', 'C', 'G', 'C', 'T', 'G', 'A', 'G',\n",
- " 'G', 'C', 'G', 'A', 'A', 'C', 'C', 'A', 'A', 'A', 'G', 'T', 'A',\n",
- " 'T', 'G', 'T', 'G', 'A', 'G', 'C', 'G', 'T', 'T', 'T', 'A', 'C',\n",
- " 'A', 'C', 'T', 'T', 'A', 'T', 'C', 'A', 'C', 'T', 'C', 'T', 'A',\n",
- " 'A', 'T', 'G', 'T', 'G', 'A', 'T', 'A', 'T', 'A', 'T', 'G', 'C',\n",
- " 'C', 'C', 'G', 'T', 'C', 'C', 'A', 'G', 'A', 'C', 'G', 'A', 'T',\n",
- " 'C', 'G', 'T', 'G', 'C', 'C', 'A', 'G', 'T', 'C', 'A', 'A', 'A',\n",
- " 'A', 'G', 'T', 'C', 'C', 'C', 'T', 'T', 'T', 'T', 'A', 'T', 'C',\n",
- " 'C', 'A', 'C', 'C', 'A', 'T', 'C', 'C', 'G', 'C', 'C', 'A', 'T',\n",
- " 'A', 'C', 'C', 'C', 'C', 'A', 'A', 'A', 'C', 'T', 'A', 'G', 'G',\n",
- " 'T', 'A', 'T', 'T', 'G', 'G', 'A', 'C', 'G', 'G', 'G', 'C', 'A',\n",
- " 'T', 'A', 'T', 'A', 'T', 'C', 'A', 'C', 'A', 'T', 'T', 'A', 'G',\n",
- " 'A', 'G', 'T', 'G', 'A', 'T', 'A', 'G', 'G', 'G', 'C', 'A', 'C',\n",
- " 'G', 'A', 'T', 'C', 'G', 'T', 'C', 'T', 'G', 'G', 'A', 'T', 'T',\n",
- " 'T', 'G', 'G', 'C', 'C', 'T', 'C', 'A', 'G', 'C', 'G', 'T', 'G',\n",
- " 'G', 'G', 'C', 'G', 'G', 'A', 'G', 'T', 'G', 'T', 'A', 'A', 'C',\n",
- " 'A', 'C', 'T', 'A', 'A', 'A', 'T', 'A', 'C', 'C', 'T', 'A', 'G',\n",
- " 'T', 'T', 'T', 'G', 'G', 'A', 'A', 'G', 'C', 'G', 'C', 'C', 'T',\n",
- " 'G', 'C', 'C', 'G', 'A', 'C', 'G', 'C', 'T', 'C', 'A', 'C', 'A',\n",
- " 'T', 'A', 'C', 'T', 'T', 'T', 'G', 'G', 'T', 'T', 'C', 'A', 'G',\n",
- " 'G', 'G', 'T', 'A', 'T', 'G', 'G', 'G', 'A', 'G', 'A', 'C', 'C',\n",
- " 'T', 'G', 'C', 'C', 'C', 'G', 'G', 'A', 'T', 'G', 'G', 'T', 'G',\n",
- " 'G', 'A', 'T', 'A', 'A', 'A', 'T', 'T', 'A', 'A', 'C', 'G', 'C',\n",
- " 'G', 'G', 'A', 'C', 'A', 'C', 'G', 'T', 'C', 'C', 'A', 'T', 'G',\n",
- " 'C', 'G', 'A', 'G', 'G', 'T', 'T', 'C', 'A', 'C', 'G', 'C', 'C'],\n",
- " dtype='<U1'),\n",
- " array([ -1, 0, 1, -1, 3, 4, -1, 6, 7, -1, 9, 10, 11,\n",
- " 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,\n",
- " 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,\n",
- " 38, 39, 40, 41, -1, 43, 44, 45, 46, -1, 48, 49, 50,\n",
- " -1, 52, 53, 54, -1, 56, 57, 58, 59, 60, 61, 62, 63,\n",
- " 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,\n",
- " 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,\n",
- " 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,\n",
- " 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,\n",
- " 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,\n",
- " 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141,\n",
- " 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154,\n",
- " 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,\n",
- " 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,\n",
- " 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193,\n",
- " 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206,\n",
- " 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,\n",
- " 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232,\n",
- " 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245,\n",
- " 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258,\n",
- " 259, 260, 261, 262, -1, 264, 265, 266, 267, 268, 269, 270, 271,\n",
- " 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,\n",
- " 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297,\n",
- " 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310,\n",
- " -1, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323,\n",
- " 324, 325, 326, 327, 328, 329, 330, 331, -1, 333, 334, 335, 336,\n",
- " 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349,\n",
- " 350, 351, 352, 353, 354, -1, 356, 357, 358, 359, 360, 361, 362,\n",
- " 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375,\n",
- " 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,\n",
- " -1, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401,\n",
- " 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414,\n",
- " 415, 416, 417, 418, 419, 420, 421, -1, 423, 424, 425, 426, 427,\n",
- " 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440,\n",
- " 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453],\n",
- " dtype=int32),\n",
- " array([ 1, 2, -1, 4, 5, -1, 7, 8, -1, 10, 11, 12, 13,\n",
- " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n",
- " 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,\n",
- " 40, 41, 42, -1, 44, 45, 46, 47, -1, 49, 50, 51, -1,\n",
- " 53, 54, 55, -1, 57, 58, 59, 60, 61, 62, 63, 64, 65,\n",
- " 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78,\n",
- " 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,\n",
- " 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,\n",
- " 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,\n",
- " 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,\n",
- " 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,\n",
- " 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,\n",
- " 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,\n",
- " 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,\n",
- " 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,\n",
- " 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,\n",
- " 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,\n",
- " 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,\n",
- " 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247,\n",
- " 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,\n",
- " 261, 262, 263, -1, 265, 266, 267, 268, 269, 270, 271, 272, 273,\n",
- " 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286,\n",
- " 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,\n",
- " 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, -1,\n",
- " 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325,\n",
- " 326, 327, 328, 329, 330, 331, 332, -1, 334, 335, 336, 337, 338,\n",
- " 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351,\n",
- " 352, 353, 354, 355, -1, 357, 358, 359, 360, 361, 362, 363, 364,\n",
- " 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377,\n",
- " 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, -1,\n",
- " 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403,\n",
- " 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416,\n",
- " 417, 418, 419, 420, 421, 422, -1, 424, 425, 426, 427, 428, 429,\n",
- " 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442,\n",
- " 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, -1],\n",
- " dtype=int32)]"
+ " r bp stack threeprime seq orientation\n",
+ "vh zid \n",
+ "0 0 NaN NaN NaN NaN NaN NaN\n",
+ "1 3 NaN NaN NaN NaN NaN NaN\n",
+ "2 1 NaN NaN NaN NaN NaN NaN\n",
+ "3 2 NaN NaN NaN NaN NaN NaN\n",
+ "1 8 NaN NaN NaN NaN NaN NaN"
]
},
- "execution_count": 128,
+ "execution_count": 156,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "top_data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 222,
- "id": "9dbdf874-ed47-45bf-a70d-8c4928a5761b",
- "metadata": {},
- "outputs": [],
- "source": [
- "dg[\"threeprime\"]=top_data[-1]\n",
- "stacks=[]\n",
- "dg[\"seq\"]=top_data[1]\n",
- "dg[\"stack\"]=dg[\"threeprime\"]\n",
- "for i in dg.index:\n",
- " if dg.loc[i][\"threeprime\"] in dg.index:\n",
- " if dg.loc[dg.loc[i][\"threeprime\"]][\"vh\"]!=dg.loc[i][\"vh\"]:\n",
- " dg[\"stack\"][i]=-1\n"
+ "i=range(5)\n",
+ "col=[\"vh\",\"zid\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"]\n",
+ "d=pd.DataFrame(index=i,columns=col)\n",
+ "d['vh']=[0,1,2,3,1]\n",
+ "d['zid']=[0,3,1,2,8]\n",
+ "d.set_index([\"vh\",\"zid\"],inplace=True)\n",
+ "d"
]
},
{
"cell_type": "code",
- "execution_count": 179,
- "id": "6e9e8a3f-6884-40ab-9746-9b338a650858",
+ "execution_count": 157,
+ "id": "31c50f63",
"metadata": {},
"outputs": [
{
@@ -3974,15 +2185,14 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
- " <th>level_0</th>\n",
- " <th>index</th>\n",
" <th>vh</th>\n",
" <th>zid</th>\n",
- " <th>strand</th>\n",
- " <th>is_scaf</th>\n",
+ " <th>r</th>\n",
+ " <th>bp</th>\n",
" <th>stack</th>\n",
" <th>threeprime</th>\n",
" <th>seq</th>\n",
+ " <th>orientation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
@@ -3990,172 +2200,108 @@
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
- " <td>0</td>\n",
- " <td>41</td>\n",
- " <td>0</td>\n",
- " <td>True</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>C</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " <td>40</td>\n",
- " <td>0</td>\n",
- " <td>True</td>\n",
- " <td>2</td>\n",
- " <td>2</td>\n",
- " <td>C</td>\n",
+ " <td>3</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
- " <td>2</td>\n",
- " <td>0</td>\n",
- " <td>39</td>\n",
- " <td>0</td>\n",
- " <td>True</td>\n",
- " <td>-1</td>\n",
- " <td>-1</td>\n",
- " <td>G</td>\n",
+ " <td>1</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
- " <td>3</td>\n",
- " <td>1</td>\n",
- " <td>39</td>\n",
- " <td>1</td>\n",
- " <td>True</td>\n",
- " <td>4</td>\n",
- " <td>4</td>\n",
- " <td>G</td>\n",
+ " <td>2</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
- " <td>4</td>\n",
- " <td>4</td>\n",
" <td>1</td>\n",
- " <td>40</td>\n",
- " <td>1</td>\n",
- " <td>True</td>\n",
- " <td>5</td>\n",
- " <td>5</td>\n",
- " <td>T</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>450</th>\n",
- " <td>450</td>\n",
- " <td>450</td>\n",
- " <td>4</td>\n",
- " <td>35</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>451</td>\n",
- " <td>451</td>\n",
- " <td>A</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>451</th>\n",
- " <td>451</td>\n",
- " <td>451</td>\n",
- " <td>4</td>\n",
- " <td>36</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>452</td>\n",
- " <td>452</td>\n",
- " <td>C</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>452</th>\n",
- " <td>452</td>\n",
- " <td>452</td>\n",
- " <td>4</td>\n",
- " <td>37</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>453</td>\n",
- " <td>453</td>\n",
- " <td>G</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>453</th>\n",
- " <td>453</td>\n",
- " <td>453</td>\n",
- " <td>4</td>\n",
- " <td>38</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>454</td>\n",
- " <td>454</td>\n",
- " <td>C</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>454</th>\n",
- " <td>454</td>\n",
- " <td>454</td>\n",
- " <td>4</td>\n",
- " <td>39</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>-1</td>\n",
- " <td>-1</td>\n",
- " <td>C</td>\n",
+ " <td>8</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
- "<p>455 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
- " level_0 index vh zid strand is_scaf stack threeprime seq\n",
- "0 0 0 0 41 0 True 1 1 C\n",
- "1 1 1 0 40 0 True 2 2 C\n",
- "2 2 2 0 39 0 True -1 -1 G\n",
- "3 3 3 1 39 1 True 4 4 G\n",
- "4 4 4 1 40 1 True 5 5 T\n",
- ".. ... ... .. .. ... ... ... ... ..\n",
- "450 450 450 4 35 13 False 451 451 A\n",
- "451 451 451 4 36 13 False 452 452 C\n",
- "452 452 452 4 37 13 False 453 453 G\n",
- "453 453 453 4 38 13 False 454 454 C\n",
- "454 454 454 4 39 13 False -1 -1 C\n",
- "\n",
- "[455 rows x 9 columns]"
+ " vh zid r bp stack threeprime seq orientation\n",
+ "0 0 0 NaN NaN NaN NaN NaN NaN\n",
+ "1 1 3 NaN NaN NaN NaN NaN NaN\n",
+ "2 2 1 NaN NaN NaN NaN NaN NaN\n",
+ "3 3 2 NaN NaN NaN NaN NaN NaN\n",
+ "4 1 8 NaN NaN NaN NaN NaN NaN"
+ ]
+ },
+ "execution_count": 157,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "d=d.reset_index()\n",
+ "d"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 128,
+ "id": "67546136",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(array([0, 2]),)"
]
},
- "execution_count": 179,
+ "execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "dg"
+ "s=[True,False,True,False,False]\n",
+ "np.where(np.array(s)==True)"
]
},
{
"cell_type": "code",
- "execution_count": 196,
- "id": "9492a36d-ca4c-4859-833b-1f7831360c4e",
- "metadata": {
- "scrolled": true
- },
+ "execution_count": 127,
+ "id": "bad20d6a",
+ "metadata": {},
"outputs": [
{
"data": {
@@ -4178,3620 +2324,513 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
- " <th>level_0</th>\n",
- " <th>index</th>\n",
- " <th>vh</th>\n",
- " <th>zid</th>\n",
- " <th>strand</th>\n",
- " <th>is_scaf</th>\n",
+ " <th></th>\n",
+ " <th>r</th>\n",
+ " <th>bp</th>\n",
" <th>stack</th>\n",
" <th>threeprime</th>\n",
" <th>seq</th>\n",
- " <th>bp</th>\n",
+ " <th>orientation</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>vh</th>\n",
+ " <th>zid</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " <td>0</td>\n",
- " <td>41</td>\n",
- " <td>0</td>\n",
- " <td>True</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>C</td>\n",
- " <td>-1</td>\n",
+ " <th>0</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " <td>40</td>\n",
- " <td>0</td>\n",
- " <td>True</td>\n",
- " <td>2</td>\n",
- " <td>2</td>\n",
- " <td>C</td>\n",
- " <td>-1</td>\n",
+ " <th>3</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
- " <td>2</td>\n",
- " <td>2</td>\n",
- " <td>0</td>\n",
- " <td>39</td>\n",
- " <td>0</td>\n",
- " <td>True</td>\n",
- " <td>-1</td>\n",
- " <td>-1</td>\n",
- " <td>G</td>\n",
- " <td>-1</td>\n",
+ " <th>1</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
- " <td>3</td>\n",
- " <td>3</td>\n",
- " <td>1</td>\n",
- " <td>39</td>\n",
- " <td>1</td>\n",
- " <td>True</td>\n",
- " <td>4</td>\n",
- " <td>4</td>\n",
- " <td>G</td>\n",
- " <td>-1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>4</td>\n",
- " <td>4</td>\n",
- " <td>1</td>\n",
- " <td>40</td>\n",
- " <td>1</td>\n",
- " <td>True</td>\n",
- " <td>5</td>\n",
- " <td>5</td>\n",
- " <td>T</td>\n",
- " <td>-1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>450</th>\n",
- " <td>450</td>\n",
- " <td>450</td>\n",
- " <td>4</td>\n",
- " <td>35</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>451</td>\n",
- " <td>451</td>\n",
- " <td>A</td>\n",
- " <td>77</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>451</th>\n",
- " <td>451</td>\n",
- " <td>451</td>\n",
- " <td>4</td>\n",
- " <td>36</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>452</td>\n",
- " <td>452</td>\n",
- " <td>C</td>\n",
- " <td>76</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>452</th>\n",
- " <td>452</td>\n",
- " <td>452</td>\n",
- " <td>4</td>\n",
- " <td>37</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>453</td>\n",
- " <td>453</td>\n",
- " <td>G</td>\n",
- " <td>75</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>453</th>\n",
- " <td>453</td>\n",
- " <td>453</td>\n",
- " <td>4</td>\n",
- " <td>38</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>454</td>\n",
- " <td>454</td>\n",
- " <td>C</td>\n",
- " <td>74</td>\n",
+ " <th>2</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>454</th>\n",
- " <td>454</td>\n",
- " <td>454</td>\n",
- " <td>4</td>\n",
- " <td>39</td>\n",
- " <td>13</td>\n",
- " <td>False</td>\n",
- " <td>-1</td>\n",
- " <td>-1</td>\n",
- " <td>C</td>\n",
- " <td>73</td>\n",
+ " <th>1</th>\n",
+ " <th>8</th>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
- "<p>455 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
- " level_0 index vh zid strand is_scaf stack threeprime seq bp\n",
- "0 0 0 0 41 0 True 1 1 C -1\n",
- "1 1 1 0 40 0 True 2 2 C -1\n",
- "2 2 2 0 39 0 True -1 -1 G -1\n",
- "3 3 3 1 39 1 True 4 4 G -1\n",
- "4 4 4 1 40 1 True 5 5 T -1\n",
- ".. ... ... .. .. ... ... ... ... .. ..\n",
- "450 450 450 4 35 13 False 451 451 A 77\n",
- "451 451 451 4 36 13 False 452 452 C 76\n",
- "452 452 452 4 37 13 False 453 453 G 75\n",
- "453 453 453 4 38 13 False 454 454 C 74\n",
- "454 454 454 4 39 13 False -1 -1 C 73\n",
- "\n",
- "[455 rows x 10 columns]"
+ " r bp stack threeprime seq orientation\n",
+ "vh zid \n",
+ "0 0 NaN NaN NaN NaN NaN NaN\n",
+ "1 3 NaN NaN NaN NaN NaN NaN\n",
+ "2 1 NaN NaN NaN NaN NaN NaN\n",
+ "3 2 NaN NaN NaN NaN NaN NaN\n",
+ "1 8 NaN NaN NaN NaN NaN NaN"
]
},
- "execution_count": 196,
+ "execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "bp_map=dict(zip(zip(dg[\"vh\"],dg[\"zid\"],dg[\"is_scaf\"]),dg[\"index\"]))\n",
- "bp=-np.ones(len(dg.index),dtype=int)\n",
- "counter=0\n",
- "for i,j,k in zip(dg[\"vh\"],dg[\"zid\"],dg[\"is_scaf\"]):\n",
- " try:\n",
- " bp[counter]=bp_map[(i,j,not(k))]\n",
- " except:\n",
- " pass\n",
- " counter+=1\n",
- "dg[\"bp\"]=bp\n",
- "dg"
+ "d"
]
},
{
"cell_type": "code",
- "execution_count": 197,
- "id": "319e2a8b-a24b-431e-80ec-6a21a8505ead",
+ "execution_count": 100,
+ "id": "48225afa",
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "(level_0 450\n",
- " index 450\n",
- " vh 4\n",
- " zid 35\n",
- " strand 13\n",
- " is_scaf False\n",
- " stack 451\n",
- " threeprime 451\n",
- " seq A\n",
- " bp 77\n",
- " Name: 450, dtype: object,\n",
- " level_0 77\n",
- " index 77\n",
- " vh 4\n",
- " zid 35\n",
- " strand 7\n",
- " is_scaf True\n",
- " stack 78\n",
- " threeprime 78\n",
- " seq T\n",
- " bp 450\n",
- " Name: 77, dtype: object)"
- ]
- },
- "execution_count": 197,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "KeyError",
+ "evalue": "\"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m<ipython-input-100-a349feadc600>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3509\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_iterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3510\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3511\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_indexer_strict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3513\u001b[0m \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5780\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_indexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reindex_non_unique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5782\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_if_missing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5783\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5784\u001b[0m \u001b[0mkeyarr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5840\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muse_interval_msg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5841\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5842\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of [{key}] are in the [{axis_name}]\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5844\u001b[0m \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: \"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\""
+ ]
}
],
"source": [
- "dg.loc[450],dg.loc[77]"
+ "d[[0,0]]"
]
},
{
"cell_type": "code",
- "execution_count": 107,
- "id": "d57bc82b-c233-4f0d-862e-47134101f43c",
+ "execution_count": 13,
+ "id": "c75bd92f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{2: (0, 0),\n",
- " 1: (0, 0),\n",
- " 0: (0, 0),\n",
- " 5: (1, 1),\n",
- " 4: (1, 1),\n",
- " 3: (1, 1),\n",
- " 8: (2, 2),\n",
- " 7: (2, 2),\n",
- " 6: (2, 2),\n",
- " 42: (3, 2),\n",
- " 41: (3, 2),\n",
- " 40: (3, 2),\n",
- " 39: (3, 2),\n",
- " 38: (3, 2),\n",
- " 37: (3, 2),\n",
- " 36: (3, 2),\n",
- " 35: (3, 2),\n",
- " 34: (3, 2),\n",
- " 33: (3, 2),\n",
- " 32: (3, 2),\n",
- " 31: (3, 2),\n",
- " 30: (3, 2),\n",
- " 29: (3, 2),\n",
- " 28: (3, 2),\n",
- " 27: (3, 2),\n",
- " 26: (3, 2),\n",
- " 25: (3, 2),\n",
- " 24: (3, 2),\n",
- " 23: (3, 2),\n",
- " 22: (3, 2),\n",
- " 21: (3, 2),\n",
- " 20: (3, 2),\n",
- " 19: (3, 2),\n",
- " 18: (3, 2),\n",
- " 17: (3, 2),\n",
- " 16: (3, 2),\n",
- " 15: (3, 2),\n",
- " 14: (3, 2),\n",
- " 13: (3, 2),\n",
- " 12: (3, 2),\n",
- " 11: (3, 2),\n",
- " 10: (3, 2),\n",
- " 9: (3, 2),\n",
- " 47: (4, 3),\n",
- " 46: (4, 3),\n",
- " 45: (4, 3),\n",
- " 44: (4, 3),\n",
- " 43: (4, 3),\n",
- " 51: (5, 4),\n",
- " 50: (5, 4),\n",
- " 49: (5, 4),\n",
- " 48: (5, 4),\n",
- " 55: (6, 5),\n",
- " 54: (6, 5),\n",
- " 53: (6, 5),\n",
- " 52: (6, 5),\n",
- " 197: (7, 0),\n",
- " 196: (7, 0),\n",
- " 195: (7, 0),\n",
- " 194: (7, 0),\n",
- " 193: (7, 0),\n",
- " 192: (7, 0),\n",
- " 191: (7, 0),\n",
- " 190: (7, 0),\n",
- " 189: (7, 0),\n",
- " 188: (7, 0),\n",
- " 187: (7, 0),\n",
- " 186: (7, 0),\n",
- " 185: (7, 0),\n",
- " 184: (7, 0),\n",
- " 183: (7, 0),\n",
- " 182: (7, 0),\n",
- " 181: (7, 0),\n",
- " 180: (7, 0),\n",
- " 179: (7, 0),\n",
- " 178: (7, 0),\n",
- " 177: (7, 0),\n",
- " 176: (7, 0),\n",
- " 175: (7, 0),\n",
- " 174: (7, 0),\n",
- " 173: (7, 0),\n",
- " 172: (7, 0),\n",
- " 171: (7, 0),\n",
- " 170: (7, 0),\n",
- " 169: (7, 0),\n",
- " 168: (7, 0),\n",
- " 167: (7, 0),\n",
- " 166: (7, 0),\n",
- " 165: (7, 0),\n",
- " 164: (7, 0),\n",
- " 163: (7, 0),\n",
- " 162: (7, 0),\n",
- " 161: (7, 0),\n",
- " 160: (7, 0),\n",
- " 159: (7, 0),\n",
- " 158: (7, 0),\n",
- " 157: (7, 0),\n",
- " 156: (7, 0),\n",
- " 155: (7, 0),\n",
- " 154: (7, 0),\n",
- " 153: (7, 0),\n",
- " 152: (7, 0),\n",
- " 151: (7, 0),\n",
- " 150: (7, 0),\n",
- " 149: (7, 0),\n",
- " 148: (7, 0),\n",
- " 147: (7, 0),\n",
- " 146: (7, 0),\n",
- " 145: (7, 0),\n",
- " 212: (7, 1),\n",
- " 211: (7, 1),\n",
- " 210: (7, 1),\n",
- " 209: (7, 1),\n",
- " 208: (7, 1),\n",
- " 207: (7, 1),\n",
- " 206: (7, 1),\n",
- " 205: (7, 1),\n",
- " 203: (7, 1),\n",
- " 204: (7, 1),\n",
- " 202: (7, 1),\n",
- " 201: (7, 1),\n",
- " 200: (7, 1),\n",
- " 199: (7, 1),\n",
- " 198: (7, 1),\n",
- " 144: (7, 1),\n",
- " 143: (7, 1),\n",
- " 142: (7, 1),\n",
- " 141: (7, 1),\n",
- " 140: (7, 1),\n",
- " 139: (7, 1),\n",
- " 138: (7, 1),\n",
- " 137: (7, 1),\n",
- " 136: (7, 1),\n",
- " 135: (7, 1),\n",
- " 134: (7, 1),\n",
- " 133: (7, 1),\n",
- " 132: (7, 1),\n",
- " 131: (7, 1),\n",
- " 130: (7, 1),\n",
- " 129: (7, 1),\n",
- " 128: (7, 1),\n",
- " 127: (7, 1),\n",
- " 228: (7, 2),\n",
- " 227: (7, 2),\n",
- " 226: (7, 2),\n",
- " 225: (7, 2),\n",
- " 224: (7, 2),\n",
- " 223: (7, 2),\n",
- " 222: (7, 2),\n",
- " 221: (7, 2),\n",
- " 220: (7, 2),\n",
- " 219: (7, 2),\n",
- " 218: (7, 2),\n",
- " 217: (7, 2),\n",
- " 216: (7, 2),\n",
- " 215: (7, 2),\n",
- " 214: (7, 2),\n",
- " 213: (7, 2),\n",
- " 126: (7, 2),\n",
- " 125: (7, 2),\n",
- " 124: (7, 2),\n",
- " 123: (7, 2),\n",
- " 122: (7, 2),\n",
- " 121: (7, 2),\n",
- " 120: (7, 2),\n",
- " 119: (7, 2),\n",
- " 118: (7, 2),\n",
- " 117: (7, 2),\n",
- " 116: (7, 2),\n",
- " 115: (7, 2),\n",
- " 114: (7, 2),\n",
- " 113: (7, 2),\n",
- " 242: (7, 3),\n",
- " 241: (7, 3),\n",
- " 240: (7, 3),\n",
- " 239: (7, 3),\n",
- " 238: (7, 3),\n",
- " 237: (7, 3),\n",
- " 236: (7, 3),\n",
- " 235: (7, 3),\n",
- " 234: (7, 3),\n",
- " 233: (7, 3),\n",
- " 232: (7, 3),\n",
- " 231: (7, 3),\n",
- " 230: (7, 3),\n",
- " 229: (7, 3),\n",
- " 112: (7, 3),\n",
- " 111: (7, 3),\n",
- " 110: (7, 3),\n",
- " 109: (7, 3),\n",
- " 108: (7, 3),\n",
- " 107: (7, 3),\n",
- " 106: (7, 3),\n",
- " 105: (7, 3),\n",
- " 104: (7, 3),\n",
- " 103: (7, 3),\n",
- " 102: (7, 3),\n",
- " 101: (7, 3),\n",
- " 100: (7, 3),\n",
- " 99: (7, 3),\n",
- " 98: (7, 3),\n",
- " 97: (7, 3),\n",
- " 249: (7, 4),\n",
- " 248: (7, 4),\n",
- " 247: (7, 4),\n",
- " 246: (7, 4),\n",
- " 245: (7, 4),\n",
- " 244: (7, 4),\n",
- " 243: (7, 4),\n",
- " 96: (7, 4),\n",
- " 95: (7, 4),\n",
- " 94: (7, 4),\n",
- " 93: (7, 4),\n",
- " 92: (7, 4),\n",
- " 91: (7, 4),\n",
- " 90: (7, 4),\n",
- " 89: (7, 4),\n",
- " 88: (7, 4),\n",
- " 87: (7, 4),\n",
- " 86: (7, 4),\n",
- " 85: (7, 4),\n",
- " 84: (7, 4),\n",
- " 83: (7, 4),\n",
- " 82: (7, 4),\n",
- " 81: (7, 4),\n",
- " 80: (7, 4),\n",
- " 79: (7, 4),\n",
- " 78: (7, 4),\n",
- " 77: (7, 4),\n",
- " 76: (7, 4),\n",
- " 75: (7, 4),\n",
- " 74: (7, 4),\n",
- " 73: (7, 4),\n",
- " 263: (7, 5),\n",
- " 262: (7, 5),\n",
- " 261: (7, 5),\n",
- " 260: (7, 5),\n",
- " 259: (7, 5),\n",
- " 258: (7, 5),\n",
- " 257: (7, 5),\n",
- " 256: (7, 5),\n",
- " 255: (7, 5),\n",
- " 254: (7, 5),\n",
- " 253: (7, 5),\n",
- " 252: (7, 5),\n",
- " 251: (7, 5),\n",
- " 250: (7, 5),\n",
- " 72: (7, 5),\n",
- " 71: (7, 5),\n",
- " 70: (7, 5),\n",
- " 69: (7, 5),\n",
- " 68: (7, 5),\n",
- " 67: (7, 5),\n",
- " 66: (7, 5),\n",
- " 65: (7, 5),\n",
- " 64: (7, 5),\n",
- " 63: (7, 5),\n",
- " 62: (7, 5),\n",
- " 61: (7, 5),\n",
- " 60: (7, 5),\n",
- " 59: (7, 5),\n",
- " 58: (7, 5),\n",
- " 57: (7, 5),\n",
- " 56: (7, 5),\n",
- " 292: (8, 0),\n",
- " 291: (8, 0),\n",
- " 290: (8, 0),\n",
- " 289: (8, 0),\n",
- " 288: (8, 0),\n",
- " 287: (8, 0),\n",
- " 286: (8, 0),\n",
- " 275: (8, 0),\n",
- " 276: (8, 0),\n",
- " 277: (8, 0),\n",
- " 278: (8, 0),\n",
- " 279: (8, 0),\n",
- " 280: (8, 0),\n",
- " 281: (8, 0),\n",
- " 282: (8, 0),\n",
- " 283: (8, 0),\n",
- " 284: (8, 0),\n",
- " 285: (8, 0),\n",
- " 274: (8, 0),\n",
- " 273: (8, 0),\n",
- " 272: (8, 0),\n",
- " 271: (8, 0),\n",
- " 270: (8, 0),\n",
- " 269: (8, 0),\n",
- " 268: (8, 0),\n",
- " 267: (8, 0),\n",
- " 266: (8, 0),\n",
- " 265: (8, 0),\n",
- " 264: (8, 0),\n",
- " 311: (8, 1),\n",
- " 310: (8, 1),\n",
- " 309: (8, 1),\n",
- " 308: (8, 1),\n",
- " 307: (8, 1),\n",
- " 306: (8, 1),\n",
- " 305: (8, 1),\n",
- " 304: (8, 1),\n",
- " 303: (8, 1),\n",
- " 302: (8, 1),\n",
- " 301: (8, 1),\n",
- " 300: (8, 1),\n",
- " 299: (8, 1),\n",
- " 298: (8, 1),\n",
- " 297: (8, 1),\n",
- " 296: (8, 1),\n",
- " 295: (8, 1),\n",
- " 294: (8, 1),\n",
- " 293: (8, 1),\n",
- " 332: (9, 0),\n",
- " 331: (9, 0),\n",
- " 330: (9, 0),\n",
- " 329: (9, 1),\n",
- " 328: (9, 1),\n",
- " 327: (9, 1),\n",
- " 326: (9, 1),\n",
- " 325: (9, 1),\n",
- " 324: (9, 1),\n",
- " 323: (9, 1),\n",
- " 322: (9, 1),\n",
- " 321: (9, 1),\n",
- " 320: (9, 1),\n",
- " 319: (9, 1),\n",
- " 318: (9, 1),\n",
- " 317: (9, 1),\n",
- " 316: (9, 1),\n",
- " 315: (9, 1),\n",
- " 314: (9, 1),\n",
- " 313: (9, 1),\n",
- " 312: (9, 1),\n",
- " 336: (10, 0),\n",
- " 335: (10, 0),\n",
- " 334: (10, 0),\n",
- " 333: (10, 0),\n",
- " 355: (10, 5),\n",
- " 354: (10, 5),\n",
- " 353: (10, 5),\n",
- " 352: (10, 5),\n",
- " 351: (10, 5),\n",
- " 350: (10, 5),\n",
- " 349: (10, 5),\n",
- " 348: (10, 5),\n",
- " 347: (10, 5),\n",
- " 346: (10, 5),\n",
- " 345: (10, 5),\n",
- " 344: (10, 5),\n",
- " 343: (10, 5),\n",
- " 342: (10, 5),\n",
- " 341: (10, 5),\n",
- " 340: (10, 5),\n",
- " 339: (10, 5),\n",
- " 338: (10, 5),\n",
- " 337: (10, 5),\n",
- " 389: (11, 0),\n",
- " 388: (11, 0),\n",
- " 387: (11, 0),\n",
- " 386: (11, 0),\n",
- " 385: (11, 0),\n",
- " 384: (11, 0),\n",
- " 383: (11, 0),\n",
- " 382: (11, 0),\n",
- " 381: (11, 0),\n",
- " 369: (11, 0),\n",
- " 370: (11, 0),\n",
- " 371: (11, 0),\n",
- " 372: (11, 0),\n",
- " 373: (11, 0),\n",
- " 374: (11, 0),\n",
- " 375: (11, 0),\n",
- " 376: (11, 0),\n",
- " 377: (11, 0),\n",
- " 378: (11, 0),\n",
- " 379: (11, 0),\n",
- " 380: (11, 0),\n",
- " 368: (11, 0),\n",
- " 367: (11, 5),\n",
- " 366: (11, 5),\n",
- " 365: (11, 5),\n",
- " 364: (11, 5),\n",
- " 363: (11, 5),\n",
- " 362: (11, 5),\n",
- " 361: (11, 5),\n",
- " 360: (11, 5),\n",
- " 359: (11, 5),\n",
- " 358: (11, 5),\n",
- " 357: (11, 5),\n",
- " 356: (11, 5),\n",
- " 422: (12, 3),\n",
- " 421: (12, 3),\n",
- " 420: (12, 3),\n",
- " 419: (12, 3),\n",
- " 418: (12, 3),\n",
- " 417: (12, 3),\n",
- " 416: (12, 3),\n",
- " 415: (12, 3),\n",
- " 414: (12, 3),\n",
- " 413: (12, 3),\n",
- " 412: (12, 3),\n",
- " 411: (12, 3),\n",
- " 410: (12, 3),\n",
- " 409: (12, 3),\n",
- " 408: (12, 3),\n",
- " 407: (12, 3),\n",
- " 406: (12, 3),\n",
- " 405: (12, 3),\n",
- " 404: (12, 3),\n",
- " 403: (12, 3),\n",
- " 402: (12, 3),\n",
- " 401: (12, 4),\n",
- " 400: (12, 4),\n",
- " 399: (12, 4),\n",
- " 398: (12, 4),\n",
- " 397: (12, 4),\n",
- " 396: (12, 4),\n",
- " 395: (12, 4),\n",
- " 394: (12, 4),\n",
- " 393: (12, 4),\n",
- " 392: (12, 4),\n",
- " 391: (12, 4),\n",
- " 390: (12, 4),\n",
- " 435: (13, 3),\n",
- " 434: (13, 3),\n",
- " 433: (13, 3),\n",
- " 432: (13, 3),\n",
- " 431: (13, 3),\n",
- " 430: (13, 3),\n",
- " 429: (13, 3),\n",
- " 428: (13, 3),\n",
- " 427: (13, 3),\n",
- " 426: (13, 3),\n",
- " 425: (13, 3),\n",
- " 424: (13, 3),\n",
- " 423: (13, 3),\n",
- " 454: (13, 4),\n",
- " 453: (13, 4),\n",
- " 452: (13, 4),\n",
- " 451: (13, 4),\n",
- " 450: (13, 4),\n",
- " 449: (13, 4),\n",
- " 448: (13, 4),\n",
- " 447: (13, 4),\n",
- " 446: (13, 4),\n",
- " 445: (13, 4),\n",
- " 444: (13, 4),\n",
- " 443: (13, 4),\n",
- " 442: (13, 4),\n",
- " 441: (13, 4),\n",
- " 440: (13, 4),\n",
- " 439: (13, 4),\n",
- " 438: (13, 4),\n",
- " 437: (13, 4),\n",
- " 436: (13, 4)}"
+ "[[23, 13369809], [38, 12060012]]"
]
},
- "execution_count": 107,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df[\"stack\"]=df.index[1:]+[-1]\n",
- "df[\"threeprime\"]=df.index[1:]\n",
- "\n",
- "strands_map=df.groupby(['strand',\"vh\"]).groups\n",
- "ind_map={}\n",
- "for j in list(strands_map.keys()):\n",
- " for l in strands_map[j]:\n",
- " ind_map[l]=j\n",
- "for i in list(df.index):\n",
- " if df.loc[i]\n"
+ "df[\"vstrands\"][0][\"stap_colors\"]"
]
},
{
"cell_type": "code",
- "execution_count": 99,
- "id": "c88b4d35-0f58-47da-8687-e6a8433d363f",
+ "execution_count": 14,
+ "id": "00f1513e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " False,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " False,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " False,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " True,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " False,\n",
- " True]"
+ "210"
]
},
- "execution_count": 99,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "list(df[\"stack\"]==df[\"threeprime\"])"
+ "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n",
+ "len(vh_vb._scaf)"
]
},
{
"cell_type": "code",
- "execution_count": 84,
- "id": "eeea91ae-b631-454f-b66f-af580c7ece5c",
+ "execution_count": 198,
+ "id": "aaa65658",
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "{0: [2, 1, 0], 1: [5, 4, 3], 2: [8, 7, 6], 3: [42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9], 4: [47, 46, 45, 44, 43], 5: [51, 50, 49, 48], 6: [55, 54, 53, 52], 7: [263, 262, 261, 260, 259, 258, 257, 256, 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, 239, 238, 237, 236, 235, 234, 233, 232, 231, 230, 229, 228, 227, 226, 225, 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214, 213, 212, 211, 210, 209, 208, 207, 206, 205, 203, 204, 202, 201, 200, 199, 198, 197, 196, 195, 194, 193, 192, 191, 190, 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 172, 171, 170, 169, 168, 167, 166, 165, 164, ...], 8: [311, 310, 309, 308, 307, 306, 305, 304, 303, 302, 301, 300, 299, 298, 297, 296, 295, 294, 293, 292, 291, 290, 289, 288, 287, 286, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 274, 273, 272, 271, 270, 269, 268, 267, 266, 265, 264], 9: [332, 331, 330, 329, 328, 327, 326, 325, 324, 323, 322, 321, 320, 319, 318, 317, 316, 315, 314, 313, 312], 10: [355, 354, 353, 352, 351, 350, 349, 348, 347, 346, 345, 344, 343, 342, 341, 340, 339, 338, 337, 336, 335, 334, 333], 11: [389, 388, 387, 386, 385, 384, 383, 382, 381, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 368, 367, 366, 365, 364, 363, 362, 361, 360, 359, 358, 357, 356], 12: [422, 421, 420, 419, 418, 417, 416, 415, 414, 413, 412, 411, 410, 409, 408, 407, 406, 405, 404, 403, 402, 401, 400, 399, 398, 397, 396, 395, 394, 393, 392, 391, 390], 13: [454, 453, 452, 451, 450, 449, 448, 447, 446, 445, 444, 443, 442, 441, 440, 439, 438, 437, 436, 435, 434, 433, 432, 431, 430, 429, 428, 427, 426, 425, 424, 423]}"
- ]
- },
- "execution_count": 84,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "TypeError",
+ "evalue": "file must have 'read' and 'readline' attributes",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[198], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpickle\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.virt2nuc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "\u001b[0;31mTypeError\u001b[0m: file must have 'read' and 'readline' attributes"
+ ]
}
],
"source": [
- "strands=df.groupby(['strand']).groups\n",
- "strands"
+ "df = pickle.load(\"test.virt2nuc\")"
]
},
{
"cell_type": "code",
- "execution_count": 88,
- "id": "be4a60fe-444d-4772-80e8-b95591a079b3",
+ "execution_count": 15,
+ "id": "cbddf07f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[1,\n",
- " 0,\n",
- " -1,\n",
- " 4,\n",
- " 3,\n",
- " -1,\n",
- " 7,\n",
- " 6,\n",
- " -1,\n",
- " 41,\n",
- " 40,\n",
- " 39,\n",
- " 38,\n",
- " 37,\n",
- " 36,\n",
- " 35,\n",
- " 34,\n",
- " 33,\n",
- " 32,\n",
- " 31,\n",
- " 30,\n",
- " 29,\n",
- " 28,\n",
- " 27,\n",
- " 26,\n",
- " 25,\n",
- " 24,\n",
- " 23,\n",
- " 22,\n",
- " 21,\n",
- " 20,\n",
- " 19,\n",
- " 18,\n",
- " 17,\n",
- " 16,\n",
- " 15,\n",
- " 14,\n",
- " 13,\n",
- " 12,\n",
- " 11,\n",
- " 10,\n",
- " 9,\n",
- " -1,\n",
- " 46,\n",
- " 45,\n",
- " 44,\n",
- " 43,\n",
- " -1,\n",
- " 50,\n",
- " 49,\n",
- " 48,\n",
- " -1,\n",
- " 54,\n",
- " 53,\n",
- " 52,\n",
- " -1,\n",
- " 262,\n",
- " 261,\n",
- " 260,\n",
- " 259,\n",
- " 258,\n",
- " 257,\n",
- " 256,\n",
- " 255,\n",
- " 254,\n",
- " 253,\n",
- " 252,\n",
- " 251,\n",
- " 250,\n",
- " 249,\n",
- " 248,\n",
- " 247,\n",
- " 246,\n",
- " 245,\n",
- " 244,\n",
- " 243,\n",
- " 242,\n",
- " 241,\n",
- " 240,\n",
- " 239,\n",
- " 238,\n",
- " 237,\n",
- " 236,\n",
- " 235,\n",
- " 234,\n",
- " 233,\n",
- " 232,\n",
- " 231,\n",
- " 230,\n",
- " 229,\n",
- " 228,\n",
- " 227,\n",
- " 226,\n",
- " 225,\n",
- " 224,\n",
- " 223,\n",
- " 222,\n",
- " 221,\n",
- " 220,\n",
- " 219,\n",
- " 218,\n",
- " 217,\n",
- " 216,\n",
- " 215,\n",
- " 214,\n",
- " 213,\n",
- " 212,\n",
- " 211,\n",
- " 210,\n",
- " 209,\n",
- " 208,\n",
- " 207,\n",
- " 206,\n",
- " 205,\n",
- " 203,\n",
- " 204,\n",
- " 202,\n",
- " 201,\n",
- " 200,\n",
- " 199,\n",
- " 198,\n",
- " 197,\n",
- " 196,\n",
- " 195,\n",
- " 194,\n",
- " 193,\n",
- " 192,\n",
- " 191,\n",
- " 190,\n",
- " 189,\n",
- " 188,\n",
- " 187,\n",
- " 186,\n",
- " 185,\n",
- " 184,\n",
- " 183,\n",
- " 182,\n",
- " 181,\n",
- " 180,\n",
- " 179,\n",
- " 178,\n",
- " 177,\n",
- " 176,\n",
- " 175,\n",
- " 174,\n",
- " 173,\n",
- " 172,\n",
- " 171,\n",
- " 170,\n",
- " 169,\n",
- " 168,\n",
- " 167,\n",
- " 166,\n",
- " 165,\n",
- " 164,\n",
- " 163,\n",
- " 162,\n",
- " 161,\n",
- " 160,\n",
- " 159,\n",
- " 158,\n",
- " 157,\n",
- " 156,\n",
- " 155,\n",
- " 154,\n",
- " 153,\n",
- " 152,\n",
- " 151,\n",
- " 150,\n",
- " 149,\n",
- " 148,\n",
- " 147,\n",
- " 146,\n",
- " 145,\n",
- " 144,\n",
- " 143,\n",
- " 142,\n",
- " 141,\n",
- " 140,\n",
- " 139,\n",
- " 138,\n",
- " 137,\n",
- " 136,\n",
- " 135,\n",
- " 134,\n",
- " 133,\n",
- " 132,\n",
- " 131,\n",
- " 130,\n",
- " 129,\n",
- " 128,\n",
- " 127,\n",
- " 126,\n",
- " 125,\n",
- " 124,\n",
- " 123,\n",
- " 122,\n",
- " 121,\n",
- " 120,\n",
- " 119,\n",
- " 118,\n",
- " 117,\n",
- " 116,\n",
- " 115,\n",
- " 114,\n",
- " 113,\n",
- " 112,\n",
- " 111,\n",
- " 110,\n",
- " 109,\n",
- " 108,\n",
- " 107,\n",
- " 106,\n",
- " 105,\n",
- " 104,\n",
- " 103,\n",
- " 102,\n",
- " 101,\n",
- " 100,\n",
- " 99,\n",
- " 98,\n",
- " 97,\n",
- " 96,\n",
- " 95,\n",
- " 94,\n",
- " 93,\n",
- " 92,\n",
- " 91,\n",
- " 90,\n",
- " 89,\n",
- " 88,\n",
- " 87,\n",
- " 86,\n",
- " 85,\n",
- " 84,\n",
- " 83,\n",
- " 82,\n",
- " 81,\n",
- " 80,\n",
- " 79,\n",
- " 78,\n",
- " 77,\n",
- " 76,\n",
- " 75,\n",
- " 74,\n",
- " 73,\n",
- " 72,\n",
- " 71,\n",
- " 70,\n",
- " 69,\n",
- " 68,\n",
- " 67,\n",
- " 66,\n",
- " 65,\n",
- " 64,\n",
- " 63,\n",
- " 62,\n",
- " 61,\n",
- " 60,\n",
- " 59,\n",
- " 58,\n",
- " 57,\n",
- " 56,\n",
- " -1,\n",
- " 310,\n",
- " 309,\n",
- " 308,\n",
- " 307,\n",
- " 306,\n",
- " 305,\n",
- " 304,\n",
- " 303,\n",
- " 302,\n",
- " 301,\n",
- " 300,\n",
- " 299,\n",
- " 298,\n",
- " 297,\n",
- " 296,\n",
- " 295,\n",
- " 294,\n",
- " 293,\n",
- " 292,\n",
- " 291,\n",
- " 290,\n",
- " 289,\n",
- " 288,\n",
- " 287,\n",
- " 286,\n",
- " 275,\n",
- " 276,\n",
- " 277,\n",
- " 278,\n",
- " 279,\n",
- " 280,\n",
- " 281,\n",
- " 282,\n",
- " 283,\n",
- " 284,\n",
- " 285,\n",
- " 274,\n",
- " 273,\n",
- " 272,\n",
- " 271,\n",
- " 270,\n",
- " 269,\n",
- " 268,\n",
- " 267,\n",
- " 266,\n",
- " 265,\n",
- " 264,\n",
- " -1,\n",
- " 331,\n",
- " 330,\n",
- " 329,\n",
- " 328,\n",
- " 327,\n",
- " 326,\n",
- " 325,\n",
- " 324,\n",
- " 323,\n",
- " 322,\n",
- " 321,\n",
- " 320,\n",
- " 319,\n",
- " 318,\n",
- " 317,\n",
- " 316,\n",
- " 315,\n",
- " 314,\n",
- " 313,\n",
- " 312,\n",
- " -1,\n",
- " 354,\n",
- " 353,\n",
- " 352,\n",
- " 351,\n",
- " 350,\n",
- " 349,\n",
- " 348,\n",
- " 347,\n",
- " 346,\n",
- " 345,\n",
- " 344,\n",
- " 343,\n",
- " 342,\n",
- " 341,\n",
- " 340,\n",
- " 339,\n",
- " 338,\n",
- " 337,\n",
- " 336,\n",
- " 335,\n",
- " 334,\n",
- " 333,\n",
- " -1,\n",
- " 388,\n",
- " 387,\n",
- " 386,\n",
- " 385,\n",
- " 384,\n",
- " 383,\n",
- " 382,\n",
- " 381,\n",
- " 369,\n",
- " 370,\n",
- " 371,\n",
- " 372,\n",
- " 373,\n",
- " 374,\n",
- " 375,\n",
- " 376,\n",
- " 377,\n",
- " 378,\n",
- " 379,\n",
- " 380,\n",
- " 368,\n",
- " 367,\n",
- " 366,\n",
- " 365,\n",
- " 364,\n",
- " 363,\n",
- " 362,\n",
- " 361,\n",
- " 360,\n",
- " 359,\n",
- " 358,\n",
- " 357,\n",
- " 356,\n",
- " -1,\n",
- " 421,\n",
- " 420,\n",
- " 419,\n",
- " 418,\n",
- " 417,\n",
- " 416,\n",
- " 415,\n",
- " 414,\n",
- " 413,\n",
- " 412,\n",
- " 411,\n",
- " 410,\n",
- " 409,\n",
- " 408,\n",
- " 407,\n",
- " 406,\n",
- " 405,\n",
- " 404,\n",
- " 403,\n",
- " 402,\n",
- " 401,\n",
- " 400,\n",
- " 399,\n",
- " 398,\n",
- " 397,\n",
- " 396,\n",
- " 395,\n",
- " 394,\n",
- " 393,\n",
- " 392,\n",
- " 391,\n",
- " 390,\n",
- " -1,\n",
- " 453,\n",
- " 452,\n",
- " 451,\n",
- " 450,\n",
- " 449,\n",
- " 448,\n",
- " 447,\n",
- " 446,\n",
- " 445,\n",
- " 444,\n",
- " 443,\n",
- " 442,\n",
- " 441,\n",
- " 440,\n",
- " 439,\n",
- " 438,\n",
- " 437,\n",
- " 436,\n",
- " 435,\n",
- " 434,\n",
- " 433,\n",
- " 432,\n",
- " 431,\n",
- " 430,\n",
- " 429,\n",
- " 428,\n",
- " 427,\n",
- " 426,\n",
- " 425,\n",
- " 424,\n",
- " 423,\n",
- " -1]"
+ "{0: (12, 16), 1: (12, 15), 2: (13, 15), 3: (13, 16), 4: (13, 17), 5: (12, 17)}"
]
},
- "execution_count": 88,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "threeprime=[]\n",
- "for i in list(strands.values()):\n",
- " m=list(i)\n",
- " m.append(-1)\n",
- " for j in range(1,len(m)):\n",
- " threeprime.append(m[j])\n",
- "threeprime"
+ "pattern"
]
},
{
"cell_type": "code",
- "execution_count": 106,
- "id": "dc9618ff-8016-480c-b704-31b2dc5f58a1",
+ "execution_count": 173,
+ "id": "fac8699e",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2 1\n",
- "1 0\n",
- "0 -1\n",
- "5 4\n",
- "4 3\n",
- "3 -1\n",
- "8 7\n",
- "7 6\n",
- "6 -1\n",
- "47 41\n",
- "46 40\n",
- "45 39\n",
- "44 38\n",
- "43 37\n",
- "51 36\n",
- "50 35\n",
- "49 34\n",
- "48 33\n",
- "55 32\n",
- "54 31\n",
- "53 30\n",
- "52 29\n",
- "263 28\n",
- "262 27\n",
- "261 26\n",
- "260 25\n",
- "259 24\n",
- "258 23\n",
- "257 22\n",
- "256 21\n",
- "255 20\n",
- "254 19\n",
- "253 18\n",
- "252 17\n",
- "251 16\n",
- "250 15\n",
- "249 14\n",
- "248 13\n",
- "247 12\n",
- "246 11\n",
- "245 10\n",
- "244 9\n",
- "243 -1\n",
- "242 46\n",
- "241 45\n",
- "240 44\n",
- "239 43\n",
- "238 -1\n",
- "237 50\n",
- "236 49\n",
- "235 48\n",
- "234 -1\n",
- "233 54\n",
- "232 53\n",
- "231 52\n",
- "230 -1\n",
- "229 262\n",
- "228 261\n",
- "227 260\n",
- "226 259\n",
- "225 258\n",
- "224 257\n",
- "223 256\n",
- "222 255\n",
- "221 254\n",
- "220 253\n",
- "219 252\n",
- "218 251\n",
- "217 250\n",
- "216 249\n",
- "215 248\n",
- "214 247\n",
- "213 246\n",
- "212 245\n",
- "211 244\n",
- "210 243\n",
- "209 242\n",
- "208 241\n",
- "207 240\n",
- "206 239\n",
- "205 238\n",
- "203 237\n",
- "204 236\n",
- "202 235\n",
- "201 234\n",
- "200 233\n",
- "199 232\n",
- "198 231\n",
- "197 230\n",
- "196 229\n",
- "195 228\n",
- "194 227\n",
- "193 226\n",
- "192 225\n",
- "191 224\n",
- "190 223\n",
- "189 222\n",
- "188 221\n",
- "187 220\n",
- "186 219\n",
- "185 218\n",
- "184 217\n",
- "183 216\n",
- "182 215\n",
- "181 214\n",
- "180 213\n",
- "179 212\n",
- "178 211\n",
- "177 210\n",
- "176 209\n",
- "175 208\n",
- "174 207\n",
- "173 206\n",
- "172 205\n",
- "171 203\n",
- "170 204\n",
- "169 202\n",
- "168 201\n",
- "167 200\n",
- "166 199\n",
- "165 198\n",
- "164 197\n",
- "163 196\n",
- "162 195\n",
- "161 194\n",
- "160 193\n",
- "159 192\n",
- "158 191\n",
- "157 190\n",
- "156 189\n",
- "155 188\n",
- "154 187\n",
- "153 186\n",
- "152 185\n",
- "151 184\n",
- "150 183\n",
- "149 182\n",
- "148 181\n",
- "147 180\n",
- "146 179\n",
- "145 178\n",
- "144 177\n",
- "143 176\n",
- "142 175\n",
- "141 174\n",
- "140 173\n",
- "139 172\n",
- "138 171\n",
- "137 170\n",
- "136 169\n",
- "135 168\n",
- "134 167\n",
- "133 166\n",
- "132 165\n",
- "131 164\n",
- "130 163\n",
- "129 162\n",
- "128 161\n",
- "127 160\n",
- "126 159\n",
- "125 158\n",
- "124 157\n",
- "123 156\n",
- "122 155\n",
- "121 154\n",
- "120 153\n",
- "119 152\n",
- "118 151\n",
- "117 150\n",
- "116 149\n",
- "115 148\n",
- "114 147\n",
- "113 146\n",
- "112 145\n",
- "111 144\n",
- "110 143\n",
- "109 142\n",
- "108 141\n",
- "107 140\n",
- "106 139\n",
- "105 138\n",
- "104 137\n",
- "103 136\n",
- "102 135\n",
- "101 134\n",
- "100 133\n",
- "99 132\n",
- "98 131\n",
- "97 130\n",
- "96 129\n",
- "95 128\n",
- "94 127\n",
- "93 126\n",
- "92 125\n",
- "91 124\n",
- "90 123\n",
- "89 122\n",
- "88 121\n",
- "87 120\n",
- "86 119\n",
- "85 118\n",
- "84 117\n",
- "83 116\n",
- "82 115\n",
- "81 114\n",
- "80 113\n",
- "79 112\n",
- "78 111\n",
- "77 110\n",
- "76 109\n",
- "75 108\n",
- "74 107\n",
- "73 106\n",
- "72 105\n",
- "71 104\n",
- "70 103\n",
- "69 102\n",
- "68 101\n",
- "67 100\n",
- "66 99\n",
- "65 98\n",
- "64 97\n",
- "63 96\n",
- "62 95\n",
- "61 94\n",
- "60 93\n",
- "59 92\n",
- "58 91\n",
- "57 90\n",
- "56 89\n",
- "42 88\n",
- "41 87\n",
- "40 86\n",
- "39 85\n",
- "38 84\n",
- "37 83\n",
- "36 82\n",
- "35 81\n",
- "34 80\n",
- "33 79\n",
- "32 78\n",
- "31 77\n",
- "30 76\n",
- "29 75\n",
- "28 74\n",
- "27 73\n",
- "26 72\n",
- "25 71\n",
- "24 70\n",
- "23 69\n",
- "22 68\n",
- "21 67\n",
- "20 66\n",
- "19 65\n",
- "18 64\n",
- "17 63\n",
- "16 62\n",
- "15 61\n",
- "14 60\n",
- "13 59\n",
- "12 58\n",
- "11 57\n",
- "10 56\n",
- "9 -1\n",
- "311 310\n",
- "310 309\n",
- "309 308\n",
- "308 307\n",
- "307 306\n",
- "306 305\n",
- "305 304\n",
- "304 303\n",
- "303 302\n",
- "302 301\n",
- "301 300\n",
- "300 299\n",
- "299 298\n",
- "298 297\n",
- "297 296\n",
- "296 295\n",
- "295 294\n",
- "294 293\n",
- "293 292\n",
- "292 291\n",
- "291 290\n",
- "290 289\n",
- "289 288\n",
- "288 287\n",
- "287 286\n",
- "286 275\n",
- "275 276\n",
- "276 277\n",
- "277 278\n",
- "278 279\n",
- "279 280\n",
- "280 281\n",
- "281 282\n",
- "282 283\n",
- "283 284\n",
- "284 285\n",
- "285 274\n",
- "274 273\n",
- "273 272\n",
- "272 271\n",
- "271 270\n",
- "270 269\n",
- "269 268\n",
- "268 267\n",
- "267 266\n",
- "266 265\n",
- "265 264\n",
- "264 -1\n",
- "332 331\n",
- "331 330\n",
- "330 329\n",
- "329 328\n",
- "328 327\n",
- "327 326\n",
- "326 325\n",
- "325 324\n",
- "324 323\n",
- "323 322\n",
- "322 321\n",
- "321 320\n",
- "320 319\n",
- "319 318\n",
- "318 317\n",
- "317 316\n",
- "316 315\n",
- "315 314\n",
- "314 313\n",
- "313 312\n",
- "312 -1\n",
- "355 354\n",
- "354 353\n",
- "353 352\n",
- "352 351\n",
- "351 350\n",
- "350 349\n",
- "349 348\n",
- "348 347\n",
- "347 346\n",
- "346 345\n",
- "345 344\n",
- "344 343\n",
- "343 342\n",
- "342 341\n",
- "341 340\n",
- "340 339\n",
- "339 338\n",
- "338 337\n",
- "337 336\n",
- "336 335\n",
- "335 334\n",
- "334 333\n",
- "333 -1\n",
- "389 388\n",
- "388 387\n",
- "387 386\n",
- "386 385\n",
- "385 384\n",
- "384 383\n",
- "383 382\n",
- "382 381\n",
- "381 369\n",
- "369 370\n",
- "370 371\n",
- "371 372\n",
- "372 373\n",
- "373 374\n",
- "374 375\n",
- "375 376\n",
- "376 377\n",
- "377 378\n",
- "378 379\n",
- "379 380\n",
- "380 368\n",
- "368 367\n",
- "367 366\n",
- "366 365\n",
- "365 364\n",
- "364 363\n",
- "363 362\n",
- "362 361\n",
- "361 360\n",
- "360 359\n",
- "359 358\n",
- "358 357\n",
- "357 356\n",
- "356 -1\n",
- "422 421\n",
- "421 420\n",
- "420 419\n",
- "419 418\n",
- "418 417\n",
- "417 416\n",
- "416 415\n",
- "415 414\n",
- "414 413\n",
- "413 412\n",
- "412 411\n",
- "411 410\n",
- "410 409\n",
- "409 408\n",
- "408 407\n",
- "407 406\n",
- "406 405\n",
- "405 404\n",
- "404 403\n",
- "403 402\n",
- "402 401\n",
- "401 400\n",
- "400 399\n",
- "399 398\n",
- "398 397\n",
- "397 396\n",
- "396 395\n",
- "395 394\n",
- "394 393\n",
- "393 392\n",
- "392 391\n",
- "391 390\n",
- "390 -1\n",
- "454 453\n",
- "453 452\n",
- "452 451\n",
- "451 450\n",
- "450 449\n",
- "449 448\n",
- "448 447\n",
- "447 446\n",
- "446 445\n",
- "445 444\n",
- "444 443\n",
- "443 442\n",
- "442 441\n",
- "441 440\n",
- "440 439\n",
- "439 438\n",
- "438 437\n",
- "437 436\n",
- "436 435\n",
- "435 434\n",
- "434 433\n",
- "433 432\n",
- "432 431\n",
- "431 430\n",
- "430 429\n",
- "429 428\n",
- "428 427\n",
- "427 426\n",
- "426 425\n",
- "425 424\n",
- "424 423\n",
- "423 -1\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[1,\n",
- " 0,\n",
- " -1,\n",
- " 4,\n",
- " 3,\n",
- " -1,\n",
- " 7,\n",
- " 6,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " 197,\n",
- " 196,\n",
- " 195,\n",
- " 194,\n",
- " 193,\n",
- " 192,\n",
- " 191,\n",
- " 190,\n",
- " 189,\n",
- " 188,\n",
- " 187,\n",
- " 186,\n",
- " 185,\n",
- " 184,\n",
- " 183,\n",
- " 182,\n",
- " 181,\n",
- " 180,\n",
- " 179,\n",
- " 178,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " -1,\n",
- " 310,\n",
- " 309,\n",
- " 308,\n",
- " 307,\n",
- " 306,\n",
- " 305,\n",
- " 304,\n",
- " 303,\n",
- " 302,\n",
- " 301,\n",
- " 300,\n",
- " 299,\n",
- " 298,\n",
- " 297,\n",
- " 296,\n",
- " 295,\n",
- " 294,\n",
- " 293,\n",
- " -1,\n",
- " 291,\n",
- " 290,\n",
- " 289,\n",
- " 288,\n",
- " 287,\n",
- " 286,\n",
- " 275,\n",
- " 276,\n",
- " 277,\n",
- " 278,\n",
- " 279,\n",
- " 280,\n",
- " 281,\n",
- " 282,\n",
- " 283,\n",
- " 284,\n",
- " 285,\n",
- " 274,\n",
- " 273,\n",
- " 272,\n",
- " 271,\n",
- " 270,\n",
- " 269,\n",
- " 268,\n",
- " 267,\n",
- " 266,\n",
- " 265,\n",
- " 264,\n",
- " -1,\n",
- " 331,\n",
- " 330,\n",
- " -1,\n",
- " 328,\n",
- " 327,\n",
- " 326,\n",
- " 325,\n",
- " 324,\n",
- " 323,\n",
- " 322,\n",
- " 321,\n",
- " 320,\n",
- " 319,\n",
- " 318,\n",
- " 317,\n",
- " 316,\n",
- " 315,\n",
- " 314,\n",
- " 313,\n",
- " 312,\n",
- " -1,\n",
- " 354,\n",
- " 353,\n",
- " 352,\n",
- " 351,\n",
- " 350,\n",
- " 349,\n",
- " 348,\n",
- " 347,\n",
- " 346,\n",
- " 345,\n",
- " 344,\n",
- " 343,\n",
- " 342,\n",
- " 341,\n",
- " 340,\n",
- " 339,\n",
- " 338,\n",
- " 337,\n",
- " -1,\n",
- " 335,\n",
- " 334,\n",
- " 333,\n",
- " -1,\n",
- " 388,\n",
- " 387,\n",
- " 386,\n",
- " 385,\n",
- " 384,\n",
- " 383,\n",
- " 382,\n",
- " 381,\n",
- " 369,\n",
- " 370,\n",
- " 371,\n",
- " 372,\n",
- " 373,\n",
- " 374,\n",
- " 375,\n",
- " 376,\n",
- " 377,\n",
- " 378,\n",
- " 379,\n",
- " 380,\n",
- " 368,\n",
- " -1,\n",
- " 366,\n",
- " 365,\n",
- " 364,\n",
- " 363,\n",
- " 362,\n",
- " 361,\n",
- " 360,\n",
- " 359,\n",
- " 358,\n",
- " 357,\n",
- " 356,\n",
- " -1,\n",
- " 421,\n",
- " 420,\n",
- " 419,\n",
- " 418,\n",
- " 417,\n",
- " 416,\n",
- " 415,\n",
- " 414,\n",
- " 413,\n",
- " 412,\n",
- " 411,\n",
- " 410,\n",
- " 409,\n",
- " 408,\n",
- " 407,\n",
- " 406,\n",
- " 405,\n",
- " 404,\n",
- " 403,\n",
- " 402,\n",
- " -1,\n",
- " 400,\n",
- " 399,\n",
- " 398,\n",
- " 397,\n",
- " 396,\n",
- " 395,\n",
- " 394,\n",
- " 393,\n",
- " 392,\n",
- " 391,\n",
- " 390,\n",
- " -1,\n",
- " 453,\n",
- " 452,\n",
- " 451,\n",
- " 450,\n",
- " 449,\n",
- " 448,\n",
- " 447,\n",
- " 446,\n",
- " 445,\n",
- " 444,\n",
- " 443,\n",
- " 442,\n",
- " 441,\n",
- " 440,\n",
- " 439,\n",
- " 438,\n",
- " 437,\n",
- " 436,\n",
- " -1,\n",
- " 434,\n",
- " 433,\n",
- " 432,\n",
- " 431,\n",
- " 430,\n",
- " 429,\n",
- " 428,\n",
- " 427,\n",
- " 426,\n",
- " 425,\n",
- " 424,\n",
- " 423,\n",
- " -1]"
- ]
- },
- "execution_count": 106,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "stacks=[]\n",
- "for i,j in zip(df.index,df[\"threeprime\"]):\n",
- " try:\n",
- " print(i,j)\n",
- " if ind_map[i]==ind_map[j]:\n",
- " stacks.append(j)\n",
- " else:\n",
- " stacks.append(-1)\n",
- " except:\n",
- " stacks.append(-1)\n",
- "stacks"
+ "class strands():\n",
+ " def __init__(self):\n",
+ " self.row=0 \n",
+ " self.col=0\n",
+ " self.num=0\n",
+ " self.scaf=[]\n",
+ " self.stap=[]\n",
+ " self.loop=[]\n",
+ " self.skip=[]\n",
+ " self.scafLoop=[]\n",
+ " self.stapLoop=[]\n",
+ " self.stap_colors=[]\n",
+ " self.scaf_contact={}\n",
+ " self.stap_connect={}\n",
+ " def to_dict(self):\n",
+ " d={}\n",
+ " d['row']=self.row\n",
+ " d['col']=self.col\n",
+ " d['num']=self.num\n",
+ " d['scaf']=self.scaf\n",
+ " d['stap']=self.stap\n",
+ " d['loop']=self.loop\n",
+ " d['skip']=self.skip\n",
+ " d['scafLoop']=self.scafLoop\n",
+ " d['stapLoop']=self.stapLoop\n",
+ " d['stap_colors']=self.stap_colors\n",
+ " return d\n"
]
},
{
"cell_type": "code",
- "execution_count": 101,
- "id": "64cb336b-9399-49ed-8a82-173a2b967a75",
+ "execution_count": 177,
+ "id": "308cd6c1",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{2: (0, 0),\n",
- " 1: (0, 0),\n",
- " 0: (0, 0),\n",
- " 5: (1, 1),\n",
- " 4: (1, 1),\n",
- " 3: (1, 1),\n",
- " 8: (2, 2),\n",
- " 7: (2, 2),\n",
- " 6: (2, 2),\n",
- " 42: (3, 2),\n",
- " 41: (3, 2),\n",
- " 40: (3, 2),\n",
- " 39: (3, 2),\n",
- " 38: (3, 2),\n",
- " 37: (3, 2),\n",
- " 36: (3, 2),\n",
- " 35: (3, 2),\n",
- " 34: (3, 2),\n",
- " 33: (3, 2),\n",
- " 32: (3, 2),\n",
- " 31: (3, 2),\n",
- " 30: (3, 2),\n",
- " 29: (3, 2),\n",
- " 28: (3, 2),\n",
- " 27: (3, 2),\n",
- " 26: (3, 2),\n",
- " 25: (3, 2),\n",
- " 24: (3, 2),\n",
- " 23: (3, 2),\n",
- " 22: (3, 2),\n",
- " 21: (3, 2),\n",
- " 20: (3, 2),\n",
- " 19: (3, 2),\n",
- " 18: (3, 2),\n",
- " 17: (3, 2),\n",
- " 16: (3, 2),\n",
- " 15: (3, 2),\n",
- " 14: (3, 2),\n",
- " 13: (3, 2),\n",
- " 12: (3, 2),\n",
- " 11: (3, 2),\n",
- " 10: (3, 2),\n",
- " 9: (3, 2),\n",
- " 47: (4, 3),\n",
- " 46: (4, 3),\n",
- " 45: (4, 3),\n",
- " 44: (4, 3),\n",
- " 43: (4, 3),\n",
- " 51: (5, 4),\n",
- " 50: (5, 4),\n",
- " 49: (5, 4),\n",
- " 48: (5, 4),\n",
- " 55: (6, 5),\n",
- " 54: (6, 5),\n",
- " 53: (6, 5),\n",
- " 52: (6, 5),\n",
- " 197: (7, 0),\n",
- " 196: (7, 0),\n",
- " 195: (7, 0),\n",
- " 194: (7, 0),\n",
- " 193: (7, 0),\n",
- " 192: (7, 0),\n",
- " 191: (7, 0),\n",
- " 190: (7, 0),\n",
- " 189: (7, 0),\n",
- " 188: (7, 0),\n",
- " 187: (7, 0),\n",
- " 186: (7, 0),\n",
- " 185: (7, 0),\n",
- " 184: (7, 0),\n",
- " 183: (7, 0),\n",
- " 182: (7, 0),\n",
- " 181: (7, 0),\n",
- " 180: (7, 0),\n",
- " 179: (7, 0),\n",
- " 178: (7, 0),\n",
- " 177: (7, 0),\n",
- " 176: (7, 0),\n",
- " 175: (7, 0),\n",
- " 174: (7, 0),\n",
- " 173: (7, 0),\n",
- " 172: (7, 0),\n",
- " 171: (7, 0),\n",
- " 170: (7, 0),\n",
- " 169: (7, 0),\n",
- " 168: (7, 0),\n",
- " 167: (7, 0),\n",
- " 166: (7, 0),\n",
- " 165: (7, 0),\n",
- " 164: (7, 0),\n",
- " 163: (7, 0),\n",
- " 162: (7, 0),\n",
- " 161: (7, 0),\n",
- " 160: (7, 0),\n",
- " 159: (7, 0),\n",
- " 158: (7, 0),\n",
- " 157: (7, 0),\n",
- " 156: (7, 0),\n",
- " 155: (7, 0),\n",
- " 154: (7, 0),\n",
- " 153: (7, 0),\n",
- " 152: (7, 0),\n",
- " 151: (7, 0),\n",
- " 150: (7, 0),\n",
- " 149: (7, 0),\n",
- " 148: (7, 0),\n",
- " 147: (7, 0),\n",
- " 146: (7, 0),\n",
- " 145: (7, 0),\n",
- " 212: (7, 1),\n",
- " 211: (7, 1),\n",
- " 210: (7, 1),\n",
- " 209: (7, 1),\n",
- " 208: (7, 1),\n",
- " 207: (7, 1),\n",
- " 206: (7, 1),\n",
- " 205: (7, 1),\n",
- " 203: (7, 1),\n",
- " 204: (7, 1),\n",
- " 202: (7, 1),\n",
- " 201: (7, 1),\n",
- " 200: (7, 1),\n",
- " 199: (7, 1),\n",
- " 198: (7, 1),\n",
- " 144: (7, 1),\n",
- " 143: (7, 1),\n",
- " 142: (7, 1),\n",
- " 141: (7, 1),\n",
- " 140: (7, 1),\n",
- " 139: (7, 1),\n",
- " 138: (7, 1),\n",
- " 137: (7, 1),\n",
- " 136: (7, 1),\n",
- " 135: (7, 1),\n",
- " 134: (7, 1),\n",
- " 133: (7, 1),\n",
- " 132: (7, 1),\n",
- " 131: (7, 1),\n",
- " 130: (7, 1),\n",
- " 129: (7, 1),\n",
- " 128: (7, 1),\n",
- " 127: (7, 1),\n",
- " 228: (7, 2),\n",
- " 227: (7, 2),\n",
- " 226: (7, 2),\n",
- " 225: (7, 2),\n",
- " 224: (7, 2),\n",
- " 223: (7, 2),\n",
- " 222: (7, 2),\n",
- " 221: (7, 2),\n",
- " 220: (7, 2),\n",
- " 219: (7, 2),\n",
- " 218: (7, 2),\n",
- " 217: (7, 2),\n",
- " 216: (7, 2),\n",
- " 215: (7, 2),\n",
- " 214: (7, 2),\n",
- " 213: (7, 2),\n",
- " 126: (7, 2),\n",
- " 125: (7, 2),\n",
- " 124: (7, 2),\n",
- " 123: (7, 2),\n",
- " 122: (7, 2),\n",
- " 121: (7, 2),\n",
- " 120: (7, 2),\n",
- " 119: (7, 2),\n",
- " 118: (7, 2),\n",
- " 117: (7, 2),\n",
- " 116: (7, 2),\n",
- " 115: (7, 2),\n",
- " 114: (7, 2),\n",
- " 113: (7, 2),\n",
- " 242: (7, 3),\n",
- " 241: (7, 3),\n",
- " 240: (7, 3),\n",
- " 239: (7, 3),\n",
- " 238: (7, 3),\n",
- " 237: (7, 3),\n",
- " 236: (7, 3),\n",
- " 235: (7, 3),\n",
- " 234: (7, 3),\n",
- " 233: (7, 3),\n",
- " 232: (7, 3),\n",
- " 231: (7, 3),\n",
- " 230: (7, 3),\n",
- " 229: (7, 3),\n",
- " 112: (7, 3),\n",
- " 111: (7, 3),\n",
- " 110: (7, 3),\n",
- " 109: (7, 3),\n",
- " 108: (7, 3),\n",
- " 107: (7, 3),\n",
- " 106: (7, 3),\n",
- " 105: (7, 3),\n",
- " 104: (7, 3),\n",
- " 103: (7, 3),\n",
- " 102: (7, 3),\n",
- " 101: (7, 3),\n",
- " 100: (7, 3),\n",
- " 99: (7, 3),\n",
- " 98: (7, 3),\n",
- " 97: (7, 3),\n",
- " 249: (7, 4),\n",
- " 248: (7, 4),\n",
- " 247: (7, 4),\n",
- " 246: (7, 4),\n",
- " 245: (7, 4),\n",
- " 244: (7, 4),\n",
- " 243: (7, 4),\n",
- " 96: (7, 4),\n",
- " 95: (7, 4),\n",
- " 94: (7, 4),\n",
- " 93: (7, 4),\n",
- " 92: (7, 4),\n",
- " 91: (7, 4),\n",
- " 90: (7, 4),\n",
- " 89: (7, 4),\n",
- " 88: (7, 4),\n",
- " 87: (7, 4),\n",
- " 86: (7, 4),\n",
- " 85: (7, 4),\n",
- " 84: (7, 4),\n",
- " 83: (7, 4),\n",
- " 82: (7, 4),\n",
- " 81: (7, 4),\n",
- " 80: (7, 4),\n",
- " 79: (7, 4),\n",
- " 78: (7, 4),\n",
- " 77: (7, 4),\n",
- " 76: (7, 4),\n",
- " 75: (7, 4),\n",
- " 74: (7, 4),\n",
- " 73: (7, 4),\n",
- " 263: (7, 5),\n",
- " 262: (7, 5),\n",
- " 261: (7, 5),\n",
- " 260: (7, 5),\n",
- " 259: (7, 5),\n",
- " 258: (7, 5),\n",
- " 257: (7, 5),\n",
- " 256: (7, 5),\n",
- " 255: (7, 5),\n",
- " 254: (7, 5),\n",
- " 253: (7, 5),\n",
- " 252: (7, 5),\n",
- " 251: (7, 5),\n",
- " 250: (7, 5),\n",
- " 72: (7, 5),\n",
- " 71: (7, 5),\n",
- " 70: (7, 5),\n",
- " 69: (7, 5),\n",
- " 68: (7, 5),\n",
- " 67: (7, 5),\n",
- " 66: (7, 5),\n",
- " 65: (7, 5),\n",
- " 64: (7, 5),\n",
- " 63: (7, 5),\n",
- " 62: (7, 5),\n",
- " 61: (7, 5),\n",
- " 60: (7, 5),\n",
- " 59: (7, 5),\n",
- " 58: (7, 5),\n",
- " 57: (7, 5),\n",
- " 56: (7, 5),\n",
- " 292: (8, 0),\n",
- " 291: (8, 0),\n",
- " 290: (8, 0),\n",
- " 289: (8, 0),\n",
- " 288: (8, 0),\n",
- " 287: (8, 0),\n",
- " 286: (8, 0),\n",
- " 275: (8, 0),\n",
- " 276: (8, 0),\n",
- " 277: (8, 0),\n",
- " 278: (8, 0),\n",
- " 279: (8, 0),\n",
- " 280: (8, 0),\n",
- " 281: (8, 0),\n",
- " 282: (8, 0),\n",
- " 283: (8, 0),\n",
- " 284: (8, 0),\n",
- " 285: (8, 0),\n",
- " 274: (8, 0),\n",
- " 273: (8, 0),\n",
- " 272: (8, 0),\n",
- " 271: (8, 0),\n",
- " 270: (8, 0),\n",
- " 269: (8, 0),\n",
- " 268: (8, 0),\n",
- " 267: (8, 0),\n",
- " 266: (8, 0),\n",
- " 265: (8, 0),\n",
- " 264: (8, 0),\n",
- " 311: (8, 1),\n",
- " 310: (8, 1),\n",
- " 309: (8, 1),\n",
- " 308: (8, 1),\n",
- " 307: (8, 1),\n",
- " 306: (8, 1),\n",
- " 305: (8, 1),\n",
- " 304: (8, 1),\n",
- " 303: (8, 1),\n",
- " 302: (8, 1),\n",
- " 301: (8, 1),\n",
- " 300: (8, 1),\n",
- " 299: (8, 1),\n",
- " 298: (8, 1),\n",
- " 297: (8, 1),\n",
- " 296: (8, 1),\n",
- " 295: (8, 1),\n",
- " 294: (8, 1),\n",
- " 293: (8, 1),\n",
- " 332: (9, 0),\n",
- " 331: (9, 0),\n",
- " 330: (9, 0),\n",
- " 329: (9, 1),\n",
- " 328: (9, 1),\n",
- " 327: (9, 1),\n",
- " 326: (9, 1),\n",
- " 325: (9, 1),\n",
- " 324: (9, 1),\n",
- " 323: (9, 1),\n",
- " 322: (9, 1),\n",
- " 321: (9, 1),\n",
- " 320: (9, 1),\n",
- " 319: (9, 1),\n",
- " 318: (9, 1),\n",
- " 317: (9, 1),\n",
- " 316: (9, 1),\n",
- " 315: (9, 1),\n",
- " 314: (9, 1),\n",
- " 313: (9, 1),\n",
- " 312: (9, 1),\n",
- " 336: (10, 0),\n",
- " 335: (10, 0),\n",
- " 334: (10, 0),\n",
- " 333: (10, 0),\n",
- " 355: (10, 5),\n",
- " 354: (10, 5),\n",
- " 353: (10, 5),\n",
- " 352: (10, 5),\n",
- " 351: (10, 5),\n",
- " 350: (10, 5),\n",
- " 349: (10, 5),\n",
- " 348: (10, 5),\n",
- " 347: (10, 5),\n",
- " 346: (10, 5),\n",
- " 345: (10, 5),\n",
- " 344: (10, 5),\n",
- " 343: (10, 5),\n",
- " 342: (10, 5),\n",
- " 341: (10, 5),\n",
- " 340: (10, 5),\n",
- " 339: (10, 5),\n",
- " 338: (10, 5),\n",
- " 337: (10, 5),\n",
- " 389: (11, 0),\n",
- " 388: (11, 0),\n",
- " 387: (11, 0),\n",
- " 386: (11, 0),\n",
- " 385: (11, 0),\n",
- " 384: (11, 0),\n",
- " 383: (11, 0),\n",
- " 382: (11, 0),\n",
- " 381: (11, 0),\n",
- " 369: (11, 0),\n",
- " 370: (11, 0),\n",
- " 371: (11, 0),\n",
- " 372: (11, 0),\n",
- " 373: (11, 0),\n",
- " 374: (11, 0),\n",
- " 375: (11, 0),\n",
- " 376: (11, 0),\n",
- " 377: (11, 0),\n",
- " 378: (11, 0),\n",
- " 379: (11, 0),\n",
- " 380: (11, 0),\n",
- " 368: (11, 0),\n",
- " 367: (11, 5),\n",
- " 366: (11, 5),\n",
- " 365: (11, 5),\n",
- " 364: (11, 5),\n",
- " 363: (11, 5),\n",
- " 362: (11, 5),\n",
- " 361: (11, 5),\n",
- " 360: (11, 5),\n",
- " 359: (11, 5),\n",
- " 358: (11, 5),\n",
- " 357: (11, 5),\n",
- " 356: (11, 5),\n",
- " 422: (12, 3),\n",
- " 421: (12, 3),\n",
- " 420: (12, 3),\n",
- " 419: (12, 3),\n",
- " 418: (12, 3),\n",
- " 417: (12, 3),\n",
- " 416: (12, 3),\n",
- " 415: (12, 3),\n",
- " 414: (12, 3),\n",
- " 413: (12, 3),\n",
- " 412: (12, 3),\n",
- " 411: (12, 3),\n",
- " 410: (12, 3),\n",
- " 409: (12, 3),\n",
- " 408: (12, 3),\n",
- " 407: (12, 3),\n",
- " 406: (12, 3),\n",
- " 405: (12, 3),\n",
- " 404: (12, 3),\n",
- " 403: (12, 3),\n",
- " 402: (12, 3),\n",
- " 401: (12, 4),\n",
- " 400: (12, 4),\n",
- " 399: (12, 4),\n",
- " 398: (12, 4),\n",
- " 397: (12, 4),\n",
- " 396: (12, 4),\n",
- " 395: (12, 4),\n",
- " 394: (12, 4),\n",
- " 393: (12, 4),\n",
- " 392: (12, 4),\n",
- " 391: (12, 4),\n",
- " 390: (12, 4),\n",
- " 435: (13, 3),\n",
- " 434: (13, 3),\n",
- " 433: (13, 3),\n",
- " 432: (13, 3),\n",
- " 431: (13, 3),\n",
- " 430: (13, 3),\n",
- " 429: (13, 3),\n",
- " 428: (13, 3),\n",
- " 427: (13, 3),\n",
- " 426: (13, 3),\n",
- " 425: (13, 3),\n",
- " 424: (13, 3),\n",
- " 423: (13, 3),\n",
- " 454: (13, 4),\n",
- " 453: (13, 4),\n",
- " 452: (13, 4),\n",
- " 451: (13, 4),\n",
- " 450: (13, 4),\n",
- " 449: (13, 4),\n",
- " 448: (13, 4),\n",
- " 447: (13, 4),\n",
- " 446: (13, 4),\n",
- " 445: (13, 4),\n",
- " 444: (13, 4),\n",
- " 443: (13, 4),\n",
- " 442: (13, 4),\n",
- " 441: (13, 4),\n",
- " 440: (13, 4),\n",
- " 439: (13, 4),\n",
- " 438: (13, 4),\n",
- " 437: (13, 4),\n",
- " 436: (13, 4)}"
- ]
- },
- "execution_count": 101,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "strands2\n",
- "ind_map={}\n",
- "for j in list(strands2.keys()):\n",
- " for l in strands2[j]:\n",
- " ind_map[l]=j\n",
- "ind_map\n",
- " "
+ "def find_segs(vir2nuc_scaf):\n",
+ " oligos={}\n",
+ " for i in range(len(vir2nuc_scaf)):\n",
+ " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n",
+ " if oligo not in oligos.keys():\n",
+ " oligos[oligo]=[]\n",
+ " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n",
+ " return oligos\n",
+ "\n",
+ "#class\n",
+ "def decode_vh_vb(virt2nuc):\n",
+ " vh_list={}\n",
+ " vh_vb,pattern=pd.read_pickle(virt2nuc)\n",
+ " for i in pattern.keys():\n",
+ " s=strands()\n",
+ " s.row,s.col=pattern[i]\n",
+ " s.num=i\n",
+ " vh_list[s.num]=s\n",
+ " scafs=vh_vb._scaf\n",
+ " staps=vh_vb._stap\n",
+ " scaf_strands=find_segs(scafs)\n",
+ " scaf_oligos=list(scaf_strands.keys())\n",
+ " for i in scaf_oligos:\n",
+ " pass\n",
+ " \n",
+ " \n",
+ " return vh_list"
]
},
{
"cell_type": "code",
- "execution_count": 97,
- "id": "4a86b6be-8176-489c-a8a9-c31c62d04096",
+ "execution_count": 187,
+ "id": "bc032680",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[1,\n",
- " 0,\n",
- " -1,\n",
- " 4,\n",
- " 3,\n",
- " -1,\n",
- " 7,\n",
- " 6,\n",
- " -1,\n",
- " 41,\n",
- " 40,\n",
- " 39,\n",
- " 38,\n",
- " 37,\n",
- " 36,\n",
- " 35,\n",
- " 34,\n",
- " 33,\n",
- " 32,\n",
- " 31,\n",
- " 30,\n",
- " 29,\n",
- " 28,\n",
- " 27,\n",
- " 26,\n",
- " 25,\n",
- " 24,\n",
- " 23,\n",
- " 22,\n",
- " 21,\n",
- " 20,\n",
- " 19,\n",
- " 18,\n",
- " 17,\n",
- " 16,\n",
- " 15,\n",
- " 14,\n",
- " 13,\n",
- " 12,\n",
- " 11,\n",
- " 10,\n",
- " 9,\n",
- " -1,\n",
- " 46,\n",
- " 45,\n",
- " 44,\n",
- " 43,\n",
- " -1,\n",
- " 50,\n",
- " 49,\n",
- " 48,\n",
- " -1,\n",
- " 54,\n",
- " 53,\n",
- " 52,\n",
- " -1,\n",
- " 196,\n",
- " 195,\n",
- " 194,\n",
- " 193,\n",
- " 192,\n",
- " 191,\n",
- " 190,\n",
- " 189,\n",
- " 188,\n",
- " 187,\n",
- " 186,\n",
- " 185,\n",
- " 184,\n",
- " 183,\n",
- " 182,\n",
- " 181,\n",
- " 180,\n",
- " 179,\n",
- " 178,\n",
- " 177,\n",
- " 176,\n",
- " 175,\n",
- " 174,\n",
- " 173,\n",
- " 172,\n",
- " 171,\n",
- " 170,\n",
- " 169,\n",
- " 168,\n",
- " 167,\n",
- " 166,\n",
- " 165,\n",
- " 164,\n",
- " 163,\n",
- " 162,\n",
- " 161,\n",
- " 160,\n",
- " 159,\n",
- " 158,\n",
- " 157,\n",
- " 156,\n",
- " 155,\n",
- " 154,\n",
- " 153,\n",
- " 152,\n",
- " 151,\n",
- " 150,\n",
- " 149,\n",
- " 148,\n",
- " 147,\n",
- " 146,\n",
- " 145,\n",
- " -1,\n",
- " 211,\n",
- " 210,\n",
- " 209,\n",
- " 208,\n",
- " 207,\n",
- " 206,\n",
- " 205,\n",
- " 203,\n",
- " 204,\n",
- " 202,\n",
- " 201,\n",
- " 200,\n",
- " 199,\n",
- " 198,\n",
- " 144,\n",
- " 143,\n",
- " 142,\n",
- " 141,\n",
- " 140,\n",
- " 139,\n",
- " 138,\n",
- " 137,\n",
- " 136,\n",
- " 135,\n",
- " 134,\n",
- " 133,\n",
- " 132,\n",
- " 131,\n",
- " 130,\n",
- " 129,\n",
- " 128,\n",
- " 127,\n",
- " -1,\n",
- " 227,\n",
- " 226,\n",
- " 225,\n",
- " 224,\n",
- " 223,\n",
- " 222,\n",
- " 221,\n",
- " 220,\n",
- " 219,\n",
- " 218,\n",
- " 217,\n",
- " 216,\n",
- " 215,\n",
- " 214,\n",
- " 213,\n",
- " 126,\n",
- " 125,\n",
- " 124,\n",
- " 123,\n",
- " 122,\n",
- " 121,\n",
- " 120,\n",
- " 119,\n",
- " 118,\n",
- " 117,\n",
- " 116,\n",
- " 115,\n",
- " 114,\n",
- " 113,\n",
- " -1,\n",
- " 241,\n",
- " 240,\n",
- " 239,\n",
- " 238,\n",
- " 237,\n",
- " 236,\n",
- " 235,\n",
- " 234,\n",
- " 233,\n",
- " 232,\n",
- " 231,\n",
- " 230,\n",
- " 229,\n",
- " 112,\n",
- " 111,\n",
- " 110,\n",
- " 109,\n",
- " 108,\n",
- " 107,\n",
- " 106,\n",
- " 105,\n",
- " 104,\n",
- " 103,\n",
- " 102,\n",
- " 101,\n",
- " 100,\n",
- " 99,\n",
- " 98,\n",
- " 97,\n",
- " -1,\n",
- " 248,\n",
- " 247,\n",
- " 246,\n",
- " 245,\n",
- " 244,\n",
- " 243,\n",
- " 96,\n",
- " 95,\n",
- " 94,\n",
- " 93,\n",
- " 92,\n",
- " 91,\n",
- " 90,\n",
- " 89,\n",
- " 88,\n",
- " 87,\n",
- " 86,\n",
- " 85,\n",
- " 84,\n",
- " 83,\n",
- " 82,\n",
- " 81,\n",
- " 80,\n",
- " 79,\n",
- " 78,\n",
- " 77,\n",
- " 76,\n",
- " 75,\n",
- " 74,\n",
- " 73,\n",
- " -1,\n",
- " 262,\n",
- " 261,\n",
- " 260,\n",
- " 259,\n",
- " 258,\n",
- " 257,\n",
- " 256,\n",
- " 255,\n",
- " 254,\n",
- " 253,\n",
- " 252,\n",
- " 251,\n",
- " 250,\n",
- " 72,\n",
- " 71,\n",
- " 70,\n",
- " 69,\n",
- " 68,\n",
- " 67,\n",
- " 66,\n",
- " 65,\n",
- " 64,\n",
- " 63,\n",
- " 62,\n",
- " 61,\n",
- " 60,\n",
- " 59,\n",
- " 58,\n",
- " 57,\n",
- " 56,\n",
- " -1,\n",
- " 291,\n",
- " 290,\n",
- " 289,\n",
- " 288,\n",
- " 287,\n",
- " 286,\n",
- " 275,\n",
- " 276,\n",
- " 277,\n",
- " 278,\n",
- " 279,\n",
- " 280,\n",
- " 281,\n",
- " 282,\n",
- " 283,\n",
- " 284,\n",
- " 285,\n",
- " 274,\n",
- " 273,\n",
- " 272,\n",
- " 271,\n",
- " 270,\n",
- " 269,\n",
- " 268,\n",
- " 267,\n",
- " 266,\n",
- " 265,\n",
- " 264,\n",
- " -1,\n",
- " 310,\n",
- " 309,\n",
- " 308,\n",
- " 307,\n",
- " 306,\n",
- " 305,\n",
- " 304,\n",
- " 303,\n",
- " 302,\n",
- " 301,\n",
- " 300,\n",
- " 299,\n",
- " 298,\n",
- " 297,\n",
- " 296,\n",
- " 295,\n",
- " 294,\n",
- " 293,\n",
- " -1,\n",
- " 331,\n",
- " 330,\n",
- " -1,\n",
- " 328,\n",
- " 327,\n",
- " 326,\n",
- " 325,\n",
- " 324,\n",
- " 323,\n",
- " 322,\n",
- " 321,\n",
- " 320,\n",
- " 319,\n",
- " 318,\n",
- " 317,\n",
- " 316,\n",
- " 315,\n",
- " 314,\n",
- " 313,\n",
- " 312,\n",
- " -1,\n",
- " 335,\n",
- " 334,\n",
- " 333,\n",
- " -1,\n",
- " 354,\n",
- " 353,\n",
- " 352,\n",
- " 351,\n",
- " 350,\n",
- " 349,\n",
- " 348,\n",
- " 347,\n",
- " 346,\n",
- " 345,\n",
- " 344,\n",
- " 343,\n",
- " 342,\n",
- " 341,\n",
- " 340,\n",
- " 339,\n",
- " 338,\n",
- " 337,\n",
- " -1,\n",
- " 388,\n",
- " 387,\n",
- " 386,\n",
- " 385,\n",
- " 384,\n",
- " 383,\n",
- " 382,\n",
- " 381,\n",
- " 369,\n",
- " 370,\n",
- " 371,\n",
- " 372,\n",
- " 373,\n",
- " 374,\n",
- " 375,\n",
- " 376,\n",
- " 377,\n",
- " 378,\n",
- " 379,\n",
- " 380,\n",
- " 368,\n",
- " -1,\n",
- " 366,\n",
- " 365,\n",
- " 364,\n",
- " 363,\n",
- " 362,\n",
- " 361,\n",
- " 360,\n",
- " 359,\n",
- " 358,\n",
- " 357,\n",
- " 356,\n",
- " -1,\n",
- " 421,\n",
- " 420,\n",
- " 419,\n",
- " 418,\n",
- " 417,\n",
- " 416,\n",
- " 415,\n",
- " 414,\n",
- " 413,\n",
- " 412,\n",
- " 411,\n",
- " 410,\n",
- " 409,\n",
- " 408,\n",
- " 407,\n",
- " 406,\n",
- " 405,\n",
- " 404,\n",
- " 403,\n",
- " 402,\n",
- " -1,\n",
- " 400,\n",
- " 399,\n",
- " 398,\n",
- " 397,\n",
- " 396,\n",
- " 395,\n",
- " 394,\n",
- " 393,\n",
- " 392,\n",
- " 391,\n",
- " 390,\n",
- " -1,\n",
- " 434,\n",
- " 433,\n",
- " 432,\n",
- " 431,\n",
- " 430,\n",
- " 429,\n",
- " 428,\n",
- " 427,\n",
- " 426,\n",
- " 425,\n",
- " 424,\n",
- " 423,\n",
- " -1,\n",
- " 453,\n",
- " 452,\n",
- " 451,\n",
- " 450,\n",
- " 449,\n",
- " 448,\n",
- " 447,\n",
- " 446,\n",
- " 445,\n",
- " 444,\n",
- " 443,\n",
- " 442,\n",
- " 441,\n",
- " 440,\n",
- " 439,\n",
- " 438,\n",
- " 437,\n",
- " 436,\n",
- " -1]"
+ "[[(2, 34),\n",
+ " (2, 33),\n",
+ " (2, 32),\n",
+ " (2, 31),\n",
+ " (2, 30),\n",
+ " (2, 29),\n",
+ " (2, 28),\n",
+ " (2, 27),\n",
+ " (2, 26),\n",
+ " (2, 25),\n",
+ " (2, 24),\n",
+ " (2, 23),\n",
+ " (2, 22),\n",
+ " (2, 21),\n",
+ " (2, 20),\n",
+ " (2, 19),\n",
+ " (2, 18),\n",
+ " (2, 17),\n",
+ " (2, 16),\n",
+ " (2, 15),\n",
+ " (2, 14),\n",
+ " (2, 13),\n",
+ " (2, 12),\n",
+ " (2, 11),\n",
+ " (2, 10),\n",
+ " (2, 9),\n",
+ " (2, 8),\n",
+ " (2, 7),\n",
+ " (2, 6),\n",
+ " (2, 5),\n",
+ " (2, 4),\n",
+ " (2, 3),\n",
+ " (2, 2),\n",
+ " (2, 1),\n",
+ " (2, 0)],\n",
+ " [(1, 3),\n",
+ " (1, 4),\n",
+ " (1, 5),\n",
+ " (1, 6),\n",
+ " (1, 7),\n",
+ " (1, 8),\n",
+ " (1, 9),\n",
+ " (1, 10),\n",
+ " (1, 11),\n",
+ " (1, 12),\n",
+ " (1, 13),\n",
+ " (1, 14),\n",
+ " (1, 15),\n",
+ " (1, 16),\n",
+ " (1, 17),\n",
+ " (1, 18),\n",
+ " (1, 19),\n",
+ " (1, 20),\n",
+ " (0, 20),\n",
+ " (0, 19),\n",
+ " (0, 18),\n",
+ " (0, 17),\n",
+ " (0, 16),\n",
+ " (0, 15),\n",
+ " (0, 14),\n",
+ " (0, 13),\n",
+ " (0, 12),\n",
+ " (0, 11),\n",
+ " (0, 10),\n",
+ " (0, 9),\n",
+ " (0, 8),\n",
+ " (0, 7),\n",
+ " (0, 6),\n",
+ " (0, 5),\n",
+ " (0, 4),\n",
+ " (0, 3),\n",
+ " (0, 2)],\n",
+ " [(0, 23),\n",
+ " (0, 22),\n",
+ " (0, 21),\n",
+ " (1, 21),\n",
+ " (1, 22),\n",
+ " (1, 23),\n",
+ " (1, 24),\n",
+ " (1, 25),\n",
+ " (1, 26),\n",
+ " (1, 27),\n",
+ " (1, 28),\n",
+ " (1, 29),\n",
+ " (1, 30),\n",
+ " (1, 31),\n",
+ " (1, 32),\n",
+ " (1, 33),\n",
+ " (1, 34),\n",
+ " (1, 35),\n",
+ " (1, 36),\n",
+ " (1, 37),\n",
+ " (1, 38)],\n",
+ " [(5, 9),\n",
+ " (5, 10),\n",
+ " (5, 11),\n",
+ " (5, 12),\n",
+ " (5, 13),\n",
+ " (5, 14),\n",
+ " (5, 15),\n",
+ " (5, 16),\n",
+ " (5, 17),\n",
+ " (5, 18),\n",
+ " (5, 19),\n",
+ " (5, 20),\n",
+ " (5, 21),\n",
+ " (5, 22),\n",
+ " (5, 23),\n",
+ " (5, 24),\n",
+ " (5, 25),\n",
+ " (5, 26),\n",
+ " (5, 27),\n",
+ " (0, 27),\n",
+ " (0, 26),\n",
+ " (0, 25),\n",
+ " (0, 24)],\n",
+ " [(0, 38),\n",
+ " (0, 37),\n",
+ " (0, 36),\n",
+ " (0, 35),\n",
+ " (0, 34),\n",
+ " (0, 33),\n",
+ " (0, 32),\n",
+ " (0, 31),\n",
+ " (0, 30),\n",
+ " (0, 29),\n",
+ " (0, 28),\n",
+ " (5, 28),\n",
+ " (5, 29),\n",
+ " (5, 30),\n",
+ " (5, 31),\n",
+ " (5, 32),\n",
+ " (5, 33),\n",
+ " (5, 34),\n",
+ " (5, 35),\n",
+ " (5, 36),\n",
+ " (5, 37),\n",
+ " (5, 38),\n",
+ " (5, 39)],\n",
+ " [(3, 0),\n",
+ " (3, 1),\n",
+ " (3, 2),\n",
+ " (3, 3),\n",
+ " (3, 4),\n",
+ " (3, 5),\n",
+ " (3, 6),\n",
+ " (3, 7),\n",
+ " (3, 8),\n",
+ " (3, 9),\n",
+ " (3, 10),\n",
+ " (3, 11),\n",
+ " (3, 12),\n",
+ " (3, 13),\n",
+ " (3, 14),\n",
+ " (3, 15),\n",
+ " (3, 16),\n",
+ " (3, 17),\n",
+ " (3, 18),\n",
+ " (3, 19),\n",
+ " (3, 20),\n",
+ " (4, 20),\n",
+ " (4, 19),\n",
+ " (4, 18),\n",
+ " (4, 17),\n",
+ " (4, 16),\n",
+ " (4, 15),\n",
+ " (4, 14),\n",
+ " (4, 13),\n",
+ " (4, 12),\n",
+ " (4, 11),\n",
+ " (4, 10),\n",
+ " (4, 9)],\n",
+ " [(4, 39),\n",
+ " (4, 38),\n",
+ " (4, 37),\n",
+ " (4, 36),\n",
+ " (4, 35),\n",
+ " (4, 34),\n",
+ " (4, 33),\n",
+ " (4, 32),\n",
+ " (4, 31),\n",
+ " (4, 30),\n",
+ " (4, 29),\n",
+ " (4, 28),\n",
+ " (4, 27),\n",
+ " (4, 26),\n",
+ " (4, 25),\n",
+ " (4, 24),\n",
+ " (4, 23),\n",
+ " (4, 22),\n",
+ " (4, 21),\n",
+ " (3, 21),\n",
+ " (3, 22),\n",
+ " (3, 23),\n",
+ " (3, 24),\n",
+ " (3, 25),\n",
+ " (3, 26),\n",
+ " (3, 27),\n",
+ " (3, 28),\n",
+ " (3, 29),\n",
+ " (3, 30),\n",
+ " (3, 31),\n",
+ " (3, 32),\n",
+ " (3, 33),\n",
+ " (3, 34)]]"
]
},
- "execution_count": 97,
+ "execution_count": 187,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "strands2=df.groupby(['strand',\"vh\"]).groups\n",
- "stacks=[]\n",
- "for i in list(strands2.values()):\n",
- " m=list(i)\n",
- " m.append(-1)\n",
- " for j in range(1,len(m)):\n",
- " stacks.append(m[j])\n",
- "stacks"
+ "s1=decode_vh_vb(\"test.virt2nuc\")\n",
+ "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n",
+ "list(find_segs(vh_vb._stap).values())"
]
},
{
"cell_type": "code",
- "execution_count": 12,
- "id": "087e2625",
+ "execution_count": 142,
+ "id": "29ff7990",
"metadata": {},
"outputs": [],
"source": [
@@ -7828,7 +2867,7 @@
{
"cell_type": "code",
"execution_count": 117,
- "id": "56387503",
+ "id": "3cb9542c",
"metadata": {},
"outputs": [],
"source": [
@@ -7848,8 +2887,8 @@
},
{
"cell_type": "code",
- "execution_count": 13,
- "id": "c73234d5",
+ "execution_count": 116,
+ "id": "4219838b",
"metadata": {},
"outputs": [
{
@@ -8045,7 +3084,7 @@
" (5, 23)]"
]
},
- "execution_count": 13,
+ "execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
@@ -8061,7 +3100,7 @@
{
"cell_type": "code",
"execution_count": 157,
- "id": "b37f7a4c",
+ "id": "45168e4b",
"metadata": {},
"outputs": [
{
@@ -8085,7 +3124,7 @@
{
"cell_type": "code",
"execution_count": 152,
- "id": "d11f5b9c",
+ "id": "33e5c80d",
"metadata": {},
"outputs": [
{
@@ -8106,7 +3145,7 @@
{
"cell_type": "code",
"execution_count": 62,
- "id": "7bd3df35",
+ "id": "9b79e902",
"metadata": {},
"outputs": [
{
@@ -8334,7 +3373,7 @@
{
"cell_type": "code",
"execution_count": 3,
- "id": "efe70397",
+ "id": "cecd1c9f",
"metadata": {},
"outputs": [],
"source": [
@@ -8460,7 +3499,7 @@
{
"cell_type": "code",
"execution_count": 4,
- "id": "646b1ae9",
+ "id": "b4af37c1",
"metadata": {},
"outputs": [],
"source": [
@@ -8481,7 +3520,7 @@
{
"cell_type": "code",
"execution_count": 20,
- "id": "7ead2ea3",
+ "id": "2fa89abc",
"metadata": {},
"outputs": [
{
@@ -8543,7 +3582,7 @@
{
"cell_type": "code",
"execution_count": 25,
- "id": "3095b830",
+ "id": "6e620d24",
"metadata": {},
"outputs": [
{
@@ -8605,7 +3644,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "b749b541",
+ "id": "fef6094a",
"metadata": {},
"outputs": [],
"source": [
@@ -8618,7 +3657,7 @@
{
"cell_type": "code",
"execution_count": 13,
- "id": "a5a89254",
+ "id": "be089a18",
"metadata": {},
"outputs": [
{
@@ -8639,7 +3678,7 @@
{
"cell_type": "code",
"execution_count": 7,
- "id": "04759ac6",
+ "id": "6f75d365",
"metadata": {},
"outputs": [
{
@@ -8688,7 +3727,7 @@
{
"cell_type": "code",
"execution_count": 36,
- "id": "7c36faba",
+ "id": "8dfadf61",
"metadata": {},
"outputs": [
{
@@ -8750,7 +3789,7 @@
{
"cell_type": "code",
"execution_count": 1,
- "id": "9985773f",
+ "id": "ba097c82",
"metadata": {},
"outputs": [
{
@@ -8776,7 +3815,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "134923d4",
+ "id": "1c4d4fa9",
"metadata": {},
"outputs": [],
"source": []
@@ -8784,7 +3823,7 @@
{
"cell_type": "code",
"execution_count": 3,
- "id": "f7bd6aef",
+ "id": "d4ff1f83",
"metadata": {},
"outputs": [
{
@@ -8804,7 +3843,7 @@
{
"cell_type": "code",
"execution_count": 13,
- "id": "9913320b",
+ "id": "6f1dab46",
"metadata": {},
"outputs": [
{
@@ -8825,7 +3864,7 @@
{
"cell_type": "code",
"execution_count": 15,
- "id": "46af2b4f",
+ "id": "615964fe",
"metadata": {},
"outputs": [
{
@@ -8895,7 +3934,7 @@
{
"cell_type": "code",
"execution_count": 16,
- "id": "6ae574e4",
+ "id": "a71b0639",
"metadata": {},
"outputs": [],
"source": [
@@ -8906,7 +3945,7 @@
{
"cell_type": "code",
"execution_count": 26,
- "id": "4b56fb9d",
+ "id": "2bb83a1e",
"metadata": {},
"outputs": [
{
@@ -9015,7 +4054,7 @@
{
"cell_type": "code",
"execution_count": 25,
- "id": "0c061135",
+ "id": "86f5c21e",
"metadata": {},
"outputs": [
{
@@ -9036,7 +4075,7 @@
{
"cell_type": "code",
"execution_count": 10,
- "id": "1d7952e2",
+ "id": "f3fae511",
"metadata": {},
"outputs": [
{
@@ -9067,7 +4106,7 @@
{
"cell_type": "code",
"execution_count": 1,
- "id": "3a02aa96",
+ "id": "07f3352b",
"metadata": {},
"outputs": [
{
@@ -9564,7 +4603,7 @@
{
"cell_type": "code",
"execution_count": 3,
- "id": "6ab2279a",
+ "id": "61be89c1",
"metadata": {},
"outputs": [
{
@@ -9598,7 +4637,7 @@
{
"cell_type": "code",
"execution_count": 4,
- "id": "d7dbbbbf",
+ "id": "a00c445c",
"metadata": {},
"outputs": [
{
@@ -9618,7 +4657,7 @@
{
"cell_type": "code",
"execution_count": 5,
- "id": "35968795",
+ "id": "a648fae4",
"metadata": {},
"outputs": [],
"source": [
@@ -9628,7 +4667,7 @@
{
"cell_type": "code",
"execution_count": 6,
- "id": "7512de77",
+ "id": "9d100033",
"metadata": {},
"outputs": [
{
@@ -9649,7 +4688,7 @@
{
"cell_type": "code",
"execution_count": 7,
- "id": "74944c42",
+ "id": "66d3eaf4",
"metadata": {},
"outputs": [
{
@@ -9676,7 +4715,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "e1d9ef64",
+ "id": "f60b0cdc",
"metadata": {},
"outputs": [],
"source": []