from mrdna import logger, devlogger
from .segmentmodel_from_lists import model_from_basepair_stack_3prime
from ..arbdmodel.coords import rotationAboutAxis
import pandas as pd

# NOTE(review): wildcard import kept so that any name other modules expect from
# here stays available; nothing visible in this file requires it.
from oxlibs import *
import numpy as np
from scipy.spatial import distance_matrix
# Kept for backward compatibility with code that relies on this global pandas
# setting; this module itself no longer performs chained assignment.
pd.options.mode.chained_assignment = None  # default='warn'

_seq_to_int_dict = dict(A=0,T=1,C=2,G=3)
_seq_to_int_dict = {k:str(v) for k,v in _seq_to_int_dict.items()}

_yrot = rotationAboutAxis(axis=(0,1,0), angle=180).dot(rotationAboutAxis(axis=(0,0,1),angle=-40))

# Scale applied to oxDNA coordinates; presumably converts oxDNA length units
# (0.8518 nm) to Angstrom -- TODO confirm against the oxDNA documentation.
_OXDNA_LENGTH_SCALE = 8.518


def _geometry_from_conf(conf_data):
    """Derive per-nucleotide geometry from an oxDNA configuration array.

    Columns 0-2 of ``conf_data`` are read as positions, 3-5 as the base
    direction and 6-8 as the (negated) normal direction.

    Returns:
        tuple: ``(r, base_dir, basepair_pos, normal_dir, perp_dir, orientation)``
        where ``orientation`` holds one 3x3 matrix per nucleotide.
    """
    r = conf_data[:, :3] * _OXDNA_LENGTH_SCALE
    base_dir = conf_data[:, 3:6]
    # Point along the base direction where a paired partner's site is sought.
    basepair_pos = r + base_dir * 10.0
    normal_dir = -conf_data[:, 6:9]
    perp_dir = np.cross(base_dir, normal_dir)
    orientation = np.array([np.array(o).T.dot(_yrot)
                            for o in zip(perp_dir, -base_dir, -normal_dir)])
    return r, base_dir, basepair_pos, normal_dir, perp_dir, orientation


def _bad_pair_message(label, i, j, bp, dists):
    """Format the diagnostic line emitted for a non-reciprocal basepair."""
    bpj = bp[j]
    fields = [label, i, j, bp[i], bp[j],
              dists[i, j], dists[j, i], dists[bpj, j], dists[j, bpj]]
    return " ".join(str(_x) for _x in fields)


def _get_bp(r, basepair_pos, sequence=None):
    """Detect basepairs geometrically.

    Each nucleotide is paired with the nucleotide whose ``basepair_pos`` is
    nearest (symmetrized distance); pairs farther apart than 2 are dropped.
    If ``sequence`` is given, pairs that are not Watson-Crick complementary
    (A-T/U, C-G) are dropped as well.

    Returns:
        numpy.ndarray: partner index per nucleotide, ``-1`` when unpaired.

    Raises:
        ValueError: if a surviving pair is not reciprocal.
    """
    n = len(r)
    if n == 0:
        return np.empty(0, dtype=int)

    # The large diagonal term prevents a nucleotide from pairing with itself.
    dists = distance_matrix(r, basepair_pos) + np.eye(n) * 1000
    dists = 0.5 * (dists + dists.T)

    bp = np.argmin(dists, axis=1)

    for i, j in enumerate(bp):
        if j == -1:
            continue
        if dists[i, j] > 2:
            bp[i] = -1
        elif bp[j] != i:
            logger.warning(_bad_pair_message("Bad pair", i, j, bp, dists))

    for i, j in enumerate(bp):
        if j == -1:
            continue
        if bp[j] != i:
            message = _bad_pair_message("Bad pair2", i, j, bp, dists)
            logger.warning(message)
            raise ValueError(message)

    if sequence is not None:
        seq = np.asarray(sequence)
        bp_seq = seq[bp]            # fancy indexing -> copy, safe to edit
        bp_seq[bp == -1] = 'X'
        mismatch = (
            ((seq == 'C') & (bp_seq != 'G')) |
            ((seq == 'G') & (bp_seq != 'C')) |
            ((seq == 'T') & (bp_seq != 'A')) |
            ((seq == 'U') & (bp_seq != 'A')) |
            # A pairs with T *or* U, so it is mismatched only when the
            # partner is neither (the original used `|`, which is always true).
            ((seq == 'A') & (bp_seq != 'T') & (bp_seq != 'U'))
        )
        bad_bps = np.where((bp >= 0) & mismatch)[0]
        bp[bp[bad_bps]] = -1
        bp[bad_bps] = -1

    return bp


def _get_stack(r, base_dir, normal_dir, perp_dir):
    """Detect stacking partners geometrically.

    A probe point offset from each nucleotide is matched to the nearest other
    nucleotide position; matches farther than 8 are discarded.

    Returns:
        numpy.ndarray: stacked-neighbour index per nucleotide, ``-1`` if none.
    """
    n = len(r)
    if n == 0:
        return np.empty(0, dtype=int)

    probe = r + 3.5 * normal_dir + 2.1 * perp_dir - 1 * base_dir
    dists = distance_matrix(probe, r) + np.eye(n) * 1000
    stack = np.argmin(dists, axis=1)
    stack[dists[np.arange(n), stack] > 8] = -1
    return stack


def _find_vh_vb_table(s, is_scaf):
    """Flatten a ``{(vh, zid): (strand, indices)}`` mapping into row records.

    A site holding several nucleotides gets ids ``"<zid>.0"``, ``"<zid>.1"``,
    ...; a site holding one nucleotide gets ``"<zid>"``. Empty sites are
    skipped.
    """
    rows = []
    for (vh, zid), (strand, indices) in s.items():
        if len(indices) == 0:
            continue
        if len(indices) == 1:
            zids = [str(zid)]
        else:
            zids = [str(zid) + "." + str(j) for j in range(len(indices))]
        for index, z in zip(indices, zids):
            rows.append({"index": index, "vh": vh, "zid": z,
                         "strand": strand, "is_scaf": bool(is_scaf)})
    return rows


def _get_virt2nuc(virt2nuc, top_data):
    """Build the nucleotide property table from a pickled virt2nuc mapping.

    The pickle must unpack to ``(vh_vb, pattern)`` where ``vh_vb`` exposes
    ``_scaf`` and ``_stap`` mappings (presumably the file written by the
    tacoxDNA cadnano converter -- confirm).

    SECURITY: ``pd.read_pickle`` can execute arbitrary code; only pass files
    from a trusted source.
    """
    vh_vb, _pattern = pd.read_pickle(virt2nuc)
    rows = _find_vh_vb_table(vh_vb._scaf, 1) + _find_vh_vb_table(vh_vb._stap, 0)
    nt_prop = pd.DataFrame(rows)
    nt_prop.set_index("index", inplace=True)
    nt_prop.sort_index(inplace=True)
    nt_prop["threeprime"] = top_data[2]
    nt_prop["seq"] = top_data[1]
    nt_prop["stack"] = top_data[2]

    # A 3' neighbour on a different helix is a crossover, not a stack.
    vh_of = nt_prop["vh"].to_dict()
    crossovers = [idx for idx, tp in zip(nt_prop.index, nt_prop["threeprime"])
                  if tp in vh_of and vh_of[tp] != vh_of[idx]]
    nt_prop.loc[crossovers, "stack"] = -1

    # The partner occupies the same (vh, zid) site on the opposite strand type.
    sites = list(zip(nt_prop["vh"], nt_prop["zid"], nt_prop["is_scaf"]))
    bp_map = dict(zip(sites, nt_prop.index))
    nt_prop["bp"] = np.array(
        [bp_map.get((vh, zid, not is_scaf), -1) for vh, zid, is_scaf in sites],
        dtype=int)
    return nt_prop


def _arrays_from_virt2nuc(virt2nuc, top_data, conf_data):
    """Return ``(r, bp, stack, three_prime, seq, orientation, nt_prop)`` using
    connectivity taken from the virt2nuc table."""
    nt_prop = _get_virt2nuc(virt2nuc, top_data)
    r, _, _, _, _, orientation = _geometry_from_conf(conf_data)
    # One array per row: pandas rejects a 2-D/3-D array as a single column.
    nt_prop["r"] = list(r)
    nt_prop["orientation"] = list(orientation)
    return (r,
            nt_prop["bp"].to_numpy(),
            nt_prop["stack"].to_numpy(),
            nt_prop["threeprime"].to_numpy(),
            nt_prop["seq"].to_numpy(),
            orientation,
            nt_prop)


def _arrays_from_geometry(top_data, conf_data):
    """Return ``(r, bp, stack, three_prime, seq, orientation, nt_prop)`` using
    basepairs and stacks inferred from the coordinates."""
    ## Reverse direction so indices run 5'-to-3'
    top_data = [a[::-1] for a in top_data]
    conf_data = conf_data[::-1, :]

    r, base_dir, basepair_pos, normal_dir, perp_dir, orientation = \
        _geometry_from_conf(conf_data)
    seq = top_data[1]
    bp = _get_bp(r, basepair_pos, seq)
    stack = _get_stack(r, base_dir, normal_dir, perp_dir)

    # Re-map neighbour indices into the reversed ordering; a topology value
    # of -1 (no neighbour) maps to len(r) and is reset to -1.
    three_prime = len(r) - top_data[2] - 1
    three_prime[three_prime >= len(r)] = -1

    nt_prop = pd.DataFrame({"r": list(r), "bp": bp, "stack": stack,
                            "threeprime": three_prime, "seq": seq,
                            "orientation": list(orientation)})
    return r, bp, stack, three_prime, seq, orientation, nt_prop


def _debug_write_bonds(r, basepair_pos, three_prime, bp, stack, seq):
    """Development aid: write test.pdb / test.psf / test.stack.psf showing the
    detected backbone, basepair and stacking connectivity."""
    from ..arbdmodel import ParticleType, PointParticle, ArbdModel
    bond = tuple()
    b_t = ParticleType('BASE')
    p_t = ParticleType('PHOS')

    parts = []
    for i, (r0, r_bp, seq0) in enumerate(zip(r, basepair_pos, seq)):
        p = PointParticle(p_t, name='PHOS', position=r0, resid=i)
        b = PointParticle(b_t, name=seq0, position=0.5*(r0+r_bp), resid=i)
        parts.extend((p, b))

    model = ArbdModel(parts)
    model.writePdb('test.pdb')

    for i, (three_prime0, bp0) in enumerate(zip(three_prime, bp)):
        model.add_bond(parts[2*i], parts[2*i+1], bond)
        if three_prime0 >= 0:
            model.add_bond(parts[2*i], parts[2*three_prime0], bond)
        if bp0 >= 0:
            model.add_bond(parts[2*i+1], parts[2*bp0+1], bond)
    model.writePsf('test.psf')

    model.bonds = []
    for i, stack0 in enumerate(stack):
        if stack0 >= 0:
            model.add_bond(parts[2*i], parts[2*stack0], bond)
    model.writePsf('test.stack.psf')


def mrdna_model_from_oxdna(coordinate_file, topology_file,virt2nuc=None,get_nt_prop=False, **model_parameters):
    """ Construct an mrdna model from oxDNA coordinate and topology files

    Args:
        coordinate_file: oxDNA configuration file (three header lines skipped).
        topology_file: oxDNA topology file (one header line skipped); rows are
            read as ``(int, base letter, int, int)``.
        virt2nuc: optional path to a pickled virt2nuc mapping. When given,
            basepairs and stacks come from that table; if reading it fails a
            warning is logged and connectivity is inferred from the geometry.
        get_nt_prop: accepted for interface compatibility; currently unused.
        **model_parameters: forwarded to ``model_from_basepair_stack_3prime``.

    Returns:
        The model built by ``model_from_basepair_stack_3prime``, with the
        per-nucleotide table attached as ``model._dataframe``.
    """
    top_data = np.loadtxt(topology_file, skiprows=1,
                          unpack=True,
                          dtype=np.dtype('i4,U1,i4,i4')
                          )
    conf_data = np.loadtxt(coordinate_file, skiprows=3)

    arrays = None
    if virt2nuc is not None:
        try:
            arrays = _arrays_from_virt2nuc(virt2nuc, top_data, conf_data)
        except Exception as err:
            # Best effort: keep the historical fallback, but say why.
            logger.warning(f'mrdna_model_from_oxdna: could not use virt2nuc file {virt2nuc!r} '
                           f'({type(err).__name__}: {err}); inferring basepairs from geometry')
    if arrays is None:
        arrays = _arrays_from_geometry(top_data, conf_data)

    r, bp, stack, three_prime, seq, orientation, nt_prop = arrays

    logger.info(f'mrdna_model_from_oxdna: num_bp, num_ss_nt, num_stacked: {np.sum(bp>=0)//2} {np.sum(bp<0)} {np.sum(stack >= 0)}')

    model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )

    model._dataframe=nt_prop
    return model

if __name__ == "__main__":
    mrdna_model_from_oxdna("0-from-collab/nanopore.oxdna","0-from-collab/nanopore.top")
    # mrdna_model_from_oxdna("2-oxdna.manual/output/from_mrdna-oxdna-min.last.conf","0-from-collab/nanopore.top")
"import cadnano\n", + "from cadnano.document import Document\n" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "5ebd0c89-5dd5-41a3-85e1-10b53cb34113", + "execution_count": 2, + "id": "cb40f6b8", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>c</th>\n", - " <th>d</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>2</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>3</td>\n", - " <td>4</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>4</td>\n", - " <td>5</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>5</td>\n", - " <td>6</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>6</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " c d\n", - "0 1 2\n", - "1 2 3\n", - "2 3 4\n", - "3 4 5\n", - "4 5 6\n", - "5 6 1" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "c=[1,2,3,4,5,6]\n", - "d=[2,3,4,5,6,1]\n", - "pd.DataFrame({\"c\":c,\"d\":d})" + "from cadnano.views.pathview import pathstyles" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "1a6b8cb2", + "execution_count": 1, + "id": "ea7e8da0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "pip3 install termcolor\n" - ] - }, - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 
'mrdna'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Document\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmrdna\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marbdmodel\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcoords\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m readArbdCoords, readAvgArbdCoords, rotationAboutAxis\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mrdna'" + " _\n", + " _____ ___ _| |___ ___\n", + "| | _| . 
| | .'|\n", + "|_|_|_|_| |___|_|_|__,| v1.0a.dev74 \n", + "it/its\n", + "\n" ] } ], @@ -137,19 +63,204 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "c7d2f43e", + "execution_count": 15, + "id": "3a134bd9-0d8c-40b2-bf71-7b2310e09802", "metadata": {}, "outputs": [], "source": [ - "df=pd.DataFrame(data=d)\n", - "df=df.set_index(\"num\")" + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(lattice_type)\n", + " return lattice_type\n", + "\n", + "\n", + "def read_json_file(filename):\n", + " import cadnano\n", + " from cadnano.document import Document\n", + "\n", + " try:\n", + " with open(filename) as ch:\n", + " json_data = json.load(ch)\n", + " except:\n", + " with open(filename) as ch:\n", + " content = \"\"\n", + " for l in ch:\n", + " l = re.sub(r\"'\", r'\"', l)\n", + " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", + " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", + " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", + " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", + " content += l+\"\\n\"\n", + " json_data = json.loads(content)\n", + "\n", + " try:\n", + " doc = Document()\n", + " cadnano.fileio.v3decode.decode(doc, json_data)\n", + " decoder = 3\n", + " except:\n", + " doc = Document()\n", + " cadnano.fileio.v2decode.decode(doc, json_data)\n", + " decoder = 2\n", + "\n", + " parts = [p for p in doc.getParts()]\n", + " if len(parts) != 1:\n", + " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n", + " part = parts[0]\n", + "\n", + " if decoder == 2:\n", + " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n", + " l = 
get_lattice(part)\n", + " if l == 'square':\n", + " for id_num in part.getIdNums():\n", + " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n", + " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n", + " df=pd.DataFrame(json_data[\"vstrands\"])\n", + " n_df=df.set_index(\"num\")\n", + " return part\n", + "\n", + "def get_helix_angle(part, helix_id, indices):\n", + " \"\"\" Get \"start_orientation\" for helix \"\"\"\n", + " # import ipdb\n", + " # ipdb.set_trace()\n", + "\n", + " \"\"\" FROM CADNANO2.5\n", + " + angle is CCW\n", + " - angle is CW\n", + " Right handed DNA rotates clockwise from 5' to 3'\n", + " we use the convention the 5' end starts at 0 degrees\n", + " and it's pair is minor_groove_angle degrees away\n", + " direction, hence the minus signs. eulerZ\n", + " \"\"\"\n", + "\n", + " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n", + " ['helical_pitch',\n", + " 'bases_per_repeat',\n", + " 'turns_per_repeat',\n", + " 'eulerZ',\n", + " 'minor_groove_angle']]\n", + " twist_per_base = tpr*360./bpr\n", + " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n", + " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n", + " return angle\n", + "\n", + "def gen_id_series(strand,part):\n", + " df=pd.DataFrame(columns=[\"vh\",\"zid\",\"fwd\",\"stack_tuple\",\"threeprime_tuple\",\"x\",\"y\",\"z\"],index=range(strand.totalLength()),dtype=object)\n", + " df[\"vh\"]=strand._id_num\n", + " df[\"fwd\"]=strand.isForward()\n", + " df[\"x\"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10\n", + " df[\"y\"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10\n", + " id_lo,id_hi=strand.idxs()\n", + " zids=[str(i) for i in range(id_lo,id_hi+1)]\n", + " insert_dict={}\n", + " insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])\n", + " z=np.arange(id_lo,id_hi+1)\n", + " zids=[str(i) for i in range(id_lo,id_hi+1)]\n", + " z=list(np.arange(id_lo,id_hi+1))\n", + " zids=[str(i) for i in range(id_lo,id_hi+1)]\n", + " 
for insert_base in insert_dict:\n", + " z_ind=zids.index(str(insert_base))\n", + " z_val=insert_dict[insert_base]\n", + " z_pos_ind=z.index(insert_base)\n", + " zids.pop(z_ind)\n", + " z.pop(z_pos_ind)\n", + " if z_val!=-1:\n", + " #l=[str(insert_base)+\".\"+str(i) for i in range(z_val+1)]\n", + " l=list(range(z_val+1))\n", + " l.reverse()\n", + " for k in l: \n", + " zids.insert(z_ind,str(insert_base)+\".\"+str(k))\n", + " z.insert(z_pos_ind,insert_base+k/(z_val+1))\n", + " df[\"zid\"]=zids\n", + " df[\"z\"]=np.array(z)*3.4\n", + " \n", + " \n", + " L=[(df[\"vh\"][i],df[\"zid\"][i],df[\"fwd\"][i]) for i in df.index]\n", + " if strand.isForward()==True:\n", + " df[\"stack_tuple\"]=L[1:]+[-1]\n", + " if strand.connection3p() is None:\n", + " df[\"threeprime_tuple\"]=L[1:]+[-1]\n", + " else:\n", + " df[\"threeprime_tuple\"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]\n", + " \n", + " \n", + " else:\n", + " df[\"stack_tuple\"]=[-1]+L[0:-1]\n", + " if strand.connection3p() is None:\n", + " df[\"threeprime_tuple\"]=[-1]+L[0:-1]\n", + " else:\n", + " df[\"threeprime_tuple\"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1]\n", + " ## cadnano 3.1 sequence assign is wrong if there is insertion or deletion. 
\n", + " df[\"r\"]=[np.array([df[\"x\"][i],df[\"y\"][i],df[\"z\"][i]],dtype=np.float32) for i in df.index]\n", + " \n", + " return [pd.Series(df.loc[i]) for i in df.index]\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "62f9b7f3", + "execution_count": 18, + "id": "04c497ae", + "metadata": {}, + "outputs": [], + "source": [ + "def gen_prop_table(part):\n", + " strand_set=[]\n", + " for i in part.getidNums():\n", + " fwd,rev=part.getStrandSets(i)\n", + " [strand_set.append(i) for i in fwd.strands()]\n", + " [strand_set.append(i) for i in rev.strands()]\n", + " id_series=[]\n", + " for i in strand_set:\n", + " id_series=id_series+gen_id_series(i,part)\n", + " \n", + " nt_prop=pd.DataFrame(id_series)\n", + " nt_prop.reset_index(inplace=True)\n", + " nt_prop[\"seq\"]=-1\n", + " ind_tuple=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"fwd\"]))\n", + " stacks=[]\n", + " for i in list(nt_prop[\"stack_tuple\"]):\n", + " if i ==-1:\n", + " stacks.append(i)\n", + " else:\n", + " stacks.append(ind_tuple.index(i))\n", + " nt_prop[\"stack\"]=stacks\n", + " tprime=[]\n", + " for i in list(nt_prop[\"threeprime_tuple\"]):\n", + " if i ==-1:\n", + " tprime.append(i)\n", + " else:\n", + " tprime.append(ind_tuple.index(i))\n", + " nt_prop[\"threeprime\"]=tprime\n", + " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid]\n", + " nt_prop=nt_prop.fillna(-1)\n", + " counter=-1\n", + " bp=-np.ones(len(nt_prop.index),dtype=int)\n", + " bp_map=dict(zip(ind_tuple,nt_prop.index))\n", + " for i,j,k in ind_tuple:\n", + " counter+=1\n", + " try:\n", + " bp[counter]=bp_map[(i,j,not(k))]\n", + " except:\n", + " pass\n", + " nt_prop[\"bp\"]=bp\n", + "\n", + " return nt_prop" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "11b0f5de-80f6-4845-b97f-a9d31e7be90a", "metadata": {}, "outputs": [ { @@ -173,1125 +284,949 @@ " <thead>\n", " <tr 
style=\"text-align: right;\">\n", " <th></th>\n", - " <th>row</th>\n", - " <th>col</th>\n", - " <th>scaf</th>\n", - " <th>stap</th>\n", - " <th>loop</th>\n", - " <th>skip</th>\n", - " <th>scafLoop</th>\n", - " <th>stapLoop</th>\n", - " <th>stap_colors</th>\n", - " </tr>\n", - " <tr>\n", - " <th>num</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", + " <th>index</th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>fwd</th>\n", + " <th>stack_tuple</th>\n", + " <th>threeprime_tuple</th>\n", + " <th>x</th>\n", + " <th>y</th>\n", + " <th>z</th>\n", + " <th>r</th>\n", + " <th>seq</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>orientation</th>\n", + " <th>bp</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>12</td>\n", - " <td>16</td>\n", - " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", - " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[[23, 13369809], [38, 12060012]]</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>12</td>\n", - " <td>15</td>\n", - " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", - " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[[3, 1501302]]</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>(0, 6, True)</td>\n", + " <td>(0, 6, True)</td>\n", + " <td>0.000000</td>\n", + " <td>22.50</td>\n", + " <td>17.0</td>\n", + " <td>[0.0, 22.5, 17.0]</td>\n", + " <td>-1</td>\n", + " <td>1</td>\n", + " 
<td>1</td>\n", + " <td>81.428571</td>\n", + " <td>38</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>6</td>\n", + " <td>True</td>\n", + " <td>(0, 7, True)</td>\n", + " <td>(0, 7, True)</td>\n", + " <td>0.000000</td>\n", + " <td>22.50</td>\n", + " <td>20.4</td>\n", + " <td>[0.0, 22.5, 20.4]</td>\n", + " <td>-1</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>115.714286</td>\n", + " <td>39</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>13</td>\n", - " <td>15</td>\n", - " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n", - " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[[34, 8947848]]</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>7</td>\n", + " <td>True</td>\n", + " <td>(0, 8, True)</td>\n", + " <td>(0, 8, True)</td>\n", + " <td>0.000000</td>\n", + " <td>22.50</td>\n", + " <td>23.8</td>\n", + " <td>[0.0, 22.5, 23.8]</td>\n", + " <td>-1</td>\n", + " <td>3</td>\n", + " <td>3</td>\n", + " <td>150.000000</td>\n", + " <td>40</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>13</td>\n", - " <td>16</td>\n", - " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n", - " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[[0, 13369344]]</td>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " <td>8</td>\n", + " <td>True</td>\n", + " <td>(0, 9, True)</td>\n", + " <td>(0, 9, True)</td>\n", + " <td>0.000000</td>\n", + " <td>22.50</td>\n", + " <td>27.2</td>\n", + " <td>[0.0, 22.5, 27.2]</td>\n", + " <td>-1</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>184.285714</td>\n", + " 
<td>41</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>13</td>\n", - " <td>17</td>\n", - " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n", - " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[[39, 8947848]]</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", + " <td>9</td>\n", + " <td>True</td>\n", + " <td>(0, 10, True)</td>\n", + " <td>(0, 10, True)</td>\n", + " <td>0.000000</td>\n", + " <td>22.50</td>\n", + " <td>30.6</td>\n", + " <td>[0.0, 22.5, 30.6]</td>\n", + " <td>-1</td>\n", + " <td>5</td>\n", + " <td>5</td>\n", + " <td>218.571429</td>\n", + " <td>42</td>\n", " </tr>\n", " <tr>\n", - " <th>5</th>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>410</th>\n", " <td>12</td>\n", - " <td>17</td>\n", - " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n", - " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[[9, 0]]</td>\n", + " <td>5</td>\n", + " <td>35</td>\n", + " <td>False</td>\n", + " <td>(5, 34, False)</td>\n", + " <td>(5, 34, False)</td>\n", + " <td>19.485574</td>\n", + " <td>11.25</td>\n", + " <td>119.0</td>\n", + " <td>[19.485573, 11.25, 119.0]</td>\n", + " <td>-1</td>\n", + " <td>409</td>\n", + " <td>409</td>\n", + " <td>1110.000000</td>\n", + " <td>375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>411</th>\n", + " 
<td>13</td>\n", + " <td>5</td>\n", + " <td>36</td>\n", + " <td>False</td>\n", + " <td>(5, 35, False)</td>\n", + " <td>(5, 35, False)</td>\n", + " <td>19.485574</td>\n", + " <td>11.25</td>\n", + " <td>122.4</td>\n", + " <td>[19.485573, 11.25, 122.4]</td>\n", + " <td>-1</td>\n", + " <td>410</td>\n", + " <td>410</td>\n", + " <td>1144.285714</td>\n", + " <td>376</td>\n", + " </tr>\n", + " <tr>\n", + " <th>412</th>\n", + " <td>14</td>\n", + " <td>5</td>\n", + " <td>37</td>\n", + " <td>False</td>\n", + " <td>(5, 36, False)</td>\n", + " <td>(5, 36, False)</td>\n", + " <td>19.485574</td>\n", + " <td>11.25</td>\n", + " <td>125.8</td>\n", + " <td>[19.485573, 11.25, 125.8]</td>\n", + " <td>-1</td>\n", + " <td>411</td>\n", + " <td>411</td>\n", + " <td>1178.571429</td>\n", + " <td>377</td>\n", + " </tr>\n", + " <tr>\n", + " <th>413</th>\n", + " <td>15</td>\n", + " <td>5</td>\n", + " <td>38</td>\n", + " <td>False</td>\n", + " <td>(5, 37, False)</td>\n", + " <td>(5, 37, False)</td>\n", + " <td>19.485574</td>\n", + " <td>11.25</td>\n", + " <td>129.2</td>\n", + " <td>[19.485573, 11.25, 129.2]</td>\n", + " <td>-1</td>\n", + " <td>412</td>\n", + " <td>412</td>\n", + " <td>1212.857143</td>\n", + " <td>378</td>\n", + " </tr>\n", + " <tr>\n", + " <th>414</th>\n", + " <td>16</td>\n", + " <td>5</td>\n", + " <td>39</td>\n", + " <td>False</td>\n", + " <td>(5, 38, False)</td>\n", + " <td>(5, 38, False)</td>\n", + " <td>19.485574</td>\n", + " <td>11.25</td>\n", + " <td>132.6</td>\n", + " <td>[19.485573, 11.25, 132.6]</td>\n", + " <td>-1</td>\n", + " <td>413</td>\n", + " <td>413</td>\n", + " <td>1247.142857</td>\n", + " <td>379</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", + "<p>415 rows × 15 columns</p>\n", "</div>" ], "text/plain": [ - " row col scaf \\\n", - "num \n", - "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", - "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", - "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... 
\n", - "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n", - "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n", - "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n", - "\n", - " stap \\\n", - "num \n", - "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n", - "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", - "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n", - "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n", - "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", - "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + " index vh zid fwd stack_tuple threeprime_tuple x y \\\n", + "0 0 0 5 True (0, 6, True) (0, 6, True) 0.000000 22.50 \n", + "1 1 0 6 True (0, 7, True) (0, 7, True) 0.000000 22.50 \n", + "2 2 0 7 True (0, 8, True) (0, 8, True) 0.000000 22.50 \n", + "3 3 0 8 True (0, 9, True) (0, 9, True) 0.000000 22.50 \n", + "4 4 0 9 True (0, 10, True) (0, 10, True) 0.000000 22.50 \n", + ".. ... .. .. ... ... ... ... ... \n", + "410 12 5 35 False (5, 34, False) (5, 34, False) 19.485574 11.25 \n", + "411 13 5 36 False (5, 35, False) (5, 35, False) 19.485574 11.25 \n", + "412 14 5 37 False (5, 36, False) (5, 36, False) 19.485574 11.25 \n", + "413 15 5 38 False (5, 37, False) (5, 37, False) 19.485574 11.25 \n", + "414 16 5 39 False (5, 38, False) (5, 38, False) 19.485574 11.25 \n", "\n", - " loop \\\n", - "num \n", - "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", - "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", - "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", - "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", - "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", - "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 
\n", + " z r seq stack threeprime orientation \\\n", + "0 17.0 [0.0, 22.5, 17.0] -1 1 1 81.428571 \n", + "1 20.4 [0.0, 22.5, 20.4] -1 2 2 115.714286 \n", + "2 23.8 [0.0, 22.5, 23.8] -1 3 3 150.000000 \n", + "3 27.2 [0.0, 22.5, 27.2] -1 4 4 184.285714 \n", + "4 30.6 [0.0, 22.5, 30.6] -1 5 5 218.571429 \n", + ".. ... ... ... ... ... ... \n", + "410 119.0 [19.485573, 11.25, 119.0] -1 409 409 1110.000000 \n", + "411 122.4 [19.485573, 11.25, 122.4] -1 410 410 1144.285714 \n", + "412 125.8 [19.485573, 11.25, 125.8] -1 411 411 1178.571429 \n", + "413 129.2 [19.485573, 11.25, 129.2] -1 412 412 1212.857143 \n", + "414 132.6 [19.485573, 11.25, 132.6] -1 413 413 1247.142857 \n", "\n", - " skip scafLoop stapLoop \\\n", - "num \n", - "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", - "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", - "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", - "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", - "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", - "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + " bp \n", + "0 38 \n", + "1 39 \n", + "2 40 \n", + "3 41 \n", + "4 42 \n", + ".. ... 
\n", + "410 375 \n", + "411 376 \n", + "412 377 \n", + "413 378 \n", + "414 379 \n", "\n", - " stap_colors \n", - "num \n", - "0 [[23, 13369809], [38, 12060012]] \n", - "1 [[3, 1501302]] \n", - "2 [[34, 8947848]] \n", - "3 [[0, 13369344]] \n", - "4 [[39, 8947848]] \n", - "5 [[9, 0]] " + "[415 rows x 15 columns]" ] }, - "execution_count": 5, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df" + "gen_prop_table(p)" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "b317d21a", + "execution_count": null, + "id": "f2116b88", + "metadata": {}, + "outputs": [], + "source": [ + "import mrdna\n", + "from mrdna.readers import read_list" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "4c954133", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found cadnano version 2 file\n" - ] + "data": { + "text/plain": [ + "(415,)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "doc = Document()\n", - "def read_json_file(filename):\n", - " import json\n", - " import re\n", - "\n", - " try:\n", - " with open(filename) as ch:\n", - " data = json.load(ch)\n", - " except:\n", - " with open(filename) as ch:\n", - " content = \"\"\n", - " for l in ch:\n", - " l = re.sub(r\"'\", r'\"', l)\n", - " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", - " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", - " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", - " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", - " content += l+\"\\n\"\n", - " data = json.loads(content)\n", - " return data\n", - "f=read_json_file(\"test.json\")\n", - "cadnano.fileio.v2decode.decode(doc, f)\n", - "\n" + "np.array(list(nt_prop['bp'])).shape" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "dc7eb261", + "execution_count": 560, + "id": "5ee54071", "metadata": {}, - "outputs": [], + "outputs": 
[ + { + "data": { + "text/plain": [ + "(array([ 12, 13, 14, ..., 13920, 13921, 13922]),)" + ] + }, + "execution_count": 560, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def get_lattice(part):\n", - " lattice_type = None\n", - " _gt = part.getGridType()\n", - " try:\n", - " lattice_type = _gt.name.lower()\n", - " except:\n", - " if _gt == 1:\n", - " lattice_type = 'square'\n", - " elif _gt == 2:\n", - " lattice_type = 'honeycomb'\n", - " else:\n", - " print(\"WARNING: unable to determine cadnano part lattice type\")\n", - " return lattice_type\n" + "np.where(np.array(nt_prop[\"bp\"])!=-1)" ] }, { "cell_type": "code", - "execution_count": 35, - "id": "1bf753c6", - "metadata": {}, - "outputs": [], - "source": [ - "def read_json_file(filename):\n", - " import json\n", - " import re\n", - " import cadnano\n", - " from cadnano.document import Document\n", - "\n", - " try:\n", - " with open(filename) as ch:\n", - " json_data = json.load(ch)\n", - " except:\n", - " with open(filename) as ch:\n", - " content = \"\"\n", - " for l in ch:\n", - " l = re.sub(r\"'\", r'\"', l)\n", - " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", - " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", - " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", - " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", - " content += l+\"\\n\"\n", - " json_data = json.loads(content)\n", - "\n", - " try:\n", - " doc = Document()\n", - " cadnano.fileio.v3decode.decode(doc, json_data)\n", - " decoder = 3\n", - " except:\n", - " doc = Document()\n", - " cadnano.fileio.v2decode.decode(doc, json_data)\n", - " decoder = 2\n", - "\n", - " parts = [p for p in doc.getParts()]\n", - " if len(parts) != 1:\n", - " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n", - " part = parts[0]\n", - "\n", - " if decoder == 2:\n", - " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ 
for square lattice structures correctly, doing so here \"\"\"\n", - " l = get_lattice(part)\n", - " if l == 'square':\n", - " for id_num in part.getIdNums():\n", - " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n", - " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n", - " df=pd.DataFrame(json_data[\"vstrands\"])\n", - " n_df=df.set_index(\"num\")\n", - " else:\n", - " raise(\"Not yet implemented\")\n", - " \n", - " return part,df,decoder\n" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "17ac5a29-3c76-4b8b-9a98-6343bc91e9e8", + "execution_count": 21, + "id": "e1588c54", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.json test.json.oxdna test2.ipynb\n", - "Na_liu.json test.json.top test3.ipynb\n", - "rest_scaf_col.json test.sc test_cad2.5.json\n", - "test.ipynb test.seq.json test_insert.json\n", - "test.json test.virt2nuc test_insert_2.5.json\n" - ] + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>is_scaf</th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 1.7000000000000002]</td>\n", + " <td>213</td>\n", + " <td>-1</td>\n", + " <td>1</td>\n", + " <td>-1</td>\n", + " <td>[[0.14904226617617466, -0.9888308262251284, 0....</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0</td>\n", + 
" <td>6</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 2.04]</td>\n", + " <td>214</td>\n", + " <td>-1</td>\n", + " <td>2</td>\n", + " <td>-1</td>\n", + " <td>[[-0.4338837391175583, -0.900968867902419, 0.0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>7</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 2.3800000000000003]</td>\n", + " <td>215</td>\n", + " <td>-1</td>\n", + " <td>3</td>\n", + " <td>-1</td>\n", + " <td>[[-0.8660254037844388, -0.49999999999999994, 0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0</td>\n", + " <td>8</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 2.72]</td>\n", + " <td>216</td>\n", + " <td>-1</td>\n", + " <td>4</td>\n", + " <td>-1</td>\n", + " <td>[[-0.9972037971811805, 0.07473009358642399, 0....</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>9</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 3.06]</td>\n", + " <td>217</td>\n", + " <td>-1</td>\n", + " <td>5</td>\n", + " <td>-1</td>\n", + " <td>[[-0.7818314824680299, 0.6234898018587334, 0.0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>410</th>\n", + " <td>5</td>\n", + " <td>35</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 11.9]</td>\n", + " <td>205</td>\n", + " <td>-1</td>\n", + " <td>411</td>\n", + " <td>-1</td>\n", + " <td>[[0.8660254037844375, -0.5000000000000019, 0.0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>411</th>\n", + " <td>5</td>\n", + " <td>36</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 12.24]</td>\n", + " <td>206</td>\n", + " <td>-1</td>\n", + " <td>412</td>\n", + " <td>-1</td>\n", + " <td>[[0.4338837391175605, -0.900968867902418, 0.0]...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>412</th>\n", + " 
<td>5</td>\n", + " <td>37</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 12.58]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>413</td>\n", + " <td>-1</td>\n", + " <td>[[-0.14904226617617078, -0.9888308262251292, 0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>413</th>\n", + " <td>5</td>\n", + " <td>38</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 12.920000000000002]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>414</td>\n", + " <td>-1</td>\n", + " <td>[[-0.6801727377709186, -0.7330518718298275, 0....</td>\n", + " </tr>\n", + " <tr>\n", + " <th>414</th>\n", + " <td>5</td>\n", + " <td>39</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 13.260000000000002]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>[[-0.9749279121818233, -0.222520933956317, 0.0...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>415 rows × 9 columns</p>\n", + "</div>" + ], + "text/plain": [ + " vh zid is_scaf r bp stack \\\n", + "0 0 5 True [0.0, 2.25, 1.7000000000000002] 213 -1 \n", + "1 0 6 True [0.0, 2.25, 2.04] 214 -1 \n", + "2 0 7 True [0.0, 2.25, 2.3800000000000003] 215 -1 \n", + "3 0 8 True [0.0, 2.25, 2.72] 216 -1 \n", + "4 0 9 True [0.0, 2.25, 3.06] 217 -1 \n", + ".. .. ... ... ... ... ... \n", + "410 5 35 False [1.948557375, 1.125, 11.9] 205 -1 \n", + "411 5 36 False [1.948557375, 1.125, 12.24] 206 -1 \n", + "412 5 37 False [1.948557375, 1.125, 12.58] -1 -1 \n", + "413 5 38 False [1.948557375, 1.125, 12.920000000000002] -1 -1 \n", + "414 5 39 False [1.948557375, 1.125, 13.260000000000002] -1 -1 \n", + "\n", + " threeprime seq orientation \n", + "0 1 -1 [[0.14904226617617466, -0.9888308262251284, 0.... \n", + "1 2 -1 [[-0.4338837391175583, -0.900968867902419, 0.0... \n", + "2 3 -1 [[-0.8660254037844388, -0.49999999999999994, 0... \n", + "3 4 -1 [[-0.9972037971811805, 0.07473009358642399, 0.... \n", + "4 5 -1 [[-0.7818314824680299, 0.6234898018587334, 0.0... 
\n", + ".. ... ... ... \n", + "410 411 -1 [[0.8660254037844375, -0.5000000000000019, 0.0... \n", + "411 412 -1 [[0.4338837391175605, -0.900968867902418, 0.0]... \n", + "412 413 -1 [[-0.14904226617617078, -0.9888308262251292, 0... \n", + "413 414 -1 [[-0.6801727377709186, -0.7330518718298275, 0.... \n", + "414 -1 -1 [[-0.9749279121818233, -0.222520933956317, 0.0... \n", + "\n", + "[415 rows x 9 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "!ls" + "nt_prop" ] }, { "cell_type": "code", - "execution_count": 73, - "id": "1c3104ac-787f-4d32-9917-1c7111427925", + "execution_count": 468, + "id": "156dcda2", "metadata": {}, "outputs": [], "source": [ - "with open(\"test_insert_2.5.json\") as ch:\n", - " json_data = json.load(ch)" + "scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + "stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n" ] }, { "cell_type": "code", - "execution_count": 77, - "id": "9b534b7c", + "execution_count": 500, + "id": "6413d856", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'name': 'NaPart1',\n", - " 'color': '#0066cc',\n", - " 'is_visible': True,\n", - " 'active_phos': None,\n", - " 'crossover_span_angle': 45,\n", - " 'max_vhelix_length': 42,\n", - " 'neighbor_active_angle': '',\n", - " 'grid_type': 2,\n", - " 'virtual_helix_order': [0, 1, 2, 3, 4, 5, 6],\n", - " 'is_lattice': True,\n", - " '2': 0,\n", - " 'virtual_helices': {'name': ['vh0', 'vh1', 'vh2', 'vh3', 'vh4', 'vh5', 'vh6'],\n", - " 'is_visible': [True, True, True, True, True, True, True],\n", - " 'color': ['#0066cc',\n", - " '#0066cc',\n", - " '#0066cc',\n", - " '#0066cc',\n", - " '#0066cc',\n", - " '#0066cc',\n", - " '#0066cc'],\n", - " 'eulerZ': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n", - " 'neighbor_active_angle': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n", - " 'neighbors': ['[1, 5]',\n", - " '[0, 2]',\n", - " '[1, 3]',\n", - " '[2, 4]',\n", - " '[3, 5]',\n", - " '[0, 4]',\n", - " 
'[]'],\n", - " 'bases_per_repeat': [21, 21, 21, 21, 21, 21, 21],\n", - " 'turns_per_repeat': [2, 2, 2, 2, 2, 2, 2],\n", - " 'repeat_hint': [2, 2, 2, 2, 2, 2, 2],\n", - " 'helical_pitch': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n", - " 'minor_groove_angle': [180.0, 180.0, 180.0, 180.0, 180.0, 180.0, 180.0],\n", - " 'length': [42, 42, 42, 42, 42, 42, 42],\n", - " 'z': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]},\n", - " 'origins': [[7.7942295, 9.0, 0.0],\n", - " [5.845672125, 7.875, 0.0],\n", - " [5.845672125, 5.625, 0.0],\n", - " [7.7942295, 4.5, 0.0],\n", - " [9.742786875, 5.625, 0.0],\n", - " [9.742786875, 7.875, 0.0],\n", - " [-13.639901625, -1.125, 0.0]],\n", - " 'directions': [[0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0]],\n", - " 'vh_list': [[0, 42], [1, 42], [2, 42], [3, 42], [4, 42], [5, 42], [6, 42]],\n", - " 'strands': {'indices': [[[[5, 36], [39, 41]],\n", - " [[2, 20], [21, 23], [24, 27], [28, 38]]],\n", - " [[[3, 20], [21, 38]], [[5, 18], [19, 36], [39, 41]]],\n", - " [[[2, 18], [19, 32], [39, 41]], [[0, 34]]],\n", - " [[[0, 20], [21, 34]], [[2, 15], [16, 32], [37, 41]]],\n", - " [[[0, 3], [9, 15], [16, 39]], [[9, 20], [21, 39]]],\n", - " [[[9, 27], [28, 39]], [[0, 3], [9, 22], [23, 39]]],\n", - " [[[1, 31]], [[1, 31]]]],\n", - " 'properties': [[['#0066cc', '#0066cc'],\n", - " ['#16e876', '#cc01d1', '#000000', '#b8056c']],\n", - " [['#16e876', '#cc01d1'], ['#0066cc', '#0066cc', '#0066cc']],\n", - " [['#0066cc', '#0066cc', '#0066cc'], ['#888888']],\n", - " [['#cc0000', '#888888'], ['#0066cc', '#0066cc', '#0066cc']],\n", - " [['#0066cc', '#0066cc', '#0066cc'], ['#cc0000', '#888888']],\n", - " [['#000000', '#b8056c'], ['#0066cc', '#0066cc', '#0066cc']],\n", - " [['#0066cc'], ['#0066cc']]]},\n", - " 'insertions': [[0, 13, 10],\n", - " [0, 29, 11],\n", - " [1, 10, 1],\n", - " [2, 11, -1],\n", - " [3, 28, -1]],\n", - " 'xovers': [[0, True, 36, 1, 
False, 36],\n", - " [0, False, 21, 1, True, 21],\n", - " [0, False, 28, 5, True, 28],\n", - " [1, True, 20, 0, False, 20],\n", - " [1, False, 5, 0, True, 5],\n", - " [1, False, 19, 2, True, 19],\n", - " [2, True, 18, 1, False, 18],\n", - " [2, True, 32, 3, False, 32],\n", - " [3, True, 20, 4, False, 20],\n", - " [3, False, 2, 2, True, 2],\n", - " [3, False, 16, 4, True, 16],\n", - " [4, True, 15, 3, False, 15],\n", - " [4, True, 39, 5, False, 39],\n", - " [4, False, 21, 3, True, 21],\n", - " [5, True, 27, 0, False, 27],\n", - " [5, False, 9, 4, True, 9]],\n", - " 'oligos': [{'id_num': 6,\n", - " 'idx5p': 31,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo2304',\n", - " 'color': '#0066cc',\n", - " 'length': 31,\n", - " 'is_visible': True},\n", - " {'id_num': 0,\n", - " 'idx5p': 23,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo8960',\n", - " 'color': '#cc01d1',\n", - " 'length': 21,\n", - " 'is_visible': True},\n", - " {'id_num': 5,\n", - " 'idx5p': 3,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo9296',\n", - " 'color': '#0066cc',\n", - " 'length': 4,\n", - " 'is_visible': True},\n", - " {'id_num': 5,\n", - " 'idx5p': 9,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo1120',\n", - " 'color': '#000000',\n", - " 'length': 23,\n", - " 'is_visible': True},\n", - " {'id_num': 6,\n", - " 'idx5p': 1,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo7728',\n", - " 'color': '#0066cc',\n", - " 'length': 31,\n", - " 'is_visible': True},\n", - " {'id_num': 3,\n", - " 'idx5p': 41,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo4848',\n", - " 'color': '#0066cc',\n", - " 'length': 5,\n", - " 'is_visible': True},\n", - " {'id_num': 
2,\n", - " 'idx5p': 39,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo3552',\n", - " 'color': '#0066cc',\n", - " 'length': 3,\n", - " 'is_visible': True},\n", - " {'id_num': 3,\n", - " 'idx5p': 0,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo7008',\n", - " 'color': '#cc0000',\n", - " 'length': 33,\n", - " 'is_visible': True},\n", - " {'id_num': 4,\n", - " 'idx5p': 0,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo5696',\n", - " 'color': '#0066cc',\n", - " 'length': 4,\n", - " 'is_visible': True},\n", - " {'id_num': 4,\n", - " 'idx5p': 39,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo7856',\n", - " 'color': '#888888',\n", - " 'length': 32,\n", - " 'is_visible': True},\n", - " {'id_num': 1,\n", - " 'idx5p': 41,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo6016',\n", - " 'color': '#0066cc',\n", - " 'length': 3,\n", - " 'is_visible': True},\n", - " {'id_num': 0,\n", - " 'idx5p': 38,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo4560',\n", - " 'color': '#b8056c',\n", - " 'length': 34,\n", - " 'is_visible': True},\n", - " {'id_num': 0,\n", - " 'idx5p': 39,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo5504',\n", - " 'color': '#0066cc',\n", - " 'length': 3,\n", - " 'is_visible': True},\n", - " {'id_num': 5,\n", - " 'idx5p': 22,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo3488',\n", - " 'color': '#0066cc',\n", - " 'length': 208,\n", - " 'is_visible': True},\n", - " {'id_num': 1,\n", - " 'idx5p': 3,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 
'oligo1648',\n", - " 'color': '#16e876',\n", - " 'length': 48,\n", - " 'is_visible': True},\n", - " {'id_num': 2,\n", - " 'idx5p': 34,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo1536',\n", - " 'color': '#888888',\n", - " 'length': 34,\n", - " 'is_visible': True}],\n", - " 'instance_properties': [{'slice:position': [0.0, 0.0],\n", - " 'grid:position': [0.0, 0.0],\n", - " 'path:position': [0.0, 0.0]}],\n", - " 'uuid': '217c2ce287e943ca8d0e8cde1ffa3291'}" + "1" ] }, - "execution_count": 77, + "execution_count": 500, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "s=json_data[\"parts\"][0]\n" + "nttype(vslist[\"scaf\"][30])[146]" ] }, { "cell_type": "code", - "execution_count": 80, - "id": "be6cf774-3ef7-464c-8f34-9877e73a16d0", + "execution_count": 498, + "id": "fe8797db", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[[[5, 36], [39, 41]], [[2, 20], [21, 23], [24, 27], [28, 38]]]" + "0" ] }, - "execution_count": 80, + "execution_count": 498, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "s[\"strands\"][\"indices\"][0]" + "vhi,zidi=np.where(np.array(scaf_id)==1)\n", + "scaf_id[30][146]" ] }, { "cell_type": "code", - "execution_count": 71, - "id": "6ec811a1-297b-4782-827e-d7c8eebb5daa", + "execution_count": 480, + "id": "9f1b975f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "<fwd_StrandSet(2)>.<Strand(39, 41)>" + "True" ] }, - "execution_count": 71, + "execution_count": 480, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "s=[i for i in part.oligos()]\n", - "l=s[0]\n", - "l.strand3p()" + "scaf_id[30][146]==np.array(scaf_id)[0][9]" ] }, { "cell_type": "code", - "execution_count": 68, - "id": "188d4ed0-1664-44f1-8318-f11f4fd0e0bd", + "execution_count": 549, + "id": "a16f3fd0", "metadata": {}, "outputs": [ { - "ename": "AttributeError", - "evalue": "'NucleicAcidPart' object has no attribute 'strand3p'", - "output_type": 
"error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[68], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpart\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrand3p\u001b[49m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'NucleicAcidPart' object has no attribute 'strand3p'" - ] + "data": { + "text/plain": [ + "Int64Index([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " ...\n", + " 39, 39, 39, 39, 39, 39, 39, 39, 39, 39],\n", + " dtype='int64', name='num', length=7560)" + ] + }, + "execution_count": 549, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "part.strand3p" + "def nttype(scafs):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in scafs])\n", + " return n\n", + "d={}\n", + "vslist.index[vhi]" ] }, { "cell_type": "code", - "execution_count": 53, - "id": "6e2bd586-1f68-44d1-982d-93837c64e616", + "execution_count": 544, + "id": "b9f25d41", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "([(5, 36), (39, 41)], ['#0066cc', '#0066cc'])" + "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n", + " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n", + " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n", + " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)" ] }, - "execution_count": 53, + "execution_count": 544, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "fwd,rev=part.getStrandSets(0)\n", - "x=[]\n", - "fwd.dump(x)" + "np.where(vslist.index[vhi]!=vhi)" ] }, { "cell_type": "code", - "execution_count": 27, - "id": "cbb83c93", + "execution_count": 550, + "id": "976095ce", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "vh 29\n", + "zid 83\n", + "is_scaf True\n", + "r 
[-17.537016375, 28.125, 28.220000000000002]\n", + "bp -1\n", + "stack -1\n", + "threeprime -1\n", + "seq -1\n", + "orientation [[-0.5633200580636211, 0.8262387743159955, 0.0...\n", + "Name: 7394, dtype: object" + ] + }, + "execution_count": 550, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "n=dict(json_data)" + "nt_prop.loc[7394]" ] }, { "cell_type": "code", - "execution_count": 33, - "id": "7700a744-858d-4f83-b3be-fd7abb67ae76", + "execution_count": 548, + "id": "ac8f5067", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'name': 'NaPart1',\n", - " 'color': '#0066cc',\n", - " 'is_visible': True,\n", - " 'active_phos': None,\n", - " 'crossover_span_angle': 45,\n", - " 'max_vhelix_length': 42,\n", - " 'neighbor_active_angle': '',\n", - " 'grid_type': 2,\n", - " 'virtual_helix_order': [0, 1, 2, 3, 4, 5],\n", - " 'is_lattice': True,\n", - " 'virtual_helices': {'name': ['vh0', 'vh1', 'vh2', 'vh3', 'vh4', 'vh5'],\n", - " 'is_visible': [True, True, True, True, True, True],\n", - " 'color': ['#0066cc', '#0066cc', '#0066cc', '#0066cc', '#0066cc', '#0066cc'],\n", - " 'eulerZ': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n", - " 'neighbor_active_angle': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n", - " 'neighbors': ['[1, 5]', '[0, 2]', '[1, 3]', '[2, 4]', '[3, 5]', '[0, 4]'],\n", - " 'bases_per_repeat': [21, 21, 21, 21, 21, 21],\n", - " 'turns_per_repeat': [2, 2, 2, 2, 2, 2],\n", - " 'repeat_hint': [2, 2, 2, 2, 2, 2],\n", - " 'helical_pitch': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n", - " 'minor_groove_angle': [180.0, 180.0, 180.0, 180.0, 180.0, 180.0],\n", - " 'length': [42, 42, 42, 42, 42, 42],\n", - " 'z': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]},\n", - " 'origins': [[-1.948557375, 1.125, 0.0],\n", - " [0.0, 2.25, 0.0],\n", - " [1.948557375, 1.125, 0.0],\n", - " [1.948557375, -1.125, 0.0],\n", - " [0.0, -2.25, 0.0],\n", - " [-1.948557375, -1.125, 0.0]],\n", - " 'directions': [[0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 
1.0],\n", - " [0.0, 0.0, 1.0],\n", - " [0.0, 0.0, 1.0]],\n", - " 'vh_list': [[0, 42], [1, 42], [2, 42], [3, 42], [4, 42], [5, 42]],\n", - " 'strands': {'indices': [[[[0, 2], [3, 20], [21, 35]], []],\n", - " [[], [[0, 20], [21, 41]]],\n", - " [[[0, 23], [24, 41]], []],\n", - " [[], [[0, 9], [10, 23], [24, 30], [31, 41]]],\n", - " [[[0, 9], [10, 30], [31, 41]], []],\n", - " [[], [[0, 2], [3, 35], [36, 41]]]],\n", - " 'properties': [[['#0066cc', '#f7931e', '#f7931e'], []],\n", - " [[], ['#f7931e', '#f7931e']],\n", - " [['#f7931e', '#f7931e'], []],\n", - " [[], ['#0066cc', '#f7931e', '#f7931e', '#0066cc']],\n", - " [['#0066cc', '#f7931e', '#0066cc'], []],\n", - " [[], ['#333333', '#f7931e', '#0066cc']]]},\n", - " 'insertions': [],\n", - " 'xovers': [[0, True, 20, 1, False, 20],\n", - " [0, True, 35, 5, False, 35],\n", - " [1, False, 0, 2, True, 0],\n", - " [1, False, 21, 0, True, 21],\n", - " [2, True, 23, 3, False, 23],\n", - " [2, True, 41, 1, False, 41],\n", - " [3, False, 10, 4, True, 10],\n", - " [3, False, 24, 2, True, 24],\n", - " [3, False, 31, 4, True, 31],\n", - " [4, True, 9, 3, False, 9],\n", - " [4, True, 30, 3, False, 30],\n", - " [5, False, 3, 0, True, 3]],\n", - " 'oligos': [{'id_num': 4,\n", - " 'idx5p': 0,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo3632',\n", - " 'color': '#0066cc',\n", - " 'length': 20,\n", - " 'is_visible': True},\n", - " {'id_num': 3,\n", - " 'idx5p': 30,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': True,\n", - " 'sequence': None,\n", - " 'name': 'oligo8000',\n", - " 'color': '#f7931e',\n", - " 'length': 192,\n", - " 'is_visible': True},\n", - " {'id_num': 3,\n", - " 'idx5p': 41,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo0160',\n", - " 'color': '#0066cc',\n", - " 'length': 22,\n", - " 'is_visible': True},\n", - " {'id_num': 5,\n", - " 'idx5p': 41,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': 
False,\n", - " 'sequence': None,\n", - " 'name': 'oligo7152',\n", - " 'color': '#0066cc',\n", - " 'length': 6,\n", - " 'is_visible': True},\n", - " {'id_num': 0,\n", - " 'idx5p': 0,\n", - " 'is_5p_fwd': True,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo8896',\n", - " 'color': '#0066cc',\n", - " 'length': 3,\n", - " 'is_visible': True},\n", - " {'id_num': 5,\n", - " 'idx5p': 2,\n", - " 'is_5p_fwd': False,\n", - " 'is_circular': False,\n", - " 'sequence': None,\n", - " 'name': 'oligo1168',\n", - " 'color': '#333333',\n", - " 'length': 3,\n", - " 'is_visible': True}],\n", - " 'instance_properties': [{'slice:position': [0.0, 0.0],\n", - " 'grid:position': [0.0, 0.0],\n", - " 'path:position': [-12.994158258098764, -4.331386086032921]}],\n", - " 'uuid': '7029b213616f4ab5a1adcbf0d3a59edb'}" + "Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 29, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39, 41, 40, 42, 44, 46, 48, 50],\n", + " dtype='int64', name='num')" ] }, - "execution_count": 33, + "execution_count": 548, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "s=n[\"parts\"][0]\n", - "s" + "vslist.index" ] }, { "cell_type": "code", - "execution_count": 199, - "id": "3bb28a94", + "execution_count": 527, + "id": "1006fc48", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([0. 
, 2.25, 3.4 ])" + "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n", + " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n", + " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n", + " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)" ] }, - "execution_count": 199, + "execution_count": 527, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "p.getCoordinate(0,10)" + "n=list(nt_prop[\"zid\"])\n", + "np.where(np.array(list(nt_prop[\"vh\"]))==29)" ] }, { "cell_type": "code", - "execution_count": 434, - "id": "be894ade", + "execution_count": 503, + "id": "09c7e7d4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "((30, 146), True) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-503-1b9956d4cdaf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnt_prop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list" + ] + } + ], "source": [ - "def mrdna_model_from_cadnano(json_file,**model_parameters):\n", - " part,vslist=read_json_file(json_file)\n", - " props = part.getModelProperties().copy()\n", - " try:\n", - " if props.get('point_type') == PointType.ARBITRARY:\n", - " # TODO add code to encode Parts with ARBITRARY point configurations\n", - " raise NotImplementedError(\"Not implemented\")\n", - " except:\n", - " try:\n", - " vh_props, origins = part.helixPropertiesAndOrigins()\n", - " except:\n", - " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", - " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", - " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n", - " cad_bps=part.getIndices(0)\n", - " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n", - " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n", - " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", - " is_scaf=np.zeros(totnt,dtype=bool)\n", - " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", - " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", - " nt_prop[\"is_scaf\"]=is_scaf\n", - " tot_id=scaf_id+stap_id\n", - " vhi,zidi=np.where(np.array(scaf_id)==1)\n", - " vhj,zidj=np.where(np.array(stap_id)==1)\n", - " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n", - " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n", - " 
vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", - " nt_prop[\"r\"]=[part.getCoordinate(i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", - " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", - " nt_prop=nt_prop.fillna(-1)\n", - " for i in range(int(len(vhzid)/2)):\n", - " try:\n", - " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n", - " nt_prop[\"bp\"][bp1]=bp2\n", - " nt_prop[\"bp\"][bp2]=bp1\n", - " except:\n", - " pass\n", - " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", - " for i in range(len(nt_prop.index)):\n", - " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", - " if p==True:\n", - " k,l=(vslist[\"scaf\"][m])[n][2:]\n", - " if k!=-1 and l!=-1:\n", - " n=index2.index(((k,l),True))\n", - " tprime_list[i]=int(n)\n", - "\n", - " else:\n", - " k,l=(vslist[\"stap\"][m])[n][2:]\n", - " if k!=-1 and l!=-1:\n", - " n=index2.index(((k,l),False))\n", - " tprime_list[i]=int(n)\n", - " nt_prop[\"threeprime\"]=tprime_list\n", - " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", - " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", - " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", - "\n", - "\n", - " return nt_prop\n" + "vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + "index2=list(zip(vhzid,nt_prop[\"is_scaf\"]))\n", + "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " \n", + "print(index2.index(((30,146),(True))))" ] }, { "cell_type": "code", - "execution_count": 440, - "id": "9a290811", + "execution_count": 537, + "id": "5f0c5266", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,\n", - " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", - " 27, 28, 29, 30, 31, 66, 33, 34, -1, 0, 35, 36, 37,\n", - " 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 87, 49, 50,\n", - " 
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,\n", - " 64, 65, -1, 67, 68, 71, 72, 73, 74, 75, 76, 77, 78,\n", - " 79, 80, 81, 82, 83, 84, 85, 86, 48, 88, 89, 90, 91,\n", - " 92, 93, 94, 95, 96, 97, 98, 99, 100, 134, 102, 103, -1,\n", - " 70, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,\n", - " 116, 151, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,\n", - " 129, 130, 131, 132, 133, -1, 135, 136, 137, 138, 141, 142, 143,\n", - " -1, 145, 146, 147, 148, 149, 150, 117, 152, 153, 154, 155, 156,\n", - " 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,\n", - " 170, 171, 172, 173, 174, 209, -1, 175, 176, 177, 144, 179, 180,\n", - " 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, -1, 193,\n", - " 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206,\n", - " 207, 208, -1, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,\n", - " 220, 221, 222, 223, 224, 225, 226, 227, 265, 229, 230, -1, 232,\n", - " 233, 234, 403, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245,\n", - " 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,\n", - " 261, 262, 263, 264, 228, 266, 267, 268, 269, 270, 271, 272, 273,\n", - " 274, 275, 276, 277, 278, 279, 280, 281, 282, -1, -1, 283, 284,\n", - " 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297,\n", - " 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310,\n", - " 311, 312, 313, 314, 315, 316, 319, 320, 321, 322, 323, 324, 325,\n", - " 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338,\n", - " 364, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351,\n", - " 352, -1, -1, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362,\n", - " 363, 339, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375,\n", - " 376, 377, 378, 379, 380, 381, 382, 385, 386, 387, 388, 389, 390,\n", - " 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 235,\n", - " 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, -1])" - ] - }, - 
"execution_count": 440, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "11 135 30 146 3010\n" + ] + }, + { + "ename": "ValueError", + "evalue": "((30, 146), True) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-537-f07d5cbf0867>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list" + ] } ], "source": [ - "np.array(nt_prop[\"threeprime\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 302, - "id": "551cc70f", - "metadata": {}, - "outputs": [], - "source": [ - "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " \n", "for i in range(len(nt_prop.index)):\n", " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", " if p==True:\n", " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k==30 and l==146:\n", + " print(m,n,k,l,i)\n", " if k!=-1 and l!=-1:\n", " n=index2.index(((k,l),True))\n", " tprime_list[i]=int(n)\n", - " \n", + "\n", " else:\n", " k,l=(vslist[\"stap\"][m])[n][2:]\n", " if k!=-1 and l!=-1:\n", " n=index2.index(((k,l),False))\n", " tprime_list[i]=int(n)\n", - "nt_prop[\"threeprime\"]=tprime_list" - ] - }, - { - "cell_type": "code", - "execution_count": 368, - "id": "1d40286a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_helix_angle(part, helix_id, indices):\n", - " \"\"\" Get \"start_orientation\" for helix \"\"\"\n", - " # import ipdb\n", - " # ipdb.set_trace()\n", - "\n", - " \"\"\" FROM CADNANO2.5\n", - " + angle is CCW\n", - " - angle is CW\n", - " Right handed DNA rotates clockwise from 5' to 3'\n", - " we use the convention the 5' end starts at 0 degrees\n", - " and it's pair is minor_groove_angle degrees away\n", - " direction, hence the minus signs. 
eulerZ\n", - " \"\"\"\n", - "\n", - " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n", - " ['helical_pitch',\n", - " 'bases_per_repeat',\n", - " 'turns_per_repeat',\n", - " 'eulerZ',\n", - " 'minor_groove_angle']]\n", - " twist_per_base = tpr*360./bpr\n", - " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n", - " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n", - " return rotationAboutAxis(np.array((0,0,1)),angle)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 429, - "id": "f45dd87c", - "metadata": {}, - "outputs": [], - "source": [ - "\n", + "nt_prop[\"threeprime\"]=tprime_list\n", "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", - "\n", "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", - "\n", - "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n" + "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + " ## Todo: sequence " ] }, { "cell_type": "code", - "execution_count": 430, - "id": "ef29b662", + "execution_count": 491, + "id": "fec987da", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "33 -1\n", - "68 -1\n", - "102 -1\n", - "136 -1\n", - "142 -1\n", - "176 -1\n", - "194 399\n", - "211 -1\n", - "233 20\n", - "281 -1\n", - "284 -1\n", - "351 -1\n", - "354 145\n", - "413 -1\n", - "Name: bp, dtype: int64" + "[11, 135, 30, 147]" ] }, - "execution_count": 430, + "execution_count": 491, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "stackid" + "list(vslist.loc[30][\"scaf\"])[146]" ] }, { "cell_type": "code", - "execution_count": 431, - "id": "6678f56c", + "execution_count": 493, + "id": "f332ad87", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "232" - ] - }, - "execution_count": 431, - "metadata": {}, - "output_type": "execute_result" + "ename": 
"ValueError", + "evalue": "(30, 146) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-493-d1dd239124c3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvhzid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: (30, 146) is not in list" + ] } ], "source": [ - "nt_prop[\"stack\"][233]" + "vhzid.index((30,146))" ] }, { "cell_type": "code", - "execution_count": 433, - "id": "cb9dbf13", + "execution_count": 4, + "id": "dd3cd839", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "353" - ] - }, - "execution_count": 433, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "nt_prop[\"stack\"][354]" + "df=pd.DataFrame(data=d)\n", + "df=df.set_index(\"num\")" ] }, { "cell_type": "code", - "execution_count": 167, - "id": "165d3cc6", + "execution_count": null, + "id": "41b3d9af", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "9ec18edc", "metadata": {}, "outputs": [], "source": [ - "scaf_id=[nttype(vslist[\"scaf\"][i]) for i in vslist.index]\n", - "stap_id=[nttype(vslist[\"stap\"][i]) for i in vslist.index]\n", - "nts=scaf_id+stap_id" + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(\"WARNING: unable 
to determine cadnano part lattice type\")\n", + " return lattice_type\n" ] }, { "cell_type": "code", - "execution_count": 360, - "id": "b2856178", + "execution_count": 13, + "id": "2fa31a78", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + }, + { + "data": { + "text/plain": [ + "NucleicAcidPart_-1_2800" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "nt_prop[\"orientation\"]=[get_helix_angle(p,i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n" + "p=read_json_file(\"test/test.json\")\n", + "p" ] }, { "cell_type": "code", - "execution_count": 190, - "id": "07918f5c", + "execution_count": 441, + "id": "64eb309f", "metadata": {}, "outputs": [ { @@ -1462,157 +1397,324 @@ "5 [[9, 0]] " ] }, - "execution_count": 190, + "execution_count": 441, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "vslist" + "f" ] }, { "cell_type": "code", - "execution_count": 200, - "id": "86293e8a", + "execution_count": 199, + "id": "bda3cddd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. 
, 2.25, 3.4 ])" + ] + }, + "execution_count": 199, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p.getCoordinate(0,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 434, + "id": "a86cfa84", "metadata": {}, "outputs": [], "source": [ - "def mrdna_model_from_cadnano_v2(json_data,**model_parameters):\n", - " part,vslist=decode_cadnano_part(json_data)\n", + "def mrdna_model_from_cadnano(json_file,**model_parameters):\n", + " part,vslist=read_json_file(json_file)\n", " props = part.getModelProperties().copy()\n", - "\n", - " if props.get('point_type') == PointType.ARBITRARY:\n", + " try:\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", " # TODO add code to encode Parts with ARBITRARY point configurations\n", - " raise NotImplementedError(\"Not implemented\")\n", - " else:\n", + " raise NotImplementedError(\"Not implemented\")\n", + " except:\n", " try:\n", " vh_props, origins = part.helixPropertiesAndOrigins()\n", " except:\n", " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", - " scaf_id=np.array([nttype(vslist['scaf'][i]) for i in vslist.index])\n", - " stap_id=np.array([nttype(vslist['stap'][i]) for i in vslist.index])\n", + " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n", " cad_bps=part.getIndices(0)\n", - " vslist[\"scafnt\"]=np.sum(scaf_id,axis=1)\n", - " vslist[\"stapnt\"]=np.sum(stap_id,axis=1)\n", + " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n", + " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n", " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", - " is_scaf=np.zeros(totnt)\n", + " is_scaf=np.zeros(totnt,dtype=bool)\n", " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", " nt_prop[\"is_scaf\"]=is_scaf\n", - " 
vhi,zids=np.where(np.array(scaf_id+stap_id)==1)\n", - " nt_prop[\"vh\"]=vhi\n", - " nt_prop[\"zid\"]=zids\n", - " nt_prop[\"r\"] =part.getCoordinate(nt_prop[\"vh\"],nt_prop[\"zid\"])\n", - " return nt_prop\n" - ] - }, - { - "cell_type": "code", - "execution_count": 201, - "id": "b398277c", - "metadata": {}, - "outputs": [ + " tot_id=scaf_id+stap_id\n", + " vhi,zidi=np.where(np.array(scaf_id)==1)\n", + " vhj,zidj=np.where(np.array(stap_id)==1)\n", + " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n", + " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n", + " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + " nt_prop[\"r\"]=[part.getCoordinate(i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop=nt_prop.fillna(-1)\n", + " for i in range(int(len(vhzid)/2)):\n", + " try:\n", + " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n", + " nt_prop[\"bp\"][bp1]=bp2\n", + " nt_prop[\"bp\"][bp2]=bp1\n", + " except:\n", + " pass\n", + " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + " nt_prop[\"threeprime\"]=tprime_list\n", + " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + "\n", + "\n", + " return nt_prop\n" + ] + }, + { + "cell_type": "code", + "execution_count": 442, + "id": "be5de5ba", + "metadata": 
{}, + "outputs": [ { - "ename": "NameError", - "evalue": "name 'decode_cadnano_part' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-201-c5d589a8b80d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-200-181a924488ad>\u001b[0m in \u001b[0;36mmrdna_model_from_cadnano\u001b[0;34m(json_data, **model_parameters)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mmodel_parameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpart\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvslist\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_cadnano_part\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprops\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetModelProperties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'point_type'\u001b[0m\u001b[0;34m)\u001b[0m 
\u001b[0;34m==\u001b[0m \u001b[0mPointType\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mARBITRARY\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'decode_cadnano_part' is not defined" - ] + "data": { + "text/plain": [ + "array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1])" + ] + }, + "execution_count": 442, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "mrdna_model_from_cadnano(\"test.json\")" + "np.array(nt_prop[\"seq\"])" ] }, { "cell_type": "code", - "execution_count": 146, - "id": "c0d9eb64", + "execution_count": 302, + "id": "0ce6701d", "metadata": {}, "outputs": [], "source": [ - "a,b=np.where(np.array(nts)==1)" + "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + "for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + " \n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + "nt_prop[\"threeprime\"]=tprime_list" ] }, { "cell_type": "code", - "execution_count": 148, - "id": "ab563ec9", + "execution_count": 368, + "id": "9d0e49cf", + "metadata": {}, + "outputs": [], + "source": [ + "def get_helix_angle(part, helix_id, indices):\n", + " \"\"\" Get \"start_orientation\" for helix \"\"\"\n", + " # import ipdb\n", + " # ipdb.set_trace()\n", + "\n", + " \"\"\" FROM CADNANO2.5\n", + " + angle is CCW\n", + " - angle is CW\n", + " Right handed DNA rotates clockwise from 5' to 3'\n", + " we use the convention the 5' end starts at 0 degrees\n", + " and it's pair is minor_groove_angle degrees away\n", + " direction, hence the minus signs. 
eulerZ\n", + " \"\"\"\n", + "\n", + " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n", + " ['helical_pitch',\n", + " 'bases_per_repeat',\n", + " 'turns_per_repeat',\n", + " 'eulerZ',\n", + " 'minor_groove_angle']]\n", + " twist_per_base = tpr*360./bpr\n", + " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n", + " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n", + " return rotationAboutAxis(np.array((0,0,1)),angle)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 429, + "id": "c1d77642", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + "\n", + "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + "\n", + "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 430, + "id": "2c2d1227", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n", - " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40,\n", - " 41, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n", - " 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39,\n", - " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", - " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39,\n", - " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", - " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 37,\n", - " 38, 39, 40, 41, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", - " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n", - " 35, 36, 37, 38, 39, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16,\n", - " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", - " 34, 35, 36, 37, 38, 39])" + "33 -1\n", + "68 -1\n", + "102 -1\n", + "136 
-1\n", + "142 -1\n", + "176 -1\n", + "194 399\n", + "211 -1\n", + "233 20\n", + "281 -1\n", + "284 -1\n", + "351 -1\n", + "354 145\n", + "413 -1\n", + "Name: bp, dtype: int64" ] }, - "execution_count": 148, + "execution_count": 430, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "nt_prop=pd.DataFrame(index)" + "stackid" ] }, { "cell_type": "code", - "execution_count": 137, - "id": "3c45aeac", + "execution_count": 431, + "id": "e701d029", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "232" + ] + }, + "execution_count": 431, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def nttype(scafs):\n", - " def judge(i):\n", - " if i ==[-1,-1,-1,-1]:\n", - " return 0\n", - " else: return 1\n", - " n=np.array([judge(i) for i in scafs])\n", - " return n\n", - "\n" + "nt_prop[\"stack\"][233]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "803b3c67", + "execution_count": 433, + "id": "4a62f5d9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "353" + ] + }, + "execution_count": 433, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop[\"stack\"][354]" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "id": "d80ab792", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "scaf_id=[nttype(vslist[\"scaf\"][i]) for i in vslist.index]\n", + "stap_id=[nttype(vslist[\"stap\"][i]) for i in vslist.index]\n", + "nts=scaf_id+stap_id" + ] }, { "cell_type": "code", - "execution_count": 84, - "id": "adb6b347", + "execution_count": 360, + "id": "8e009bc9", "metadata": {}, "outputs": [], "source": [ - "b[\"scafnt\"]=[ntcount(b['scaf'][i]) for i in b.index]\n", - "b[\"stapnt\"]=[ntcount(b['stap'][i]) for i in b.index]" + "nt_prop[\"orientation\"]=[get_helix_angle(p,i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n" ] }, { "cell_type": "code", - "execution_count": 156, - "id": "307e53ad", + "execution_count": 190, 
+ "id": "3dc97f0d", "metadata": {}, "outputs": [ { @@ -1636,17 +1738,21 @@ " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", - " <th></th>\n", - " <th>r</th>\n", - " <th>bp</th>\n", - " <th>stack</th>\n", - " <th>threeprime</th>\n", - " <th>seq</th>\n", - " <th>orientation</th>\n", + " <th>row</th>\n", + " <th>col</th>\n", + " <th>scaf</th>\n", + " <th>stap</th>\n", + " <th>loop</th>\n", + " <th>skip</th>\n", + " <th>scafLoop</th>\n", + " <th>stapLoop</th>\n", + " <th>stap_colors</th>\n", " </tr>\n", " <tr>\n", - " <th>vh</th>\n", - " <th>zid</th>\n", + " <th>num</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", @@ -1658,2299 +1764,404 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <th>0</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", + " <td>12</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[23, 13369809], [38, 12060012]]</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <th>3</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <th>1</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <th>2</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <th>8</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " 
<td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " r bp stack threeprime seq orientation\n", - "vh zid \n", - "0 0 NaN NaN NaN NaN NaN NaN\n", - "1 3 NaN NaN NaN NaN NaN NaN\n", - "2 1 NaN NaN NaN NaN NaN NaN\n", - "3 2 NaN NaN NaN NaN NaN NaN\n", - "1 8 NaN NaN NaN NaN NaN NaN" - ] - }, - "execution_count": 156, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "i=range(5)\n", - "col=[\"vh\",\"zid\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"]\n", - "d=pd.DataFrame(index=i,columns=col)\n", - "d['vh']=[0,1,2,3,1]\n", - "d['zid']=[0,3,1,2,8]\n", - "d.set_index([\"vh\",\"zid\"],inplace=True)\n", - "d" - ] - }, - { - "cell_type": "code", - "execution_count": 157, - "id": "d030974e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>vh</th>\n", - " <th>zid</th>\n", - " <th>r</th>\n", - " <th>bp</th>\n", - " <th>stack</th>\n", - " <th>threeprime</th>\n", - " <th>seq</th>\n", - " <th>orientation</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", + " <td>12</td>\n", + " 
<td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[3, 1501302]]</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>1</td>\n", - " <td>8</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " vh zid r bp stack threeprime seq orientation\n", - "0 0 0 NaN NaN NaN NaN NaN NaN\n", - "1 1 3 NaN NaN NaN NaN NaN NaN\n", - "2 2 1 NaN NaN NaN NaN NaN NaN\n", - "3 3 2 NaN NaN NaN NaN NaN NaN\n", - "4 1 8 NaN NaN NaN NaN NaN NaN" - ] - }, - "execution_count": 157, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "d=d.reset_index()\n", - "d" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "id": "6ddb4784", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([0, 2]),)" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s=[True,False,True,False,False]\n", - "np.where(np.array(s)==True)" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "id": "28e3acea", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody 
tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th></th>\n", - " <th>r</th>\n", - " <th>bp</th>\n", - " <th>stack</th>\n", - " <th>threeprime</th>\n", - " <th>seq</th>\n", - " <th>orientation</th>\n", - " </tr>\n", - " <tr>\n", - " <th>vh</th>\n", - " <th>zid</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <th>0</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <th>3</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <th>1</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <th>2</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <th>8</th>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " r bp stack threeprime seq orientation\n", - "vh zid \n", - "0 0 NaN NaN NaN NaN NaN NaN\n", - "1 3 NaN NaN NaN NaN NaN NaN\n", - "2 1 NaN NaN NaN NaN NaN NaN\n", - "3 2 NaN NaN NaN NaN NaN NaN\n", - "1 8 NaN NaN NaN NaN NaN NaN" - 
] - }, - "execution_count": 127, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "id": "545acf6d", - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "\"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-100-a349feadc600>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3509\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_iterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3510\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3511\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_indexer_strict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 
3512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3513\u001b[0m \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5780\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_indexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reindex_non_unique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5782\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_if_missing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5783\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5784\u001b[0m \u001b[0mkeyarr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5840\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muse_interval_msg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5841\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5842\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of [{key}] are in the [{axis_name}]\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5844\u001b[0m \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: \"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"" - ] - } - ], - "source": [ - "d[[0,0]]" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "f6748d9c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[23, 13369809], [38, 12060012]]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[\"vstrands\"][0][\"stap_colors\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5005611f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "210" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", - "len(vh_vb._scaf)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "b6643a5e-d63e-452d-99d0-0f0ad460bdf3", - 
"metadata": {}, - "outputs": [], - "source": [ - "with open(\"test.virt2nuc\",\"rb\") as f:\n", - " df=pickle.load(f)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "83fc4ec6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(<libs.cadnano_utils.vhelix_vbase_to_nucleotide at 0x7f9b1b65c820>,\n", - " {0: (12, 16),\n", - " 1: (12, 15),\n", - " 2: (13, 15),\n", - " 3: (13, 16),\n", - " 4: (13, 17),\n", - " 5: (12, 17)})" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "1cd359b5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{0: (12, 16), 1: (12, 15), 2: (13, 15), 3: (13, 16), 4: (13, 17), 5: (12, 17)}" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pattern" - ] - }, - { - "cell_type": "code", - "execution_count": 173, - "id": "078656d6", - "metadata": {}, - "outputs": [], - "source": [ - "class strands():\n", - " def __init__(self):\n", - " self.row=0 \n", - " self.col=0\n", - " self.num=0\n", - " self.scaf=[]\n", - " self.stap=[]\n", - " self.loop=[]\n", - " self.skip=[]\n", - " self.scafLoop=[]\n", - " self.stapLoop=[]\n", - " self.stap_colors=[]\n", - " self.scaf_contact={}\n", - " self.stap_connect={}\n", - " def to_dict(self):\n", - " d={}\n", - " d['row']=self.row\n", - " d['col']=self.col\n", - " d['num']=self.num\n", - " d['scaf']=self.scaf\n", - " d['stap']=self.stap\n", - " d['loop']=self.loop\n", - " d['skip']=self.skip\n", - " d['scafLoop']=self.scafLoop\n", - " d['stapLoop']=self.stapLoop\n", - " d['stap_colors']=self.stap_colors\n", - " return d\n" - ] - }, - { - "cell_type": "code", - "execution_count": 177, - "id": "914acd5d", - "metadata": {}, - "outputs": [], - "source": [ - "def find_segs(vir2nuc_scaf):\n", - " oligos={}\n", - " for i in range(len(vir2nuc_scaf)):\n", - " 
oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n", - " if oligo not in oligos.keys():\n", - " oligos[oligo]=[]\n", - " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n", - " return oligos\n", - "\n", - "#class\n", - "def decode_vh_vb(virt2nuc):\n", - " vh_list={}\n", - " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", - " for i in pattern.keys():\n", - " s=strands()\n", - " s.row,s.col=pattern[i]\n", - " s.num=i\n", - " vh_list[s.num]=s\n", - " scafs=vh_vb._scaf\n", - " staps=vh_vb._stap\n", - " scaf_strands=find_segs(scafs)\n", - " scaf_oligos=list(scaf_strands.keys())\n", - " for i in scaf_oligos:\n", - " pass\n", - " \n", - " \n", - " return vh_list" - ] - }, - { - "cell_type": "code", - "execution_count": 198, - "id": "18132c9b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[(2, 34),\n", - " (2, 33),\n", - " (2, 32),\n", - " (2, 31),\n", - " (2, 30),\n", - " (2, 29),\n", - " (2, 28),\n", - " (2, 27),\n", - " (2, 26),\n", - " (2, 25),\n", - " (2, 24),\n", - " (2, 23),\n", - " (2, 22),\n", - " (2, 21),\n", - " (2, 20),\n", - " (2, 19),\n", - " (2, 18),\n", - " (2, 17),\n", - " (2, 16),\n", - " (2, 15),\n", - " (2, 14),\n", - " (2, 13),\n", - " (2, 12),\n", - " (2, 11),\n", - " (2, 10),\n", - " (2, 9),\n", - " (2, 8),\n", - " (2, 7),\n", - " (2, 6),\n", - " (2, 5),\n", - " (2, 4),\n", - " (2, 3),\n", - " (2, 2),\n", - " (2, 1),\n", - " (2, 0)],\n", - " [(1, 3),\n", - " (1, 4),\n", - " (1, 5),\n", - " (1, 6),\n", - " (1, 7),\n", - " (1, 8),\n", - " (1, 9),\n", - " (1, 10),\n", - " (1, 11),\n", - " (1, 12),\n", - " (1, 13),\n", - " (1, 14),\n", - " (1, 15),\n", - " (1, 16),\n", - " (1, 17),\n", - " (1, 18),\n", - " (1, 19),\n", - " (1, 20),\n", - " (0, 20),\n", - " (0, 19),\n", - " (0, 18),\n", - " (0, 17),\n", - " (0, 16),\n", - " (0, 15),\n", - " (0, 14),\n", - " (0, 13),\n", - " (0, 12),\n", - " (0, 11),\n", - " (0, 10),\n", - " (0, 9),\n", - " (0, 8),\n", - " (0, 7),\n", - " (0, 6),\n", - " (0, 5),\n", - " (0, 4),\n", - " (0, 3),\n", - " 
(0, 2)],\n", - " [(0, 23),\n", - " (0, 22),\n", - " (0, 21),\n", - " (1, 21),\n", - " (1, 22),\n", - " (1, 23),\n", - " (1, 24),\n", - " (1, 25),\n", - " (1, 26),\n", - " (1, 27),\n", - " (1, 28),\n", - " (1, 29),\n", - " (1, 30),\n", - " (1, 31),\n", - " (1, 32),\n", - " (1, 33),\n", - " (1, 34),\n", - " (1, 35),\n", - " (1, 36),\n", - " (1, 37),\n", - " (1, 38)],\n", - " [(5, 9),\n", - " (5, 10),\n", - " (5, 11),\n", - " (5, 12),\n", - " (5, 13),\n", - " (5, 14),\n", - " (5, 15),\n", - " (5, 16),\n", - " (5, 17),\n", - " (5, 18),\n", - " (5, 19),\n", - " (5, 20),\n", - " (5, 21),\n", - " (5, 22),\n", - " (5, 23),\n", - " (5, 24),\n", - " (5, 25),\n", - " (5, 26),\n", - " (5, 27),\n", - " (0, 27),\n", - " (0, 26),\n", - " (0, 25),\n", - " (0, 24)],\n", - " [(0, 38),\n", - " (0, 37),\n", - " (0, 36),\n", - " (0, 35),\n", - " (0, 34),\n", - " (0, 33),\n", - " (0, 32),\n", - " (0, 31),\n", - " (0, 30),\n", - " (0, 29),\n", - " (0, 28),\n", - " (5, 28),\n", - " (5, 29),\n", - " (5, 30),\n", - " (5, 31),\n", - " (5, 32),\n", - " (5, 33),\n", - " (5, 34),\n", - " (5, 35),\n", - " (5, 36),\n", - " (5, 37),\n", - " (5, 38),\n", - " (5, 39)],\n", - " [(3, 0),\n", - " (3, 1),\n", - " (3, 2),\n", - " (3, 3),\n", - " (3, 4),\n", - " (3, 5),\n", - " (3, 6),\n", - " (3, 7),\n", - " (3, 8),\n", - " (3, 9),\n", - " (3, 10),\n", - " (3, 11),\n", - " (3, 12),\n", - " (3, 13),\n", - " (3, 14),\n", - " (3, 15),\n", - " (3, 16),\n", - " (3, 17),\n", - " (3, 18),\n", - " (3, 19),\n", - " (3, 20),\n", - " (4, 20),\n", - " (4, 19),\n", - " (4, 18),\n", - " (4, 17),\n", - " (4, 16),\n", - " (4, 15),\n", - " (4, 14),\n", - " (4, 13),\n", - " (4, 12),\n", - " (4, 11),\n", - " (4, 10),\n", - " (4, 9)],\n", - " [(4, 39),\n", - " (4, 38),\n", - " (4, 37),\n", - " (4, 36),\n", - " (4, 35),\n", - " (4, 34),\n", - " (4, 33),\n", - " (4, 32),\n", - " (4, 31),\n", - " (4, 30),\n", - " (4, 29),\n", - " (4, 28),\n", - " (4, 27),\n", - " (4, 26),\n", - " (4, 25),\n", - " (4, 24),\n", - " (4, 23),\n", 
- " (4, 22),\n", - " (4, 21),\n", - " (3, 21),\n", - " (3, 22),\n", - " (3, 23),\n", - " (3, 24),\n", - " (3, 25),\n", - " (3, 26),\n", - " (3, 27),\n", - " (3, 28),\n", - " (3, 29),\n", - " (3, 30),\n", - " (3, 31),\n", - " (3, 32),\n", - " (3, 33),\n", - " (3, 34)]]" - ] - }, - "execution_count": 198, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s1=decode_vh_vb(\"virt2nuc\")\n", - "vh_vb,pattern=pd.read_pickle(\"virt2nuc\")\n", - "list(find_segs(vh_vb._stap).values())" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "dafaa4cf-47d9-4da3-8b63-1129c074c5ef", - "metadata": {}, - "outputs": [ - { - "data": { + " <td>13</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n", + " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[34, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>13</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n", + " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[0, 13369344]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>13</td>\n", + " <td>17</td>\n", + " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[39, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>12</td>\n", + " <td>17</td>\n", + " 
<td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[9, 0]]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], "text/plain": [ - "{0: (12, 16),\n", - " 1: (12, 15),\n", - " 2: (13, 15),\n", - " 3: (13, 16),\n", - " 4: (13, 17),\n", - " 5: (12, 17),\n", - " 6: (15, 5)}" + " row col scaf \\\n", + "num \n", + "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n", + "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n", + "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n", + "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n", + "\n", + " stap \\\n", + "num \n", + "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n", + "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n", + "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n", + "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "\n", + " loop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "\n", + " skip scafLoop stapLoop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 
[] [] \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "\n", + " stap_colors \n", + "num \n", + "0 [[23, 13369809], [38, 12060012]] \n", + "1 [[3, 1501302]] \n", + "2 [[34, 8947848]] \n", + "3 [[0, 13369344]] \n", + "4 [[39, 8947848]] \n", + "5 [[9, 0]] " ] }, - "execution_count": 28, + "execution_count": 190, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pattern" + "vslist" ] }, { "cell_type": "code", - "execution_count": null, - "id": "29e7336d-5ff3-4be1-bd47-585236ee7bb2", + "execution_count": 200, + "id": "3d019b73", "metadata": {}, "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "75e15cf7-b1a3-4133-8bc4-3eaab5922b96", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'_scaf': {(0, 39): (0, [2]),\n", - " (0, 40): (0, [1]),\n", - " (0, 41): (0, [0]),\n", - " (1, 41): (1, [5]),\n", - " (1, 40): (1, [4]),\n", - " (1, 39): (1, [3]),\n", - " (2, 39): (2, [8]),\n", - " (2, 40): (2, [7]),\n", - " (2, 41): (2, [6]),\n", - " (3, 41): (4, [48]),\n", - " (3, 40): (4, [47]),\n", - " (3, 39): (4, [46]),\n", - " (3, 38): (4, [45]),\n", - " (3, 37): (4, [44]),\n", - " (4, 0): (5, [52]),\n", - " (4, 1): (5, [51]),\n", - " (4, 2): (5, [50]),\n", - " (4, 3): (5, [49]),\n", - " (5, 3): (6, [56]),\n", - " (5, 2): (6, [55]),\n", - " (5, 1): (6, [54]),\n", - " (5, 0): (6, [53]),\n", - " (5, 22): (7, [244]),\n", - " (5, 21): (7, [243]),\n", - " (5, 20): (7, [242]),\n", - " (5, 19): (7, [241]),\n", - " (5, 18): (7, [240]),\n", - " (5, 17): (7, [239]),\n", - " (5, 16): (7, [238]),\n", - " (5, 15): (7, [237]),\n", - " (5, 14): (7, [236]),\n", - " (5, 13): (7, [235]),\n", - " (5, 12): (7, [234]),\n", - " (5, 11): (7, [233]),\n", - " (5, 10): (7, [232]),\n", - " (5, 9): (7, 
[231]),\n", - " (4, 9): (7, [230]),\n", - " (4, 10): (7, [229]),\n", - " (4, 11): (7, [228]),\n", - " (4, 12): (7, [227]),\n", - " (4, 13): (7, [226]),\n", - " (4, 14): (7, [225]),\n", - " (4, 15): (7, [224]),\n", - " (3, 15): (7, [223]),\n", - " (3, 14): (7, [222]),\n", - " (3, 13): (7, [221]),\n", - " (3, 12): (7, [220]),\n", - " (3, 11): (7, [219]),\n", - " (3, 10): (7, [218]),\n", - " (3, 9): (7, [217]),\n", - " (3, 8): (7, [216]),\n", - " (3, 7): (7, [215]),\n", - " (3, 6): (7, [214]),\n", - " (3, 5): (7, [213]),\n", - " (3, 4): (7, [212]),\n", - " (3, 3): (7, [211]),\n", - " (3, 2): (7, [210]),\n", - " (2, 2): (7, [209]),\n", - " (2, 3): (7, [208]),\n", - " (2, 4): (7, [207]),\n", - " (2, 5): (7, [206]),\n", - " (2, 6): (7, [205]),\n", - " (2, 7): (7, [204]),\n", - " (2, 8): (7, [203]),\n", - " (2, 9): (7, [202]),\n", - " (2, 10): (7, [201]),\n", - " (2, 11): (7, [200]),\n", - " (2, 12): (7, [199]),\n", - " (2, 13): (7, [198]),\n", - " (2, 14): (7, [197]),\n", - " (2, 15): (7, [196]),\n", - " (2, 16): (7, [195]),\n", - " (2, 17): (7, [194]),\n", - " (2, 18): (7, [193]),\n", - " (1, 18): (7, [192]),\n", - " (1, 17): (7, [191]),\n", - " (1, 16): (7, [190]),\n", - " (1, 15): (7, [189]),\n", - " (1, 14): (7, [188]),\n", - " (1, 13): (7, [187]),\n", - " (1, 12): (7, [186]),\n", - " (1, 11): (7, [185]),\n", - " (1, 10): (7, [184]),\n", - " (1, 9): (7, [183]),\n", - " (1, 8): (7, [182]),\n", - " (1, 7): (7, [181]),\n", - " (1, 6): (7, [180]),\n", - " (1, 5): (7, [179]),\n", - " (0, 5): (7, [178]),\n", - " (0, 6): (7, [177]),\n", - " (0, 7): (7, [176]),\n", - " (0, 8): (7, [175]),\n", - " (0, 9): (7, [174]),\n", - " (0, 10): (7, [173]),\n", - " (0, 11): (7, [172]),\n", - " (0, 12): (7, [171]),\n", - " (0, 13): (7, [170]),\n", - " (0, 14): (7, [169]),\n", - " (0, 15): (7, [168]),\n", - " (0, 16): (7, [167]),\n", - " (0, 17): (7, [166]),\n", - " (0, 18): (7, [165]),\n", - " (0, 19): (7, [164]),\n", - " (0, 20): (7, [163]),\n", - " (0, 21): (7, [162]),\n", - " (0, 22): 
(7, [161]),\n", - " (0, 23): (7, [160]),\n", - " (0, 24): (7, [159]),\n", - " (0, 25): (7, [158]),\n", - " (0, 26): (7, [157]),\n", - " (0, 27): (7, [156]),\n", - " (0, 28): (7, [155]),\n", - " (0, 29): (7, [154]),\n", - " (0, 30): (7, [153]),\n", - " (0, 31): (7, [152]),\n", - " (0, 32): (7, [151]),\n", - " (0, 33): (7, [150]),\n", - " (0, 34): (7, [149]),\n", - " (0, 35): (7, [148]),\n", - " (0, 36): (7, [147]),\n", - " (1, 36): (7, [146]),\n", - " (1, 35): (7, [145]),\n", - " (1, 34): (7, [144]),\n", - " (1, 33): (7, [143]),\n", - " (1, 32): (7, [142]),\n", - " (1, 31): (7, [141]),\n", - " (1, 30): (7, [140]),\n", - " (1, 29): (7, [139]),\n", - " (1, 28): (7, [138]),\n", - " (1, 27): (7, [137]),\n", - " (1, 26): (7, [136]),\n", - " (1, 25): (7, [135]),\n", - " (1, 24): (7, [134]),\n", - " (1, 23): (7, [133]),\n", - " (1, 22): (7, [132]),\n", - " (1, 21): (7, [131]),\n", - " (1, 20): (7, [130]),\n", - " (1, 19): (7, [129]),\n", - " (2, 19): (7, [128]),\n", - " (2, 20): (7, [127]),\n", - " (2, 21): (7, [126]),\n", - " (2, 22): (7, [125]),\n", - " (2, 23): (7, [124]),\n", - " (2, 24): (7, [123]),\n", - " (2, 25): (7, [122]),\n", - " (2, 26): (7, [121]),\n", - " (2, 27): (7, [120]),\n", - " (2, 28): (7, [119]),\n", - " (2, 29): (7, [118]),\n", - " (2, 30): (7, [117]),\n", - " (2, 31): (7, [116]),\n", - " (2, 32): (7, [115]),\n", - " (3, 32): (7, [114]),\n", - " (3, 31): (7, [113]),\n", - " (3, 30): (7, [112]),\n", - " (3, 29): (7, [111]),\n", - " (3, 28): (7, [110]),\n", - " (3, 27): (7, [109]),\n", - " (3, 26): (7, [108]),\n", - " (3, 25): (7, [107]),\n", - " (3, 24): (7, [106]),\n", - " (3, 23): (7, [105]),\n", - " (3, 22): (7, [104]),\n", - " (3, 21): (7, [103]),\n", - " (3, 20): (7, [102]),\n", - " (3, 19): (7, [101]),\n", - " (3, 18): (7, [100]),\n", - " (3, 17): (7, [99]),\n", - " (3, 16): (7, [98]),\n", - " (4, 16): (7, [97]),\n", - " (4, 17): (7, [96]),\n", - " (4, 18): (7, [95]),\n", - " (4, 19): (7, [94]),\n", - " (4, 20): (7, [93]),\n", - " (4, 21): (7, 
[92]),\n", - " (4, 22): (7, [91]),\n", - " (4, 23): (7, [90]),\n", - " (4, 24): (7, [89]),\n", - " (4, 25): (7, [88]),\n", - " (4, 26): (7, [87]),\n", - " (4, 27): (7, [86]),\n", - " (4, 28): (7, [85]),\n", - " (4, 29): (7, [84]),\n", - " (4, 30): (7, [83]),\n", - " (4, 31): (7, [82]),\n", - " (4, 32): (7, [81]),\n", - " (4, 33): (7, [80]),\n", - " (4, 34): (7, [79]),\n", - " (4, 35): (7, [78]),\n", - " (4, 36): (7, [77]),\n", - " (4, 37): (7, [76]),\n", - " (4, 38): (7, [75]),\n", - " (4, 39): (7, [74]),\n", - " (5, 39): (7, [73]),\n", - " (5, 38): (7, [72]),\n", - " (5, 37): (7, [71]),\n", - " (5, 36): (7, [70]),\n", - " (5, 35): (7, [69]),\n", - " (5, 34): (7, [68]),\n", - " (5, 33): (7, [67]),\n", - " (5, 32): (7, [66]),\n", - " (5, 31): (7, [65]),\n", - " (5, 30): (7, [64]),\n", - " (5, 29): (7, [63]),\n", - " (5, 28): (7, [62]),\n", - " (5, 27): (7, [61]),\n", - " (5, 26): (7, [60]),\n", - " (5, 25): (7, [59]),\n", - " (5, 24): (7, [58]),\n", - " (5, 23): (7, [57])},\n", - " '_stap': {(2, 34): (3, [43]),\n", - " (2, 33): (3, [42]),\n", - " (2, 32): (3, [41]),\n", - " (2, 31): (3, [40]),\n", - " (2, 30): (3, [39]),\n", - " (2, 29): (3, [38]),\n", - " (2, 28): (3, [37]),\n", - " (2, 27): (3, [36]),\n", - " (2, 26): (3, [35]),\n", - " (2, 25): (3, [34]),\n", - " (2, 24): (3, [33]),\n", - " (2, 23): (3, [32]),\n", - " (2, 22): (3, [31]),\n", - " (2, 21): (3, [30]),\n", - " (2, 20): (3, [29]),\n", - " (2, 19): (3, [28]),\n", - " (2, 18): (3, [27]),\n", - " (2, 17): (3, [26]),\n", - " (2, 16): (3, [25]),\n", - " (2, 15): (3, [24]),\n", - " (2, 14): (3, [23]),\n", - " (2, 13): (3, [22]),\n", - " (2, 12): (3, [21]),\n", - " (2, 11): (3, [20]),\n", - " (2, 10): (3, [19]),\n", - " (2, 9): (3, [18]),\n", - " (2, 8): (3, [17]),\n", - " (2, 7): (3, [16]),\n", - " (2, 6): (3, [15]),\n", - " (2, 5): (3, [14]),\n", - " (2, 4): (3, [13]),\n", - " (2, 3): (3, [12]),\n", - " (2, 2): (3, [11]),\n", - " (2, 1): (3, [10]),\n", - " (2, 0): (3, [9]),\n", - " (1, 3): (8, [281]),\n", 
- " (1, 4): (8, [280]),\n", - " (1, 5): (8, [279]),\n", - " (1, 6): (8, [278]),\n", - " (1, 7): (8, [277]),\n", - " (1, 8): (8, [276]),\n", - " (1, 9): (8, [275]),\n", - " (1, 10): (8, [274]),\n", - " (1, 11): (8, [273]),\n", - " (1, 12): (8, [272]),\n", - " (1, 13): (8, [271]),\n", - " (1, 14): (8, [270]),\n", - " (1, 15): (8, [269]),\n", - " (1, 16): (8, [268]),\n", - " (1, 17): (8, [267]),\n", - " (1, 18): (8, [266]),\n", - " (1, 19): (8, [265]),\n", - " (1, 20): (8, [264]),\n", - " (0, 20): (8, [263]),\n", - " (0, 19): (8, [262]),\n", - " (0, 18): (8, [261]),\n", - " (0, 17): (8, [260]),\n", - " (0, 16): (8, [259]),\n", - " (0, 15): (8, [258]),\n", - " (0, 14): (8, [257]),\n", - " (0, 13): (8, [256]),\n", - " (0, 12): (8, [255]),\n", - " (0, 11): (8, [254]),\n", - " (0, 10): (8, [253]),\n", - " (0, 9): (8, [252]),\n", - " (0, 8): (8, [251]),\n", - " (0, 7): (8, [250]),\n", - " (0, 6): (8, [249]),\n", - " (0, 5): (8, [248]),\n", - " (0, 4): (8, [247]),\n", - " (0, 3): (8, [246]),\n", - " (0, 2): (8, [245]),\n", - " (0, 23): (9, [302]),\n", - " (0, 22): (9, [301]),\n", - " (0, 21): (9, [300]),\n", - " (1, 21): (9, [299]),\n", - " (1, 22): (9, [298]),\n", - " (1, 23): (9, [297]),\n", - " (1, 24): (9, [296]),\n", - " (1, 25): (9, [295]),\n", - " (1, 26): (9, [294]),\n", - " (1, 27): (9, [293]),\n", - " (1, 28): (9, [292]),\n", - " (1, 29): (9, [291]),\n", - " (1, 30): (9, [290]),\n", - " (1, 31): (9, [289]),\n", - " (1, 32): (9, [288]),\n", - " (1, 33): (9, [287]),\n", - " (1, 34): (9, [286]),\n", - " (1, 35): (9, [285]),\n", - " (1, 36): (9, [284]),\n", - " (1, 37): (9, [283]),\n", - " (1, 38): (9, [282]),\n", - " (5, 9): (10, [325]),\n", - " (5, 10): (10, [324]),\n", - " (5, 11): (10, [323]),\n", - " (5, 12): (10, [322]),\n", - " (5, 13): (10, [321]),\n", - " (5, 14): (10, [320]),\n", - " (5, 15): (10, [319]),\n", - " (5, 16): (10, [318]),\n", - " (5, 17): (10, [317]),\n", - " (5, 18): (10, [316]),\n", - " (5, 19): (10, [315]),\n", - " (5, 20): (10, [314]),\n", - 
" (5, 21): (10, [313]),\n", - " (5, 22): (10, [312]),\n", - " (5, 23): (10, [311]),\n", - " (5, 24): (10, [310]),\n", - " (5, 25): (10, [309]),\n", - " (5, 26): (10, [308]),\n", - " (5, 27): (10, [307]),\n", - " (0, 27): (10, [306]),\n", - " (0, 26): (10, [305]),\n", - " (0, 25): (10, [304]),\n", - " (0, 24): (10, [303]),\n", - " (0, 38): (11, [348]),\n", - " (0, 37): (11, [347]),\n", - " (0, 36): (11, [346]),\n", - " (0, 35): (11, [345]),\n", - " (0, 34): (11, [344]),\n", - " (0, 33): (11, [343]),\n", - " (0, 32): (11, [342]),\n", - " (0, 31): (11, [341]),\n", - " (0, 30): (11, [340]),\n", - " (0, 29): (11, [339]),\n", - " (0, 28): (11, [338]),\n", - " (5, 28): (11, [337]),\n", - " (5, 29): (11, [336]),\n", - " (5, 30): (11, [335]),\n", - " (5, 31): (11, [334]),\n", - " (5, 32): (11, [333]),\n", - " (5, 33): (11, [332]),\n", - " (5, 34): (11, [331]),\n", - " (5, 35): (11, [330]),\n", - " (5, 36): (11, [329]),\n", - " (5, 37): (11, [328]),\n", - " (5, 38): (11, [327]),\n", - " (5, 39): (11, [326]),\n", - " (3, 0): (12, [381]),\n", - " (3, 1): (12, [380]),\n", - " (3, 2): (12, [379]),\n", - " (3, 3): (12, [378]),\n", - " (3, 4): (12, [377]),\n", - " (3, 5): (12, [376]),\n", - " (3, 6): (12, [375]),\n", - " (3, 7): (12, [374]),\n", - " (3, 8): (12, [373]),\n", - " (3, 9): (12, [372]),\n", - " (3, 10): (12, [371]),\n", - " (3, 11): (12, [370]),\n", - " (3, 12): (12, [369]),\n", - " (3, 13): (12, [368]),\n", - " (3, 14): (12, [367]),\n", - " (3, 15): (12, [366]),\n", - " (3, 16): (12, [365]),\n", - " (3, 17): (12, [364]),\n", - " (3, 18): (12, [363]),\n", - " (3, 19): (12, [362]),\n", - " (3, 20): (12, [361]),\n", - " (4, 20): (12, [360]),\n", - " (4, 19): (12, [359]),\n", - " (4, 18): (12, [358]),\n", - " (4, 17): (12, [357]),\n", - " (4, 16): (12, [356]),\n", - " (4, 15): (12, [355]),\n", - " (4, 14): (12, [354]),\n", - " (4, 13): (12, [353]),\n", - " (4, 12): (12, [352]),\n", - " (4, 11): (12, [351]),\n", - " (4, 10): (12, [350]),\n", - " (4, 9): (12, [349]),\n", - 
" (4, 39): (13, [414]),\n", - " (4, 38): (13, [413]),\n", - " (4, 37): (13, [412]),\n", - " (4, 36): (13, [411]),\n", - " (4, 35): (13, [410]),\n", - " (4, 34): (13, [409]),\n", - " (4, 33): (13, [408]),\n", - " (4, 32): (13, [407]),\n", - " (4, 31): (13, [406]),\n", - " (4, 30): (13, [405]),\n", - " (4, 29): (13, [404]),\n", - " (4, 28): (13, [403]),\n", - " (4, 27): (13, [402]),\n", - " (4, 26): (13, [401]),\n", - " (4, 25): (13, [400]),\n", - " (4, 24): (13, [399]),\n", - " (4, 23): (13, [398]),\n", - " (4, 22): (13, [397]),\n", - " (4, 21): (13, [396]),\n", - " (3, 21): (13, [395]),\n", - " (3, 22): (13, [394]),\n", - " (3, 23): (13, [393]),\n", - " (3, 24): (13, [392]),\n", - " (3, 25): (13, [391]),\n", - " (3, 26): (13, [390]),\n", - " (3, 27): (13, [389]),\n", - " (3, 28): (13, [388]),\n", - " (3, 29): (13, [387]),\n", - " (3, 30): (13, [386]),\n", - " (3, 31): (13, [385]),\n", - " (3, 32): (13, [384]),\n", - " (3, 33): (13, [383]),\n", - " (3, 34): (13, [382])},\n", - " 'nuc_count': 0,\n", - " 'strand_count': 0}" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vh_vb.__dict__" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "cef5068b-4858-44e2-9837-e95f1718e111", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(0, [2]),\n", - " (0, [1]),\n", - " (0, [0]),\n", - " (1, [5]),\n", - " (1, [4]),\n", - " (1, [3]),\n", - " (2, [8]),\n", - " (2, [7]),\n", - " (2, [6]),\n", - " (4, [47]),\n", - " (4, [46]),\n", - " (4, [45]),\n", - " (4, [44]),\n", - " (4, [43]),\n", - " (5, [51]),\n", - " (5, [50]),\n", - " (5, [49]),\n", - " (5, [48]),\n", - " (6, [55]),\n", - " (6, [54]),\n", - " (6, [53]),\n", - " (6, [52]),\n", - " (7, [263]),\n", - " (7, [262]),\n", - " (7, [261]),\n", - " (7, [260]),\n", - " (7, [259]),\n", - " (7, [258]),\n", - " (7, [257]),\n", - " (7, [256]),\n", - " (7, [255]),\n", - " (7, [254]),\n", - " (7, [253]),\n", - " (7, [252]),\n", - " (7, 
[251]),\n", - " (7, [250]),\n", - " (7, [249]),\n", - " (7, [248]),\n", - " (7, [247]),\n", - " (7, [246]),\n", - " (7, [245]),\n", - " (7, [244]),\n", - " (7, [243]),\n", - " (7, [242]),\n", - " (7, [241]),\n", - " (7, [240]),\n", - " (7, [239]),\n", - " (7, [238]),\n", - " (7, [237]),\n", - " (7, [236]),\n", - " (7, [235]),\n", - " (7, [234]),\n", - " (7, [233]),\n", - " (7, [232]),\n", - " (7, [231]),\n", - " (7, [230]),\n", - " (7, [229]),\n", - " (7, [228]),\n", - " (7, [227]),\n", - " (7, [226]),\n", - " (7, [225]),\n", - " (7, [224]),\n", - " (7, [223]),\n", - " (7, [222]),\n", - " (7, [221]),\n", - " (7, [220]),\n", - " (7, []),\n", - " (7, [219]),\n", - " (7, [218]),\n", - " (7, [217]),\n", - " (7, [216]),\n", - " (7, [215]),\n", - " (7, [214]),\n", - " (7, [213]),\n", - " (7, [212]),\n", - " (7, [211]),\n", - " (7, [210]),\n", - " (7, [209]),\n", - " (7, [208]),\n", - " (7, [207]),\n", - " (7, [206]),\n", - " (7, [205]),\n", - " (7, [203, 204]),\n", - " (7, [202]),\n", - " (7, [201]),\n", - " (7, [200]),\n", - " (7, [199]),\n", - " (7, [198]),\n", - " (7, [197]),\n", - " (7, [196]),\n", - " (7, [195]),\n", - " (7, [194]),\n", - " (7, [193]),\n", - " (7, [192]),\n", - " (7, [191]),\n", - " (7, [190]),\n", - " (7, [189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179]),\n", - " (7, [178]),\n", - " (7, [177]),\n", - " (7, [176]),\n", - " (7, [175]),\n", - " (7, [174]),\n", - " (7, [173]),\n", - " (7, [172]),\n", - " (7, [171]),\n", - " (7, [170]),\n", - " (7, [169]),\n", - " (7, [168]),\n", - " (7, [167]),\n", - " (7, [166]),\n", - " (7, [165]),\n", - " (7, [164]),\n", - " (7, [163, 162, 161, 160, 159, 158, 157, 156, 155, 154, 153, 152]),\n", - " (7, [151]),\n", - " (7, [150]),\n", - " (7, [149]),\n", - " (7, [148]),\n", - " (7, [147]),\n", - " (7, [146]),\n", - " (7, [145]),\n", - " (7, [144]),\n", - " (7, [143]),\n", - " (7, [142]),\n", - " (7, [141]),\n", - " (7, [140]),\n", - " (7, [139]),\n", - " (7, [138]),\n", - " (7, [137]),\n", - " (7, [136]),\n", 
- " (7, [135]),\n", - " (7, [134]),\n", - " (7, [133]),\n", - " (7, [132]),\n", - " (7, [131]),\n", - " (7, [130]),\n", - " (7, [129]),\n", - " (7, [128]),\n", - " (7, [127]),\n", - " (7, [126]),\n", - " (7, [125]),\n", - " (7, [124]),\n", - " (7, [123]),\n", - " (7, [122]),\n", - " (7, [121]),\n", - " (7, [120]),\n", - " (7, [119]),\n", - " (7, [118]),\n", - " (7, [117]),\n", - " (7, [116]),\n", - " (7, [115]),\n", - " (7, [114]),\n", - " (7, [113]),\n", - " (7, [112]),\n", - " (7, [111]),\n", - " (7, [110]),\n", - " (7, [109]),\n", - " (7, []),\n", - " (7, [108]),\n", - " (7, [107]),\n", - " (7, [106]),\n", - " (7, [105]),\n", - " (7, [104]),\n", - " (7, [103]),\n", - " (7, [102]),\n", - " (7, [101]),\n", - " (7, [100]),\n", - " (7, [99]),\n", - " (7, [98]),\n", - " (7, [97]),\n", - " (7, [96]),\n", - " (7, [95]),\n", - " (7, [94]),\n", - " (7, [93]),\n", - " (7, [92]),\n", - " (7, [91]),\n", - " (7, [90]),\n", - " (7, [89]),\n", - " (7, [88]),\n", - " (7, [87]),\n", - " (7, [86]),\n", - " (7, [85]),\n", - " (7, [84]),\n", - " (7, [83]),\n", - " (7, [82]),\n", - " (7, [81]),\n", - " (7, [80]),\n", - " (7, [79]),\n", - " (7, [78]),\n", - " (7, [77]),\n", - " (7, [76]),\n", - " (7, [75]),\n", - " (7, [74]),\n", - " (7, [73]),\n", - " (7, [72]),\n", - " (7, [71]),\n", - " (7, [70]),\n", - " (7, [69]),\n", - " (7, [68]),\n", - " (7, [67]),\n", - " (7, [66]),\n", - " (7, [65]),\n", - " (7, [64]),\n", - " (7, [63]),\n", - " (7, [62]),\n", - " (7, [61]),\n", - " (7, [60]),\n", - " (7, [59]),\n", - " (7, [58]),\n", - " (7, [57]),\n", - " (7, [56])]" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "list(vh_vb._scaf.values())" + "def mrdna_model_from_cadnano(json_data,**model_parameters):\n", + " part,vslist=decode_cadnano_part(json_data)\n", + " props = part.getModelProperties().copy()\n", + "\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point 
configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " else:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=np.array([nttype(vslist['scaf'][i]) for i in vslist.index])\n", + " stap_id=np.array([nttype(vslist['stap'][i]) for i in vslist.index])\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(scaf_id,axis=1)\n", + " vslist[\"stapnt\"]=np.sum(stap_id,axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " vhi,zids=np.where(np.array(scaf_id+stap_id)==1)\n", + " nt_prop[\"vh\"]=vhi\n", + " nt_prop[\"zid\"]=zids\n", + " nt_prop[\"r\"] =part.getCoordinate(nt_prop[\"vh\"],nt_prop[\"zid\"])\n", + " return nt_prop\n" ] }, { "cell_type": "code", - "execution_count": 32, - "id": "704b72d9-4745-4818-83b5-934ca486e1bd", + "execution_count": 201, + "id": "fb789ffb", "metadata": {}, "outputs": [ { - "ename": "KeyError", - "evalue": "0", + "ename": "NameError", + "evalue": "name 'decode_cadnano_part' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mvh_vb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_scaf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mKeyError\u001b[0m: 0" + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + 
"\u001b[0;32m<ipython-input-201-c5d589a8b80d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m<ipython-input-200-181a924488ad>\u001b[0m in \u001b[0;36mmrdna_model_from_cadnano\u001b[0;34m(json_data, **model_parameters)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mmodel_parameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpart\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvslist\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_cadnano_part\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprops\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetModelProperties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'point_type'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mPointType\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mARBITRARY\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'decode_cadnano_part' is not defined" ] } ], "source": [ - "vh_vb._scaf[0]" - ] - }, - { - "cell_type": "code", - 
"execution_count": 199, - "id": "5a920c32-cb2a-4a75-ac5a-d15fcff28aab", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def find_vh_vb_table(s,is_scaf):\n", - " L=[]\n", - " for i in list(s.keys()):\n", - " vh,zid=i\n", - " strand,indices=s[i]\n", - " if len(indices)==0:\n", - " continue\n", - " else:\n", - " if len(indices)==1:\n", - " zids=[str(zid)]\n", - " else:\n", - " zids=[str(zid)+\".\"+str(j) for j in range(len(indices))]\n", - " for index,z in zip(indices,zids):\n", - " L.append(pd.Series({\"index\":index,\"vh\":vh,\"zid\":z,\"strand\":strand,\"is_scaf\":bool(is_scaf)}))\n", - " return L\n", - "L1=find_vh_vb_table(vh_vb._scaf,1)\n", - "L2=find_vh_vb_table(vh_vb._stap,0)" - ] - }, - { - "cell_type": "code", - "execution_count": 219, - "id": "8a6971b1-be0a-4546-90a7-918fa482f873", - "metadata": {}, - "outputs": [], - "source": [ - "df=pd.DataFrame(L1+L2)\n", - "pd.options.mode.chained_assignment = None # default='warn'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e8e7753-bb38-43a9-a33e-7882db3ace50", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 255, - "id": "74ba42d3-aa69-4dd6-9c26-0023095b2923", - "metadata": {}, - "outputs": [], - "source": [ - "def get_virt2nuc(virt2nuc,top_data):\n", - " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", - " L1=find_vh_vb_table(vh_vb._scaf,1)\n", - " L2=find_vh_vb_table(vh_vb._stap,0)\n", - " nt_prop=pd.DataFrame(L1+L2)\n", - " nt_prop.set_index(\"index\",inplace=True)\n", - " nt_prop.sort_index(inplace=True)\n", - " nt_prop[\"threeprime\"]=top_data[2]\n", - " nt_prop[\"seq\"]=top_data[1]\n", - " nt_prop[\"stack\"]=top_data[2]\n", - " for i in nt_prop.index:\n", - " if nt_prop.loc[i][\"threeprime\"] in nt_prop.index:\n", - " if nt_prop.loc[nt_prop.loc[i][\"threeprime\"]][\"vh\"]!=nt_prop.loc[i][\"vh\"]:\n", - " nt_prop[\"stack\"][i]=-1\n", - " 
bp_map=dict(zip(zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"is_scaf\"]),nt_prop.index))\n", - " bp=-np.ones(len(nt_prop.index),dtype=int)\n", - " counter=0\n", - " for i,j,k in zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"is_scaf\"]):\n", - " try:\n", - " bp[counter]=bp_map[(i,j,not(k))]\n", - " except:\n", - " pass\n", - " counter+=1\n", - " nt_prop[\"bp\"]=bp\n", - " return nt_prop" - ] - }, - { - "cell_type": "code", - "execution_count": 257, - "id": "74afbfe6-5fd3-4f41-8a41-c0aa56ddd1be", - "metadata": {}, - "outputs": [], - "source": [ - "vh_vb,pattern=pd.read_pickle(\"virt2nuc\")\n", - "s=get_virt2nuc(\"virt2nuc\",top_data)\n" + "mrdna_model_from_cadnano(\"test.json\")" ] }, { "cell_type": "code", - "execution_count": 261, - "id": "9a115dac-e6cc-408c-9bc2-f33cf437f8c2", + "execution_count": 146, + "id": "98703867", "metadata": {}, "outputs": [], "source": [ - "s.to_csv(\"s.csv\")" + "a,b=np.where(np.array(nts)==1)" ] }, { "cell_type": "code", - "execution_count": 263, - "id": "170a2918-f113-475c-b0a3-edbf33153e33", + "execution_count": 148, + "id": "c316fead", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 -1\n", - "1 -1\n", - "2 -1\n", - "3 -1\n", - "4 -1\n", - " ..\n", - "450 77\n", - "451 76\n", - "452 75\n", - "453 74\n", - "454 73\n", - "Name: bp, Length: 455, dtype: int64" + "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n", + " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40,\n", + " 41, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n", + " 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39,\n", + " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39,\n", + " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 37,\n", + " 38, 39, 40, 41, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", + " 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n", + " 35, 36, 37, 38, 39, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39])" ] }, - "execution_count": 263, + "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "coordinate_col=\"bp\"\n", - "dg[coordinate_col]" + "nt_prop=pd.DataFrame(index)" ] }, { "cell_type": "code", - "execution_count": 176, - "id": "675f9e19-e4d8-4dfa-bf89-56f628ba2a75", + "execution_count": 137, + "id": "0718e41e", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[-1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " 228,\n", - " 227,\n", - " 226,\n", - " 225,\n", - " 224,\n", - " 223,\n", - " 222,\n", - " 221,\n", - " 220,\n", - " 219,\n", - " 218,\n", - " 217,\n", - " 216,\n", - " 215,\n", - " 214,\n", - " 213,\n", - " 126,\n", - " 125,\n", - " 124,\n", - " 123,\n", - " 122,\n", - " 121,\n", - " 120,\n", - " 119,\n", - " 118,\n", - " 117,\n", - " 116,\n", - " 115,\n", - " 114,\n", - " 113,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " 341,\n", - " 340,\n", - " 339,\n", - " 338,\n", - " 337,\n", - " 367,\n", - " 366,\n", - " 365,\n", - " 364,\n", - " 363,\n", - " 362,\n", - " 361,\n", - " 360,\n", - " 359,\n", - " 358,\n", - " 357,\n", - " 356,\n", - " 454,\n", - " 453,\n", - " 452,\n", - " 451,\n", - " 450,\n", - " 449,\n", - " 448,\n", - " 447,\n", - " 446,\n", - " 445,\n", - " 444,\n", - " 443,\n", - " 442,\n", - " 441,\n", - " 440,\n", - " 439,\n", - " 438,\n", - " 437,\n", - " 436,\n", - " 401,\n", - " 400,\n", - " 399,\n", - " 398,\n", - " 397,\n", - " 406,\n", - " 405,\n", - " 404,\n", - " 403,\n", - " 402,\n", - " 435,\n", - " 434,\n", - " 433,\n", 
- " 432,\n", - " 431,\n", - " 430,\n", - " 429,\n", - " 428,\n", - " 427,\n", - " 426,\n", - " 425,\n", - " 40,\n", - " 39,\n", - " 38,\n", - " 37,\n", - " 36,\n", - " 35,\n", - " 34,\n", - " 33,\n", - " 32,\n", - " 31,\n", - " 30,\n", - " 29,\n", - " 28,\n", - " 27,\n", - " 294,\n", - " 293,\n", - " 329,\n", - " 328,\n", - " 327,\n", - " 326,\n", - " 325,\n", - " 324,\n", - " 323,\n", - " 322,\n", - " 321,\n", - " 320,\n", - " 319,\n", - " 318,\n", - " 317,\n", - " 316,\n", - " 315,\n", - " 314,\n", - " 387,\n", - " 386,\n", - " 385,\n", - " 384,\n", - " 383,\n", - " 382,\n", - " 381,\n", - " 380,\n", - " 379,\n", - " 378,\n", - " 377,\n", - " 376,\n", - " 375,\n", - " 374,\n", - " 373,\n", - " 372,\n", - " 371,\n", - " 370,\n", - " 369,\n", - " 368,\n", - " 336,\n", - " 335,\n", - " 334,\n", - " 333,\n", - " 332,\n", - " 331,\n", - " 330,\n", - " 292,\n", - " 291,\n", - " 290,\n", - " 289,\n", - " 288,\n", - " 287,\n", - " 286,\n", - " 285,\n", - " 284,\n", - " 283,\n", - " 282,\n", - " 281,\n", - " 280,\n", - " 279,\n", - " 278,\n", - " 277,\n", - " 276,\n", - " 275,\n", - " 274,\n", - " 273,\n", - " 272,\n", - " 271,\n", - " 270,\n", - " 269,\n", - " 268,\n", - " 267,\n", - " 309,\n", - " 308,\n", - " 307,\n", - " 306,\n", - " 305,\n", - " 304,\n", - " 303,\n", - " 302,\n", - " 301,\n", - " 300,\n", - " 299,\n", - " 298,\n", - " 297,\n", - " 296,\n", - " 295,\n", - " 26,\n", - " 25,\n", - " 24,\n", - " 23,\n", - " 22,\n", - " 21,\n", - " 20,\n", - " 19,\n", - " 18,\n", - " 17,\n", - " 16,\n", - " 15,\n", - " 14,\n", - " 13,\n", - " 12,\n", - " 11,\n", - " 420,\n", - " 419,\n", - " 418,\n", - " 417,\n", - " 416,\n", - " 415,\n", - " 414,\n", - " 413,\n", - " 412,\n", - " 411,\n", - " 410,\n", - " 409,\n", - " 408,\n", - " 407,\n", - " 396,\n", - " 395,\n", - " 394,\n", - " 393,\n", - " 392,\n", - " 391,\n", - " 390,\n", - " 355,\n", - " 354,\n", - " 353,\n", - " 352,\n", - " 351,\n", - " 350,\n", - " 349,\n", - " 348,\n", - " 347,\n", - " 346,\n", - " 345,\n", - 
" 344,\n", - " 343,\n", - " 342,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " 197,\n", - " 196,\n", - " 195,\n", - " 194,\n", - " 193,\n", - " 192,\n", - " 191,\n", - " 190,\n", - " 189,\n", - " 188,\n", - " 187,\n", - " 186,\n", - " 185,\n", - " 184,\n", - " 183,\n", - " 182,\n", - " 181,\n", - " 180,\n", - " 179,\n", - " 178,\n", - " 177,\n", - " 176,\n", - " 175,\n", - " 174,\n", - " 173,\n", - " 172,\n", - " 128,\n", - " 127,\n", - " 212,\n", - " 211,\n", - " 210,\n", - " 209,\n", - " 208,\n", - " 207,\n", - " 206,\n", - " 205,\n", - " 204,\n", - " 203,\n", - " 202,\n", - " 201,\n", - " 200,\n", - " 199,\n", - " 198,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " 144,\n", - " 143,\n", - " 142,\n", - " 141,\n", - " 140,\n", - " 139,\n", - " 138,\n", - " 137,\n", - " 136,\n", - " 135,\n", - " 134,\n", - " 133,\n", - " 132,\n", - " 131,\n", - " 130,\n", - " 129,\n", - " 171,\n", - " 170,\n", - " 169,\n", - " 168,\n", - " 167,\n", - " 166,\n", - " 165,\n", - " 60,\n", - " 59,\n", - " 58,\n", - " 57,\n", - " 56,\n", - " 263,\n", - " 262,\n", - " 261,\n", - " 260,\n", - " 259,\n", - " 258,\n", - " 257,\n", - " 256,\n", - " 255,\n", - " 254,\n", - " 253,\n", - " 252,\n", - " 251,\n", - " 250,\n", - " 72,\n", - " 71,\n", - " 70,\n", - " 69,\n", - " 68,\n", - " 67,\n", - " 66,\n", - " 65,\n", - " 64,\n", - " 63,\n", - " 62,\n", - " 61,\n", - " 164,\n", - " 163,\n", - " 162,\n", - " 161,\n", - " 160,\n", - " 159,\n", - " 158,\n", - " 157,\n", - " 156,\n", - " 155,\n", - " 154,\n", - " 153,\n", - " 152,\n", - " 151,\n", - " 150,\n", - " 149,\n", - " 148,\n", - " 147,\n", - " 146,\n", - " 145,\n", - " -1,\n", - " -1,\n", - " 249,\n", - " 248,\n", - " 247,\n", - " 246,\n", - " 245,\n", - " 244,\n", - " 243,\n", - " 96,\n", - " 95,\n", - " 94,\n", - " 93,\n", - " 92,\n", - " 101,\n", - " 100,\n", - " 99,\n", - " 98,\n", - " 97,\n", - " 242,\n", - " 241,\n", - " 240,\n", - " 239,\n", - " 238,\n", - " 237,\n", - " 236,\n", - " 235,\n", - " 234,\n", - " 233,\n", - " 
232,\n", - " 231,\n", - " 230,\n", - " 229,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " 112,\n", - " 111,\n", - " 110,\n", - " 109,\n", - " 108,\n", - " 107,\n", - " 106,\n", - " 105,\n", - " 104,\n", - " 103,\n", - " 102,\n", - " 91,\n", - " 90,\n", - " 89,\n", - " 88,\n", - " 87,\n", - " 86,\n", - " 85,\n", - " 84,\n", - " 83,\n", - " 82,\n", - " 81,\n", - " 80,\n", - " 79,\n", - " 78,\n", - " 77,\n", - " 76,\n", - " 75,\n", - " 74,\n", - " 73]" - ] - }, - "execution_count": 176, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "vhzid=list(zip(dg[\"vh\"],dg[\"zid\"],dg[\"is_scaf\"]))\n", - "bp_list=dict(zip(vhzid,dg[\"index\"]))\n", - "bps =[]\n", - "for vh,zid,scaf in bp_list.keys():\n", - " try:\n", - " bps.append(bp_list[(vh,zid,not(scaf))])\n", - " except:\n", - " bps.append(-1)\n", - "bps " + "def nttype(scafs):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in scafs])\n", + " return n\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 126, - "id": "9f960b0e-61d2-448e-bde9-a6595d733a11", + "execution_count": null, + "id": "1a61115e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "12198835", "metadata": {}, "outputs": [], "source": [ - "top_data = np.loadtxt(\"test_insert.json.top\", skiprows=1,\n", - " unpack=True,\n", - " dtype=np.dtype('i4,U1,i4,i4')\n", - " )\n", - "dg=df.sort_index()" + "b[\"scafnt\"]=[ntcount(b['scaf'][i]) for i in b.index]\n", + "b[\"stapnt\"]=[ntcount(b['stap'][i]) for i in b.index]" ] }, { "cell_type": "code", - "execution_count": 128, - "id": "6df902fa-6979-403f-bef3-0afe2c4dac5f", + "execution_count": 156, + "id": "1e5c9807", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " 
.dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " <tr>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <th>0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <th>2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>8</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], "text/plain": [ - "[array([ 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,\n", - " 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n", - " 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6,\n", - " 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", - " 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,\n", - " 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,\n", - " 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,\n", - " 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,\n", - " 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11,\n", - " 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12,\n", - " 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,\n", - " 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,\n", - " 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,\n", - " 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14,\n", - " 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,\n", - " 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14], dtype=int32),\n", - " array(['C', 'C', 'G', 'G', 'T', 'C', 'G', 'G', 'G', 'C', 'A', 'A', 'A',\n", - " 'G', 'G', 'G', 'A', 'C', 'T', 'T', 'T', 'T', 'G', 'A', 'C', 'T',\n", - " 'G', 'T', 'G', 'C', 'C', 'C', 'C', 'C', 'A', 'A', 'A', 'A', 'G',\n", - " 'G', 'G', 'A', 'C', 'T', 'T', 'C', 'C', 'T', 'G', 'C', 'T', 'T',\n", - " 'G', 'A', 'C', 'A', 'T', 'A', 'G', 'T', 'G', 'C', 'G', 'G', 'C',\n", - " 'A', 'G', 'G', 'C', 'G', 'C', 'T', 'T', 'G', 'G', 'C', 'G', 'T',\n", - " 'G', 'A', 'A', 'C', 'C', 'T', 'C', 'G', 'C', 'A', 'T', 'G', 'G',\n", - " 'A', 'G', 'T', 'C', 'T', 'C', 'G', 'G', 'C', 
'A', 'G', 'C', 'G',\n", - " 'T', 'G', 'T', 'C', 'C', 'G', 'C', 'G', 'T', 'C', 'C', 'C', 'T',\n", - " 'T', 'T', 'T', 'G', 'G', 'G', 'G', 'G', 'C', 'A', 'C', 'C', 'C',\n", - " 'C', 'G', 'C', 'C', 'C', 'A', 'C', 'G', 'C', 'T', 'G', 'A', 'G',\n", - " 'G', 'C', 'G', 'A', 'A', 'C', 'C', 'A', 'A', 'A', 'G', 'T', 'A',\n", - " 'T', 'G', 'T', 'G', 'A', 'G', 'C', 'G', 'T', 'T', 'T', 'A', 'C',\n", - " 'A', 'C', 'T', 'T', 'A', 'T', 'C', 'A', 'C', 'T', 'C', 'T', 'A',\n", - " 'A', 'T', 'G', 'T', 'G', 'A', 'T', 'A', 'T', 'A', 'T', 'G', 'C',\n", - " 'C', 'C', 'G', 'T', 'C', 'C', 'A', 'G', 'A', 'C', 'G', 'A', 'T',\n", - " 'C', 'G', 'T', 'G', 'C', 'C', 'A', 'G', 'T', 'C', 'A', 'A', 'A',\n", - " 'A', 'G', 'T', 'C', 'C', 'C', 'T', 'T', 'T', 'T', 'A', 'T', 'C',\n", - " 'C', 'A', 'C', 'C', 'A', 'T', 'C', 'C', 'G', 'C', 'C', 'A', 'T',\n", - " 'A', 'C', 'C', 'C', 'C', 'A', 'A', 'A', 'C', 'T', 'A', 'G', 'G',\n", - " 'T', 'A', 'T', 'T', 'G', 'G', 'A', 'C', 'G', 'G', 'G', 'C', 'A',\n", - " 'T', 'A', 'T', 'A', 'T', 'C', 'A', 'C', 'A', 'T', 'T', 'A', 'G',\n", - " 'A', 'G', 'T', 'G', 'A', 'T', 'A', 'G', 'G', 'G', 'C', 'A', 'C',\n", - " 'G', 'A', 'T', 'C', 'G', 'T', 'C', 'T', 'G', 'G', 'A', 'T', 'T',\n", - " 'T', 'G', 'G', 'C', 'C', 'T', 'C', 'A', 'G', 'C', 'G', 'T', 'G',\n", - " 'G', 'G', 'C', 'G', 'G', 'A', 'G', 'T', 'G', 'T', 'A', 'A', 'C',\n", - " 'A', 'C', 'T', 'A', 'A', 'A', 'T', 'A', 'C', 'C', 'T', 'A', 'G',\n", - " 'T', 'T', 'T', 'G', 'G', 'A', 'A', 'G', 'C', 'G', 'C', 'C', 'T',\n", - " 'G', 'C', 'C', 'G', 'A', 'C', 'G', 'C', 'T', 'C', 'A', 'C', 'A',\n", - " 'T', 'A', 'C', 'T', 'T', 'T', 'G', 'G', 'T', 'T', 'C', 'A', 'G',\n", - " 'G', 'G', 'T', 'A', 'T', 'G', 'G', 'G', 'A', 'G', 'A', 'C', 'C',\n", - " 'T', 'G', 'C', 'C', 'C', 'G', 'G', 'A', 'T', 'G', 'G', 'T', 'G',\n", - " 'G', 'A', 'T', 'A', 'A', 'A', 'T', 'T', 'A', 'A', 'C', 'G', 'C',\n", - " 'G', 'G', 'A', 'C', 'A', 'C', 'G', 'T', 'C', 'C', 'A', 'T', 'G',\n", - " 'C', 'G', 'A', 'G', 'G', 'T', 'T', 'C', 'A', 'C', 'G', 'C', 'C'],\n", - " 
dtype='<U1'),\n", - " array([ -1, 0, 1, -1, 3, 4, -1, 6, 7, -1, 9, 10, 11,\n", - " 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,\n", - " 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,\n", - " 38, 39, 40, 41, -1, 43, 44, 45, 46, -1, 48, 49, 50,\n", - " -1, 52, 53, 54, -1, 56, 57, 58, 59, 60, 61, 62, 63,\n", - " 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,\n", - " 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,\n", - " 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,\n", - " 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,\n", - " 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,\n", - " 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141,\n", - " 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154,\n", - " 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,\n", - " 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,\n", - " 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193,\n", - " 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206,\n", - " 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,\n", - " 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232,\n", - " 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245,\n", - " 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258,\n", - " 259, 260, 261, 262, -1, 264, 265, 266, 267, 268, 269, 270, 271,\n", - " 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,\n", - " 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297,\n", - " 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310,\n", - " -1, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323,\n", - " 324, 325, 326, 327, 328, 329, 330, 331, -1, 333, 334, 335, 336,\n", - " 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349,\n", - " 350, 351, 352, 353, 354, -1, 356, 357, 358, 359, 360, 361, 362,\n", - " 363, 364, 365, 366, 367, 368, 369, 
370, 371, 372, 373, 374, 375,\n", - " 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,\n", - " -1, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401,\n", - " 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414,\n", - " 415, 416, 417, 418, 419, 420, 421, -1, 423, 424, 425, 426, 427,\n", - " 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440,\n", - " 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453],\n", - " dtype=int32),\n", - " array([ 1, 2, -1, 4, 5, -1, 7, 8, -1, 10, 11, 12, 13,\n", - " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", - " 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,\n", - " 40, 41, 42, -1, 44, 45, 46, 47, -1, 49, 50, 51, -1,\n", - " 53, 54, 55, -1, 57, 58, 59, 60, 61, 62, 63, 64, 65,\n", - " 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78,\n", - " 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,\n", - " 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,\n", - " 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,\n", - " 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,\n", - " 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,\n", - " 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,\n", - " 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,\n", - " 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,\n", - " 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,\n", - " 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,\n", - " 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,\n", - " 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,\n", - " 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247,\n", - " 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,\n", - " 261, 262, 263, -1, 265, 266, 267, 268, 269, 270, 271, 272, 273,\n", - " 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 
286,\n", - " 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,\n", - " 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, -1,\n", - " 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325,\n", - " 326, 327, 328, 329, 330, 331, 332, -1, 334, 335, 336, 337, 338,\n", - " 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351,\n", - " 352, 353, 354, 355, -1, 357, 358, 359, 360, 361, 362, 363, 364,\n", - " 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377,\n", - " 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, -1,\n", - " 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403,\n", - " 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416,\n", - " 417, 418, 419, 420, 421, 422, -1, 424, 425, 426, 427, 428, 429,\n", - " 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442,\n", - " 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, -1],\n", - " dtype=int32)]" + " r bp stack threeprime seq orientation\n", + "vh zid \n", + "0 0 NaN NaN NaN NaN NaN NaN\n", + "1 3 NaN NaN NaN NaN NaN NaN\n", + "2 1 NaN NaN NaN NaN NaN NaN\n", + "3 2 NaN NaN NaN NaN NaN NaN\n", + "1 8 NaN NaN NaN NaN NaN NaN" ] }, - "execution_count": 128, + "execution_count": 156, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "top_data" - ] - }, - { - "cell_type": "code", - "execution_count": 222, - "id": "9dbdf874-ed47-45bf-a70d-8c4928a5761b", - "metadata": {}, - "outputs": [], - "source": [ - "dg[\"threeprime\"]=top_data[-1]\n", - "stacks=[]\n", - "dg[\"seq\"]=top_data[1]\n", - "dg[\"stack\"]=dg[\"threeprime\"]\n", - "for i in dg.index:\n", - " if dg.loc[i][\"threeprime\"] in dg.index:\n", - " if dg.loc[dg.loc[i][\"threeprime\"]][\"vh\"]!=dg.loc[i][\"vh\"]:\n", - " dg[\"stack\"][i]=-1\n" + "i=range(5)\n", + "col=[\"vh\",\"zid\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"]\n", + "d=pd.DataFrame(index=i,columns=col)\n", + "d['vh']=[0,1,2,3,1]\n", + 
"d['zid']=[0,3,1,2,8]\n", + "d.set_index([\"vh\",\"zid\"],inplace=True)\n", + "d" ] }, { "cell_type": "code", - "execution_count": 179, - "id": "6e9e8a3f-6884-40ab-9746-9b338a650858", + "execution_count": 157, + "id": "31c50f63", "metadata": {}, "outputs": [ { @@ -3974,15 +2185,14 @@ " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", - " <th>level_0</th>\n", - " <th>index</th>\n", " <th>vh</th>\n", " <th>zid</th>\n", - " <th>strand</th>\n", - " <th>is_scaf</th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", " <th>stack</th>\n", " <th>threeprime</th>\n", " <th>seq</th>\n", + " <th>orientation</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", @@ -3990,172 +2200,108 @@ " <th>0</th>\n", " <td>0</td>\n", " <td>0</td>\n", - " <td>0</td>\n", - " <td>41</td>\n", - " <td>0</td>\n", - " <td>True</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>C</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>40</td>\n", - " <td>0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>C</td>\n", + " <td>3</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>39</td>\n", - " <td>0</td>\n", - " <td>True</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>G</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>3</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>39</td>\n", - " <td>1</td>\n", - " <td>True</td>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>G</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>4</td>\n", - " <td>4</td>\n", " <td>1</td>\n", - " <td>40</td>\n", - " <td>1</td>\n", - " <td>True</td>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>T</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>450</th>\n", - " <td>450</td>\n", - " <td>450</td>\n", - " <td>4</td>\n", - " <td>35</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>451</td>\n", - " <td>451</td>\n", - " <td>A</td>\n", - " </tr>\n", - " <tr>\n", - " <th>451</th>\n", - " <td>451</td>\n", - " <td>451</td>\n", - " <td>4</td>\n", - " <td>36</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>452</td>\n", - " <td>452</td>\n", - " <td>C</td>\n", - " </tr>\n", - " <tr>\n", - " <th>452</th>\n", - " <td>452</td>\n", - " <td>452</td>\n", - " <td>4</td>\n", - " <td>37</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>453</td>\n", - " <td>453</td>\n", - " <td>G</td>\n", - " </tr>\n", - " <tr>\n", - " <th>453</th>\n", - " <td>453</td>\n", - " <td>453</td>\n", - " <td>4</td>\n", - " <td>38</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>454</td>\n", - " <td>454</td>\n", - " <td>C</td>\n", - " </tr>\n", - " <tr>\n", - " <th>454</th>\n", - " <td>454</td>\n", - " <td>454</td>\n", - " <td>4</td>\n", - " <td>39</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>C</td>\n", + " <td>8</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>455 rows × 9 columns</p>\n", "</div>" ], "text/plain": [ - " level_0 index vh zid strand is_scaf stack threeprime 
seq\n", - "0 0 0 0 41 0 True 1 1 C\n", - "1 1 1 0 40 0 True 2 2 C\n", - "2 2 2 0 39 0 True -1 -1 G\n", - "3 3 3 1 39 1 True 4 4 G\n", - "4 4 4 1 40 1 True 5 5 T\n", - ".. ... ... .. .. ... ... ... ... ..\n", - "450 450 450 4 35 13 False 451 451 A\n", - "451 451 451 4 36 13 False 452 452 C\n", - "452 452 452 4 37 13 False 453 453 G\n", - "453 453 453 4 38 13 False 454 454 C\n", - "454 454 454 4 39 13 False -1 -1 C\n", - "\n", - "[455 rows x 9 columns]" + " vh zid r bp stack threeprime seq orientation\n", + "0 0 0 NaN NaN NaN NaN NaN NaN\n", + "1 1 3 NaN NaN NaN NaN NaN NaN\n", + "2 2 1 NaN NaN NaN NaN NaN NaN\n", + "3 3 2 NaN NaN NaN NaN NaN NaN\n", + "4 1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "d=d.reset_index()\n", + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "id": "67546136", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 2]),)" ] }, - "execution_count": 179, + "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dg" + "s=[True,False,True,False,False]\n", + "np.where(np.array(s)==True)" ] }, { "cell_type": "code", - "execution_count": 196, - "id": "9492a36d-ca4c-4859-833b-1f7831360c4e", - "metadata": { - "scrolled": true - }, + "execution_count": 127, + "id": "bad20d6a", + "metadata": {}, "outputs": [ { "data": { @@ -4178,3620 +2324,513 @@ " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", - " <th>level_0</th>\n", - " <th>index</th>\n", - " <th>vh</th>\n", - " <th>zid</th>\n", - " <th>strand</th>\n", - " <th>is_scaf</th>\n", + " <th></th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", " <th>stack</th>\n", " <th>threeprime</th>\n", " <th>seq</th>\n", - " <th>bp</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " <tr>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " 
<th></th>\n", + " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>41</td>\n", - " <td>0</td>\n", - " <td>True</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>C</td>\n", - " <td>-1</td>\n", + " <th>0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>40</td>\n", - " <td>0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>C</td>\n", - " <td>-1</td>\n", + " <th>3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>39</td>\n", - " <td>0</td>\n", - " <td>True</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>G</td>\n", - " <td>-1</td>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>39</td>\n", - " <td>1</td>\n", - " <td>True</td>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>G</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>1</td>\n", - " <td>40</td>\n", - " <td>1</td>\n", - " <td>True</td>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>T</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>450</th>\n", - " 
<td>450</td>\n", - " <td>450</td>\n", - " <td>4</td>\n", - " <td>35</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>451</td>\n", - " <td>451</td>\n", - " <td>A</td>\n", - " <td>77</td>\n", - " </tr>\n", - " <tr>\n", - " <th>451</th>\n", - " <td>451</td>\n", - " <td>451</td>\n", - " <td>4</td>\n", - " <td>36</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>452</td>\n", - " <td>452</td>\n", - " <td>C</td>\n", - " <td>76</td>\n", - " </tr>\n", - " <tr>\n", - " <th>452</th>\n", - " <td>452</td>\n", - " <td>452</td>\n", - " <td>4</td>\n", - " <td>37</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>453</td>\n", - " <td>453</td>\n", - " <td>G</td>\n", - " <td>75</td>\n", - " </tr>\n", - " <tr>\n", - " <th>453</th>\n", - " <td>453</td>\n", - " <td>453</td>\n", - " <td>4</td>\n", - " <td>38</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>454</td>\n", - " <td>454</td>\n", - " <td>C</td>\n", - " <td>74</td>\n", + " <th>2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " <tr>\n", - " <th>454</th>\n", - " <td>454</td>\n", - " <td>454</td>\n", - " <td>4</td>\n", - " <td>39</td>\n", - " <td>13</td>\n", - " <td>False</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>C</td>\n", - " <td>73</td>\n", + " <th>1</th>\n", + " <th>8</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>455 rows × 10 columns</p>\n", "</div>" ], "text/plain": [ - " level_0 index vh zid strand is_scaf stack threeprime seq bp\n", - "0 0 0 0 41 0 True 1 1 C -1\n", - "1 1 1 0 40 0 True 2 2 C -1\n", - "2 2 2 0 39 0 True -1 -1 G -1\n", - "3 3 3 1 39 1 True 4 4 G -1\n", - "4 4 4 1 40 1 True 5 5 T -1\n", - ".. ... ... .. .. ... ... ... ... .. 
..\n", - "450 450 450 4 35 13 False 451 451 A 77\n", - "451 451 451 4 36 13 False 452 452 C 76\n", - "452 452 452 4 37 13 False 453 453 G 75\n", - "453 453 453 4 38 13 False 454 454 C 74\n", - "454 454 454 4 39 13 False -1 -1 C 73\n", - "\n", - "[455 rows x 10 columns]" + " r bp stack threeprime seq orientation\n", + "vh zid \n", + "0 0 NaN NaN NaN NaN NaN NaN\n", + "1 3 NaN NaN NaN NaN NaN NaN\n", + "2 1 NaN NaN NaN NaN NaN NaN\n", + "3 2 NaN NaN NaN NaN NaN NaN\n", + "1 8 NaN NaN NaN NaN NaN NaN" ] }, - "execution_count": 196, + "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "bp_map=dict(zip(zip(dg[\"vh\"],dg[\"zid\"],dg[\"is_scaf\"]),dg[\"index\"]))\n", - "bp=-np.ones(len(dg.index),dtype=int)\n", - "counter=0\n", - "for i,j,k in zip(dg[\"vh\"],dg[\"zid\"],dg[\"is_scaf\"]):\n", - " try:\n", - " bp[counter]=bp_map[(i,j,not(k))]\n", - " except:\n", - " pass\n", - " counter+=1\n", - "dg[\"bp\"]=bp\n", - "dg" + "d" ] }, { "cell_type": "code", - "execution_count": 197, - "id": "319e2a8b-a24b-431e-80ec-6a21a8505ead", + "execution_count": 100, + "id": "48225afa", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "(level_0 450\n", - " index 450\n", - " vh 4\n", - " zid 35\n", - " strand 13\n", - " is_scaf False\n", - " stack 451\n", - " threeprime 451\n", - " seq A\n", - " bp 77\n", - " Name: 450, dtype: object,\n", - " level_0 77\n", - " index 77\n", - " vh 4\n", - " zid 35\n", - " strand 7\n", - " is_scaf True\n", - " stack 78\n", - " threeprime 78\n", - " seq T\n", - " bp 450\n", - " Name: 77, dtype: object)" - ] - }, - "execution_count": 197, - "metadata": {}, - "output_type": "execute_result" + "ename": "KeyError", + "evalue": "\"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + 
"\u001b[0;32m<ipython-input-100-a349feadc600>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3509\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_iterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3510\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3511\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_indexer_strict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3513\u001b[0m \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5780\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_indexer\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reindex_non_unique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5782\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_if_missing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5783\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5784\u001b[0m \u001b[0mkeyarr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5840\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muse_interval_msg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5841\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5842\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of [{key}] are in the [{axis_name}]\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5844\u001b[0m \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: \"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"" + ] } ], "source": [ - "dg.loc[450],dg.loc[77]" + "d[[0,0]]" ] }, { "cell_type": "code", - "execution_count": 107, - "id": "d57bc82b-c233-4f0d-862e-47134101f43c", + "execution_count": 13, + "id": "c75bd92f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{2: (0, 0),\n", - " 1: (0, 0),\n", - " 0: (0, 0),\n", - " 5: (1, 1),\n", - " 4: (1, 1),\n", - " 3: (1, 1),\n", - " 8: (2, 2),\n", - " 7: (2, 2),\n", - " 6: (2, 2),\n", - " 42: (3, 2),\n", - " 41: (3, 2),\n", - " 40: (3, 2),\n", - " 39: (3, 2),\n", - " 38: (3, 2),\n", - " 37: (3, 2),\n", - " 36: (3, 2),\n", - " 35: (3, 2),\n", - " 34: (3, 2),\n", - " 33: (3, 2),\n", - " 32: (3, 2),\n", - " 31: (3, 2),\n", - " 30: (3, 2),\n", - " 29: (3, 2),\n", - " 28: (3, 2),\n", - " 27: (3, 2),\n", - " 26: (3, 2),\n", - " 25: (3, 2),\n", - " 24: (3, 2),\n", - " 23: (3, 2),\n", - " 22: (3, 2),\n", - " 21: (3, 2),\n", - " 20: (3, 2),\n", - " 19: (3, 2),\n", - " 18: (3, 2),\n", - " 17: (3, 2),\n", - " 16: (3, 2),\n", - " 15: (3, 2),\n", - " 14: (3, 2),\n", - " 13: (3, 2),\n", - " 12: (3, 2),\n", - " 11: (3, 2),\n", - " 10: (3, 2),\n", - " 9: (3, 2),\n", - " 47: (4, 3),\n", - " 46: (4, 3),\n", - " 45: (4, 3),\n", - " 44: (4, 3),\n", - " 43: (4, 3),\n", - " 51: (5, 4),\n", - " 50: (5, 4),\n", - " 49: (5, 4),\n", - " 48: (5, 4),\n", - " 55: (6, 5),\n", - " 54: (6, 5),\n", - " 53: (6, 5),\n", - 
" 52: (6, 5),\n", - " 197: (7, 0),\n", - " 196: (7, 0),\n", - " 195: (7, 0),\n", - " 194: (7, 0),\n", - " 193: (7, 0),\n", - " 192: (7, 0),\n", - " 191: (7, 0),\n", - " 190: (7, 0),\n", - " 189: (7, 0),\n", - " 188: (7, 0),\n", - " 187: (7, 0),\n", - " 186: (7, 0),\n", - " 185: (7, 0),\n", - " 184: (7, 0),\n", - " 183: (7, 0),\n", - " 182: (7, 0),\n", - " 181: (7, 0),\n", - " 180: (7, 0),\n", - " 179: (7, 0),\n", - " 178: (7, 0),\n", - " 177: (7, 0),\n", - " 176: (7, 0),\n", - " 175: (7, 0),\n", - " 174: (7, 0),\n", - " 173: (7, 0),\n", - " 172: (7, 0),\n", - " 171: (7, 0),\n", - " 170: (7, 0),\n", - " 169: (7, 0),\n", - " 168: (7, 0),\n", - " 167: (7, 0),\n", - " 166: (7, 0),\n", - " 165: (7, 0),\n", - " 164: (7, 0),\n", - " 163: (7, 0),\n", - " 162: (7, 0),\n", - " 161: (7, 0),\n", - " 160: (7, 0),\n", - " 159: (7, 0),\n", - " 158: (7, 0),\n", - " 157: (7, 0),\n", - " 156: (7, 0),\n", - " 155: (7, 0),\n", - " 154: (7, 0),\n", - " 153: (7, 0),\n", - " 152: (7, 0),\n", - " 151: (7, 0),\n", - " 150: (7, 0),\n", - " 149: (7, 0),\n", - " 148: (7, 0),\n", - " 147: (7, 0),\n", - " 146: (7, 0),\n", - " 145: (7, 0),\n", - " 212: (7, 1),\n", - " 211: (7, 1),\n", - " 210: (7, 1),\n", - " 209: (7, 1),\n", - " 208: (7, 1),\n", - " 207: (7, 1),\n", - " 206: (7, 1),\n", - " 205: (7, 1),\n", - " 203: (7, 1),\n", - " 204: (7, 1),\n", - " 202: (7, 1),\n", - " 201: (7, 1),\n", - " 200: (7, 1),\n", - " 199: (7, 1),\n", - " 198: (7, 1),\n", - " 144: (7, 1),\n", - " 143: (7, 1),\n", - " 142: (7, 1),\n", - " 141: (7, 1),\n", - " 140: (7, 1),\n", - " 139: (7, 1),\n", - " 138: (7, 1),\n", - " 137: (7, 1),\n", - " 136: (7, 1),\n", - " 135: (7, 1),\n", - " 134: (7, 1),\n", - " 133: (7, 1),\n", - " 132: (7, 1),\n", - " 131: (7, 1),\n", - " 130: (7, 1),\n", - " 129: (7, 1),\n", - " 128: (7, 1),\n", - " 127: (7, 1),\n", - " 228: (7, 2),\n", - " 227: (7, 2),\n", - " 226: (7, 2),\n", - " 225: (7, 2),\n", - " 224: (7, 2),\n", - " 223: (7, 2),\n", - " 222: (7, 2),\n", - " 221: (7, 2),\n", - " 
220: (7, 2),\n", - " 219: (7, 2),\n", - " 218: (7, 2),\n", - " 217: (7, 2),\n", - " 216: (7, 2),\n", - " 215: (7, 2),\n", - " 214: (7, 2),\n", - " 213: (7, 2),\n", - " 126: (7, 2),\n", - " 125: (7, 2),\n", - " 124: (7, 2),\n", - " 123: (7, 2),\n", - " 122: (7, 2),\n", - " 121: (7, 2),\n", - " 120: (7, 2),\n", - " 119: (7, 2),\n", - " 118: (7, 2),\n", - " 117: (7, 2),\n", - " 116: (7, 2),\n", - " 115: (7, 2),\n", - " 114: (7, 2),\n", - " 113: (7, 2),\n", - " 242: (7, 3),\n", - " 241: (7, 3),\n", - " 240: (7, 3),\n", - " 239: (7, 3),\n", - " 238: (7, 3),\n", - " 237: (7, 3),\n", - " 236: (7, 3),\n", - " 235: (7, 3),\n", - " 234: (7, 3),\n", - " 233: (7, 3),\n", - " 232: (7, 3),\n", - " 231: (7, 3),\n", - " 230: (7, 3),\n", - " 229: (7, 3),\n", - " 112: (7, 3),\n", - " 111: (7, 3),\n", - " 110: (7, 3),\n", - " 109: (7, 3),\n", - " 108: (7, 3),\n", - " 107: (7, 3),\n", - " 106: (7, 3),\n", - " 105: (7, 3),\n", - " 104: (7, 3),\n", - " 103: (7, 3),\n", - " 102: (7, 3),\n", - " 101: (7, 3),\n", - " 100: (7, 3),\n", - " 99: (7, 3),\n", - " 98: (7, 3),\n", - " 97: (7, 3),\n", - " 249: (7, 4),\n", - " 248: (7, 4),\n", - " 247: (7, 4),\n", - " 246: (7, 4),\n", - " 245: (7, 4),\n", - " 244: (7, 4),\n", - " 243: (7, 4),\n", - " 96: (7, 4),\n", - " 95: (7, 4),\n", - " 94: (7, 4),\n", - " 93: (7, 4),\n", - " 92: (7, 4),\n", - " 91: (7, 4),\n", - " 90: (7, 4),\n", - " 89: (7, 4),\n", - " 88: (7, 4),\n", - " 87: (7, 4),\n", - " 86: (7, 4),\n", - " 85: (7, 4),\n", - " 84: (7, 4),\n", - " 83: (7, 4),\n", - " 82: (7, 4),\n", - " 81: (7, 4),\n", - " 80: (7, 4),\n", - " 79: (7, 4),\n", - " 78: (7, 4),\n", - " 77: (7, 4),\n", - " 76: (7, 4),\n", - " 75: (7, 4),\n", - " 74: (7, 4),\n", - " 73: (7, 4),\n", - " 263: (7, 5),\n", - " 262: (7, 5),\n", - " 261: (7, 5),\n", - " 260: (7, 5),\n", - " 259: (7, 5),\n", - " 258: (7, 5),\n", - " 257: (7, 5),\n", - " 256: (7, 5),\n", - " 255: (7, 5),\n", - " 254: (7, 5),\n", - " 253: (7, 5),\n", - " 252: (7, 5),\n", - " 251: (7, 5),\n", - " 250: (7, 
5),\n", - " 72: (7, 5),\n", - " 71: (7, 5),\n", - " 70: (7, 5),\n", - " 69: (7, 5),\n", - " 68: (7, 5),\n", - " 67: (7, 5),\n", - " 66: (7, 5),\n", - " 65: (7, 5),\n", - " 64: (7, 5),\n", - " 63: (7, 5),\n", - " 62: (7, 5),\n", - " 61: (7, 5),\n", - " 60: (7, 5),\n", - " 59: (7, 5),\n", - " 58: (7, 5),\n", - " 57: (7, 5),\n", - " 56: (7, 5),\n", - " 292: (8, 0),\n", - " 291: (8, 0),\n", - " 290: (8, 0),\n", - " 289: (8, 0),\n", - " 288: (8, 0),\n", - " 287: (8, 0),\n", - " 286: (8, 0),\n", - " 275: (8, 0),\n", - " 276: (8, 0),\n", - " 277: (8, 0),\n", - " 278: (8, 0),\n", - " 279: (8, 0),\n", - " 280: (8, 0),\n", - " 281: (8, 0),\n", - " 282: (8, 0),\n", - " 283: (8, 0),\n", - " 284: (8, 0),\n", - " 285: (8, 0),\n", - " 274: (8, 0),\n", - " 273: (8, 0),\n", - " 272: (8, 0),\n", - " 271: (8, 0),\n", - " 270: (8, 0),\n", - " 269: (8, 0),\n", - " 268: (8, 0),\n", - " 267: (8, 0),\n", - " 266: (8, 0),\n", - " 265: (8, 0),\n", - " 264: (8, 0),\n", - " 311: (8, 1),\n", - " 310: (8, 1),\n", - " 309: (8, 1),\n", - " 308: (8, 1),\n", - " 307: (8, 1),\n", - " 306: (8, 1),\n", - " 305: (8, 1),\n", - " 304: (8, 1),\n", - " 303: (8, 1),\n", - " 302: (8, 1),\n", - " 301: (8, 1),\n", - " 300: (8, 1),\n", - " 299: (8, 1),\n", - " 298: (8, 1),\n", - " 297: (8, 1),\n", - " 296: (8, 1),\n", - " 295: (8, 1),\n", - " 294: (8, 1),\n", - " 293: (8, 1),\n", - " 332: (9, 0),\n", - " 331: (9, 0),\n", - " 330: (9, 0),\n", - " 329: (9, 1),\n", - " 328: (9, 1),\n", - " 327: (9, 1),\n", - " 326: (9, 1),\n", - " 325: (9, 1),\n", - " 324: (9, 1),\n", - " 323: (9, 1),\n", - " 322: (9, 1),\n", - " 321: (9, 1),\n", - " 320: (9, 1),\n", - " 319: (9, 1),\n", - " 318: (9, 1),\n", - " 317: (9, 1),\n", - " 316: (9, 1),\n", - " 315: (9, 1),\n", - " 314: (9, 1),\n", - " 313: (9, 1),\n", - " 312: (9, 1),\n", - " 336: (10, 0),\n", - " 335: (10, 0),\n", - " 334: (10, 0),\n", - " 333: (10, 0),\n", - " 355: (10, 5),\n", - " 354: (10, 5),\n", - " 353: (10, 5),\n", - " 352: (10, 5),\n", - " 351: (10, 5),\n", - " 
350: (10, 5),\n", - " 349: (10, 5),\n", - " 348: (10, 5),\n", - " 347: (10, 5),\n", - " 346: (10, 5),\n", - " 345: (10, 5),\n", - " 344: (10, 5),\n", - " 343: (10, 5),\n", - " 342: (10, 5),\n", - " 341: (10, 5),\n", - " 340: (10, 5),\n", - " 339: (10, 5),\n", - " 338: (10, 5),\n", - " 337: (10, 5),\n", - " 389: (11, 0),\n", - " 388: (11, 0),\n", - " 387: (11, 0),\n", - " 386: (11, 0),\n", - " 385: (11, 0),\n", - " 384: (11, 0),\n", - " 383: (11, 0),\n", - " 382: (11, 0),\n", - " 381: (11, 0),\n", - " 369: (11, 0),\n", - " 370: (11, 0),\n", - " 371: (11, 0),\n", - " 372: (11, 0),\n", - " 373: (11, 0),\n", - " 374: (11, 0),\n", - " 375: (11, 0),\n", - " 376: (11, 0),\n", - " 377: (11, 0),\n", - " 378: (11, 0),\n", - " 379: (11, 0),\n", - " 380: (11, 0),\n", - " 368: (11, 0),\n", - " 367: (11, 5),\n", - " 366: (11, 5),\n", - " 365: (11, 5),\n", - " 364: (11, 5),\n", - " 363: (11, 5),\n", - " 362: (11, 5),\n", - " 361: (11, 5),\n", - " 360: (11, 5),\n", - " 359: (11, 5),\n", - " 358: (11, 5),\n", - " 357: (11, 5),\n", - " 356: (11, 5),\n", - " 422: (12, 3),\n", - " 421: (12, 3),\n", - " 420: (12, 3),\n", - " 419: (12, 3),\n", - " 418: (12, 3),\n", - " 417: (12, 3),\n", - " 416: (12, 3),\n", - " 415: (12, 3),\n", - " 414: (12, 3),\n", - " 413: (12, 3),\n", - " 412: (12, 3),\n", - " 411: (12, 3),\n", - " 410: (12, 3),\n", - " 409: (12, 3),\n", - " 408: (12, 3),\n", - " 407: (12, 3),\n", - " 406: (12, 3),\n", - " 405: (12, 3),\n", - " 404: (12, 3),\n", - " 403: (12, 3),\n", - " 402: (12, 3),\n", - " 401: (12, 4),\n", - " 400: (12, 4),\n", - " 399: (12, 4),\n", - " 398: (12, 4),\n", - " 397: (12, 4),\n", - " 396: (12, 4),\n", - " 395: (12, 4),\n", - " 394: (12, 4),\n", - " 393: (12, 4),\n", - " 392: (12, 4),\n", - " 391: (12, 4),\n", - " 390: (12, 4),\n", - " 435: (13, 3),\n", - " 434: (13, 3),\n", - " 433: (13, 3),\n", - " 432: (13, 3),\n", - " 431: (13, 3),\n", - " 430: (13, 3),\n", - " 429: (13, 3),\n", - " 428: (13, 3),\n", - " 427: (13, 3),\n", - " 426: (13, 3),\n", - 
" 425: (13, 3),\n", - " 424: (13, 3),\n", - " 423: (13, 3),\n", - " 454: (13, 4),\n", - " 453: (13, 4),\n", - " 452: (13, 4),\n", - " 451: (13, 4),\n", - " 450: (13, 4),\n", - " 449: (13, 4),\n", - " 448: (13, 4),\n", - " 447: (13, 4),\n", - " 446: (13, 4),\n", - " 445: (13, 4),\n", - " 444: (13, 4),\n", - " 443: (13, 4),\n", - " 442: (13, 4),\n", - " 441: (13, 4),\n", - " 440: (13, 4),\n", - " 439: (13, 4),\n", - " 438: (13, 4),\n", - " 437: (13, 4),\n", - " 436: (13, 4)}" + "[[23, 13369809], [38, 12060012]]" ] }, - "execution_count": 107, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[\"stack\"]=df.index[1:]+[-1]\n", - "df[\"threeprime\"]=df.index[1:]\n", - "\n", - "strands_map=df.groupby(['strand',\"vh\"]).groups\n", - "ind_map={}\n", - "for j in list(strands_map.keys()):\n", - " for l in strands_map[j]:\n", - " ind_map[l]=j\n", - "for i in list(df.index):\n", - " if df.loc[i]\n" + "df[\"vstrands\"][0][\"stap_colors\"]" ] }, { "cell_type": "code", - "execution_count": 99, - "id": "c88b4d35-0f58-47da-8687-e6a8433d363f", + "execution_count": 14, + "id": "00f1513e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " False,\n", - " 
False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " 
False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " True,\n", - " True,\n", - " True,\n", - " False,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " 
True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " False,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " False,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " True,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " False,\n", - " True]" + "210" ] }, - "execution_count": 99, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - 
"list(df[\"stack\"]==df[\"threeprime\"])" + "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", + "len(vh_vb._scaf)" ] }, { "cell_type": "code", - "execution_count": 84, - "id": "eeea91ae-b631-454f-b66f-af580c7ece5c", + "execution_count": 198, + "id": "aaa65658", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{0: [2, 1, 0], 1: [5, 4, 3], 2: [8, 7, 6], 3: [42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9], 4: [47, 46, 45, 44, 43], 5: [51, 50, 49, 48], 6: [55, 54, 53, 52], 7: [263, 262, 261, 260, 259, 258, 257, 256, 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, 239, 238, 237, 236, 235, 234, 233, 232, 231, 230, 229, 228, 227, 226, 225, 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214, 213, 212, 211, 210, 209, 208, 207, 206, 205, 203, 204, 202, 201, 200, 199, 198, 197, 196, 195, 194, 193, 192, 191, 190, 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 172, 171, 170, 169, 168, 167, 166, 165, 164, ...], 8: [311, 310, 309, 308, 307, 306, 305, 304, 303, 302, 301, 300, 299, 298, 297, 296, 295, 294, 293, 292, 291, 290, 289, 288, 287, 286, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 274, 273, 272, 271, 270, 269, 268, 267, 266, 265, 264], 9: [332, 331, 330, 329, 328, 327, 326, 325, 324, 323, 322, 321, 320, 319, 318, 317, 316, 315, 314, 313, 312], 10: [355, 354, 353, 352, 351, 350, 349, 348, 347, 346, 345, 344, 343, 342, 341, 340, 339, 338, 337, 336, 335, 334, 333], 11: [389, 388, 387, 386, 385, 384, 383, 382, 381, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 368, 367, 366, 365, 364, 363, 362, 361, 360, 359, 358, 357, 356], 12: [422, 421, 420, 419, 418, 417, 416, 415, 414, 413, 412, 411, 410, 409, 408, 407, 406, 405, 404, 403, 402, 401, 400, 399, 398, 397, 396, 395, 394, 393, 392, 391, 390], 13: [454, 453, 452, 451, 450, 449, 448, 447, 446, 445, 444, 443, 442, 441, 440, 439, 
438, 437, 436, 435, 434, 433, 432, 431, 430, 429, 428, 427, 426, 425, 424, 423]}" - ] - }, - "execution_count": 84, - "metadata": {}, - "output_type": "execute_result" + "ename": "TypeError", + "evalue": "file must have 'read' and 'readline' attributes", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[198], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpickle\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.virt2nuc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: file must have 'read' and 'readline' attributes" + ] } ], "source": [ - "strands=df.groupby(['strand']).groups\n", - "strands" + "df = pickle.load(\"test.virt2nuc\")" ] }, { "cell_type": "code", - "execution_count": 88, - "id": "be4a60fe-444d-4772-80e8-b95591a079b3", + "execution_count": 15, + "id": "cbddf07f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[1,\n", - " 0,\n", - " -1,\n", - " 4,\n", - " 3,\n", - " -1,\n", - " 7,\n", - " 6,\n", - " -1,\n", - " 41,\n", - " 40,\n", - " 39,\n", - " 38,\n", - " 37,\n", - " 36,\n", - " 35,\n", - " 34,\n", - " 33,\n", - " 32,\n", - " 31,\n", - " 30,\n", - " 29,\n", - " 28,\n", - " 27,\n", - " 26,\n", - " 25,\n", - " 24,\n", - " 23,\n", - " 22,\n", - " 21,\n", - " 20,\n", - " 19,\n", - " 18,\n", - " 17,\n", - " 16,\n", - " 15,\n", - " 14,\n", - " 13,\n", - " 12,\n", - " 11,\n", - " 10,\n", - " 9,\n", - " -1,\n", - " 46,\n", - " 45,\n", - " 44,\n", - " 43,\n", - " -1,\n", - " 50,\n", - " 49,\n", - " 48,\n", - " -1,\n", - " 54,\n", - " 53,\n", - " 52,\n", - " -1,\n", - " 262,\n", - " 261,\n", - " 260,\n", - " 259,\n", - " 258,\n", - " 257,\n", - " 256,\n", - " 255,\n", - " 
254,\n", - " 253,\n", - " 252,\n", - " 251,\n", - " 250,\n", - " 249,\n", - " 248,\n", - " 247,\n", - " 246,\n", - " 245,\n", - " 244,\n", - " 243,\n", - " 242,\n", - " 241,\n", - " 240,\n", - " 239,\n", - " 238,\n", - " 237,\n", - " 236,\n", - " 235,\n", - " 234,\n", - " 233,\n", - " 232,\n", - " 231,\n", - " 230,\n", - " 229,\n", - " 228,\n", - " 227,\n", - " 226,\n", - " 225,\n", - " 224,\n", - " 223,\n", - " 222,\n", - " 221,\n", - " 220,\n", - " 219,\n", - " 218,\n", - " 217,\n", - " 216,\n", - " 215,\n", - " 214,\n", - " 213,\n", - " 212,\n", - " 211,\n", - " 210,\n", - " 209,\n", - " 208,\n", - " 207,\n", - " 206,\n", - " 205,\n", - " 203,\n", - " 204,\n", - " 202,\n", - " 201,\n", - " 200,\n", - " 199,\n", - " 198,\n", - " 197,\n", - " 196,\n", - " 195,\n", - " 194,\n", - " 193,\n", - " 192,\n", - " 191,\n", - " 190,\n", - " 189,\n", - " 188,\n", - " 187,\n", - " 186,\n", - " 185,\n", - " 184,\n", - " 183,\n", - " 182,\n", - " 181,\n", - " 180,\n", - " 179,\n", - " 178,\n", - " 177,\n", - " 176,\n", - " 175,\n", - " 174,\n", - " 173,\n", - " 172,\n", - " 171,\n", - " 170,\n", - " 169,\n", - " 168,\n", - " 167,\n", - " 166,\n", - " 165,\n", - " 164,\n", - " 163,\n", - " 162,\n", - " 161,\n", - " 160,\n", - " 159,\n", - " 158,\n", - " 157,\n", - " 156,\n", - " 155,\n", - " 154,\n", - " 153,\n", - " 152,\n", - " 151,\n", - " 150,\n", - " 149,\n", - " 148,\n", - " 147,\n", - " 146,\n", - " 145,\n", - " 144,\n", - " 143,\n", - " 142,\n", - " 141,\n", - " 140,\n", - " 139,\n", - " 138,\n", - " 137,\n", - " 136,\n", - " 135,\n", - " 134,\n", - " 133,\n", - " 132,\n", - " 131,\n", - " 130,\n", - " 129,\n", - " 128,\n", - " 127,\n", - " 126,\n", - " 125,\n", - " 124,\n", - " 123,\n", - " 122,\n", - " 121,\n", - " 120,\n", - " 119,\n", - " 118,\n", - " 117,\n", - " 116,\n", - " 115,\n", - " 114,\n", - " 113,\n", - " 112,\n", - " 111,\n", - " 110,\n", - " 109,\n", - " 108,\n", - " 107,\n", - " 106,\n", - " 105,\n", - " 104,\n", - " 103,\n", - " 102,\n", - " 101,\n", - 
" 100,\n", - " 99,\n", - " 98,\n", - " 97,\n", - " 96,\n", - " 95,\n", - " 94,\n", - " 93,\n", - " 92,\n", - " 91,\n", - " 90,\n", - " 89,\n", - " 88,\n", - " 87,\n", - " 86,\n", - " 85,\n", - " 84,\n", - " 83,\n", - " 82,\n", - " 81,\n", - " 80,\n", - " 79,\n", - " 78,\n", - " 77,\n", - " 76,\n", - " 75,\n", - " 74,\n", - " 73,\n", - " 72,\n", - " 71,\n", - " 70,\n", - " 69,\n", - " 68,\n", - " 67,\n", - " 66,\n", - " 65,\n", - " 64,\n", - " 63,\n", - " 62,\n", - " 61,\n", - " 60,\n", - " 59,\n", - " 58,\n", - " 57,\n", - " 56,\n", - " -1,\n", - " 310,\n", - " 309,\n", - " 308,\n", - " 307,\n", - " 306,\n", - " 305,\n", - " 304,\n", - " 303,\n", - " 302,\n", - " 301,\n", - " 300,\n", - " 299,\n", - " 298,\n", - " 297,\n", - " 296,\n", - " 295,\n", - " 294,\n", - " 293,\n", - " 292,\n", - " 291,\n", - " 290,\n", - " 289,\n", - " 288,\n", - " 287,\n", - " 286,\n", - " 275,\n", - " 276,\n", - " 277,\n", - " 278,\n", - " 279,\n", - " 280,\n", - " 281,\n", - " 282,\n", - " 283,\n", - " 284,\n", - " 285,\n", - " 274,\n", - " 273,\n", - " 272,\n", - " 271,\n", - " 270,\n", - " 269,\n", - " 268,\n", - " 267,\n", - " 266,\n", - " 265,\n", - " 264,\n", - " -1,\n", - " 331,\n", - " 330,\n", - " 329,\n", - " 328,\n", - " 327,\n", - " 326,\n", - " 325,\n", - " 324,\n", - " 323,\n", - " 322,\n", - " 321,\n", - " 320,\n", - " 319,\n", - " 318,\n", - " 317,\n", - " 316,\n", - " 315,\n", - " 314,\n", - " 313,\n", - " 312,\n", - " -1,\n", - " 354,\n", - " 353,\n", - " 352,\n", - " 351,\n", - " 350,\n", - " 349,\n", - " 348,\n", - " 347,\n", - " 346,\n", - " 345,\n", - " 344,\n", - " 343,\n", - " 342,\n", - " 341,\n", - " 340,\n", - " 339,\n", - " 338,\n", - " 337,\n", - " 336,\n", - " 335,\n", - " 334,\n", - " 333,\n", - " -1,\n", - " 388,\n", - " 387,\n", - " 386,\n", - " 385,\n", - " 384,\n", - " 383,\n", - " 382,\n", - " 381,\n", - " 369,\n", - " 370,\n", - " 371,\n", - " 372,\n", - " 373,\n", - " 374,\n", - " 375,\n", - " 376,\n", - " 377,\n", - " 378,\n", - " 379,\n", - " 
380,\n", - " 368,\n", - " 367,\n", - " 366,\n", - " 365,\n", - " 364,\n", - " 363,\n", - " 362,\n", - " 361,\n", - " 360,\n", - " 359,\n", - " 358,\n", - " 357,\n", - " 356,\n", - " -1,\n", - " 421,\n", - " 420,\n", - " 419,\n", - " 418,\n", - " 417,\n", - " 416,\n", - " 415,\n", - " 414,\n", - " 413,\n", - " 412,\n", - " 411,\n", - " 410,\n", - " 409,\n", - " 408,\n", - " 407,\n", - " 406,\n", - " 405,\n", - " 404,\n", - " 403,\n", - " 402,\n", - " 401,\n", - " 400,\n", - " 399,\n", - " 398,\n", - " 397,\n", - " 396,\n", - " 395,\n", - " 394,\n", - " 393,\n", - " 392,\n", - " 391,\n", - " 390,\n", - " -1,\n", - " 453,\n", - " 452,\n", - " 451,\n", - " 450,\n", - " 449,\n", - " 448,\n", - " 447,\n", - " 446,\n", - " 445,\n", - " 444,\n", - " 443,\n", - " 442,\n", - " 441,\n", - " 440,\n", - " 439,\n", - " 438,\n", - " 437,\n", - " 436,\n", - " 435,\n", - " 434,\n", - " 433,\n", - " 432,\n", - " 431,\n", - " 430,\n", - " 429,\n", - " 428,\n", - " 427,\n", - " 426,\n", - " 425,\n", - " 424,\n", - " 423,\n", - " -1]" + "{0: (12, 16), 1: (12, 15), 2: (13, 15), 3: (13, 16), 4: (13, 17), 5: (12, 17)}" ] }, - "execution_count": 88, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "threeprime=[]\n", - "for i in list(strands.values()):\n", - " m=list(i)\n", - " m.append(-1)\n", - " for j in range(1,len(m)):\n", - " threeprime.append(m[j])\n", - "threeprime" + "pattern" ] }, { "cell_type": "code", - "execution_count": 106, - "id": "dc9618ff-8016-480c-b704-31b2dc5f58a1", + "execution_count": 173, + "id": "fac8699e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2 1\n", - "1 0\n", - "0 -1\n", - "5 4\n", - "4 3\n", - "3 -1\n", - "8 7\n", - "7 6\n", - "6 -1\n", - "47 41\n", - "46 40\n", - "45 39\n", - "44 38\n", - "43 37\n", - "51 36\n", - "50 35\n", - "49 34\n", - "48 33\n", - "55 32\n", - "54 31\n", - "53 30\n", - "52 29\n", - "263 28\n", - "262 27\n", - "261 26\n", - "260 25\n", - 
"259 24\n", - "258 23\n", - "257 22\n", - "256 21\n", - "255 20\n", - "254 19\n", - "253 18\n", - "252 17\n", - "251 16\n", - "250 15\n", - "249 14\n", - "248 13\n", - "247 12\n", - "246 11\n", - "245 10\n", - "244 9\n", - "243 -1\n", - "242 46\n", - "241 45\n", - "240 44\n", - "239 43\n", - "238 -1\n", - "237 50\n", - "236 49\n", - "235 48\n", - "234 -1\n", - "233 54\n", - "232 53\n", - "231 52\n", - "230 -1\n", - "229 262\n", - "228 261\n", - "227 260\n", - "226 259\n", - "225 258\n", - "224 257\n", - "223 256\n", - "222 255\n", - "221 254\n", - "220 253\n", - "219 252\n", - "218 251\n", - "217 250\n", - "216 249\n", - "215 248\n", - "214 247\n", - "213 246\n", - "212 245\n", - "211 244\n", - "210 243\n", - "209 242\n", - "208 241\n", - "207 240\n", - "206 239\n", - "205 238\n", - "203 237\n", - "204 236\n", - "202 235\n", - "201 234\n", - "200 233\n", - "199 232\n", - "198 231\n", - "197 230\n", - "196 229\n", - "195 228\n", - "194 227\n", - "193 226\n", - "192 225\n", - "191 224\n", - "190 223\n", - "189 222\n", - "188 221\n", - "187 220\n", - "186 219\n", - "185 218\n", - "184 217\n", - "183 216\n", - "182 215\n", - "181 214\n", - "180 213\n", - "179 212\n", - "178 211\n", - "177 210\n", - "176 209\n", - "175 208\n", - "174 207\n", - "173 206\n", - "172 205\n", - "171 203\n", - "170 204\n", - "169 202\n", - "168 201\n", - "167 200\n", - "166 199\n", - "165 198\n", - "164 197\n", - "163 196\n", - "162 195\n", - "161 194\n", - "160 193\n", - "159 192\n", - "158 191\n", - "157 190\n", - "156 189\n", - "155 188\n", - "154 187\n", - "153 186\n", - "152 185\n", - "151 184\n", - "150 183\n", - "149 182\n", - "148 181\n", - "147 180\n", - "146 179\n", - "145 178\n", - "144 177\n", - "143 176\n", - "142 175\n", - "141 174\n", - "140 173\n", - "139 172\n", - "138 171\n", - "137 170\n", - "136 169\n", - "135 168\n", - "134 167\n", - "133 166\n", - "132 165\n", - "131 164\n", - "130 163\n", - "129 162\n", - "128 161\n", - "127 160\n", - "126 159\n", - "125 158\n", - "124 
157\n", - "123 156\n", - "122 155\n", - "121 154\n", - "120 153\n", - "119 152\n", - "118 151\n", - "117 150\n", - "116 149\n", - "115 148\n", - "114 147\n", - "113 146\n", - "112 145\n", - "111 144\n", - "110 143\n", - "109 142\n", - "108 141\n", - "107 140\n", - "106 139\n", - "105 138\n", - "104 137\n", - "103 136\n", - "102 135\n", - "101 134\n", - "100 133\n", - "99 132\n", - "98 131\n", - "97 130\n", - "96 129\n", - "95 128\n", - "94 127\n", - "93 126\n", - "92 125\n", - "91 124\n", - "90 123\n", - "89 122\n", - "88 121\n", - "87 120\n", - "86 119\n", - "85 118\n", - "84 117\n", - "83 116\n", - "82 115\n", - "81 114\n", - "80 113\n", - "79 112\n", - "78 111\n", - "77 110\n", - "76 109\n", - "75 108\n", - "74 107\n", - "73 106\n", - "72 105\n", - "71 104\n", - "70 103\n", - "69 102\n", - "68 101\n", - "67 100\n", - "66 99\n", - "65 98\n", - "64 97\n", - "63 96\n", - "62 95\n", - "61 94\n", - "60 93\n", - "59 92\n", - "58 91\n", - "57 90\n", - "56 89\n", - "42 88\n", - "41 87\n", - "40 86\n", - "39 85\n", - "38 84\n", - "37 83\n", - "36 82\n", - "35 81\n", - "34 80\n", - "33 79\n", - "32 78\n", - "31 77\n", - "30 76\n", - "29 75\n", - "28 74\n", - "27 73\n", - "26 72\n", - "25 71\n", - "24 70\n", - "23 69\n", - "22 68\n", - "21 67\n", - "20 66\n", - "19 65\n", - "18 64\n", - "17 63\n", - "16 62\n", - "15 61\n", - "14 60\n", - "13 59\n", - "12 58\n", - "11 57\n", - "10 56\n", - "9 -1\n", - "311 310\n", - "310 309\n", - "309 308\n", - "308 307\n", - "307 306\n", - "306 305\n", - "305 304\n", - "304 303\n", - "303 302\n", - "302 301\n", - "301 300\n", - "300 299\n", - "299 298\n", - "298 297\n", - "297 296\n", - "296 295\n", - "295 294\n", - "294 293\n", - "293 292\n", - "292 291\n", - "291 290\n", - "290 289\n", - "289 288\n", - "288 287\n", - "287 286\n", - "286 275\n", - "275 276\n", - "276 277\n", - "277 278\n", - "278 279\n", - "279 280\n", - "280 281\n", - "281 282\n", - "282 283\n", - "283 284\n", - "284 285\n", - "285 274\n", - "274 273\n", - "273 272\n", 
- "272 271\n", - "271 270\n", - "270 269\n", - "269 268\n", - "268 267\n", - "267 266\n", - "266 265\n", - "265 264\n", - "264 -1\n", - "332 331\n", - "331 330\n", - "330 329\n", - "329 328\n", - "328 327\n", - "327 326\n", - "326 325\n", - "325 324\n", - "324 323\n", - "323 322\n", - "322 321\n", - "321 320\n", - "320 319\n", - "319 318\n", - "318 317\n", - "317 316\n", - "316 315\n", - "315 314\n", - "314 313\n", - "313 312\n", - "312 -1\n", - "355 354\n", - "354 353\n", - "353 352\n", - "352 351\n", - "351 350\n", - "350 349\n", - "349 348\n", - "348 347\n", - "347 346\n", - "346 345\n", - "345 344\n", - "344 343\n", - "343 342\n", - "342 341\n", - "341 340\n", - "340 339\n", - "339 338\n", - "338 337\n", - "337 336\n", - "336 335\n", - "335 334\n", - "334 333\n", - "333 -1\n", - "389 388\n", - "388 387\n", - "387 386\n", - "386 385\n", - "385 384\n", - "384 383\n", - "383 382\n", - "382 381\n", - "381 369\n", - "369 370\n", - "370 371\n", - "371 372\n", - "372 373\n", - "373 374\n", - "374 375\n", - "375 376\n", - "376 377\n", - "377 378\n", - "378 379\n", - "379 380\n", - "380 368\n", - "368 367\n", - "367 366\n", - "366 365\n", - "365 364\n", - "364 363\n", - "363 362\n", - "362 361\n", - "361 360\n", - "360 359\n", - "359 358\n", - "358 357\n", - "357 356\n", - "356 -1\n", - "422 421\n", - "421 420\n", - "420 419\n", - "419 418\n", - "418 417\n", - "417 416\n", - "416 415\n", - "415 414\n", - "414 413\n", - "413 412\n", - "412 411\n", - "411 410\n", - "410 409\n", - "409 408\n", - "408 407\n", - "407 406\n", - "406 405\n", - "405 404\n", - "404 403\n", - "403 402\n", - "402 401\n", - "401 400\n", - "400 399\n", - "399 398\n", - "398 397\n", - "397 396\n", - "396 395\n", - "395 394\n", - "394 393\n", - "393 392\n", - "392 391\n", - "391 390\n", - "390 -1\n", - "454 453\n", - "453 452\n", - "452 451\n", - "451 450\n", - "450 449\n", - "449 448\n", - "448 447\n", - "447 446\n", - "446 445\n", - "445 444\n", - "444 443\n", - "443 442\n", - "442 441\n", - "441 
440\n", - "440 439\n", - "439 438\n", - "438 437\n", - "437 436\n", - "436 435\n", - "435 434\n", - "434 433\n", - "433 432\n", - "432 431\n", - "431 430\n", - "430 429\n", - "429 428\n", - "428 427\n", - "427 426\n", - "426 425\n", - "425 424\n", - "424 423\n", - "423 -1\n" - ] - }, - { - "data": { - "text/plain": [ - "[1,\n", - " 0,\n", - " -1,\n", - " 4,\n", - " 3,\n", - " -1,\n", - " 7,\n", - " 6,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " 197,\n", - " 196,\n", - " 195,\n", - " 194,\n", - " 193,\n", - " 192,\n", - " 191,\n", - " 190,\n", - " 189,\n", - " 188,\n", - " 187,\n", - " 186,\n", - " 185,\n", - " 184,\n", - " 183,\n", - " 182,\n", - " 181,\n", - " 180,\n", 
- " 179,\n", - " 178,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " -1,\n", - " 310,\n", - " 309,\n", - " 308,\n", - " 307,\n", - " 306,\n", - " 305,\n", - " 304,\n", - " 303,\n", - " 302,\n", - " 301,\n", - " 300,\n", - " 299,\n", - " 298,\n", - " 297,\n", - " 296,\n", - " 295,\n", - " 294,\n", - " 293,\n", - " -1,\n", - " 291,\n", - " 290,\n", - " 289,\n", - " 288,\n", - " 287,\n", - " 286,\n", - " 275,\n", - " 276,\n", - " 277,\n", - " 278,\n", - " 279,\n", - " 280,\n", - " 281,\n", - " 282,\n", - " 283,\n", - " 284,\n", - " 285,\n", - " 274,\n", - " 273,\n", - " 
272,\n", - " 271,\n", - " 270,\n", - " 269,\n", - " 268,\n", - " 267,\n", - " 266,\n", - " 265,\n", - " 264,\n", - " -1,\n", - " 331,\n", - " 330,\n", - " -1,\n", - " 328,\n", - " 327,\n", - " 326,\n", - " 325,\n", - " 324,\n", - " 323,\n", - " 322,\n", - " 321,\n", - " 320,\n", - " 319,\n", - " 318,\n", - " 317,\n", - " 316,\n", - " 315,\n", - " 314,\n", - " 313,\n", - " 312,\n", - " -1,\n", - " 354,\n", - " 353,\n", - " 352,\n", - " 351,\n", - " 350,\n", - " 349,\n", - " 348,\n", - " 347,\n", - " 346,\n", - " 345,\n", - " 344,\n", - " 343,\n", - " 342,\n", - " 341,\n", - " 340,\n", - " 339,\n", - " 338,\n", - " 337,\n", - " -1,\n", - " 335,\n", - " 334,\n", - " 333,\n", - " -1,\n", - " 388,\n", - " 387,\n", - " 386,\n", - " 385,\n", - " 384,\n", - " 383,\n", - " 382,\n", - " 381,\n", - " 369,\n", - " 370,\n", - " 371,\n", - " 372,\n", - " 373,\n", - " 374,\n", - " 375,\n", - " 376,\n", - " 377,\n", - " 378,\n", - " 379,\n", - " 380,\n", - " 368,\n", - " -1,\n", - " 366,\n", - " 365,\n", - " 364,\n", - " 363,\n", - " 362,\n", - " 361,\n", - " 360,\n", - " 359,\n", - " 358,\n", - " 357,\n", - " 356,\n", - " -1,\n", - " 421,\n", - " 420,\n", - " 419,\n", - " 418,\n", - " 417,\n", - " 416,\n", - " 415,\n", - " 414,\n", - " 413,\n", - " 412,\n", - " 411,\n", - " 410,\n", - " 409,\n", - " 408,\n", - " 407,\n", - " 406,\n", - " 405,\n", - " 404,\n", - " 403,\n", - " 402,\n", - " -1,\n", - " 400,\n", - " 399,\n", - " 398,\n", - " 397,\n", - " 396,\n", - " 395,\n", - " 394,\n", - " 393,\n", - " 392,\n", - " 391,\n", - " 390,\n", - " -1,\n", - " 453,\n", - " 452,\n", - " 451,\n", - " 450,\n", - " 449,\n", - " 448,\n", - " 447,\n", - " 446,\n", - " 445,\n", - " 444,\n", - " 443,\n", - " 442,\n", - " 441,\n", - " 440,\n", - " 439,\n", - " 438,\n", - " 437,\n", - " 436,\n", - " -1,\n", - " 434,\n", - " 433,\n", - " 432,\n", - " 431,\n", - " 430,\n", - " 429,\n", - " 428,\n", - " 427,\n", - " 426,\n", - " 425,\n", - " 424,\n", - " 423,\n", - " -1]" - ] - }, - 
"execution_count": 106, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "stacks=[]\n", - "for i,j in zip(df.index,df[\"threeprime\"]):\n", - " try:\n", - " print(i,j)\n", - " if ind_map[i]==ind_map[j]:\n", - " stacks.append(j)\n", - " else:\n", - " stacks.append(-1)\n", - " except:\n", - " stacks.append(-1)\n", - "stacks" + "class strands():\n", + " def __init__(self):\n", + " self.row=0 \n", + " self.col=0\n", + " self.num=0\n", + " self.scaf=[]\n", + " self.stap=[]\n", + " self.loop=[]\n", + " self.skip=[]\n", + " self.scafLoop=[]\n", + " self.stapLoop=[]\n", + " self.stap_colors=[]\n", + " self.scaf_contact={}\n", + " self.stap_connect={}\n", + " def to_dict(self):\n", + " d={}\n", + " d['row']=self.row\n", + " d['col']=self.col\n", + " d['num']=self.num\n", + " d['scaf']=self.scaf\n", + " d['stap']=self.stap\n", + " d['loop']=self.loop\n", + " d['skip']=self.skip\n", + " d['scafLoop']=self.scafLoop\n", + " d['stapLoop']=self.stapLoop\n", + " d['stap_colors']=self.stap_colors\n", + " return d\n" ] }, { "cell_type": "code", - "execution_count": 101, - "id": "64cb336b-9399-49ed-8a82-173a2b967a75", + "execution_count": 177, + "id": "308cd6c1", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{2: (0, 0),\n", - " 1: (0, 0),\n", - " 0: (0, 0),\n", - " 5: (1, 1),\n", - " 4: (1, 1),\n", - " 3: (1, 1),\n", - " 8: (2, 2),\n", - " 7: (2, 2),\n", - " 6: (2, 2),\n", - " 42: (3, 2),\n", - " 41: (3, 2),\n", - " 40: (3, 2),\n", - " 39: (3, 2),\n", - " 38: (3, 2),\n", - " 37: (3, 2),\n", - " 36: (3, 2),\n", - " 35: (3, 2),\n", - " 34: (3, 2),\n", - " 33: (3, 2),\n", - " 32: (3, 2),\n", - " 31: (3, 2),\n", - " 30: (3, 2),\n", - " 29: (3, 2),\n", - " 28: (3, 2),\n", - " 27: (3, 2),\n", - " 26: (3, 2),\n", - " 25: (3, 2),\n", - " 24: (3, 2),\n", - " 23: (3, 2),\n", - " 22: (3, 2),\n", - " 21: (3, 2),\n", - " 20: (3, 2),\n", - " 19: (3, 2),\n", - " 18: (3, 2),\n", - " 17: (3, 2),\n", - " 16: (3, 2),\n", - " 15: (3, 
2),\n", - " 14: (3, 2),\n", - " 13: (3, 2),\n", - " 12: (3, 2),\n", - " 11: (3, 2),\n", - " 10: (3, 2),\n", - " 9: (3, 2),\n", - " 47: (4, 3),\n", - " 46: (4, 3),\n", - " 45: (4, 3),\n", - " 44: (4, 3),\n", - " 43: (4, 3),\n", - " 51: (5, 4),\n", - " 50: (5, 4),\n", - " 49: (5, 4),\n", - " 48: (5, 4),\n", - " 55: (6, 5),\n", - " 54: (6, 5),\n", - " 53: (6, 5),\n", - " 52: (6, 5),\n", - " 197: (7, 0),\n", - " 196: (7, 0),\n", - " 195: (7, 0),\n", - " 194: (7, 0),\n", - " 193: (7, 0),\n", - " 192: (7, 0),\n", - " 191: (7, 0),\n", - " 190: (7, 0),\n", - " 189: (7, 0),\n", - " 188: (7, 0),\n", - " 187: (7, 0),\n", - " 186: (7, 0),\n", - " 185: (7, 0),\n", - " 184: (7, 0),\n", - " 183: (7, 0),\n", - " 182: (7, 0),\n", - " 181: (7, 0),\n", - " 180: (7, 0),\n", - " 179: (7, 0),\n", - " 178: (7, 0),\n", - " 177: (7, 0),\n", - " 176: (7, 0),\n", - " 175: (7, 0),\n", - " 174: (7, 0),\n", - " 173: (7, 0),\n", - " 172: (7, 0),\n", - " 171: (7, 0),\n", - " 170: (7, 0),\n", - " 169: (7, 0),\n", - " 168: (7, 0),\n", - " 167: (7, 0),\n", - " 166: (7, 0),\n", - " 165: (7, 0),\n", - " 164: (7, 0),\n", - " 163: (7, 0),\n", - " 162: (7, 0),\n", - " 161: (7, 0),\n", - " 160: (7, 0),\n", - " 159: (7, 0),\n", - " 158: (7, 0),\n", - " 157: (7, 0),\n", - " 156: (7, 0),\n", - " 155: (7, 0),\n", - " 154: (7, 0),\n", - " 153: (7, 0),\n", - " 152: (7, 0),\n", - " 151: (7, 0),\n", - " 150: (7, 0),\n", - " 149: (7, 0),\n", - " 148: (7, 0),\n", - " 147: (7, 0),\n", - " 146: (7, 0),\n", - " 145: (7, 0),\n", - " 212: (7, 1),\n", - " 211: (7, 1),\n", - " 210: (7, 1),\n", - " 209: (7, 1),\n", - " 208: (7, 1),\n", - " 207: (7, 1),\n", - " 206: (7, 1),\n", - " 205: (7, 1),\n", - " 203: (7, 1),\n", - " 204: (7, 1),\n", - " 202: (7, 1),\n", - " 201: (7, 1),\n", - " 200: (7, 1),\n", - " 199: (7, 1),\n", - " 198: (7, 1),\n", - " 144: (7, 1),\n", - " 143: (7, 1),\n", - " 142: (7, 1),\n", - " 141: (7, 1),\n", - " 140: (7, 1),\n", - " 139: (7, 1),\n", - " 138: (7, 1),\n", - " 137: (7, 1),\n", - " 136: (7, 
1),\n", - " 135: (7, 1),\n", - " 134: (7, 1),\n", - " 133: (7, 1),\n", - " 132: (7, 1),\n", - " 131: (7, 1),\n", - " 130: (7, 1),\n", - " 129: (7, 1),\n", - " 128: (7, 1),\n", - " 127: (7, 1),\n", - " 228: (7, 2),\n", - " 227: (7, 2),\n", - " 226: (7, 2),\n", - " 225: (7, 2),\n", - " 224: (7, 2),\n", - " 223: (7, 2),\n", - " 222: (7, 2),\n", - " 221: (7, 2),\n", - " 220: (7, 2),\n", - " 219: (7, 2),\n", - " 218: (7, 2),\n", - " 217: (7, 2),\n", - " 216: (7, 2),\n", - " 215: (7, 2),\n", - " 214: (7, 2),\n", - " 213: (7, 2),\n", - " 126: (7, 2),\n", - " 125: (7, 2),\n", - " 124: (7, 2),\n", - " 123: (7, 2),\n", - " 122: (7, 2),\n", - " 121: (7, 2),\n", - " 120: (7, 2),\n", - " 119: (7, 2),\n", - " 118: (7, 2),\n", - " 117: (7, 2),\n", - " 116: (7, 2),\n", - " 115: (7, 2),\n", - " 114: (7, 2),\n", - " 113: (7, 2),\n", - " 242: (7, 3),\n", - " 241: (7, 3),\n", - " 240: (7, 3),\n", - " 239: (7, 3),\n", - " 238: (7, 3),\n", - " 237: (7, 3),\n", - " 236: (7, 3),\n", - " 235: (7, 3),\n", - " 234: (7, 3),\n", - " 233: (7, 3),\n", - " 232: (7, 3),\n", - " 231: (7, 3),\n", - " 230: (7, 3),\n", - " 229: (7, 3),\n", - " 112: (7, 3),\n", - " 111: (7, 3),\n", - " 110: (7, 3),\n", - " 109: (7, 3),\n", - " 108: (7, 3),\n", - " 107: (7, 3),\n", - " 106: (7, 3),\n", - " 105: (7, 3),\n", - " 104: (7, 3),\n", - " 103: (7, 3),\n", - " 102: (7, 3),\n", - " 101: (7, 3),\n", - " 100: (7, 3),\n", - " 99: (7, 3),\n", - " 98: (7, 3),\n", - " 97: (7, 3),\n", - " 249: (7, 4),\n", - " 248: (7, 4),\n", - " 247: (7, 4),\n", - " 246: (7, 4),\n", - " 245: (7, 4),\n", - " 244: (7, 4),\n", - " 243: (7, 4),\n", - " 96: (7, 4),\n", - " 95: (7, 4),\n", - " 94: (7, 4),\n", - " 93: (7, 4),\n", - " 92: (7, 4),\n", - " 91: (7, 4),\n", - " 90: (7, 4),\n", - " 89: (7, 4),\n", - " 88: (7, 4),\n", - " 87: (7, 4),\n", - " 86: (7, 4),\n", - " 85: (7, 4),\n", - " 84: (7, 4),\n", - " 83: (7, 4),\n", - " 82: (7, 4),\n", - " 81: (7, 4),\n", - " 80: (7, 4),\n", - " 79: (7, 4),\n", - " 78: (7, 4),\n", - " 77: (7, 
4),\n", - " 76: (7, 4),\n", - " 75: (7, 4),\n", - " 74: (7, 4),\n", - " 73: (7, 4),\n", - " 263: (7, 5),\n", - " 262: (7, 5),\n", - " 261: (7, 5),\n", - " 260: (7, 5),\n", - " 259: (7, 5),\n", - " 258: (7, 5),\n", - " 257: (7, 5),\n", - " 256: (7, 5),\n", - " 255: (7, 5),\n", - " 254: (7, 5),\n", - " 253: (7, 5),\n", - " 252: (7, 5),\n", - " 251: (7, 5),\n", - " 250: (7, 5),\n", - " 72: (7, 5),\n", - " 71: (7, 5),\n", - " 70: (7, 5),\n", - " 69: (7, 5),\n", - " 68: (7, 5),\n", - " 67: (7, 5),\n", - " 66: (7, 5),\n", - " 65: (7, 5),\n", - " 64: (7, 5),\n", - " 63: (7, 5),\n", - " 62: (7, 5),\n", - " 61: (7, 5),\n", - " 60: (7, 5),\n", - " 59: (7, 5),\n", - " 58: (7, 5),\n", - " 57: (7, 5),\n", - " 56: (7, 5),\n", - " 292: (8, 0),\n", - " 291: (8, 0),\n", - " 290: (8, 0),\n", - " 289: (8, 0),\n", - " 288: (8, 0),\n", - " 287: (8, 0),\n", - " 286: (8, 0),\n", - " 275: (8, 0),\n", - " 276: (8, 0),\n", - " 277: (8, 0),\n", - " 278: (8, 0),\n", - " 279: (8, 0),\n", - " 280: (8, 0),\n", - " 281: (8, 0),\n", - " 282: (8, 0),\n", - " 283: (8, 0),\n", - " 284: (8, 0),\n", - " 285: (8, 0),\n", - " 274: (8, 0),\n", - " 273: (8, 0),\n", - " 272: (8, 0),\n", - " 271: (8, 0),\n", - " 270: (8, 0),\n", - " 269: (8, 0),\n", - " 268: (8, 0),\n", - " 267: (8, 0),\n", - " 266: (8, 0),\n", - " 265: (8, 0),\n", - " 264: (8, 0),\n", - " 311: (8, 1),\n", - " 310: (8, 1),\n", - " 309: (8, 1),\n", - " 308: (8, 1),\n", - " 307: (8, 1),\n", - " 306: (8, 1),\n", - " 305: (8, 1),\n", - " 304: (8, 1),\n", - " 303: (8, 1),\n", - " 302: (8, 1),\n", - " 301: (8, 1),\n", - " 300: (8, 1),\n", - " 299: (8, 1),\n", - " 298: (8, 1),\n", - " 297: (8, 1),\n", - " 296: (8, 1),\n", - " 295: (8, 1),\n", - " 294: (8, 1),\n", - " 293: (8, 1),\n", - " 332: (9, 0),\n", - " 331: (9, 0),\n", - " 330: (9, 0),\n", - " 329: (9, 1),\n", - " 328: (9, 1),\n", - " 327: (9, 1),\n", - " 326: (9, 1),\n", - " 325: (9, 1),\n", - " 324: (9, 1),\n", - " 323: (9, 1),\n", - " 322: (9, 1),\n", - " 321: (9, 1),\n", - " 320: (9, 
1),\n", - " 319: (9, 1),\n", - " 318: (9, 1),\n", - " 317: (9, 1),\n", - " 316: (9, 1),\n", - " 315: (9, 1),\n", - " 314: (9, 1),\n", - " 313: (9, 1),\n", - " 312: (9, 1),\n", - " 336: (10, 0),\n", - " 335: (10, 0),\n", - " 334: (10, 0),\n", - " 333: (10, 0),\n", - " 355: (10, 5),\n", - " 354: (10, 5),\n", - " 353: (10, 5),\n", - " 352: (10, 5),\n", - " 351: (10, 5),\n", - " 350: (10, 5),\n", - " 349: (10, 5),\n", - " 348: (10, 5),\n", - " 347: (10, 5),\n", - " 346: (10, 5),\n", - " 345: (10, 5),\n", - " 344: (10, 5),\n", - " 343: (10, 5),\n", - " 342: (10, 5),\n", - " 341: (10, 5),\n", - " 340: (10, 5),\n", - " 339: (10, 5),\n", - " 338: (10, 5),\n", - " 337: (10, 5),\n", - " 389: (11, 0),\n", - " 388: (11, 0),\n", - " 387: (11, 0),\n", - " 386: (11, 0),\n", - " 385: (11, 0),\n", - " 384: (11, 0),\n", - " 383: (11, 0),\n", - " 382: (11, 0),\n", - " 381: (11, 0),\n", - " 369: (11, 0),\n", - " 370: (11, 0),\n", - " 371: (11, 0),\n", - " 372: (11, 0),\n", - " 373: (11, 0),\n", - " 374: (11, 0),\n", - " 375: (11, 0),\n", - " 376: (11, 0),\n", - " 377: (11, 0),\n", - " 378: (11, 0),\n", - " 379: (11, 0),\n", - " 380: (11, 0),\n", - " 368: (11, 0),\n", - " 367: (11, 5),\n", - " 366: (11, 5),\n", - " 365: (11, 5),\n", - " 364: (11, 5),\n", - " 363: (11, 5),\n", - " 362: (11, 5),\n", - " 361: (11, 5),\n", - " 360: (11, 5),\n", - " 359: (11, 5),\n", - " 358: (11, 5),\n", - " 357: (11, 5),\n", - " 356: (11, 5),\n", - " 422: (12, 3),\n", - " 421: (12, 3),\n", - " 420: (12, 3),\n", - " 419: (12, 3),\n", - " 418: (12, 3),\n", - " 417: (12, 3),\n", - " 416: (12, 3),\n", - " 415: (12, 3),\n", - " 414: (12, 3),\n", - " 413: (12, 3),\n", - " 412: (12, 3),\n", - " 411: (12, 3),\n", - " 410: (12, 3),\n", - " 409: (12, 3),\n", - " 408: (12, 3),\n", - " 407: (12, 3),\n", - " 406: (12, 3),\n", - " 405: (12, 3),\n", - " 404: (12, 3),\n", - " 403: (12, 3),\n", - " 402: (12, 3),\n", - " 401: (12, 4),\n", - " 400: (12, 4),\n", - " 399: (12, 4),\n", - " 398: (12, 4),\n", - " 397: (12, 
4),\n", - " 396: (12, 4),\n", - " 395: (12, 4),\n", - " 394: (12, 4),\n", - " 393: (12, 4),\n", - " 392: (12, 4),\n", - " 391: (12, 4),\n", - " 390: (12, 4),\n", - " 435: (13, 3),\n", - " 434: (13, 3),\n", - " 433: (13, 3),\n", - " 432: (13, 3),\n", - " 431: (13, 3),\n", - " 430: (13, 3),\n", - " 429: (13, 3),\n", - " 428: (13, 3),\n", - " 427: (13, 3),\n", - " 426: (13, 3),\n", - " 425: (13, 3),\n", - " 424: (13, 3),\n", - " 423: (13, 3),\n", - " 454: (13, 4),\n", - " 453: (13, 4),\n", - " 452: (13, 4),\n", - " 451: (13, 4),\n", - " 450: (13, 4),\n", - " 449: (13, 4),\n", - " 448: (13, 4),\n", - " 447: (13, 4),\n", - " 446: (13, 4),\n", - " 445: (13, 4),\n", - " 444: (13, 4),\n", - " 443: (13, 4),\n", - " 442: (13, 4),\n", - " 441: (13, 4),\n", - " 440: (13, 4),\n", - " 439: (13, 4),\n", - " 438: (13, 4),\n", - " 437: (13, 4),\n", - " 436: (13, 4)}" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "strands2\n", - "ind_map={}\n", - "for j in list(strands2.keys()):\n", - " for l in strands2[j]:\n", - " ind_map[l]=j\n", - "ind_map\n", - " " + "def find_segs(vir2nuc_scaf):\n", + " oligos={}\n", + " for i in range(len(vir2nuc_scaf)):\n", + " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n", + " if oligo not in oligos.keys():\n", + " oligos[oligo]=[]\n", + " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n", + " return oligos\n", + "\n", + "#class\n", + "def decode_vh_vb(virt2nuc):\n", + " vh_list={}\n", + " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", + " for i in pattern.keys():\n", + " s=strands()\n", + " s.row,s.col=pattern[i]\n", + " s.num=i\n", + " vh_list[s.num]=s\n", + " scafs=vh_vb._scaf\n", + " staps=vh_vb._stap\n", + " scaf_strands=find_segs(scafs)\n", + " scaf_oligos=list(scaf_strands.keys())\n", + " for i in scaf_oligos:\n", + " pass\n", + " \n", + " \n", + " return vh_list" ] }, { "cell_type": "code", - "execution_count": 97, - "id": "4a86b6be-8176-489c-a8a9-c31c62d04096", + 
"execution_count": 187, + "id": "bc032680", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[1,\n", - " 0,\n", - " -1,\n", - " 4,\n", - " 3,\n", - " -1,\n", - " 7,\n", - " 6,\n", - " -1,\n", - " 41,\n", - " 40,\n", - " 39,\n", - " 38,\n", - " 37,\n", - " 36,\n", - " 35,\n", - " 34,\n", - " 33,\n", - " 32,\n", - " 31,\n", - " 30,\n", - " 29,\n", - " 28,\n", - " 27,\n", - " 26,\n", - " 25,\n", - " 24,\n", - " 23,\n", - " 22,\n", - " 21,\n", - " 20,\n", - " 19,\n", - " 18,\n", - " 17,\n", - " 16,\n", - " 15,\n", - " 14,\n", - " 13,\n", - " 12,\n", - " 11,\n", - " 10,\n", - " 9,\n", - " -1,\n", - " 46,\n", - " 45,\n", - " 44,\n", - " 43,\n", - " -1,\n", - " 50,\n", - " 49,\n", - " 48,\n", - " -1,\n", - " 54,\n", - " 53,\n", - " 52,\n", - " -1,\n", - " 196,\n", - " 195,\n", - " 194,\n", - " 193,\n", - " 192,\n", - " 191,\n", - " 190,\n", - " 189,\n", - " 188,\n", - " 187,\n", - " 186,\n", - " 185,\n", - " 184,\n", - " 183,\n", - " 182,\n", - " 181,\n", - " 180,\n", - " 179,\n", - " 178,\n", - " 177,\n", - " 176,\n", - " 175,\n", - " 174,\n", - " 173,\n", - " 172,\n", - " 171,\n", - " 170,\n", - " 169,\n", - " 168,\n", - " 167,\n", - " 166,\n", - " 165,\n", - " 164,\n", - " 163,\n", - " 162,\n", - " 161,\n", - " 160,\n", - " 159,\n", - " 158,\n", - " 157,\n", - " 156,\n", - " 155,\n", - " 154,\n", - " 153,\n", - " 152,\n", - " 151,\n", - " 150,\n", - " 149,\n", - " 148,\n", - " 147,\n", - " 146,\n", - " 145,\n", - " -1,\n", - " 211,\n", - " 210,\n", - " 209,\n", - " 208,\n", - " 207,\n", - " 206,\n", - " 205,\n", - " 203,\n", - " 204,\n", - " 202,\n", - " 201,\n", - " 200,\n", - " 199,\n", - " 198,\n", - " 144,\n", - " 143,\n", - " 142,\n", - " 141,\n", - " 140,\n", - " 139,\n", - " 138,\n", - " 137,\n", - " 136,\n", - " 135,\n", - " 134,\n", - " 133,\n", - " 132,\n", - " 131,\n", - " 130,\n", - " 129,\n", - " 128,\n", - " 127,\n", - " -1,\n", - " 227,\n", - " 226,\n", - " 225,\n", - " 224,\n", - " 223,\n", - " 222,\n", - " 221,\n", - " 220,\n", - " 219,\n", 
- " 218,\n", - " 217,\n", - " 216,\n", - " 215,\n", - " 214,\n", - " 213,\n", - " 126,\n", - " 125,\n", - " 124,\n", - " 123,\n", - " 122,\n", - " 121,\n", - " 120,\n", - " 119,\n", - " 118,\n", - " 117,\n", - " 116,\n", - " 115,\n", - " 114,\n", - " 113,\n", - " -1,\n", - " 241,\n", - " 240,\n", - " 239,\n", - " 238,\n", - " 237,\n", - " 236,\n", - " 235,\n", - " 234,\n", - " 233,\n", - " 232,\n", - " 231,\n", - " 230,\n", - " 229,\n", - " 112,\n", - " 111,\n", - " 110,\n", - " 109,\n", - " 108,\n", - " 107,\n", - " 106,\n", - " 105,\n", - " 104,\n", - " 103,\n", - " 102,\n", - " 101,\n", - " 100,\n", - " 99,\n", - " 98,\n", - " 97,\n", - " -1,\n", - " 248,\n", - " 247,\n", - " 246,\n", - " 245,\n", - " 244,\n", - " 243,\n", - " 96,\n", - " 95,\n", - " 94,\n", - " 93,\n", - " 92,\n", - " 91,\n", - " 90,\n", - " 89,\n", - " 88,\n", - " 87,\n", - " 86,\n", - " 85,\n", - " 84,\n", - " 83,\n", - " 82,\n", - " 81,\n", - " 80,\n", - " 79,\n", - " 78,\n", - " 77,\n", - " 76,\n", - " 75,\n", - " 74,\n", - " 73,\n", - " -1,\n", - " 262,\n", - " 261,\n", - " 260,\n", - " 259,\n", - " 258,\n", - " 257,\n", - " 256,\n", - " 255,\n", - " 254,\n", - " 253,\n", - " 252,\n", - " 251,\n", - " 250,\n", - " 72,\n", - " 71,\n", - " 70,\n", - " 69,\n", - " 68,\n", - " 67,\n", - " 66,\n", - " 65,\n", - " 64,\n", - " 63,\n", - " 62,\n", - " 61,\n", - " 60,\n", - " 59,\n", - " 58,\n", - " 57,\n", - " 56,\n", - " -1,\n", - " 291,\n", - " 290,\n", - " 289,\n", - " 288,\n", - " 287,\n", - " 286,\n", - " 275,\n", - " 276,\n", - " 277,\n", - " 278,\n", - " 279,\n", - " 280,\n", - " 281,\n", - " 282,\n", - " 283,\n", - " 284,\n", - " 285,\n", - " 274,\n", - " 273,\n", - " 272,\n", - " 271,\n", - " 270,\n", - " 269,\n", - " 268,\n", - " 267,\n", - " 266,\n", - " 265,\n", - " 264,\n", - " -1,\n", - " 310,\n", - " 309,\n", - " 308,\n", - " 307,\n", - " 306,\n", - " 305,\n", - " 304,\n", - " 303,\n", - " 302,\n", - " 301,\n", - " 300,\n", - " 299,\n", - " 298,\n", - " 297,\n", - " 296,\n", - " 
295,\n", - " 294,\n", - " 293,\n", - " -1,\n", - " 331,\n", - " 330,\n", - " -1,\n", - " 328,\n", - " 327,\n", - " 326,\n", - " 325,\n", - " 324,\n", - " 323,\n", - " 322,\n", - " 321,\n", - " 320,\n", - " 319,\n", - " 318,\n", - " 317,\n", - " 316,\n", - " 315,\n", - " 314,\n", - " 313,\n", - " 312,\n", - " -1,\n", - " 335,\n", - " 334,\n", - " 333,\n", - " -1,\n", - " 354,\n", - " 353,\n", - " 352,\n", - " 351,\n", - " 350,\n", - " 349,\n", - " 348,\n", - " 347,\n", - " 346,\n", - " 345,\n", - " 344,\n", - " 343,\n", - " 342,\n", - " 341,\n", - " 340,\n", - " 339,\n", - " 338,\n", - " 337,\n", - " -1,\n", - " 388,\n", - " 387,\n", - " 386,\n", - " 385,\n", - " 384,\n", - " 383,\n", - " 382,\n", - " 381,\n", - " 369,\n", - " 370,\n", - " 371,\n", - " 372,\n", - " 373,\n", - " 374,\n", - " 375,\n", - " 376,\n", - " 377,\n", - " 378,\n", - " 379,\n", - " 380,\n", - " 368,\n", - " -1,\n", - " 366,\n", - " 365,\n", - " 364,\n", - " 363,\n", - " 362,\n", - " 361,\n", - " 360,\n", - " 359,\n", - " 358,\n", - " 357,\n", - " 356,\n", - " -1,\n", - " 421,\n", - " 420,\n", - " 419,\n", - " 418,\n", - " 417,\n", - " 416,\n", - " 415,\n", - " 414,\n", - " 413,\n", - " 412,\n", - " 411,\n", - " 410,\n", - " 409,\n", - " 408,\n", - " 407,\n", - " 406,\n", - " 405,\n", - " 404,\n", - " 403,\n", - " 402,\n", - " -1,\n", - " 400,\n", - " 399,\n", - " 398,\n", - " 397,\n", - " 396,\n", - " 395,\n", - " 394,\n", - " 393,\n", - " 392,\n", - " 391,\n", - " 390,\n", - " -1,\n", - " 434,\n", - " 433,\n", - " 432,\n", - " 431,\n", - " 430,\n", - " 429,\n", - " 428,\n", - " 427,\n", - " 426,\n", - " 425,\n", - " 424,\n", - " 423,\n", - " -1,\n", - " 453,\n", - " 452,\n", - " 451,\n", - " 450,\n", - " 449,\n", - " 448,\n", - " 447,\n", - " 446,\n", - " 445,\n", - " 444,\n", - " 443,\n", - " 442,\n", - " 441,\n", - " 440,\n", - " 439,\n", - " 438,\n", - " 437,\n", - " 436,\n", - " -1]" + "[[(2, 34),\n", + " (2, 33),\n", + " (2, 32),\n", + " (2, 31),\n", + " (2, 30),\n", + " (2, 29),\n", + " 
(2, 28),\n", + " (2, 27),\n", + " (2, 26),\n", + " (2, 25),\n", + " (2, 24),\n", + " (2, 23),\n", + " (2, 22),\n", + " (2, 21),\n", + " (2, 20),\n", + " (2, 19),\n", + " (2, 18),\n", + " (2, 17),\n", + " (2, 16),\n", + " (2, 15),\n", + " (2, 14),\n", + " (2, 13),\n", + " (2, 12),\n", + " (2, 11),\n", + " (2, 10),\n", + " (2, 9),\n", + " (2, 8),\n", + " (2, 7),\n", + " (2, 6),\n", + " (2, 5),\n", + " (2, 4),\n", + " (2, 3),\n", + " (2, 2),\n", + " (2, 1),\n", + " (2, 0)],\n", + " [(1, 3),\n", + " (1, 4),\n", + " (1, 5),\n", + " (1, 6),\n", + " (1, 7),\n", + " (1, 8),\n", + " (1, 9),\n", + " (1, 10),\n", + " (1, 11),\n", + " (1, 12),\n", + " (1, 13),\n", + " (1, 14),\n", + " (1, 15),\n", + " (1, 16),\n", + " (1, 17),\n", + " (1, 18),\n", + " (1, 19),\n", + " (1, 20),\n", + " (0, 20),\n", + " (0, 19),\n", + " (0, 18),\n", + " (0, 17),\n", + " (0, 16),\n", + " (0, 15),\n", + " (0, 14),\n", + " (0, 13),\n", + " (0, 12),\n", + " (0, 11),\n", + " (0, 10),\n", + " (0, 9),\n", + " (0, 8),\n", + " (0, 7),\n", + " (0, 6),\n", + " (0, 5),\n", + " (0, 4),\n", + " (0, 3),\n", + " (0, 2)],\n", + " [(0, 23),\n", + " (0, 22),\n", + " (0, 21),\n", + " (1, 21),\n", + " (1, 22),\n", + " (1, 23),\n", + " (1, 24),\n", + " (1, 25),\n", + " (1, 26),\n", + " (1, 27),\n", + " (1, 28),\n", + " (1, 29),\n", + " (1, 30),\n", + " (1, 31),\n", + " (1, 32),\n", + " (1, 33),\n", + " (1, 34),\n", + " (1, 35),\n", + " (1, 36),\n", + " (1, 37),\n", + " (1, 38)],\n", + " [(5, 9),\n", + " (5, 10),\n", + " (5, 11),\n", + " (5, 12),\n", + " (5, 13),\n", + " (5, 14),\n", + " (5, 15),\n", + " (5, 16),\n", + " (5, 17),\n", + " (5, 18),\n", + " (5, 19),\n", + " (5, 20),\n", + " (5, 21),\n", + " (5, 22),\n", + " (5, 23),\n", + " (5, 24),\n", + " (5, 25),\n", + " (5, 26),\n", + " (5, 27),\n", + " (0, 27),\n", + " (0, 26),\n", + " (0, 25),\n", + " (0, 24)],\n", + " [(0, 38),\n", + " (0, 37),\n", + " (0, 36),\n", + " (0, 35),\n", + " (0, 34),\n", + " (0, 33),\n", + " (0, 32),\n", + " (0, 31),\n", + " (0, 
30),\n", + " (0, 29),\n", + " (0, 28),\n", + " (5, 28),\n", + " (5, 29),\n", + " (5, 30),\n", + " (5, 31),\n", + " (5, 32),\n", + " (5, 33),\n", + " (5, 34),\n", + " (5, 35),\n", + " (5, 36),\n", + " (5, 37),\n", + " (5, 38),\n", + " (5, 39)],\n", + " [(3, 0),\n", + " (3, 1),\n", + " (3, 2),\n", + " (3, 3),\n", + " (3, 4),\n", + " (3, 5),\n", + " (3, 6),\n", + " (3, 7),\n", + " (3, 8),\n", + " (3, 9),\n", + " (3, 10),\n", + " (3, 11),\n", + " (3, 12),\n", + " (3, 13),\n", + " (3, 14),\n", + " (3, 15),\n", + " (3, 16),\n", + " (3, 17),\n", + " (3, 18),\n", + " (3, 19),\n", + " (3, 20),\n", + " (4, 20),\n", + " (4, 19),\n", + " (4, 18),\n", + " (4, 17),\n", + " (4, 16),\n", + " (4, 15),\n", + " (4, 14),\n", + " (4, 13),\n", + " (4, 12),\n", + " (4, 11),\n", + " (4, 10),\n", + " (4, 9)],\n", + " [(4, 39),\n", + " (4, 38),\n", + " (4, 37),\n", + " (4, 36),\n", + " (4, 35),\n", + " (4, 34),\n", + " (4, 33),\n", + " (4, 32),\n", + " (4, 31),\n", + " (4, 30),\n", + " (4, 29),\n", + " (4, 28),\n", + " (4, 27),\n", + " (4, 26),\n", + " (4, 25),\n", + " (4, 24),\n", + " (4, 23),\n", + " (4, 22),\n", + " (4, 21),\n", + " (3, 21),\n", + " (3, 22),\n", + " (3, 23),\n", + " (3, 24),\n", + " (3, 25),\n", + " (3, 26),\n", + " (3, 27),\n", + " (3, 28),\n", + " (3, 29),\n", + " (3, 30),\n", + " (3, 31),\n", + " (3, 32),\n", + " (3, 33),\n", + " (3, 34)]]" ] }, - "execution_count": 97, + "execution_count": 187, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "strands2=df.groupby(['strand',\"vh\"]).groups\n", - "stacks=[]\n", - "for i in list(strands2.values()):\n", - " m=list(i)\n", - " m.append(-1)\n", - " for j in range(1,len(m)):\n", - " stacks.append(m[j])\n", - "stacks" + "s1=decode_vh_vb(\"test.virt2nuc\")\n", + "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", + "list(find_segs(vh_vb._stap).values())" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "087e2625", + "execution_count": 142, + "id": "29ff7990", "metadata": {}, "outputs": [], 
"source": [ @@ -7828,7 +2867,7 @@ { "cell_type": "code", "execution_count": 117, - "id": "56387503", + "id": "3cb9542c", "metadata": {}, "outputs": [], "source": [ @@ -7848,8 +2887,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "c73234d5", + "execution_count": 116, + "id": "4219838b", "metadata": {}, "outputs": [ { @@ -8045,7 +3084,7 @@ " (5, 23)]" ] }, - "execution_count": 13, + "execution_count": 116, "metadata": {}, "output_type": "execute_result" } @@ -8061,7 +3100,7 @@ { "cell_type": "code", "execution_count": 157, - "id": "b37f7a4c", + "id": "45168e4b", "metadata": {}, "outputs": [ { @@ -8085,7 +3124,7 @@ { "cell_type": "code", "execution_count": 152, - "id": "d11f5b9c", + "id": "33e5c80d", "metadata": {}, "outputs": [ { @@ -8106,7 +3145,7 @@ { "cell_type": "code", "execution_count": 62, - "id": "7bd3df35", + "id": "9b79e902", "metadata": {}, "outputs": [ { @@ -8334,7 +3373,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "efe70397", + "id": "cecd1c9f", "metadata": {}, "outputs": [], "source": [ @@ -8460,7 +3499,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "646b1ae9", + "id": "b4af37c1", "metadata": {}, "outputs": [], "source": [ @@ -8481,7 +3520,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "7ead2ea3", + "id": "2fa89abc", "metadata": {}, "outputs": [ { @@ -8543,7 +3582,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "3095b830", + "id": "6e620d24", "metadata": {}, "outputs": [ { @@ -8605,7 +3644,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b749b541", + "id": "fef6094a", "metadata": {}, "outputs": [], "source": [ @@ -8618,7 +3657,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "a5a89254", + "id": "be089a18", "metadata": {}, "outputs": [ { @@ -8639,7 +3678,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "04759ac6", + "id": "6f75d365", "metadata": {}, "outputs": [ { @@ -8688,7 +3727,7 @@ { "cell_type": "code", "execution_count": 36, - "id": "7c36faba", + "id": 
"8dfadf61", "metadata": {}, "outputs": [ { @@ -8750,7 +3789,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "9985773f", + "id": "ba097c82", "metadata": {}, "outputs": [ { @@ -8776,7 +3815,7 @@ { "cell_type": "code", "execution_count": null, - "id": "134923d4", + "id": "1c4d4fa9", "metadata": {}, "outputs": [], "source": [] @@ -8784,7 +3823,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "f7bd6aef", + "id": "d4ff1f83", "metadata": {}, "outputs": [ { @@ -8804,7 +3843,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "9913320b", + "id": "6f1dab46", "metadata": {}, "outputs": [ { @@ -8825,7 +3864,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "46af2b4f", + "id": "615964fe", "metadata": {}, "outputs": [ { @@ -8895,7 +3934,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "6ae574e4", + "id": "a71b0639", "metadata": {}, "outputs": [], "source": [ @@ -8906,7 +3945,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "4b56fb9d", + "id": "2bb83a1e", "metadata": {}, "outputs": [ { @@ -9015,7 +4054,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "0c061135", + "id": "86f5c21e", "metadata": {}, "outputs": [ { @@ -9036,7 +4075,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "1d7952e2", + "id": "f3fae511", "metadata": {}, "outputs": [ { @@ -9067,7 +4106,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "3a02aa96", + "id": "07f3352b", "metadata": {}, "outputs": [ { @@ -9564,7 +4603,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "6ab2279a", + "id": "61be89c1", "metadata": {}, "outputs": [ { @@ -9598,7 +4637,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "d7dbbbbf", + "id": "a00c445c", "metadata": {}, "outputs": [ { @@ -9618,7 +4657,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "35968795", + "id": "a648fae4", "metadata": {}, "outputs": [], "source": [ @@ -9628,7 +4667,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "7512de77", + "id": "9d100033", 
"metadata": {}, "outputs": [ { @@ -9649,7 +4688,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "74944c42", + "id": "66d3eaf4", "metadata": {}, "outputs": [ { @@ -9676,7 +4715,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e1d9ef64", + "id": "f60b0cdc", "metadata": {}, "outputs": [], "source": []