diff --git a/mrdna/readers/segmentmodel_from_scadnano.py b/mrdna/readers/segmentmodel_from_scadnano.py index f47b9d3bbcb683e75448d9f335aad792404680b6..a2ae7bfaf80b00172660d43d8132726fe19e5afc 100644 --- a/mrdna/readers/segmentmodel_from_scadnano.py +++ b/mrdna/readers/segmentmodel_from_scadnano.py @@ -122,10 +122,10 @@ def gen_prop_table(fname): nt_prop["bp"]=bp non_stack_ind,=np.where(nt_prop["stack"]==-1) for i in non_stack_ind: - zid=int(nt_prop.loc[i]["zid"])+int(nt_prop.loc[i]["fwd"])*2-1 - try: - nt_prop["stack"][i]=bp_map[(nt_prop.loc[i]["vh"],str(zid),nt_prop.loc[i]["fwd"])] - except: + zid=int(nt_prop.loc[i]["zid"])+int(nt_prop.loc[i]["fwd"])*2.0-1.0 + if (nt_prop.loc[i]["vh"],zid,nt_prop.loc[i]["fwd"]) in bp_map.keys(): + nt_prop["stack"][i]=bp_map[(nt_prop.loc[i]["vh"],zid,nt_prop.loc[i]["fwd"])] + else: continue nt_prop["r"]=list(np.array([nt_prop["x"],nt_prop["y"],nt_prop["z"]],dtype="<f4").T) diff --git a/mrdna/readers/test/Untitled.ipynb b/mrdna/readers/test/Untitled.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b76be740b4212cb66f134a256eb698a4e5819fbf --- /dev/null +++ b/mrdna/readers/test/Untitled.ipynb @@ -0,0 +1,580 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "b3d4de16", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _\n", + " _____ ___ _| |___ ___\n", + "| | _| . | | .'|\n", + "|_|_|_|_| |___|_|_|__,| v1.0a.dev121 \n", + "it/its\n", + "\n" + ] + } + ], + "source": [ + "from mrdna import logger, devlogger\n", + "import pickle\n", + "import pandas as pd\n", + "import sys\n", + "import mrdna.readers.libs as libs\n", + "import numpy as np\n", + "from scipy.spatial import distance_matrix\n", + "import mrdna" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "85f99c60", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from mrdna.readers.segmentmodel_from_oxdna_pinyi import*" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "409255b3", + "metadata": {}, + "outputs": [], + "source": [ + "virt2nuc=\"test_insert.json.virt2nuc\"\n", + "top_data = np.loadtxt(\"test_insert.json.top\", skiprows=1,\n", + " unpack=True,\n", + " dtype=np.dtype('i4,U1,i4,i4')\n", + " )\n", + "def _find_vh_vb_table(s,is_scaf):\n", + " L=[]\n", + " for i in list(s.keys()):\n", + " vh,zid=i\n", + " strand,indices=s[i]\n", + " if len(indices)==0:\n", + " continue\n", + " else:\n", + " if len(indices)==1:\n", + " zids=[str(zid)]\n", + " else:\n", + " zids=[str(zid)+\".\"+str(j) for j in range(len(indices))]\n", + " for index,z in zip(indices,zids):\n", + " L.append(pd.Series({\"index\":index,\"vh\":vh,\"zid\":z,\"strand\":strand,\"is_scaf\":bool(is_scaf)}))\n", + " return L\n", + "\n", + "virt_pickle=open(virt2nuc,\"rb\")\n", + "vh_vb,pattern=pickle.load(virt_pickle)\n", + "L1=_find_vh_vb_table(vh_vb._scaf,1)\n", + "L2=_find_vh_vb_table(vh_vb._stap,0)\n", + "nt_prop=pd.DataFrame(L1+L2)\n", + "nt_prop.set_index(\"index\",inplace=True)\n", + "nt_prop.sort_index(inplace=True)\n", + "nt_prop[\"threeprime\"]=top_data[2]\n", + "nt_prop[\"stack\"]=top_data[2]\n", + "vh_bool=1-(nt_prop[\"vh\"]%2)*2\n", + "is_scaf_bool=nt_prop[\"is_scaf\"]*2-1\n", + "nt_prop[\"fwd\"]=np.array((is_scaf_bool.T*vh_bool+1)/2,dtype=bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9649d37f", + "metadata": {}, + "outputs": [], + "source": [ + "stack_map=dict(zip(zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"fwd\"]),nt_prop.index,))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "dfe5ba1e", + "metadata": {}, + "outputs": [], + "source": [ + "bp_map=dict(zip(zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"fwd\"]),nt_prop.index))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0b94dcec", + "metadata": {}, + "outputs": [], + "source": [ + "non_stack_ind,=np.where(nt_prop[\"stack\"]==-1)\n", + "for i in non_stack_ind:\n", + " zid=int(nt_prop.loc[i][\"zid\"])+int(nt_prop.loc[i][\"fwd\"])*2-1\n", + " try:\n", + " nt_prop[\"stack\"][i]=bp_map[(nt_prop.loc[i][\"vh\"],str(zid),nt_prop.loc[i][\"fwd\"])]\n", + " except:\n", + " continue\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d17224f8", + "metadata": {}, + "outputs": [], + "source": [ + "n,=np.where(nt_prop[\"vh\"]==0)\n", + "t,=np.where(nt_prop[\"fwd\"][n]==True)\n", + "f,=np.where(nt_prop[\"fwd\"][n]==False)\n", + "\n", + "dt=nt_prop.loc[n[t]]\n", + "dt.set_index([\"vh\",\"zid\"],inplace=True)\n", + "df=nt_prop.loc[n[f]]\n", + "df.set_index([\"vh\",\"zid\"],inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b2e7198d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "vh 0\n", + "zid 24\n", + "strand 10\n", + "is_scaf False\n", + "threeprime -1\n", + "stack 332\n", + "fwd False\n", + "Name: 333, dtype: object" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop.loc[333]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "169aa122", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "strand 9\n", + "is_scaf False\n", + "threeprime 329\n", + "stack 329\n", + "Name: (0, 21, False), dtype: object strand 8\n", + "is_scaf False\n", + "threeprime 291\n", + "stack 291\n", + "Name: (0, 20, False), dtype: object\n" + ] + } + ], + "source": [ + "n=nt_prop.set_index([\"vh\",\"zid\",\"fwd\"])\n", + "print(n.loc[(0,\"21\",False)],n.loc[(0,\"20\",False)])" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "c0605ccd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "strand 9\n", + "is_scaf False\n", + "threeprime 328\n", + "stack 328\n", + "Name: (1, 21, True), dtype: object strand 8\n", + "is_scaf False\n", + "threeprime 292\n", + "stack 292\n", + "Name: (1, 20, True), dtype: object\n" + ] + } + ], + "source": [ + "print(n.loc[(1,\"21\",True)],n.loc[(1,\"20\",True)])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d787b235", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>strand</th>\n", + " <th>is_scaf</th>\n", + " <th>threeprime</th>\n", + " <th>stack</th>\n", + " <th>fwd</th>\n", + " </tr>\n", + " <tr>\n", + " <th>index</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>327</th>\n", + " <td>1</td>\n", + " <td>23</td>\n", + " <td>9</td>\n", + " <td>False</td>\n", + " <td>326</td>\n", + " <td>326</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>328</th>\n", + " <td>1</td>\n", + " <td>22</td>\n", + " <td>9</td>\n", + " <td>False</td>\n", + " <td>327</td>\n", + " <td>327</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>290</th>\n", + " <td>0</td>\n", + " <td>18</td>\n", + " <td>8</td>\n", + " <td>False</td>\n", + " <td>289</td>\n", + " <td>289</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>291</th>\n", + " <td>0</td>\n", + " <td>19</td>\n", + " <td>8</td>\n", + " <td>False</td>\n", + " <td>290</td>\n", + " <td>290</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>292</th>\n", + " <td>0</td>\n", + " <td>20</td>\n", + " <td>8</td>\n", + " <td>False</td>\n", + " <td>291</td>\n", + " <td>291</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>293</th>\n", + " <td>1</td>\n", + " <td>20</td>\n", + " <td>8</td>\n", + " <td>False</td>\n", + " <td>292</td>\n", + " <td>292</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " vh zid strand is_scaf threeprime stack fwd\n", + "index \n", + "327 1 23 9 False 326 326 True\n", + "328 1 22 9 False 327 327 True\n", + "290 0 18 8 False 289 289 False\n", + "291 0 19 8 False 290 290 False\n", + "292 0 20 8 False 291 291 False\n", + "293 1 20 8 False 292 292 True" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop.loc[[327,328,290,291,292,293]]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "9088a520", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([['0', '41', 'True'],\n", + " ['1', '39', 'False'],\n", + " ['2', '41', 'True'],\n", + " ['2', '0', 'False'],\n", + " ['3', '37', 'False'],\n", + " ['4', '3', 'True'],\n", + " ['5', '0', 'False'],\n", + " ['5', '23', 'False'],\n", + " ['0', '2', 'False'],\n", + " ['1', '38', 'True'],\n", + " ['0', '24', 'False'],\n", + " ['5', '39', 'True'],\n", + " ['4', '9', 'False'],\n", + " ['3', '34', 'True']], dtype='<U21')" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n,=np.where(nt_prop[\"stack\"]==-1)\n", + "for vh,zid,fwd in stack[n]:\n", + " if fwd==True:\n", + " zid=str(int(zid)+1)\n", + " else:\n", + " zid=str(int(zid)-1)\n", + " if (vh,zid,fwd) in stack:\n", + " nt_prop[]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6508fe8e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>index</th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>strand</th>\n", + " <th>is_scaf</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>39</td>\n", + " <td>0</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>40</td>\n", + " <td>0</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>41</td>\n", + " <td>0</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>5</td>\n", + " <td>1</td>\n", + " <td>41</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>40</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>450</th>\n", + " <td>427</td>\n", + " <td>3</td>\n", + " <td>30</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>451</th>\n", + " <td>426</td>\n", + " <td>3</td>\n", + " <td>31</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>452</th>\n", + " <td>425</td>\n", + " <td>3</td>\n", + " <td>32</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>453</th>\n", + " <td>424</td>\n", + " <td>3</td>\n", + " <td>33</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>454</th>\n", + " <td>423</td>\n", + " <td>3</td>\n", + " <td>34</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>455 rows × 5 columns</p>\n", + "</div>" + ], + "text/plain": [ + " index vh zid strand is_scaf\n", + "0 2 0 39 0 True\n", + "1 1 0 40 0 True\n", + "2 0 0 41 0 True\n", + "3 5 1 41 1 True\n", + "4 4 1 40 1 True\n", + ".. ... .. .. ... ...\n", + "450 427 3 30 13 False\n", + "451 426 3 31 13 False\n", + "452 425 3 32 13 False\n", + "453 424 3 33 13 False\n", + "454 423 3 34 13 False\n", + "\n", + "[455 rows x 5 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e163e703", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/mrdna/readers/test/libs b/mrdna/readers/test/libs new file mode 120000 index 0000000000000000000000000000000000000000..d4bda9b46951e0c8c2007f84bbc447f93995502d --- /dev/null +++ b/mrdna/readers/test/libs @@ -0,0 +1 @@ +../libs \ No newline at end of file diff --git a/mrdna/readers/test/test2.ipynb b/mrdna/readers/test/test2.ipynb index 825e255b883e0a49cde86e0bb75fda657d1bd01f..d0cca2fb700e21cf8abfd26e49a9f0314ba27da3 100644 --- a/mrdna/readers/test/test2.ipynb +++ b/mrdna/readers/test/test2.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "9ea65628", + "id": "dc6eff4b", "metadata": {}, "outputs": [ { @@ -28,7 +28,7 @@ { "cell_type": "code", "execution_count": 585, - "id": "1aaf81df-7464-4bd0-ace5-3b04e63815ac", + "id": "8b2c0150", "metadata": {}, "outputs": [], "source": [ @@ -93,7 +93,7 @@ { "cell_type": "code", "execution_count": 586, - "id": "c72e5d53-f082-4daa-8e36-6d74bfe8802b", + "id": "03e94b3f", "metadata": {}, "outputs": [ { @@ -111,7 +111,7 @@ { "cell_type": "code", "execution_count": 587, - "id": "ff1c4a3c-9dde-4ea1-b360-f5d5d149b7d0", + "id": "b4a95a34", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ { "cell_type": "code", "execution_count": 612, - "id": "2356f6ba-8109-4ca1-8704-94f3c6a3bebd", + "id": "d917f1db", "metadata": {}, "outputs": [], "source": [ @@ -223,7 +223,7 @@ { "cell_type": "code", "execution_count": 613, - "id": "d1e57e0e-efba-43c9-abdb-9fd7a7d0760b", + "id": "02380150", "metadata": {}, "outputs": [ { @@ -525,7 +525,7 @@ { "cell_type": "code", "execution_count": 609, - "id": "c042b68c-6651-418e-b07c-dc3444afebdd", + "id": "1649bd22", "metadata": {}, "outputs": [ { @@ -897,7 +897,7 @@ { "cell_type": "code", "execution_count": 604, - "id": "259da9ce-568d-4f9e-9394-bb64e56f0eb2", + "id": "ddd2a089", "metadata": {}, "outputs": [ { @@ -1207,7 +1207,7 @@ { "cell_type": "code", "execution_count": 573, - "id": "e8fb6171-a275-46b6-ac62-ac2867a30739", + "id": "2e5785a9", "metadata": {}, "outputs": [ { @@ -1228,7 +1228,7 @@ { "cell_type": "code", "execution_count": 572, - "id": "675f4a88-18fe-4d6a-bae9-de364e2f4174", + "id": "a7dd2412", "metadata": {}, "outputs": [ { @@ -1249,7 +1249,7 @@ { "cell_type": "code", "execution_count": 412, - "id": "946198dc-c458-481f-a9aa-3dd4b0f86676", + "id": "c47ecad6", "metadata": {}, "outputs": [ { @@ -1283,7 +1283,7 @@ { "cell_type": "code", "execution_count": 411, - "id": "59f3720b-b3c0-44c7-b7ea-6923d2e7f2d6", + "id": "960e1971", "metadata": {}, "outputs": [ { @@ -1305,7 +1305,7 @@ { "cell_type": "code", "execution_count": 404, - "id": "7f94d3f9-2f55-4e25-be6c-735e8de905f2", + "id": "ef2bbb72", "metadata": {}, "outputs": [ { @@ -1359,7 +1359,7 @@ { "cell_type": "code", "execution_count": 340, - "id": "b29dc9a7-b4cd-43a8-b374-1a6c3c37e326", + "id": "78791389", "metadata": {}, "outputs": [ { @@ -1390,7 +1390,7 @@ { "cell_type": "code", "execution_count": 355, - "id": "c26b53aa-2b60-4a42-85f0-2cdf626cf14a", + "id": "cea2904e", "metadata": {}, "outputs": [], "source": [ @@ -1423,7 +1423,7 @@ { "cell_type": "code", "execution_count": 362, - "id": "25f9ab74-6b64-4bbf-a6c3-c67c811d42e6", + "id": "97741ea0", "metadata": {}, "outputs": [ { @@ -1444,7 +1444,7 @@ { "cell_type": "code", "execution_count": 342, - "id": "5d3b60a4-14ec-4723-9693-4b05b95554d1", + "id": "8c17bcd4", "metadata": {}, "outputs": [ { @@ -1491,7 +1491,7 @@ { "cell_type": "code", "execution_count": 343, - "id": "0ec83cb6-9abf-4fca-a639-ec6aedf1b14a", + "id": "7d8e5f7c", "metadata": {}, "outputs": [ { @@ -1599,7 +1599,7 @@ { "cell_type": "code", "execution_count": 333, - "id": "fc57ba88-6a6a-4d47-8ac6-da84a9f2ab34", + "id": "599ac0c4", "metadata": {}, "outputs": [ { @@ -1620,7 +1620,7 @@ { "cell_type": "code", "execution_count": 334, - "id": "85312ea2-5b2c-4fff-a44c-c506686d74ca", + "id": "fa362209", "metadata": {}, "outputs": [ { @@ -1641,7 +1641,7 @@ { "cell_type": "code", "execution_count": 363, - "id": "fe8f6f2c-f9ac-48c5-868d-a9e90a4fc675", + "id": "75556652", "metadata": {}, "outputs": [], "source": [ @@ -1655,7 +1655,7 @@ { "cell_type": "code", "execution_count": 364, - "id": "64967867-a819-4345-9153-5dba2ec33ec3", + "id": "723a82ee", "metadata": {}, "outputs": [ { @@ -1796,7 +1796,7 @@ { "cell_type": "code", "execution_count": 351, - "id": "6b942c47-1109-4ac2-ac1b-7210fa7a505e", + "id": "f533d602", "metadata": {}, "outputs": [ { @@ -1973,7 +1973,7 @@ { "cell_type": "code", "execution_count": 338, - "id": "678e48ea-568b-4ad3-98f3-2556b596b70f", + "id": "90373add", "metadata": {}, "outputs": [ { @@ -1994,7 +1994,7 @@ { "cell_type": "code", "execution_count": 339, - "id": "a27de404-e5c5-4526-9d18-bcff3d8d7b5f", + "id": "41730a11", "metadata": {}, "outputs": [ { @@ -2044,7 +2044,7 @@ { "cell_type": "code", "execution_count": 383, - "id": "02a111ba-ea1e-410b-918e-bccbbdd4141e", + "id": "74149aca", "metadata": {}, "outputs": [ { @@ -2067,7 +2067,7 @@ { "cell_type": "code", "execution_count": 274, - "id": "8a27e1a0-fb25-475d-bc62-0adc23d6b261", + "id": "8897c37a", "metadata": {}, "outputs": [], "source": [ @@ -2106,7 +2106,7 @@ { "cell_type": "code", "execution_count": 247, - "id": "1389dcb5-51f9-480c-b657-aa7abbd4a2fa", + "id": "ca5ab939", "metadata": {}, "outputs": [ { @@ -2127,7 +2127,7 @@ { "cell_type": "code", "execution_count": 242, - "id": "c817bd74-a725-4520-826c-8c1f80c02d56", + "id": "72ff2747", "metadata": {}, "outputs": [ { @@ -2155,7 +2155,7 @@ { "cell_type": "code", "execution_count": 359, - "id": "ca879ef9-1acc-4943-8c05-62b95558638c", + "id": "9c47abe2", "metadata": {}, "outputs": [ { @@ -2176,7 +2176,7 @@ { "cell_type": "code", "execution_count": 134, - "id": "1b55d792-37eb-453d-a3db-382b239cf62d", + "id": "869b4c51", "metadata": {}, "outputs": [ { @@ -2262,7 +2262,7 @@ { "cell_type": "code", "execution_count": 139, - "id": "f8302226-2614-4d47-8050-e332e0ade9f2", + "id": "4a723a6d", "metadata": {}, "outputs": [ { @@ -2283,7 +2283,7 @@ { "cell_type": "code", "execution_count": 74, - "id": "4221131e-75af-4676-bb67-24b91d978f72", + "id": "05e4da72", "metadata": {}, "outputs": [ { @@ -2760,7 +2760,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bb4c1c28-0bed-4f3a-b046-241137968efd", + "id": "f6127aee", "metadata": {}, "outputs": [], "source": [ @@ -2770,7 +2770,7 @@ { "cell_type": "code", "execution_count": 94, - "id": "86772cec-5ebd-4a09-9068-3fe5684a8a00", + "id": "ebc61bad", "metadata": {}, "outputs": [], "source": [ @@ -2785,7 +2785,7 @@ { "cell_type": "code", "execution_count": 96, - "id": "ca575331-81bb-4d12-8230-65450bbb7100", + "id": "e805d776", "metadata": {}, "outputs": [], "source": [ @@ -2800,7 +2800,7 @@ { "cell_type": "code", "execution_count": 106, - "id": "9238476c-157b-4e47-bd2d-e2d66ebc91d8", + "id": "c4233e44", "metadata": {}, "outputs": [ { @@ -2821,7 +2821,7 @@ { "cell_type": "code", "execution_count": 77, - "id": "4ed59693-4783-4dda-bffa-e9f256c3d211", + "id": "7f754821", "metadata": {}, "outputs": [ { @@ -2844,7 +2844,7 @@ { "cell_type": "code", "execution_count": 648, - "id": "38a3dacc-7991-4475-b356-44043fd90984", + "id": "3b2b320c", "metadata": {}, "outputs": [], "source": [ @@ -2859,7 +2859,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3d9610cb-d6ea-4c56-be9b-1b23f623fe66", + "id": "d6f94b1a", "metadata": {}, "outputs": [], "source": [ @@ -2936,7 +2936,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "dbcb2c6d-2e3f-4952-b21e-c624ca2026dc", + "id": "e8e1e470", "metadata": {}, "outputs": [], "source": [ @@ -2946,7 +2946,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "e7b1dda8-1c43-4f22-b7bb-54e006134cb5", + "id": "8f43874a", "metadata": {}, "outputs": [ { @@ -3121,7 +3121,7 @@ { "cell_type": "code", "execution_count": 65, - "id": "898974c3-dcbb-4374-bf1d-86c87a643d6e", + "id": "110d5497", "metadata": {}, "outputs": [], "source": [ @@ -3170,7 +3170,7 @@ { "cell_type": "code", "execution_count": 66, - "id": "722d5ad3-52bc-4c95-856c-a0766e28c701", + "id": "c08f2d64", "metadata": {}, "outputs": [ { @@ -3191,7 +3191,7 @@ { "cell_type": "code", "execution_count": 67, - "id": "325c75d6-6951-439d-b2b3-ff2e74cc2245", + "id": "6f52e5fd", "metadata": {}, "outputs": [ { @@ -3212,7 +3212,7 @@ { "cell_type": "code", "execution_count": 151, - "id": "76d1979d-74c2-4ffb-8dc5-659b889d52b6", + "id": "6f3833ba", "metadata": {}, "outputs": [], "source": [ @@ -3264,7 +3264,7 @@ { "cell_type": "code", "execution_count": 152, - "id": "45e6434e-58a9-4aa2-b1f7-6b871daedba3", + "id": "2e9d3446", "metadata": {}, "outputs": [], "source": [ @@ -3299,7 +3299,7 @@ { "cell_type": "code", "execution_count": 153, - "id": "44f994b6-6c49-4b8a-916b-7ac224558ded", + "id": "48cdbf0e", "metadata": {}, "outputs": [], "source": [ @@ -3310,7 +3310,7 @@ { "cell_type": "code", "execution_count": 150, - "id": "fae9db1a-6148-4cd6-9d30-6c6d8ccbcec6", + "id": "cecfc5a0", "metadata": {}, "outputs": [ { @@ -3345,7 +3345,7 @@ { "cell_type": "code", "execution_count": 117, - "id": "6db39eca-d003-4690-a140-18e6abb6b246", + "id": "c5b1e868", "metadata": {}, "outputs": [ { @@ -3501,7 +3501,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4e4ae946-464f-4033-8462-5c5931198123", + "id": "071f75e4", "metadata": {}, "outputs": [], "source": [] @@ -3509,7 +3509,7 @@ { "cell_type": "code", "execution_count": 119, - "id": "8c52f71c-0333-4d50-a9f7-92dbe60226cb", + "id": "48922b22", "metadata": {}, "outputs": [ { @@ -3665,7 +3665,7 @@ { "cell_type": "code", "execution_count": 355, - "id": "039b65c8-5a0d-4127-a2dc-2808ce11fba1", + "id": "fee43329", "metadata": {}, "outputs": [ { @@ -3687,7 +3687,7 @@ { "cell_type": "code", "execution_count": 368, - "id": "570999f4-a7b9-400b-a61d-c1d66eaf0999", + "id": "a7497054", "metadata": {}, "outputs": [], "source": [ @@ -3705,7 +3705,7 @@ { "cell_type": "code", "execution_count": 370, - "id": "b0e5cb3c-8cb4-4fd6-8fcc-1b63c7733acd", + "id": "9009e4cd", "metadata": {}, "outputs": [ { @@ -3724,7 +3724,7 @@ { "cell_type": "code", "execution_count": 371, - "id": "837c9a06-1e50-434d-b97b-70bfb8d20c42", + "id": "e2d6bdf4", "metadata": {}, "outputs": [ { @@ -3744,7 +3744,7 @@ { "cell_type": "code", "execution_count": 225, - "id": "998f4321-f520-45f7-bffb-92177b6a0fec", + "id": "7d633643", "metadata": {}, "outputs": [ { @@ -3958,7 +3958,7 @@ { "cell_type": "code", "execution_count": 211, - "id": "10ec47a4-3221-468d-a185-b171c5bd82f6", + "id": "85ee4835", "metadata": {}, "outputs": [ { @@ -4149,7 +4149,7 @@ { "cell_type": "code", "execution_count": 216, - "id": "4a136eaa-919b-4ad7-88be-42169cfab25f", + "id": "27e36674", "metadata": {}, "outputs": [ { @@ -4351,7 +4351,7 @@ { "cell_type": "code", "execution_count": 54, - "id": "fc6acf26-2c3a-405b-b905-dd116fc5c875", + "id": "21312024", "metadata": {}, "outputs": [], "source": [ @@ -4362,7 +4362,7 @@ { "cell_type": "code", "execution_count": 48, - "id": "87066a06-49a9-4cd2-9082-135372adc0d7", + "id": "d5d42a45", "metadata": {}, "outputs": [ { @@ -4383,7 +4383,7 @@ { "cell_type": "code", "execution_count": 52, - "id": "6cf497ad-08f1-4901-b855-bb44be284f87", + "id": "a9db4952", "metadata": {}, "outputs": [ { @@ -4404,7 +4404,7 @@ { "cell_type": "code", "execution_count": 43, - "id": "27717aa9-0a17-444d-866a-50fc3fa31ca2", + "id": "0e365769", "metadata": {}, "outputs": [ { @@ -4425,7 +4425,7 @@ { "cell_type": "code", "execution_count": 34, - "id": "5dc7d2bf-5221-4afd-ad45-e97dab156d83", + "id": "fcce904b", "metadata": {}, "outputs": [ { @@ -4446,7 +4446,7 @@ { "cell_type": "code", "execution_count": 33, - "id": "12fd269b-1dc5-488c-a246-0d91d9e2fdd5", + "id": "3df07b51", "metadata": {}, "outputs": [ { @@ -4468,7 +4468,7 @@ { "cell_type": "code", "execution_count": 22, - "id": "ef80053e-350c-4c5a-ac94-c1ffbedc1e46", + "id": "45a7b213", "metadata": {}, "outputs": [ { @@ -4495,7 +4495,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "4e2dd66e-fe4d-4728-8233-60cb1d9b156a", + "id": "8bb0fc3c", "metadata": {}, "outputs": [], "source": [ @@ -4505,7 +4505,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "4952a867-7839-4d65-b3d3-228b401fa4de", + "id": "22b8b30f", "metadata": {}, "outputs": [ { @@ -4527,7 +4527,7 @@ { "cell_type": "code", "execution_count": 626, - "id": "5d1d1bea-7d71-476b-928b-18b5cf0bd2e5", + "id": "bd153170", "metadata": {}, "outputs": [ { @@ -4548,7 +4548,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "91b1be83-f406-47f3-8e63-11a0262a653a", + "id": "24f087bc", "metadata": {}, "outputs": [ { @@ -4569,7 +4569,7 @@ { "cell_type": "code", "execution_count": 723, - "id": "e616db25-a0fa-47ea-917d-28f2ec18c67e", + "id": "413d249f", "metadata": {}, "outputs": [ { @@ -4590,7 +4590,7 @@ { "cell_type": "code", "execution_count": 721, - "id": "216ec8a1-d779-4725-b3c2-4a2f01c3dd52", + "id": "f5d6e341", "metadata": {}, "outputs": [ { @@ -4611,7 +4611,7 @@ { "cell_type": "code", "execution_count": 700, - "id": "0d5d750d-f978-4096-b819-8a26ca2be63d", + "id": "533c907f", "metadata": {}, "outputs": [ { @@ -4632,7 +4632,7 @@ { "cell_type": "code", "execution_count": 710, - "id": "855c45bb-5744-48ff-9ea3-4a7ab6221208", + "id": "e09cfdbf", "metadata": {}, "outputs": [ { @@ -4654,7 +4654,7 @@ { "cell_type": "code", "execution_count": 726, - "id": "7af4b044-e9e9-43e3-ab29-1cac35a9ac46", + "id": "2c437b78", "metadata": {}, "outputs": [], "source": [ @@ -4698,7 +4698,7 @@ { "cell_type": "code", "execution_count": 734, - "id": "3c488e25-5333-4a34-bbe4-6467dc291894", + "id": "f2347ff4", "metadata": {}, "outputs": [ { @@ -4722,7 +4722,7 @@ { "cell_type": "code", "execution_count": 741, - "id": "d9c3d962-831f-4b5f-b889-e6173f7e2c5c", + "id": "7c0c4211", "metadata": {}, "outputs": [ { @@ -4743,7 +4743,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eff8f4a8-a228-40d5-8487-e091b1c425d9", + "id": "57fdb681", "metadata": {}, "outputs": [], "source": [] @@ -4751,7 +4751,7 @@ { "cell_type": "code", "execution_count": 728, - "id": "eb4f45b4-3064-4238-830b-0dfea9a0e99d", + "id": "ef323244", "metadata": {}, "outputs": [ { @@ -4772,7 +4772,7 @@ { "cell_type": "code", "execution_count": 729, - "id": "42960c56-4516-4021-989a-4e2848ad4215", + "id": "d150a90e", "metadata": {}, "outputs": [ { @@ -4793,7 +4793,7 @@ { "cell_type": "code", "execution_count": 727, - "id": "2c4153a3-88a1-4b6a-ac1d-0bdd3d9ef621", + "id": "7f887064", "metadata": {}, "outputs": [ { @@ -4827,7 +4827,7 @@ { "cell_type": "code", "execution_count": null, - "id": "83578dcc-15c5-4112-9225-ba93370bfd3c", + "id": "d1801a30", "metadata": {}, "outputs": [], "source": [] @@ -4835,7 +4835,7 @@ { "cell_type": "code", "execution_count": 672, - "id": "09e48bf8-ae24-4d32-817e-f576a9ca019c", + "id": "98228b41", "metadata": {}, "outputs": [ { @@ -4856,7 +4856,7 @@ { "cell_type": "code", "execution_count": 642, - "id": "9517ec2a-d5f9-4a7f-8af9-1d042486b6c5", + "id": "29af8a0b", "metadata": {}, "outputs": [ { @@ -4936,7 +4936,7 @@ { "cell_type": "code", "execution_count": 732, - "id": "c0e69e86-6319-480e-a781-80c2a3524ea9", + "id": "31f41e6f", "metadata": {}, "outputs": [ { @@ -5022,7 +5022,7 @@ { "cell_type": "code", "execution_count": 740, - "id": "66d0e76d-3ab9-4caf-8a11-e73069372672", + "id": "908a6a7c", "metadata": {}, "outputs": [ { @@ -5054,7 +5054,7 @@ { "cell_type": "code", "execution_count": 164, - "id": "c907f808-bcdc-4157-ac33-508aa748e42e", + "id": "1e1b275c", "metadata": {}, "outputs": [ { @@ -5075,7 +5075,7 @@ { "cell_type": "code", "execution_count": 119, - "id": "5d8aeaca-e795-41a0-a9be-f5e5db5e111f", + "id": "0877b142", "metadata": {}, "outputs": [], "source": [ @@ -5085,7 +5085,7 @@ { "cell_type": "code", "execution_count": 139, - "id": "93c6bfde-9974-4b99-b65c-85b99ed70895", + "id": "f08e6106", "metadata": {}, "outputs": [ { @@ -5106,7 +5106,7 @@ { "cell_type": "code", "execution_count": 153, - "id": "0dc56391-5cc7-49a5-bdc0-674c1ff93488", + "id": "dd1f0da6", "metadata": {}, "outputs": [ { @@ -5132,7 +5132,7 @@ { "cell_type": "code", "execution_count": null, - "id": "522678a9-81e9-40e5-9e56-400f054bd5d4", + "id": "781af10a", "metadata": {}, "outputs": [], "source": [] @@ -5140,7 +5140,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "39f36967-8c42-45c5-84b5-e1fab790be36", + "id": "1ec8afe3", "metadata": {}, "outputs": [], "source": [] @@ -5148,7 +5148,7 @@ { "cell_type": "code", "execution_count": 88, - "id": "ab8aa441-4b59-4d54-b44d-f776ab8e9e61", + "id": "83841b72", "metadata": {}, "outputs": [], "source": [ @@ -5158,7 +5158,7 @@ { "cell_type": "code", "execution_count": 91, - "id": "73e69794-fc0a-486e-85f9-88a57276ee68", + "id": "0ffa8dce", "metadata": {}, "outputs": [ { @@ -6250,7 +6250,7 @@ { "cell_type": "code", "execution_count": 92, - "id": "acf7fe2e-0203-485e-a176-d711c847477a", + "id": "8016534d", "metadata": {}, "outputs": [ { @@ -6270,7 +6270,7 @@ { "cell_type": "code", "execution_count": 962, - "id": "bce3f7a8-b003-4a82-ade5-37501b68c0f4", + "id": "d1b135ec", "metadata": {}, "outputs": [ { @@ -6302,7 +6302,7 @@ { "cell_type": "code", "execution_count": 97, - "id": "8208056b-181d-43bd-a344-ec6d0a24b7f6", + "id": "b9f5de0c", "metadata": {}, "outputs": [ { @@ -6324,7 +6324,7 @@ { "cell_type": "code", "execution_count": 961, - "id": "f07e1b32", + "id": "b716b68c", "metadata": {}, "outputs": [], "source": [ @@ -6349,7 +6349,7 @@ { "cell_type": "code", "execution_count": 575, - "id": "6931b352-2f85-4ba1-9de3-a53c24041c0c", + "id": "b647bf01", "metadata": {}, "outputs": [ { @@ -6394,7 +6394,7 @@ { "cell_type": "code", "execution_count": 600, - "id": "43cda523-ec1a-42ca-b546-17ffac4dea17", + "id": "7658064d", "metadata": {}, "outputs": [ { @@ -6931,7 +6931,7 @@ { "cell_type": "code", "execution_count": 584, - "id": "ec3b6b7c-f03a-4c34-85d9-e4f44ed59792", + "id": "92cc1374", "metadata": {}, "outputs": [ { @@ -6952,7 +6952,7 @@ { "cell_type": "code", "execution_count": 596, - "id": "dadccba2-b9b3-4110-8328-1076ede1432c", + "id": "ef75c586", "metadata": {}, "outputs": [ { @@ -7060,7 +7060,7 @@ { "cell_type": "code", "execution_count": 588, - "id": "0db298ba-4f29-49d8-bdc3-a6faa4674e22", + "id": "81cbd9b5", "metadata": {}, "outputs": [ { @@ -7114,7 +7114,7 @@ { "cell_type": "code", "execution_count": null, - "id": "72874b29-ac4d-4c89-abd6-187e1d2a99aa", + "id": "de5a0dec", "metadata": {}, "outputs": [], "source": [ @@ -7124,7 +7124,7 @@ { "cell_type": "code", "execution_count": 578, - "id": "4af733b6-ebfd-47fc-aaef-2ee5d4267c88", + "id": "02bfe5a7", "metadata": {}, "outputs": [ { @@ -7145,7 +7145,7 @@ { "cell_type": "code", "execution_count": 582, - "id": "828c35da-7c02-469d-9a8e-ee2420bed581", + "id": "a1f7b5f7", "metadata": {}, "outputs": [ { @@ -7199,7 +7199,7 @@ { "cell_type": "code", "execution_count": 563, - "id": "ffd99357-946a-4334-8202-4fd78e63e062", + "id": "a3b9ba54", "metadata": {}, "outputs": [ { @@ -7307,7 +7307,7 @@ { "cell_type": "code", "execution_count": 562, - "id": "828c7161-0700-4bcd-a51e-eeb195340e35", + "id": "855e6ac0", "metadata": {}, "outputs": [ { @@ -7336,7 +7336,7 @@ { "cell_type": "code", "execution_count": 439, - "id": "4c0546f8-b0d4-45b2-9d81-d3699ee95921", + "id": "24b34ab1", "metadata": {}, "outputs": [], "source": [ @@ -7350,7 +7350,7 @@ { "cell_type": "code", "execution_count": 557, - "id": "7c15d8bc-7c0e-46bb-860b-cfa5cf5a5a0c", + "id": "2a7f64d5", "metadata": {}, "outputs": [ { @@ -7371,7 +7371,7 @@ { "cell_type": "code", "execution_count": 556, - "id": "eaef916c-98d2-47c9-948e-a7fd76a1d3b4", + "id": "9625b974", "metadata": {}, "outputs": [ { @@ -7392,7 +7392,7 @@ { "cell_type": "code", "execution_count": 543, - "id": "ca6571df-2523-484a-a5b8-32cf44bb5afc", + "id": "b9faf1ec", "metadata": {}, "outputs": [ { @@ -7428,7 +7428,7 @@ { "cell_type": "code", "execution_count": 544, - "id": "b5d1a3b8-f616-4050-ae12-de34bd18b4c9", + "id": "e9da09cb", "metadata": {}, "outputs": [ { @@ -7464,7 +7464,7 @@ { "cell_type": "code", "execution_count": 442, - "id": "559bab5d-b474-4d30-99dc-e97ae61e39e5", + "id": "e3164ef8", "metadata": {}, "outputs": [], "source": [ @@ -7479,7 +7479,7 @@ { "cell_type": "code", "execution_count": 969, - "id": "a500615f-07df-4e65-9dc6-ef94af3dfcdf", + "id": "7db1e084", "metadata": {}, "outputs": [ { @@ -7501,7 +7501,7 @@ { "cell_type": "code", "execution_count": 495, - "id": "69e89135-155f-4413-a4f4-d7323528c211", + "id": "c05dcdf7", "metadata": {}, "outputs": [ { @@ -7522,7 +7522,7 @@ { "cell_type": "code", "execution_count": 502, - "id": "9139dfed-d055-408f-986f-c96da4cb88af", + "id": "d21a4c75", "metadata": {}, "outputs": [ { @@ -7549,7 +7549,7 @@ { "cell_type": "code", "execution_count": 971, - "id": "7e39d2ea-b5d7-40a6-b43f-7e43ab26cc4f", + "id": "1fc5a43d", "metadata": {}, "outputs": [], "source": [ @@ -7560,7 +7560,7 @@ { "cell_type": "code", "execution_count": 972, - "id": "a5ba9310-16b1-4faa-b9c7-6ac5073df569", + "id": "bad64953", "metadata": {}, "outputs": [], "source": [ @@ -7574,7 +7574,7 @@ { "cell_type": "code", "execution_count": 975, - "id": "05b7fa7d-5329-4f0c-8a94-c3631494e9bb", + "id": "71938d89", "metadata": {}, "outputs": [], "source": [ @@ -7584,7 +7584,7 @@ { "cell_type": "code", "execution_count": 980, - "id": "cfc0060c-188c-4cc5-9cbe-ff7fa2eb89e4", + "id": "3ded5871", "metadata": {}, "outputs": [ { @@ -7605,7 +7605,7 @@ { "cell_type": "code", "execution_count": 989, - "id": "2b8a31bf-f956-4bf1-8546-140ebca169f4", + "id": "f51f621d", "metadata": {}, "outputs": [ { @@ -7626,7 +7626,7 @@ { "cell_type": "code", "execution_count": 928, - "id": "36855d5e-c95c-4642-a162-ca1ef378a3d4", + "id": "f82ad368", "metadata": {}, "outputs": [ { @@ -7647,7 +7647,7 @@ { "cell_type": "code", "execution_count": 1000, - "id": "dfaef123-43a8-43af-869d-cd4407c41e8c", + "id": "ceff793a", "metadata": {}, "outputs": [ { @@ -7668,7 +7668,7 @@ { "cell_type": "code", "execution_count": 1004, - "id": "a9a3b1f8-eae4-42f6-ae88-99832ab47070", + "id": "97021a7d", "metadata": {}, "outputs": [], "source": [ @@ -7678,7 +7678,7 @@ { "cell_type": "code", "execution_count": 1007, - "id": "13618171-1df2-4db0-b24c-7a7470a6c615", + "id": "a55d6ed0", "metadata": {}, "outputs": [ { @@ -7699,7 +7699,7 @@ { "cell_type": "code", "execution_count": 1030, - "id": "d9e4b3c5-cecb-4d1f-a283-6069b6837dad", + "id": "ec00022c", "metadata": { "scrolled": true }, @@ -7855,7 +7855,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "f18fdcc9-6ead-4bd2-ab0e-1c759e328f2f", + "id": "f01ff5f5", "metadata": {}, "outputs": [], "source": [ @@ -7901,7 +7901,7 @@ { "cell_type": "code", "execution_count": 951, - "id": "2d929c54-7b65-48d2-b28b-68562af15104", + "id": "209a9ec4", "metadata": {}, "outputs": [], "source": [ @@ -7943,7 +7943,7 @@ { "cell_type": "code", "execution_count": 1013, - "id": "d20b67b7-94fc-4200-a84b-edd66b355c01", + "id": "1a4362b4", "metadata": {}, "outputs": [ { @@ -7965,7 +7965,7 @@ { "cell_type": "code", "execution_count": 948, - "id": "5daca1b7-beb5-42ba-94c6-911ab6ca33be", + "id": "e75969ed", "metadata": {}, "outputs": [ { @@ -7983,7 +7983,7 @@ { "cell_type": "code", "execution_count": 901, - "id": "d4bc3787-567f-4e57-823e-7c844c7eb262", + "id": "fd487bbd", "metadata": {}, "outputs": [], "source": [ @@ -7993,7 +7993,7 @@ { "cell_type": "code", "execution_count": 902, - "id": "3a01b186-cb93-4f92-b8d0-0db0bc45e104", + "id": "c4d6d76f", "metadata": {}, "outputs": [ { @@ -8426,7 +8426,7 @@ { "cell_type": "code", "execution_count": 864, - "id": "1d2a4032-a6b6-4ad7-bcf5-03bf37cb68e0", + "id": "ab80c811", "metadata": {}, "outputs": [], "source": [ @@ -8436,7 +8436,7 @@ { "cell_type": "code", "execution_count": 865, - "id": "3cd82283-e341-4dfd-95c4-e789d3630f74", + "id": "bdd17c47", "metadata": {}, "outputs": [ { @@ -8475,7 +8475,7 @@ { "cell_type": "code", "execution_count": 847, - "id": "c5eba0d7-3972-4553-a9dd-f6fccaef7a4e", + "id": "a6ec640f", "metadata": {}, "outputs": [], "source": [ @@ -8485,7 +8485,7 @@ { "cell_type": "code", "execution_count": 848, - "id": "a650a181-1258-4dfd-b41c-f4142f6034ef", + "id": "866dea3c", "metadata": {}, "outputs": [ { @@ -8509,7 +8509,7 @@ { "cell_type": "code", "execution_count": 823, - "id": "324e02ef-7688-45bf-80be-f80d25cc1516", + "id": "85107410", "metadata": {}, "outputs": [ { @@ -8530,7 +8530,7 @@ { "cell_type": "code", "execution_count": 820, - "id": "6b7d23a1-b66a-47ca-b59e-42d1addabee5", + "id": "1d0c53e2", "metadata": {}, "outputs": [], "source": [ @@ -8540,7 +8540,7 @@ { "cell_type": "code", "execution_count": 821, - "id": "ab0b76c4-c77d-4bbb-9883-bd7b8c1569f2", + "id": "7dfcd349", "metadata": {}, "outputs": [ { @@ -8579,7 +8579,7 @@ { "cell_type": "code", "execution_count": 712, - "id": "0e8099a0-70d1-4119-bd56-33b79a83359c", + "id": "3463c436", "metadata": {}, "outputs": [ { @@ -8597,7 +8597,7 @@ { "cell_type": "code", "execution_count": 746, - "id": "55d4b8da-692a-40ae-9e1a-3030f2dd659f", + "id": "2f7b05ea", "metadata": {}, "outputs": [], "source": [ @@ -8610,7 +8610,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a7a02cdd-7409-441a-8e60-07b5912d8e56", + "id": "aa6a38b4", "metadata": {}, "outputs": [], "source": [] @@ -8618,7 +8618,7 @@ { "cell_type": "code", "execution_count": null, - "id": "65ebfe49-aae9-42a3-acb5-0a7afb390e8d", + "id": "1cbba0c4", "metadata": {}, "outputs": [], "source": [ @@ -8628,7 +8628,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8a5ea8a4-3353-4779-b6aa-98c8919259c9", + "id": "77f98a7c", "metadata": {}, "outputs": [], "source": [ @@ -8638,7 +8638,7 @@ { "cell_type": "code", "execution_count": 710, - "id": "ff047223-c82c-4ac4-b30e-65c41613a9e4", + "id": "d84bbc60", "metadata": {}, "outputs": [ { @@ -8656,7 +8656,7 @@ { "cell_type": "code", "execution_count": 510, - "id": "c5832bdd-1df9-4af7-8cb7-89d522a33c86", + "id": "84c8d193", "metadata": {}, "outputs": [ { @@ -8718,7 +8718,7 @@ { "cell_type": "code", "execution_count": 644, - "id": "a7a8c302-165a-404f-aa98-f857b8839638", + "id": "0f676819", "metadata": {}, "outputs": [ { @@ -8739,7 +8739,7 @@ { "cell_type": "code", "execution_count": 618, - "id": "6b6ab174-90a3-4414-a798-806fb1793937", + "id": "fb77d05b", "metadata": {}, "outputs": [], "source": [ @@ -8749,7 +8749,7 @@ { "cell_type": "code", "execution_count": 623, - "id": "843291d1-530f-4c2b-97e3-65b30d814cf9", + "id": "8773e21e", "metadata": {}, "outputs": [ { @@ -8770,7 +8770,7 @@ { "cell_type": "code", "execution_count": 629, - "id": "4a4cbdb1-f626-4817-9bee-c2d9fed0675d", + "id": "e9474108", "metadata": {}, "outputs": [ { @@ -8911,7 +8911,7 @@ { "cell_type": "code", "execution_count": 633, - "id": "a6e188a0-52c9-4f27-9e91-f9f0f6b43829", + "id": "cc427ae3", "metadata": {}, "outputs": [ { @@ -8932,7 +8932,7 @@ { "cell_type": "code", "execution_count": 663, - "id": "6dbd83c9-1542-4acf-8e54-1f279db6da3a", + "id": "c9e8ca40", "metadata": {}, "outputs": [ { @@ -8953,7 +8953,7 @@ { "cell_type": "code", "execution_count": 640, - "id": "08856a0e-eb36-4e2f-a68d-55b43ca38a82", + "id": "6acfc452", "metadata": {}, "outputs": [ { @@ -8974,7 +8974,7 @@ { "cell_type": "code", "execution_count": 642, - "id": "15747c0b-8bcc-414e-b570-03c1a61cf8b9", + "id": "b81e769a", "metadata": {}, "outputs": [ { @@ -8995,7 +8995,7 @@ { "cell_type": "code", "execution_count": 631, - "id": "5164cbab-4cad-49cb-b578-162f4df8fb3d", + "id": "2feba736", "metadata": {}, "outputs": [ { @@ -9016,7 +9016,7 @@ { "cell_type": "code", "execution_count": 628, - "id": "9de2a715-9a6d-48ca-98d3-154c6856f73c", + "id": "8c43c66a", "metadata": {}, "outputs": [ { @@ -9037,7 +9037,7 @@ { "cell_type": "code", "execution_count": 539, - "id": "4fa350d1-50a9-4d03-a808-a948f9db12af", + "id": "c901f8b6", "metadata": {}, "outputs": [ { @@ -9058,7 +9058,7 @@ { "cell_type": "code", "execution_count": 532, - "id": "f9246900-a13c-40d8-b345-8167199f45de", + "id": "b4857eb2", "metadata": {}, "outputs": [ { @@ -9079,7 +9079,7 @@ { "cell_type": "code", "execution_count": 537, - "id": "bc02e0de-08a8-42d3-afd8-2b21f616d470", + "id": "558950e7", "metadata": {}, "outputs": [ { @@ -9102,7 +9102,7 @@ { "cell_type": "code", "execution_count": 538, - "id": "e5238c66-a33b-4e93-ad9a-1a8fefe574cc", + "id": "539ce808", "metadata": {}, "outputs": [ { @@ -9123,7 +9123,7 @@ { "cell_type": "code", "execution_count": 541, - "id": "803533ed-9577-4eca-9d51-c106f3efb3d1", + "id": "7ead24d8", "metadata": {}, "outputs": [ { @@ -9146,7 +9146,7 @@ { "cell_type": "code", "execution_count": 505, - "id": "75d655fc-46e1-4b4d-9e05-ee1403e2e363", + "id": "b3cbd599", "metadata": {}, "outputs": [ { @@ -9169,7 +9169,7 @@ { "cell_type": "code", "execution_count": 460, - "id": "1dda0acd-7dd4-467c-9633-66c5350d1911", + "id": "afb4f5a2", "metadata": {}, "outputs": [ { @@ -9449,7 +9449,7 @@ { "cell_type": "code", "execution_count": 445, - "id": "67b2283f-b3ec-4941-9753-8939883fc634", + "id": "748fe7b3", "metadata": {}, "outputs": [], "source": [ @@ -9459,7 +9459,7 @@ { "cell_type": "code", "execution_count": 451, - "id": "055da421-6f35-47ba-b54a-dedaff3851f5", + "id": "56ca49e4", "metadata": {}, "outputs": [], "source": [ @@ -9470,7 +9470,7 @@ { "cell_type": "code", "execution_count": 452, - "id": "b9337cb3-5725-4a22-956d-0f7bfdf98f1e", + "id": "aedfee5c", "metadata": {}, "outputs": [], "source": [ @@ -9481,7 +9481,7 @@ { "cell_type": "code", "execution_count": 455, - "id": "307dcec8-b15f-48c0-895e-b57794d96c1e", + "id": "bca51eab", "metadata": {}, "outputs": [ { @@ -9502,7 +9502,7 @@ { "cell_type": "code", "execution_count": 456, - "id": "9ff52284-e37d-466c-a844-3cbc929433b8", + "id": "e8ded03d", "metadata": {}, "outputs": [ { @@ -9523,7 +9523,7 @@ { "cell_type": "code", "execution_count": 457, - "id": "319ede75-f8a8-4f80-9d90-f990d351cc1b", + "id": "5bc9446c", "metadata": {}, "outputs": [ { @@ -9542,7 +9542,7 @@ { "cell_type": "code", "execution_count": 431, - "id": "9e6bd392-559f-449b-8a4e-049d8b100349", + "id": "87a3d3d4", "metadata": {}, "outputs": [ { @@ -9565,7 +9565,7 @@ { "cell_type": "code", "execution_count": 421, - "id": "03cc2f40-8c54-44ff-b590-81cf857c2965", + "id": "5cf1bda5", "metadata": {}, "outputs": [ { @@ -9588,7 +9588,7 @@ { "cell_type": "code", "execution_count": 386, - "id": "6efd2667-59d7-4ad3-b9a9-faf8f003399b", + "id": "b5623d70", "metadata": {}, "outputs": [ { @@ -9612,7 +9612,7 @@ { "cell_type": "code", "execution_count": 399, - "id": "9063878f-b3bd-4856-9d5b-68f459ea6e06", + "id": "252c7557", "metadata": {}, "outputs": [], "source": [ @@ -9622,7 +9622,7 @@ { "cell_type": "code", "execution_count": 397, - "id": "69c6d275-9b13-4add-bcb0-75c281628f09", + "id": "97b5272f", "metadata": {}, "outputs": [], "source": [] @@ -9630,7 +9630,7 @@ { "cell_type": "code", "execution_count": 398, - "id": "dcba5559-a9c1-40ce-854d-ca2ceefd61d0", + "id": "c04586ae", "metadata": {}, "outputs": [], "source": [ @@ -9640,7 +9640,7 @@ { "cell_type": "code", "execution_count": 401, - "id": "0b3beb34-9715-4719-b0b2-778c54891555", + "id": "38e35695", "metadata": {}, "outputs": [ { @@ -9669,7 +9669,7 @@ { "cell_type": "code", "execution_count": 258, - "id": "efdc824b-d784-40c8-98fd-af86f4887319", + "id": "fb6c1940", "metadata": {}, "outputs": [ { @@ -9690,7 +9690,7 @@ { "cell_type": "code", "execution_count": 232, - "id": "07cb8a8b-32c0-4aac-a8e2-9b10ab938fbc", + "id": "c97ca649", "metadata": {}, "outputs": [ { @@ -9727,7 +9727,7 @@ { "cell_type": "code", "execution_count": 225, - "id": "fc06b8ed-283f-4365-a063-c87f43598bef", + "id": "3f5fd50c", "metadata": {}, "outputs": [], "source": [ @@ -9737,7 +9737,7 @@ { "cell_type": "code", "execution_count": 64, - "id": "9c67d96b-a9e9-43cb-8617-bae9d6fb36bf", + "id": "a1fa67e1", "metadata": {}, "outputs": [ { @@ -9841,7 +9841,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "66a215f5-fac0-455b-b990-a0713a2e3e62", + "id": "671c1103", "metadata": {}, "outputs": [ { @@ -10121,7 +10121,7 @@ { "cell_type": "code", "execution_count": 344, - "id": "6c6b8a93-aa6d-4a5e-83c9-fb66381c6ce3", + "id": "9d40f975", "metadata": {}, "outputs": [ { @@ -10144,7 +10144,7 @@ { "cell_type": "code", "execution_count": 90, - "id": "fe082e4f-7622-4a9c-ac54-17cd2cb741d2", + "id": "beb6f2e1", "metadata": {}, "outputs": [ { @@ -10286,7 +10286,7 @@ { "cell_type": "code", "execution_count": 59, - "id": "4d9e756f-8f34-4738-909c-a1714e4a4459", + "id": "b937875e", "metadata": {}, "outputs": [], "source": [ @@ -10394,7 +10394,7 @@ { "cell_type": "code", "execution_count": 60, - "id": "dd26d9a2-9676-4286-8032-442997d872c2", + "id": "959da213", "metadata": {}, "outputs": [ { @@ -10424,7 +10424,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "26d573d8-7e97-40bb-81f2-554009fbe689", + "id": "11fe1806", "metadata": {}, "outputs": [], "source": [ @@ -10434,7 +10434,7 @@ { "cell_type": "code", "execution_count": 141, - "id": "caa9367f-1047-4d70-b0c0-2ea99015adc9", + "id": "9ff3e583", "metadata": {}, "outputs": [ { @@ -10869,7 +10869,7 @@ { "cell_type": "code", "execution_count": 133, - "id": "2ac38001-5cb0-4df8-b6ff-0853f3b4a7f3", + "id": "e03ffa2d", "metadata": {}, "outputs": [], "source": [ @@ -10879,7 +10879,7 @@ { "cell_type": "code", "execution_count": 134, - "id": "7c2c51bb-c1d5-4ec3-abb6-15d7d357cf63", + "id": "18babab0", "metadata": {}, "outputs": [], "source": [ @@ -10889,7 +10889,7 @@ { "cell_type": "code", "execution_count": 126, - "id": "3fc0b810-6b65-4cbe-adbb-13ebe72ee1b8", + "id": "0c86903a", "metadata": {}, "outputs": [ { @@ -11324,7 +11324,7 @@ { "cell_type": "code", "execution_count": 24, - "id": "a4d48fe0", + "id": "8e415fab", "metadata": {}, "outputs": [ { @@ -11350,7 +11350,7 @@ { "cell_type": "code", "execution_count": 29, - "id": "03d8b6e7", + "id": "40b69f5d", "metadata": {}, "outputs": [ { @@ -11426,7 +11426,7 @@ { "cell_type": "code", "execution_count": null, - "id": "884f07ed", + "id": "e1a470a1", "metadata": {}, "outputs": [], "source": [] @@ -11434,7 +11434,7 @@ { "cell_type": "code", "execution_count": 92, - "id": "1e934f2b", + "id": "39ca5192", "metadata": {}, "outputs": [], "source": [ @@ -11445,7 +11445,7 @@ { "cell_type": "code", "execution_count": 94, - "id": "1f942831-b170-4d1b-b445-1f58565bf2f1", + "id": "e7696931", "metadata": {}, "outputs": [ { @@ -11479,7 +11479,7 @@ { "cell_type": "code", "execution_count": 35, - "id": "e5b59a91", + "id": "28df4d21", "metadata": {}, "outputs": [ { @@ -11508,7 +11508,7 @@ { "cell_type": "code", "execution_count": 36, - "id": "78ecf1e1", + "id": "f138e2a6", "metadata": {}, "outputs": [ { @@ -11616,7 +11616,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "680d09ca", + "id": "551b9c83", "metadata": {}, "outputs": [ { @@ -11647,7 +11647,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "190a91a3", + "id": "16df6d54", "metadata": {}, "outputs": [ { @@ -12144,7 +12144,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "ba82a4ed", + "id": "78158c38", "metadata": {}, "outputs": [ { @@ -12178,7 +12178,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "79e8a471", + "id": "aec96741", "metadata": {}, "outputs": [ { @@ -12198,7 +12198,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "71b7ffdc", + "id": "e444c4d7", "metadata": {}, "outputs": [], "source": [ @@ -12208,7 +12208,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "f7c95c63", + "id": "10f8f13b", "metadata": {}, "outputs": [ { @@ -12229,7 +12229,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "e905bb35", + "id": "999e4520", "metadata": {}, "outputs": [ { @@ -12256,7 +12256,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8964b3a5", + "id": "58798ad1", "metadata": {}, "outputs": [], "source": [] @@ -12264,7 +12264,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/mrdna/readers/test/test3.ipynb b/mrdna/readers/test/test3.ipynb index 6f0498793c5889dcbb67d320129db43b5861fe21..9af0e8487266696db83962d5a9c9b839b752a5d3 100644 --- a/mrdna/readers/test/test3.ipynb +++ b/mrdna/readers/test/test3.ipynb @@ -2,36 +2,25 @@ "cells": [ { "cell_type": "code", - "execution_count": 124, - "id": "303b2da7", + "execution_count": 15, + "id": "47cc3f7f", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['row', 'col', 'num', 'scaf', 'stap', 'loop', 'skip', 'scafLoop', 'stapLoop', 'stap_colors'])" - ] - }, - "execution_count": 124, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "import pickle\n", - "import numpy as np\n", - "df=pd.read_json(\"test.json\")\n", - "df[\"vstrands\"][0].keys()" + "import numpy as np\n" ] }, { "cell_type": "code", "execution_count": 164, - "id": "2d42bba6", - "metadata": {}, + "id": "b993f66a", + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -91,107 +80,433 @@ }, { "cell_type": "code", - "execution_count": 65, - "id": "5e1ea864", + "execution_count": 21, + "id": "5361b91a", "metadata": {}, - "outputs": [ - { - "ename": "UnicodeDecodeError", - "evalue": "'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-65-71f198107f5e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.virt2nuc\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"r\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte" - ] - } - ], + "outputs": [], "source": [ - "f=open(\"test.virt2nuc\",\"r\")\n", - "pickle.load(f)" + "import pickle\n", + "f=open(\"test.virt2nuc\",\"rb\")\n", + "vh_vb,pattern=pickle.load(f)\n", + "def find_vh_vb_table(s,is_scaf):\n", + " L=[]\n", + " for i in list(s.keys()):\n", + " vh,zid=i\n", + " strand,indices=s[i]\n", + " if len(indices)==0:\n", + " continue\n", + " else:\n", + " if len(indices)==1:\n", + " zids=[str(zid)]\n", + " else:\n", + " zids=[str(zid)+\".\"+str(j) for j in range(len(indices))]\n", + " for index,z in zip(indices,zids):\n", + " L.append(pd.Series({\"index\":index,\"vh\":vh,\"zid\":z,\"strand\":strand,\"is_scaf\":bool(is_scaf)}))\n", + " return L\n", + "L1=find_vh_vb_table(vh_vb._scaf,1)\n", + "L2=find_vh_vb_table(vh_vb._stap,0)\n", + "nt_prop=pd.DataFrame(L1+L2,dtype=object)" ] }, { "cell_type": "code", - "execution_count": 119, - "id": "86a109f4", - "metadata": {}, + "execution_count": 22, + "id": "a10c3f52", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "find_vh_vb_table(vh_vb._stap,0)\n", + "nt_prop=pd.DataFrame(L1+L2,dtype=object)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "6c4a0fa6", + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>index</th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>strand</th>\n", + " <th>is_scaf</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>39</td>\n", + " <td>0</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>40</td>\n", + " <td>0</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>41</td>\n", + " <td>0</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>5</td>\n", + " <td>1</td>\n", + " <td>41</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>40</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>410</th>\n", + " <td>386</td>\n", + " <td>3</td>\n", + " <td>30</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>411</th>\n", + " <td>385</td>\n", + " <td>3</td>\n", + " <td>31</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>412</th>\n", + " <td>384</td>\n", + " <td>3</td>\n", + " <td>32</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>413</th>\n", + " <td>383</td>\n", + " <td>3</td>\n", + " <td>33</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>414</th>\n", + " <td>382</td>\n", + " <td>3</td>\n", + " <td>34</td>\n", + " <td>13</td>\n", + " <td>False</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>415 rows × 5 columns</p>\n", + "</div>" + ], "text/plain": [ - "210" + " index vh zid strand is_scaf\n", + "0 2 0 39 0 True\n", + "1 1 0 40 0 True\n", + "2 0 0 41 0 True\n", + "3 5 1 41 1 True\n", + "4 4 1 40 1 True\n", + ".. ... .. .. ... ...\n", + "410 386 3 30 13 False\n", + "411 385 3 31 13 False\n", + "412 384 3 32 13 False\n", + "413 383 3 33 13 False\n", + "414 382 3 34 13 False\n", + "\n", + "[415 rows x 5 columns]" ] }, - "execution_count": 119, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", - "len(vh_vb._scaf)" + "nt_prop[]" ] }, { "cell_type": "code", - "execution_count": 66, - "id": "ab020675", + "execution_count": 12, + "id": "0d9ff4d1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{0: (12, 16), 1: (12, 15), 2: (13, 15), 3: (13, 16), 4: (13, 17), 5: (12, 17)}" + "{(2, 34): (3, [43]),\n", + " (2, 33): (3, [42]),\n", + " (2, 32): (3, [41]),\n", + " (2, 31): (3, [40]),\n", + " (2, 30): (3, [39]),\n", + " (2, 29): (3, [38]),\n", + " (2, 28): (3, [37]),\n", + " (2, 27): (3, [36]),\n", + " (2, 26): (3, [35]),\n", + " (2, 25): (3, [34]),\n", + " (2, 24): (3, [33]),\n", + " (2, 23): (3, [32]),\n", + " (2, 22): (3, [31]),\n", + " (2, 21): (3, [30]),\n", + " (2, 20): (3, [29]),\n", + " (2, 19): (3, [28]),\n", + " (2, 18): (3, [27]),\n", + " (2, 17): (3, [26]),\n", + " (2, 16): (3, [25]),\n", + " (2, 15): (3, [24]),\n", + " (2, 14): (3, [23]),\n", + " (2, 13): (3, [22]),\n", + " (2, 12): (3, [21]),\n", + " (2, 11): (3, [20]),\n", + " (2, 10): (3, [19]),\n", + " (2, 9): (3, [18]),\n", + " (2, 8): (3, [17]),\n", + " (2, 7): (3, [16]),\n", + " (2, 6): (3, [15]),\n", + " (2, 5): (3, [14]),\n", + " (2, 4): (3, [13]),\n", + " (2, 3): (3, [12]),\n", + " (2, 2): (3, [11]),\n", + " (2, 1): (3, [10]),\n", + " (2, 0): (3, [9]),\n", + " (1, 3): (8, [281]),\n", + " (1, 4): (8, [280]),\n", + " (1, 5): (8, [279]),\n", + " (1, 6): (8, [278]),\n", + " (1, 7): (8, [277]),\n", + " (1, 8): (8, [276]),\n", + " (1, 9): (8, [275]),\n", + " (1, 10): (8, [274]),\n", + " (1, 11): (8, [273]),\n", + " (1, 12): (8, [272]),\n", + " (1, 13): (8, [271]),\n", + " (1, 14): (8, [270]),\n", + " (1, 15): (8, [269]),\n", + " (1, 16): (8, [268]),\n", + " (1, 17): (8, [267]),\n", + " (1, 18): (8, [266]),\n", + " (1, 19): (8, [265]),\n", + " (1, 20): (8, [264]),\n", + " (0, 20): (8, [263]),\n", + " (0, 19): (8, [262]),\n", + " (0, 18): (8, [261]),\n", + " (0, 17): (8, [260]),\n", + " (0, 16): (8, [259]),\n", + " (0, 15): (8, [258]),\n", + " (0, 14): (8, [257]),\n", + " (0, 13): (8, [256]),\n", + " (0, 12): (8, [255]),\n", + " (0, 11): (8, [254]),\n", + " (0, 10): (8, [253]),\n", + " (0, 9): (8, [252]),\n", + " (0, 8): (8, [251]),\n", + " (0, 7): (8, [250]),\n", + " (0, 6): (8, [249]),\n", + " (0, 5): (8, [248]),\n", + " (0, 4): (8, [247]),\n", + " (0, 3): (8, [246]),\n", + " (0, 2): (8, [245]),\n", + " (0, 23): (9, [302]),\n", + " (0, 22): (9, [301]),\n", + " (0, 21): (9, [300]),\n", + " (1, 21): (9, [299]),\n", + " (1, 22): (9, [298]),\n", + " (1, 23): (9, [297]),\n", + " (1, 24): (9, [296]),\n", + " (1, 25): (9, [295]),\n", + " (1, 26): (9, [294]),\n", + " (1, 27): (9, [293]),\n", + " (1, 28): (9, [292]),\n", + " (1, 29): (9, [291]),\n", + " (1, 30): (9, [290]),\n", + " (1, 31): (9, [289]),\n", + " (1, 32): (9, [288]),\n", + " (1, 33): (9, [287]),\n", + " (1, 34): (9, [286]),\n", + " (1, 35): (9, [285]),\n", + " (1, 36): (9, [284]),\n", + " (1, 37): (9, [283]),\n", + " (1, 38): (9, [282]),\n", + " (5, 9): (10, [325]),\n", + " (5, 10): (10, [324]),\n", + " (5, 11): (10, [323]),\n", + " (5, 12): (10, [322]),\n", + " (5, 13): (10, [321]),\n", + " (5, 14): (10, [320]),\n", + " (5, 15): (10, [319]),\n", + " (5, 16): (10, [318]),\n", + " (5, 17): (10, [317]),\n", + " (5, 18): (10, [316]),\n", + " (5, 19): (10, [315]),\n", + " (5, 20): (10, [314]),\n", + " (5, 21): (10, [313]),\n", + " (5, 22): (10, [312]),\n", + " (5, 23): (10, [311]),\n", + " (5, 24): (10, [310]),\n", + " (5, 25): (10, [309]),\n", + " (5, 26): (10, [308]),\n", + " (5, 27): (10, [307]),\n", + " (0, 27): (10, [306]),\n", + " (0, 26): (10, [305]),\n", + " (0, 25): (10, [304]),\n", + " (0, 24): (10, [303]),\n", + " (0, 38): (11, [348]),\n", + " (0, 37): (11, [347]),\n", + " (0, 36): (11, [346]),\n", + " (0, 35): (11, [345]),\n", + " (0, 34): (11, [344]),\n", + " (0, 33): (11, [343]),\n", + " (0, 32): (11, [342]),\n", + " (0, 31): (11, [341]),\n", + " (0, 30): (11, [340]),\n", + " (0, 29): (11, [339]),\n", + " (0, 28): (11, [338]),\n", + " (5, 28): (11, [337]),\n", + " (5, 29): (11, [336]),\n", + " (5, 30): (11, [335]),\n", + " (5, 31): (11, [334]),\n", + " (5, 32): (11, [333]),\n", + " (5, 33): (11, [332]),\n", + " (5, 34): (11, [331]),\n", + " (5, 35): (11, [330]),\n", + " (5, 36): (11, [329]),\n", + " (5, 37): (11, [328]),\n", + " (5, 38): (11, [327]),\n", + " (5, 39): (11, [326]),\n", + " (3, 0): (12, [381]),\n", + " (3, 1): (12, [380]),\n", + " (3, 2): (12, [379]),\n", + " (3, 3): (12, [378]),\n", + " (3, 4): (12, [377]),\n", + " (3, 5): (12, [376]),\n", + " (3, 6): (12, [375]),\n", + " (3, 7): (12, [374]),\n", + " (3, 8): (12, [373]),\n", + " (3, 9): (12, [372]),\n", + " (3, 10): (12, [371]),\n", + " (3, 11): (12, [370]),\n", + " (3, 12): (12, [369]),\n", + " (3, 13): (12, [368]),\n", + " (3, 14): (12, [367]),\n", + " (3, 15): (12, [366]),\n", + " (3, 16): (12, [365]),\n", + " (3, 17): (12, [364]),\n", + " (3, 18): (12, [363]),\n", + " (3, 19): (12, [362]),\n", + " (3, 20): (12, [361]),\n", + " (4, 20): (12, [360]),\n", + " (4, 19): (12, [359]),\n", + " (4, 18): (12, [358]),\n", + " (4, 17): (12, [357]),\n", + " (4, 16): (12, [356]),\n", + " (4, 15): (12, [355]),\n", + " (4, 14): (12, [354]),\n", + " (4, 13): (12, [353]),\n", + " (4, 12): (12, [352]),\n", + " (4, 11): (12, [351]),\n", + " (4, 10): (12, [350]),\n", + " (4, 9): (12, [349]),\n", + " (4, 39): (13, [414]),\n", + " (4, 38): (13, [413]),\n", + " (4, 37): (13, [412]),\n", + " (4, 36): (13, [411]),\n", + " (4, 35): (13, [410]),\n", + " (4, 34): (13, [409]),\n", + " (4, 33): (13, [408]),\n", + " (4, 32): (13, [407]),\n", + " (4, 31): (13, [406]),\n", + " (4, 30): (13, [405]),\n", + " (4, 29): (13, [404]),\n", + " (4, 28): (13, [403]),\n", + " (4, 27): (13, [402]),\n", + " (4, 26): (13, [401]),\n", + " (4, 25): (13, [400]),\n", + " (4, 24): (13, [399]),\n", + " (4, 23): (13, [398]),\n", + " (4, 22): (13, [397]),\n", + " (4, 21): (13, [396]),\n", + " (3, 21): (13, [395]),\n", + " (3, 22): (13, [394]),\n", + " (3, 23): (13, [393]),\n", + " (3, 24): (13, [392]),\n", + " (3, 25): (13, [391]),\n", + " (3, 26): (13, [390]),\n", + " (3, 27): (13, [389]),\n", + " (3, 28): (13, [388]),\n", + " (3, 29): (13, [387]),\n", + " (3, 30): (13, [386]),\n", + " (3, 31): (13, [385]),\n", + " (3, 32): (13, [384]),\n", + " (3, 33): (13, [383]),\n", + " (3, 34): (13, [382])}" ] }, - "execution_count": 66, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pattern" - ] - }, - { - "cell_type": "code", - "execution_count": 141, - "id": "0b152186", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(0, 40)\n", - "(0, 41)\n" - ] - }, - { - "ename": "AttributeError", - "evalue": "'NoneType' object has no attribute 'append'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-141-a0e59df19136>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_vh_vb\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.virt2nuc\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m<ipython-input-140-99891f72666f>\u001b[0m in \u001b[0;36mdecode_vh_vb\u001b[0;34m(virt2nuc)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mscafs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvh_vb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_scaf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mstaps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvh_vb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stap\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mscaf_strands\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfind_segs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscafs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mvh_list\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m<ipython-input-140-99891f72666f>\u001b[0m in \u001b[0;36mfind_segs\u001b[0;34m(vir2nuc_scaf)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvir2nuc_scaf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0moligos\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0moligo\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moligos\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0moligo\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvir2nuc_scaf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moligos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'append'" - ] - } - ], - "source": [ - "s=decode_vh_vb(\"test.virt2nuc\")[0]\n" + "n=list(vh_vb._stap.keys())\n", + "vh_vb._stap" ] }, { "cell_type": "code", "execution_count": 165, - "id": "852198d8", + "id": "cd863c53", "metadata": {}, "outputs": [], "source": [ @@ -229,7 +544,7 @@ { "cell_type": "code", "execution_count": 160, - "id": "b4064749", + "id": "4f91e6e1", "metadata": {}, "outputs": [], "source": [ @@ -273,7 +588,7 @@ { "cell_type": "code", "execution_count": 163, - "id": "ae57cd6f", + "id": "7b654425", "metadata": {}, "outputs": [ { @@ -490,7 +805,7 @@ { "cell_type": "code", "execution_count": 157, - "id": "d4698785", + "id": "0b77e10b", "metadata": {}, "outputs": [ { @@ -514,7 +829,7 @@ { "cell_type": "code", "execution_count": 152, - "id": "2b98d0e2", + "id": "afd7317c", "metadata": {}, "outputs": [ { @@ -535,7 +850,7 @@ { "cell_type": "code", "execution_count": 62, - "id": "f666ffb1", + "id": "c65032d0", "metadata": {}, "outputs": [ { @@ -763,7 +1078,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "04da1a1e", + "id": "e15d2aa1", "metadata": {}, "outputs": [], "source": [ @@ -889,7 +1204,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "c168b2f4", + "id": "c6dfa08f", "metadata": {}, "outputs": [], "source": [ @@ -910,7 +1225,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "1e29e6a7", + "id": "4dcbd8e3", "metadata": {}, "outputs": [ { @@ -972,7 +1287,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "fd26bfad", + "id": "9404f68b", "metadata": {}, "outputs": [ { @@ -1034,7 +1349,7 @@ { "cell_type": "code", "execution_count": null, - "id": "438d3345", + "id": "c758c48e", "metadata": {}, "outputs": [], "source": [ @@ -1047,7 +1362,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "595fa2ff", + "id": "14885dc7", "metadata": {}, "outputs": [ { @@ -1068,7 +1383,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "3d614740", + "id": "2d9f9b60", "metadata": {}, "outputs": [ { @@ -1117,7 +1432,7 @@ { "cell_type": "code", "execution_count": 36, - "id": "831d5ea6", + "id": "c82d79df", "metadata": {}, "outputs": [ { @@ -1179,7 +1494,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "7831eb35", + "id": "6fe9bc07", "metadata": {}, "outputs": [ { @@ -1205,7 +1520,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28c9253b", + "id": "99e0edfc", "metadata": {}, "outputs": [], "source": [] @@ -1213,7 +1528,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "a018193b", + "id": "6b01ed08", "metadata": {}, "outputs": [ { @@ -1233,7 +1548,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "2bc520fd", + "id": "ada91cfc", "metadata": {}, "outputs": [ { @@ -1254,7 +1569,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "117520a1", + "id": "5545caf9", "metadata": {}, "outputs": [ { @@ -1324,7 +1639,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "d8eb98b3", + "id": "86b74eac", "metadata": {}, "outputs": [], "source": [ @@ -1335,7 +1650,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "44db9666", + "id": "1e2447da", "metadata": {}, "outputs": [ { @@ -1444,7 +1759,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "52ccbb65", + "id": "9ae6bbc0", "metadata": {}, "outputs": [ { @@ -1465,7 +1780,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "66171e52", + "id": "1f66b770", "metadata": {}, "outputs": [ { @@ -1496,7 +1811,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "34ff0906", + "id": "71886708", "metadata": {}, "outputs": [ { @@ -1993,7 +2308,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "53a3183e", + "id": "639aea0c", "metadata": {}, "outputs": [ { @@ -2027,7 +2342,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "fdcadbe9", + "id": "457d395d", "metadata": {}, "outputs": [ { @@ -2047,7 +2362,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "d27fd998", + "id": "3c98514b", "metadata": {}, "outputs": [], "source": [ @@ -2057,7 +2372,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "fe094a9b", + "id": "490f9736", "metadata": {}, "outputs": [ { @@ -2078,7 +2393,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "5e54cf99", + "id": "c6058056", "metadata": {}, "outputs": [ { @@ -2105,7 +2420,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bbeb8724", + "id": "07504a71", "metadata": {}, "outputs": [], "source": [] @@ -2113,21 +2428,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.19" } }, "nbformat": 4, diff --git a/mrdna/readers/test/tests.ipynb b/mrdna/readers/test/tests.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..3de002b34ac0c2434e62c062f5b2f71852aff4cd --- /dev/null +++ b/mrdna/readers/test/tests.ipynb @@ -0,0 +1,5758 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 52, + "id": "0ed58664", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pickle\n", + "import numpy as np\n", + "import json\n", + "import re\n", + "import cadnano\n", + "from cadnano.document import Document\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "10b47d85", + "metadata": {}, + "outputs": [], + "source": [ + "from cadnano.views.pathview import pathstyles" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fa8a0a91", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _\n", + " _____ ___ _| |___ ___\n", + "| | _| . | | .'|\n", + "|_|_|_|_| |___|_|_|__,| v1.0a.dev116 \n", + "it/its\n", + "\n" + ] + } + ], + "source": [ + "import cadnano\n", + "from cadnano.document import Document\n", + "from mrdna.arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "05a6db77", + "metadata": {}, + "outputs": [], + "source": [ + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(lattice_type)\n", + " return lattice_type\n", + "\n", + "\n", + "def read_json_file(filename):\n", + " import cadnano\n", + " from cadnano.document import Document\n", + "\n", + " try:\n", + " with open(filename) as ch:\n", + " json_data = json.load(ch)\n", + " except:\n", + " with open(filename) as ch:\n", + " content = \"\"\n", + " for l in ch:\n", + " l = re.sub(r\"'\", r'\"', l)\n", + " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", + " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", + " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", + " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", + " content += l+\"\\n\"\n", + " json_data = json.loads(content)\n", + "\n", + " try:\n", + " doc = Document()\n", + " cadnano.fileio.v3decode.decode(doc, json_data)\n", + " decoder = 3\n", + " except:\n", + " doc = Document()\n", + " cadnano.fileio.v2decode.decode(doc, json_data)\n", + " decoder = 2\n", + "\n", + " parts = [p for p in doc.getParts()]\n", + " if len(parts) != 1:\n", + " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n", + " part = parts[0]\n", + "\n", + " if decoder == 2:\n", + " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n", + " l = get_lattice(part)\n", + " if l == 'square':\n", + " for id_num in part.getIdNums():\n", + " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n", + " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n", + " df=pd.DataFrame(json_data[\"vstrands\"])\n", + " n_df=df.set_index(\"num\")\n", + " return part\n", + "\n", + "def get_helix_angle(part, helix_id, indices):\n", + " \"\"\" Get \"start_orientation\" for helix \"\"\"\n", + " # import ipdb\n", + " # ipdb.set_trace()\n", + "\n", + " \"\"\" FROM CADNANO2.5\n", + " + angle is CCW\n", + " - angle is CW\n", + " Right handed DNA rotates clockwise from 5' to 3'\n", + " we use the convention the 5' end starts at 0 degrees\n", + " and it's pair is minor_groove_angle degrees away\n", + " direction, hence the minus signs. eulerZ\n", + " \"\"\"\n", + "\n", + " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n", + " ['helical_pitch',\n", + " 'bases_per_repeat',\n", + " 'turns_per_repeat',\n", + " 'eulerZ',\n", + " 'minor_groove_angle']]\n", + " twist_per_base = tpr*360./bpr\n", + " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n", + " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n", + " return angle\n", + "\n", + "def gen_id_series(strand,part):\n", + " df=pd.DataFrame(columns=[\"vh\",\"zid\",\"fwd\",\"stack_tuple\",\"threeprime_tuple\",\"x\",\"y\",\"z\"],index=range(strand.totalLength()),dtype=object)\n", + " df[\"vh\"]=strand._id_num\n", + " df[\"fwd\"]=strand.isForward()\n", + " df[\"x\"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10\n", + " df[\"y\"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10\n", + " id_lo,id_hi=strand.idxs()\n", + " zids=[str(i) for i in range(id_lo,id_hi+1)]\n", + " insert_dict={}\n", + " insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])\n", + " z=np.arange(id_lo,id_hi+1)\n", + " zids=[str(i) for i in range(id_lo,id_hi+1)]\n", + " z=list(np.arange(id_lo,id_hi+1))\n", + " zids=[str(i) for i in range(id_lo,id_hi+1)]\n", + " for insert_base in insert_dict:\n", + " z_ind=zids.index(str(insert_base))\n", + " z_val=insert_dict[insert_base]\n", + " z_pos_ind=z.index(insert_base)\n", + " zids.pop(z_ind)\n", + " z.pop(z_pos_ind)\n", + " if z_val!=-1:\n", + " #l=[str(insert_base)+\".\"+str(i) for i in range(z_val+1)]\n", + " l=list(range(z_val+1))\n", + " l.reverse()\n", + " for k in l: \n", + " zids.insert(z_ind,str(insert_base)+\".\"+str(k))\n", + " z.insert(z_pos_ind,insert_base+k/(z_val+1))\n", + " df[\"zid\"]=zids\n", + " df[\"z\"]=np.array(z)*3.4\n", + " \n", + " \n", + " L=[(df[\"vh\"][i],df[\"zid\"][i],df[\"fwd\"][i]) for i in df.index]\n", + " if strand.isForward()==True:\n", + " df[\"stack_tuple\"]=L[1:]+[-1]\n", + " if strand.connection3p() is None:\n", + " df[\"threeprime_tuple\"]=L[1:]+[-1]\n", + " else:\n", + " df[\"threeprime_tuple\"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]\n", + " \n", + " \n", + " else:\n", + " df[\"stack_tuple\"]=[-1]+L[0:-1]\n", + " if strand.connection3p() is None:\n", + " df[\"threeprime_tuple\"]=[-1]+L[0:-1]\n", + " else:\n", + " df[\"threeprime_tuple\"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1]\n", + " ## cadnano 3.1 sequence assign is wrong if there is insertion or deletion. \n", + " df[\"r\"]=[np.array([df[\"x\"][i],df[\"y\"][i],df[\"z\"][i]],dtype=np.float32) for i in df.index]\n", + " \n", + " return [pd.Series(df.loc[i]) for i in df.index]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "e13a5231", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "n=read_json_file(\"test_insert.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "d41d4f68", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<ipython-input-43-f3af6cfd4751>:25: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " nt_prop[\"stack_tuple\"][i]=(nt_prop.loc[i][\"vh\"],str(zid),fwd)\n" + ] + } + ], + "source": [ + "m=gen_prop_table(n)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "e4da7f1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(0, '6', True),\n", + " (0, '7', True),\n", + " (0, '8', True),\n", + " (0, '9', True),\n", + " (0, '10', True),\n", + " (0, '11', True),\n", + " (0, '12', True),\n", + " (0, '13.0', True),\n", + " (0, '13.1', True),\n", + " (0, '13.2', True),\n", + " (0, '13.3', True),\n", + " (0, '13.4', True),\n", + " (0, '13.5', True),\n", + " (0, '13.6', True),\n", + " (0, '13.7', True),\n", + " (0, '13.8', True),\n", + " (0, '13.9', True),\n", + " (0, '13.10', True),\n", + " (0, '14', True),\n", + " (0, '15', True),\n", + " (0, '16', True),\n", + " (0, '17', True),\n", + " (0, '18', True),\n", + " (0, '19', True),\n", + " (0, '20', True),\n", + " (0, '21', True),\n", + " (0, '22', True),\n", + " (0, '23', True),\n", + " (0, '24', True),\n", + " (0, '25', True),\n", + " (0, '26', True),\n", + " (0, '27', True),\n", + " (0, '28', True),\n", + " (0, '29.0', True),\n", + " (0, '29.1', True),\n", + " (0, '29.2', True),\n", + " (0, '29.3', True),\n", + " (0, '29.4', True),\n", + " (0, '29.5', True),\n", + " (0, '29.6', True),\n", + " (0, '29.7', True),\n", + " (0, '29.8', True),\n", + " (0, '29.9', True),\n", + " (0, '29.10', True),\n", + " (0, '29.11', True),\n", + " (0, '30', True),\n", + " (0, '31', True),\n", + " (0, '32', True),\n", + " (0, '33', True),\n", + " (0, '34', True),\n", + " (0, '35', True),\n", + " (0, '36', True),\n", + " -1,\n", + " (0, '40', True),\n", + " (0, '41', True),\n", + " -1,\n", + " -1,\n", + " (0, '2', False),\n", + " (0, '3', False),\n", + " (0, '4', False),\n", + " (0, '5', False),\n", + " (0, '6', False),\n", + " (0, '7', False),\n", + " (0, '8', False),\n", + " (0, '9', False),\n", + " (0, '10', False),\n", + " (0, '11', False),\n", + " (0, '12', False),\n", + " (0, '13.0', False),\n", + " (0, '13.1', False),\n", + " (0, '13.2', False),\n", + " (0, '13.3', False),\n", + " (0, '13.4', False),\n", + " (0, '13.5', False),\n", + " (0, '13.6', False),\n", + " (0, '13.7', False),\n", + " (0, '13.8', False),\n", + " (0, '13.9', False),\n", + " (0, '13.10', False),\n", + " (0, '14', False),\n", + " (0, '15', False),\n", + " (0, '16', False),\n", + " (0, '17', False),\n", + " (0, '18', False),\n", + " (0, '19', False),\n", + " (0, '20', False),\n", + " (0, '21', False),\n", + " (0, '22', False),\n", + " (0, '23', False),\n", + " (0, '24', False),\n", + " (0, '25', False),\n", + " (0, '26', False),\n", + " (0, '27', False),\n", + " (0, '28', False),\n", + " (0, '29.0', False),\n", + " (0, '29.1', False),\n", + " (0, '29.2', False),\n", + " (0, '29.3', False),\n", + " (0, '29.4', False),\n", + " (0, '29.5', False),\n", + " (0, '29.6', False),\n", + " (0, '29.7', False),\n", + " (0, '29.8', False),\n", + " (0, '29.9', False),\n", + " (0, '29.10', False),\n", + " (0, '29.11', False),\n", + " (0, '30', False),\n", + " (0, '31', False),\n", + " (0, '32', False),\n", + " (0, '33', False),\n", + " (0, '34', False),\n", + " (0, '35', False),\n", + " (0, '36', False),\n", + " (0, '37', False),\n", + " (1, '4', True),\n", + " (1, '5', True),\n", + " (1, '6', True),\n", + " (1, '7', True),\n", + " (1, '8', True),\n", + " (1, '9', True),\n", + " (1, '10.0', True),\n", + " (1, '10.1', True),\n", + " (1, '11', True),\n", + " (1, '12', True),\n", + " (1, '13', True),\n", + " (1, '14', True),\n", + " (1, '15', True),\n", + " (1, '16', True),\n", + " (1, '17', True),\n", + " (1, '18', True),\n", + " (1, '19', True),\n", + " (1, '20', True),\n", + " (1, '21', True),\n", + " (1, '22', True),\n", + " (1, '23', True),\n", + " (1, '24', True),\n", + " (1, '25', True),\n", + " (1, '26', True),\n", + " (1, '27', True),\n", + " (1, '28', True),\n", + " (1, '29', True),\n", + " (1, '30', True),\n", + " (1, '31', True),\n", + " (1, '32', True),\n", + " (1, '33', True),\n", + " (1, '34', True),\n", + " (1, '35', True),\n", + " (1, '36', True),\n", + " (1, '37', True),\n", + " (1, '38', True),\n", + " -1,\n", + " -1,\n", + " (1, '5', False),\n", + " (1, '6', False),\n", + " (1, '7', False),\n", + " (1, '8', False),\n", + " (1, '9', False),\n", + " (1, '10.0', False),\n", + " (1, '10.1', False),\n", + " (1, '11', False),\n", + " (1, '12', False),\n", + " (1, '13', False),\n", + " (1, '14', False),\n", + " (1, '15', False),\n", + " (1, '16', False),\n", + " (1, '17', False),\n", + " (1, '18', False),\n", + " (1, '19', False),\n", + " (1, '20', False),\n", + " (1, '21', False),\n", + " (1, '22', False),\n", + " (1, '23', False),\n", + " (1, '24', False),\n", + " (1, '25', False),\n", + " (1, '26', False),\n", + " (1, '27', False),\n", + " (1, '28', False),\n", + " (1, '29', False),\n", + " (1, '30', False),\n", + " (1, '31', False),\n", + " (1, '32', False),\n", + " (1, '33', False),\n", + " (1, '34', False),\n", + " (1, '35', False),\n", + " -1,\n", + " (1, '39', False),\n", + " (1, '40', False),\n", + " (2, '3', True),\n", + " (2, '4', True),\n", + " (2, '5', True),\n", + " (2, '6', True),\n", + " (2, '7', True),\n", + " (2, '8', True),\n", + " (2, '9', True),\n", + " (2, '10', True),\n", + " (2, '12', True),\n", + " (2, '13', True),\n", + " (2, '14', True),\n", + " (2, '15', True),\n", + " (2, '16', True),\n", + " (2, '17', True),\n", + " (2, '18', True),\n", + " (2, '19', True),\n", + " (2, '20', True),\n", + " (2, '21', True),\n", + " (2, '22', True),\n", + " (2, '23', True),\n", + " (2, '24', True),\n", + " (2, '25', True),\n", + " (2, '26', True),\n", + " (2, '27', True),\n", + " (2, '28', True),\n", + " (2, '29', True),\n", + " (2, '30', True),\n", + " (2, '31', True),\n", + " (2, '32', True),\n", + " -1,\n", + " (2, '40', True),\n", + " (2, '41', True),\n", + " -1,\n", + " -1,\n", + " (2, '0', False),\n", + " (2, '1', False),\n", + " (2, '2', False),\n", + " (2, '3', False),\n", + " (2, '4', False),\n", + " (2, '5', False),\n", + " (2, '6', False),\n", + " (2, '7', False),\n", + " (2, '8', False),\n", + " (2, '9', False),\n", + " (2, '10', False),\n", + " (2, '12', False),\n", + " (2, '13', False),\n", + " (2, '14', False),\n", + " (2, '15', False),\n", + " (2, '16', False),\n", + " (2, '17', False),\n", + " (2, '18', False),\n", + " (2, '19', False),\n", + " (2, '20', False),\n", + " (2, '21', False),\n", + " (2, '22', False),\n", + " (2, '23', False),\n", + " (2, '24', False),\n", + " (2, '25', False),\n", + " (2, '26', False),\n", + " (2, '27', False),\n", + " (2, '28', False),\n", + " (2, '29', False),\n", + " (2, '30', False),\n", + " (2, '31', False),\n", + " (2, '32', False),\n", + " (2, '33', False),\n", + " (3, '1', True),\n", + " (3, '2', True),\n", + " (3, '3', True),\n", + " (3, '4', True),\n", + " (3, '5', True),\n", + " (3, '6', True),\n", + " (3, '7', True),\n", + " (3, '8', True),\n", + " (3, '9', True),\n", + " (3, '10', True),\n", + " (3, '11', True),\n", + " (3, '12', True),\n", + " (3, '13', True),\n", + " (3, '14', True),\n", + " (3, '15', True),\n", + " (3, '16', True),\n", + " (3, '17', True),\n", + " (3, '18', True),\n", + " (3, '19', True),\n", + " (3, '20', True),\n", + " (3, '21', True),\n", + " (3, '22', True),\n", + " (3, '23', True),\n", + " (3, '24', True),\n", + " (3, '25', True),\n", + " (3, '26', True),\n", + " (3, '27', True),\n", + " (3, '29', True),\n", + " (3, '30', True),\n", + " (3, '31', True),\n", + " (3, '32', True),\n", + " (3, '33', True),\n", + " (3, '34', True),\n", + " -1,\n", + " -1,\n", + " (3, '2', False),\n", + " (3, '3', False),\n", + " (3, '4', False),\n", + " (3, '5', False),\n", + " (3, '6', False),\n", + " (3, '7', False),\n", + " (3, '8', False),\n", + " (3, '9', False),\n", + " (3, '10', False),\n", + " (3, '11', False),\n", + " (3, '12', False),\n", + " (3, '13', False),\n", + " (3, '14', False),\n", + " (3, '15', False),\n", + " (3, '16', False),\n", + " (3, '17', False),\n", + " (3, '18', False),\n", + " (3, '19', False),\n", + " (3, '20', False),\n", + " (3, '21', False),\n", + " (3, '22', False),\n", + " (3, '23', False),\n", + " (3, '24', False),\n", + " (3, '25', False),\n", + " (3, '26', False),\n", + " (3, '27', False),\n", + " (3, '29', False),\n", + " (3, '30', False),\n", + " (3, '31', False),\n", + " -1,\n", + " (3, '37', False),\n", + " (3, '38', False),\n", + " (3, '39', False),\n", + " (3, '40', False),\n", + " (4, '1', True),\n", + " (4, '2', True),\n", + " (4, '3', True),\n", + " -1,\n", + " (4, '10', True),\n", + " (4, '11', True),\n", + " (4, '12', True),\n", + " (4, '13', True),\n", + " (4, '14', True),\n", + " (4, '15', True),\n", + " (4, '16', True),\n", + " (4, '17', True),\n", + " (4, '18', True),\n", + " (4, '19', True),\n", + " (4, '20', True),\n", + " (4, '21', True),\n", + " (4, '22', True),\n", + " (4, '23', True),\n", + " (4, '24', True),\n", + " (4, '25', True),\n", + " (4, '26', True),\n", + " (4, '27', True),\n", + " (4, '28', True),\n", + " (4, '29', True),\n", + " (4, '30', True),\n", + " (4, '31', True),\n", + " (4, '32', True),\n", + " (4, '33', True),\n", + " (4, '34', True),\n", + " (4, '35', True),\n", + " (4, '36', True),\n", + " (4, '37', True),\n", + " (4, '38', True),\n", + " (4, '39', True),\n", + " -1,\n", + " -1,\n", + " (4, '9', False),\n", + " (4, '10', False),\n", + " (4, '11', False),\n", + " (4, '12', False),\n", + " (4, '13', False),\n", + " (4, '14', False),\n", + " (4, '15', False),\n", + " (4, '16', False),\n", + " (4, '17', False),\n", + " (4, '18', False),\n", + " (4, '19', False),\n", + " (4, '20', False),\n", + " (4, '21', False),\n", + " (4, '22', False),\n", + " (4, '23', False),\n", + " (4, '24', False),\n", + " (4, '25', False),\n", + " (4, '26', False),\n", + " (4, '27', False),\n", + " (4, '28', False),\n", + " (4, '29', False),\n", + " (4, '30', False),\n", + " (4, '31', False),\n", + " (4, '32', False),\n", + " (4, '33', False),\n", + " (4, '34', False),\n", + " (4, '35', False),\n", + " (4, '36', False),\n", + " (4, '37', False),\n", + " (4, '38', False),\n", + " (5, '10', True),\n", + " (5, '11', True),\n", + " (5, '12', True),\n", + " (5, '13', True),\n", + " (5, '14', True),\n", + " (5, '15', True),\n", + " (5, '16', True),\n", + " (5, '17', True),\n", + " (5, '18', True),\n", + " (5, '19', True),\n", + " (5, '20', True),\n", + " (5, '21', True),\n", + " (5, '22', True),\n", + " (5, '23', True),\n", + " (5, '24', True),\n", + " (5, '25', True),\n", + " (5, '26', True),\n", + " (5, '27', True),\n", + " (5, '28', True),\n", + " (5, '29', True),\n", + " (5, '30', True),\n", + " (5, '31', True),\n", + " (5, '32', True),\n", + " (5, '33', True),\n", + " (5, '34', True),\n", + " (5, '35', True),\n", + " (5, '36', True),\n", + " (5, '37', True),\n", + " (5, '38', True),\n", + " (5, '39', True),\n", + " -1,\n", + " -1,\n", + " (5, '0', False),\n", + " (5, '1', False),\n", + " (5, '2', False),\n", + " -1,\n", + " (5, '9', False),\n", + " (5, '10', False),\n", + " (5, '11', False),\n", + " (5, '12', False),\n", + " (5, '13', False),\n", + " (5, '14', False),\n", + " (5, '15', False),\n", + " (5, '16', False),\n", + " (5, '17', False),\n", + " (5, '18', False),\n", + " (5, '19', False),\n", + " (5, '20', False),\n", + " (5, '21', False),\n", + " (5, '22', False),\n", + " (5, '23', False),\n", + " (5, '24', False),\n", + " (5, '25', False),\n", + " (5, '26', False),\n", + " (5, '27', False),\n", + " (5, '28', False),\n", + " (5, '29', False),\n", + " (5, '30', False),\n", + " (5, '31', False),\n", + " (5, '32', False),\n", + " (5, '33', False),\n", + " (5, '34', False),\n", + " (5, '35', False),\n", + " (5, '36', False),\n", + " (5, '37', False),\n", + " (5, '38', False)]" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(m[\"stack_tuple\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "4ba4afde", + "metadata": {}, + "outputs": [], + "source": [ + "vhzid=list(zip(m[\"vh\"],m[\"zid\"],m[\"fwd\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "4f08e344", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "86" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vhzid.index((0,str(int(\"21\")+1),False))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "a6597822", + "metadata": {}, + "outputs": [], + "source": [ + "ind,=np.where(m[\"stack_tuple\"]==-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "d5c11476", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<ipython-input-42-30730821dc20>:10: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " m[\"stack_tuple\"][i]=(vh,str(zid),fwd)\n" + ] + } + ], + "source": [ + "for i in ind:\n", + " vh,zid,fwd=m[\"vh\"][i],int(m[\"zid\"][i]),m[\"fwd\"][i]\n", + " if fwd is True:\n", + " print(fwd)\n", + " zid+=1\n", + " else:\n", + " zid-=1\n", + " try:\n", + " n=vhzid.index((vh,str(zid),fwd))\n", + " m[\"stack_tuple\"][i]=(vh,str(zid),fwd)\n", + " except:\n", + " continue" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "4703a0b8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>index</th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>fwd</th>\n", + " <th>stack_tuple</th>\n", + " <th>threeprime_tuple</th>\n", + " <th>x</th>\n", + " <th>y</th>\n", + " <th>z</th>\n", + " <th>r</th>\n", + " <th>seq</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>orientation</th>\n", + " <th>bp</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>52</th>\n", + " <td>52</td>\n", + " <td>0</td>\n", + " <td>36</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>(1, 36, False)</td>\n", + " <td>77.942295</td>\n", + " <td>90.00</td>\n", + " <td>122.4</td>\n", + " <td>[77.94229, 90.0, 122.4]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>183</td>\n", + " <td>1144.285714</td>\n", + " <td>111</td>\n", + " </tr>\n", + " <tr>\n", + " <th>55</th>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>41</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>77.942295</td>\n", + " <td>90.00</td>\n", + " <td>139.4</td>\n", + " <td>[77.94229, 90.0, 139.4]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>1315.714286</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>56</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>77.942295</td>\n", + " <td>90.00</td>\n", + " <td>6.8</td>\n", + " <td>[77.94229, 90.0, 6.8]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-21.428571</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>85</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>21</td>\n", + " <td>False</td>\n", + " <td>(0, 20, False)</td>\n", + " <td>(1, 21, True)</td>\n", + " <td>77.942295</td>\n", + " <td>90.00</td>\n", + " <td>71.4</td>\n", + " <td>[77.94229, 90.0, 71.4]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>133</td>\n", + " <td>630.000000</td>\n", + " <td>26</td>\n", + " </tr>\n", + " <tr>\n", + " <th>88</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>24</td>\n", + " <td>False</td>\n", + " <td>(0, 23, False)</td>\n", + " <td>-1</td>\n", + " <td>77.942295</td>\n", + " <td>90.00</td>\n", + " <td>81.6</td>\n", + " <td>[77.94229, 90.0, 81.6]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>732.857143</td>\n", + " <td>29</td>\n", + " </tr>\n", + " <tr>\n", + " <th>92</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>28</td>\n", + " <td>False</td>\n", + " <td>(0, 27, False)</td>\n", + " <td>(5, 28, True)</td>\n", + " <td>77.942295</td>\n", + " <td>90.00</td>\n", + " <td>95.2</td>\n", + " <td>[77.94229, 90.0, 95.2]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>408</td>\n", + " <td>870.000000</td>\n", + " <td>33</td>\n", + " </tr>\n", + " <tr>\n", + " <th>132</th>\n", + " <td>18</td>\n", + " <td>1</td>\n", + " <td>20</td>\n", + " <td>True</td>\n", + " <td>(1, 21, True)</td>\n", + " <td>(0, 20, False)</td>\n", + " <td>58.456721</td>\n", + " <td>78.75</td>\n", + " <td>68.0</td>\n", + " <td>[58.456722, 78.75, 68.0]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>84</td>\n", + " <td>595.714286</td>\n", + " <td>167</td>\n", + " </tr>\n", + " <tr>\n", + " <th>150</th>\n", + " <td>17</td>\n", + " <td>1</td>\n", + " <td>38</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>58.456721</td>\n", + " <td>78.75</td>\n", + " <td>129.2</td>\n", + " <td>[58.456722, 78.75, 129.2]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>1212.857143</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>151</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>(0, 5, True)</td>\n", + " <td>58.456721</td>\n", + " <td>78.75</td>\n", + " <td>17.0</td>\n", + " <td>[58.456722, 78.75, 17.0]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>0</td>\n", + " <td>81.428571</td>\n", + " <td>116</td>\n", + " </tr>\n", + " <tr>\n", + " <th>166</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>19</td>\n", + " <td>False</td>\n", + " <td>(1, 18, False)</td>\n", + " <td>(2, 19, True)</td>\n", + " <td>58.456721</td>\n", + " <td>78.75</td>\n", + " <td>64.6</td>\n", + " <td>[58.456722, 78.75, 64.6]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>203</td>\n", + " <td>561.428571</td>\n", + " <td>131</td>\n", + " </tr>\n", + " <tr>\n", + " <th>184</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>39</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>58.456721</td>\n", + " <td>78.75</td>\n", + " <td>132.6</td>\n", + " <td>[58.456722, 78.75, 132.6]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>1247.142857</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>202</th>\n", + " <td>15</td>\n", + " <td>2</td>\n", + " <td>18</td>\n", + " <td>True</td>\n", + " <td>(2, 19, True)</td>\n", + " <td>(1, 18, False)</td>\n", + " <td>58.456721</td>\n", + " <td>56.25</td>\n", + " <td>61.2</td>\n", + " <td>[58.456722, 56.25, 61.2]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>165</td>\n", + " <td>527.142857</td>\n", + " <td>237</td>\n", + " </tr>\n", + " <tr>\n", + " <th>216</th>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " <td>32</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>(3, 32, False)</td>\n", + " <td>58.456721</td>\n", + " <td>56.25</td>\n", + " <td>108.8</td>\n", + " <td>[58.456722, 56.25, 108.8]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>317</td>\n", + " <td>1007.142857</td>\n", + " <td>251</td>\n", + " </tr>\n", + " <tr>\n", + " <th>219</th>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>41</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>58.456721</td>\n", + " <td>56.25</td>\n", + " <td>139.4</td>\n", + " <td>[58.456722, 56.25, 139.4]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>1315.714286</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>220</th>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>58.456721</td>\n", + " <td>56.25</td>\n", + " <td>0.0</td>\n", + " <td>[58.456722, 56.25, 0.0]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-90.000000</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>274</th>\n", + " <td>20</td>\n", + " <td>3</td>\n", + " <td>20</td>\n", + " <td>True</td>\n", + " <td>(3, 21, True)</td>\n", + " <td>(4, 20, False)</td>\n", + " <td>77.942295</td>\n", + " <td>45.00</td>\n", + " <td>68.0</td>\n", + " <td>[77.94229, 45.0, 68.0]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>369</td>\n", + " <td>595.714286</td>\n", + " <td>306</td>\n", + " </tr>\n", + " <tr>\n", + " <th>287</th>\n", + " <td>12</td>\n", + " <td>3</td>\n", + " <td>34</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>77.942295</td>\n", + " <td>45.00</td>\n", + " <td>115.6</td>\n", + " <td>[77.94229, 45.0, 115.6]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>1075.714286</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>288</th>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>(2, 2, True)</td>\n", + " <td>77.942295</td>\n", + " <td>45.00</td>\n", + " <td>6.8</td>\n", + " <td>[77.94229, 45.0, 6.8]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>187</td>\n", + " <td>-21.428571</td>\n", + " <td>256</td>\n", + " </tr>\n", + " <tr>\n", + " <th>302</th>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>16</td>\n", + " <td>False</td>\n", + " <td>(3, 15, False)</td>\n", + " <td>(4, 16, True)</td>\n", + " <td>77.942295</td>\n", + " <td>45.00</td>\n", + " <td>54.4</td>\n", + " <td>[77.94229, 45.0, 54.4]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>334</td>\n", + " <td>458.571429</td>\n", + " <td>270</td>\n", + " </tr>\n", + " <tr>\n", + " <th>318</th>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>37</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>77.942295</td>\n", + " <td>45.00</td>\n", + " <td>125.8</td>\n", + " <td>[77.94229, 45.0, 125.8]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>1178.571429</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>326</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>97.427869</td>\n", + " <td>56.25</td>\n", + " <td>10.2</td>\n", + " <td>[97.42787, 56.25, 10.2]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>12.857143</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>333</th>\n", + " <td>6</td>\n", + " <td>4</td>\n", + " <td>15</td>\n", + " <td>True</td>\n", + " <td>(4, 16, True)</td>\n", + " <td>(3, 15, False)</td>\n", + " <td>97.427869</td>\n", + " <td>56.25</td>\n", + " <td>51.0</td>\n", + " <td>[97.42787, 56.25, 51.0]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>301</td>\n", + " <td>424.285714</td>\n", + " <td>364</td>\n", + " </tr>\n", + " <tr>\n", + " <th>357</th>\n", + " <td>23</td>\n", + " <td>4</td>\n", + " <td>39</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>(5, 39, False)</td>\n", + " <td>97.427869</td>\n", + " <td>56.25</td>\n", + " <td>132.6</td>\n", + " <td>[97.42787, 56.25, 132.6]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>454</td>\n", + " <td>1247.142857</td>\n", + " <td>388</td>\n", + " </tr>\n", + " <tr>\n", + " <th>358</th>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " <td>9</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>97.427869</td>\n", + " <td>56.25</td>\n", + " <td>30.6</td>\n", + " <td>[97.42787, 56.25, 30.6]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>218.571429</td>\n", + " <td>327</td>\n", + " </tr>\n", + " <tr>\n", + " <th>370</th>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " <td>21</td>\n", + " <td>False</td>\n", + " <td>(4, 20, False)</td>\n", + " <td>(3, 21, True)</td>\n", + " <td>97.427869</td>\n", + " <td>56.25</td>\n", + " <td>71.4</td>\n", + " <td>[97.42787, 56.25, 71.4]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>275</td>\n", + " <td>630.000000</td>\n", + " <td>339</td>\n", + " </tr>\n", + " <tr>\n", + " <th>407</th>\n", + " <td>18</td>\n", + " <td>5</td>\n", + " <td>27</td>\n", + " <td>True</td>\n", + " <td>(5, 28, True)</td>\n", + " <td>(0, 27, False)</td>\n", + " <td>97.427869</td>\n", + " <td>78.75</td>\n", + " <td>91.8</td>\n", + " <td>[97.42787, 78.75, 91.8]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>91</td>\n", + " <td>835.714286</td>\n", + " <td>442</td>\n", + " </tr>\n", + " <tr>\n", + " <th>419</th>\n", + " <td>11</td>\n", + " <td>5</td>\n", + " <td>39</td>\n", + " <td>True</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>97.427869</td>\n", + " <td>78.75</td>\n", + " <td>132.6</td>\n", + " <td>[97.42787, 78.75, 132.6]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>1247.142857</td>\n", + " <td>454</td>\n", + " </tr>\n", + " <tr>\n", + " <th>420</th>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>97.427869</td>\n", + " <td>78.75</td>\n", + " <td>0.0</td>\n", + " <td>[97.42787, 78.75, 0.0]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-90.000000</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>424</th>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " <td>9</td>\n", + " <td>False</td>\n", + " <td>-1</td>\n", + " <td>(4, 9, True)</td>\n", + " <td>97.427869</td>\n", + " <td>78.75</td>\n", + " <td>30.6</td>\n", + " <td>[97.42787, 78.75, 30.6]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>327</td>\n", + " <td>218.571429</td>\n", + " <td>389</td>\n", + " </tr>\n", + " <tr>\n", + " <th>438</th>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " <td>23</td>\n", + " <td>False</td>\n", + " <td>(5, 22, False)</td>\n", + " <td>-1</td>\n", + " <td>97.427869</td>\n", + " <td>78.75</td>\n", + " <td>78.2</td>\n", + " <td>[97.42787, 78.75, 78.2]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>698.571429</td>\n", + " <td>403</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " index vh zid fwd stack_tuple threeprime_tuple x y \\\n", + "52 52 0 36 True -1 (1, 36, False) 77.942295 90.00 \n", + "55 2 0 41 True -1 -1 77.942295 90.00 \n", + "56 0 0 2 False -1 -1 77.942295 90.00 \n", + "85 0 0 21 False (0, 20, False) (1, 21, True) 77.942295 90.00 \n", + "88 0 0 24 False (0, 23, False) -1 77.942295 90.00 \n", + "92 0 0 28 False (0, 27, False) (5, 28, True) 77.942295 90.00 \n", + "132 18 1 20 True (1, 21, True) (0, 20, False) 58.456721 78.75 \n", + "150 17 1 38 True -1 -1 58.456721 78.75 \n", + "151 0 1 5 False -1 (0, 5, True) 58.456721 78.75 \n", + "166 0 1 19 False (1, 18, False) (2, 19, True) 58.456721 78.75 \n", + "184 0 1 39 False -1 -1 58.456721 78.75 \n", + "202 15 2 18 True (2, 19, True) (1, 18, False) 58.456721 56.25 \n", + "216 13 2 32 True -1 (3, 32, False) 58.456721 56.25 \n", + "219 2 2 41 True -1 -1 58.456721 56.25 \n", + "220 0 2 0 False -1 -1 58.456721 56.25 \n", + "274 20 3 20 True (3, 21, True) (4, 20, False) 77.942295 45.00 \n", + "287 12 3 34 True -1 -1 77.942295 45.00 \n", + "288 0 3 2 False -1 (2, 2, True) 77.942295 45.00 \n", + "302 0 3 16 False (3, 15, False) (4, 16, True) 77.942295 45.00 \n", + "318 0 3 37 False -1 -1 77.942295 45.00 \n", + "326 3 4 3 True -1 -1 97.427869 56.25 \n", + "333 6 4 15 True (4, 16, True) (3, 15, False) 97.427869 56.25 \n", + "357 23 4 39 True -1 (5, 39, False) 97.427869 56.25 \n", + "358 0 4 9 False -1 -1 97.427869 56.25 \n", + "370 0 4 21 False (4, 20, False) (3, 21, True) 97.427869 56.25 \n", + "407 18 5 27 True (5, 28, True) (0, 27, False) 97.427869 78.75 \n", + "419 11 5 39 True -1 -1 97.427869 78.75 \n", + "420 0 5 0 False -1 -1 97.427869 78.75 \n", + "424 0 5 9 False -1 (4, 9, True) 97.427869 78.75 \n", + "438 0 5 23 False (5, 22, False) -1 97.427869 78.75 \n", + "\n", + " z r seq stack threeprime orientation \\\n", + "52 122.4 [77.94229, 90.0, 122.4] -1 -1 183 1144.285714 \n", + "55 139.4 [77.94229, 90.0, 139.4] -1 -1 -1 1315.714286 \n", + "56 6.8 [77.94229, 90.0, 6.8] -1 -1 -1 -21.428571 \n", + "85 71.4 [77.94229, 90.0, 71.4] -1 -1 133 630.000000 \n", + "88 81.6 [77.94229, 90.0, 81.6] -1 -1 -1 732.857143 \n", + "92 95.2 [77.94229, 90.0, 95.2] -1 -1 408 870.000000 \n", + "132 68.0 [58.456722, 78.75, 68.0] -1 -1 84 595.714286 \n", + "150 129.2 [58.456722, 78.75, 129.2] -1 -1 -1 1212.857143 \n", + "151 17.0 [58.456722, 78.75, 17.0] -1 -1 0 81.428571 \n", + "166 64.6 [58.456722, 78.75, 64.6] -1 -1 203 561.428571 \n", + "184 132.6 [58.456722, 78.75, 132.6] -1 -1 -1 1247.142857 \n", + "202 61.2 [58.456722, 56.25, 61.2] -1 -1 165 527.142857 \n", + "216 108.8 [58.456722, 56.25, 108.8] -1 -1 317 1007.142857 \n", + "219 139.4 [58.456722, 56.25, 139.4] -1 -1 -1 1315.714286 \n", + "220 0.0 [58.456722, 56.25, 0.0] -1 -1 -1 -90.000000 \n", + "274 68.0 [77.94229, 45.0, 68.0] -1 -1 369 595.714286 \n", + "287 115.6 [77.94229, 45.0, 115.6] -1 -1 -1 1075.714286 \n", + "288 6.8 [77.94229, 45.0, 6.8] -1 -1 187 -21.428571 \n", + "302 54.4 [77.94229, 45.0, 54.4] -1 -1 334 458.571429 \n", + "318 125.8 [77.94229, 45.0, 125.8] -1 -1 -1 1178.571429 \n", + "326 10.2 [97.42787, 56.25, 10.2] -1 -1 -1 12.857143 \n", + "333 51.0 [97.42787, 56.25, 51.0] -1 -1 301 424.285714 \n", + "357 132.6 [97.42787, 56.25, 132.6] -1 -1 454 1247.142857 \n", + "358 30.6 [97.42787, 56.25, 30.6] -1 -1 -1 218.571429 \n", + "370 71.4 [97.42787, 56.25, 71.4] -1 -1 275 630.000000 \n", + "407 91.8 [97.42787, 78.75, 91.8] -1 -1 91 835.714286 \n", + "419 132.6 [97.42787, 78.75, 132.6] -1 -1 -1 1247.142857 \n", + "420 0.0 [97.42787, 78.75, 0.0] -1 -1 -1 -90.000000 \n", + "424 30.6 [97.42787, 78.75, 30.6] -1 -1 327 218.571429 \n", + "438 78.2 [97.42787, 78.75, 78.2] -1 -1 -1 698.571429 \n", + "\n", + " bp \n", + "52 111 \n", + "55 -1 \n", + "56 -1 \n", + "85 26 \n", + "88 29 \n", + "92 33 \n", + "132 167 \n", + "150 -1 \n", + "151 116 \n", + "166 131 \n", + "184 -1 \n", + "202 237 \n", + "216 251 \n", + "219 -1 \n", + "220 -1 \n", + "274 306 \n", + "287 -1 \n", + "288 256 \n", + "302 270 \n", + "318 -1 \n", + "326 -1 \n", + "333 364 \n", + "357 388 \n", + "358 327 \n", + "370 339 \n", + "407 442 \n", + "419 454 \n", + "420 -1 \n", + "424 389 \n", + "438 403 " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.loc[ind]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "f16c2de5", + "metadata": {}, + "outputs": [], + "source": [ + "def gen_prop_table(part):\n", + " strand_set=[]\n", + " for i in part.getidNums():\n", + " fwd,rev=part.getStrandSets(i)\n", + " [strand_set.append(i) for i in fwd.strands()]\n", + " [strand_set.append(i) for i in rev.strands()]\n", + " id_series=[]\n", + " for i in strand_set:\n", + " id_series=id_series+gen_id_series(i,part)\n", + " \n", + " nt_prop=pd.DataFrame(id_series)\n", + " nt_prop.reset_index(inplace=True)\n", + " nt_prop[\"seq\"]=-1\n", + " ind_tuple=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"],nt_prop[\"fwd\"]))\n", + " not_stacked,=np.where(nt_prop[\"stack_tuple\"]==-1)\n", + " for i in not_stacked:\n", + " zid=int(nt_prop.loc[i][\"zid\"])\n", + " fwd=nt_prop.loc[i][\"fwd\"]\n", + " if fwd == True:\n", + " zid+=1\n", + " else:\n", + " zid-=1\n", + " try:\n", + " ind_tuple.index((nt_prop.loc[i][\"vh\"],str(zid),fwd))\n", + " nt_prop[\"stack_tuple\"][i]=(nt_prop.loc[i][\"vh\"],str(zid),fwd)\n", + " except:\n", + " continue\n", + " stacks=[]\n", + " for i in list(nt_prop[\"stack_tuple\"]):\n", + " if i ==-1:\n", + " stacks.append(i)\n", + " else:\n", + " stacks.append(ind_tuple.index(i))\n", + " nt_prop[\"stack\"]=stacks\n", + " tprime=[]\n", + " for i in list(nt_prop[\"threeprime_tuple\"]):\n", + " if i ==-1:\n", + " tprime.append(i)\n", + " else:\n", + " tprime.append(ind_tuple.index(i))\n", + " nt_prop[\"threeprime\"]=tprime\n", + " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid]\n", + " nt_prop=nt_prop.fillna(-1)\n", + " counter=-1\n", + " bp=-np.ones(len(nt_prop.index),dtype=int)\n", + " bp_map=dict(zip(ind_tuple,nt_prop.index))\n", + " for i,j,k in ind_tuple:\n", + " counter+=1\n", + " try:\n", + " bp[counter]=bp_map[(i,j,not(k))]\n", + " except:\n", + " pass\n", + " nt_prop[\"bp\"]=bp\n", + "\n", + " return nt_prop\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce93cf6e", + "metadata": {}, + "outputs": [], + "source": [ + "import mrdna\n", + "from mrdna.readers import read_list" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "c9ca5e1b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(415,)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(list(nt_prop['bp'])).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 560, + "id": "c9a33ae6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 12, 13, 14, ..., 13920, 13921, 13922]),)" + ] + }, + "execution_count": 560, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.where(np.array(nt_prop[\"bp\"])!=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "4454b0e1", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'nt_prop' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-51-602cce887422>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnt_prop\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"bp\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnt_prop\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"stack\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'nt_prop' is not defined" + ] + } + ], + "source": [ + "np.where(np.array(nt_prop[\"bp\"])!=-1 and np.array(nt_prop[\"stack\"])!=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "5a43528b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>is_scaf</th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 1.7000000000000002]</td>\n", + " <td>213</td>\n", + " <td>-1</td>\n", + " <td>1</td>\n", + " <td>-1</td>\n", + " <td>[[0.14904226617617466, -0.9888308262251284, 0....</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0</td>\n", + " <td>6</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 2.04]</td>\n", + " <td>214</td>\n", + " <td>-1</td>\n", + " <td>2</td>\n", + " <td>-1</td>\n", + " <td>[[-0.4338837391175583, -0.900968867902419, 0.0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>7</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 2.3800000000000003]</td>\n", + " <td>215</td>\n", + " <td>-1</td>\n", + " <td>3</td>\n", + " <td>-1</td>\n", + " <td>[[-0.8660254037844388, -0.49999999999999994, 0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0</td>\n", + " <td>8</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 2.72]</td>\n", + " <td>216</td>\n", + " <td>-1</td>\n", + " <td>4</td>\n", + " <td>-1</td>\n", + " <td>[[-0.9972037971811805, 0.07473009358642399, 0....</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>9</td>\n", + " <td>True</td>\n", + " <td>[0.0, 2.25, 3.06]</td>\n", + " <td>217</td>\n", + " <td>-1</td>\n", + " <td>5</td>\n", + " <td>-1</td>\n", + " <td>[[-0.7818314824680299, 0.6234898018587334, 0.0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>410</th>\n", + " <td>5</td>\n", + " <td>35</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 11.9]</td>\n", + " <td>205</td>\n", + " <td>-1</td>\n", + " <td>411</td>\n", + " <td>-1</td>\n", + " <td>[[0.8660254037844375, -0.5000000000000019, 0.0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>411</th>\n", + " <td>5</td>\n", + " <td>36</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 12.24]</td>\n", + " <td>206</td>\n", + " <td>-1</td>\n", + " <td>412</td>\n", + " <td>-1</td>\n", + " <td>[[0.4338837391175605, -0.900968867902418, 0.0]...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>412</th>\n", + " <td>5</td>\n", + " <td>37</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 12.58]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>413</td>\n", + " <td>-1</td>\n", + " <td>[[-0.14904226617617078, -0.9888308262251292, 0...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>413</th>\n", + " <td>5</td>\n", + " <td>38</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 12.920000000000002]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>414</td>\n", + " <td>-1</td>\n", + " <td>[[-0.6801727377709186, -0.7330518718298275, 0....</td>\n", + " </tr>\n", + " <tr>\n", + " <th>414</th>\n", + " <td>5</td>\n", + " <td>39</td>\n", + " <td>False</td>\n", + " <td>[1.948557375, 1.125, 13.260000000000002]</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>[[-0.9749279121818233, -0.222520933956317, 0.0...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>415 rows × 9 columns</p>\n", + "</div>" + ], + "text/plain": [ + " vh zid is_scaf r bp stack \\\n", + "0 0 5 True [0.0, 2.25, 1.7000000000000002] 213 -1 \n", + "1 0 6 True [0.0, 2.25, 2.04] 214 -1 \n", + "2 0 7 True [0.0, 2.25, 2.3800000000000003] 215 -1 \n", + "3 0 8 True [0.0, 2.25, 2.72] 216 -1 \n", + "4 0 9 True [0.0, 2.25, 3.06] 217 -1 \n", + ".. .. ... ... ... ... ... \n", + "410 5 35 False [1.948557375, 1.125, 11.9] 205 -1 \n", + "411 5 36 False [1.948557375, 1.125, 12.24] 206 -1 \n", + "412 5 37 False [1.948557375, 1.125, 12.58] -1 -1 \n", + "413 5 38 False [1.948557375, 1.125, 12.920000000000002] -1 -1 \n", + "414 5 39 False [1.948557375, 1.125, 13.260000000000002] -1 -1 \n", + "\n", + " threeprime seq orientation \n", + "0 1 -1 [[0.14904226617617466, -0.9888308262251284, 0.... \n", + "1 2 -1 [[-0.4338837391175583, -0.900968867902419, 0.0... \n", + "2 3 -1 [[-0.8660254037844388, -0.49999999999999994, 0... \n", + "3 4 -1 [[-0.9972037971811805, 0.07473009358642399, 0.... \n", + "4 5 -1 [[-0.7818314824680299, 0.6234898018587334, 0.0... \n", + ".. ... ... ... \n", + "410 411 -1 [[0.8660254037844375, -0.5000000000000019, 0.0... \n", + "411 412 -1 [[0.4338837391175605, -0.900968867902418, 0.0]... \n", + "412 413 -1 [[-0.14904226617617078, -0.9888308262251292, 0... \n", + "413 414 -1 [[-0.6801727377709186, -0.7330518718298275, 0.... \n", + "414 -1 -1 [[-0.9749279121818233, -0.222520933956317, 0.0... \n", + "\n", + "[415 rows x 9 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop" + ] + }, + { + "cell_type": "code", + "execution_count": 468, + "id": "17167c74", + "metadata": {}, + "outputs": [], + "source": [ + "scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + "stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 500, + "id": "bab6c65d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 500, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nttype(vslist[\"scaf\"][30])[146]" + ] + }, + { + "cell_type": "code", + "execution_count": 498, + "id": "0f62f6cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 498, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vhi,zidi=np.where(np.array(scaf_id)==1)\n", + "scaf_id[30][146]" + ] + }, + { + "cell_type": "code", + "execution_count": 480, + "id": "bc2021f6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 480, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaf_id[30][146]==np.array(scaf_id)[0][9]" + ] + }, + { + "cell_type": "code", + "execution_count": 549, + "id": "a747eb2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " ...\n", + " 39, 39, 39, 39, 39, 39, 39, 39, 39, 39],\n", + " dtype='int64', name='num', length=7560)" + ] + }, + "execution_count": 549, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def nttype(scafs):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in scafs])\n", + " return n\n", + "d={}\n", + "vslist.index[vhi]" + ] + }, + { + "cell_type": "code", + "execution_count": 544, + "id": "6f50d801", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n", + " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n", + " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n", + " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)" + ] + }, + "execution_count": 544, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.where(vslist.index[vhi]!=vhi)" + ] + }, + { + "cell_type": "code", + "execution_count": 550, + "id": "423e7163", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "vh 29\n", + "zid 83\n", + "is_scaf True\n", + "r [-17.537016375, 28.125, 28.220000000000002]\n", + "bp -1\n", + "stack -1\n", + "threeprime -1\n", + "seq -1\n", + "orientation [[-0.5633200580636211, 0.8262387743159955, 0.0...\n", + "Name: 7394, dtype: object" + ] + }, + "execution_count": 550, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop.loc[7394]" + ] + }, + { + "cell_type": "code", + "execution_count": 548, + "id": "f523be6f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 29, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39, 41, 40, 42, 44, 46, 48, 50],\n", + " dtype='int64', name='num')" + ] + }, + "execution_count": 548, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vslist.index" + ] + }, + { + "cell_type": "code", + "execution_count": 527, + "id": "d61008dd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n", + " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n", + " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n", + " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)" + ] + }, + "execution_count": 527, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n=list(nt_prop[\"zid\"])\n", + "np.where(np.array(list(nt_prop[\"vh\"]))==29)" + ] + }, + { + "cell_type": "code", + "execution_count": 503, + "id": "0176640a", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "((30, 146), True) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-503-1b9956d4cdaf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnt_prop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list" + ] + } + ], + "source": [ + "vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + "index2=list(zip(vhzid,nt_prop[\"is_scaf\"]))\n", + "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " \n", + "print(index2.index(((30,146),(True))))" + ] + }, + { + "cell_type": "code", + "execution_count": 537, + "id": "daab30d3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11 135 30 146 3010\n" + ] + }, + { + "ename": "ValueError", + "evalue": "((30, 146), True) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-537-f07d5cbf0867>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list" + ] + } + ], + "source": [ + " \n", + "for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k==30 and l==146:\n", + " print(m,n,k,l,i)\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + "nt_prop[\"threeprime\"]=tprime_list\n", + "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + " ## Todo: sequence " + ] + }, + { + "cell_type": "code", + "execution_count": 491, + "id": "c41afe8c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[11, 135, 30, 147]" + ] + }, + "execution_count": 491, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(vslist.loc[30][\"scaf\"])[146]" + ] + }, + { + "cell_type": "code", + "execution_count": 493, + "id": "3f525aa9", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "(30, 146) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-493-d1dd239124c3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvhzid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: (30, 146) is not in list" + ] + } + ], + "source": [ + "vhzid.index((30,146))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a78afb7a", + "metadata": {}, + "outputs": [], + "source": [ + "df=pd.DataFrame(data=d)\n", + "df=df.set_index(\"num\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43ec49f1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "121d4fb1", + "metadata": {}, + "outputs": [], + "source": [ + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(\"WARNING: unable to determine cadnano part lattice type\")\n", + " return lattice_type\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "eac77008", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + }, + { + "data": { + "text/plain": [ + "NucleicAcidPart_-1_2800" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p=read_json_file(\"test/test.json\")\n", + "p" + ] + }, + { + "cell_type": "code", + "execution_count": 441, + "id": "4626babf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>row</th>\n", + " <th>col</th>\n", + " <th>scaf</th>\n", + " <th>stap</th>\n", + " <th>loop</th>\n", + " <th>skip</th>\n", + " <th>scafLoop</th>\n", + " <th>stapLoop</th>\n", + " <th>stap_colors</th>\n", + " </tr>\n", + " <tr>\n", + " <th>num</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[23, 13369809], [38, 12060012]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[3, 1501302]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>13</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n", + " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[34, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>13</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n", + " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[0, 13369344]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>13</td>\n", + " <td>17</td>\n", + " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[39, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>12</td>\n", + " <td>17</td>\n", + " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[9, 0]]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " row col scaf \\\n", + "num \n", + "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n", + "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n", + "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n", + "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n", + "\n", + " stap \\\n", + "num \n", + "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n", + "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n", + "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n", + "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "\n", + " loop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "\n", + " skip scafLoop stapLoop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "\n", + " stap_colors \n", + "num \n", + "0 [[23, 13369809], [38, 12060012]] \n", + "1 [[3, 1501302]] \n", + "2 [[34, 8947848]] \n", + "3 [[0, 13369344]] \n", + "4 [[39, 8947848]] \n", + "5 [[9, 0]] " + ] + }, + "execution_count": 441, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f" + ] + }, + { + "cell_type": "code", + "execution_count": 199, + "id": "9126a63f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 2.25, 3.4 ])" + ] + }, + "execution_count": 199, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p.getCoordinate(0,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 434, + "id": "7840798e", + "metadata": {}, + "outputs": [], + "source": [ + "def mrdna_model_from_cadnano(json_file,**model_parameters):\n", + " part,vslist=read_json_file(json_file)\n", + " props = part.getModelProperties().copy()\n", + " try:\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " except:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n", + " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt,dtype=bool)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " tot_id=scaf_id+stap_id\n", + " vhi,zidi=np.where(np.array(scaf_id)==1)\n", + " vhj,zidj=np.where(np.array(stap_id)==1)\n", + " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n", + " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n", + " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + " nt_prop[\"r\"]=[part.getCoordinate(i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop=nt_prop.fillna(-1)\n", + " for i in range(int(len(vhzid)/2)):\n", + " try:\n", + " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n", + " nt_prop[\"bp\"][bp1]=bp2\n", + " nt_prop[\"bp\"][bp2]=bp1\n", + " except:\n", + " pass\n", + " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + " nt_prop[\"threeprime\"]=tprime_list\n", + " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + "\n", + "\n", + " return nt_prop\n" + ] + }, + { + "cell_type": "code", + "execution_count": 442, + "id": "1a1c4e53", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1])" + ] + }, + "execution_count": 442, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(nt_prop[\"seq\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 302, + "id": "02d50bab", + "metadata": {}, + "outputs": [], + "source": [ + "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + "for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + " \n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + "nt_prop[\"threeprime\"]=tprime_list" + ] + }, + { + "cell_type": "code", + "execution_count": 368, + "id": "5b3cff6b", + "metadata": {}, + "outputs": [], + "source": [ + "def get_helix_angle(part, helix_id, indices):\n", + " \"\"\" Get \"start_orientation\" for helix \"\"\"\n", + " # import ipdb\n", + " # ipdb.set_trace()\n", + "\n", + " \"\"\" FROM CADNANO2.5\n", + " + angle is CCW\n", + " - angle is CW\n", + " Right handed DNA rotates clockwise from 5' to 3'\n", + " we use the convention the 5' end starts at 0 degrees\n", + " and it's pair is minor_groove_angle degrees away\n", + " direction, hence the minus signs. eulerZ\n", + " \"\"\"\n", + "\n", + " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n", + " ['helical_pitch',\n", + " 'bases_per_repeat',\n", + " 'turns_per_repeat',\n", + " 'eulerZ',\n", + " 'minor_groove_angle']]\n", + " twist_per_base = tpr*360./bpr\n", + " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n", + " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n", + " return rotationAboutAxis(np.array((0,0,1)),angle)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 429, + "id": "c27027df", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + "\n", + "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + "\n", + "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 430, + "id": "a943a204", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "33 -1\n", + "68 -1\n", + "102 -1\n", + "136 -1\n", + "142 -1\n", + "176 -1\n", + "194 399\n", + "211 -1\n", + "233 20\n", + "281 -1\n", + "284 -1\n", + "351 -1\n", + "354 145\n", + "413 -1\n", + "Name: bp, dtype: int64" + ] + }, + "execution_count": 430, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stackid" + ] + }, + { + "cell_type": "code", + "execution_count": 431, + "id": "c1d76688", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "232" + ] + }, + "execution_count": 431, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop[\"stack\"][233]" + ] + }, + { + "cell_type": "code", + "execution_count": 433, + "id": "163d7316", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "353" + ] + }, + "execution_count": 433, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop[\"stack\"][354]" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "id": "5731c87a", + "metadata": {}, + "outputs": [], + "source": [ + "scaf_id=[nttype(vslist[\"scaf\"][i]) for i in vslist.index]\n", + "stap_id=[nttype(vslist[\"stap\"][i]) for i in vslist.index]\n", + "nts=scaf_id+stap_id" + ] + }, + { + "cell_type": "code", + "execution_count": 360, + "id": "c3faf3f7", + "metadata": {}, + "outputs": [], + "source": [ + "nt_prop[\"orientation\"]=[get_helix_angle(p,i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "id": "c66f7e42", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>row</th>\n", + " <th>col</th>\n", + " <th>scaf</th>\n", + " <th>stap</th>\n", + " <th>loop</th>\n", + " <th>skip</th>\n", + " <th>scafLoop</th>\n", + " <th>stapLoop</th>\n", + " <th>stap_colors</th>\n", + " </tr>\n", + " <tr>\n", + " <th>num</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[23, 13369809], [38, 12060012]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[3, 1501302]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>13</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n", + " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[34, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>13</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n", + " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[0, 13369344]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>13</td>\n", + " <td>17</td>\n", + " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[39, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>12</td>\n", + " <td>17</td>\n", + " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[9, 0]]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " row col scaf \\\n", + "num \n", + "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n", + "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n", + "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n", + "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n", + "\n", + " stap \\\n", + "num \n", + "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n", + "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n", + "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n", + "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "\n", + " loop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "\n", + " skip scafLoop stapLoop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "\n", + " stap_colors \n", + "num \n", + "0 [[23, 13369809], [38, 12060012]] \n", + "1 [[3, 1501302]] \n", + "2 [[34, 8947848]] \n", + "3 [[0, 13369344]] \n", + "4 [[39, 8947848]] \n", + "5 [[9, 0]] " + ] + }, + "execution_count": 190, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vslist" + ] + }, + { + "cell_type": "code", + "execution_count": 200, + "id": "b8b5f079", + "metadata": {}, + "outputs": [], + "source": [ + "def mrdna_model_from_cadnano(json_data,**model_parameters):\n", + " part,vslist=decode_cadnano_part(json_data)\n", + " props = part.getModelProperties().copy()\n", + "\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " else:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=np.array([nttype(vslist['scaf'][i]) for i in vslist.index])\n", + " stap_id=np.array([nttype(vslist['stap'][i]) for i in vslist.index])\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(scaf_id,axis=1)\n", + " vslist[\"stapnt\"]=np.sum(stap_id,axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " vhi,zids=np.where(np.array(scaf_id+stap_id)==1)\n", + " nt_prop[\"vh\"]=vhi\n", + " nt_prop[\"zid\"]=zids\n", + " nt_prop[\"r\"] =part.getCoordinate(nt_prop[\"vh\"],nt_prop[\"zid\"])\n", + " return nt_prop\n" + ] + }, + { + "cell_type": "code", + "execution_count": 201, + "id": "a6f87acc", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'decode_cadnano_part' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-201-c5d589a8b80d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m<ipython-input-200-181a924488ad>\u001b[0m in \u001b[0;36mmrdna_model_from_cadnano\u001b[0;34m(json_data, **model_parameters)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mmodel_parameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpart\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvslist\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_cadnano_part\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprops\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetModelProperties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'point_type'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mPointType\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mARBITRARY\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'decode_cadnano_part' is not defined" + ] + } + ], + "source": [ + "mrdna_model_from_cadnano(\"test.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "d3be63cc", + "metadata": {}, + "outputs": [], + "source": [ + "a,b=np.where(np.array(nts)==1)" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "8f869fd3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n", + " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40,\n", + " 41, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n", + " 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39,\n", + " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39,\n", + " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 37,\n", + " 38, 39, 40, 41, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", + " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n", + " 35, 36, 37, 38, 39, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39])" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop=pd.DataFrame(index)" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "6a623964", + "metadata": {}, + "outputs": [], + "source": [ + "def nttype(scafs):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in scafs])\n", + " return n\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2a34380", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "20260f38", + "metadata": {}, + "outputs": [], + "source": [ + "b[\"scafnt\"]=[ntcount(b['scaf'][i]) for i in b.index]\n", + "b[\"stapnt\"]=[ntcount(b['stap'][i]) for i in b.index]" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": "574e177c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " <tr>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <th>0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <th>2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>8</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " r bp stack threeprime seq orientation\n", + "vh zid \n", + "0 0 NaN NaN NaN NaN NaN NaN\n", + "1 3 NaN NaN NaN NaN NaN NaN\n", + "2 1 NaN NaN NaN NaN NaN NaN\n", + "3 2 NaN NaN NaN NaN NaN NaN\n", + "1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "i=range(5)\n", + "col=[\"vh\",\"zid\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"]\n", + "d=pd.DataFrame(index=i,columns=col)\n", + "d['vh']=[0,1,2,3,1]\n", + "d['zid']=[0,3,1,2,8]\n", + "d.set_index([\"vh\",\"zid\"],inplace=True)\n", + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "b8e3a819", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " vh zid r bp stack threeprime seq orientation\n", + "0 0 0 NaN NaN NaN NaN NaN NaN\n", + "1 1 3 NaN NaN NaN NaN NaN NaN\n", + "2 2 1 NaN NaN NaN NaN NaN NaN\n", + "3 3 2 NaN NaN NaN NaN NaN NaN\n", + "4 1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "d=d.reset_index()\n", + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "id": "2b3e6e89", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 2]),)" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s=[True,False,True,False,False]\n", + "np.where(np.array(s)==True)" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "36d4d897", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " <tr>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <th>0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <th>2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>8</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " r bp stack threeprime seq orientation\n", + "vh zid \n", + "0 0 NaN NaN NaN NaN NaN NaN\n", + "1 3 NaN NaN NaN NaN NaN NaN\n", + "2 1 NaN NaN NaN NaN NaN NaN\n", + "3 2 NaN NaN NaN NaN NaN NaN\n", + "1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "f4b4ea63", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-100-a349feadc600>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3509\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_iterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3510\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3511\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_indexer_strict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3513\u001b[0m \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5780\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_indexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reindex_non_unique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5782\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_if_missing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5783\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5784\u001b[0m \u001b[0mkeyarr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5840\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muse_interval_msg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5841\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5842\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of [{key}] are in the [{axis_name}]\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5844\u001b[0m \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: \"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"" + ] + } + ], + "source": [ + "d[[0,0]]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3c12d5dd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[23, 13369809], [38, 12060012]]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"vstrands\"][0][\"stap_colors\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "40855dd8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "210" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", + "len(vh_vb._scaf)" + ] + }, + { + "cell_type": "code", + "execution_count": 198, + "id": "f4ebcdc6", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "file must have 'read' and 'readline' attributes", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[198], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpickle\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.virt2nuc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: file must have 'read' and 'readline' attributes" + ] + } + ], + "source": [ + "df = pickle.load(\"test.virt2nuc\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4037f0aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: (12, 16), 1: (12, 15), 2: (13, 15), 3: (13, 16), 4: (13, 17), 5: (12, 17)}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pattern" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "id": "b9ac3514", + "metadata": {}, + "outputs": [], + "source": [ + "class strands():\n", + " def __init__(self):\n", + " self.row=0 \n", + " self.col=0\n", + " self.num=0\n", + " self.scaf=[]\n", + " self.stap=[]\n", + " self.loop=[]\n", + " self.skip=[]\n", + " self.scafLoop=[]\n", + " self.stapLoop=[]\n", + " self.stap_colors=[]\n", + " self.scaf_contact={}\n", + " self.stap_connect={}\n", + " def to_dict(self):\n", + " d={}\n", + " d['row']=self.row\n", + " d['col']=self.col\n", + " d['num']=self.num\n", + " d['scaf']=self.scaf\n", + " d['stap']=self.stap\n", + " d['loop']=self.loop\n", + " d['skip']=self.skip\n", + " d['scafLoop']=self.scafLoop\n", + " d['stapLoop']=self.stapLoop\n", + " d['stap_colors']=self.stap_colors\n", + " return d\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "id": "e3a81edb", + "metadata": {}, + "outputs": [], + "source": [ + "def find_segs(vir2nuc_scaf):\n", + " oligos={}\n", + " for i in range(len(vir2nuc_scaf)):\n", + " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n", + " if oligo not in oligos.keys():\n", + " oligos[oligo]=[]\n", + " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n", + " return oligos\n", + "\n", + "#class\n", + "def decode_vh_vb(virt2nuc):\n", + " vh_list={}\n", + " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", + " for i in pattern.keys():\n", + " s=strands()\n", + " s.row,s.col=pattern[i]\n", + " s.num=i\n", + " vh_list[s.num]=s\n", + " scafs=vh_vb._scaf\n", + " staps=vh_vb._stap\n", + " scaf_strands=find_segs(scafs)\n", + " scaf_oligos=list(scaf_strands.keys())\n", + " for i in scaf_oligos:\n", + " pass\n", + " \n", + " \n", + " return vh_list" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "id": "e362ba9e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[(2, 34),\n", + " (2, 33),\n", + " (2, 32),\n", + " (2, 31),\n", + " (2, 30),\n", + " (2, 29),\n", + " (2, 28),\n", + " (2, 27),\n", + " (2, 26),\n", + " (2, 25),\n", + " (2, 24),\n", + " (2, 23),\n", + " (2, 22),\n", + " (2, 21),\n", + " (2, 20),\n", + " (2, 19),\n", + " (2, 18),\n", + " (2, 17),\n", + " (2, 16),\n", + " (2, 15),\n", + " (2, 14),\n", + " (2, 13),\n", + " (2, 12),\n", + " (2, 11),\n", + " (2, 10),\n", + " (2, 9),\n", + " (2, 8),\n", + " (2, 7),\n", + " (2, 6),\n", + " (2, 5),\n", + " (2, 4),\n", + " (2, 3),\n", + " (2, 2),\n", + " (2, 1),\n", + " (2, 0)],\n", + " [(1, 3),\n", + " (1, 4),\n", + " (1, 5),\n", + " (1, 6),\n", + " (1, 7),\n", + " (1, 8),\n", + " (1, 9),\n", + " (1, 10),\n", + " (1, 11),\n", + " (1, 12),\n", + " (1, 13),\n", + " (1, 14),\n", + " (1, 15),\n", + " (1, 16),\n", + " (1, 17),\n", + " (1, 18),\n", + " (1, 19),\n", + " (1, 20),\n", + " (0, 20),\n", + " (0, 19),\n", + " (0, 18),\n", + " (0, 17),\n", + " (0, 16),\n", + " (0, 15),\n", + " (0, 14),\n", + " (0, 13),\n", + " (0, 12),\n", + " (0, 11),\n", + " (0, 10),\n", + " (0, 9),\n", + " (0, 8),\n", + " (0, 7),\n", + " (0, 6),\n", + " (0, 5),\n", + " (0, 4),\n", + " (0, 3),\n", + " (0, 2)],\n", + " [(0, 23),\n", + " (0, 22),\n", + " (0, 21),\n", + " (1, 21),\n", + " (1, 22),\n", + " (1, 23),\n", + " (1, 24),\n", + " (1, 25),\n", + " (1, 26),\n", + " (1, 27),\n", + " (1, 28),\n", + " (1, 29),\n", + " (1, 30),\n", + " (1, 31),\n", + " (1, 32),\n", + " (1, 33),\n", + " (1, 34),\n", + " (1, 35),\n", + " (1, 36),\n", + " (1, 37),\n", + " (1, 38)],\n", + " [(5, 9),\n", + " (5, 10),\n", + " (5, 11),\n", + " (5, 12),\n", + " (5, 13),\n", + " (5, 14),\n", + " (5, 15),\n", + " (5, 16),\n", + " (5, 17),\n", + " (5, 18),\n", + " (5, 19),\n", + " (5, 20),\n", + " (5, 21),\n", + " (5, 22),\n", + " (5, 23),\n", + " (5, 24),\n", + " (5, 25),\n", + " (5, 26),\n", + " (5, 27),\n", + " (0, 27),\n", + " (0, 26),\n", + " (0, 25),\n", + " (0, 24)],\n", + " [(0, 38),\n", + " (0, 37),\n", + " (0, 36),\n", + " (0, 35),\n", + " (0, 34),\n", + " (0, 33),\n", + " (0, 32),\n", + " (0, 31),\n", + " (0, 30),\n", + " (0, 29),\n", + " (0, 28),\n", + " (5, 28),\n", + " (5, 29),\n", + " (5, 30),\n", + " (5, 31),\n", + " (5, 32),\n", + " (5, 33),\n", + " (5, 34),\n", + " (5, 35),\n", + " (5, 36),\n", + " (5, 37),\n", + " (5, 38),\n", + " (5, 39)],\n", + " [(3, 0),\n", + " (3, 1),\n", + " (3, 2),\n", + " (3, 3),\n", + " (3, 4),\n", + " (3, 5),\n", + " (3, 6),\n", + " (3, 7),\n", + " (3, 8),\n", + " (3, 9),\n", + " (3, 10),\n", + " (3, 11),\n", + " (3, 12),\n", + " (3, 13),\n", + " (3, 14),\n", + " (3, 15),\n", + " (3, 16),\n", + " (3, 17),\n", + " (3, 18),\n", + " (3, 19),\n", + " (3, 20),\n", + " (4, 20),\n", + " (4, 19),\n", + " (4, 18),\n", + " (4, 17),\n", + " (4, 16),\n", + " (4, 15),\n", + " (4, 14),\n", + " (4, 13),\n", + " (4, 12),\n", + " (4, 11),\n", + " (4, 10),\n", + " (4, 9)],\n", + " [(4, 39),\n", + " (4, 38),\n", + " (4, 37),\n", + " (4, 36),\n", + " (4, 35),\n", + " (4, 34),\n", + " (4, 33),\n", + " (4, 32),\n", + " (4, 31),\n", + " (4, 30),\n", + " (4, 29),\n", + " (4, 28),\n", + " (4, 27),\n", + " (4, 26),\n", + " (4, 25),\n", + " (4, 24),\n", + " (4, 23),\n", + " (4, 22),\n", + " (4, 21),\n", + " (3, 21),\n", + " (3, 22),\n", + " (3, 23),\n", + " (3, 24),\n", + " (3, 25),\n", + " (3, 26),\n", + " (3, 27),\n", + " (3, 28),\n", + " (3, 29),\n", + " (3, 30),\n", + " (3, 31),\n", + " (3, 32),\n", + " (3, 33),\n", + " (3, 34)]]" + ] + }, + "execution_count": 187, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1=decode_vh_vb(\"test.virt2nuc\")\n", + "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", + "list(find_segs(vh_vb._stap).values())" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "id": "c0f2e5be", + "metadata": {}, + "outputs": [], + "source": [ + "def find_segs(vir2nuc_scaf):\n", + " oligos={}\n", + " for i in range(len(vir2nuc_scaf)):\n", + " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n", + " if oligo not in oligos.keys():\n", + " oligos[oligo]=[]\n", + " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n", + " return oligos\n", + "\n", + "def decode_vh_vb(virt2nuc):\n", + " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", + " vi={'row':0, 'col':0, 'num':0, 'scaf':dict(), 'stap':dict(), 'loop':[], 'skip':[], 'scafLoop':[],'stapLoop':[], 'stap_colors':[],\"scaf53\":True}\n", + " vs=[] \n", + " for i in range(len(pattern.keys())):\n", + " vhi=vi.copy()\n", + " vhi[\"row\"],vhi[\"col\"]=list(pattern.values())[i]\n", + " vhi[\"num\"]=list(pattern.keys())[i]\n", + " vs.append(vhi)\n", + " vhelices=pd.DataFrame(vs)\n", + " vhelices=vhelices.set_index('num')\n", + " scafs=vh_vb._scaf\n", + " staps=vh_vb._stap\n", + " scaf_strands=find_segs(scafs)\n", + " stap_strands=find_segs(staps)\n", + " scaf_oligos=list(scaf_strands.keys())\n", + "\n", + " \n", + " return vhelices" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "9a8a0b95", + "metadata": {}, + "outputs": [], + "source": [ + "def find_base_map(oligo,i,vhx,scaf=True):\n", + " vh0,vb0=oligo[i]\n", + " vh1,vb1=oligo[i+1]\n", + " if scaf==True:\n", + " if vb0 not in vhx[\"scaf\"][vh0].keys():\n", + " \n", + " if vh0==vh1 and scaf==True:\n", + " if vb0>vb1:\n", + " vhx[vh0][\"scaf\"][vb0]=\n", + " \n", + "\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "deba51fa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(5, 22),\n", + " (5, 21),\n", + " (5, 20),\n", + " (5, 19),\n", + " (5, 18),\n", + " (5, 17),\n", + " (5, 16),\n", + " (5, 15),\n", + " (5, 14),\n", + " (5, 13),\n", + " (5, 12),\n", + " (5, 11),\n", + " (5, 10),\n", + " (5, 9),\n", + " (4, 9),\n", + " (4, 10),\n", + " (4, 11),\n", + " (4, 12),\n", + " (4, 13),\n", + " (4, 14),\n", + " (4, 15),\n", + " (3, 15),\n", + " (3, 14),\n", + " (3, 13),\n", + " (3, 12),\n", + " (3, 11),\n", + " (3, 10),\n", + " (3, 9),\n", + " (3, 8),\n", + " (3, 7),\n", + " (3, 6),\n", + " (3, 5),\n", + " (3, 4),\n", + " (3, 3),\n", + " (3, 2),\n", + " (2, 2),\n", + " (2, 3),\n", + " (2, 4),\n", + " (2, 5),\n", + " (2, 6),\n", + " (2, 7),\n", + " (2, 8),\n", + " (2, 9),\n", + " (2, 10),\n", + " (2, 11),\n", + " (2, 12),\n", + " (2, 13),\n", + " (2, 14),\n", + " (2, 15),\n", + " (2, 16),\n", + " (2, 17),\n", + " (2, 18),\n", + " (1, 18),\n", + " (1, 17),\n", + " (1, 16),\n", + " (1, 15),\n", + " (1, 14),\n", + " (1, 13),\n", + " (1, 12),\n", + " (1, 11),\n", + " (1, 10),\n", + " (1, 9),\n", + " (1, 8),\n", + " (1, 7),\n", + " (1, 6),\n", + " (1, 5),\n", + " (0, 5),\n", + " (0, 6),\n", + " (0, 7),\n", + " (0, 8),\n", + " (0, 9),\n", + " (0, 10),\n", + " (0, 11),\n", + " (0, 12),\n", + " (0, 13),\n", + " (0, 14),\n", + " (0, 15),\n", + " (0, 16),\n", + " (0, 17),\n", + " (0, 18),\n", + " (0, 19),\n", + " (0, 20),\n", + " (0, 21),\n", + " (0, 22),\n", + " (0, 23),\n", + " (0, 24),\n", + " (0, 25),\n", + " (0, 26),\n", + " (0, 27),\n", + " (0, 28),\n", + " (0, 29),\n", + " (0, 30),\n", + " (0, 31),\n", + " (0, 32),\n", + " (0, 33),\n", + " (0, 34),\n", + " (0, 35),\n", + " (0, 36),\n", + " (1, 36),\n", + " (1, 35),\n", + " (1, 34),\n", + " (1, 33),\n", + " (1, 32),\n", + " (1, 31),\n", + " (1, 30),\n", + " (1, 29),\n", + " (1, 28),\n", + " (1, 27),\n", + " (1, 26),\n", + " (1, 25),\n", + " (1, 24),\n", + " (1, 23),\n", + " (1, 22),\n", + " (1, 21),\n", + " (1, 20),\n", + " (1, 19),\n", + " (2, 19),\n", + " (2, 20),\n", + " (2, 21),\n", + " (2, 22),\n", + " (2, 23),\n", + " (2, 24),\n", + " (2, 25),\n", + " (2, 26),\n", + " (2, 27),\n", + " (2, 28),\n", + " (2, 29),\n", + " (2, 30),\n", + " (2, 31),\n", + " (2, 32),\n", + " (3, 32),\n", + " (3, 31),\n", + " (3, 30),\n", + " (3, 29),\n", + " (3, 28),\n", + " (3, 27),\n", + " (3, 26),\n", + " (3, 25),\n", + " (3, 24),\n", + " (3, 23),\n", + " (3, 22),\n", + " (3, 21),\n", + " (3, 20),\n", + " (3, 19),\n", + " (3, 18),\n", + " (3, 17),\n", + " (3, 16),\n", + " (4, 16),\n", + " (4, 17),\n", + " (4, 18),\n", + " (4, 19),\n", + " (4, 20),\n", + " (4, 21),\n", + " (4, 22),\n", + " (4, 23),\n", + " (4, 24),\n", + " (4, 25),\n", + " (4, 26),\n", + " (4, 27),\n", + " (4, 28),\n", + " (4, 29),\n", + " (4, 30),\n", + " (4, 31),\n", + " (4, 32),\n", + " (4, 33),\n", + " (4, 34),\n", + " (4, 35),\n", + " (4, 36),\n", + " (4, 37),\n", + " (4, 38),\n", + " (4, 39),\n", + " (5, 39),\n", + " (5, 38),\n", + " (5, 37),\n", + " (5, 36),\n", + " (5, 35),\n", + " (5, 34),\n", + " (5, 33),\n", + " (5, 32),\n", + " (5, 31),\n", + " (5, 30),\n", + " (5, 29),\n", + " (5, 28),\n", + " (5, 27),\n", + " (5, 26),\n", + " (5, 25),\n", + " (5, 24),\n", + " (5, 23)]" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scafs=vh_vb._scaf\n", + "s=list(scafs.values())\n", + "len(scafs)\n", + "ss=find_segs(scafs)[7]\n", + "ss" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "b31e177e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 2)]" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "oligos[0]=[]\n", + "L=[]\n", + "L.append((1,2))\n", + "L" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "87cb62c0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0, 39)" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(scafs.keys())[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "283ce125", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{(2, 34): (3, [43]),\n", + " (2, 33): (3, [42]),\n", + " (2, 32): (3, [41]),\n", + " (2, 31): (3, [40]),\n", + " (2, 30): (3, [39]),\n", + " (2, 29): (3, [38]),\n", + " (2, 28): (3, [37]),\n", + " (2, 27): (3, [36]),\n", + " (2, 26): (3, [35]),\n", + " (2, 25): (3, [34]),\n", + " (2, 24): (3, [33]),\n", + " (2, 23): (3, [32]),\n", + " (2, 22): (3, [31]),\n", + " (2, 21): (3, [30]),\n", + " (2, 20): (3, [29]),\n", + " (2, 19): (3, [28]),\n", + " (2, 18): (3, [27]),\n", + " (2, 17): (3, [26]),\n", + " (2, 16): (3, [25]),\n", + " (2, 15): (3, [24]),\n", + " (2, 14): (3, [23]),\n", + " (2, 13): (3, [22]),\n", + " (2, 12): (3, [21]),\n", + " (2, 11): (3, [20]),\n", + " (2, 10): (3, [19]),\n", + " (2, 9): (3, [18]),\n", + " (2, 8): (3, [17]),\n", + " (2, 7): (3, [16]),\n", + " (2, 6): (3, [15]),\n", + " (2, 5): (3, [14]),\n", + " (2, 4): (3, [13]),\n", + " (2, 3): (3, [12]),\n", + " (2, 2): (3, [11]),\n", + " (2, 1): (3, [10]),\n", + " (2, 0): (3, [9]),\n", + " (1, 3): (8, [281]),\n", + " (1, 4): (8, [280]),\n", + " (1, 5): (8, [279]),\n", + " (1, 6): (8, [278]),\n", + " (1, 7): (8, [277]),\n", + " (1, 8): (8, [276]),\n", + " (1, 9): (8, [275]),\n", + " (1, 10): (8, [274]),\n", + " (1, 11): (8, [273]),\n", + " (1, 12): (8, [272]),\n", + " (1, 13): (8, [271]),\n", + " (1, 14): (8, [270]),\n", + " (1, 15): (8, [269]),\n", + " (1, 16): (8, [268]),\n", + " (1, 17): (8, [267]),\n", + " (1, 18): (8, [266]),\n", + " (1, 19): (8, [265]),\n", + " (1, 20): (8, [264]),\n", + " (0, 20): (8, [263]),\n", + " (0, 19): (8, [262]),\n", + " (0, 18): (8, [261]),\n", + " (0, 17): (8, [260]),\n", + " (0, 16): (8, [259]),\n", + " (0, 15): (8, [258]),\n", + " (0, 14): (8, [257]),\n", + " (0, 13): (8, [256]),\n", + " (0, 12): (8, [255]),\n", + " (0, 11): (8, [254]),\n", + " (0, 10): (8, [253]),\n", + " (0, 9): (8, [252]),\n", + " (0, 8): (8, [251]),\n", + " (0, 7): (8, [250]),\n", + " (0, 6): (8, [249]),\n", + " (0, 5): (8, [248]),\n", + " (0, 4): (8, [247]),\n", + " (0, 3): (8, [246]),\n", + " (0, 2): (8, [245]),\n", + " (0, 23): (9, [302]),\n", + " (0, 22): (9, [301]),\n", + " (0, 21): (9, [300]),\n", + " (1, 21): (9, [299]),\n", + " (1, 22): (9, [298]),\n", + " (1, 23): (9, [297]),\n", + " (1, 24): (9, [296]),\n", + " (1, 25): (9, [295]),\n", + " (1, 26): (9, [294]),\n", + " (1, 27): (9, [293]),\n", + " (1, 28): (9, [292]),\n", + " (1, 29): (9, [291]),\n", + " (1, 30): (9, [290]),\n", + " (1, 31): (9, [289]),\n", + " (1, 32): (9, [288]),\n", + " (1, 33): (9, [287]),\n", + " (1, 34): (9, [286]),\n", + " (1, 35): (9, [285]),\n", + " (1, 36): (9, [284]),\n", + " (1, 37): (9, [283]),\n", + " (1, 38): (9, [282]),\n", + " (5, 9): (10, [325]),\n", + " (5, 10): (10, [324]),\n", + " (5, 11): (10, [323]),\n", + " (5, 12): (10, [322]),\n", + " (5, 13): (10, [321]),\n", + " (5, 14): (10, [320]),\n", + " (5, 15): (10, [319]),\n", + " (5, 16): (10, [318]),\n", + " (5, 17): (10, [317]),\n", + " (5, 18): (10, [316]),\n", + " (5, 19): (10, [315]),\n", + " (5, 20): (10, [314]),\n", + " (5, 21): (10, [313]),\n", + " (5, 22): (10, [312]),\n", + " (5, 23): (10, [311]),\n", + " (5, 24): (10, [310]),\n", + " (5, 25): (10, [309]),\n", + " (5, 26): (10, [308]),\n", + " (5, 27): (10, [307]),\n", + " (0, 27): (10, [306]),\n", + " (0, 26): (10, [305]),\n", + " (0, 25): (10, [304]),\n", + " (0, 24): (10, [303]),\n", + " (0, 38): (11, [348]),\n", + " (0, 37): (11, [347]),\n", + " (0, 36): (11, [346]),\n", + " (0, 35): (11, [345]),\n", + " (0, 34): (11, [344]),\n", + " (0, 33): (11, [343]),\n", + " (0, 32): (11, [342]),\n", + " (0, 31): (11, [341]),\n", + " (0, 30): (11, [340]),\n", + " (0, 29): (11, [339]),\n", + " (0, 28): (11, [338]),\n", + " (5, 28): (11, [337]),\n", + " (5, 29): (11, [336]),\n", + " (5, 30): (11, [335]),\n", + " (5, 31): (11, [334]),\n", + " (5, 32): (11, [333]),\n", + " (5, 33): (11, [332]),\n", + " (5, 34): (11, [331]),\n", + " (5, 35): (11, [330]),\n", + " (5, 36): (11, [329]),\n", + " (5, 37): (11, [328]),\n", + " (5, 38): (11, [327]),\n", + " (5, 39): (11, [326]),\n", + " (3, 0): (12, [381]),\n", + " (3, 1): (12, [380]),\n", + " (3, 2): (12, [379]),\n", + " (3, 3): (12, [378]),\n", + " (3, 4): (12, [377]),\n", + " (3, 5): (12, [376]),\n", + " (3, 6): (12, [375]),\n", + " (3, 7): (12, [374]),\n", + " (3, 8): (12, [373]),\n", + " (3, 9): (12, [372]),\n", + " (3, 10): (12, [371]),\n", + " (3, 11): (12, [370]),\n", + " (3, 12): (12, [369]),\n", + " (3, 13): (12, [368]),\n", + " (3, 14): (12, [367]),\n", + " (3, 15): (12, [366]),\n", + " (3, 16): (12, [365]),\n", + " (3, 17): (12, [364]),\n", + " (3, 18): (12, [363]),\n", + " (3, 19): (12, [362]),\n", + " (3, 20): (12, [361]),\n", + " (4, 20): (12, [360]),\n", + " (4, 19): (12, [359]),\n", + " (4, 18): (12, [358]),\n", + " (4, 17): (12, [357]),\n", + " (4, 16): (12, [356]),\n", + " (4, 15): (12, [355]),\n", + " (4, 14): (12, [354]),\n", + " (4, 13): (12, [353]),\n", + " (4, 12): (12, [352]),\n", + " (4, 11): (12, [351]),\n", + " (4, 10): (12, [350]),\n", + " (4, 9): (12, [349]),\n", + " (4, 39): (13, [414]),\n", + " (4, 38): (13, [413]),\n", + " (4, 37): (13, [412]),\n", + " (4, 36): (13, [411]),\n", + " (4, 35): (13, [410]),\n", + " (4, 34): (13, [409]),\n", + " (4, 33): (13, [408]),\n", + " (4, 32): (13, [407]),\n", + " (4, 31): (13, [406]),\n", + " (4, 30): (13, [405]),\n", + " (4, 29): (13, [404]),\n", + " (4, 28): (13, [403]),\n", + " (4, 27): (13, [402]),\n", + " (4, 26): (13, [401]),\n", + " (4, 25): (13, [400]),\n", + " (4, 24): (13, [399]),\n", + " (4, 23): (13, [398]),\n", + " (4, 22): (13, [397]),\n", + " (4, 21): (13, [396]),\n", + " (3, 21): (13, [395]),\n", + " (3, 22): (13, [394]),\n", + " (3, 23): (13, [393]),\n", + " (3, 24): (13, [392]),\n", + " (3, 25): (13, [391]),\n", + " (3, 26): (13, [390]),\n", + " (3, 27): (13, [389]),\n", + " (3, 28): (13, [388]),\n", + " (3, 29): (13, [387]),\n", + " (3, 30): (13, [386]),\n", + " (3, 31): (13, [385]),\n", + " (3, 32): (13, [384]),\n", + " (3, 33): (13, [383]),\n", + " (3, 34): (13, [382])}" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s=vh_vb.__dict__\n", + "scafs=s[\"_scaf\"]\n", + "staps=s[\"_stap\"]\n", + "staps" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "549ad98c", + "metadata": {}, + "outputs": [], + "source": [ + "class vstrands (object):\n", + "\n", + " def __init__(self):\n", + " self.vhelices = []\n", + "\n", + " def add_vhelix(self, toadd):\n", + " self.vhelices.append(toadd)\n", + "\n", + " def bbox(self):\n", + " rows = []\n", + " cols = []\n", + " lens = []\n", + " for h in self.vhelices:\n", + " rows.append(h.row)\n", + " cols.append(h.col)\n", + " lens.append(len(h.stap))\n", + "\n", + " dr = DIST_SQUARE * (max(rows) - min(rows) + 2)\n", + " dc = DIST_SQUARE * (max(cols) - min(cols) + 2)\n", + " dl = 0.34 * (max(lens) + 2)\n", + " \n", + " return 2 * max([dr, dc, dl]) * BOX_FACTOR\n", + " \n", + " def __str__(self):\n", + " a = '{\\n\"vstrands\":[\\n'\n", + " if len(self.vhelices) > 0:\n", + " for h in self.vhelices:\n", + " a = a + str(h) + ','\n", + " a = a[0:len(a) - 1]\n", + " a = a + '}\\n'\n", + " return a\n", + "class vhelix (object):\n", + "\n", + " def __init__(self):\n", + " self.stapLoop = []\n", + " self.scafLoop = []\n", + " self.skip = []\n", + " self.loop = []\n", + " self.stap_colors = []\n", + " self.row = 0\n", + " self.col = 0\n", + " self.num = 0\n", + " self.stap = []\n", + " self.scaf = []\n", + " self.cad_index = -1\n", + " self.skiploop_bases = 0\n", + "\n", + " def get_length(self):\n", + " return max (len(self.scaf), len(self.stap))\n", + "\n", + " len = property (get_length)\n", + "\n", + " def add_square(self, toadd, which):\n", + " if which == 'stap':\n", + " self.stap.append(toadd)\n", + " elif which == 'scaf':\n", + " self.scaf.append (toadd)\n", + " else:\n", + " base.Logger.log(\"Cannot add square that is not scaf or stap. Dying now\", base.Logger.CRITICAL)\n", + " sys.exit(1)\n", + " \n", + " def __str__(self):\n", + " a = '{\\n'\n", + "\n", + " a = a + '\"stapLoop\":['\n", + " if len(self.stapLoop) > 0:\n", + " for i in self.stapLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " a = a + '\"skip\":['\n", + " if len(self.skip) > 0:\n", + " for e in self.skip:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"loop\":['\n", + " if len(self.loop) > 0:\n", + " for e in self.loop:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap_colors\":['\n", + " if len (self.stap_colors) > 0:\n", + " for e in self.stap_colors:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + "\n", + " a = a + '\"row\":' + str(self.row) + ',\\n'\n", + " a = a + '\"col\":' + str(self.col) + ',\\n'\n", + " a = a + '\"num\":' + str(self.num) + ',\\n'\n", + " \n", + " a = a + '\"scafLoop\":['\n", + " if len(self.scafLoop) > 0:\n", + " for i in self.scafLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap\":['\n", + " if len(self.stap) > 0:\n", + " for i in self.stap:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"scaf\":['\n", + " if len(self.scaf) > 0:\n", + " for i in self.scaf:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + ']\\n}'\n", + " return a\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "611050e9", + "metadata": {}, + "outputs": [], + "source": [ + "L=[]\n", + "for i in df[\"vstrands\"]:\n", + " L.append(i)\n", + "\n", + "cadsys = vstrands()\n", + "vh = vhelix()\n", + "for s in L:\n", + " \n", + " vh.stap = [ i for i in s[\"scaf\"]]\n", + " vh.scaf = [i for i in s[\"stap\"]]\n", + " vh.skiploop_bases = len(s[\"skip\"]) + sum(s[\"loop\"]) - sum(s[\"skip\"])\n", + " cadsys.add_vhelix(vh)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7511a3e5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " 'add_square',\n", + " 'cad_index',\n", + " 'col',\n", + " 'get_length',\n", + " 'len',\n", + " 'loop',\n", + " 'num',\n", + " 'row',\n", + " 'scaf',\n", + " 'scafLoop',\n", + " 'skip',\n", + " 'skiploop_bases',\n", + " 'stap',\n", + " 'stapLoop',\n", + " 'stap_colors']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0=cadsys.vhelices[0]\n", + "dir(s0)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "06db198d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, 5, 10],\n", + " [5, 9, 5, 11],\n", + " [5, 10, 5, 12],\n", + " [5, 11, 5, 13],\n", + " [5, 12, 5, 14],\n", + " [5, 13, 5, 15],\n", + " [5, 14, 5, 16],\n", + " [5, 15, 5, 17],\n", + " [5, 16, 5, 18],\n", + " [5, 17, 5, 19],\n", + " [5, 18, 5, 20],\n", + " [5, 19, 5, 21],\n", + " [5, 20, 5, 22],\n", + " [5, 21, 5, 23],\n", + " [5, 22, 5, 24],\n", + " [5, 23, 5, 25],\n", + " [5, 24, 5, 26],\n", + " [5, 25, 5, 27],\n", + " [5, 26, 0, 27],\n", + " [0, 28, 5, 29],\n", + " [5, 28, 5, 30],\n", + " [5, 29, 5, 31],\n", + " [5, 30, 5, 32],\n", + " [5, 31, 5, 33],\n", + " [5, 32, 5, 34],\n", + " [5, 33, 5, 35],\n", + " [5, 34, 5, 36],\n", + " [5, 35, 5, 37],\n", + " [5, 36, 5, 38],\n", + " [5, 37, 5, 39],\n", + " [5, 38, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0.scaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "064dbe09", + "metadata": {}, + "outputs": [], + "source": [ + "for s in s0.scaf:\n", + " if s[0]==-1 and s[1]==-1:\n", + " pass\n", + " elif s[2]==len(s0.scaf) and abs(s[3])==1" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "30f03d63", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[-1, -1, -1, -1]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0.scaf[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a2b7becf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " 'add_vhelix',\n", + " 'bbox',\n", + " 'vhelices']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(cadsys)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "8fc5529a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[5, 1, -1, -1],\n", + " [5, 2, 5, 0],\n", + " [5, 3, 5, 1],\n", + " [-1, -1, 5, 2],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [5, 10, 4, 9],\n", + " [5, 11, 5, 9],\n", + " [5, 12, 5, 10],\n", + " [5, 13, 5, 11],\n", + " [5, 14, 5, 12],\n", + " [5, 15, 5, 13],\n", + " [5, 16, 5, 14],\n", + " [5, 17, 5, 15],\n", + " [5, 18, 5, 16],\n", + " [5, 19, 5, 17],\n", + " [5, 20, 5, 18],\n", + " [5, 21, 5, 19],\n", + " [5, 22, 5, 20],\n", + " [-1, -1, 5, 21],\n", + " [5, 24, -1, -1],\n", + " [5, 25, 5, 23],\n", + " [5, 26, 5, 24],\n", + " [5, 27, 5, 25],\n", + " [5, 28, 5, 26],\n", + " [5, 29, 5, 27],\n", + " [5, 30, 5, 28],\n", + " [5, 31, 5, 29],\n", + " [5, 32, 5, 30],\n", + " [5, 33, 5, 31],\n", + " [5, 34, 5, 32],\n", + " [5, 35, 5, 33],\n", + " [5, 36, 5, 34],\n", + " [5, 37, 5, 35],\n", + " [5, 38, 5, 36],\n", + " [5, 39, 5, 37],\n", + " [4, 39, 5, 38],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vh.stap" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "02959074", + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'mrdna'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmrdna\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreaders\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcadnano_segments\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Document\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mrdna'" + ] + } + ], + "source": [ + "\n", + "\n", + "from mrdna.readers.cadnano_segments import *\n", + "from cadnano.document import Document\n", + "import cadnano" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb9ce180", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a87de3e3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "json_data=read_json_file(\"test.json\")\n", + "part=decode_cadnano_part(json_data)\n", + "model=cadnano_part(part)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ba0073f2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "doc=Document()\n", + "cadnano.fileio.v2decode.decode(doc, json_data)\n", + "parts = [p for p in doc.getParts()]\n", + "part=parts[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8a45b04f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Oligo_(0.1[38])_1328\t23\t'None\n", + "Oligo_(2.1[34])_9584\t35\t'None\n", + "Oligo_(1.1[36])_7488\t188\t'None\n", + "Oligo_(4.1[39])_4384\t33\t'None\n", + "Oligo_(5.0[9])_0240\t23\t'None\n", + "Oligo_(1.0[3])_8256\t37\t'None\n", + "Oligo_(3.0[0])_3296\t33\t'None\n", + "Oligo_(0.1[23])_9088\t21\t'None\n", + "VH0\n", + "\t <fwd_StrandSet(0)> \t [(5, 36)] \n", + "\t\t\t\t ['#0066cc']\n", + "\t <rev_StrandSet(0)> \t [(2, 20), (21, 23), (24, 27), (28, 38)] \n", + "\t\t\t\t ['#cc0000', '#b8056c', '#f74308', '#1700de']\n", + "VH1\n", + "\t <fwd_StrandSet(1)> \t [(3, 20), (21, 38)] \n", + "\t\t\t\t ['#cc0000', '#b8056c']\n", + "\t <rev_StrandSet(1)> \t [(5, 18), (19, 36)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "VH2\n", + "\t <fwd_StrandSet(2)> \t [(2, 18), (19, 32)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "\t <rev_StrandSet(2)> \t [(0, 34)] \n", + "\t\t\t\t ['#888888']\n", + "VH3\n", + "\t <fwd_StrandSet(3)> \t [(0, 20), (21, 34)] \n", + "\t\t\t\t ['#cc0000', '#888888']\n", + "\t <rev_StrandSet(3)> \t [(2, 15), (16, 32)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "VH4\n", + "\t <fwd_StrandSet(4)> \t [(9, 15), (16, 39)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "\t <rev_StrandSet(4)> \t [(9, 20), (21, 39)] \n", + "\t\t\t\t ['#cc0000', '#888888']\n", + "VH5\n", + "\t <fwd_StrandSet(5)> \t [(9, 27), (28, 39)] \n", + "\t\t\t\t ['#f74308', '#1700de']\n", + "\t <rev_StrandSet(5)> \t [(9, 39)] \n", + "\t\t\t\t ['#0066cc']\n" + ] + } + ], + "source": [ + "part.__dict__.keys()\n", + "\n", + "oligos = part.oligos()\n", + "for oligo in oligos:\n", + " print(\"{0}\\t{1}\\t\\'{2}\".format(oligo,\n", + " oligo.length(),\n", + " oligo.sequence()))\n", + "\n", + "vhs = list(part.getIdNums()) # convert set to list\n", + "for vh_id in vhs: # display first 3 vhs\n", + " fwd_ss, rev_ss = part.getStrandSets(vh_id)\n", + " print('VH{0}'.format(vh_id))\n", + " print('\\t', fwd_ss, '\\t', [s.idxs() for s in fwd_ss.strands()], '\\n\\t\\t\\t\\t',\n", + " [s.getColor() for s in fwd_ss.strands()])\n", + " print('\\t', rev_ss, '\\t', [s.idxs() for s in rev_ss.strands()], '\\n\\t\\t\\t\\t',\n", + " [s.getColor() for s in rev_ss.strands()])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "12b582e0", + "metadata": {}, + "outputs": [], + "source": [ + "strands5 = [o.strand5p() for o in part.oligos()]\n", + "strands3 = [o.strand3p() for o in part.oligos()]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "0df7bddb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__slots__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " '_decrementLength',\n", + " '_incrementLength',\n", + " '_is_circular',\n", + " '_parent',\n", + " '_part',\n", + " '_props',\n", + " '_setColor',\n", + " '_setLength',\n", + " '_setLoop',\n", + " '_setProperty',\n", + " '_signals',\n", + " '_strand5p',\n", + " '_strandMergeUpdate',\n", + " '_strandSplitUpdate',\n", + " 'addToPart',\n", + " 'applyAbstractSequences',\n", + " 'applyColor',\n", + " 'applySequence',\n", + " 'applySequenceCMD',\n", + " 'clearAbstractSequences',\n", + " 'connect',\n", + " 'deleteLater',\n", + " 'destroy',\n", + " 'disconnect',\n", + " 'displayAbstractSequences',\n", + " 'dump',\n", + " 'editable_properties',\n", + " 'getAbsolutePositionAtLength',\n", + " 'getColor',\n", + " 'getModelProperties',\n", + " 'getName',\n", + " 'getNumberOfBasesToEachXover',\n", + " 'getOutlineProperties',\n", + " 'getProperty',\n", + " 'getStrandLengths',\n", + " 'isCircular',\n", + " 'length',\n", + " 'locString',\n", + " 'oligoPropertyChangedSignal',\n", + " 'oligoRemovedSignal',\n", + " 'oligoSelectedChangedSignal',\n", + " 'oligoSequenceAddedSignal',\n", + " 'oligoSequenceClearedSignal',\n", + " 'parent',\n", + " 'part',\n", + " 'refreshLength',\n", + " 'remove',\n", + " 'removeFromPart',\n", + " 'sequence',\n", + " 'sequenceExport',\n", + " 'setParent',\n", + " 'setPart',\n", + " 'setProperty',\n", + " 'setStrand5p',\n", + " 'shallowCopy',\n", + " 'shouldHighlight',\n", + " 'signals',\n", + " 'splitAtAbsoluteLengths',\n", + " 'strand3p',\n", + " 'strand5p',\n", + " 'undoStack']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "L=[o for o in part.oligos()]\n", + "dir(L[2])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "fa34a0b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(dict, {0: {}, 1: {}, 2: {}, 3: {}, 4: {}, 5: {}})" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "part.insertions()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fa74a1d1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'NaPart1',\n", + " 'color': '#0066cc',\n", + " 'is_visible': True,\n", + " 'active_phos': None,\n", + " 'crossover_span_angle': 45,\n", + " 'max_vhelix_length': 42,\n", + " 'neighbor_active_angle': '',\n", + " 'grid_type': <GridEnum.HONEYCOMB: 2>,\n", + " 'virtual_helix_order': [0, 1, 2, 3, 4, 5],\n", + " 'is_lattice': True,\n", + " <GridEnum.HONEYCOMB: 2>: <GridEnum.NONE: 0>}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "part.getModelProperties()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "425b6ab6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _\n", + " _____ ___ _| |___ ___\n", + "| | _| . | | .'|\n", + "|_|_|_|_| |___|_|_|__,| v1.0a.dev74 \n", + "it/its\n", + "\n" + ] + } + ], + "source": [ + "import pdb\n", + "import numpy as np\n", + "import os,sys\n", + "import scipy\n", + "\n", + "from mrdna import logger, devlogger\n", + "from mrdna.segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment\n", + "from mrdna.arbdmodel.coords import quaternion_from_matrix, rotationAboutAxis, quaternion_slerp\n", + "from mrdna import get_resource_path\n", + "\n", + "ref_stack_position = np.array((-2.41851735, -0.259761333, 3.39999978))\n", + "\n", + "def _three_prime_list_to_five_prime(three_prime):\n", + " five_prime = -np.ones(three_prime.shape, dtype=int)\n", + " has_three_prime = np.where(three_prime >= 0)[0]\n", + " five_prime[three_prime[has_three_prime]] = has_three_prime\n", + " return five_prime \n", + "def _primes_list_to_strands(three_prime, five_prime):\n", + " five_prime_ends = np.where(five_prime < 0)[0]\n", + " strands = []\n", + " strand_is_circular = []\n", + " \n", + " idx_to_strand = -np.ones(three_prime.shape, dtype=int)\n", + "\n", + " def build_strand(nt_idx, conditional):\n", + " strand = [nt_idx]\n", + " idx_to_strand[nt_idx] = len(strands)\n", + " while conditional(nt_idx):\n", + " nt_idx = three_prime[nt_idx]\n", + " strand.append(nt_idx)\n", + " idx_to_strand[nt_idx] = len(strands)\n", + " strands.append( np.array(strand, dtype=int) )\n", + "\n", + " for nt_idx in five_prime_ends:\n", + " build_strand(nt_idx,\n", + " lambda nt: three_prime[nt] >= 0)\n", + " strand_is_circular.append(False)\n", + "\n", + " while True:\n", + " ## print(\"WARNING: working on circular strand {}\".format(len(strands)))\n", + " ids = np.where(idx_to_strand < 0)[0]\n", + " if len(ids) == 0: break\n", + " build_strand(ids[0],\n", + " lambda nt: three_prime[nt] >= 0 and \\\n", + " idx_to_strand[three_prime[nt]] < 0)\n", + " strand_is_circular.append(True)\n", + "\n", + " return strands, strand_is_circular\n", + "\n", + "def find_stacks(centers, transforms):\n", + "\n", + " ## Find orientation and center of each nucleotide\n", + " expected_stack_positions = []\n", + " for R,c in zip(transforms,centers):\n", + " expected_stack_positions.append( c + ref_stack_position.dot(R) )\n", + "\n", + " expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)\n", + "\n", + " dists = scipy.spatial.distance_matrix(expected_stack_positions, centers)\n", + " dists = dists + 5*np.eye(len(dists))\n", + " idx1, idx2 = np.where(dists < 3.5)\n", + "\n", + " ## Convert distances to stacks\n", + " stacks_above = -np.ones(len(centers), dtype=int)\n", + " _z = np.array((0,0,1))\n", + " for i in np.unique(idx1):\n", + " js = idx2[ idx1 == i ]\n", + " with np.errstate(divide='ignore',invalid='ignore'):\n", + " angles = [np.arccos( transforms[j].T.dot( transforms[i].dot(_z) ).dot( _z ) ) for j in js]\n", + " angles = np.array( angles )\n", + " tmp = np.argmin(dists[i][js] + 1.0*angles)\n", + " j = js[tmp]\n", + " stacks_above[i] = j\n", + "\n", + " return stacks_above\n", + "\n", + "def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):\n", + "\n", + " helixmap = -np.ones(basepairs.shape, dtype=int)\n", + " helixrank = -np.ones(basepairs.shape)\n", + " is_fwd = np.ones(basepairs.shape, dtype=int)\n", + " \n", + " ## Remove stacks with nts lacking a basepairs\n", + " nobp = np.where(basepairs < 0)[0]\n", + " stacks_above[nobp] = -1\n", + " stacks_with_nobp = np.in1d(stacks_above, nobp)\n", + " stacks_above[stacks_with_nobp] = -1\n", + "\n", + " end_ids = np.where( (stacks_above < 0)*(basepairs >= 0) )[0]\n", + "\n", + " hid = 0\n", + " for end in end_ids:\n", + " if helixmap[end] >= 0:\n", + " continue\n", + " rank = 0\n", + " nt = basepairs[end]\n", + " bp = basepairs[nt]\n", + " assert( bp == end )\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " continue\n", + " # assert(helixmap[nt] == -1)\n", + " # assert(helixmap[bp] == -1)\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " rank +=1\n", + "\n", + " _tmp = [(nt,bp)]\n", + " \n", + " while stacks_above[nt] >= 0:\n", + " nt = stacks_above[nt]\n", + " if basepairs[nt] < 0: break\n", + " bp = basepairs[nt]\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " break\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " _tmp.append((nt,bp))\n", + " rank +=1\n", + "\n", + " hid += 1\n", + "\n", + " ## Create \"helix\" for each circular segment\n", + " intrahelical = []\n", + " processed = set()\n", + " unclaimed_bases = np.where( (basepairs >= 0)*(helixmap == -1) )[0]\n", + " for nt0 in unclaimed_bases:\n", + " if nt0 in processed: continue\n", + "\n", + " nt = nt0\n", + " all_nts = [nt]\n", + "\n", + " rank = 0\n", + " nt = nt0\n", + " bp = basepairs[nt]\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed cylic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " continue\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " rank +=1\n", + " processed.add(nt)\n", + " processed.add(bp)\n", + "\n", + " counter = 0\n", + " while stacks_above[nt] >= 0:\n", + " lastnt = nt\n", + " nt = stacks_above[nt]\n", + " bp = basepairs[nt]\n", + " if nt == nt0 or nt == basepairs[nt0]:\n", + " intrahelical.append((lastnt,nt0))\n", + " break\n", + " \n", + " assert( bp >= 0 )\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed cyclic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " break\n", + " \n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " processed.add(nt)\n", + " processed.add(bp)\n", + " rank +=1\n", + " hid += 1\n", + "\n", + " return helixmap, helixrank, is_fwd, intrahelical\n", + "\n", + "\n", + "def set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation=None):\n", + " maxrank = np.max( hrank[hmap==hid] )\n", + " if maxrank == 0:\n", + " ids = np.where((hmap == hid))[0]\n", + " pos = np.mean( [coordinate[r,:] for r in ids ], axis=0 )\n", + " coords = [pos,pos]\n", + " contours = [0,1]\n", + " if orientation is not None:\n", + " ids = np.where((hmap == hid) * fwd)[0]\n", + " assert( len(ids) == 1 )\n", + " q = quaternion_from_matrix( orientation[ids[0]] )\n", + " quats = [q, q]\n", + " coords[-1] = pos + orientation[ids[0]].dot(np.array((0,0,1)))\n", + "\n", + " else:\n", + " coords,contours,quats = [[],[],[]]\n", + " last_q = None\n", + " for rank in range(int(maxrank)+1):\n", + " ids = np.where((hmap == hid) * (hrank == rank))[0]\n", + " \n", + " coords.append(np.mean( [coordinate[r,:] for r in ids ], axis=0 ))\n", + " contours.append( float(rank+0.5)/(maxrank+1) )\n", + " if orientation is not None:\n", + " ids = np.where((hmap == hid) * (hrank == rank) * fwd)[0]\n", + " assert(len(ids) == 1)\n", + " q = quaternion_from_matrix( orientation[ids[0]] )\n", + "\n", + " if last_q is not None and last_q.dot(q) < 0:\n", + " q = -q\n", + "\n", + " ## Average quaterion with reverse direction\n", + " bp = basepair[ids[0]]\n", + " if bp >= 0:\n", + " bp_o = orientation[bp].dot(rotationAboutAxis(np.array((1,0,0)),180))\n", + " q2 = quaternion_from_matrix( bp_o )\n", + " if q.dot(q2) < 0:\n", + " q2 = -q2\n", + "\n", + " ## probably good enough, but slerp is better: q = (q + q2)*0.5\n", + " q = quaternion_slerp(q,q2,0.5)\n", + "\n", + " quats.append(q)\n", + " last_q = q\n", + "\n", + " coords = np.array(coords)\n", + " seg.set_splines(contours,coords)\n", + " if orientation is not None:\n", + " quats = np.array(quats)\n", + " seg.set_orientation_splines(contours,quats)\n", + "\n", + " seg.start_position = coords[0,:]\n", + " seg.end_position = coords[-1,:]\n", + "\n", + "\n", + "def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,\n", + " sequence=None, orientation=None,\n", + " max_basepairs_per_bead = 5,\n", + " max_nucleotides_per_bead = 5,\n", + " local_twist = False,\n", + " dimensions=(5000,5000,5000),\n", + " **model_parameters):\n", + " \"\"\" \n", + " Creates a SegmentModel object from lists of each nucleotide's\n", + " basepair, its stack (on 3' side) and its 3'-connected nucleotide\n", + "\n", + " The first argument should be an N-by-3 numpy array containing the\n", + " coordinate of each nucleotide, where N is the number of\n", + " nucleotides. The following three arguments should be integer lists\n", + " where the i-th element corresponds to the i-th nucleotide; the\n", + " list element should the integer index of the corresponding\n", + " basepaired / stacked / phosphodiester-bonded nucleotide. If there\n", + " is no such nucleotide, the value should be -1.\n", + "\n", + " Args:\n", + " basepair: List of each nucleotide's basepair's index\n", + " stack: List containing index of the nucleotide stacked on the 3' of each nucleotide\n", + " three_prime: List of each nucleotide's the 3' end of each nucleotide\n", + "\n", + " Returns:\n", + " SegmentModel\n", + " \"\"\"\n", + "\n", + " \"\"\" Validate Input \"\"\"\n", + " inputs = (basepair,three_prime)\n", + " try:\n", + " basepair,three_prime = [np.array(a,dtype=int) for a in inputs]\n", + " except:\n", + " raise TypeError(\"One or more of the input lists could not be converted into a numpy array\")\n", + " inputs = (basepair,three_prime)\n", + " coordinate = np.array(coordinate)\n", + "\n", + " if np.any( [len(a.shape) > 1 for a in inputs] ):\n", + " raise ValueError(\"One or more of the input lists has the wrong dimensionality\")\n", + "\n", + " if len(coordinate.shape) != 2:\n", + " raise ValueError(\"Coordinate array has the wrong dimensionality\")\n", + "\n", + " inputs = (coordinate,basepair,three_prime)\n", + " if not np.all(np.diff([len(a) for a in inputs]) == 0):\n", + " raise ValueError(\"Inputs are not the same length\")\n", + " \n", + " num_nt = len(basepair)\n", + " if sequence is not None and len(sequence) != num_nt:\n", + " raise ValueError(\"The 'sequence' parameter is the wrong length {} != {}\".format(len(sequence),num_nt))\n", + "\n", + " if orientation is not None:\n", + " orientation = np.array(orientation)\n", + " if len(orientation.shape) != 3:\n", + " raise ValueError(\"The 'orientation' array has the wrong dimensionality (should be Nx3x3)\")\n", + " if orientation.shape != (num_nt,3,3):\n", + " raise ValueError(\"The 'orientation' array is not properly formatted\")\n", + "\n", + " if stack is None:\n", + " if orientation is not None:\n", + " stack = find_stacks(coordinate, orientation)\n", + " else:\n", + " ## Guess stacking based on 3' connectivity\n", + " stack = np.array(three_prime,dtype=int) # Assume nts on 3' ends are stacked\n", + " _stack_below = _three_prime_list_to_five_prime(stack)\n", + " _has_bp = (basepair >= 0)\n", + " _nostack = np.where( (stack == -1)*_has_bp )[0]\n", + " _has_stack_below = _stack_below[basepair[_nostack]] >= 0\n", + " _nostack2 = _nostack[_has_stack_below]\n", + " stack[_nostack2] = basepair[_stack_below[basepair[_nostack2]]]\n", + "\n", + " else:\n", + " try:\n", + " stack = np.array(stack,dtype=int)\n", + " except:\n", + " raise TypeError(\"The 'stack' array could not be converted into a numpy integer array\")\n", + "\n", + " if len(stack.shape) != 1:\n", + " raise ValueError(\"The 'stack' array has the wrong dimensionality\")\n", + "\n", + " if len(stack) != num_nt:\n", + " raise ValueError(\"The length of the 'stack' array does not match other inputs\")\n", + "\n", + " bps = basepair # alias\n", + "\n", + " \"\"\" Fix stacks: require that the stack of a bp of a base's stack is its bp \"\"\"\n", + " _has_bp = (bps >= 0)\n", + " _has_stack = (stack >= 0)\n", + " _stack_has_basepair = (bps[stack] >= 0) * _has_stack\n", + " stack = np.where( (stack[bps[stack]] == bps) * _has_bp * _has_stack * _has_bp,\n", + " stack, -np.ones(len(stack),dtype=int) )\n", + "\n", + " five_prime = _three_prime_list_to_five_prime(three_prime)\n", + "\n", + " \"\"\" Build map of dsDNA helices and strands \"\"\"\n", + " hmap,hrank,fwd,intrahelical = basepairs_and_stacks_to_helixmap(bps,stack)\n", + " double_stranded_helices = np.unique(hmap[hmap >= 0]) \n", + " strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime)\n", + "\n", + " \"\"\" Add ssDNA to hmap \"\"\"\n", + " if len(double_stranded_helices) > 0:\n", + " hid = double_stranded_helices[-1]+1\n", + " else:\n", + " hid = 0\n", + " ss_residues = hmap < 0\n", + " #\n", + " if np.any(bps[ss_residues] != -1):\n", + " logger.warning(f'{np.sum(bps[ss_residues] != -1)} ssDNA nucleotides appear to have basepairs... ignoring')\n", + " \n", + " for s,c in zip(strands, strand_is_circular):\n", + " strand_segment_ends = [i for i in np.where( np.diff(hmap[s]) != 0 )[0]] + [len(s)-1]\n", + " seg_start = 0\n", + " for i in strand_segment_ends:\n", + " if hmap[s[i]] < 0:\n", + " ## Found single-stranded segment\n", + " ids = s[seg_start:i+1]\n", + " assert( np.all(hmap[ids] == -1) )\n", + " hmap[ids] = hid\n", + " hrank[ids] = np.arange(i+1-seg_start)\n", + " hid+=1\n", + " seg_start = i+1\n", + "\n", + " if len(double_stranded_helices) > 0:\n", + " single_stranded_helices = np.arange(double_stranded_helices[-1]+1,hid)\n", + " else:\n", + " single_stranded_helices = np.arange(hid)\n", + "\n", + " ## Create double-stranded segments\n", + " doubleSegments = []\n", + " for hid in double_stranded_helices:\n", + " seg = DoubleStrandedSegment(name=str(hid),\n", + " num_bp = np.sum(hmap==hid)//2)\n", + " set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)\n", + "\n", + " assert(hid == len(doubleSegments))\n", + " doubleSegments.append(seg)\n", + "\n", + " ## Create single-stranded segments\n", + " singleSegments = []\n", + " for hid in single_stranded_helices:\n", + " seg = SingleStrandedSegment(name=str(hid),\n", + " num_nt = np.sum(hmap==hid))\n", + " set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)\n", + "\n", + " assert(hid == len(doubleSegments) + len(singleSegments))\n", + " singleSegments.append(seg)\n", + "\n", + " ## Find crossovers and 5prime/3prime ends\n", + " crossovers,prime5,prime3 = [[],[],[]]\n", + " for s,c in zip(strands,strand_is_circular):\n", + " tmp = np.where(np.diff(hmap[s]) != 0)[0]\n", + " for i in tmp:\n", + " crossovers.append( (s[i],s[i+1]) )\n", + " if c:\n", + " if hmap[s[-1]] != hmap[s[0]]:\n", + " crossovers.append( (s[-1],s[0]) )\n", + " else:\n", + " prime5.append(s[0])\n", + " prime3.append(s[-1])\n", + "\n", + " ## Add connections\n", + " allSegments = doubleSegments+singleSegments\n", + "\n", + " for r1,r2 in crossovers:\n", + " seg1,seg2 = [allSegments[hmap[i]] for i in (r1,r2)]\n", + " nt1,nt2 = [hrank[i] for i in (r1,r2)]\n", + " f1,f2 = [fwd[i] for i in (r1,r2)]\n", + "\n", + " ## Handle connections at the ends\n", + " is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1))\n", + " is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0))\n", + "\n", + " print(seg1,seg2, r1, r2, is_terminal1, is_terminal2)\n", + " if is_terminal1 or is_terminal2:\n", + " \"\"\" Ensure that we don't have three-way dsDNA junctions \"\"\"\n", + " if is_terminal1 and (bps[r1] >= 0) and (five_prime[bps[r1]] >= 0) and (three_prime[r1] >= 0):\n", + " if (bps[five_prime[bps[r1]]] >= 0) and (bps[three_prime[r1]] >= 0):\n", + " # is_terminal1 = (three_prime[r1] == bps[five_prime[bps[r1]]])\n", + " is_terminal1 = hmap[five_prime[bps[r1]]] == hmap[three_prime[r1]]\n", + " if is_terminal2 and (bps[r2] >= 0) and (three_prime[bps[r2]] >= 0) and (five_prime[r2] >= 0):\n", + " if (bps[three_prime[bps[r2]]] >= 0) and (bps[five_prime[r2]] >= 0):\n", + " # is_terminal2 = (five_prime[r2] == bps[three_prime[bps[r2]]])\n", + " is_terminal2 = hmap[three_prime[bps[r2]]] == hmap[five_prime[r2]]\n", + " \n", + " \"\"\" Place connection \"\"\"\n", + " if is_terminal1 and is_terminal2:\n", + " end1 = seg1.end3 if f1 else seg1.start3\n", + " end2 = seg2.start5 if f2 else seg2.end5\n", + " seg1._connect_ends( end1, end2, type_='intrahelical')\n", + " else:\n", + " seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_=\"terminal_crossover\")\n", + " else:\n", + " seg1.add_crossover(nt1,seg2,nt2,[f1,f2])\n", + "\n", + " ## Add 5prime/3prime ends\n", + " for r in prime5:\n", + " seg = allSegments[hmap[r]]\n", + " seg.add_5prime(hrank[r],fwd[r])\n", + " for r in prime3:\n", + " seg = allSegments[hmap[r]]\n", + " seg.add_3prime(hrank[r],fwd[r])\n", + "\n", + " ## Add intrahelical connections to circular helical sections\n", + " for nt0,nt1 in intrahelical:\n", + " seg = allSegments[hmap[nt0]]\n", + " assert( seg is allSegments[hmap[nt1]] )\n", + " if three_prime[nt0] >= 0:\n", + " if hmap[nt0] == hmap[three_prime[nt0]]:\n", + " seg.connect_end3(seg.start5)\n", + "\n", + " bp0,bp1 = [bps[nt] for nt in (nt0,nt1)]\n", + " if three_prime[bp1] >= 0:\n", + " if hmap[bp1] == hmap[three_prime[bp1]]:\n", + " seg.connect_start3(seg.end5)\n", + "\n", + " ## Assign sequence\n", + " if sequence is not None:\n", + " for hid in range(len(allSegments)):\n", + " resids = np.where( (hmap==hid)*(fwd==1) )[0]\n", + " s = allSegments[hid]\n", + " s.sequence = [sequence[r] for r in sorted(resids,key=lambda x: hrank[x])]\n", + "\n", + "\n", + " ## Build model\n", + " model = SegmentModel( allSegments,\n", + " max_basepairs_per_bead = max_basepairs_per_bead,\n", + " max_nucleotides_per_bead = max_nucleotides_per_bead,\n", + " local_twist = local_twist,\n", + " dimensions = dimensions,\n", + " **model_parameters )\n", + "\n", + "\n", + " model._reader_list_coordinates = coordinate\n", + " model._reader_list_basepair = basepair\n", + " model._reader_list_stack = stack\n", + " model._reader_list_three_prime = three_prime\n", + " model._reader_list_five_prime = five_prime\n", + " model._reader_list_sequence = sequence\n", + " model._reader_list_orientation = orientation\n", + " model._reader_list_hmap = hmap\n", + " model._reader_list_fwd = fwd\n", + " model._reader_list_hrank = hrank\n", + "\n", + " if sequence is None:\n", + " for s in model.segments:\n", + " s.randomize_unset_sequence()\n", + "\n", + " return model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e986bda7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<DoubleStrandedSegment'> 1[1]> <DoubleStrandedSegment'> 0[1]> 5 3 True True\n", + "<SingleStrandedSegment'> 2[2]> <DoubleStrandedSegment'> 0[1]> 1 2 True True\n", + "<DoubleStrandedSegment'> 0[1]> <DoubleStrandedSegment'> 1[1]> 2 4 True True\n", + "<DoubleStrandedSegment'> 1[1]> <SingleStrandedSegment'> 3[1]> 4 6 True True\n", + "<SingleStrandedSegment'> 3[1]> <SingleStrandedSegment'> 2[2]> 6 0 True True\n" + ] + } + ], + "source": [ + "coordinate = [(0,0,3.4*i) for i in range(7)]\n", + "three_prime = [ 1, 2, 4,-1, 6, 3, 0]\n", + "basepair = [-1,-1, 3, 2, 5, 4,-1]\n", + "stack = [-1,-1, -1,-1,-1, -1,-1]\n", + "for i in [3,5]:\n", + " coordinate[i] = (1,0,3.4*i)\n", + "\n", + "model = model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,\n", + " max_basepairs_per_bead=1,\n", + " max_nucleotides_per_bead=1,\n", + " local_twist=False)\n", + "model.writePsf(\"list.psf\")\n", + "model.writePdb(\"list.pdb\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7e1ee20f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[<SegmentParticle DNA on <DoubleStrandedSegment'> 0[1]>[1.00]>], [<SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[0.00]>, <SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.00]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.50]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 3[1]>[0.50]>]]\n", + "[[<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>], [<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>, <Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>], [<Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>], [<Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>]]\n" + ] + } + ], + "source": [ + "print([i.children for i in model.children])\n", + "print([i.connections for i in model.children])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2c710b99", + "metadata": {}, + "outputs": [], + "source": [ + "s=model.children[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "550d0450", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.segname" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3d2f102f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(0, 0, 0.0),\n", + " (0, 0, 3.4),\n", + " (0, 0, 6.8),\n", + " (1, 0, 10.2),\n", + " (0, 0, 13.6),\n", + " (1, 0, 17.0),\n", + " (0, 0, 20.4)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coordinate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ba54a37", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}