From 1afeeb3a2865e72ab90222cefc5cb9822210e710 Mon Sep 17 00:00:00 2001 From: pinyili2 <pinyili2@illinois.edu> Date: Wed, 7 Aug 2024 16:36:04 -0500 Subject: [PATCH] add segmentmodel_from_cadnano --- mrdna/readers/.gitignore | 1 - .../.ipynb_checkpoints/test-checkpoint.ipynb | 1479 ++++++- mrdna/readers/.nfs0000000000093fec000043e9 | Bin 16384 -> 0 bytes mrdna/readers/__init__.py | 5 + mrdna/readers/list.pdb | 8 - mrdna/readers/list.psf | 39 - mrdna/readers/segmentmodel_from_cadnano.py | 31 +- mrdna/readers/test.ipynb | 902 ++-- mrdna/readers/test.json | 1 - mrdna/readers/{ => test}/Na_liu.json | 0 mrdna/readers/{ => test}/rest_scaf_col.json | 0 mrdna/readers/test/test.ipynb | 3735 +++++++++++++++++ mrdna/readers/{ => test}/test.json.oxdna | 0 mrdna/readers/{ => test}/test.json.top | 0 mrdna/readers/{ => test}/test.seq.json | 0 mrdna/readers/{ => test}/test.virt2nuc | Bin mrdna/readers/{ => test}/test2.ipynb | 0 mrdna/readers/{ => test}/test3.ipynb | 0 18 files changed, 5678 insertions(+), 523 deletions(-) delete mode 100644 mrdna/readers/.nfs0000000000093fec000043e9 delete mode 100644 mrdna/readers/list.pdb delete mode 100644 mrdna/readers/list.psf delete mode 100644 mrdna/readers/test.json rename mrdna/readers/{ => test}/Na_liu.json (100%) rename mrdna/readers/{ => test}/rest_scaf_col.json (100%) create mode 100644 mrdna/readers/test/test.ipynb rename mrdna/readers/{ => test}/test.json.oxdna (100%) rename mrdna/readers/{ => test}/test.json.top (100%) rename mrdna/readers/{ => test}/test.seq.json (100%) rename mrdna/readers/{ => test}/test.virt2nuc (100%) rename mrdna/readers/{ => test}/test2.ipynb (100%) rename mrdna/readers/{ => test}/test3.ipynb (100%) diff --git a/mrdna/readers/.gitignore b/mrdna/readers/.gitignore index 6db9d75..01b1df4 100644 --- a/mrdna/readers/.gitignore +++ b/mrdna/readers/.gitignore @@ -1,3 +1,2 @@ -libs *.swp .ipynb* diff --git a/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb b/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb index e061577..f708f52 100644 --- a/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb +++ b/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, - "id": "b7b6b57a", + "execution_count": 342, + "id": "8abe7172", "metadata": { "scrolled": true }, @@ -11,37 +11,1052 @@ "source": [ "import pandas as pd\n", "import pickle\n", - "import numpy as np\n", - "df=pd.read_json(\"test.json\")\n", - "d=list(df[\"vstrands\"])" + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "id": "a6c10574", + "metadata": {}, + "outputs": [], + "source": [ + "import cadnano\n", + "from cadnano.document import Document\n", + "from mrdna.arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis" + ] + }, + { + "cell_type": "code", + "execution_count": 553, + "id": "1322927e", + "metadata": {}, + "outputs": [], + "source": [ + "def gen_prop_table(json_file):\n", + " part,vslist=read_json_file(json_file)\n", + " props = part.getModelProperties().copy()\n", + " try:\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " except:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n", + " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt,dtype=bool)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " tot_id=scaf_id+stap_id\n", + " vhi,zidi=np.where(np.array(scaf_id)==1)\n", + " vhj,zidj=np.where(np.array(stap_id)==1)\n", + " vhi=vslist.index[vhi]\n", + " vhj=vslist.index[vhj]\n", + " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n", + " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n", + " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + " nt_prop[\"r\"]=[part.getCoordinate(helix_id, indices) for helix_id, indices in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for helix_id,indices in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop=nt_prop.fillna(-1)\n", + " for i in range(int(len(vhzid)/2)):\n", + " try:\n", + " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n", + " nt_prop[\"bp\"][bp1]=bp2\n", + " nt_prop[\"bp\"][bp2]=bp1\n", + " except:\n", + " pass\n", + " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " index2=list(zip(vhzid,nt_prop[\"is_scaf\"]))\n", + " for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + " nt_prop[\"threeprime\"]=tprime_list\n", + " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + " ## Todo: sequence \n", + "\n", + "\n", + " return nt_prop\n" + ] + }, + { + "cell_type": "code", + "execution_count": 555, + "id": "c787cb64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "nt_prop=gen_prop_table(\"test/Na_liu.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 560, + "id": "927f76f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 12, 13, 14, ..., 13920, 13921, 13922]),)" + ] + }, + "execution_count": 560, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.where(np.array(nt_prop[\"bp\"])!=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 562, + "id": "2093c036", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "vh 0\n", + "zid 21\n", + "is_scaf True\n", + "r [0.0, 15.75, 7.140000000000001]\n", + "bp 7560\n", + "stack -1\n", + "threeprime 13\n", + "seq -1\n", + "orientation [[-4.440892098500626e-16, 1.0, 0.0], [-1.0, -4...\n", + "Name: 12, dtype: object" + ] + }, + "execution_count": 562, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop.loc[12]" + ] + }, + { + "cell_type": "code", + "execution_count": 468, + "id": "51b62714", + "metadata": {}, + "outputs": [], + "source": [ + "scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + "stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 500, + "id": "66e9eacd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 500, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nttype(vslist[\"scaf\"][30])[146]" + ] + }, + { + "cell_type": "code", + "execution_count": 498, + "id": "ddecdaef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 498, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vhi,zidi=np.where(np.array(scaf_id)==1)\n", + "scaf_id[30][146]" + ] + }, + { + "cell_type": "code", + "execution_count": 480, + "id": "ea39cbd0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 480, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaf_id[30][146]==np.array(scaf_id)[0][9]" + ] + }, + { + "cell_type": "code", + "execution_count": 549, + "id": "5008fa72", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " ...\n", + " 39, 39, 39, 39, 39, 39, 39, 39, 39, 39],\n", + " dtype='int64', name='num', length=7560)" + ] + }, + "execution_count": 549, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def nttype(scafs):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in scafs])\n", + " return n\n", + "d={}\n", + "vslist.index[vhi]" + ] + }, + { + "cell_type": "code", + "execution_count": 544, + "id": "7409a9a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n", + " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n", + " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n", + " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)" + ] + }, + "execution_count": 544, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.where(vslist.index[vhi]!=vhi)" + ] + }, + { + "cell_type": "code", + "execution_count": 550, + "id": "7e3b3abc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "vh 29\n", + "zid 83\n", + "is_scaf True\n", + "r [-17.537016375, 28.125, 28.220000000000002]\n", + "bp -1\n", + "stack -1\n", + "threeprime -1\n", + "seq -1\n", + "orientation [[-0.5633200580636211, 0.8262387743159955, 0.0...\n", + "Name: 7394, dtype: object" + ] + }, + "execution_count": 550, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop.loc[7394]" + ] + }, + { + "cell_type": "code", + "execution_count": 548, + "id": "33c599fb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 29, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39, 41, 40, 42, 44, 46, 48, 50],\n", + " dtype='int64', name='num')" + ] + }, + "execution_count": 548, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vslist.index" + ] + }, + { + "cell_type": "code", + "execution_count": 527, + "id": "c781d982", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n", + " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n", + " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n", + " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)" + ] + }, + "execution_count": 527, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n=list(nt_prop[\"zid\"])\n", + "np.where(np.array(list(nt_prop[\"vh\"]))==29)" + ] + }, + { + "cell_type": "code", + "execution_count": 503, + "id": "7efc3ee8", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "((30, 146), True) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-503-1b9956d4cdaf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnt_prop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list" + ] + } + ], + "source": [ + "vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + "index2=list(zip(vhzid,nt_prop[\"is_scaf\"]))\n", + "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " \n", + "print(index2.index(((30,146),(True))))" + ] + }, + { + "cell_type": "code", + "execution_count": 537, + "id": "0b3eaca4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11 135 30 146 3010\n" + ] + }, + { + "ename": "ValueError", + "evalue": "((30, 146), True) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-537-f07d5cbf0867>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list" + ] + } + ], + "source": [ + " \n", + "for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k==30 and l==146:\n", + " print(m,n,k,l,i)\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + "nt_prop[\"threeprime\"]=tprime_list\n", + "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + " ## Todo: sequence " + ] + }, + { + "cell_type": "code", + "execution_count": 491, + "id": "99e544cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[11, 135, 30, 147]" + ] + }, + "execution_count": 491, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(vslist.loc[30][\"scaf\"])[146]" + ] + }, + { + "cell_type": "code", + "execution_count": 493, + "id": "264bc035", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "(30, 146) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-493-d1dd239124c3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvhzid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: (30, 146) is not in list" + ] + } + ], + "source": [ + "vhzid.index((30,146))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4f596a68", + "metadata": {}, + "outputs": [], + "source": [ + "df=pd.DataFrame(data=d)\n", + "df=df.set_index(\"num\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce978dbf", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "e10006d7", + "metadata": {}, + "outputs": [], + "source": [ + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(\"WARNING: unable to determine cadnano part lattice type\")\n", + " return lattice_type\n" + ] + }, + { + "cell_type": "code", + "execution_count": 358, + "id": "ed9e6b6f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + }, + { + "data": { + "text/plain": [ + "NucleicAcidPart_-1_3904" + ] + }, + "execution_count": 358, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p,f=read_json_file(\"test.json\")\n", + "p" + ] + }, + { + "cell_type": "code", + "execution_count": 441, + "id": "8ec446ce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>row</th>\n", + " <th>col</th>\n", + " <th>scaf</th>\n", + " <th>stap</th>\n", + " <th>loop</th>\n", + " <th>skip</th>\n", + " <th>scafLoop</th>\n", + " <th>stapLoop</th>\n", + " <th>stap_colors</th>\n", + " </tr>\n", + " <tr>\n", + " <th>num</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[23, 13369809], [38, 12060012]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[3, 1501302]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>13</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n", + " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[34, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>13</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n", + " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[0, 13369344]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>13</td>\n", + " <td>17</td>\n", + " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[39, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>12</td>\n", + " <td>17</td>\n", + " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[9, 0]]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " row col scaf \\\n", + "num \n", + "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n", + "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n", + "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n", + "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n", + "\n", + " stap \\\n", + "num \n", + "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n", + "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n", + "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n", + "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "\n", + " loop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "\n", + " skip scafLoop stapLoop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "\n", + " stap_colors \n", + "num \n", + "0 [[23, 13369809], [38, 12060012]] \n", + "1 [[3, 1501302]] \n", + "2 [[34, 8947848]] \n", + "3 [[0, 13369344]] \n", + "4 [[39, 8947848]] \n", + "5 [[9, 0]] " + ] + }, + "execution_count": 441, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f" + ] + }, + { + "cell_type": "code", + "execution_count": 199, + "id": "263415de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 2.25, 3.4 ])" + ] + }, + "execution_count": 199, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p.getCoordinate(0,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 434, + "id": "6e58a7e7", + "metadata": {}, + "outputs": [], + "source": [ + "def mrdna_model_from_cadnano(json_file,**model_parameters):\n", + " part,vslist=read_json_file(json_file)\n", + " props = part.getModelProperties().copy()\n", + " try:\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " except:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n", + " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt,dtype=bool)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " tot_id=scaf_id+stap_id\n", + " vhi,zidi=np.where(np.array(scaf_id)==1)\n", + " vhj,zidj=np.where(np.array(stap_id)==1)\n", + " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n", + " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n", + " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + " nt_prop[\"r\"]=[part.getCoordinate(i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop=nt_prop.fillna(-1)\n", + " for i in range(int(len(vhzid)/2)):\n", + " try:\n", + " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n", + " nt_prop[\"bp\"][bp1]=bp2\n", + " nt_prop[\"bp\"][bp2]=bp1\n", + " except:\n", + " pass\n", + " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + " nt_prop[\"threeprime\"]=tprime_list\n", + " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + "\n", + "\n", + " return nt_prop\n" + ] + }, + { + "cell_type": "code", + "execution_count": 442, + "id": "3ee34e17", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1])" + ] + }, + "execution_count": 442, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(nt_prop[\"seq\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 302, + "id": "72245714", + "metadata": {}, + "outputs": [], + "source": [ + "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + "for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + " \n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + "nt_prop[\"threeprime\"]=tprime_list" + ] + }, + { + "cell_type": "code", + "execution_count": 368, + "id": "c3891b4a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_helix_angle(part, helix_id, indices):\n", + " \"\"\" Get \"start_orientation\" for helix \"\"\"\n", + " # import ipdb\n", + " # ipdb.set_trace()\n", + "\n", + " \"\"\" FROM CADNANO2.5\n", + " + angle is CCW\n", + " - angle is CW\n", + " Right handed DNA rotates clockwise from 5' to 3'\n", + " we use the convention the 5' end starts at 0 degrees\n", + " and it's pair is minor_groove_angle degrees away\n", + " direction, hence the minus signs. eulerZ\n", + " \"\"\"\n", + "\n", + " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n", + " ['helical_pitch',\n", + " 'bases_per_repeat',\n", + " 'turns_per_repeat',\n", + " 'eulerZ',\n", + " 'minor_groove_angle']]\n", + " twist_per_base = tpr*360./bpr\n", + " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n", + " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n", + " return rotationAboutAxis(np.array((0,0,1)),angle)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 429, + "id": "b3fcb6ac", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + "\n", + "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + "\n", + "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "d7794e7b", + "execution_count": 430, + "id": "2565f375", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "33 -1\n", + "68 -1\n", + "102 -1\n", + "136 -1\n", + "142 -1\n", + "176 -1\n", + "194 399\n", + "211 -1\n", + "233 20\n", + "281 -1\n", + "284 -1\n", + "351 -1\n", + "354 145\n", + "413 -1\n", + "Name: bp, dtype: int64" + ] + }, + "execution_count": 430, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stackid" + ] + }, + { + "cell_type": "code", + "execution_count": 431, + "id": "ecf2414e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "232" + ] + }, + "execution_count": 431, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop[\"stack\"][233]" + ] + }, + { + "cell_type": "code", + "execution_count": 433, + "id": "5bc1a2b8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "353" + ] + }, + "execution_count": 433, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop[\"stack\"][354]" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "id": "5f500be3", "metadata": {}, "outputs": [], "source": [ - "import cadnano\n", - "from cadnano.document import Document\n" + "scaf_id=[nttype(vslist[\"scaf\"][i]) for i in vslist.index]\n", + "stap_id=[nttype(vslist[\"stap\"][i]) for i in vslist.index]\n", + "nts=scaf_id+stap_id" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "a18ffc26", + "execution_count": 360, + "id": "d7f0b909", "metadata": {}, "outputs": [], "source": [ - "df=pd.DataFrame(data=d)\n", - "df=df.set_index(\"num\")" + "nt_prop[\"orientation\"]=[get_helix_angle(p,i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "22e5fd1e", + "execution_count": 190, + "id": "686640b1", "metadata": {}, "outputs": [ { @@ -212,215 +1227,146 @@ "5 [[9, 0]] " ] }, - "execution_count": 5, + "execution_count": 190, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "6ddb3f62", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found cadnano version 2 file\n" - ] - } - ], - "source": [ - "doc = Document()\n", - "def read_json_file(filename):\n", - " import json\n", - " import re\n", - "\n", - " try:\n", - " with open(filename) as ch:\n", - " data = json.load(ch)\n", - " except:\n", - " with open(filename) as ch:\n", - " content = \"\"\n", - " for l in ch:\n", - " l = re.sub(r\"'\", r'\"', l)\n", - " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", - " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", - " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", - " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", - " content += l+\"\\n\"\n", - " data = json.loads(content)\n", - " return data\n", - "f=read_json_file(\"test.json\")\n", - "cadnano.fileio.v2decode.decode(doc, f)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "9a6e2f17", - "metadata": {}, - "outputs": [], - "source": [ - "def get_lattice(part):\n", - " lattice_type = None\n", - " _gt = part.getGridType()\n", - " try:\n", - " lattice_type = _gt.name.lower()\n", - " except:\n", - " if _gt == 1:\n", - " lattice_type = 'square'\n", - " elif _gt == 2:\n", - " lattice_type = 'honeycomb'\n", - " else:\n", - " print(\"WARNING: unable to determine cadnano part lattice type\")\n", - " return lattice_type\n" + "vslist" ] }, { "cell_type": "code", - "execution_count": 121, - "id": "0253e593", + "execution_count": 200, + "id": "ff964cef", "metadata": {}, "outputs": [], "source": [ - "def read_json_file(filename):\n", - " import json\n", - " import re\n", - " import cadnano\n", - " from cadnano.document import Document\n", - "\n", - " try:\n", - " with open(filename) as ch:\n", - " json_data = json.load(ch)\n", - " except:\n", - " with open(filename) as ch:\n", - " content = \"\"\n", - " for l in ch:\n", - " l = re.sub(r\"'\", r'\"', l)\n", - " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", - " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", - " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", - " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", - " content += l+\"\\n\"\n", - " json_data = json.loads(content)\n", - "\n", - " try:\n", - " doc = Document()\n", - " cadnano.fileio.v3decode.decode(doc, json_data)\n", - " decoder = 3\n", - " except:\n", - " doc = Document()\n", - " cadnano.fileio.v2decode.decode(doc, json_data)\n", - " decoder = 2\n", - "\n", - " parts = [p for p in doc.getParts()]\n", - " if len(parts) != 1:\n", - " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n", - " part = parts[0]\n", + "def mrdna_model_from_cadnano(json_data,**model_parameters):\n", + " part,vslist=decode_cadnano_part(json_data)\n", + " props = part.getModelProperties().copy()\n", "\n", - " if decoder == 2:\n", - " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n", - " l = get_lattice(part)\n", - " if l == 'square':\n", - " for id_num in part.getIdNums():\n", - " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n", - " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n", - " df=pd.DataFrame(json_data[\"vstrands\"])\n", - " n_df=df.set_index(\"num\")\n", - " return part,n_df\n" + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " else:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=np.array([nttype(vslist['scaf'][i]) for i in vslist.index])\n", + " stap_id=np.array([nttype(vslist['stap'][i]) for i in vslist.index])\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(scaf_id,axis=1)\n", + " vslist[\"stapnt\"]=np.sum(stap_id,axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " vhi,zids=np.where(np.array(scaf_id+stap_id)==1)\n", + " nt_prop[\"vh\"]=vhi\n", + " nt_prop[\"zid\"]=zids\n", + " nt_prop[\"r\"] =part.getCoordinate(nt_prop[\"vh\"],nt_prop[\"zid\"])\n", + " return nt_prop\n" ] }, { "cell_type": "code", - "execution_count": 44, - "id": "2929c2ed", + "execution_count": 201, + "id": "97bce4fa", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[42, 42, 42, 42, 42, 42]" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'decode_cadnano_part' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-201-c5d589a8b80d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m<ipython-input-200-181a924488ad>\u001b[0m in \u001b[0;36mmrdna_model_from_cadnano\u001b[0;34m(json_data, **model_parameters)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mmodel_parameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpart\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvslist\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_cadnano_part\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprops\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetModelProperties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'point_type'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mPointType\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mARBITRARY\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'decode_cadnano_part' is not defined" + ] } ], "source": [ - "bpnum=[len(df[\"scaf\"][i]) for i in df.index]\n", - "bpnum" + "mrdna_model_from_cadnano(\"test.json\")" ] }, { "cell_type": "code", - "execution_count": 122, - "id": "20eab478", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found cadnano version 2 file\n" - ] - } - ], + "execution_count": 146, + "id": "30246519", + "metadata": {}, + "outputs": [], "source": [ - "f,b=read_json_file(\"test.json\")\n" + "a,b=np.where(np.array(nts)==1)" ] }, { "cell_type": "code", - "execution_count": 52, - "id": "4ab55565", + "execution_count": 148, + "id": "2b287b99", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Int64Index([0, 1, 2, 3, 4, 5], dtype='int64', name='num')" + "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n", + " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40,\n", + " 41, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n", + " 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39,\n", + " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39,\n", + " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 37,\n", + " 38, 39, 40, 41, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", + " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n", + " 35, 36, 37, 38, 39, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39])" ] }, - "execution_count": 52, + "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "b.index" + "nt_prop=pd.DataFrame(index)" ] }, { "cell_type": "code", - "execution_count": 83, - "id": "c55fdc19", + "execution_count": 137, + "id": "781327b0", "metadata": {}, "outputs": [], "source": [ - "def ntcount(scafs):\n", + "def nttype(scafs):\n", " def judge(i):\n", " if i ==[-1,-1,-1,-1]:\n", " return 0\n", " else: return 1\n", " n=np.array([judge(i) for i in scafs])\n", - " return np.sum(n)\n", + " return n\n", "\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "91188f0a", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 84, - "id": "3552d3d6", + "id": "a4317b6b", "metadata": {}, "outputs": [], "source": [ @@ -430,23 +1376,134 @@ }, { "cell_type": "code", - "execution_count": 123, - "id": "58b92427", + "execution_count": 156, + "id": "7154c666", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " <tr>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <th>0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <th>2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>8</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " r bp stack threeprime seq orientation\n", + "vh zid \n", + "0 0 NaN NaN NaN NaN NaN NaN\n", + "1 3 NaN NaN NaN NaN NaN NaN\n", + "2 1 NaN NaN NaN NaN NaN NaN\n", + "3 2 NaN NaN NaN NaN NaN NaN\n", + "1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "i=range(5)\n", "col=[\"vh\",\"zid\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"]\n", "d=pd.DataFrame(index=i,columns=col)\n", "d['vh']=[0,1,2,3,1]\n", "d['zid']=[0,3,1,2,8]\n", - "d.set_index([\"vh\",\"zid\"],inplace=True)\n" + "d.set_index([\"vh\",\"zid\"],inplace=True)\n", + "d" ] }, { "cell_type": "code", - "execution_count": 126, - "id": "aaf6cb45", + "execution_count": 157, + "id": "f82be5d8", "metadata": {}, "outputs": [ { @@ -549,19 +1606,21 @@ "4 1 8 NaN NaN NaN NaN NaN NaN" ] }, - "execution_count": 126, + "execution_count": 157, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "d.reset_index()" + "\n", + "d=d.reset_index()\n", + "d" ] }, { "cell_type": "code", "execution_count": 128, - "id": "6daf8f3b", + "id": "4053cca5", "metadata": {}, "outputs": [ { @@ -583,7 +1642,7 @@ { "cell_type": "code", "execution_count": 127, - "id": "126916f8", + "id": "2a012588", "metadata": {}, "outputs": [ { @@ -703,7 +1762,7 @@ { "cell_type": "code", "execution_count": 100, - "id": "18f368b0", + "id": "acfa6f0c", "metadata": {}, "outputs": [ { @@ -728,7 +1787,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "998eed5e", + "id": "7e82631e", "metadata": {}, "outputs": [ { @@ -749,7 +1808,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "6e62da4d", + "id": "c69e710a", "metadata": {}, "outputs": [ { @@ -771,7 +1830,7 @@ { "cell_type": "code", "execution_count": 198, - "id": "b786e74c", + "id": "c93f7b4b", "metadata": {}, "outputs": [ { @@ -793,7 +1852,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "e36b19a3", + "id": "f44d5593", "metadata": {}, "outputs": [ { @@ -814,7 +1873,7 @@ { "cell_type": "code", "execution_count": 173, - "id": "46b64c39", + "id": "da99c2f2", "metadata": {}, "outputs": [], "source": [ @@ -850,7 +1909,7 @@ { "cell_type": "code", "execution_count": 177, - "id": "4d59fd91", + "id": "bf41fbbe", "metadata": {}, "outputs": [], "source": [ @@ -886,7 +1945,7 @@ { "cell_type": "code", "execution_count": 187, - "id": "d49175db", + "id": "becc8ec8", "metadata": {}, "outputs": [ { @@ -1113,7 +2172,7 @@ { "cell_type": "code", "execution_count": 142, - "id": "a033b838", + "id": "65d304e7", "metadata": {}, "outputs": [], "source": [ @@ -1150,7 +2209,7 @@ { "cell_type": "code", "execution_count": 117, - "id": "2f0d79a6", + "id": "1102332d", "metadata": {}, "outputs": [], "source": [ @@ -1171,7 +2230,7 @@ { "cell_type": "code", "execution_count": 116, - "id": "7874ae4e", + "id": "9349147e", "metadata": {}, "outputs": [ { @@ -1383,7 +2442,7 @@ { "cell_type": "code", "execution_count": 157, - "id": "ba99db6a", + "id": "23c69dfb", "metadata": {}, "outputs": [ { @@ -1407,7 +2466,7 @@ { "cell_type": "code", "execution_count": 152, - "id": "c26f006c", + "id": "c6e2518a", "metadata": {}, "outputs": [ { @@ -1428,7 +2487,7 @@ { "cell_type": "code", "execution_count": 62, - "id": "108484f8", + "id": "bf85089c", "metadata": {}, "outputs": [ { @@ -1656,7 +2715,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "3c44e8e6", + "id": "3fd57867", "metadata": {}, "outputs": [], "source": [ @@ -1782,7 +2841,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "2b15ebca", + "id": "014caa04", "metadata": {}, "outputs": [], "source": [ @@ -1803,7 +2862,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "b115cc56", + "id": "95719bec", "metadata": {}, "outputs": [ { @@ -1865,7 +2924,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "3ef5fda5", + "id": "f4034eb9", "metadata": {}, "outputs": [ { @@ -1927,7 +2986,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6e4cec2f", + "id": "b9adb4fe", "metadata": {}, "outputs": [], "source": [ @@ -1940,7 +2999,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "c194aaf1", + "id": "ca67b08f", "metadata": {}, "outputs": [ { @@ -1961,7 +3020,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "fd8812c1", + "id": "86a7a627", "metadata": {}, "outputs": [ { @@ -2010,7 +3069,7 @@ { "cell_type": "code", "execution_count": 36, - "id": "eb8c3d36", + "id": "63614edc", "metadata": {}, "outputs": [ { @@ -2072,7 +3131,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "fc31059a", + "id": "177fe2f1", "metadata": {}, "outputs": [ { @@ -2098,7 +3157,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87e1b359", + "id": "0c4a7b82", "metadata": {}, "outputs": [], "source": [] @@ -2106,7 +3165,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "8ed06a4e", + "id": "82b5073c", "metadata": {}, "outputs": [ { @@ -2126,7 +3185,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "dc56eb01", + "id": "684cdf08", "metadata": {}, "outputs": [ { @@ -2147,7 +3206,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "e24820fe", + "id": "95b9f60b", "metadata": {}, "outputs": [ { @@ -2217,7 +3276,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "adabfec6", + "id": "da04d8df", "metadata": {}, "outputs": [], "source": [ @@ -2228,7 +3287,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "83617f96", + "id": "fc9fa817", "metadata": {}, "outputs": [ { @@ -2337,7 +3396,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "a9e7ae70", + "id": "d030eea1", "metadata": {}, "outputs": [ { @@ -2358,7 +3417,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "062ccc4d", + "id": "6586e97f", "metadata": {}, "outputs": [ { @@ -2389,7 +3448,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "225701b3", + "id": "f087752e", "metadata": {}, "outputs": [ { @@ -2886,7 +3945,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "eb0dfa9d", + "id": "d548b882", "metadata": {}, "outputs": [ { @@ -2920,7 +3979,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "2ea725af", + "id": "bd65fa30", "metadata": {}, "outputs": [ { @@ -2940,7 +3999,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "e7ecad72", + "id": "96fe4106", "metadata": {}, "outputs": [], "source": [ @@ -2950,7 +4009,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "921ab252", + "id": "70b6c9a6", "metadata": {}, "outputs": [ { @@ -2971,7 +4030,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "5e3ddee4", + "id": "fc17c5e7", "metadata": {}, "outputs": [ { @@ -2998,7 +4057,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16b9a0f0", + "id": "ba6c81e7", "metadata": {}, "outputs": [], "source": [] diff --git a/mrdna/readers/.nfs0000000000093fec000043e9 b/mrdna/readers/.nfs0000000000093fec000043e9 deleted file mode 100644 index 7123f88b4445178a630adc7268e37e691a15e980..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeHOZ-^vE74I00G5<+Ig5T<9h3VPr>HU|xoQtcjcbCf=Zui)`%O%-e+n(;4nacHa zcdBc4cXA#Q6^)4aMNLE{elQY<|30W7Br%Ey62*KIA|#@M3K2!npvD+~ulnE2?A~!9 z@q^M0zwPO&datTpy?XWD>)zJ%ol8gA0ejlOb)8{6`rF6bPd#wCarUnbBNpL61Y(w| zE_A6@CunX(^7PEDcHA8#!b@xs1R{(?VspQr(@y!Wa;Fk5H@Q48m2;o!Nk4S6zmmH? zmkAnmdOTFU$mfC6mQl~~T(k-!J09fhs(U4YN&>G*0#_Szvom_DPt^9X8?Hb3ny9PB zl>{mYR1&BpP)VSYKqY}n0+j?R3H)zMK*g6CAI22klug6S?w5?+U(K$&vT}c{{@+>s zN>={FSp6$meNJ~})<NFSzq0xdW^~b5{iRv?-P!%wvHGmIx>XXWBv479l0YSaN&=Mx zDhX5)s3cHHpprl(fl30qNWeuveiO={qIeqb|MU3&57!#T^T0E}6TsJiF9BZ!jsSnT z#xQ;a{1A8=h=Ez)2H>Tu4Pz5n1MUIt25tr34!nAmVf-EV74U7~Tfj$vyMRl9$F4Mt zGr&5q09*n5<_g33HSkT~LqHq219(4R12+Sg06%$~VSEZ$1sHH6a077ea>Iy#n}8?Z zY8WAKJ@CiNFfX7B90yi`cK|m7zk7>e`~o-!JP0UY5@-O|0>6K=VSE?(4)9Une&9X8 zpOL8e9`G3O5U>io8+ZW;mgj+|fG2^+fX@J1z)4^qa0^feUdCFy2>bw`b$Jpv&&7tW zu;06V=@46Bu`89`;cCfW?)Q$m+h&99Wiv+Jly7@HR`bO(mG7VhnI<2jRdO&7;bvFD zgSNdL;d73}Q4xCfg{REMuJ!a0$Icd-*a<|UP`9>GQYz_&{w@`p-J!y=ISnL|q9eit zjIv+lK(q1Y6=-s%EByIfP<INc`fhML55)G0=3=6{u{J-u!P=2zcwizd<SIz6iP<<? z^iTYE6DyTt#IcOx#2V^dcig4!j-iT6C3vzJ`f1<bY!G37X_@B_JV^NH(n(h&oGnLc zsTT*FzMA3wVJRbN)=<bqSy~(AJHbw@99m(n?=uhI*)Xgi5B0BOP@sfu3DsqbtM6Mn zvAVc=H;W??DzpsSqSKeI5>c2K`LINr=}D5>nRVUNQG=M*8*E{L9V5~cc(e<4<s1qg zngNz3UzMZu@lPoqvtfHtJb-Wt3ax)yj1JVL!)fiu9(U77YT^}*(_cH<>F<w-VOCRg znon6Pbn#V^jJC0cwij}2B@%ihRXXp(RNyA15JotiPirogJ4tnf;i$Ikveid8?=s#I zjs-WLw^{xTmP0Fq4ew-MX(c8tghi!|A?qNy>dTN}VS;jq`Uz6y{ea7RV3*VuHot>C z;I!O?Pl^!o#@aO5w7qY#*O3v-OEJNTlv_#PJRkkuVR|xu2c`=yqg{fTisj_o{%L!v z70aR(dp%dZhp*qHlMdg}ldf-+=I!;cX7yBdb?XfpDy>qYS3vTZyXp;NObhy7V7yk> z8`uq;SSYVMG~8}%F)X1)3u<L5X0c3(u+G|EX!DX8<<#=pLFBEac0>49X<llzS|MUz zNbZr%vtVID57x5F85X&putapi#Kr-jRl1<S`Nf2)C$MKiGLca0F_mCG???`9cemU@ zLAb?P9~PZDfVJI(K}WaxabF!4+GmN<_SsczI_FRjuZm7ru`W92K0CC$$Yi8kg)Tfy z@-+4uXW9UO73}?7y&!C6TMGj{c&aGfJ5)MU49T$DR#%R)gNx{6d1cn#SBh<diP&%1 z35Si4uE&RtGgPIPky;d;5Vs>-v6`NkgVP&x)(1>2J$~t+Wcg}nON3*_Nmoqot<(Tz zwjXPV(^{L6VROWGy(TA5Z@IFQlt<(7^wU#)`q+*mLZKWd>p&||ZuQFjr;8mNq&VqC zk2_eT;X}Nv_GFYK+Fu$HN?DClQo6ZvW9`FK<#zDOM*x*x!|&WjvG6+~M<v1G!+kww zTeHK-Borr~rJ)-W5w%16(kM-&`vZmKM9|T<J9F~2#<*UH>GAW+)3)tW?x`5E19j&S zqfeJo$#m<$RSKtt_B1eDhqNLqGt>6@n(O9Ngl2Tpx(DsZ>x+A?54uLH+NcH)vBeB8 za1-pQ^qXWoSaW!uq<6-|1h_VtC#2U8TuGZbQL^u;B(KV(h`o)yoOGjp;IkH|{q3E` z_I0(oU<WeZi`9Th-XTk1mRgkVWwJD67qFsaG>Gdq?QVQ7jc2)|u95feNBveXXof;} zmyL!h$uXSQRwTf1V(tkyV7*9*wjO(0?YXeN<@7K&3`FZ+jTT~`-V3!{NVo^V5Ou;K zN?0Z~EVpz%i?ZeG?FU6>y?&qs99z9?B=V3%<VC%f2)Pe2YjbAX`crWm--yF2xv#HH zbY^RMGjrsB;f9*Ld8Xx>DmO~QBeB?VL5|CDJuB{Kbrw5BG8-?>xw;s6IuI|4Bncxm zcl231&FY{owXl+S-t3GNk)6i!F1Dh+THF?iVHCtE`%VzGa)$Ifua6Qd86^1=yv=w( z^9|#?h-~cyYfeHJBOkMxXMNRf9-tC~;{WFnpKc(grT9O8zy4{&_@{t9z@HG){|tB( z_#E(Afa3iPU=E<TzX4naJck(n9PoYMD}Vq_0w(|qxDj|9asETV=Ya=+R}kaB0Q?wu z7&rx71zZWdjJW=v0ObIl0looz6`(vo46FgS0u1;EV*M9^M}SWN9|t}F+zFfnt^xjr z9KcV3uLGY19t6$;61W3+7hnSh@GNo!KL;KGJ_Z~BY=H6xF9N>=o&(MSUjQBe@HvIi z0$kuW;4tuBU=Q#TWc)PnWq{<p4pdhqfl31ZrxGBqf}8-xWIuEgfwN58cuEMV$cqA# z!Pr}@c5l>b3cqGC`VT3Kb>jZQG)rBhl%C4ry3o1d+?zz9(-r|Y+rv%3Nw=_jbS|59 zYBpuslMS1G)vb2JMmXA!&6!~;9MYjOYp3wvTR0gQ+>0}3Yc`Tz-~#@6Q_6z1$-$t5 zC*wU?&m1UNc_as%(1q*kP`;<`I6YT{j#GypG=a<z^?^yj%buXgY2R|%nz3d#sCp1? z_5z#!9Pjp(A8m#3j*Esmvttf5I>a~O^dVq481=yyV;teJ1cyCO5@jg}eI%E@V|qeU ze3H8gaFtV^!D0|>P?Rr>Jf*@I&A{!#D|Iw1h|J>)x4B5zT?AKB5;$J)I9)hA<gE1K znF9tDL%8|yD2!Z=LXK1-3wT%zTW8s(v)oInF+`aSE_KY-rZ;p<mh&Rwe0g{22EJq3 z6?vD`SA(VQa>^dAyFpYF5LgJ$4r$@^k^YuK$vxzv)4azbv6MQS<>C<i$d!BKDWzr& zJ<l1FKdycKG#aBv)cTzAm?QZrQ*RsEM=2u;I#;osNU8PiqWF>=e!a(ZHzkQxtiS?^ ze|$abC+Zd}#+fYxNr?ExaOT|(cU<Xp#U?iy?Gx;DoluD6faO7IrmVw3jWH~Ve5)?5 zE-x(~ffE<H@I=Yc_qpN{=>_=3X+)x(WtMSbl!zV5{f;Uy^)TEeq*w1^jstSB%k%(Z z3heO?guB3IN8_e(>g2w@D|ty;63(QA<2lbU2I$2m86hpR#bp*K7y6>dpyx-q<|9)0 zl(5WNjd7y2(rd6YGRzh!lR2r)?kY$%_RegP@EthXR-TI6fYgvcoHX}O%^+`?YS{4F zK?WDiX@OxU4-=E|O*<rI)qF5@{dxuwec2YB1ls&&D^S*;_(IV!?S&;opd2ZJT!oO* zVJ5<esLedSzulgmd1r(95i&_+8DKnhta&(1<I)JzPIFyC5KEzMUsCERl9W6P2GDxE z3FuH*hq#m%DIE+v-LW!LFFk#lD8@~<1jefnN&3R$uug3wU@~hfVK4w^ub<{E5N9i# zKGczm<QzqD`J^;62JNIcDt}qR5PYjHEUQB9#?Ceh$r46Sciv!o7Fedcx&WhEi*?Q; z*5)JfNEeUMK+VQRsYu17*laQSY=_!(fQ!i9oJ||q>CMD6EH-AUNEc9!UJqrH%+!5+ zBbEKlowvWa*U_KegqrinON3nXBvg9sk&jZh%*wK9>)n`+;C$A3bZ3%jF6P2;Wu6d; z2nNDMqK(!Y39Fr(E~{XB7g(^jwjS0-7!k7x+Ay&AcG7Q|vSzMtHRj=k$@=<CZ5}o~ z*vWK^#9B`#iA+;jwe^{cqB=87b>;$8U8Uk=erigmgeY;?hEc?Ou-+8>l|;XTo4RFs wZf^hF+!Q5!@lW@iX5ijGXp(@MB5cM31#{ly+sNW5s-@R#$a^F00~>4n2cRed(*OVf diff --git a/mrdna/readers/__init__.py b/mrdna/readers/__init__.py index 7e0e4db..2778b41 100644 --- a/mrdna/readers/__init__.py +++ b/mrdna/readers/__init__.py @@ -7,6 +7,11 @@ def read_cadnano(json_file, sequence=None, fill_sequence='T', **model_parameters data = read_json_file(json_file) return model_from_cadnano_json(data, sequence, fill_sequence, **model_parameters) +def read_cadnano_input(json_file,sequence=None,**model_parameters): + from .segmentmodel_from_cadnano import mrdna_model_from_cadnano + + return mrdna_model_from_cadnano(json_file,seq=sequence,**model_parameters) + def read_vhelix(maya_file, **model_parameters): from .polygon_mesh import parse_maya_file, convert_maya_bases_to_segment_model diff --git a/mrdna/readers/list.pdb b/mrdna/readers/list.pdb deleted file mode 100644 index a986d3d..0000000 --- a/mrdna/readers/list.pdb +++ /dev/null @@ -1,8 +0,0 @@ -CRYST1 5000.000 5000.000 5000.000 90.00 90.00 90.00 P 1 1 -ATOM 1 DNA DNA A 1 0.50000 0.00000 8.50000 0.00 0.00 0 0 -ATOM 2 DNA DNA A 2 0.50000 0.00000 15.3000 0.00 0.00 0 1 -ATOM 3 DNA DNA A 3 0.50000 0.00000 15.3000 0.00 0.00 0 1 -ATOM 4 NAS NAS A 4 0.00000 0.00000-1.70000 0.00 0.00 0 2 -ATOM 5 NAS NAS A 5 0.00000 0.00000 1.70000 0.00 0.00 0 2 -ATOM 6 NAS NAS A 6 0.00000 0.00000 5.10000 0.00 0.00 0 2 -ATOM 7 NAS NAS A 7 0.00000 0.00000 20.4000 0.00 0.00 0 3 diff --git a/mrdna/readers/list.psf b/mrdna/readers/list.psf deleted file mode 100644 index 828fb57..0000000 --- a/mrdna/readers/list.psf +++ /dev/null @@ -1,39 +0,0 @@ -PSF NAMD - - 0 !NTITLE - - 7 !NATOM - 1 0 1 DNA DNA D000 0.000000 225.0000 0 - 2 1 2 DNA DNA D001 0.000000 300.0000 0 - 3 1 3 DNA DNA D002 0.000000 375.0000 0 - 4 2 4 NAS NAS S000 0.000000 150.0000 0 - 5 2 5 NAS NAS S000 0.000000 150.0000 0 - 6 2 6 NAS NAS S001 0.000000 75.0000 0 - 7 3 7 NAS NAS S000 0.000000 150.0000 0 - - 7 !NBOND - 1 6 4 5 5 6 4 7 - 1 3 2 3 2 7 - - 3 !NTHETA - 4 5 6 5 4 7 1 3 2 - - 0 !NPHI - - 0 !NIMPHI - - - 0 !NDON: donors - - - - 0 !NACC: acceptors - - - - 0 !NNB - - 0 0 0 0 0 0 0 - - 1 0 !NGRP - diff --git a/mrdna/readers/segmentmodel_from_cadnano.py b/mrdna/readers/segmentmodel_from_cadnano.py index c642396..9bccb0c 100644 --- a/mrdna/readers/segmentmodel_from_cadnano.py +++ b/mrdna/readers/segmentmodel_from_cadnano.py @@ -6,10 +6,16 @@ from glob import glob import re import pandas as pd pd.options.mode.chained_assignment = None # default='warn' +from .segmentmodel_from_lists import model_from_basepair_stack_3prime from ..arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment from ..model.dna_sequence import m13 as m13seq +import json +import re +import cadnano +from cadnano.document import Document + ## Only testing on cadnano2.5 ## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined @@ -34,10 +40,6 @@ def get_lattice(part): def read_json_file(filename): - import json - import re - import cadnano - from cadnano.document import Document try: with open(filename) as ch: @@ -138,11 +140,13 @@ def gen_prop_table(json_file): tot_id=scaf_id+stap_id vhi,zidi=np.where(np.array(scaf_id)==1) vhj,zidj=np.where(np.array(stap_id)==1) + vhi=vslist.index[vhi] + vhj=vslist.index[vhj] nt_prop["vh"]=list(vhi)+list(vhj) nt_prop["zid"]=list(zidi)+list(zidj) vhzid=list(zip(nt_prop["vh"],nt_prop["zid"])) - nt_prop["r"]=[part.getCoordinate(i,j) for i,j in zip(nt_prop["vh"],nt_prop["zid"])] - nt_prop["orientation"]=[get_helix_angle(part, helix_id, indices) for i,j in zip(nt_prop["vh"],nt_prop["zid"])] + nt_prop["r"]=[part.getCoordinate(helix_id, indices) for helix_id, indices in zip(nt_prop["vh"],nt_prop["zid"])] + nt_prop["orientation"]=[get_helix_angle(part, helix_id, indices) for helix_id,indices in zip(nt_prop["vh"],nt_prop["zid"])] nt_prop=nt_prop.fillna(-1) for i in range(int(len(vhzid)/2)): try: @@ -152,6 +156,7 @@ def gen_prop_table(json_file): except: pass tprime_list=-np.ones(len(nt_prop.index),dtype=int) + index2=list(zip(vhzid,nt_prop["is_scaf"])) for i in range(len(nt_prop.index)): ((m,n),p)=list(zip(vhzid,nt_prop["is_scaf"]))[i] if p==True: @@ -169,13 +174,17 @@ def gen_prop_table(json_file): (n,)=np.where(nt_prop["threeprime"]==-1) stackid=nt_prop["bp"][[list(nt_prop["threeprime"]).index(i) for i in n]] nt_prop["stack"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop["threeprime"][stackid.index[np.where(np.array(stackid)!=-1)]] + ## Todo: sequence return nt_prop -def mrdna_model_from_cadnano(json_file,**model_parameters): +def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters): nt_prop=gen_prop_table(json_file) - model = model_from_basepair_stack_3prime( nt_prop["r"], nt_prop["bp"], stack, three_prime, seq, orientation, **model_parameters ) - -if __name__ == '__main__': - + if seq is None: + if nt_prop["seq"][0]==-1: + seq=None + else: + seq=nt_prop["seq"] + model = model_from_basepair_stack_3prime( nt_prop["r"], nt_prop["bp"], nt_prop["stack"], nt_prop["threeprime"], seq,nt_prop["orientation"], **model_parameters ) + diff --git a/mrdna/readers/test.ipynb b/mrdna/readers/test.ipynb index 217f357..85e11c4 100644 --- a/mrdna/readers/test.ipynb +++ b/mrdna/readers/test.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 342, - "id": "767fe31e", + "id": "090f69c6", "metadata": { "scrolled": true }, @@ -11,28 +11,517 @@ "source": [ "import pandas as pd\n", "import pickle\n", - "import numpy as np\n", - "\n", - "df=pd.read_json(\"test.json\")\n", - "d=list(df[\"vstrands\"])" + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 343, - "id": "52f3059d", + "execution_count": 2, + "id": "a99e1791", "metadata": {}, "outputs": [], + "source": [ + "from cadnano.views.pathview import pathstyles" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0b58d7e9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _\n", + " _____ ___ _| |___ ___\n", + "| | _| . | | .'|\n", + "|_|_|_|_| |___|_|_|__,| v1.0a.dev74 \n", + "it/its\n", + "\n" + ] + } + ], "source": [ "import cadnano\n", "from cadnano.document import Document\n", "from mrdna.arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis" ] }, + { + "cell_type": "code", + "execution_count": 553, + "id": "6033edc9", + "metadata": {}, + "outputs": [], + "source": [ + "def gen_prop_table(json_file):\n", + " part,vslist=read_json_file(json_file)\n", + " props = part.getModelProperties().copy()\n", + " try:\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " except:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n", + " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt,dtype=bool)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " tot_id=scaf_id+stap_id\n", + " vhi,zidi=np.where(np.array(scaf_id)==1)\n", + " vhj,zidj=np.where(np.array(stap_id)==1)\n", + " vhi=vslist.index[vhi]\n", + " vhj=vslist.index[vhj]\n", + " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n", + " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n", + " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + " nt_prop[\"r\"]=[part.getCoordinate(helix_id, indices) for helix_id, indices in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for helix_id,indices in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop=nt_prop.fillna(-1)\n", + " for i in range(int(len(vhzid)/2)):\n", + " try:\n", + " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n", + " nt_prop[\"bp\"][bp1]=bp2\n", + " nt_prop[\"bp\"][bp2]=bp1\n", + " except:\n", + " pass\n", + " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " index2=list(zip(vhzid,nt_prop[\"is_scaf\"]))\n", + " for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + " nt_prop[\"threeprime\"]=tprime_list\n", + " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + " ## Todo: sequence \n", + "\n", + "\n", + " return nt_prop\n" + ] + }, + { + "cell_type": "code", + "execution_count": 555, + "id": "8e48e7d1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "nt_prop=gen_prop_table(\"test/Na_liu.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 560, + "id": "4da2c91e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 12, 13, 14, ..., 13920, 13921, 13922]),)" + ] + }, + "execution_count": 560, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.where(np.array(nt_prop[\"bp\"])!=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 562, + "id": "da512048", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "vh 0\n", + "zid 21\n", + "is_scaf True\n", + "r [0.0, 15.75, 7.140000000000001]\n", + "bp 7560\n", + "stack -1\n", + "threeprime 13\n", + "seq -1\n", + "orientation [[-4.440892098500626e-16, 1.0, 0.0], [-1.0, -4...\n", + "Name: 12, dtype: object" + ] + }, + "execution_count": 562, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop.loc[12]" + ] + }, + { + "cell_type": "code", + "execution_count": 468, + "id": "95fb0515", + "metadata": {}, + "outputs": [], + "source": [ + "scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + "stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 500, + "id": "dcc227ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 500, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nttype(vslist[\"scaf\"][30])[146]" + ] + }, + { + "cell_type": "code", + "execution_count": 498, + "id": "08d33dfb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 498, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vhi,zidi=np.where(np.array(scaf_id)==1)\n", + "scaf_id[30][146]" + ] + }, + { + "cell_type": "code", + "execution_count": 480, + "id": "c2347336", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 480, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaf_id[30][146]==np.array(scaf_id)[0][9]" + ] + }, + { + "cell_type": "code", + "execution_count": 549, + "id": "795140b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " ...\n", + " 39, 39, 39, 39, 39, 39, 39, 39, 39, 39],\n", + " dtype='int64', name='num', length=7560)" + ] + }, + "execution_count": 549, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def nttype(scafs):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in scafs])\n", + " return n\n", + "d={}\n", + "vslist.index[vhi]" + ] + }, + { + "cell_type": "code", + "execution_count": 544, + "id": "f98c2927", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n", + " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n", + " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n", + " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)" + ] + }, + "execution_count": 544, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.where(vslist.index[vhi]!=vhi)" + ] + }, + { + "cell_type": "code", + "execution_count": 550, + "id": "56385511", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "vh 29\n", + "zid 83\n", + "is_scaf True\n", + "r [-17.537016375, 28.125, 28.220000000000002]\n", + "bp -1\n", + "stack -1\n", + "threeprime -1\n", + "seq -1\n", + "orientation [[-0.5633200580636211, 0.8262387743159955, 0.0...\n", + "Name: 7394, dtype: object" + ] + }, + "execution_count": 550, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop.loc[7394]" + ] + }, + { + "cell_type": "code", + "execution_count": 548, + "id": "8565bdb9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 29, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39, 41, 40, 42, 44, 46, 48, 50],\n", + " dtype='int64', name='num')" + ] + }, + "execution_count": 548, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vslist.index" + ] + }, + { + "cell_type": "code", + "execution_count": 527, + "id": "ffa55e8a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([7394, 7395, 7396, 7397, 7398, 7399, 7400, 7401, 7402, 7403, 7404,\n", + " 7405, 7406, 7407, 7408, 7409, 7410, 7411, 7412, 7413, 7414, 7415,\n", + " 7416, 7417, 7418, 7419, 7420, 7421, 7422, 7423, 7424, 7425, 7426,\n", + " 7427, 7428, 7429, 7430, 7431, 7432, 7433]),)" + ] + }, + "execution_count": 527, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n=list(nt_prop[\"zid\"])\n", + "np.where(np.array(list(nt_prop[\"vh\"]))==29)" + ] + }, + { + "cell_type": "code", + "execution_count": 503, + "id": "43a9cef8", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "((30, 146), True) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-503-1b9956d4cdaf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnt_prop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list" + ] + } + ], + "source": [ + "vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + "index2=list(zip(vhzid,nt_prop[\"is_scaf\"]))\n", + "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " \n", + "print(index2.index(((30,146),(True))))" + ] + }, + { + "cell_type": "code", + "execution_count": 537, + "id": "5d92876d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11 135 30 146 3010\n" + ] + }, + { + "ename": "ValueError", + "evalue": "((30, 146), True) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-537-f07d5cbf0867>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mtprime_list\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: ((30, 146), True) is not in list" + ] + } + ], + "source": [ + " \n", + "for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k==30 and l==146:\n", + " print(m,n,k,l,i)\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + "nt_prop[\"threeprime\"]=tprime_list\n", + "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + " ## Todo: sequence " + ] + }, + { + "cell_type": "code", + "execution_count": 491, + "id": "6f5d2675", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[11, 135, 30, 147]" + ] + }, + "execution_count": 491, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(vslist.loc[30][\"scaf\"])[146]" + ] + }, + { + "cell_type": "code", + "execution_count": 493, + "id": "15fa1dec", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "(30, 146) is not in list", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-493-d1dd239124c3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvhzid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m146\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: (30, 146) is not in list" + ] + } + ], + "source": [ + "vhzid.index((30,146))" + ] + }, { "cell_type": "code", "execution_count": 4, - "id": "2fb0f9e3", + "id": "abd8b729", "metadata": {}, "outputs": [], "source": [ @@ -42,8 +531,67 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "312a3b3c", + "execution_count": null, + "id": "86dc559b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b7a36513", + "metadata": {}, + "outputs": [], + "source": [ + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(\"WARNING: unable to determine cadnano part lattice type\")\n", + " return lattice_type\n" + ] + }, + { + "cell_type": "code", + "execution_count": 358, + "id": "c590e405", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + }, + { + "data": { + "text/plain": [ + "NucleicAcidPart_-1_3904" + ] + }, + "execution_count": 358, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p,f=read_json_file(\"test.json\")\n", + "p" + ] + }, + { + "cell_type": "code", + "execution_count": 441, + "id": "28e8fc5f", "metadata": {}, "outputs": [ { @@ -214,164 +762,19 @@ "5 [[9, 0]] " ] }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "df00a798", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found cadnano version 2 file\n" - ] - } - ], - "source": [ - "doc = Document()\n", - "def read_json_file(filename):\n", - " import json\n", - " import re\n", - "\n", - " try:\n", - " with open(filename) as ch:\n", - " data = json.load(ch)\n", - " except:\n", - " with open(filename) as ch:\n", - " content = \"\"\n", - " for l in ch:\n", - " l = re.sub(r\"'\", r'\"', l)\n", - " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", - " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", - " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", - " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", - " content += l+\"\\n\"\n", - " data = json.loads(content)\n", - " return data\n", - "f=read_json_file(\"test.json\")\n", - "cadnano.fileio.v2decode.decode(doc, f)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "ab971d26", - "metadata": {}, - "outputs": [], - "source": [ - "def get_lattice(part):\n", - " lattice_type = None\n", - " _gt = part.getGridType()\n", - " try:\n", - " lattice_type = _gt.name.lower()\n", - " except:\n", - " if _gt == 1:\n", - " lattice_type = 'square'\n", - " elif _gt == 2:\n", - " lattice_type = 'honeycomb'\n", - " else:\n", - " print(\"WARNING: unable to determine cadnano part lattice type\")\n", - " return lattice_type\n" - ] - }, - { - "cell_type": "code", - "execution_count": 195, - "id": "1a5439a1", - "metadata": {}, - "outputs": [], - "source": [ - "def read_json_file(filename):\n", - " import json\n", - " import re\n", - " import cadnano\n", - " from cadnano.document import Document\n", - "\n", - " try:\n", - " with open(filename) as ch:\n", - " json_data = json.load(ch)\n", - " except:\n", - " with open(filename) as ch:\n", - " content = \"\"\n", - " for l in ch:\n", - " l = re.sub(r\"'\", r'\"', l)\n", - " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", - " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", - " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", - " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", - " content += l+\"\\n\"\n", - " json_data = json.loads(content)\n", - "\n", - " try:\n", - " doc = Document()\n", - " cadnano.fileio.v3decode.decode(doc, json_data)\n", - " decoder = 3\n", - " except:\n", - " doc = Document()\n", - " cadnano.fileio.v2decode.decode(doc, json_data)\n", - " decoder = 2\n", - "\n", - " parts = [p for p in doc.getParts()]\n", - " if len(parts) != 1:\n", - " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n", - " part = parts[0]\n", - "\n", - " if decoder == 2:\n", - " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n", - " l = get_lattice(part)\n", - " if l == 'square':\n", - " for id_num in part.getIdNums():\n", - " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n", - " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n", - " df=pd.DataFrame(json_data[\"vstrands\"])\n", - " n_df=df.set_index(\"num\")\n", - " return part,n_df\n" - ] - }, - { - "cell_type": "code", - "execution_count": 358, - "id": "254f8faf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found cadnano version 2 file\n" - ] - }, - { - "data": { - "text/plain": [ - "NucleicAcidPart_-1_3904" - ] - }, - "execution_count": 358, + "execution_count": 441, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "p,f=read_json_file(\"test.json\")\n", - "p" + "f" ] }, { "cell_type": "code", "execution_count": 199, - "id": "054ffa7d", + "id": "75798a1d", "metadata": {}, "outputs": [ { @@ -392,7 +795,7 @@ { "cell_type": "code", "execution_count": 434, - "id": "5b268b86", + "id": "e90bd308", "metadata": {}, "outputs": [], "source": [ @@ -459,60 +862,53 @@ }, { "cell_type": "code", - "execution_count": 437, - "id": "67d740a5", + "execution_count": 442, + "id": "f1b789b7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225,\n", - " 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,\n", - " 239, 240, 241, 242, 243, 244, -1, -1, -1, 249, 250, 251, 252,\n", - " 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265,\n", - " 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278,\n", - " 279, 280, -1, -1, -1, 285, 286, 287, 288, 289, 290, 291, 292,\n", - " 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305,\n", - " 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, -1, -1, -1,\n", - " 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332,\n", - " 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345,\n", - " 346, 347, 348, 349, 350, -1, -1, -1, -1, -1, -1, -1, -1,\n", - " -1, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364,\n", - " 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377,\n", - " 378, 379, 380, 381, 382, 383, -1, -1, -1, -1, 384, 385, 386,\n", - " 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399,\n", - " 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, -1,\n", - " -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7,\n", - " 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n", - " 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, -1,\n", - " -1, -1, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,\n", - " 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,\n", - " 59, 60, 61, 62, 63, 64, 65, 66, -1, -1, -1, -1, 70,\n", - " 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,\n", - " 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,\n", - " 97, 98, 99, 100, -1, -1, -1, -1, 104, 105, 106, 107, 108,\n", - " 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,\n", - " 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,\n", - " -1, -1, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154,\n", - " 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,\n", - " 168, 169, 170, 171, 172, 173, 174, 179, 180, 181, 182, 183, 184,\n", - " 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,\n", - " 198, 199, 200, 201, 202, 203, 204, 205, 206, -1, -1, -1])" + "array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", + " -1, -1, -1, -1, -1, -1, -1])" ] }, - "execution_count": 437, + "execution_count": 442, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "np.array(nt_prop[\"bp\"])" + "np.array(nt_prop[\"seq\"])" ] }, { "cell_type": "code", "execution_count": 302, - "id": "8df80d92", + "id": "15c2f5a3", "metadata": {}, "outputs": [], "source": [ @@ -536,7 +932,7 @@ { "cell_type": "code", "execution_count": 368, - "id": "1ba36ce9", + "id": "2103280f", "metadata": {}, "outputs": [], "source": [ @@ -569,7 +965,7 @@ { "cell_type": "code", "execution_count": 429, - "id": "c275fa92", + "id": "db2d23a1", "metadata": {}, "outputs": [], "source": [ @@ -584,7 +980,7 @@ { "cell_type": "code", "execution_count": 430, - "id": "05d1934e", + "id": "7a63c8bc", "metadata": {}, "outputs": [ { @@ -619,7 +1015,7 @@ { "cell_type": "code", "execution_count": 431, - "id": "7d0daba0", + "id": "bdf5fe8f", "metadata": {}, "outputs": [ { @@ -640,7 +1036,7 @@ { "cell_type": "code", "execution_count": 433, - "id": "214ff037", + "id": "6f25d6f3", "metadata": {}, "outputs": [ { @@ -661,7 +1057,7 @@ { "cell_type": "code", "execution_count": 167, - "id": "eb07d74e", + "id": "75d426b5", "metadata": {}, "outputs": [], "source": [ @@ -673,7 +1069,7 @@ { "cell_type": "code", "execution_count": 360, - "id": "63302624", + "id": "f7ebdd30", "metadata": {}, "outputs": [], "source": [ @@ -683,7 +1079,7 @@ { "cell_type": "code", "execution_count": 190, - "id": "bc0af039", + "id": "eead924e", "metadata": {}, "outputs": [ { @@ -866,7 +1262,7 @@ { "cell_type": "code", "execution_count": 200, - "id": "fd6b27af", + "id": "71493bd1", "metadata": {}, "outputs": [], "source": [ @@ -902,7 +1298,7 @@ { "cell_type": "code", "execution_count": 201, - "id": "1130c056", + "id": "3622ebc9", "metadata": {}, "outputs": [ { @@ -925,7 +1321,7 @@ { "cell_type": "code", "execution_count": 146, - "id": "7a77a10c", + "id": "74c4ae5a", "metadata": {}, "outputs": [], "source": [ @@ -935,7 +1331,7 @@ { "cell_type": "code", "execution_count": 148, - "id": "07bed4f1", + "id": "e6dc1404", "metadata": {}, "outputs": [ { @@ -968,7 +1364,7 @@ { "cell_type": "code", "execution_count": 137, - "id": "efe96d4a", + "id": "59fac010", "metadata": {}, "outputs": [], "source": [ @@ -985,7 +1381,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9ac7ac46", + "id": "d42bb41e", "metadata": {}, "outputs": [], "source": [] @@ -993,7 +1389,7 @@ { "cell_type": "code", "execution_count": 84, - "id": "a28688b9", + "id": "c73af530", "metadata": {}, "outputs": [], "source": [ @@ -1004,7 +1400,7 @@ { "cell_type": "code", "execution_count": 156, - "id": "7583686f", + "id": "0b16d613", "metadata": {}, "outputs": [ { @@ -1130,7 +1526,7 @@ { "cell_type": "code", "execution_count": 157, - "id": "1b71d3f6", + "id": "90f57ab5", "metadata": {}, "outputs": [ { @@ -1247,7 +1643,7 @@ { "cell_type": "code", "execution_count": 128, - "id": "65e31a02", + "id": "fac146fd", "metadata": {}, "outputs": [ { @@ -1269,7 +1665,7 @@ { "cell_type": "code", "execution_count": 127, - "id": "9e9fe8c1", + "id": "baf83a53", "metadata": {}, "outputs": [ { @@ -1389,7 +1785,7 @@ { "cell_type": "code", "execution_count": 100, - "id": "7a82b9f7", + "id": "42d9c84a", "metadata": {}, "outputs": [ { @@ -1414,7 +1810,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "759146bc", + "id": "9cbfe156", "metadata": {}, "outputs": [ { @@ -1435,7 +1831,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "817cf65c", + "id": "5c918abe", "metadata": {}, "outputs": [ { @@ -1457,7 +1853,7 @@ { "cell_type": "code", "execution_count": 198, - "id": "683deaa8", + "id": "eae94886", "metadata": {}, "outputs": [ { @@ -1479,7 +1875,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "cf734789", + "id": "8fc2335a", "metadata": {}, "outputs": [ { @@ -1500,7 +1896,7 @@ { "cell_type": "code", "execution_count": 173, - "id": "7561d936", + "id": "715d6b94", "metadata": {}, "outputs": [], "source": [ @@ -1536,7 +1932,7 @@ { "cell_type": "code", "execution_count": 177, - "id": "2fd676d8", + "id": "327e0a5e", "metadata": {}, "outputs": [], "source": [ @@ -1572,7 +1968,7 @@ { "cell_type": "code", "execution_count": 187, - "id": "6b288cdc", + "id": "f31089a3", "metadata": {}, "outputs": [ { @@ -1799,7 +2195,7 @@ { "cell_type": "code", "execution_count": 142, - "id": "c516286a", + "id": "fede9c99", "metadata": {}, "outputs": [], "source": [ @@ -1836,7 +2232,7 @@ { "cell_type": "code", "execution_count": 117, - "id": "461e5389", + "id": "b0272fe0", "metadata": {}, "outputs": [], "source": [ @@ -1857,7 +2253,7 @@ { "cell_type": "code", "execution_count": 116, - "id": "1cfe41f9", + "id": "ad669f8a", "metadata": {}, "outputs": [ { @@ -2069,7 +2465,7 @@ { "cell_type": "code", "execution_count": 157, - "id": "b1e9c75c", + "id": "95284143", "metadata": {}, "outputs": [ { @@ -2093,7 +2489,7 @@ { "cell_type": "code", "execution_count": 152, - "id": "91717641", + "id": "1e445750", "metadata": {}, "outputs": [ { @@ -2114,7 +2510,7 @@ { "cell_type": "code", "execution_count": 62, - "id": "c2735ea2", + "id": "9768df8d", "metadata": {}, "outputs": [ { @@ -2342,7 +2738,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "21ebd651", + "id": "583c116a", "metadata": {}, "outputs": [], "source": [ @@ -2468,7 +2864,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "29d8b2a3", + "id": "1f10069b", "metadata": {}, "outputs": [], "source": [ @@ -2489,7 +2885,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "ac70d73d", + "id": "465ae2a5", "metadata": {}, "outputs": [ { @@ -2551,7 +2947,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "b531515d", + "id": "81d0004d", "metadata": {}, "outputs": [ { @@ -2613,7 +3009,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f11056d9", + "id": "91dc0c7f", "metadata": {}, "outputs": [], "source": [ @@ -2626,7 +3022,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "7c724714", + "id": "e884018f", "metadata": {}, "outputs": [ { @@ -2647,7 +3043,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "44e3bf14", + "id": "eef6f08c", "metadata": {}, "outputs": [ { @@ -2696,7 +3092,7 @@ { "cell_type": "code", "execution_count": 36, - "id": "e0b4e485", + "id": "6a4b7695", "metadata": {}, "outputs": [ { @@ -2758,7 +3154,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "97c05d36", + "id": "30cde48b", "metadata": {}, "outputs": [ { @@ -2784,7 +3180,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5818a167", + "id": "f05ebf5f", "metadata": {}, "outputs": [], "source": [] @@ -2792,7 +3188,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "005b032e", + "id": "2bd820af", "metadata": {}, "outputs": [ { @@ -2812,7 +3208,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "95c7c24d", + "id": "394c44c5", "metadata": {}, "outputs": [ { @@ -2833,7 +3229,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "7bec19b6", + "id": "12c3ce83", "metadata": {}, "outputs": [ { @@ -2903,7 +3299,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "289d2009", + "id": "11a55f6c", "metadata": {}, "outputs": [], "source": [ @@ -2914,7 +3310,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "8ac5cdf9", + "id": "07e5d147", "metadata": {}, "outputs": [ { @@ -3023,7 +3419,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "f39f61c8", + "id": "123e2f5e", "metadata": {}, "outputs": [ { @@ -3044,7 +3440,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "a4e2fc7b", + "id": "670a0bc5", "metadata": {}, "outputs": [ { @@ -3075,7 +3471,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "3a00cb97", + "id": "bcb8f7cc", "metadata": {}, "outputs": [ { @@ -3572,7 +3968,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "8ba9d3ad", + "id": "1b177c60", "metadata": {}, "outputs": [ { @@ -3606,7 +4002,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "9dcaba3d", + "id": "677c2d67", "metadata": {}, "outputs": [ { @@ -3626,7 +4022,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "5a768b47", + "id": "7e7ba0e1", "metadata": {}, "outputs": [], "source": [ @@ -3636,7 +4032,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "d64d0055", + "id": "52014792", "metadata": {}, "outputs": [ { @@ -3657,7 +4053,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "c7e397f4", + "id": "8b1b263b", "metadata": {}, "outputs": [ { @@ -3684,7 +4080,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24d5d7e8", + "id": "abb6ad51", "metadata": {}, "outputs": [], "source": [] diff --git a/mrdna/readers/test.json b/mrdna/readers/test.json deleted file mode 100644 index 5332be9..0000000 --- a/mrdna/readers/test.json +++ /dev/null @@ -1 +0,0 @@ -{"name":"test.json","vstrands":[{"row":12,"col":16,"num":0,"scaf":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[1,5,0,6],[0,5,0,7],[0,6,0,8],[0,7,0,9],[0,8,0,10],[0,9,0,11],[0,10,0,12],[0,11,0,13],[0,12,0,14],[0,13,0,15],[0,14,0,16],[0,15,0,17],[0,16,0,18],[0,17,0,19],[0,18,0,20],[0,19,0,21],[0,20,0,22],[0,21,0,23],[0,22,0,24],[0,23,0,25],[0,24,0,26],[0,25,0,27],[0,26,0,28],[0,27,0,29],[0,28,0,30],[0,29,0,31],[0,30,0,32],[0,31,0,33],[0,32,0,34],[0,33,0,35],[0,34,0,36],[0,35,1,36],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,0,40],[0,39,0,41],[0,40,-1,-1]],"stap":[[-1,-1,-1,-1],[-1,-1,-1,-1],[0,3,-1,-1],[0,4,0,2],[0,5,0,3],[0,6,0,4],[0,7,0,5],[0,8,0,6],[0,9,0,7],[0,10,0,8],[0,11,0,9],[0,12,0,10],[0,13,0,11],[0,14,0,12],[0,15,0,13],[0,16,0,14],[0,17,0,15],[0,18,0,16],[0,19,0,17],[0,20,0,18],[1,20,0,19],[0,22,1,21],[0,23,0,21],[-1,-1,0,22],[0,25,-1,-1],[0,26,0,24],[0,27,0,25],[5,27,0,26],[0,29,5,28],[0,30,0,28],[0,31,0,29],[0,32,0,30],[0,33,0,31],[0,34,0,32],[0,35,0,33],[0,36,0,34],[0,37,0,35],[0,38,0,36],[-1,-1,0,37],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[23,13369809],[38,12060012]]},{"row":12,"col":15,"num":1,"scaf":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[1,6,0,5],[1,7,1,5],[1,8,1,6],[1,9,1,7],[1,10,1,8],[1,11,1,9],[1,12,1,10],[1,13,1,11],[1,14,1,12],[1,15,1,13],[1,16,1,14],[1,17,1,15],[1,18,1,16],[2,18,1,17],[1,20,2,19],[1,21,1,19],[1,22,1,20],[1,23,1,21],[1,24,1,22],[1,25,1,23],[1,26,1,24],[1,27,1,25],[1,28,1,26],[1,29,1,27],[1,30,1,28],[1,31,1,29],[1,32,1,30],[1,33,1,31],[1,34,1,32],[1,35,1,33],[1,36,1,34],[0,36,1,35],[-1,-1,-1,-1],[-1,-1,-1,-1],[1,40,-1,-1],[1,41,1,39],[-1,-1,1,40]],"stap":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,1,4],[1,3,1,5],[1,4,1,6],[1,5,1,7],[1,6,1,8],[1,7,1,9],[1,8,1,10],[1,9,1,11],[1,10,1,12],[1,11,1,13],[1,12,1,14],[1,13,1,15],[1,14,1,16],[1,15,1,17],[1,16,1,18],[1,17,1,19],[1,18,1,20],[1,19,0,20],[0,21,1,22],[1,21,1,23],[1,22,1,24],[1,23,1,25],[1,24,1,26],[1,25,1,27],[1,26,1,28],[1,27,1,29],[1,28,1,30],[1,29,1,31],[1,30,1,32],[1,31,1,33],[1,32,1,34],[1,33,1,35],[1,34,1,36],[1,35,1,37],[1,36,1,38],[1,37,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[3,1501302]]},{"row":13,"col":15,"num":2,"scaf":[[-1,-1,-1,-1],[-1,-1,-1,-1],[3,2,2,3],[2,2,2,4],[2,3,2,5],[2,4,2,6],[2,5,2,7],[2,6,2,8],[2,7,2,9],[2,8,2,10],[2,9,2,11],[2,10,2,12],[2,11,2,13],[2,12,2,14],[2,13,2,15],[2,14,2,16],[2,15,2,17],[2,16,2,18],[2,17,1,18],[1,19,2,20],[2,19,2,21],[2,20,2,22],[2,21,2,23],[2,22,2,24],[2,23,2,25],[2,24,2,26],[2,25,2,27],[2,26,2,28],[2,27,2,29],[2,28,2,30],[2,29,2,31],[2,30,2,32],[2,31,3,32],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,2,40],[2,39,2,41],[2,40,-1,-1]],"stap":[[2,1,-1,-1],[2,2,2,0],[2,3,2,1],[2,4,2,2],[2,5,2,3],[2,6,2,4],[2,7,2,5],[2,8,2,6],[2,9,2,7],[2,10,2,8],[2,11,2,9],[2,12,2,10],[2,13,2,11],[2,14,2,12],[2,15,2,13],[2,16,2,14],[2,17,2,15],[2,18,2,16],[2,19,2,17],[2,20,2,18],[2,21,2,19],[2,22,2,20],[2,23,2,21],[2,24,2,22],[2,25,2,23],[2,26,2,24],[2,27,2,25],[2,28,2,26],[2,29,2,27],[2,30,2,28],[2,31,2,29],[2,32,2,30],[2,33,2,31],[2,34,2,32],[-1,-1,2,33],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[34,8947848]]},{"row":13,"col":16,"num":3,"scaf":[[-1,-1,-1,-1],[-1,-1,-1,-1],[3,3,2,2],[3,4,3,2],[3,5,3,3],[3,6,3,4],[3,7,3,5],[3,8,3,6],[3,9,3,7],[3,10,3,8],[3,11,3,9],[3,12,3,10],[3,13,3,11],[3,14,3,12],[3,15,3,13],[4,15,3,14],[3,17,4,16],[3,18,3,16],[3,19,3,17],[3,20,3,18],[3,21,3,19],[3,22,3,20],[3,23,3,21],[3,24,3,22],[3,25,3,23],[3,26,3,24],[3,27,3,25],[3,28,3,26],[3,29,3,27],[3,30,3,28],[3,31,3,29],[3,32,3,30],[2,32,3,31],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[3,38,-1,-1],[3,39,3,37],[3,40,3,38],[3,41,3,39],[-1,-1,3,40]],"stap":[[-1,-1,3,1],[3,0,3,2],[3,1,3,3],[3,2,3,4],[3,3,3,5],[3,4,3,6],[3,5,3,7],[3,6,3,8],[3,7,3,9],[3,8,3,10],[3,9,3,11],[3,10,3,12],[3,11,3,13],[3,12,3,14],[3,13,3,15],[3,14,3,16],[3,15,3,17],[3,16,3,18],[3,17,3,19],[3,18,3,20],[3,19,4,20],[4,21,3,22],[3,21,3,23],[3,22,3,24],[3,23,3,25],[3,24,3,26],[3,25,3,27],[3,26,3,28],[3,27,3,29],[3,28,3,30],[3,29,3,31],[3,30,3,32],[3,31,3,33],[3,32,3,34],[3,33,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[0,13369344]]},{"row":13,"col":17,"num":4,"scaf":[[-1,-1,4,1],[4,0,4,2],[4,1,4,3],[4,2,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[5,9,4,10],[4,9,4,11],[4,10,4,12],[4,11,4,13],[4,12,4,14],[4,13,4,15],[4,14,3,15],[3,16,4,17],[4,16,4,18],[4,17,4,19],[4,18,4,20],[4,19,4,21],[4,20,4,22],[4,21,4,23],[4,22,4,24],[4,23,4,25],[4,24,4,26],[4,25,4,27],[4,26,4,28],[4,27,4,29],[4,28,4,30],[4,29,4,31],[4,30,4,32],[4,31,4,33],[4,32,4,34],[4,33,4,35],[4,34,4,36],[4,35,4,37],[4,36,4,38],[4,37,4,39],[4,38,5,39],[-1,-1,-1,-1],[-1,-1,-1,-1]],"stap":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[4,10,-1,-1],[4,11,4,9],[4,12,4,10],[4,13,4,11],[4,14,4,12],[4,15,4,13],[4,16,4,14],[4,17,4,15],[4,18,4,16],[4,19,4,17],[4,20,4,18],[3,20,4,19],[4,22,3,21],[4,23,4,21],[4,24,4,22],[4,25,4,23],[4,26,4,24],[4,27,4,25],[4,28,4,26],[4,29,4,27],[4,30,4,28],[4,31,4,29],[4,32,4,30],[4,33,4,31],[4,34,4,32],[4,35,4,33],[4,36,4,34],[4,37,4,35],[4,38,4,36],[4,39,4,37],[-1,-1,4,38],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[39,8947848]]},{"row":12,"col":17,"num":5,"scaf":[[5,1,-1,-1],[5,2,5,0],[5,3,5,1],[-1,-1,5,2],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[5,10,4,9],[5,11,5,9],[5,12,5,10],[5,13,5,11],[5,14,5,12],[5,15,5,13],[5,16,5,14],[5,17,5,15],[5,18,5,16],[5,19,5,17],[5,20,5,18],[5,21,5,19],[5,22,5,20],[-1,-1,5,21],[5,24,-1,-1],[5,25,5,23],[5,26,5,24],[5,27,5,25],[5,28,5,26],[5,29,5,27],[5,30,5,28],[5,31,5,29],[5,32,5,30],[5,33,5,31],[5,34,5,32],[5,35,5,33],[5,36,5,34],[5,37,5,35],[5,38,5,36],[5,39,5,37],[4,39,5,38],[-1,-1,-1,-1],[-1,-1,-1,-1]],"stap":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,5,10],[5,9,5,11],[5,10,5,12],[5,11,5,13],[5,12,5,14],[5,13,5,15],[5,14,5,16],[5,15,5,17],[5,16,5,18],[5,17,5,19],[5,18,5,20],[5,19,5,21],[5,20,5,22],[5,21,5,23],[5,22,5,24],[5,23,5,25],[5,24,5,26],[5,25,5,27],[5,26,0,27],[0,28,5,29],[5,28,5,30],[5,29,5,31],[5,30,5,32],[5,31,5,33],[5,32,5,34],[5,33,5,35],[5,34,5,36],[5,35,5,37],[5,36,5,38],[5,37,5,39],[5,38,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[9,0]]}]} \ No newline at end of file diff --git a/mrdna/readers/Na_liu.json b/mrdna/readers/test/Na_liu.json similarity index 100% rename from mrdna/readers/Na_liu.json rename to mrdna/readers/test/Na_liu.json diff --git a/mrdna/readers/rest_scaf_col.json b/mrdna/readers/test/rest_scaf_col.json similarity index 100% rename from mrdna/readers/rest_scaf_col.json rename to mrdna/readers/test/rest_scaf_col.json diff --git a/mrdna/readers/test/test.ipynb b/mrdna/readers/test/test.ipynb new file mode 100644 index 0000000..f9f7eeb --- /dev/null +++ b/mrdna/readers/test/test.ipynb @@ -0,0 +1,3735 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 342, + "id": "1955acef", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pickle\n", + "import numpy as np\n", + "\n", + "df=pd.read_json(\"test.json\")\n", + "d=list(df[\"vstrands\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "id": "1a6b8cb2", + "metadata": {}, + "outputs": [], + "source": [ + "import cadnano\n", + "from cadnano.document import Document\n", + "from mrdna.arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c7d2f43e", + "metadata": {}, + "outputs": [], + "source": [ + "df=pd.DataFrame(data=d)\n", + "df=df.set_index(\"num\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "62f9b7f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>row</th>\n", + " <th>col</th>\n", + " <th>scaf</th>\n", + " <th>stap</th>\n", + " <th>loop</th>\n", + " <th>skip</th>\n", + " <th>scafLoop</th>\n", + " <th>stapLoop</th>\n", + " <th>stap_colors</th>\n", + " </tr>\n", + " <tr>\n", + " <th>num</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[23, 13369809], [38, 12060012]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[3, 1501302]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>13</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n", + " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[34, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>13</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n", + " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[0, 13369344]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>13</td>\n", + " <td>17</td>\n", + " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[39, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>12</td>\n", + " <td>17</td>\n", + " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[9, 0]]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " row col scaf \\\n", + "num \n", + "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n", + "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n", + "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n", + "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n", + "\n", + " stap \\\n", + "num \n", + "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n", + "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n", + "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n", + "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "\n", + " loop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "\n", + " skip scafLoop stapLoop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "\n", + " stap_colors \n", + "num \n", + "0 [[23, 13369809], [38, 12060012]] \n", + "1 [[3, 1501302]] \n", + "2 [[34, 8947848]] \n", + "3 [[0, 13369344]] \n", + "4 [[39, 8947848]] \n", + "5 [[9, 0]] " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "b317d21a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "doc = Document()\n", + "def read_json_file(filename):\n", + " import json\n", + " import re\n", + "\n", + " try:\n", + " with open(filename) as ch:\n", + " data = json.load(ch)\n", + " except:\n", + " with open(filename) as ch:\n", + " content = \"\"\n", + " for l in ch:\n", + " l = re.sub(r\"'\", r'\"', l)\n", + " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", + " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", + " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", + " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", + " content += l+\"\\n\"\n", + " data = json.loads(content)\n", + " return data\n", + "f=read_json_file(\"test.json\")\n", + "cadnano.fileio.v2decode.decode(doc, f)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "dc7eb261", + "metadata": {}, + "outputs": [], + "source": [ + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(\"WARNING: unable to determine cadnano part lattice type\")\n", + " return lattice_type\n" + ] + }, + { + "cell_type": "code", + "execution_count": 195, + "id": "1bf753c6", + "metadata": {}, + "outputs": [], + "source": [ + "def read_json_file(filename):\n", + " import json\n", + " import re\n", + " import cadnano\n", + " from cadnano.document import Document\n", + "\n", + " try:\n", + " with open(filename) as ch:\n", + " json_data = json.load(ch)\n", + " except:\n", + " with open(filename) as ch:\n", + " content = \"\"\n", + " for l in ch:\n", + " l = re.sub(r\"'\", r'\"', l)\n", + " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", + " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", + " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", + " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", + " content += l+\"\\n\"\n", + " json_data = json.loads(content)\n", + "\n", + " try:\n", + " doc = Document()\n", + " cadnano.fileio.v3decode.decode(doc, json_data)\n", + " decoder = 3\n", + " except:\n", + " doc = Document()\n", + " cadnano.fileio.v2decode.decode(doc, json_data)\n", + " decoder = 2\n", + "\n", + " parts = [p for p in doc.getParts()]\n", + " if len(parts) != 1:\n", + " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n", + " part = parts[0]\n", + "\n", + " if decoder == 2:\n", + " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n", + " l = get_lattice(part)\n", + " if l == 'square':\n", + " for id_num in part.getIdNums():\n", + " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n", + " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n", + " df=pd.DataFrame(json_data[\"vstrands\"])\n", + " n_df=df.set_index(\"num\")\n", + " return part,n_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 358, + "id": "9b534b7c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + }, + { + "data": { + "text/plain": [ + "NucleicAcidPart_-1_3904" + ] + }, + "execution_count": 358, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p,f=read_json_file(\"test.json\")\n", + "p" + ] + }, + { + "cell_type": "code", + "execution_count": 439, + "id": "cbb83c93", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NucleicAcidPart_-1_3904" + ] + }, + "execution_count": 439, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p" + ] + }, + { + "cell_type": "code", + "execution_count": 199, + "id": "3bb28a94", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 2.25, 3.4 ])" + ] + }, + "execution_count": 199, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p.getCoordinate(0,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 434, + "id": "be894ade", + "metadata": {}, + "outputs": [], + "source": [ + "def mrdna_model_from_cadnano(json_file,**model_parameters):\n", + " part,vslist=read_json_file(json_file)\n", + " props = part.getModelProperties().copy()\n", + " try:\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " except:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=[nttype(vslist['scaf'][i]) for i in vslist.index]\n", + " stap_id=[nttype(vslist['stap'][i]) for i in vslist.index]\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(np.array(scaf_id),axis=1)\n", + " vslist[\"stapnt\"]=np.sum(np.array(stap_id),axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt,dtype=bool)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " tot_id=scaf_id+stap_id\n", + " vhi,zidi=np.where(np.array(scaf_id)==1)\n", + " vhj,zidj=np.where(np.array(stap_id)==1)\n", + " nt_prop[\"vh\"]=list(vhi)+list(vhj)\n", + " nt_prop[\"zid\"]=list(zidi)+list(zidj)\n", + " vhzid=list(zip(nt_prop[\"vh\"],nt_prop[\"zid\"]))\n", + " nt_prop[\"r\"]=[part.getCoordinate(i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop[\"orientation\"]=[get_helix_angle(part, helix_id, indices) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n", + " nt_prop=nt_prop.fillna(-1)\n", + " for i in range(int(len(vhzid)/2)):\n", + " try:\n", + " bp1,bp2=(i,1+i+vhzid[i+1:].index(vhzid[i]))\n", + " nt_prop[\"bp\"][bp1]=bp2\n", + " nt_prop[\"bp\"][bp2]=bp1\n", + " except:\n", + " pass\n", + " tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + " for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + "\n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + " nt_prop[\"threeprime\"]=tprime_list\n", + " (n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + " stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + " nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n", + "\n", + "\n", + " return nt_prop\n" + ] + }, + { + "cell_type": "code", + "execution_count": 440, + "id": "9a290811", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,\n", + " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", + " 27, 28, 29, 30, 31, 66, 33, 34, -1, 0, 35, 36, 37,\n", + " 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 87, 49, 50,\n", + " 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,\n", + " 64, 65, -1, 67, 68, 71, 72, 73, 74, 75, 76, 77, 78,\n", + " 79, 80, 81, 82, 83, 84, 85, 86, 48, 88, 89, 90, 91,\n", + " 92, 93, 94, 95, 96, 97, 98, 99, 100, 134, 102, 103, -1,\n", + " 70, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,\n", + " 116, 151, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,\n", + " 129, 130, 131, 132, 133, -1, 135, 136, 137, 138, 141, 142, 143,\n", + " -1, 145, 146, 147, 148, 149, 150, 117, 152, 153, 154, 155, 156,\n", + " 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,\n", + " 170, 171, 172, 173, 174, 209, -1, 175, 176, 177, 144, 179, 180,\n", + " 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, -1, 193,\n", + " 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206,\n", + " 207, 208, -1, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,\n", + " 220, 221, 222, 223, 224, 225, 226, 227, 265, 229, 230, -1, 232,\n", + " 233, 234, 403, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245,\n", + " 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,\n", + " 261, 262, 263, 264, 228, 266, 267, 268, 269, 270, 271, 272, 273,\n", + " 274, 275, 276, 277, 278, 279, 280, 281, 282, -1, -1, 283, 284,\n", + " 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297,\n", + " 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310,\n", + " 311, 312, 313, 314, 315, 316, 319, 320, 321, 322, 323, 324, 325,\n", + " 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338,\n", + " 364, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351,\n", + " 352, -1, -1, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362,\n", + " 363, 339, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375,\n", + " 376, 377, 378, 379, 380, 381, 382, 385, 386, 387, 388, 389, 390,\n", + " 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 235,\n", + " 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, -1])" + ] + }, + "execution_count": 440, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(nt_prop[\"threeprime\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 302, + "id": "551cc70f", + "metadata": {}, + "outputs": [], + "source": [ + "tprime_list=-np.ones(len(nt_prop.index),dtype=int)\n", + "for i in range(len(nt_prop.index)):\n", + " ((m,n),p)=list(zip(vhzid,nt_prop[\"is_scaf\"]))[i]\n", + " if p==True:\n", + " k,l=(vslist[\"scaf\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),True))\n", + " tprime_list[i]=int(n)\n", + " \n", + " else:\n", + " k,l=(vslist[\"stap\"][m])[n][2:]\n", + " if k!=-1 and l!=-1:\n", + " n=index2.index(((k,l),False))\n", + " tprime_list[i]=int(n)\n", + "nt_prop[\"threeprime\"]=tprime_list" + ] + }, + { + "cell_type": "code", + "execution_count": 368, + "id": "1d40286a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_helix_angle(part, helix_id, indices):\n", + " \"\"\" Get \"start_orientation\" for helix \"\"\"\n", + " # import ipdb\n", + " # ipdb.set_trace()\n", + "\n", + " \"\"\" FROM CADNANO2.5\n", + " + angle is CCW\n", + " - angle is CW\n", + " Right handed DNA rotates clockwise from 5' to 3'\n", + " we use the convention the 5' end starts at 0 degrees\n", + " and it's pair is minor_groove_angle degrees away\n", + " direction, hence the minus signs. eulerZ\n", + " \"\"\"\n", + "\n", + " hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,\n", + " ['helical_pitch',\n", + " 'bases_per_repeat',\n", + " 'turns_per_repeat',\n", + " 'eulerZ',\n", + " 'minor_groove_angle']]\n", + " twist_per_base = tpr*360./bpr\n", + " # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180\n", + " angle = eulerZ + twist_per_base*indices - 0.5*mgroove\n", + " return rotationAboutAxis(np.array((0,0,1)),angle)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 429, + "id": "f45dd87c", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "(n,)=np.where(nt_prop[\"threeprime\"]==-1)\n", + "\n", + "stackid=nt_prop[\"bp\"][[list(nt_prop[\"threeprime\"]).index(i) for i in n]]\n", + "\n", + "nt_prop[\"stack\"][stackid.index[np.where(np.array(stackid)!=-1)]]=nt_prop[\"threeprime\"][stackid.index[np.where(np.array(stackid)!=-1)]]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 430, + "id": "ef29b662", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "33 -1\n", + "68 -1\n", + "102 -1\n", + "136 -1\n", + "142 -1\n", + "176 -1\n", + "194 399\n", + "211 -1\n", + "233 20\n", + "281 -1\n", + "284 -1\n", + "351 -1\n", + "354 145\n", + "413 -1\n", + "Name: bp, dtype: int64" + ] + }, + "execution_count": 430, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stackid" + ] + }, + { + "cell_type": "code", + "execution_count": 431, + "id": "6678f56c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "232" + ] + }, + "execution_count": 431, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop[\"stack\"][233]" + ] + }, + { + "cell_type": "code", + "execution_count": 433, + "id": "cb9dbf13", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "353" + ] + }, + "execution_count": 433, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop[\"stack\"][354]" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "id": "165d3cc6", + "metadata": {}, + "outputs": [], + "source": [ + "scaf_id=[nttype(vslist[\"scaf\"][i]) for i in vslist.index]\n", + "stap_id=[nttype(vslist[\"stap\"][i]) for i in vslist.index]\n", + "nts=scaf_id+stap_id" + ] + }, + { + "cell_type": "code", + "execution_count": 360, + "id": "b2856178", + "metadata": {}, + "outputs": [], + "source": [ + "nt_prop[\"orientation\"]=[get_helix_angle(p,i,j) for i,j in zip(nt_prop[\"vh\"],nt_prop[\"zid\"])]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "id": "07918f5c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>row</th>\n", + " <th>col</th>\n", + " <th>scaf</th>\n", + " <th>stap</th>\n", + " <th>loop</th>\n", + " <th>skip</th>\n", + " <th>scafLoop</th>\n", + " <th>stapLoop</th>\n", + " <th>stap_colors</th>\n", + " </tr>\n", + " <tr>\n", + " <th>num</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[23, 13369809], [38, 12060012]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[3, 1501302]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>13</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n", + " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[34, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>13</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n", + " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[0, 13369344]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>13</td>\n", + " <td>17</td>\n", + " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[39, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>12</td>\n", + " <td>17</td>\n", + " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[9, 0]]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " row col scaf \\\n", + "num \n", + "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n", + "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n", + "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n", + "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n", + "\n", + " stap \\\n", + "num \n", + "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n", + "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n", + "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n", + "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "\n", + " loop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "\n", + " skip scafLoop stapLoop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "\n", + " stap_colors \n", + "num \n", + "0 [[23, 13369809], [38, 12060012]] \n", + "1 [[3, 1501302]] \n", + "2 [[34, 8947848]] \n", + "3 [[0, 13369344]] \n", + "4 [[39, 8947848]] \n", + "5 [[9, 0]] " + ] + }, + "execution_count": 190, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vslist" + ] + }, + { + "cell_type": "code", + "execution_count": 200, + "id": "86293e8a", + "metadata": {}, + "outputs": [], + "source": [ + "def mrdna_model_from_cadnano(json_data,**model_parameters):\n", + " part,vslist=decode_cadnano_part(json_data)\n", + " props = part.getModelProperties().copy()\n", + "\n", + " if props.get('point_type') == PointType.ARBITRARY:\n", + " # TODO add code to encode Parts with ARBITRARY point configurations\n", + " raise NotImplementedError(\"Not implemented\")\n", + " else:\n", + " try:\n", + " vh_props, origins = part.helixPropertiesAndOrigins()\n", + " except:\n", + " origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()}\n", + " scaf_id=np.array([nttype(vslist['scaf'][i]) for i in vslist.index])\n", + " stap_id=np.array([nttype(vslist['stap'][i]) for i in vslist.index])\n", + " cad_bps=part.getIndices(0)\n", + " vslist[\"scafnt\"]=np.sum(scaf_id,axis=1)\n", + " vslist[\"stapnt\"]=np.sum(stap_id,axis=1)\n", + " totnt=np.sum(vslist[\"scafnt\"])+np.sum(vslist[\"stapnt\"])\n", + " is_scaf=np.zeros(totnt)\n", + " is_scaf[0:np.sum(vslist[\"scafnt\"])]=1\n", + " nt_prop=pd.DataFrame(index=range(totnt),columns=[\"vh\",\"zid\",\"is_scaf\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"])\n", + " nt_prop[\"is_scaf\"]=is_scaf\n", + " vhi,zids=np.where(np.array(scaf_id+stap_id)==1)\n", + " nt_prop[\"vh\"]=vhi\n", + " nt_prop[\"zid\"]=zids\n", + " nt_prop[\"r\"] =part.getCoordinate(nt_prop[\"vh\"],nt_prop[\"zid\"])\n", + " return nt_prop\n" + ] + }, + { + "cell_type": "code", + "execution_count": 201, + "id": "b398277c", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'decode_cadnano_part' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-201-c5d589a8b80d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m<ipython-input-200-181a924488ad>\u001b[0m in \u001b[0;36mmrdna_model_from_cadnano\u001b[0;34m(json_data, **model_parameters)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmrdna_model_from_cadnano\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mmodel_parameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpart\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvslist\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_cadnano_part\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprops\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetModelProperties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'point_type'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mPointType\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mARBITRARY\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'decode_cadnano_part' is not defined" + ] + } + ], + "source": [ + "mrdna_model_from_cadnano(\"test.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "c0d9eb64", + "metadata": {}, + "outputs": [], + "source": [ + "a,b=np.where(np.array(nts)==1)" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "ab563ec9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n", + " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40,\n", + " 41, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n", + " 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39,\n", + " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39,\n", + " 40, 41, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 37,\n", + " 38, 39, 40, 41, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", + " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n", + " 35, 36, 37, 38, 39, 0, 1, 2, 3, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39])" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt_prop=pd.DataFrame(index)" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "3c45aeac", + "metadata": {}, + "outputs": [], + "source": [ + "def nttype(scafs):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in scafs])\n", + " return n\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "803b3c67", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "adb6b347", + "metadata": {}, + "outputs": [], + "source": [ + "b[\"scafnt\"]=[ntcount(b['scaf'][i]) for i in b.index]\n", + "b[\"stapnt\"]=[ntcount(b['stap'][i]) for i in b.index]" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": "307e53ad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " <tr>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <th>0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <th>2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>8</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " r bp stack threeprime seq orientation\n", + "vh zid \n", + "0 0 NaN NaN NaN NaN NaN NaN\n", + "1 3 NaN NaN NaN NaN NaN NaN\n", + "2 1 NaN NaN NaN NaN NaN NaN\n", + "3 2 NaN NaN NaN NaN NaN NaN\n", + "1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "i=range(5)\n", + "col=[\"vh\",\"zid\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"]\n", + "d=pd.DataFrame(index=i,columns=col)\n", + "d['vh']=[0,1,2,3,1]\n", + "d['zid']=[0,3,1,2,8]\n", + "d.set_index([\"vh\",\"zid\"],inplace=True)\n", + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "d030974e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " vh zid r bp stack threeprime seq orientation\n", + "0 0 0 NaN NaN NaN NaN NaN NaN\n", + "1 1 3 NaN NaN NaN NaN NaN NaN\n", + "2 2 1 NaN NaN NaN NaN NaN NaN\n", + "3 3 2 NaN NaN NaN NaN NaN NaN\n", + "4 1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "d=d.reset_index()\n", + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "id": "6ddb4784", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 2]),)" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s=[True,False,True,False,False]\n", + "np.where(np.array(s)==True)" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "28e3acea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " <tr>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <th>0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>3</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <th>1</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <th>2</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <th>8</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " r bp stack threeprime seq orientation\n", + "vh zid \n", + "0 0 NaN NaN NaN NaN NaN NaN\n", + "1 3 NaN NaN NaN NaN NaN NaN\n", + "2 1 NaN NaN NaN NaN NaN NaN\n", + "3 2 NaN NaN NaN NaN NaN NaN\n", + "1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "545acf6d", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-100-a349feadc600>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3509\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_iterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3510\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3511\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_indexer_strict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3513\u001b[0m \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5780\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_indexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reindex_non_unique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5782\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_if_missing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5783\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5784\u001b[0m \u001b[0mkeyarr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5840\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muse_interval_msg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5841\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5842\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of [{key}] are in the [{axis_name}]\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5844\u001b[0m \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: \"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"" + ] + } + ], + "source": [ + "d[[0,0]]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f6748d9c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[23, 13369809], [38, 12060012]]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"vstrands\"][0][\"stap_colors\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5005611f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "210" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", + "len(vh_vb._scaf)" + ] + }, + { + "cell_type": "code", + "execution_count": 198, + "id": "83fc4ec6", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "file must have 'read' and 'readline' attributes", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[198], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpickle\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.virt2nuc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: file must have 'read' and 'readline' attributes" + ] + } + ], + "source": [ + "df = pickle.load(\"test.virt2nuc\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1cd359b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: (12, 16), 1: (12, 15), 2: (13, 15), 3: (13, 16), 4: (13, 17), 5: (12, 17)}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pattern" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "id": "078656d6", + "metadata": {}, + "outputs": [], + "source": [ + "class strands():\n", + " def __init__(self):\n", + " self.row=0 \n", + " self.col=0\n", + " self.num=0\n", + " self.scaf=[]\n", + " self.stap=[]\n", + " self.loop=[]\n", + " self.skip=[]\n", + " self.scafLoop=[]\n", + " self.stapLoop=[]\n", + " self.stap_colors=[]\n", + " self.scaf_contact={}\n", + " self.stap_connect={}\n", + " def to_dict(self):\n", + " d={}\n", + " d['row']=self.row\n", + " d['col']=self.col\n", + " d['num']=self.num\n", + " d['scaf']=self.scaf\n", + " d['stap']=self.stap\n", + " d['loop']=self.loop\n", + " d['skip']=self.skip\n", + " d['scafLoop']=self.scafLoop\n", + " d['stapLoop']=self.stapLoop\n", + " d['stap_colors']=self.stap_colors\n", + " return d\n" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "id": "914acd5d", + "metadata": {}, + "outputs": [], + "source": [ + "def find_segs(vir2nuc_scaf):\n", + " oligos={}\n", + " for i in range(len(vir2nuc_scaf)):\n", + " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n", + " if oligo not in oligos.keys():\n", + " oligos[oligo]=[]\n", + " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n", + " return oligos\n", + "\n", + "#class\n", + "def decode_vh_vb(virt2nuc):\n", + " vh_list={}\n", + " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", + " for i in pattern.keys():\n", + " s=strands()\n", + " s.row,s.col=pattern[i]\n", + " s.num=i\n", + " vh_list[s.num]=s\n", + " scafs=vh_vb._scaf\n", + " staps=vh_vb._stap\n", + " scaf_strands=find_segs(scafs)\n", + " scaf_oligos=list(scaf_strands.keys())\n", + " for i in scaf_oligos:\n", + " pass\n", + " \n", + " \n", + " return vh_list" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "id": "18132c9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[(2, 34),\n", + " (2, 33),\n", + " (2, 32),\n", + " (2, 31),\n", + " (2, 30),\n", + " (2, 29),\n", + " (2, 28),\n", + " (2, 27),\n", + " (2, 26),\n", + " (2, 25),\n", + " (2, 24),\n", + " (2, 23),\n", + " (2, 22),\n", + " (2, 21),\n", + " (2, 20),\n", + " (2, 19),\n", + " (2, 18),\n", + " (2, 17),\n", + " (2, 16),\n", + " (2, 15),\n", + " (2, 14),\n", + " (2, 13),\n", + " (2, 12),\n", + " (2, 11),\n", + " (2, 10),\n", + " (2, 9),\n", + " (2, 8),\n", + " (2, 7),\n", + " (2, 6),\n", + " (2, 5),\n", + " (2, 4),\n", + " (2, 3),\n", + " (2, 2),\n", + " (2, 1),\n", + " (2, 0)],\n", + " [(1, 3),\n", + " (1, 4),\n", + " (1, 5),\n", + " (1, 6),\n", + " (1, 7),\n", + " (1, 8),\n", + " (1, 9),\n", + " (1, 10),\n", + " (1, 11),\n", + " (1, 12),\n", + " (1, 13),\n", + " (1, 14),\n", + " (1, 15),\n", + " (1, 16),\n", + " (1, 17),\n", + " (1, 18),\n", + " (1, 19),\n", + " (1, 20),\n", + " (0, 20),\n", + " (0, 19),\n", + " (0, 18),\n", + " (0, 17),\n", + " (0, 16),\n", + " (0, 15),\n", + " (0, 14),\n", + " (0, 13),\n", + " (0, 12),\n", + " (0, 11),\n", + " (0, 10),\n", + " (0, 9),\n", + " (0, 8),\n", + " (0, 7),\n", + " (0, 6),\n", + " (0, 5),\n", + " (0, 4),\n", + " (0, 3),\n", + " (0, 2)],\n", + " [(0, 23),\n", + " (0, 22),\n", + " (0, 21),\n", + " (1, 21),\n", + " (1, 22),\n", + " (1, 23),\n", + " (1, 24),\n", + " (1, 25),\n", + " (1, 26),\n", + " (1, 27),\n", + " (1, 28),\n", + " (1, 29),\n", + " (1, 30),\n", + " (1, 31),\n", + " (1, 32),\n", + " (1, 33),\n", + " (1, 34),\n", + " (1, 35),\n", + " (1, 36),\n", + " (1, 37),\n", + " (1, 38)],\n", + " [(5, 9),\n", + " (5, 10),\n", + " (5, 11),\n", + " (5, 12),\n", + " (5, 13),\n", + " (5, 14),\n", + " (5, 15),\n", + " (5, 16),\n", + " (5, 17),\n", + " (5, 18),\n", + " (5, 19),\n", + " (5, 20),\n", + " (5, 21),\n", + " (5, 22),\n", + " (5, 23),\n", + " (5, 24),\n", + " (5, 25),\n", + " (5, 26),\n", + " (5, 27),\n", + " (0, 27),\n", + " (0, 26),\n", + " (0, 25),\n", + " (0, 24)],\n", + " [(0, 38),\n", + " (0, 37),\n", + " (0, 36),\n", + " (0, 35),\n", + " (0, 34),\n", + " (0, 33),\n", + " (0, 32),\n", + " (0, 31),\n", + " (0, 30),\n", + " (0, 29),\n", + " (0, 28),\n", + " (5, 28),\n", + " (5, 29),\n", + " (5, 30),\n", + " (5, 31),\n", + " (5, 32),\n", + " (5, 33),\n", + " (5, 34),\n", + " (5, 35),\n", + " (5, 36),\n", + " (5, 37),\n", + " (5, 38),\n", + " (5, 39)],\n", + " [(3, 0),\n", + " (3, 1),\n", + " (3, 2),\n", + " (3, 3),\n", + " (3, 4),\n", + " (3, 5),\n", + " (3, 6),\n", + " (3, 7),\n", + " (3, 8),\n", + " (3, 9),\n", + " (3, 10),\n", + " (3, 11),\n", + " (3, 12),\n", + " (3, 13),\n", + " (3, 14),\n", + " (3, 15),\n", + " (3, 16),\n", + " (3, 17),\n", + " (3, 18),\n", + " (3, 19),\n", + " (3, 20),\n", + " (4, 20),\n", + " (4, 19),\n", + " (4, 18),\n", + " (4, 17),\n", + " (4, 16),\n", + " (4, 15),\n", + " (4, 14),\n", + " (4, 13),\n", + " (4, 12),\n", + " (4, 11),\n", + " (4, 10),\n", + " (4, 9)],\n", + " [(4, 39),\n", + " (4, 38),\n", + " (4, 37),\n", + " (4, 36),\n", + " (4, 35),\n", + " (4, 34),\n", + " (4, 33),\n", + " (4, 32),\n", + " (4, 31),\n", + " (4, 30),\n", + " (4, 29),\n", + " (4, 28),\n", + " (4, 27),\n", + " (4, 26),\n", + " (4, 25),\n", + " (4, 24),\n", + " (4, 23),\n", + " (4, 22),\n", + " (4, 21),\n", + " (3, 21),\n", + " (3, 22),\n", + " (3, 23),\n", + " (3, 24),\n", + " (3, 25),\n", + " (3, 26),\n", + " (3, 27),\n", + " (3, 28),\n", + " (3, 29),\n", + " (3, 30),\n", + " (3, 31),\n", + " (3, 32),\n", + " (3, 33),\n", + " (3, 34)]]" + ] + }, + "execution_count": 187, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1=decode_vh_vb(\"test.virt2nuc\")\n", + "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", + "list(find_segs(vh_vb._stap).values())" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "id": "087e2625", + "metadata": {}, + "outputs": [], + "source": [ + "def find_segs(vir2nuc_scaf):\n", + " oligos={}\n", + " for i in range(len(vir2nuc_scaf)):\n", + " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n", + " if oligo not in oligos.keys():\n", + " oligos[oligo]=[]\n", + " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n", + " return oligos\n", + "\n", + "def decode_vh_vb(virt2nuc):\n", + " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", + " vi={'row':0, 'col':0, 'num':0, 'scaf':dict(), 'stap':dict(), 'loop':[], 'skip':[], 'scafLoop':[],'stapLoop':[], 'stap_colors':[],\"scaf53\":True}\n", + " vs=[] \n", + " for i in range(len(pattern.keys())):\n", + " vhi=vi.copy()\n", + " vhi[\"row\"],vhi[\"col\"]=list(pattern.values())[i]\n", + " vhi[\"num\"]=list(pattern.keys())[i]\n", + " vs.append(vhi)\n", + " vhelices=pd.DataFrame(vs)\n", + " vhelices=vhelices.set_index('num')\n", + " scafs=vh_vb._scaf\n", + " staps=vh_vb._stap\n", + " scaf_strands=find_segs(scafs)\n", + " stap_strands=find_segs(staps)\n", + " scaf_oligos=list(scaf_strands.keys())\n", + "\n", + " \n", + " return vhelices" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "56387503", + "metadata": {}, + "outputs": [], + "source": [ + "def find_base_map(oligo,i,vhx,scaf=True):\n", + " vh0,vb0=oligo[i]\n", + " vh1,vb1=oligo[i+1]\n", + " if scaf==True:\n", + " if vb0 not in vhx[\"scaf\"][vh0].keys():\n", + " \n", + " if vh0==vh1 and scaf==True:\n", + " if vb0>vb1:\n", + " vhx[vh0][\"scaf\"][vb0]=\n", + " \n", + "\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "c73234d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(5, 22),\n", + " (5, 21),\n", + " (5, 20),\n", + " (5, 19),\n", + " (5, 18),\n", + " (5, 17),\n", + " (5, 16),\n", + " (5, 15),\n", + " (5, 14),\n", + " (5, 13),\n", + " (5, 12),\n", + " (5, 11),\n", + " (5, 10),\n", + " (5, 9),\n", + " (4, 9),\n", + " (4, 10),\n", + " (4, 11),\n", + " (4, 12),\n", + " (4, 13),\n", + " (4, 14),\n", + " (4, 15),\n", + " (3, 15),\n", + " (3, 14),\n", + " (3, 13),\n", + " (3, 12),\n", + " (3, 11),\n", + " (3, 10),\n", + " (3, 9),\n", + " (3, 8),\n", + " (3, 7),\n", + " (3, 6),\n", + " (3, 5),\n", + " (3, 4),\n", + " (3, 3),\n", + " (3, 2),\n", + " (2, 2),\n", + " (2, 3),\n", + " (2, 4),\n", + " (2, 5),\n", + " (2, 6),\n", + " (2, 7),\n", + " (2, 8),\n", + " (2, 9),\n", + " (2, 10),\n", + " (2, 11),\n", + " (2, 12),\n", + " (2, 13),\n", + " (2, 14),\n", + " (2, 15),\n", + " (2, 16),\n", + " (2, 17),\n", + " (2, 18),\n", + " (1, 18),\n", + " (1, 17),\n", + " (1, 16),\n", + " (1, 15),\n", + " (1, 14),\n", + " (1, 13),\n", + " (1, 12),\n", + " (1, 11),\n", + " (1, 10),\n", + " (1, 9),\n", + " (1, 8),\n", + " (1, 7),\n", + " (1, 6),\n", + " (1, 5),\n", + " (0, 5),\n", + " (0, 6),\n", + " (0, 7),\n", + " (0, 8),\n", + " (0, 9),\n", + " (0, 10),\n", + " (0, 11),\n", + " (0, 12),\n", + " (0, 13),\n", + " (0, 14),\n", + " (0, 15),\n", + " (0, 16),\n", + " (0, 17),\n", + " (0, 18),\n", + " (0, 19),\n", + " (0, 20),\n", + " (0, 21),\n", + " (0, 22),\n", + " (0, 23),\n", + " (0, 24),\n", + " (0, 25),\n", + " (0, 26),\n", + " (0, 27),\n", + " (0, 28),\n", + " (0, 29),\n", + " (0, 30),\n", + " (0, 31),\n", + " (0, 32),\n", + " (0, 33),\n", + " (0, 34),\n", + " (0, 35),\n", + " (0, 36),\n", + " (1, 36),\n", + " (1, 35),\n", + " (1, 34),\n", + " (1, 33),\n", + " (1, 32),\n", + " (1, 31),\n", + " (1, 30),\n", + " (1, 29),\n", + " (1, 28),\n", + " (1, 27),\n", + " (1, 26),\n", + " (1, 25),\n", + " (1, 24),\n", + " (1, 23),\n", + " (1, 22),\n", + " (1, 21),\n", + " (1, 20),\n", + " (1, 19),\n", + " (2, 19),\n", + " (2, 20),\n", + " (2, 21),\n", + " (2, 22),\n", + " (2, 23),\n", + " (2, 24),\n", + " (2, 25),\n", + " (2, 26),\n", + " (2, 27),\n", + " (2, 28),\n", + " (2, 29),\n", + " (2, 30),\n", + " (2, 31),\n", + " (2, 32),\n", + " (3, 32),\n", + " (3, 31),\n", + " (3, 30),\n", + " (3, 29),\n", + " (3, 28),\n", + " (3, 27),\n", + " (3, 26),\n", + " (3, 25),\n", + " (3, 24),\n", + " (3, 23),\n", + " (3, 22),\n", + " (3, 21),\n", + " (3, 20),\n", + " (3, 19),\n", + " (3, 18),\n", + " (3, 17),\n", + " (3, 16),\n", + " (4, 16),\n", + " (4, 17),\n", + " (4, 18),\n", + " (4, 19),\n", + " (4, 20),\n", + " (4, 21),\n", + " (4, 22),\n", + " (4, 23),\n", + " (4, 24),\n", + " (4, 25),\n", + " (4, 26),\n", + " (4, 27),\n", + " (4, 28),\n", + " (4, 29),\n", + " (4, 30),\n", + " (4, 31),\n", + " (4, 32),\n", + " (4, 33),\n", + " (4, 34),\n", + " (4, 35),\n", + " (4, 36),\n", + " (4, 37),\n", + " (4, 38),\n", + " (4, 39),\n", + " (5, 39),\n", + " (5, 38),\n", + " (5, 37),\n", + " (5, 36),\n", + " (5, 35),\n", + " (5, 34),\n", + " (5, 33),\n", + " (5, 32),\n", + " (5, 31),\n", + " (5, 30),\n", + " (5, 29),\n", + " (5, 28),\n", + " (5, 27),\n", + " (5, 26),\n", + " (5, 25),\n", + " (5, 24),\n", + " (5, 23)]" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scafs=vh_vb._scaf\n", + "s=list(scafs.values())\n", + "len(scafs)\n", + "ss=find_segs(scafs)[7]\n", + "ss" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "b37f7a4c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 2)]" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "oligos[0]=[]\n", + "L=[]\n", + "L.append((1,2))\n", + "L" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "d11f5b9c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0, 39)" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(scafs.keys())[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "7bd3df35", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{(2, 34): (3, [43]),\n", + " (2, 33): (3, [42]),\n", + " (2, 32): (3, [41]),\n", + " (2, 31): (3, [40]),\n", + " (2, 30): (3, [39]),\n", + " (2, 29): (3, [38]),\n", + " (2, 28): (3, [37]),\n", + " (2, 27): (3, [36]),\n", + " (2, 26): (3, [35]),\n", + " (2, 25): (3, [34]),\n", + " (2, 24): (3, [33]),\n", + " (2, 23): (3, [32]),\n", + " (2, 22): (3, [31]),\n", + " (2, 21): (3, [30]),\n", + " (2, 20): (3, [29]),\n", + " (2, 19): (3, [28]),\n", + " (2, 18): (3, [27]),\n", + " (2, 17): (3, [26]),\n", + " (2, 16): (3, [25]),\n", + " (2, 15): (3, [24]),\n", + " (2, 14): (3, [23]),\n", + " (2, 13): (3, [22]),\n", + " (2, 12): (3, [21]),\n", + " (2, 11): (3, [20]),\n", + " (2, 10): (3, [19]),\n", + " (2, 9): (3, [18]),\n", + " (2, 8): (3, [17]),\n", + " (2, 7): (3, [16]),\n", + " (2, 6): (3, [15]),\n", + " (2, 5): (3, [14]),\n", + " (2, 4): (3, [13]),\n", + " (2, 3): (3, [12]),\n", + " (2, 2): (3, [11]),\n", + " (2, 1): (3, [10]),\n", + " (2, 0): (3, [9]),\n", + " (1, 3): (8, [281]),\n", + " (1, 4): (8, [280]),\n", + " (1, 5): (8, [279]),\n", + " (1, 6): (8, [278]),\n", + " (1, 7): (8, [277]),\n", + " (1, 8): (8, [276]),\n", + " (1, 9): (8, [275]),\n", + " (1, 10): (8, [274]),\n", + " (1, 11): (8, [273]),\n", + " (1, 12): (8, [272]),\n", + " (1, 13): (8, [271]),\n", + " (1, 14): (8, [270]),\n", + " (1, 15): (8, [269]),\n", + " (1, 16): (8, [268]),\n", + " (1, 17): (8, [267]),\n", + " (1, 18): (8, [266]),\n", + " (1, 19): (8, [265]),\n", + " (1, 20): (8, [264]),\n", + " (0, 20): (8, [263]),\n", + " (0, 19): (8, [262]),\n", + " (0, 18): (8, [261]),\n", + " (0, 17): (8, [260]),\n", + " (0, 16): (8, [259]),\n", + " (0, 15): (8, [258]),\n", + " (0, 14): (8, [257]),\n", + " (0, 13): (8, [256]),\n", + " (0, 12): (8, [255]),\n", + " (0, 11): (8, [254]),\n", + " (0, 10): (8, [253]),\n", + " (0, 9): (8, [252]),\n", + " (0, 8): (8, [251]),\n", + " (0, 7): (8, [250]),\n", + " (0, 6): (8, [249]),\n", + " (0, 5): (8, [248]),\n", + " (0, 4): (8, [247]),\n", + " (0, 3): (8, [246]),\n", + " (0, 2): (8, [245]),\n", + " (0, 23): (9, [302]),\n", + " (0, 22): (9, [301]),\n", + " (0, 21): (9, [300]),\n", + " (1, 21): (9, [299]),\n", + " (1, 22): (9, [298]),\n", + " (1, 23): (9, [297]),\n", + " (1, 24): (9, [296]),\n", + " (1, 25): (9, [295]),\n", + " (1, 26): (9, [294]),\n", + " (1, 27): (9, [293]),\n", + " (1, 28): (9, [292]),\n", + " (1, 29): (9, [291]),\n", + " (1, 30): (9, [290]),\n", + " (1, 31): (9, [289]),\n", + " (1, 32): (9, [288]),\n", + " (1, 33): (9, [287]),\n", + " (1, 34): (9, [286]),\n", + " (1, 35): (9, [285]),\n", + " (1, 36): (9, [284]),\n", + " (1, 37): (9, [283]),\n", + " (1, 38): (9, [282]),\n", + " (5, 9): (10, [325]),\n", + " (5, 10): (10, [324]),\n", + " (5, 11): (10, [323]),\n", + " (5, 12): (10, [322]),\n", + " (5, 13): (10, [321]),\n", + " (5, 14): (10, [320]),\n", + " (5, 15): (10, [319]),\n", + " (5, 16): (10, [318]),\n", + " (5, 17): (10, [317]),\n", + " (5, 18): (10, [316]),\n", + " (5, 19): (10, [315]),\n", + " (5, 20): (10, [314]),\n", + " (5, 21): (10, [313]),\n", + " (5, 22): (10, [312]),\n", + " (5, 23): (10, [311]),\n", + " (5, 24): (10, [310]),\n", + " (5, 25): (10, [309]),\n", + " (5, 26): (10, [308]),\n", + " (5, 27): (10, [307]),\n", + " (0, 27): (10, [306]),\n", + " (0, 26): (10, [305]),\n", + " (0, 25): (10, [304]),\n", + " (0, 24): (10, [303]),\n", + " (0, 38): (11, [348]),\n", + " (0, 37): (11, [347]),\n", + " (0, 36): (11, [346]),\n", + " (0, 35): (11, [345]),\n", + " (0, 34): (11, [344]),\n", + " (0, 33): (11, [343]),\n", + " (0, 32): (11, [342]),\n", + " (0, 31): (11, [341]),\n", + " (0, 30): (11, [340]),\n", + " (0, 29): (11, [339]),\n", + " (0, 28): (11, [338]),\n", + " (5, 28): (11, [337]),\n", + " (5, 29): (11, [336]),\n", + " (5, 30): (11, [335]),\n", + " (5, 31): (11, [334]),\n", + " (5, 32): (11, [333]),\n", + " (5, 33): (11, [332]),\n", + " (5, 34): (11, [331]),\n", + " (5, 35): (11, [330]),\n", + " (5, 36): (11, [329]),\n", + " (5, 37): (11, [328]),\n", + " (5, 38): (11, [327]),\n", + " (5, 39): (11, [326]),\n", + " (3, 0): (12, [381]),\n", + " (3, 1): (12, [380]),\n", + " (3, 2): (12, [379]),\n", + " (3, 3): (12, [378]),\n", + " (3, 4): (12, [377]),\n", + " (3, 5): (12, [376]),\n", + " (3, 6): (12, [375]),\n", + " (3, 7): (12, [374]),\n", + " (3, 8): (12, [373]),\n", + " (3, 9): (12, [372]),\n", + " (3, 10): (12, [371]),\n", + " (3, 11): (12, [370]),\n", + " (3, 12): (12, [369]),\n", + " (3, 13): (12, [368]),\n", + " (3, 14): (12, [367]),\n", + " (3, 15): (12, [366]),\n", + " (3, 16): (12, [365]),\n", + " (3, 17): (12, [364]),\n", + " (3, 18): (12, [363]),\n", + " (3, 19): (12, [362]),\n", + " (3, 20): (12, [361]),\n", + " (4, 20): (12, [360]),\n", + " (4, 19): (12, [359]),\n", + " (4, 18): (12, [358]),\n", + " (4, 17): (12, [357]),\n", + " (4, 16): (12, [356]),\n", + " (4, 15): (12, [355]),\n", + " (4, 14): (12, [354]),\n", + " (4, 13): (12, [353]),\n", + " (4, 12): (12, [352]),\n", + " (4, 11): (12, [351]),\n", + " (4, 10): (12, [350]),\n", + " (4, 9): (12, [349]),\n", + " (4, 39): (13, [414]),\n", + " (4, 38): (13, [413]),\n", + " (4, 37): (13, [412]),\n", + " (4, 36): (13, [411]),\n", + " (4, 35): (13, [410]),\n", + " (4, 34): (13, [409]),\n", + " (4, 33): (13, [408]),\n", + " (4, 32): (13, [407]),\n", + " (4, 31): (13, [406]),\n", + " (4, 30): (13, [405]),\n", + " (4, 29): (13, [404]),\n", + " (4, 28): (13, [403]),\n", + " (4, 27): (13, [402]),\n", + " (4, 26): (13, [401]),\n", + " (4, 25): (13, [400]),\n", + " (4, 24): (13, [399]),\n", + " (4, 23): (13, [398]),\n", + " (4, 22): (13, [397]),\n", + " (4, 21): (13, [396]),\n", + " (3, 21): (13, [395]),\n", + " (3, 22): (13, [394]),\n", + " (3, 23): (13, [393]),\n", + " (3, 24): (13, [392]),\n", + " (3, 25): (13, [391]),\n", + " (3, 26): (13, [390]),\n", + " (3, 27): (13, [389]),\n", + " (3, 28): (13, [388]),\n", + " (3, 29): (13, [387]),\n", + " (3, 30): (13, [386]),\n", + " (3, 31): (13, [385]),\n", + " (3, 32): (13, [384]),\n", + " (3, 33): (13, [383]),\n", + " (3, 34): (13, [382])}" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s=vh_vb.__dict__\n", + "scafs=s[\"_scaf\"]\n", + "staps=s[\"_stap\"]\n", + "staps" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "efe70397", + "metadata": {}, + "outputs": [], + "source": [ + "class vstrands (object):\n", + "\n", + " def __init__(self):\n", + " self.vhelices = []\n", + "\n", + " def add_vhelix(self, toadd):\n", + " self.vhelices.append(toadd)\n", + "\n", + " def bbox(self):\n", + " rows = []\n", + " cols = []\n", + " lens = []\n", + " for h in self.vhelices:\n", + " rows.append(h.row)\n", + " cols.append(h.col)\n", + " lens.append(len(h.stap))\n", + "\n", + " dr = DIST_SQUARE * (max(rows) - min(rows) + 2)\n", + " dc = DIST_SQUARE * (max(cols) - min(cols) + 2)\n", + " dl = 0.34 * (max(lens) + 2)\n", + " \n", + " return 2 * max([dr, dc, dl]) * BOX_FACTOR\n", + " \n", + " def __str__(self):\n", + " a = '{\\n\"vstrands\":[\\n'\n", + " if len(self.vhelices) > 0:\n", + " for h in self.vhelices:\n", + " a = a + str(h) + ','\n", + " a = a[0:len(a) - 1]\n", + " a = a + '}\\n'\n", + " return a\n", + "class vhelix (object):\n", + "\n", + " def __init__(self):\n", + " self.stapLoop = []\n", + " self.scafLoop = []\n", + " self.skip = []\n", + " self.loop = []\n", + " self.stap_colors = []\n", + " self.row = 0\n", + " self.col = 0\n", + " self.num = 0\n", + " self.stap = []\n", + " self.scaf = []\n", + " self.cad_index = -1\n", + " self.skiploop_bases = 0\n", + "\n", + " def get_length(self):\n", + " return max (len(self.scaf), len(self.stap))\n", + "\n", + " len = property (get_length)\n", + "\n", + " def add_square(self, toadd, which):\n", + " if which == 'stap':\n", + " self.stap.append(toadd)\n", + " elif which == 'scaf':\n", + " self.scaf.append (toadd)\n", + " else:\n", + " base.Logger.log(\"Cannot add square that is not scaf or stap. Dying now\", base.Logger.CRITICAL)\n", + " sys.exit(1)\n", + " \n", + " def __str__(self):\n", + " a = '{\\n'\n", + "\n", + " a = a + '\"stapLoop\":['\n", + " if len(self.stapLoop) > 0:\n", + " for i in self.stapLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " a = a + '\"skip\":['\n", + " if len(self.skip) > 0:\n", + " for e in self.skip:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"loop\":['\n", + " if len(self.loop) > 0:\n", + " for e in self.loop:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap_colors\":['\n", + " if len (self.stap_colors) > 0:\n", + " for e in self.stap_colors:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + "\n", + " a = a + '\"row\":' + str(self.row) + ',\\n'\n", + " a = a + '\"col\":' + str(self.col) + ',\\n'\n", + " a = a + '\"num\":' + str(self.num) + ',\\n'\n", + " \n", + " a = a + '\"scafLoop\":['\n", + " if len(self.scafLoop) > 0:\n", + " for i in self.scafLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap\":['\n", + " if len(self.stap) > 0:\n", + " for i in self.stap:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"scaf\":['\n", + " if len(self.scaf) > 0:\n", + " for i in self.scaf:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + ']\\n}'\n", + " return a\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "646b1ae9", + "metadata": {}, + "outputs": [], + "source": [ + "L=[]\n", + "for i in df[\"vstrands\"]:\n", + " L.append(i)\n", + "\n", + "cadsys = vstrands()\n", + "vh = vhelix()\n", + "for s in L:\n", + " \n", + " vh.stap = [ i for i in s[\"scaf\"]]\n", + " vh.scaf = [i for i in s[\"stap\"]]\n", + " vh.skiploop_bases = len(s[\"skip\"]) + sum(s[\"loop\"]) - sum(s[\"skip\"])\n", + " cadsys.add_vhelix(vh)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7ead2ea3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " 'add_square',\n", + " 'cad_index',\n", + " 'col',\n", + " 'get_length',\n", + " 'len',\n", + " 'loop',\n", + " 'num',\n", + " 'row',\n", + " 'scaf',\n", + " 'scafLoop',\n", + " 'skip',\n", + " 'skiploop_bases',\n", + " 'stap',\n", + " 'stapLoop',\n", + " 'stap_colors']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0=cadsys.vhelices[0]\n", + "dir(s0)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "3095b830", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, 5, 10],\n", + " [5, 9, 5, 11],\n", + " [5, 10, 5, 12],\n", + " [5, 11, 5, 13],\n", + " [5, 12, 5, 14],\n", + " [5, 13, 5, 15],\n", + " [5, 14, 5, 16],\n", + " [5, 15, 5, 17],\n", + " [5, 16, 5, 18],\n", + " [5, 17, 5, 19],\n", + " [5, 18, 5, 20],\n", + " [5, 19, 5, 21],\n", + " [5, 20, 5, 22],\n", + " [5, 21, 5, 23],\n", + " [5, 22, 5, 24],\n", + " [5, 23, 5, 25],\n", + " [5, 24, 5, 26],\n", + " [5, 25, 5, 27],\n", + " [5, 26, 0, 27],\n", + " [0, 28, 5, 29],\n", + " [5, 28, 5, 30],\n", + " [5, 29, 5, 31],\n", + " [5, 30, 5, 32],\n", + " [5, 31, 5, 33],\n", + " [5, 32, 5, 34],\n", + " [5, 33, 5, 35],\n", + " [5, 34, 5, 36],\n", + " [5, 35, 5, 37],\n", + " [5, 36, 5, 38],\n", + " [5, 37, 5, 39],\n", + " [5, 38, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0.scaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b749b541", + "metadata": {}, + "outputs": [], + "source": [ + "for s in s0.scaf:\n", + " if s[0]==-1 and s[1]==-1:\n", + " pass\n", + " elif s[2]==len(s0.scaf) and abs(s[3])==1" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a5a89254", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[-1, -1, -1, -1]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0.scaf[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "04759ac6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " 'add_vhelix',\n", + " 'bbox',\n", + " 'vhelices']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(cadsys)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "7c36faba", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[5, 1, -1, -1],\n", + " [5, 2, 5, 0],\n", + " [5, 3, 5, 1],\n", + " [-1, -1, 5, 2],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [5, 10, 4, 9],\n", + " [5, 11, 5, 9],\n", + " [5, 12, 5, 10],\n", + " [5, 13, 5, 11],\n", + " [5, 14, 5, 12],\n", + " [5, 15, 5, 13],\n", + " [5, 16, 5, 14],\n", + " [5, 17, 5, 15],\n", + " [5, 18, 5, 16],\n", + " [5, 19, 5, 17],\n", + " [5, 20, 5, 18],\n", + " [5, 21, 5, 19],\n", + " [5, 22, 5, 20],\n", + " [-1, -1, 5, 21],\n", + " [5, 24, -1, -1],\n", + " [5, 25, 5, 23],\n", + " [5, 26, 5, 24],\n", + " [5, 27, 5, 25],\n", + " [5, 28, 5, 26],\n", + " [5, 29, 5, 27],\n", + " [5, 30, 5, 28],\n", + " [5, 31, 5, 29],\n", + " [5, 32, 5, 30],\n", + " [5, 33, 5, 31],\n", + " [5, 34, 5, 32],\n", + " [5, 35, 5, 33],\n", + " [5, 36, 5, 34],\n", + " [5, 37, 5, 35],\n", + " [5, 38, 5, 36],\n", + " [5, 39, 5, 37],\n", + " [4, 39, 5, 38],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vh.stap" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9985773f", + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'mrdna'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmrdna\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreaders\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcadnano_segments\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Document\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mrdna'" + ] + } + ], + "source": [ + "\n", + "\n", + "from mrdna.readers.cadnano_segments import *\n", + "from cadnano.document import Document\n", + "import cadnano" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "134923d4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f7bd6aef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "json_data=read_json_file(\"test.json\")\n", + "part=decode_cadnano_part(json_data)\n", + "model=cadnano_part(part)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9913320b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "doc=Document()\n", + "cadnano.fileio.v2decode.decode(doc, json_data)\n", + "parts = [p for p in doc.getParts()]\n", + "part=parts[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "46af2b4f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Oligo_(0.1[38])_1328\t23\t'None\n", + "Oligo_(2.1[34])_9584\t35\t'None\n", + "Oligo_(1.1[36])_7488\t188\t'None\n", + "Oligo_(4.1[39])_4384\t33\t'None\n", + "Oligo_(5.0[9])_0240\t23\t'None\n", + "Oligo_(1.0[3])_8256\t37\t'None\n", + "Oligo_(3.0[0])_3296\t33\t'None\n", + "Oligo_(0.1[23])_9088\t21\t'None\n", + "VH0\n", + "\t <fwd_StrandSet(0)> \t [(5, 36)] \n", + "\t\t\t\t ['#0066cc']\n", + "\t <rev_StrandSet(0)> \t [(2, 20), (21, 23), (24, 27), (28, 38)] \n", + "\t\t\t\t ['#cc0000', '#b8056c', '#f74308', '#1700de']\n", + "VH1\n", + "\t <fwd_StrandSet(1)> \t [(3, 20), (21, 38)] \n", + "\t\t\t\t ['#cc0000', '#b8056c']\n", + "\t <rev_StrandSet(1)> \t [(5, 18), (19, 36)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "VH2\n", + "\t <fwd_StrandSet(2)> \t [(2, 18), (19, 32)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "\t <rev_StrandSet(2)> \t [(0, 34)] \n", + "\t\t\t\t ['#888888']\n", + "VH3\n", + "\t <fwd_StrandSet(3)> \t [(0, 20), (21, 34)] \n", + "\t\t\t\t ['#cc0000', '#888888']\n", + "\t <rev_StrandSet(3)> \t [(2, 15), (16, 32)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "VH4\n", + "\t <fwd_StrandSet(4)> \t [(9, 15), (16, 39)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "\t <rev_StrandSet(4)> \t [(9, 20), (21, 39)] \n", + "\t\t\t\t ['#cc0000', '#888888']\n", + "VH5\n", + "\t <fwd_StrandSet(5)> \t [(9, 27), (28, 39)] \n", + "\t\t\t\t ['#f74308', '#1700de']\n", + "\t <rev_StrandSet(5)> \t [(9, 39)] \n", + "\t\t\t\t ['#0066cc']\n" + ] + } + ], + "source": [ + "part.__dict__.keys()\n", + "\n", + "oligos = part.oligos()\n", + "for oligo in oligos:\n", + " print(\"{0}\\t{1}\\t\\'{2}\".format(oligo,\n", + " oligo.length(),\n", + " oligo.sequence()))\n", + "\n", + "vhs = list(part.getIdNums()) # convert set to list\n", + "for vh_id in vhs: # display first 3 vhs\n", + " fwd_ss, rev_ss = part.getStrandSets(vh_id)\n", + " print('VH{0}'.format(vh_id))\n", + " print('\\t', fwd_ss, '\\t', [s.idxs() for s in fwd_ss.strands()], '\\n\\t\\t\\t\\t',\n", + " [s.getColor() for s in fwd_ss.strands()])\n", + " print('\\t', rev_ss, '\\t', [s.idxs() for s in rev_ss.strands()], '\\n\\t\\t\\t\\t',\n", + " [s.getColor() for s in rev_ss.strands()])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6ae574e4", + "metadata": {}, + "outputs": [], + "source": [ + "strands5 = [o.strand5p() for o in part.oligos()]\n", + "strands3 = [o.strand3p() for o in part.oligos()]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "4b56fb9d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__slots__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " '_decrementLength',\n", + " '_incrementLength',\n", + " '_is_circular',\n", + " '_parent',\n", + " '_part',\n", + " '_props',\n", + " '_setColor',\n", + " '_setLength',\n", + " '_setLoop',\n", + " '_setProperty',\n", + " '_signals',\n", + " '_strand5p',\n", + " '_strandMergeUpdate',\n", + " '_strandSplitUpdate',\n", + " 'addToPart',\n", + " 'applyAbstractSequences',\n", + " 'applyColor',\n", + " 'applySequence',\n", + " 'applySequenceCMD',\n", + " 'clearAbstractSequences',\n", + " 'connect',\n", + " 'deleteLater',\n", + " 'destroy',\n", + " 'disconnect',\n", + " 'displayAbstractSequences',\n", + " 'dump',\n", + " 'editable_properties',\n", + " 'getAbsolutePositionAtLength',\n", + " 'getColor',\n", + " 'getModelProperties',\n", + " 'getName',\n", + " 'getNumberOfBasesToEachXover',\n", + " 'getOutlineProperties',\n", + " 'getProperty',\n", + " 'getStrandLengths',\n", + " 'isCircular',\n", + " 'length',\n", + " 'locString',\n", + " 'oligoPropertyChangedSignal',\n", + " 'oligoRemovedSignal',\n", + " 'oligoSelectedChangedSignal',\n", + " 'oligoSequenceAddedSignal',\n", + " 'oligoSequenceClearedSignal',\n", + " 'parent',\n", + " 'part',\n", + " 'refreshLength',\n", + " 'remove',\n", + " 'removeFromPart',\n", + " 'sequence',\n", + " 'sequenceExport',\n", + " 'setParent',\n", + " 'setPart',\n", + " 'setProperty',\n", + " 'setStrand5p',\n", + " 'shallowCopy',\n", + " 'shouldHighlight',\n", + " 'signals',\n", + " 'splitAtAbsoluteLengths',\n", + " 'strand3p',\n", + " 'strand5p',\n", + " 'undoStack']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "L=[o for o in part.oligos()]\n", + "dir(L[2])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "0c061135", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(dict, {0: {}, 1: {}, 2: {}, 3: {}, 4: {}, 5: {}})" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "part.insertions()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1d7952e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'NaPart1',\n", + " 'color': '#0066cc',\n", + " 'is_visible': True,\n", + " 'active_phos': None,\n", + " 'crossover_span_angle': 45,\n", + " 'max_vhelix_length': 42,\n", + " 'neighbor_active_angle': '',\n", + " 'grid_type': <GridEnum.HONEYCOMB: 2>,\n", + " 'virtual_helix_order': [0, 1, 2, 3, 4, 5],\n", + " 'is_lattice': True,\n", + " <GridEnum.HONEYCOMB: 2>: <GridEnum.NONE: 0>}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "part.getModelProperties()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3a02aa96", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _\n", + " _____ ___ _| |___ ___\n", + "| | _| . | | .'|\n", + "|_|_|_|_| |___|_|_|__,| v1.0a.dev74 \n", + "it/its\n", + "\n" + ] + } + ], + "source": [ + "import pdb\n", + "import numpy as np\n", + "import os,sys\n", + "import scipy\n", + "\n", + "from mrdna import logger, devlogger\n", + "from mrdna.segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment\n", + "from mrdna.arbdmodel.coords import quaternion_from_matrix, rotationAboutAxis, quaternion_slerp\n", + "from mrdna import get_resource_path\n", + "\n", + "ref_stack_position = np.array((-2.41851735, -0.259761333, 3.39999978))\n", + "\n", + "def _three_prime_list_to_five_prime(three_prime):\n", + " five_prime = -np.ones(three_prime.shape, dtype=int)\n", + " has_three_prime = np.where(three_prime >= 0)[0]\n", + " five_prime[three_prime[has_three_prime]] = has_three_prime\n", + " return five_prime \n", + "def _primes_list_to_strands(three_prime, five_prime):\n", + " five_prime_ends = np.where(five_prime < 0)[0]\n", + " strands = []\n", + " strand_is_circular = []\n", + " \n", + " idx_to_strand = -np.ones(three_prime.shape, dtype=int)\n", + "\n", + " def build_strand(nt_idx, conditional):\n", + " strand = [nt_idx]\n", + " idx_to_strand[nt_idx] = len(strands)\n", + " while conditional(nt_idx):\n", + " nt_idx = three_prime[nt_idx]\n", + " strand.append(nt_idx)\n", + " idx_to_strand[nt_idx] = len(strands)\n", + " strands.append( np.array(strand, dtype=int) )\n", + "\n", + " for nt_idx in five_prime_ends:\n", + " build_strand(nt_idx,\n", + " lambda nt: three_prime[nt] >= 0)\n", + " strand_is_circular.append(False)\n", + "\n", + " while True:\n", + " ## print(\"WARNING: working on circular strand {}\".format(len(strands)))\n", + " ids = np.where(idx_to_strand < 0)[0]\n", + " if len(ids) == 0: break\n", + " build_strand(ids[0],\n", + " lambda nt: three_prime[nt] >= 0 and \\\n", + " idx_to_strand[three_prime[nt]] < 0)\n", + " strand_is_circular.append(True)\n", + "\n", + " return strands, strand_is_circular\n", + "\n", + "def find_stacks(centers, transforms):\n", + "\n", + " ## Find orientation and center of each nucleotide\n", + " expected_stack_positions = []\n", + " for R,c in zip(transforms,centers):\n", + " expected_stack_positions.append( c + ref_stack_position.dot(R) )\n", + "\n", + " expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)\n", + "\n", + " dists = scipy.spatial.distance_matrix(expected_stack_positions, centers)\n", + " dists = dists + 5*np.eye(len(dists))\n", + " idx1, idx2 = np.where(dists < 3.5)\n", + "\n", + " ## Convert distances to stacks\n", + " stacks_above = -np.ones(len(centers), dtype=int)\n", + " _z = np.array((0,0,1))\n", + " for i in np.unique(idx1):\n", + " js = idx2[ idx1 == i ]\n", + " with np.errstate(divide='ignore',invalid='ignore'):\n", + " angles = [np.arccos( transforms[j].T.dot( transforms[i].dot(_z) ).dot( _z ) ) for j in js]\n", + " angles = np.array( angles )\n", + " tmp = np.argmin(dists[i][js] + 1.0*angles)\n", + " j = js[tmp]\n", + " stacks_above[i] = j\n", + "\n", + " return stacks_above\n", + "\n", + "def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):\n", + "\n", + " helixmap = -np.ones(basepairs.shape, dtype=int)\n", + " helixrank = -np.ones(basepairs.shape)\n", + " is_fwd = np.ones(basepairs.shape, dtype=int)\n", + " \n", + " ## Remove stacks with nts lacking a basepairs\n", + " nobp = np.where(basepairs < 0)[0]\n", + " stacks_above[nobp] = -1\n", + " stacks_with_nobp = np.in1d(stacks_above, nobp)\n", + " stacks_above[stacks_with_nobp] = -1\n", + "\n", + " end_ids = np.where( (stacks_above < 0)*(basepairs >= 0) )[0]\n", + "\n", + " hid = 0\n", + " for end in end_ids:\n", + " if helixmap[end] >= 0:\n", + " continue\n", + " rank = 0\n", + " nt = basepairs[end]\n", + " bp = basepairs[nt]\n", + " assert( bp == end )\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " continue\n", + " # assert(helixmap[nt] == -1)\n", + " # assert(helixmap[bp] == -1)\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " rank +=1\n", + "\n", + " _tmp = [(nt,bp)]\n", + " \n", + " while stacks_above[nt] >= 0:\n", + " nt = stacks_above[nt]\n", + " if basepairs[nt] < 0: break\n", + " bp = basepairs[nt]\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " break\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " _tmp.append((nt,bp))\n", + " rank +=1\n", + "\n", + " hid += 1\n", + "\n", + " ## Create \"helix\" for each circular segment\n", + " intrahelical = []\n", + " processed = set()\n", + " unclaimed_bases = np.where( (basepairs >= 0)*(helixmap == -1) )[0]\n", + " for nt0 in unclaimed_bases:\n", + " if nt0 in processed: continue\n", + "\n", + " nt = nt0\n", + " all_nts = [nt]\n", + "\n", + " rank = 0\n", + " nt = nt0\n", + " bp = basepairs[nt]\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed cylic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " continue\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " rank +=1\n", + " processed.add(nt)\n", + " processed.add(bp)\n", + "\n", + " counter = 0\n", + " while stacks_above[nt] >= 0:\n", + " lastnt = nt\n", + " nt = stacks_above[nt]\n", + " bp = basepairs[nt]\n", + " if nt == nt0 or nt == basepairs[nt0]:\n", + " intrahelical.append((lastnt,nt0))\n", + " break\n", + " \n", + " assert( bp >= 0 )\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed cyclic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " break\n", + " \n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " processed.add(nt)\n", + " processed.add(bp)\n", + " rank +=1\n", + " hid += 1\n", + "\n", + " return helixmap, helixrank, is_fwd, intrahelical\n", + "\n", + "\n", + "def set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation=None):\n", + " maxrank = np.max( hrank[hmap==hid] )\n", + " if maxrank == 0:\n", + " ids = np.where((hmap == hid))[0]\n", + " pos = np.mean( [coordinate[r,:] for r in ids ], axis=0 )\n", + " coords = [pos,pos]\n", + " contours = [0,1]\n", + " if orientation is not None:\n", + " ids = np.where((hmap == hid) * fwd)[0]\n", + " assert( len(ids) == 1 )\n", + " q = quaternion_from_matrix( orientation[ids[0]] )\n", + " quats = [q, q]\n", + " coords[-1] = pos + orientation[ids[0]].dot(np.array((0,0,1)))\n", + "\n", + " else:\n", + " coords,contours,quats = [[],[],[]]\n", + " last_q = None\n", + " for rank in range(int(maxrank)+1):\n", + " ids = np.where((hmap == hid) * (hrank == rank))[0]\n", + " \n", + " coords.append(np.mean( [coordinate[r,:] for r in ids ], axis=0 ))\n", + " contours.append( float(rank+0.5)/(maxrank+1) )\n", + " if orientation is not None:\n", + " ids = np.where((hmap == hid) * (hrank == rank) * fwd)[0]\n", + " assert(len(ids) == 1)\n", + " q = quaternion_from_matrix( orientation[ids[0]] )\n", + "\n", + " if last_q is not None and last_q.dot(q) < 0:\n", + " q = -q\n", + "\n", + " ## Average quaterion with reverse direction\n", + " bp = basepair[ids[0]]\n", + " if bp >= 0:\n", + " bp_o = orientation[bp].dot(rotationAboutAxis(np.array((1,0,0)),180))\n", + " q2 = quaternion_from_matrix( bp_o )\n", + " if q.dot(q2) < 0:\n", + " q2 = -q2\n", + "\n", + " ## probably good enough, but slerp is better: q = (q + q2)*0.5\n", + " q = quaternion_slerp(q,q2,0.5)\n", + "\n", + " quats.append(q)\n", + " last_q = q\n", + "\n", + " coords = np.array(coords)\n", + " seg.set_splines(contours,coords)\n", + " if orientation is not None:\n", + " quats = np.array(quats)\n", + " seg.set_orientation_splines(contours,quats)\n", + "\n", + " seg.start_position = coords[0,:]\n", + " seg.end_position = coords[-1,:]\n", + "\n", + "\n", + "def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,\n", + " sequence=None, orientation=None,\n", + " max_basepairs_per_bead = 5,\n", + " max_nucleotides_per_bead = 5,\n", + " local_twist = False,\n", + " dimensions=(5000,5000,5000),\n", + " **model_parameters):\n", + " \"\"\" \n", + " Creates a SegmentModel object from lists of each nucleotide's\n", + " basepair, its stack (on 3' side) and its 3'-connected nucleotide\n", + "\n", + " The first argument should be an N-by-3 numpy array containing the\n", + " coordinate of each nucleotide, where N is the number of\n", + " nucleotides. The following three arguments should be integer lists\n", + " where the i-th element corresponds to the i-th nucleotide; the\n", + " list element should the integer index of the corresponding\n", + " basepaired / stacked / phosphodiester-bonded nucleotide. If there\n", + " is no such nucleotide, the value should be -1.\n", + "\n", + " Args:\n", + " basepair: List of each nucleotide's basepair's index\n", + " stack: List containing index of the nucleotide stacked on the 3' of each nucleotide\n", + " three_prime: List of each nucleotide's the 3' end of each nucleotide\n", + "\n", + " Returns:\n", + " SegmentModel\n", + " \"\"\"\n", + "\n", + " \"\"\" Validate Input \"\"\"\n", + " inputs = (basepair,three_prime)\n", + " try:\n", + " basepair,three_prime = [np.array(a,dtype=int) for a in inputs]\n", + " except:\n", + " raise TypeError(\"One or more of the input lists could not be converted into a numpy array\")\n", + " inputs = (basepair,three_prime)\n", + " coordinate = np.array(coordinate)\n", + "\n", + " if np.any( [len(a.shape) > 1 for a in inputs] ):\n", + " raise ValueError(\"One or more of the input lists has the wrong dimensionality\")\n", + "\n", + " if len(coordinate.shape) != 2:\n", + " raise ValueError(\"Coordinate array has the wrong dimensionality\")\n", + "\n", + " inputs = (coordinate,basepair,three_prime)\n", + " if not np.all(np.diff([len(a) for a in inputs]) == 0):\n", + " raise ValueError(\"Inputs are not the same length\")\n", + " \n", + " num_nt = len(basepair)\n", + " if sequence is not None and len(sequence) != num_nt:\n", + " raise ValueError(\"The 'sequence' parameter is the wrong length {} != {}\".format(len(sequence),num_nt))\n", + "\n", + " if orientation is not None:\n", + " orientation = np.array(orientation)\n", + " if len(orientation.shape) != 3:\n", + " raise ValueError(\"The 'orientation' array has the wrong dimensionality (should be Nx3x3)\")\n", + " if orientation.shape != (num_nt,3,3):\n", + " raise ValueError(\"The 'orientation' array is not properly formatted\")\n", + "\n", + " if stack is None:\n", + " if orientation is not None:\n", + " stack = find_stacks(coordinate, orientation)\n", + " else:\n", + " ## Guess stacking based on 3' connectivity\n", + " stack = np.array(three_prime,dtype=int) # Assume nts on 3' ends are stacked\n", + " _stack_below = _three_prime_list_to_five_prime(stack)\n", + " _has_bp = (basepair >= 0)\n", + " _nostack = np.where( (stack == -1)*_has_bp )[0]\n", + " _has_stack_below = _stack_below[basepair[_nostack]] >= 0\n", + " _nostack2 = _nostack[_has_stack_below]\n", + " stack[_nostack2] = basepair[_stack_below[basepair[_nostack2]]]\n", + "\n", + " else:\n", + " try:\n", + " stack = np.array(stack,dtype=int)\n", + " except:\n", + " raise TypeError(\"The 'stack' array could not be converted into a numpy integer array\")\n", + "\n", + " if len(stack.shape) != 1:\n", + " raise ValueError(\"The 'stack' array has the wrong dimensionality\")\n", + "\n", + " if len(stack) != num_nt:\n", + " raise ValueError(\"The length of the 'stack' array does not match other inputs\")\n", + "\n", + " bps = basepair # alias\n", + "\n", + " \"\"\" Fix stacks: require that the stack of a bp of a base's stack is its bp \"\"\"\n", + " _has_bp = (bps >= 0)\n", + " _has_stack = (stack >= 0)\n", + " _stack_has_basepair = (bps[stack] >= 0) * _has_stack\n", + " stack = np.where( (stack[bps[stack]] == bps) * _has_bp * _has_stack * _has_bp,\n", + " stack, -np.ones(len(stack),dtype=int) )\n", + "\n", + " five_prime = _three_prime_list_to_five_prime(three_prime)\n", + "\n", + " \"\"\" Build map of dsDNA helices and strands \"\"\"\n", + " hmap,hrank,fwd,intrahelical = basepairs_and_stacks_to_helixmap(bps,stack)\n", + " double_stranded_helices = np.unique(hmap[hmap >= 0]) \n", + " strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime)\n", + "\n", + " \"\"\" Add ssDNA to hmap \"\"\"\n", + " if len(double_stranded_helices) > 0:\n", + " hid = double_stranded_helices[-1]+1\n", + " else:\n", + " hid = 0\n", + " ss_residues = hmap < 0\n", + " #\n", + " if np.any(bps[ss_residues] != -1):\n", + " logger.warning(f'{np.sum(bps[ss_residues] != -1)} ssDNA nucleotides appear to have basepairs... ignoring')\n", + " \n", + " for s,c in zip(strands, strand_is_circular):\n", + " strand_segment_ends = [i for i in np.where( np.diff(hmap[s]) != 0 )[0]] + [len(s)-1]\n", + " seg_start = 0\n", + " for i in strand_segment_ends:\n", + " if hmap[s[i]] < 0:\n", + " ## Found single-stranded segment\n", + " ids = s[seg_start:i+1]\n", + " assert( np.all(hmap[ids] == -1) )\n", + " hmap[ids] = hid\n", + " hrank[ids] = np.arange(i+1-seg_start)\n", + " hid+=1\n", + " seg_start = i+1\n", + "\n", + " if len(double_stranded_helices) > 0:\n", + " single_stranded_helices = np.arange(double_stranded_helices[-1]+1,hid)\n", + " else:\n", + " single_stranded_helices = np.arange(hid)\n", + "\n", + " ## Create double-stranded segments\n", + " doubleSegments = []\n", + " for hid in double_stranded_helices:\n", + " seg = DoubleStrandedSegment(name=str(hid),\n", + " num_bp = np.sum(hmap==hid)//2)\n", + " set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)\n", + "\n", + " assert(hid == len(doubleSegments))\n", + " doubleSegments.append(seg)\n", + "\n", + " ## Create single-stranded segments\n", + " singleSegments = []\n", + " for hid in single_stranded_helices:\n", + " seg = SingleStrandedSegment(name=str(hid),\n", + " num_nt = np.sum(hmap==hid))\n", + " set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)\n", + "\n", + " assert(hid == len(doubleSegments) + len(singleSegments))\n", + " singleSegments.append(seg)\n", + "\n", + " ## Find crossovers and 5prime/3prime ends\n", + " crossovers,prime5,prime3 = [[],[],[]]\n", + " for s,c in zip(strands,strand_is_circular):\n", + " tmp = np.where(np.diff(hmap[s]) != 0)[0]\n", + " for i in tmp:\n", + " crossovers.append( (s[i],s[i+1]) )\n", + " if c:\n", + " if hmap[s[-1]] != hmap[s[0]]:\n", + " crossovers.append( (s[-1],s[0]) )\n", + " else:\n", + " prime5.append(s[0])\n", + " prime3.append(s[-1])\n", + "\n", + " ## Add connections\n", + " allSegments = doubleSegments+singleSegments\n", + "\n", + " for r1,r2 in crossovers:\n", + " seg1,seg2 = [allSegments[hmap[i]] for i in (r1,r2)]\n", + " nt1,nt2 = [hrank[i] for i in (r1,r2)]\n", + " f1,f2 = [fwd[i] for i in (r1,r2)]\n", + "\n", + " ## Handle connections at the ends\n", + " is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1))\n", + " is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0))\n", + "\n", + " print(seg1,seg2, r1, r2, is_terminal1, is_terminal2)\n", + " if is_terminal1 or is_terminal2:\n", + " \"\"\" Ensure that we don't have three-way dsDNA junctions \"\"\"\n", + " if is_terminal1 and (bps[r1] >= 0) and (five_prime[bps[r1]] >= 0) and (three_prime[r1] >= 0):\n", + " if (bps[five_prime[bps[r1]]] >= 0) and (bps[three_prime[r1]] >= 0):\n", + " # is_terminal1 = (three_prime[r1] == bps[five_prime[bps[r1]]])\n", + " is_terminal1 = hmap[five_prime[bps[r1]]] == hmap[three_prime[r1]]\n", + " if is_terminal2 and (bps[r2] >= 0) and (three_prime[bps[r2]] >= 0) and (five_prime[r2] >= 0):\n", + " if (bps[three_prime[bps[r2]]] >= 0) and (bps[five_prime[r2]] >= 0):\n", + " # is_terminal2 = (five_prime[r2] == bps[three_prime[bps[r2]]])\n", + " is_terminal2 = hmap[three_prime[bps[r2]]] == hmap[five_prime[r2]]\n", + " \n", + " \"\"\" Place connection \"\"\"\n", + " if is_terminal1 and is_terminal2:\n", + " end1 = seg1.end3 if f1 else seg1.start3\n", + " end2 = seg2.start5 if f2 else seg2.end5\n", + " seg1._connect_ends( end1, end2, type_='intrahelical')\n", + " else:\n", + " seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_=\"terminal_crossover\")\n", + " else:\n", + " seg1.add_crossover(nt1,seg2,nt2,[f1,f2])\n", + "\n", + " ## Add 5prime/3prime ends\n", + " for r in prime5:\n", + " seg = allSegments[hmap[r]]\n", + " seg.add_5prime(hrank[r],fwd[r])\n", + " for r in prime3:\n", + " seg = allSegments[hmap[r]]\n", + " seg.add_3prime(hrank[r],fwd[r])\n", + "\n", + " ## Add intrahelical connections to circular helical sections\n", + " for nt0,nt1 in intrahelical:\n", + " seg = allSegments[hmap[nt0]]\n", + " assert( seg is allSegments[hmap[nt1]] )\n", + " if three_prime[nt0] >= 0:\n", + " if hmap[nt0] == hmap[three_prime[nt0]]:\n", + " seg.connect_end3(seg.start5)\n", + "\n", + " bp0,bp1 = [bps[nt] for nt in (nt0,nt1)]\n", + " if three_prime[bp1] >= 0:\n", + " if hmap[bp1] == hmap[three_prime[bp1]]:\n", + " seg.connect_start3(seg.end5)\n", + "\n", + " ## Assign sequence\n", + " if sequence is not None:\n", + " for hid in range(len(allSegments)):\n", + " resids = np.where( (hmap==hid)*(fwd==1) )[0]\n", + " s = allSegments[hid]\n", + " s.sequence = [sequence[r] for r in sorted(resids,key=lambda x: hrank[x])]\n", + "\n", + "\n", + " ## Build model\n", + " model = SegmentModel( allSegments,\n", + " max_basepairs_per_bead = max_basepairs_per_bead,\n", + " max_nucleotides_per_bead = max_nucleotides_per_bead,\n", + " local_twist = local_twist,\n", + " dimensions = dimensions,\n", + " **model_parameters )\n", + "\n", + "\n", + " model._reader_list_coordinates = coordinate\n", + " model._reader_list_basepair = basepair\n", + " model._reader_list_stack = stack\n", + " model._reader_list_three_prime = three_prime\n", + " model._reader_list_five_prime = five_prime\n", + " model._reader_list_sequence = sequence\n", + " model._reader_list_orientation = orientation\n", + " model._reader_list_hmap = hmap\n", + " model._reader_list_fwd = fwd\n", + " model._reader_list_hrank = hrank\n", + "\n", + " if sequence is None:\n", + " for s in model.segments:\n", + " s.randomize_unset_sequence()\n", + "\n", + " return model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6ab2279a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<DoubleStrandedSegment'> 1[1]> <DoubleStrandedSegment'> 0[1]> 5 3 True True\n", + "<SingleStrandedSegment'> 2[2]> <DoubleStrandedSegment'> 0[1]> 1 2 True True\n", + "<DoubleStrandedSegment'> 0[1]> <DoubleStrandedSegment'> 1[1]> 2 4 True True\n", + "<DoubleStrandedSegment'> 1[1]> <SingleStrandedSegment'> 3[1]> 4 6 True True\n", + "<SingleStrandedSegment'> 3[1]> <SingleStrandedSegment'> 2[2]> 6 0 True True\n" + ] + } + ], + "source": [ + "coordinate = [(0,0,3.4*i) for i in range(7)]\n", + "three_prime = [ 1, 2, 4,-1, 6, 3, 0]\n", + "basepair = [-1,-1, 3, 2, 5, 4,-1]\n", + "stack = [-1,-1, -1,-1,-1, -1,-1]\n", + "for i in [3,5]:\n", + " coordinate[i] = (1,0,3.4*i)\n", + "\n", + "model = model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,\n", + " max_basepairs_per_bead=1,\n", + " max_nucleotides_per_bead=1,\n", + " local_twist=False)\n", + "model.writePsf(\"list.psf\")\n", + "model.writePdb(\"list.pdb\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d7dbbbbf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[<SegmentParticle DNA on <DoubleStrandedSegment'> 0[1]>[1.00]>], [<SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[0.00]>, <SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.00]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.50]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 3[1]>[0.50]>]]\n", + "[[<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>], [<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>, <Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>], [<Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>], [<Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>]]\n" + ] + } + ], + "source": [ + "print([i.children for i in model.children])\n", + "print([i.connections for i in model.children])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "35968795", + "metadata": {}, + "outputs": [], + "source": [ + "s=model.children[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7512de77", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.segname" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "74944c42", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(0, 0, 0.0),\n", + " (0, 0, 3.4),\n", + " (0, 0, 6.8),\n", + " (1, 0, 10.2),\n", + " (0, 0, 13.6),\n", + " (1, 0, 17.0),\n", + " (0, 0, 20.4)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coordinate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1d9ef64", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/mrdna/readers/test.json.oxdna b/mrdna/readers/test/test.json.oxdna similarity index 100% rename from mrdna/readers/test.json.oxdna rename to mrdna/readers/test/test.json.oxdna diff --git a/mrdna/readers/test.json.top b/mrdna/readers/test/test.json.top similarity index 100% rename from mrdna/readers/test.json.top rename to mrdna/readers/test/test.json.top diff --git a/mrdna/readers/test.seq.json b/mrdna/readers/test/test.seq.json similarity index 100% rename from mrdna/readers/test.seq.json rename to mrdna/readers/test/test.seq.json diff --git a/mrdna/readers/test.virt2nuc b/mrdna/readers/test/test.virt2nuc similarity index 100% rename from mrdna/readers/test.virt2nuc rename to mrdna/readers/test/test.virt2nuc diff --git a/mrdna/readers/test2.ipynb b/mrdna/readers/test/test2.ipynb similarity index 100% rename from mrdna/readers/test2.ipynb rename to mrdna/readers/test/test2.ipynb diff --git a/mrdna/readers/test3.ipynb b/mrdna/readers/test/test3.ipynb similarity index 100% rename from mrdna/readers/test3.ipynb rename to mrdna/readers/test/test3.ipynb -- GitLab