diff --git a/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb b/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb index b2453ff2d4b542ee3e719b0c9737ef08e9a9ee8b..35dcfeebe6d1c30490746e659e54020de83c676d 100644 --- a/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb +++ b/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 11, - "id": "767c8226", + "execution_count": 2, + "id": "d1c58516", "metadata": { "scrolled": true }, @@ -18,42 +18,439 @@ }, { "cell_type": "code", - "execution_count": 19, - "id": "bf2d37ca", + "execution_count": 7, + "id": "df7df2a7", "metadata": {}, "outputs": [], "source": [ - "data=[]\n", - "for i in df[\"vstrands\"]:\n", - " data.append(i)\n", - "df=pd.DataFrame(data=d)" + "import cadnano\n", + "from cadnano.document import Document\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a24cd262", + "metadata": {}, + "outputs": [], + "source": [ + "df=pd.DataFrame(data=d)\n", + "df=df.set_index(\"num\")" ] }, { "cell_type": "code", - "execution_count": 58, - "id": "22a5db0b-2ce5-4af9-83f1-88114342c36d", + "execution_count": 5, + "id": "806af23a", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>row</th>\n", + " <th>col</th>\n", + " <th>scaf</th>\n", + " <th>stap</th>\n", + " <th>loop</th>\n", + " <th>skip</th>\n", + " <th>scafLoop</th>\n", + " <th>stapLoop</th>\n", + " <th>stap_colors</th>\n", + " </tr>\n", + " <tr>\n", + " <th>num</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>12</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[23, 13369809], [38, 12060012]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>12</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[3, 1501302]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>13</td>\n", + " <td>15</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,...</td>\n", + " <td>[[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[34, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>13</td>\n", + " <td>16</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,...</td>\n", + " <td>[[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[0, 13369344]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>13</td>\n", + " <td>17</td>\n", + " <td>[[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[39, 8947848]]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>12</td>\n", + " <td>17</td>\n", + " <td>[[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [...</td>\n", + " <td>[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n", + " <td>[]</td>\n", + " <td>[]</td>\n", + " <td>[[9, 0]]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], "text/plain": [ - "[-1, -1, -1, -1]" + " row col scaf \\\n", + "num \n", + "0 12 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "1 12 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 13 15 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 2, 2,... \n", + "3 13 16 [[-1, -1, -1, -1], [-1, -1, -1, -1], [3, 3, 2,... \n", + "4 13 17 [[-1, -1, 4, 1], [4, 0, 4, 2], [4, 1, 4, 3], [... \n", + "5 12 17 [[5, 1, -1, -1], [5, 2, 5, 0], [5, 3, 5, 1], [... \n", + "\n", + " stap \\\n", + "num \n", + "0 [[-1, -1, -1, -1], [-1, -1, -1, -1], [0, 3, -1... \n", + "1 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "2 [[2, 1, -1, -1], [2, 2, 2, 0], [2, 3, 2, 1], [... \n", + "3 [[-1, -1, 3, 1], [3, 0, 3, 2], [3, 1, 3, 3], [... \n", + "4 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "5 [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, ... \n", + "\n", + " loop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", + "\n", + " skip scafLoop stapLoop \\\n", + "num \n", + "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "5 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [] [] \n", + "\n", + " stap_colors \n", + "num \n", + "0 [[23, 13369809], [38, 12060012]] \n", + "1 [[3, 1501302]] \n", + "2 [[34, 8947848]] \n", + "3 [[0, 13369344]] \n", + "4 [[39, 8947848]] \n", + "5 [[9, 0]] " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "db28a621", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "doc = Document()\n", + "def read_json_file(filename):\n", + " import json\n", + " import re\n", + "\n", + " try:\n", + " with open(filename) as ch:\n", + " data = json.load(ch)\n", + " except:\n", + " with open(filename) as ch:\n", + " content = \"\"\n", + " for l in ch:\n", + " l = re.sub(r\"'\", r'\"', l)\n", + " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", + " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", + " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", + " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", + " content += l+\"\\n\"\n", + " data = json.loads(content)\n", + " return data\n", + "f=read_json_file(\"test.json\")\n", + "cadnano.fileio.v2decode.decode(doc, f)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "cf748a26", + "metadata": {}, + "outputs": [], + "source": [ + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(\"WARNING: unable to determine cadnano part lattice type\")\n", + " return lattice_type\n" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "fde9e6b9", + "metadata": {}, + "outputs": [], + "source": [ + "def read_json_file(filename):\n", + " import json\n", + " import re\n", + " import cadnano\n", + " from cadnano.document import Document\n", + "\n", + " try:\n", + " with open(filename) as ch:\n", + " json_data = json.load(ch)\n", + " except:\n", + " with open(filename) as ch:\n", + " content = \"\"\n", + " for l in ch:\n", + " l = re.sub(r\"'\", r'\"', l)\n", + " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", + " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", + " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", + " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", + " content += l+\"\\n\"\n", + " json_data = json.loads(content)\n", + "\n", + " try:\n", + " doc = Document()\n", + " cadnano.fileio.v3decode.decode(doc, json_data)\n", + " decoder = 3\n", + " except:\n", + " doc = Document()\n", + " cadnano.fileio.v2decode.decode(doc, json_data)\n", + " decoder = 2\n", + "\n", + " parts = [p for p in doc.getParts()]\n", + " if len(parts) != 1:\n", + " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n", + " part = parts[0]\n", + "\n", + " if decoder == 2:\n", + " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n", + " l = get_lattice(part)\n", + " if l == 'square':\n", + " for id_num in part.getIdNums():\n", + " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n", + " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n", + " df=pd.DataFrame(json_data[\"vstrands\"])\n", + " n_df=df.set_index(\"num\")\n", + " return part,n_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "608b53b9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[42, 42, 42, 42, 42, 42]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bpnum=[len(df[\"scaf\"][i]) for i in df.index]\n", + "bpnum" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "abf6006a", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "f,b=read_json_file(\"test.json\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "57f22778", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([0, 1, 2, 3, 4, 5], dtype='int64', name='num')" ] }, - "execution_count": 58, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[\"stap\"][0][0]" + "b.index" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "399ada62", + "metadata": {}, + "outputs": [], + "source": [ + "def ntcount(b):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in b])\n", + " return np.sum(n)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "1d63a717", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "35\n", + "35\n", + "34\n", + "36\n", + "35\n", + "35\n", + "37\n", + "36\n", + "35\n", + "35\n", + "31\n", + "31\n" + ] + } + ], + "source": [ + "b[\"scafnt\"]=[ntcount(b['scaf'][i]) for i in b.index]\n", + "b[\"stapnt\"]=[ntcount(b['stap'][i]) for i in b.index]" ] }, { "cell_type": "code", "execution_count": 13, - "id": "46079e58-4747-42ff-8b68-906c94440474", + "id": "6e30e6ec", "metadata": {}, "outputs": [ { @@ -74,7 +471,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "625c831b", + "id": "d2a55c6a", "metadata": {}, "outputs": [ { @@ -93,10 +490,32 @@ "len(vh_vb._scaf)" ] }, + { + "cell_type": "code", + "execution_count": 198, + "id": "b36a118e", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "file must have 'read' and 'readline' attributes", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[198], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpickle\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest.virt2nuc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: file must have 'read' and 'readline' attributes" + ] + } + ], + "source": [ + "df = pickle.load(\"test.virt2nuc\")" + ] + }, { "cell_type": "code", "execution_count": 15, - "id": "6a4129c2", + "id": "c1fd0214", "metadata": {}, "outputs": [ { @@ -116,8 +535,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "aefe656e", + "execution_count": 173, + "id": "21f20d54", "metadata": {}, "outputs": [], "source": [ @@ -152,8 +571,8 @@ }, { "cell_type": "code", - "execution_count": 160, - "id": "e154a8c3", + "execution_count": 177, + "id": "ca75e9ec", "metadata": {}, "outputs": [], "source": [ @@ -168,26 +587,19 @@ "\n", "#class\n", "def decode_vh_vb(virt2nuc):\n", - " vh_list=[]\n", + " vh_list={}\n", " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", " for i in pattern.keys():\n", " s=strands()\n", " s.row,s.col=pattern[i]\n", " s.num=i\n", - " vh_list.append(s)\n", + " vh_list[s.num]=s\n", " scafs=vh_vb._scaf\n", " staps=vh_vb._stap\n", " scaf_strands=find_segs(scafs)\n", " scaf_oligos=list(scaf_strands.keys())\n", " for i in scaf_oligos:\n", - " bases=len(scaf_oligos)\n", - " vh0,vb0=scaf_oligos[i][0]\n", - " vh1,vb1=scaf_oligos[i][1]\n", - " if vb0>vb1 and vh0==vh1:#53\n", - " scaf_contact\n", - " \n", - " else: #35\n", - " for i in stap_oligos:\n", + " pass\n", " \n", " \n", " return vh_list" @@ -195,428 +607,489 @@ }, { "cell_type": "code", - "execution_count": 106, - "id": "125b30e0-7dcc-464a-b3cf-cfbf090d3982", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def decode_vh_vb(virt2nuc):\n", - " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", - " vi={'row':0, 'col':0, 'num':0, 'scaf':dict(), 'stap':dict(), 'loop':[], 'skip':[], 'scafLoop':[],'stapLoop':[], 'stap_colors':[]}\n", - " vs=[] \n", - " for i in range(len(pattern.keys())):\n", - " vhi=vi.copy()\n", - " vhi[\"row\"],vhi[\"col\"]=list(pattern.values())[i]\n", - " vhi[\"num\"]=list(pattern.keys())[i]\n", - " vs.append(vhi)\n", - " vhelices=pd.DataFrame(vs)\n", - " scafs=vh_vb._scaf\n", - " staps=vh_vb._stap\n", - " s\n", - " return vhelices" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "id": "5b149587-3224-4137-a7bc-e41aa4049034", + "execution_count": 187, + "id": "daa13243", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>row</th>\n", - " <th>col</th>\n", - " <th>num</th>\n", - " <th>scaf</th>\n", - " <th>stap</th>\n", - " <th>loop</th>\n", - " <th>skip</th>\n", - " <th>scafLoop</th>\n", - " <th>stapLoop</th>\n", - " <th>stap_colors</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>12</td>\n", - " <td>16</td>\n", - " <td>0</td>\n", - " <td>{}</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>12</td>\n", - " <td>15</td>\n", - " <td>1</td>\n", - " <td>{}</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>13</td>\n", - " <td>15</td>\n", - " <td>2</td>\n", - " <td>{}</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>13</td>\n", - " <td>16</td>\n", - " <td>3</td>\n", - " <td>{}</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>13</td>\n", - " <td>17</td>\n", - " <td>4</td>\n", - " <td>{}</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>12</td>\n", - " <td>17</td>\n", - " <td>5</td>\n", - " <td>{}</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " <td>[]</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], "text/plain": [ - " row col num scaf stap loop skip scafLoop stapLoop stap_colors\n", - "0 12 16 0 {} [] [] [] [] [] []\n", - "1 12 15 1 {} [] [] [] [] [] []\n", - "2 13 15 2 {} [] [] [] [] [] []\n", - "3 13 16 3 {} [] [] [] [] [] []\n", - "4 13 17 4 {} [] [] [] [] [] []\n", - "5 12 17 5 {} [] [] [] [] [] []" + "[[(2, 34),\n", + " (2, 33),\n", + " (2, 32),\n", + " (2, 31),\n", + " (2, 30),\n", + " (2, 29),\n", + " (2, 28),\n", + " (2, 27),\n", + " (2, 26),\n", + " (2, 25),\n", + " (2, 24),\n", + " (2, 23),\n", + " (2, 22),\n", + " (2, 21),\n", + " (2, 20),\n", + " (2, 19),\n", + " (2, 18),\n", + " (2, 17),\n", + " (2, 16),\n", + " (2, 15),\n", + " (2, 14),\n", + " (2, 13),\n", + " (2, 12),\n", + " (2, 11),\n", + " (2, 10),\n", + " (2, 9),\n", + " (2, 8),\n", + " (2, 7),\n", + " (2, 6),\n", + " (2, 5),\n", + " (2, 4),\n", + " (2, 3),\n", + " (2, 2),\n", + " (2, 1),\n", + " (2, 0)],\n", + " [(1, 3),\n", + " (1, 4),\n", + " (1, 5),\n", + " (1, 6),\n", + " (1, 7),\n", + " (1, 8),\n", + " (1, 9),\n", + " (1, 10),\n", + " (1, 11),\n", + " (1, 12),\n", + " (1, 13),\n", + " (1, 14),\n", + " (1, 15),\n", + " (1, 16),\n", + " (1, 17),\n", + " (1, 18),\n", + " (1, 19),\n", + " (1, 20),\n", + " (0, 20),\n", + " (0, 19),\n", + " (0, 18),\n", + " (0, 17),\n", + " (0, 16),\n", + " (0, 15),\n", + " (0, 14),\n", + " (0, 13),\n", + " (0, 12),\n", + " (0, 11),\n", + " (0, 10),\n", + " (0, 9),\n", + " (0, 8),\n", + " (0, 7),\n", + " (0, 6),\n", + " (0, 5),\n", + " (0, 4),\n", + " (0, 3),\n", + " (0, 2)],\n", + " [(0, 23),\n", + " (0, 22),\n", + " (0, 21),\n", + " (1, 21),\n", + " (1, 22),\n", + " (1, 23),\n", + " (1, 24),\n", + " (1, 25),\n", + " (1, 26),\n", + " (1, 27),\n", + " (1, 28),\n", + " (1, 29),\n", + " (1, 30),\n", + " (1, 31),\n", + " (1, 32),\n", + " (1, 33),\n", + " (1, 34),\n", + " (1, 35),\n", + " (1, 36),\n", + " (1, 37),\n", + " (1, 38)],\n", + " [(5, 9),\n", + " (5, 10),\n", + " (5, 11),\n", + " (5, 12),\n", + " (5, 13),\n", + " (5, 14),\n", + " (5, 15),\n", + " (5, 16),\n", + " (5, 17),\n", + " (5, 18),\n", + " (5, 19),\n", + " (5, 20),\n", + " (5, 21),\n", + " (5, 22),\n", + " (5, 23),\n", + " (5, 24),\n", + " (5, 25),\n", + " (5, 26),\n", + " (5, 27),\n", + " (0, 27),\n", + " (0, 26),\n", + " (0, 25),\n", + " (0, 24)],\n", + " [(0, 38),\n", + " (0, 37),\n", + " (0, 36),\n", + " (0, 35),\n", + " (0, 34),\n", + " (0, 33),\n", + " (0, 32),\n", + " (0, 31),\n", + " (0, 30),\n", + " (0, 29),\n", + " (0, 28),\n", + " (5, 28),\n", + " (5, 29),\n", + " (5, 30),\n", + " (5, 31),\n", + " (5, 32),\n", + " (5, 33),\n", + " (5, 34),\n", + " (5, 35),\n", + " (5, 36),\n", + " (5, 37),\n", + " (5, 38),\n", + " (5, 39)],\n", + " [(3, 0),\n", + " (3, 1),\n", + " (3, 2),\n", + " (3, 3),\n", + " (3, 4),\n", + " (3, 5),\n", + " (3, 6),\n", + " (3, 7),\n", + " (3, 8),\n", + " (3, 9),\n", + " (3, 10),\n", + " (3, 11),\n", + " (3, 12),\n", + " (3, 13),\n", + " (3, 14),\n", + " (3, 15),\n", + " (3, 16),\n", + " (3, 17),\n", + " (3, 18),\n", + " (3, 19),\n", + " (3, 20),\n", + " (4, 20),\n", + " (4, 19),\n", + " (4, 18),\n", + " (4, 17),\n", + " (4, 16),\n", + " (4, 15),\n", + " (4, 14),\n", + " (4, 13),\n", + " (4, 12),\n", + " (4, 11),\n", + " (4, 10),\n", + " (4, 9)],\n", + " [(4, 39),\n", + " (4, 38),\n", + " (4, 37),\n", + " (4, 36),\n", + " (4, 35),\n", + " (4, 34),\n", + " (4, 33),\n", + " (4, 32),\n", + " (4, 31),\n", + " (4, 30),\n", + " (4, 29),\n", + " (4, 28),\n", + " (4, 27),\n", + " (4, 26),\n", + " (4, 25),\n", + " (4, 24),\n", + " (4, 23),\n", + " (4, 22),\n", + " (4, 21),\n", + " (3, 21),\n", + " (3, 22),\n", + " (3, 23),\n", + " (3, 24),\n", + " (3, 25),\n", + " (3, 26),\n", + " (3, 27),\n", + " (3, 28),\n", + " (3, 29),\n", + " (3, 30),\n", + " (3, 31),\n", + " (3, 32),\n", + " (3, 33),\n", + " (3, 34)]]" ] }, - "execution_count": 107, + "execution_count": 187, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "s=decode_vh_vb(\"test.virt2nuc\")\n", - "s" + "s1=decode_vh_vb(\"test.virt2nuc\")\n", + "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", + "list(find_segs(vh_vb._stap).values())" ] }, { "cell_type": "code", - "execution_count": 90, - "id": "e7a10fc3-6c30-49da-ae43-81cdb2048cdf", + "execution_count": 142, + "id": "687c6f64", "metadata": {}, "outputs": [], "source": [ + "def find_segs(vir2nuc_scaf):\n", + " oligos={}\n", + " for i in range(len(vir2nuc_scaf)):\n", + " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n", + " if oligo not in oligos.keys():\n", + " oligos[oligo]=[]\n", + " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n", + " return oligos\n", "\n", - "s=pd.Series(vi)" + "def decode_vh_vb(virt2nuc):\n", + " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", + " vi={'row':0, 'col':0, 'num':0, 'scaf':dict(), 'stap':dict(), 'loop':[], 'skip':[], 'scafLoop':[],'stapLoop':[], 'stap_colors':[],\"scaf53\":True}\n", + " vs=[] \n", + " for i in range(len(pattern.keys())):\n", + " vhi=vi.copy()\n", + " vhi[\"row\"],vhi[\"col\"]=list(pattern.values())[i]\n", + " vhi[\"num\"]=list(pattern.keys())[i]\n", + " vs.append(vhi)\n", + " vhelices=pd.DataFrame(vs)\n", + " vhelices=vhelices.set_index('num')\n", + " scafs=vh_vb._scaf\n", + " staps=vh_vb._stap\n", + " scaf_strands=find_segs(scafs)\n", + " stap_strands=find_segs(staps)\n", + " scaf_oligos=list(scaf_strands.keys())\n", + "\n", + " \n", + " return vhelices" ] }, { "cell_type": "code", - "execution_count": 92, - "id": "ae3c8887-0d73-4151-958c-32adfd708611", + "execution_count": 117, + "id": "4d610626", "metadata": {}, "outputs": [], "source": [ - "s[\"scaf\"].append(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "id": "f24b00b3-e0f0-42b0-88b6-1e7a956ef7ac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "row 0\n", - "col 0\n", - "num 0\n", - "scaf [1]\n", - "stap []\n", - "loop []\n", - "skip []\n", - "scafLoop []\n", - "stapLoop []\n", - "stap_colors []\n", - "dtype: object" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s" + "def find_base_map(oligo,i,vhx,scaf=True):\n", + " vh0,vb0=oligo[i]\n", + " vh1,vb1=oligo[i+1]\n", + " if scaf==True:\n", + " if vb0 not in vhx[\"scaf\"][vh0].keys():\n", + " \n", + " if vh0==vh1 and scaf==True:\n", + " if vb0>vb1:\n", + " vhx[vh0][\"scaf\"][vb0]=\n", + " \n", + "\n", + " \n" ] }, { "cell_type": "code", - "execution_count": 163, - "id": "09619e4e", + "execution_count": 116, + "id": "ab5c6907", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{0: [(0, 39), (0, 40), (0, 41)],\n", - " 1: [(1, 41), (1, 40), (1, 39)],\n", - " 2: [(2, 39), (2, 40), (2, 41)],\n", - " 4: [(3, 41), (3, 40), (3, 39), (3, 38), (3, 37)],\n", - " 5: [(4, 0), (4, 1), (4, 2), (4, 3)],\n", - " 6: [(5, 3), (5, 2), (5, 1), (5, 0)],\n", - " 7: [(5, 22),\n", - " (5, 21),\n", - " (5, 20),\n", - " (5, 19),\n", - " (5, 18),\n", - " (5, 17),\n", - " (5, 16),\n", - " (5, 15),\n", - " (5, 14),\n", - " (5, 13),\n", - " (5, 12),\n", - " (5, 11),\n", - " (5, 10),\n", - " (5, 9),\n", - " (4, 9),\n", - " (4, 10),\n", - " (4, 11),\n", - " (4, 12),\n", - " (4, 13),\n", - " (4, 14),\n", - " (4, 15),\n", - " (3, 15),\n", - " (3, 14),\n", - " (3, 13),\n", - " (3, 12),\n", - " (3, 11),\n", - " (3, 10),\n", - " (3, 9),\n", - " (3, 8),\n", - " (3, 7),\n", - " (3, 6),\n", - " (3, 5),\n", - " (3, 4),\n", - " (3, 3),\n", - " (3, 2),\n", - " (2, 2),\n", - " (2, 3),\n", - " (2, 4),\n", - " (2, 5),\n", - " (2, 6),\n", - " (2, 7),\n", - " (2, 8),\n", - " (2, 9),\n", - " (2, 10),\n", - " (2, 11),\n", - " (2, 12),\n", - " (2, 13),\n", - " (2, 14),\n", - " (2, 15),\n", - " (2, 16),\n", - " (2, 17),\n", - " (2, 18),\n", - " (1, 18),\n", - " (1, 17),\n", - " (1, 16),\n", - " (1, 15),\n", - " (1, 14),\n", - " (1, 13),\n", - " (1, 12),\n", - " (1, 11),\n", - " (1, 10),\n", - " (1, 9),\n", - " (1, 8),\n", - " (1, 7),\n", - " (1, 6),\n", - " (1, 5),\n", - " (0, 5),\n", - " (0, 6),\n", - " (0, 7),\n", - " (0, 8),\n", - " (0, 9),\n", - " (0, 10),\n", - " (0, 11),\n", - " (0, 12),\n", - " (0, 13),\n", - " (0, 14),\n", - " (0, 15),\n", - " (0, 16),\n", - " (0, 17),\n", - " (0, 18),\n", - " (0, 19),\n", - " (0, 20),\n", - " (0, 21),\n", - " (0, 22),\n", - " (0, 23),\n", - " (0, 24),\n", - " (0, 25),\n", - " (0, 26),\n", - " (0, 27),\n", - " (0, 28),\n", - " (0, 29),\n", - " (0, 30),\n", - " (0, 31),\n", - " (0, 32),\n", - " (0, 33),\n", - " (0, 34),\n", - " (0, 35),\n", - " (0, 36),\n", - " (1, 36),\n", - " (1, 35),\n", - " (1, 34),\n", - " (1, 33),\n", - " (1, 32),\n", - " (1, 31),\n", - " (1, 30),\n", - " (1, 29),\n", - " (1, 28),\n", - " (1, 27),\n", - " (1, 26),\n", - " (1, 25),\n", - " (1, 24),\n", - " (1, 23),\n", - " (1, 22),\n", - " (1, 21),\n", - " (1, 20),\n", - " (1, 19),\n", - " (2, 19),\n", - " (2, 20),\n", - " (2, 21),\n", - " (2, 22),\n", - " (2, 23),\n", - " (2, 24),\n", - " (2, 25),\n", - " (2, 26),\n", - " (2, 27),\n", - " (2, 28),\n", - " (2, 29),\n", - " (2, 30),\n", - " (2, 31),\n", - " (2, 32),\n", - " (3, 32),\n", - " (3, 31),\n", - " (3, 30),\n", - " (3, 29),\n", - " (3, 28),\n", - " (3, 27),\n", - " (3, 26),\n", - " (3, 25),\n", - " (3, 24),\n", - " (3, 23),\n", - " (3, 22),\n", - " (3, 21),\n", - " (3, 20),\n", - " (3, 19),\n", - " (3, 18),\n", - " (3, 17),\n", - " (3, 16),\n", - " (4, 16),\n", - " (4, 17),\n", - " (4, 18),\n", - " (4, 19),\n", - " (4, 20),\n", - " (4, 21),\n", - " (4, 22),\n", - " (4, 23),\n", - " (4, 24),\n", - " (4, 25),\n", - " (4, 26),\n", - " (4, 27),\n", - " (4, 28),\n", - " (4, 29),\n", - " (4, 30),\n", - " (4, 31),\n", - " (4, 32),\n", - " (4, 33),\n", - " (4, 34),\n", - " (4, 35),\n", - " (4, 36),\n", - " (4, 37),\n", - " (4, 38),\n", - " (4, 39),\n", - " (5, 39),\n", - " (5, 38),\n", - " (5, 37),\n", - " (5, 36),\n", - " (5, 35),\n", - " (5, 34),\n", - " (5, 33),\n", - " (5, 32),\n", - " (5, 31),\n", - " (5, 30),\n", - " (5, 29),\n", - " (5, 28),\n", - " (5, 27),\n", - " (5, 26),\n", - " (5, 25),\n", - " (5, 24),\n", - " (5, 23)]}" + "[(5, 22),\n", + " (5, 21),\n", + " (5, 20),\n", + " (5, 19),\n", + " (5, 18),\n", + " (5, 17),\n", + " (5, 16),\n", + " (5, 15),\n", + " (5, 14),\n", + " (5, 13),\n", + " (5, 12),\n", + " (5, 11),\n", + " (5, 10),\n", + " (5, 9),\n", + " (4, 9),\n", + " (4, 10),\n", + " (4, 11),\n", + " (4, 12),\n", + " (4, 13),\n", + " (4, 14),\n", + " (4, 15),\n", + " (3, 15),\n", + " (3, 14),\n", + " (3, 13),\n", + " (3, 12),\n", + " (3, 11),\n", + " (3, 10),\n", + " (3, 9),\n", + " (3, 8),\n", + " (3, 7),\n", + " (3, 6),\n", + " (3, 5),\n", + " (3, 4),\n", + " (3, 3),\n", + " (3, 2),\n", + " (2, 2),\n", + " (2, 3),\n", + " (2, 4),\n", + " (2, 5),\n", + " (2, 6),\n", + " (2, 7),\n", + " (2, 8),\n", + " (2, 9),\n", + " (2, 10),\n", + " (2, 11),\n", + " (2, 12),\n", + " (2, 13),\n", + " (2, 14),\n", + " (2, 15),\n", + " (2, 16),\n", + " (2, 17),\n", + " (2, 18),\n", + " (1, 18),\n", + " (1, 17),\n", + " (1, 16),\n", + " (1, 15),\n", + " (1, 14),\n", + " (1, 13),\n", + " (1, 12),\n", + " (1, 11),\n", + " (1, 10),\n", + " (1, 9),\n", + " (1, 8),\n", + " (1, 7),\n", + " (1, 6),\n", + " (1, 5),\n", + " (0, 5),\n", + " (0, 6),\n", + " (0, 7),\n", + " (0, 8),\n", + " (0, 9),\n", + " (0, 10),\n", + " (0, 11),\n", + " (0, 12),\n", + " (0, 13),\n", + " (0, 14),\n", + " (0, 15),\n", + " (0, 16),\n", + " (0, 17),\n", + " (0, 18),\n", + " (0, 19),\n", + " (0, 20),\n", + " (0, 21),\n", + " (0, 22),\n", + " (0, 23),\n", + " (0, 24),\n", + " (0, 25),\n", + " (0, 26),\n", + " (0, 27),\n", + " (0, 28),\n", + " (0, 29),\n", + " (0, 30),\n", + " (0, 31),\n", + " (0, 32),\n", + " (0, 33),\n", + " (0, 34),\n", + " (0, 35),\n", + " (0, 36),\n", + " (1, 36),\n", + " (1, 35),\n", + " (1, 34),\n", + " (1, 33),\n", + " (1, 32),\n", + " (1, 31),\n", + " (1, 30),\n", + " (1, 29),\n", + " (1, 28),\n", + " (1, 27),\n", + " (1, 26),\n", + " (1, 25),\n", + " (1, 24),\n", + " (1, 23),\n", + " (1, 22),\n", + " (1, 21),\n", + " (1, 20),\n", + " (1, 19),\n", + " (2, 19),\n", + " (2, 20),\n", + " (2, 21),\n", + " (2, 22),\n", + " (2, 23),\n", + " (2, 24),\n", + " (2, 25),\n", + " (2, 26),\n", + " (2, 27),\n", + " (2, 28),\n", + " (2, 29),\n", + " (2, 30),\n", + " (2, 31),\n", + " (2, 32),\n", + " (3, 32),\n", + " (3, 31),\n", + " (3, 30),\n", + " (3, 29),\n", + " (3, 28),\n", + " (3, 27),\n", + " (3, 26),\n", + " (3, 25),\n", + " (3, 24),\n", + " (3, 23),\n", + " (3, 22),\n", + " (3, 21),\n", + " (3, 20),\n", + " (3, 19),\n", + " (3, 18),\n", + " (3, 17),\n", + " (3, 16),\n", + " (4, 16),\n", + " (4, 17),\n", + " (4, 18),\n", + " (4, 19),\n", + " (4, 20),\n", + " (4, 21),\n", + " (4, 22),\n", + " (4, 23),\n", + " (4, 24),\n", + " (4, 25),\n", + " (4, 26),\n", + " (4, 27),\n", + " (4, 28),\n", + " (4, 29),\n", + " (4, 30),\n", + " (4, 31),\n", + " (4, 32),\n", + " (4, 33),\n", + " (4, 34),\n", + " (4, 35),\n", + " (4, 36),\n", + " (4, 37),\n", + " (4, 38),\n", + " (4, 39),\n", + " (5, 39),\n", + " (5, 38),\n", + " (5, 37),\n", + " (5, 36),\n", + " (5, 35),\n", + " (5, 34),\n", + " (5, 33),\n", + " (5, 32),\n", + " (5, 31),\n", + " (5, 30),\n", + " (5, 29),\n", + " (5, 28),\n", + " (5, 27),\n", + " (5, 26),\n", + " (5, 25),\n", + " (5, 24),\n", + " (5, 23)]" ] }, - "execution_count": 163, + "execution_count": 116, "metadata": {}, "output_type": "execute_result" } @@ -625,13 +1098,14 @@ "scafs=vh_vb._scaf\n", "s=list(scafs.values())\n", "len(scafs)\n", - "find_segs(scafs)" + "ss=find_segs(scafs)[7]\n", + "ss" ] }, { "cell_type": "code", "execution_count": 157, - "id": "88d989b9", + "id": "8e05cf62", "metadata": {}, "outputs": [ { @@ -655,7 +1129,7 @@ { "cell_type": "code", "execution_count": 152, - "id": "2909c273", + "id": "e2e30e0a", "metadata": {}, "outputs": [ { @@ -676,7 +1150,7 @@ { "cell_type": "code", "execution_count": 62, - "id": "422e2b48", + "id": "03354054", "metadata": {}, "outputs": [ { @@ -904,7 +1378,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "69c011ff", + "id": "12b28ec3", "metadata": {}, "outputs": [], "source": [ @@ -1030,7 +1504,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "f2693e7a", + "id": "6d1e1ea1", "metadata": {}, "outputs": [], "source": [ @@ -1051,7 +1525,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "a4594752", + "id": "17119618", "metadata": {}, "outputs": [ { @@ -1113,7 +1587,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "ca071163", + "id": "58213565", "metadata": {}, "outputs": [ { @@ -1175,7 +1649,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e5913163", + "id": "954a6da6", "metadata": {}, "outputs": [], "source": [ @@ -1188,7 +1662,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "3eefca8f", + "id": "900af65d", "metadata": {}, "outputs": [ { @@ -1209,7 +1683,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "268d5c04", + "id": "b3dd534e", "metadata": {}, "outputs": [ { @@ -1258,7 +1732,7 @@ { "cell_type": "code", "execution_count": 36, - "id": "d3d42e33", + "id": "3477f518", "metadata": {}, "outputs": [ { @@ -1320,7 +1794,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "2cc3f477", + "id": "cb794762", "metadata": {}, "outputs": [ { @@ -1346,7 +1820,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5d50f58a", + "id": "7fe2cd8b", "metadata": {}, "outputs": [], "source": [] @@ -1354,7 +1828,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "3b40b71b", + "id": "0fe00cf6", "metadata": {}, "outputs": [ { @@ -1374,7 +1848,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "6915bad6", + "id": "f8bb463d", "metadata": {}, "outputs": [ { @@ -1395,7 +1869,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "1c16c59d", + "id": "bebc29de", "metadata": {}, "outputs": [ { @@ -1465,7 +1939,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "c61d9586", + "id": "e67598f0", "metadata": {}, "outputs": [], "source": [ @@ -1476,7 +1950,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "aeca0546", + "id": "51b78d60", "metadata": {}, "outputs": [ { @@ -1585,7 +2059,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "90a54834", + "id": "b58993ab", "metadata": {}, "outputs": [ { @@ -1606,7 +2080,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "de00fb7e", + "id": "2554e586", "metadata": {}, "outputs": [ { @@ -1637,7 +2111,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "716ff2ed", + "id": "c46a53a3", "metadata": {}, "outputs": [ { @@ -2134,7 +2608,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "766d0ecd", + "id": "7e1b1fb5", "metadata": {}, "outputs": [ { @@ -2168,7 +2642,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "05456be4", + "id": "14168996", "metadata": {}, "outputs": [ { @@ -2188,7 +2662,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "cd1a5604", + "id": "b1e6b703", "metadata": {}, "outputs": [], "source": [ @@ -2198,7 +2672,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "4346f1fc", + "id": "229a0b83", "metadata": {}, "outputs": [ { @@ -2219,7 +2693,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "294094f9", + "id": "ef1e3f31", "metadata": {}, "outputs": [ { @@ -2246,7 +2720,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b3d29c52", + "id": "9fcf9231", "metadata": {}, "outputs": [], "source": [] @@ -2254,7 +2728,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -2268,7 +2742,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.19" + "version": "3.8.18" } }, "nbformat": 4, diff --git a/mrdna/readers/libs b/mrdna/readers/libs index b95cadaeb159900596f7b69e0ea68571008ed0f9..e10e7dfd9849fdf43737462ce6850012e56b2149 120000 --- a/mrdna/readers/libs +++ b/mrdna/readers/libs @@ -1 +1 @@ -/Users/pinyili/Documents/research/tacoxDNA/src/libs \ No newline at end of file +/home/pinyili2/server5/tacoxDNA/src/libs \ No newline at end of file diff --git a/mrdna/readers/rest_scaf_col.json b/mrdna/readers/rest_scaf_col.json new file mode 100644 index 0000000000000000000000000000000000000000..b0a9979dafc087601de3150abe5111b1050ec9c7 --- /dev/null +++ b/mrdna/readers/rest_scaf_col.json @@ -0,0 +1 @@ +{"name":"test.json","vstrands":[{"row":12,"col":16,"num":0,"scaf":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[1,5,0,6],[0,5,0,7],[0,6,0,8],[0,7,0,9],[0,8,0,10],[0,9,0,11],[0,10,0,12],[0,11,0,13],[0,12,0,14],[0,13,0,15],[0,14,0,16],[0,15,0,17],[0,16,0,18],[0,17,0,19],[0,18,0,20],[0,19,0,21],[0,20,0,22],[0,21,0,23],[0,22,0,24],[0,23,0,25],[0,24,0,26],[0,25,0,27],[0,26,0,28],[0,27,0,29],[0,28,0,30],[0,29,0,31],[0,30,0,32],[0,31,0,33],[0,32,0,34],[0,33,0,35],[0,34,0,36],[0,35,1,36],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,0,40],[0,39,0,41],[0,40,-1,-1]],"stap":[[-1,-1,-1,-1],[-1,-1,-1,-1],[0,3,-1,-1],[0,4,0,2],[0,5,0,3],[0,6,0,4],[0,7,0,5],[0,8,0,6],[0,9,0,7],[0,10,0,8],[0,11,0,9],[0,12,0,10],[0,13,0,11],[0,14,0,12],[0,15,0,13],[0,16,0,14],[0,17,0,15],[0,18,0,16],[0,19,0,17],[0,20,0,18],[1,20,0,19],[0,22,1,21],[0,23,0,21],[-1,-1,0,22],[0,25,-1,-1],[0,26,0,24],[0,27,0,25],[5,27,0,26],[0,29,5,28],[0,30,0,28],[0,31,0,29],[0,32,0,30],[0,33,0,31],[0,34,0,32],[0,35,0,33],[0,36,0,34],[0,37,0,35],[0,38,0,36],[-1,-1,0,37],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[23,12060012],[38,1507550]]},{"row":12,"col":15,"num":1,"scaf":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[1,6,0,5],[1,7,1,5],[1,8,1,6],[1,9,1,7],[1,10,1,8],[1,11,1,9],[1,12,1,10],[1,13,1,11],[1,14,1,12],[1,15,1,13],[1,16,1,14],[1,17,1,15],[1,18,1,16],[2,18,1,17],[1,20,2,19],[1,21,1,19],[1,22,1,20],[1,23,1,21],[1,24,1,22],[1,25,1,23],[1,26,1,24],[1,27,1,25],[1,28,1,26],[1,29,1,27],[1,30,1,28],[1,31,1,29],[1,32,1,30],[1,33,1,31],[1,34,1,32],[1,35,1,33],[1,36,1,34],[0,36,1,35],[-1,-1,-1,-1],[-1,-1,-1,-1],[1,40,-1,-1],[1,41,1,39],[-1,-1,1,40]],"stap":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,1,4],[1,3,1,5],[1,4,1,6],[1,5,1,7],[1,6,1,8],[1,7,1,9],[1,8,1,10],[1,9,1,11],[1,10,1,12],[1,11,1,13],[1,12,1,14],[1,13,1,15],[1,14,1,16],[1,15,1,17],[1,16,1,18],[1,17,1,19],[1,18,1,20],[1,19,0,20],[0,21,1,22],[1,21,1,23],[1,22,1,24],[1,23,1,25],[1,24,1,26],[1,25,1,27],[1,26,1,28],[1,27,1,29],[1,28,1,30],[1,29,1,31],[1,30,1,32],[1,31,1,33],[1,32,1,34],[1,33,1,35],[1,34,1,36],[1,35,1,37],[1,36,1,38],[1,37,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[3,13369344]]},{"row":13,"col":15,"num":2,"scaf":[[-1,-1,-1,-1],[-1,-1,-1,-1],[3,2,2,3],[2,2,2,4],[2,3,2,5],[2,4,2,6],[2,5,2,7],[2,6,2,8],[2,7,2,9],[2,8,2,10],[2,9,2,11],[2,10,2,12],[2,11,2,13],[2,12,2,14],[2,13,2,15],[2,14,2,16],[2,15,2,17],[2,16,2,18],[2,17,1,18],[1,19,2,20],[2,19,2,21],[2,20,2,22],[2,21,2,23],[2,22,2,24],[2,23,2,25],[2,24,2,26],[2,25,2,27],[2,26,2,28],[2,27,2,29],[2,28,2,30],[2,29,2,31],[2,30,2,32],[2,31,3,32],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,2,40],[2,39,2,41],[2,40,-1,-1]],"stap":[[2,1,-1,-1],[2,2,2,0],[2,3,2,1],[2,4,2,2],[2,5,2,3],[2,6,2,4],[2,7,2,5],[2,8,2,6],[2,9,2,7],[2,10,2,8],[2,11,2,9],[2,12,2,10],[2,13,2,11],[2,14,2,12],[2,15,2,13],[2,16,2,14],[2,17,2,15],[2,18,2,16],[2,19,2,17],[2,20,2,18],[2,21,2,19],[2,22,2,20],[2,23,2,21],[2,24,2,22],[2,25,2,23],[2,26,2,24],[2,27,2,25],[2,28,2,26],[2,29,2,27],[2,30,2,28],[2,31,2,29],[2,32,2,30],[2,33,2,31],[2,34,2,32],[-1,-1,2,33],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[34,8947848]]},{"row":13,"col":16,"num":3,"scaf":[[-1,-1,-1,-1],[-1,-1,-1,-1],[3,3,2,2],[3,4,3,2],[3,5,3,3],[3,6,3,4],[3,7,3,5],[3,8,3,6],[3,9,3,7],[3,10,3,8],[3,11,3,9],[3,12,3,10],[3,13,3,11],[3,14,3,12],[3,15,3,13],[4,15,3,14],[3,17,4,16],[3,18,3,16],[3,19,3,17],[3,20,3,18],[3,21,3,19],[3,22,3,20],[3,23,3,21],[3,24,3,22],[3,25,3,23],[3,26,3,24],[3,27,3,25],[3,28,3,26],[3,29,3,27],[3,30,3,28],[3,31,3,29],[3,32,3,30],[2,32,3,31],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[3,38,-1,-1],[3,39,3,37],[3,40,3,38],[3,41,3,39],[-1,-1,3,40]],"stap":[[-1,-1,3,1],[3,0,3,2],[3,1,3,3],[3,2,3,4],[3,3,3,5],[3,4,3,6],[3,5,3,7],[3,6,3,8],[3,7,3,9],[3,8,3,10],[3,9,3,11],[3,10,3,12],[3,11,3,13],[3,12,3,14],[3,13,3,15],[3,14,3,16],[3,15,3,17],[3,16,3,18],[3,17,3,19],[3,18,3,20],[3,19,4,20],[4,21,3,22],[3,21,3,23],[3,22,3,24],[3,23,3,25],[3,24,3,26],[3,25,3,27],[3,26,3,28],[3,27,3,29],[3,28,3,30],[3,29,3,31],[3,30,3,32],[3,31,3,33],[3,32,3,34],[3,33,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[0,13369344]]},{"row":13,"col":17,"num":4,"scaf":[[-1,-1,4,1],[4,0,4,2],[4,1,4,3],[4,2,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[5,9,4,10],[4,9,4,11],[4,10,4,12],[4,11,4,13],[4,12,4,14],[4,13,4,15],[4,14,3,15],[3,16,4,17],[4,16,4,18],[4,17,4,19],[4,18,4,20],[4,19,4,21],[4,20,4,22],[4,21,4,23],[4,22,4,24],[4,23,4,25],[4,24,4,26],[4,25,4,27],[4,26,4,28],[4,27,4,29],[4,28,4,30],[4,29,4,31],[4,30,4,32],[4,31,4,33],[4,32,4,34],[4,33,4,35],[4,34,4,36],[4,35,4,37],[4,36,4,38],[4,37,4,39],[4,38,5,39],[-1,-1,-1,-1],[-1,-1,-1,-1]],"stap":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[4,10,-1,-1],[4,11,4,9],[4,12,4,10],[4,13,4,11],[4,14,4,12],[4,15,4,13],[4,16,4,14],[4,17,4,15],[4,18,4,16],[4,19,4,17],[4,20,4,18],[3,20,4,19],[4,22,3,21],[4,23,4,21],[4,24,4,22],[4,25,4,23],[4,26,4,24],[4,27,4,25],[4,28,4,26],[4,29,4,27],[4,30,4,28],[4,31,4,29],[4,32,4,30],[4,33,4,31],[4,34,4,32],[4,35,4,33],[4,36,4,34],[4,37,4,35],[4,38,4,36],[4,39,4,37],[-1,-1,4,38],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[39,8947848]]},{"row":12,"col":17,"num":5,"scaf":[[5,1,-1,-1],[5,2,5,0],[5,3,5,1],[-1,-1,5,2],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[5,10,4,9],[5,11,5,9],[5,12,5,10],[5,13,5,11],[5,14,5,12],[5,15,5,13],[5,16,5,14],[5,17,5,15],[5,18,5,16],[5,19,5,17],[5,20,5,18],[5,21,5,19],[5,22,5,20],[-1,-1,5,21],[5,24,-1,-1],[5,25,5,23],[5,26,5,24],[5,27,5,25],[5,28,5,26],[5,29,5,27],[5,30,5,28],[5,31,5,29],[5,32,5,30],[5,33,5,31],[5,34,5,32],[5,35,5,33],[5,36,5,34],[5,37,5,35],[5,38,5,36],[5,39,5,37],[4,39,5,38],[-1,-1,-1,-1],[-1,-1,-1,-1]],"stap":[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,5,10],[5,9,5,11],[5,10,5,12],[5,11,5,13],[5,12,5,14],[5,13,5,15],[5,14,5,16],[5,15,5,17],[5,16,5,18],[5,17,5,19],[5,18,5,20],[5,19,5,21],[5,20,5,22],[5,21,5,23],[5,22,5,24],[5,23,5,25],[5,24,5,26],[5,25,5,27],[5,26,0,27],[0,28,5,29],[5,28,5,30],[5,29,5,31],[5,30,5,32],[5,31,5,33],[5,32,5,34],[5,33,5,35],[5,34,5,36],[5,35,5,37],[5,36,5,38],[5,37,5,39],[5,38,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],"loop":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"skip":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"scafLoop":[],"stapLoop":[],"stap_colors":[[1,1]]}]} diff --git a/mrdna/readers/segmentmodel_from_cadnano.py b/mrdna/readers/segmentmodel_from_cadnano.py index 357638870fd6b8cebdd6b0ce81ee6573cd2292a6..676ff60362ccb613ade8070016123b9254c0a3ad 100644 --- a/mrdna/readers/segmentmodel_from_cadnano.py +++ b/mrdna/readers/segmentmodel_from_cadnano.py @@ -17,10 +17,42 @@ from ..model.dna_sequence import m13 as m13seq ## - doubly-nicked helices ## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix) ## - circular constructs -def decode_cadnano_part(json_data): +def get_lattice(part): + lattice_type = None + _gt = part.getGridType() + try: + lattice_type = _gt.name.lower() + except: + if _gt == 1: + lattice_type = 'square' + elif _gt == 2: + lattice_type = 'honeycomb' + else: + print("WARNING: unable to determine cadnano part lattice type") + return lattice_type + + +def read_json_file(filename): + import json + import re import cadnano from cadnano.document import Document + try: + with open(filename) as ch: + json_data = json.load(ch) + except: + with open(filename) as ch: + content = "" + for l in ch: + l = re.sub(r"'", r'"', l) + # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name + # l = re.sub(r"{\s*(\w)", r'{"\1', l) + # l = re.sub(r",\s*(\w)", r',"\1', l) + # l = re.sub(r"(\w):", r'\1":', l) + content += l+"\n" + json_data = json.loads(content) + try: doc = Document() cadnano.fileio.v3decode.decode(doc, json_data) @@ -37,43 +69,67 @@ def decode_cadnano_part(json_data): if decoder == 2: """ It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here """ - l = _get_lattice(part) + l = get_lattice(part) if l == 'square': for id_num in part.getIdNums(): if part.vh_properties.loc[id_num,'eulerZ'] == 0: part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5) - json_df=pd.read_json("test.json") - vslist=list(df["vstrands"]) - return part,pd.DataFrame(vslist) + df=pd.DataFrame(json_data["vstrands"]) + n_df=df.set_index("num") + return part,n_df + + +def get_helix_angle(part, helix_id, indices): + """ Get "start_orientation" for helix """ + # import ipdb + # ipdb.set_trace() + + """ FROM CADNANO2.5 + + angle is CCW + - angle is CW + Right handed DNA rotates clockwise from 5' to 3' + we use the convention the 5' end starts at 0 degrees + and it's pair is minor_groove_angle degrees away + direction, hence the minus signs. eulerZ + """ + + hp, bpr, tpr, eulerZ, mgroove = self.part.vh_properties.loc[helix_id,['helical_pitch', + 'bases_per_repeat', + 'turns_per_repeat', + 'eulerZ', + 'minor_groove_angle']] + twist_per_base = tpr*360./bpr + # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180 + angle = eulerZ + twist_per_base*indices - 0.5*mgroove + return angle + +def ntcount(scafs): + def judge(i): + if i ==[-1,-1,-1,-1]: + return 0 + else: return 1 + n=np.array([judge(i) for i in scafs]) + return np.sum(n) -def get_lattice(part): - lattice_type = None - _gt = part.getGridType() - try: - lattice_type = _gt.name.lower() - except: - if _gt == 1: - lattice_type = 'square' - elif _gt == 2: - lattice_type = 'honeycomb' - else: - print("WARNING: unable to determine cadnano part lattice type") - return lattice_type - def mrdna_model_from_cadnano(json_data,**model_parameters): - part,vslist=decode_cadnano_part(json_data) - props = part.getModelProperties().copy() + part,vslist=decode_cadnano_part(json_data) + props = part.getModelProperties().copy() - if props.get('point_type') == PointType.ARBITRARY: + if props.get('point_type') == PointType.ARBITRARY: # TODO add code to encode Parts with ARBITRARY point configurations - raise NotImplementedError("Not implemented") - else: - try: - vh_props, origins = part.helixPropertiesAndOrigins() - except: - origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()} - cad_bps=part.getIndices(0) + raise NotImplementedError("Not implemented") + else: + try: + vh_props, origins = part.helixPropertiesAndOrigins() + except: + origins = {hid:part.getVirtualHelixOrigin(hid)[:2] for hid in part.getidNums()} + cad_bps=part.getIndices(0) + vslist["scafnt"]=[ntcount(vslist['scaf'][i]) for i in vslist.index] + vslist["stapnt"]=[ntcount(vslist['stap'][i]) for i in vslist.index] + totnt=np.sum(vslist["scafnt"])+np.sum(vslist["stapnt"]) + nt_prop=pd.DataFrame(index=range(totnt),columns=["vh","zid","r","bp","stack","threeprime","seq","orientation"]) + model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters ) diff --git a/mrdna/readers/test.ipynb b/mrdna/readers/test.ipynb index 2a29aae486818012920dfd24aa4b7be3c7e1d490..34cdcbde6db29625b2d65397714178e712761444 100644 --- a/mrdna/readers/test.ipynb +++ b/mrdna/readers/test.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 222, - "id": "767c8226", + "execution_count": 2, + "id": "ab3deccf", "metadata": { "scrolled": true }, @@ -18,8 +18,30 @@ }, { "cell_type": "code", - "execution_count": 226, - "id": "1dc9b21a-360c-49a4-9ea5-c9916f27d962", + "execution_count": 7, + "id": "0de414ce", + "metadata": {}, + "outputs": [], + "source": [ + "import cadnano\n", + "from cadnano.document import Document\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fdb603c1", + "metadata": {}, + "outputs": [], + "source": [ + "df=pd.DataFrame(data=d)\n", + "df=df.set_index(\"num\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6f322908", "metadata": {}, "outputs": [ { @@ -190,7 +212,7 @@ "5 [[9, 0]] " ] }, - "execution_count": 226, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -201,19 +223,124 @@ }, { "cell_type": "code", - "execution_count": 225, - "id": "bf2d37ca", + "execution_count": 14, + "id": "f77c2bad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "doc = Document()\n", + "def read_json_file(filename):\n", + " import json\n", + " import re\n", + "\n", + " try:\n", + " with open(filename) as ch:\n", + " data = json.load(ch)\n", + " except:\n", + " with open(filename) as ch:\n", + " content = \"\"\n", + " for l in ch:\n", + " l = re.sub(r\"'\", r'\"', l)\n", + " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", + " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", + " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", + " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", + " content += l+\"\\n\"\n", + " data = json.loads(content)\n", + " return data\n", + "f=read_json_file(\"test.json\")\n", + "cadnano.fileio.v2decode.decode(doc, f)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a1df854a", "metadata": {}, "outputs": [], "source": [ - "df=pd.DataFrame(data=d)\n", - "df=df.set_index(\"num\")" + "def get_lattice(part):\n", + " lattice_type = None\n", + " _gt = part.getGridType()\n", + " try:\n", + " lattice_type = _gt.name.lower()\n", + " except:\n", + " if _gt == 1:\n", + " lattice_type = 'square'\n", + " elif _gt == 2:\n", + " lattice_type = 'honeycomb'\n", + " else:\n", + " print(\"WARNING: unable to determine cadnano part lattice type\")\n", + " return lattice_type\n" ] }, { "cell_type": "code", - "execution_count": 205, - "id": "22a5db0b-2ce5-4af9-83f1-88114342c36d", + "execution_count": 121, + "id": "a88086bf", + "metadata": {}, + "outputs": [], + "source": [ + "def read_json_file(filename):\n", + " import json\n", + " import re\n", + " import cadnano\n", + " from cadnano.document import Document\n", + "\n", + " try:\n", + " with open(filename) as ch:\n", + " json_data = json.load(ch)\n", + " except:\n", + " with open(filename) as ch:\n", + " content = \"\"\n", + " for l in ch:\n", + " l = re.sub(r\"'\", r'\"', l)\n", + " # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name\n", + " # l = re.sub(r\"{\\s*(\\w)\", r'{\"\\1', l)\n", + " # l = re.sub(r\",\\s*(\\w)\", r',\"\\1', l)\n", + " # l = re.sub(r\"(\\w):\", r'\\1\":', l)\n", + " content += l+\"\\n\"\n", + " json_data = json.loads(content)\n", + "\n", + " try:\n", + " doc = Document()\n", + " cadnano.fileio.v3decode.decode(doc, json_data)\n", + " decoder = 3\n", + " except:\n", + " doc = Document()\n", + " cadnano.fileio.v2decode.decode(doc, json_data)\n", + " decoder = 2\n", + "\n", + " parts = [p for p in doc.getParts()]\n", + " if len(parts) != 1:\n", + " raise Exception(\"Only documents containing a single cadnano part are implemented at this time.\")\n", + " part = parts[0]\n", + "\n", + " if decoder == 2:\n", + " \"\"\" It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here \"\"\"\n", + " l = get_lattice(part)\n", + " if l == 'square':\n", + " for id_num in part.getIdNums():\n", + " if part.vh_properties.loc[id_num,'eulerZ'] == 0:\n", + " part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)\n", + " df=pd.DataFrame(json_data[\"vstrands\"])\n", + " n_df=df.set_index(\"num\")\n", + " return part,n_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "221a82ed", "metadata": {}, "outputs": [ { @@ -222,7 +349,7 @@ "[42, 42, 42, 42, 42, 42]" ] }, - "execution_count": 205, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -232,10 +359,348 @@ "bpnum" ] }, + { + "cell_type": "code", + "execution_count": 122, + "id": "25accfa8", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "f,b=read_json_file(\"test.json\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "5a0dec07", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([0, 1, 2, 3, 4, 5], dtype='int64', name='num')" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b.index" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "413e3e88", + "metadata": {}, + "outputs": [], + "source": [ + "def ntcount(scafs):\n", + " def judge(i):\n", + " if i ==[-1,-1,-1,-1]:\n", + " return 0\n", + " else: return 1\n", + " n=np.array([judge(i) for i in scafs])\n", + " return np.sum(n)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "e36edb50", + "metadata": {}, + "outputs": [], + "source": [ + "b[\"scafnt\"]=[ntcount(b['scaf'][i]) for i in b.index]\n", + "b[\"stapnt\"]=[ntcount(b['stap'][i]) for i in b.index]" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "40764fb4", + "metadata": {}, + "outputs": [], + "source": [ + "i=range(5)\n", + "col=[\"vh\",\"zid\",\"r\",\"bp\",\"stack\",\"threeprime\",\"seq\",\"orientation\"]\n", + "d=pd.DataFrame(index=i,columns=col)\n", + "d['vh']=[0,1,2,3,1]\n", + "d['zid']=[0,3,1,2,8]\n", + "d.set_index([\"vh\",\"zid\"],inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "f6fc8912", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>vh</th>\n", + " <th>zid</th>\n", + " <th>r</th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " vh zid r bp stack threeprime seq orientation\n", + "0 0 0 NaN NaN NaN NaN NaN NaN\n", + "1 1 3 NaN NaN NaN NaN NaN NaN\n", + "2 2 1 NaN NaN NaN NaN NaN NaN\n", + "3 3 2 NaN NaN NaN NaN NaN NaN\n", + "4 1 8 NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e5e1d1b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "42d23cd4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>bp</th>\n", + " <th>stack</th>\n", + " <th>threeprime</th>\n", + " <th>seq</th>\n", + " <th>orientation</th>\n", + " </tr>\n", + " <tr>\n", + " <th>r</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1.0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1.0</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>NaN</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>NaN</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>NaN</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " bp stack threeprime seq orientation\n", + "r \n", + "1.0 NaN NaN NaN NaN NaN\n", + "1.0 NaN NaN NaN NaN NaN\n", + "NaN NaN NaN NaN NaN NaN\n", + "NaN NaN NaN NaN NaN NaN\n", + "NaN NaN NaN NaN NaN NaN" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "9ed6d92a", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-100-a349feadc600>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3509\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_iterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3510\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3511\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_indexer_strict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3513\u001b[0m \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5780\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_indexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reindex_non_unique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5782\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_if_missing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5783\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5784\u001b[0m \u001b[0mkeyarr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5840\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0muse_interval_msg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5841\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5842\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of [{key}] are in the [{axis_name}]\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5843\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5844\u001b[0m \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: \"None of [Int64Index([0, 0], dtype='int64')] are in the [columns]\"" + ] + } + ], + "source": [ + "d[[0,0]]" + ] + }, { "cell_type": "code", "execution_count": 13, - "id": "46079e58-4747-42ff-8b68-906c94440474", + "id": "41cad09e", "metadata": {}, "outputs": [ { @@ -256,7 +721,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "625c831b", + "id": "ee7a5b2f", "metadata": {}, "outputs": [ { @@ -278,7 +743,7 @@ { "cell_type": "code", "execution_count": 198, - "id": "0f88b0bb-bc42-4ef3-88e7-518ae2628c0d", + "id": "5308fd27", "metadata": {}, "outputs": [ { @@ -300,7 +765,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "6a4129c2", + "id": "a173a0cc", "metadata": {}, "outputs": [ { @@ -321,7 +786,7 @@ { "cell_type": "code", "execution_count": 173, - "id": "aefe656e", + "id": "3ac307d0", "metadata": {}, "outputs": [], "source": [ @@ -357,7 +822,7 @@ { "cell_type": "code", "execution_count": 177, - "id": "e154a8c3", + "id": "6c3869e9", "metadata": {}, "outputs": [], "source": [ @@ -393,7 +858,7 @@ { "cell_type": "code", "execution_count": 187, - "id": "0a709be4-a426-4eb0-a82f-49905307b07b", + "id": "a938b5d1", "metadata": {}, "outputs": [ { @@ -620,7 +1085,7 @@ { "cell_type": "code", "execution_count": 142, - "id": "125b30e0-7dcc-464a-b3cf-cfbf090d3982", + "id": "28fa9e4a", "metadata": {}, "outputs": [], "source": [ @@ -657,7 +1122,7 @@ { "cell_type": "code", "execution_count": 117, - "id": "5b149587-3224-4137-a7bc-e41aa4049034", + "id": "5876b835", "metadata": {}, "outputs": [], "source": [ @@ -678,7 +1143,7 @@ { "cell_type": "code", "execution_count": 116, - "id": "09619e4e", + "id": "b27560ea", "metadata": {}, "outputs": [ { @@ -890,7 +1355,7 @@ { "cell_type": "code", "execution_count": 157, - "id": "88d989b9", + "id": "c2ac4f18", "metadata": {}, "outputs": [ { @@ -914,7 +1379,7 @@ { "cell_type": "code", "execution_count": 152, - "id": "2909c273", + "id": "11333080", "metadata": {}, "outputs": [ { @@ -935,7 +1400,7 @@ { "cell_type": "code", "execution_count": 62, - "id": "422e2b48", + "id": "c15af839", "metadata": {}, "outputs": [ { @@ -1163,7 +1628,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "69c011ff", + "id": "eda2056d", "metadata": {}, "outputs": [], "source": [ @@ -1289,7 +1754,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "f2693e7a", + "id": "f7efd378", "metadata": {}, "outputs": [], "source": [ @@ -1310,7 +1775,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "a4594752", + "id": "1422eca5", "metadata": {}, "outputs": [ { @@ -1372,7 +1837,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "ca071163", + "id": "cf5a3dbd", "metadata": {}, "outputs": [ { @@ -1434,7 +1899,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e5913163", + "id": "98cb9b38", "metadata": {}, "outputs": [], "source": [ @@ -1447,7 +1912,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "3eefca8f", + "id": "ff952b38", "metadata": {}, "outputs": [ { @@ -1468,7 +1933,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "268d5c04", + "id": "85083378", "metadata": {}, "outputs": [ { @@ -1517,7 +1982,7 @@ { "cell_type": "code", "execution_count": 36, - "id": "d3d42e33", + "id": "ded55624", "metadata": {}, "outputs": [ { @@ -1579,7 +2044,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "2cc3f477", + "id": "40a7b76d", "metadata": {}, "outputs": [ { @@ -1605,7 +2070,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5d50f58a", + "id": "f994ff10", "metadata": {}, "outputs": [], "source": [] @@ -1613,7 +2078,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "3b40b71b", + "id": "5144726b", "metadata": {}, "outputs": [ { @@ -1633,7 +2098,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "6915bad6", + "id": "5c836eac", "metadata": {}, "outputs": [ { @@ -1654,7 +2119,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "1c16c59d", + "id": "2c3ce8b1", "metadata": {}, "outputs": [ { @@ -1724,7 +2189,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "c61d9586", + "id": "137db876", "metadata": {}, "outputs": [], "source": [ @@ -1735,7 +2200,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "aeca0546", + "id": "8210cbee", "metadata": {}, "outputs": [ { @@ -1844,7 +2309,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "90a54834", + "id": "4ef92d58", "metadata": {}, "outputs": [ { @@ -1865,7 +2330,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "de00fb7e", + "id": "57059a9e", "metadata": {}, "outputs": [ { @@ -1896,7 +2361,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "716ff2ed", + "id": "001aef7e", "metadata": {}, "outputs": [ { @@ -2393,7 +2858,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "766d0ecd", + "id": "8feaa1ca", "metadata": {}, "outputs": [ { @@ -2427,7 +2892,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "05456be4", + "id": "4455fa61", "metadata": {}, "outputs": [ { @@ -2447,7 +2912,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "cd1a5604", + "id": "605a19c2", "metadata": {}, "outputs": [], "source": [ @@ -2457,7 +2922,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "4346f1fc", + "id": "c45d0282", "metadata": {}, "outputs": [ { @@ -2478,7 +2943,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "294094f9", + "id": "f0d25377", "metadata": {}, "outputs": [ { @@ -2505,7 +2970,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b3d29c52", + "id": "bf3a74cf", "metadata": {}, "outputs": [], "source": [] @@ -2513,7 +2978,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -2527,7 +2992,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.19" + "version": "3.8.18" } }, "nbformat": 4, diff --git a/mrdna/readers/test3.ipynb b/mrdna/readers/test3.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0985a5505ceb46c7a9aba91935499a47da069c90 --- /dev/null +++ b/mrdna/readers/test3.ipynb @@ -0,0 +1,2137 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 124, + "id": "303b2da7", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['row', 'col', 'num', 'scaf', 'stap', 'loop', 'skip', 'scafLoop', 'stapLoop', 'stap_colors'])" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import pickle\n", + "import numpy as np\n", + "df=pd.read_json(\"test.json\")\n", + "df[\"vstrands\"][0].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "id": "2d42bba6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [0, 3, -1, -1],\n", + " [0, 4, 0, 2],\n", + " [0, 5, 0, 3],\n", + " [0, 6, 0, 4],\n", + " [0, 7, 0, 5],\n", + " [0, 8, 0, 6],\n", + " [0, 9, 0, 7],\n", + " [0, 10, 0, 8],\n", + " [0, 11, 0, 9],\n", + " [0, 12, 0, 10],\n", + " [0, 13, 0, 11],\n", + " [0, 14, 0, 12],\n", + " [0, 15, 0, 13],\n", + " [0, 16, 0, 14],\n", + " [0, 17, 0, 15],\n", + " [0, 18, 0, 16],\n", + " [0, 19, 0, 17],\n", + " [0, 20, 0, 18],\n", + " [1, 20, 0, 19],\n", + " [0, 22, 1, 21],\n", + " [0, 23, 0, 21],\n", + " [-1, -1, 0, 22],\n", + " [0, 25, -1, -1],\n", + " [0, 26, 0, 24],\n", + " [0, 27, 0, 25],\n", + " [5, 27, 0, 26],\n", + " [0, 29, 5, 28],\n", + " [0, 30, 0, 28],\n", + " [0, 31, 0, 29],\n", + " [0, 32, 0, 30],\n", + " [0, 33, 0, 31],\n", + " [0, 34, 0, 32],\n", + " [0, 35, 0, 33],\n", + " [0, 36, 0, 34],\n", + " [0, 37, 0, 35],\n", + " [0, 38, 0, 36],\n", + " [-1, -1, 0, 37],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 164, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"vstrands\"][0][\"stap\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "5e1ea864", + "metadata": {}, + "outputs": [ + { + "ename": "UnicodeDecodeError", + "evalue": "'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-65-71f198107f5e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.virt2nuc\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"r\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/data/server1/cmaffeo2/miniconda3/lib/python3.8/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte" + ] + } + ], + "source": [ + "f=open(\"test.virt2nuc\",\"r\")\n", + "pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "86a109f4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "210" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vh_vb,pattern=pd.read_pickle(\"test.virt2nuc\")\n", + "len(vh_vb._scaf)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "ab020675", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: (12, 16), 1: (12, 15), 2: (13, 15), 3: (13, 16), 4: (13, 17), 5: (12, 17)}" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pattern" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "id": "0b152186", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, 40)\n", + "(0, 41)\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'append'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-141-a0e59df19136>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_vh_vb\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test.virt2nuc\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m<ipython-input-140-99891f72666f>\u001b[0m in \u001b[0;36mdecode_vh_vb\u001b[0;34m(virt2nuc)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mscafs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvh_vb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_scaf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mstaps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvh_vb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stap\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mscaf_strands\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfind_segs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscafs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mvh_list\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m<ipython-input-140-99891f72666f>\u001b[0m in \u001b[0;36mfind_segs\u001b[0;34m(vir2nuc_scaf)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvir2nuc_scaf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0moligos\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0moligo\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moligos\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0moligo\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvir2nuc_scaf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moligos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'append'" + ] + } + ], + "source": [ + "s=decode_vh_vb(\"test.virt2nuc\")[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "id": "852198d8", + "metadata": {}, + "outputs": [], + "source": [ + "class strands():\n", + " def __init__(self):\n", + " self.row=0 \n", + " self.col=0\n", + " self.num=0\n", + " self.scaf=[]\n", + " self.stap=[]\n", + " self.loop=[]\n", + " self.skip=[]\n", + " self.scafLoop=[]\n", + " self.stapLoop=[]\n", + " self.stap_colors=[]\n", + " self.scaf_contact={}\n", + " self.scaf_pattern=True\n", + " self.stap_connect={}\n", + " def to_dict(self):\n", + " d={}\n", + " d['row']=self.row\n", + " d['col']=self.col\n", + " d['num']=self.num\n", + " d['scaf']=self.scaf\n", + " d['stap']=self.stap\n", + " d['loop']=self.loop\n", + " d['skip']=self.skip\n", + " d['scafLoop']=self.scafLoop\n", + " d['stapLoop']=self.stapLoop\n", + " d['stap_colors']=self.stap_colors\n", + " return d\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "id": "b4064749", + "metadata": {}, + "outputs": [], + "source": [ + "def find_segs(vir2nuc_scaf):\n", + " oligos={}\n", + " for i in range(len(vir2nuc_scaf)):\n", + " oligo,ox_ind=list(vir2nuc_scaf.values())[i]\n", + " if oligo not in oligos.keys():\n", + " oligos[oligo]=[]\n", + " oligos[oligo].append(list(vir2nuc_scaf.keys())[i])\n", + " return oligos\n", + " \n", + "def decode_vh_vb(virt2nuc):\n", + " vh_list=[]\n", + " vh_vb,pattern=pd.read_pickle(virt2nuc)\n", + " for i in pattern.keys():\n", + " s=strands()\n", + " s.row,s.col=pattern[i]\n", + " s.num=i\n", + " vh_list.append(s)\n", + " scafs=vh_vb._scaf\n", + " staps=vh_vb._stap\n", + " scaf_strands=find_segs(scafs)\n", + " scaf_oligos=list(scaf_strands.keys())\n", + " for i in scaf_oligos:\n", + " bases=len(scaf_oligos)\n", + " vh0,vb0=scaf_oligos[i][0]\n", + " vh1,vb1=scaf_oligos[i][1]\n", + " if vb0>vb1:#53\n", + " vh_list[vh0].scaf_contact[vb0]=[-1,-1,vh1,vb1]\n", + " self.scaf_pattern=True\n", + " else: #35\n", + " vh_list[vh0].scaf_contact[vb0]=[vh1,vb1,-1,-1]\n", + " self.scaf_pattern=False\n", + " for i in range(len()):\n", + " \n", + " \n", + " return vh_list" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "id": "ae57cd6f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: [(0, 39), (0, 40), (0, 41)],\n", + " 1: [(1, 41), (1, 40), (1, 39)],\n", + " 2: [(2, 39), (2, 40), (2, 41)],\n", + " 4: [(3, 41), (3, 40), (3, 39), (3, 38), (3, 37)],\n", + " 5: [(4, 0), (4, 1), (4, 2), (4, 3)],\n", + " 6: [(5, 3), (5, 2), (5, 1), (5, 0)],\n", + " 7: [(5, 22),\n", + " (5, 21),\n", + " (5, 20),\n", + " (5, 19),\n", + " (5, 18),\n", + " (5, 17),\n", + " (5, 16),\n", + " (5, 15),\n", + " (5, 14),\n", + " (5, 13),\n", + " (5, 12),\n", + " (5, 11),\n", + " (5, 10),\n", + " (5, 9),\n", + " (4, 9),\n", + " (4, 10),\n", + " (4, 11),\n", + " (4, 12),\n", + " (4, 13),\n", + " (4, 14),\n", + " (4, 15),\n", + " (3, 15),\n", + " (3, 14),\n", + " (3, 13),\n", + " (3, 12),\n", + " (3, 11),\n", + " (3, 10),\n", + " (3, 9),\n", + " (3, 8),\n", + " (3, 7),\n", + " (3, 6),\n", + " (3, 5),\n", + " (3, 4),\n", + " (3, 3),\n", + " (3, 2),\n", + " (2, 2),\n", + " (2, 3),\n", + " (2, 4),\n", + " (2, 5),\n", + " (2, 6),\n", + " (2, 7),\n", + " (2, 8),\n", + " (2, 9),\n", + " (2, 10),\n", + " (2, 11),\n", + " (2, 12),\n", + " (2, 13),\n", + " (2, 14),\n", + " (2, 15),\n", + " (2, 16),\n", + " (2, 17),\n", + " (2, 18),\n", + " (1, 18),\n", + " (1, 17),\n", + " (1, 16),\n", + " (1, 15),\n", + " (1, 14),\n", + " (1, 13),\n", + " (1, 12),\n", + " (1, 11),\n", + " (1, 10),\n", + " (1, 9),\n", + " (1, 8),\n", + " (1, 7),\n", + " (1, 6),\n", + " (1, 5),\n", + " (0, 5),\n", + " (0, 6),\n", + " (0, 7),\n", + " (0, 8),\n", + " (0, 9),\n", + " (0, 10),\n", + " (0, 11),\n", + " (0, 12),\n", + " (0, 13),\n", + " (0, 14),\n", + " (0, 15),\n", + " (0, 16),\n", + " (0, 17),\n", + " (0, 18),\n", + " (0, 19),\n", + " (0, 20),\n", + " (0, 21),\n", + " (0, 22),\n", + " (0, 23),\n", + " (0, 24),\n", + " (0, 25),\n", + " (0, 26),\n", + " (0, 27),\n", + " (0, 28),\n", + " (0, 29),\n", + " (0, 30),\n", + " (0, 31),\n", + " (0, 32),\n", + " (0, 33),\n", + " (0, 34),\n", + " (0, 35),\n", + " (0, 36),\n", + " (1, 36),\n", + " (1, 35),\n", + " (1, 34),\n", + " (1, 33),\n", + " (1, 32),\n", + " (1, 31),\n", + " (1, 30),\n", + " (1, 29),\n", + " (1, 28),\n", + " (1, 27),\n", + " (1, 26),\n", + " (1, 25),\n", + " (1, 24),\n", + " (1, 23),\n", + " (1, 22),\n", + " (1, 21),\n", + " (1, 20),\n", + " (1, 19),\n", + " (2, 19),\n", + " (2, 20),\n", + " (2, 21),\n", + " (2, 22),\n", + " (2, 23),\n", + " (2, 24),\n", + " (2, 25),\n", + " (2, 26),\n", + " (2, 27),\n", + " (2, 28),\n", + " (2, 29),\n", + " (2, 30),\n", + " (2, 31),\n", + " (2, 32),\n", + " (3, 32),\n", + " (3, 31),\n", + " (3, 30),\n", + " (3, 29),\n", + " (3, 28),\n", + " (3, 27),\n", + " (3, 26),\n", + " (3, 25),\n", + " (3, 24),\n", + " (3, 23),\n", + " (3, 22),\n", + " (3, 21),\n", + " (3, 20),\n", + " (3, 19),\n", + " (3, 18),\n", + " (3, 17),\n", + " (3, 16),\n", + " (4, 16),\n", + " (4, 17),\n", + " (4, 18),\n", + " (4, 19),\n", + " (4, 20),\n", + " (4, 21),\n", + " (4, 22),\n", + " (4, 23),\n", + " (4, 24),\n", + " (4, 25),\n", + " (4, 26),\n", + " (4, 27),\n", + " (4, 28),\n", + " (4, 29),\n", + " (4, 30),\n", + " (4, 31),\n", + " (4, 32),\n", + " (4, 33),\n", + " (4, 34),\n", + " (4, 35),\n", + " (4, 36),\n", + " (4, 37),\n", + " (4, 38),\n", + " (4, 39),\n", + " (5, 39),\n", + " (5, 38),\n", + " (5, 37),\n", + " (5, 36),\n", + " (5, 35),\n", + " (5, 34),\n", + " (5, 33),\n", + " (5, 32),\n", + " (5, 31),\n", + " (5, 30),\n", + " (5, 29),\n", + " (5, 28),\n", + " (5, 27),\n", + " (5, 26),\n", + " (5, 25),\n", + " (5, 24),\n", + " (5, 23)]}" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scafs=vh_vb._scaf\n", + "s=list(scafs.values())\n", + "len(scafs)\n", + "find_segs(scafs)" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "d4698785", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 2)]" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "oligos[0]=[]\n", + "L=[]\n", + "L.append((1,2))\n", + "L" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "2b98d0e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0, 39)" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(scafs.keys())[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "f666ffb1", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{(2, 34): (3, [43]),\n", + " (2, 33): (3, [42]),\n", + " (2, 32): (3, [41]),\n", + " (2, 31): (3, [40]),\n", + " (2, 30): (3, [39]),\n", + " (2, 29): (3, [38]),\n", + " (2, 28): (3, [37]),\n", + " (2, 27): (3, [36]),\n", + " (2, 26): (3, [35]),\n", + " (2, 25): (3, [34]),\n", + " (2, 24): (3, [33]),\n", + " (2, 23): (3, [32]),\n", + " (2, 22): (3, [31]),\n", + " (2, 21): (3, [30]),\n", + " (2, 20): (3, [29]),\n", + " (2, 19): (3, [28]),\n", + " (2, 18): (3, [27]),\n", + " (2, 17): (3, [26]),\n", + " (2, 16): (3, [25]),\n", + " (2, 15): (3, [24]),\n", + " (2, 14): (3, [23]),\n", + " (2, 13): (3, [22]),\n", + " (2, 12): (3, [21]),\n", + " (2, 11): (3, [20]),\n", + " (2, 10): (3, [19]),\n", + " (2, 9): (3, [18]),\n", + " (2, 8): (3, [17]),\n", + " (2, 7): (3, [16]),\n", + " (2, 6): (3, [15]),\n", + " (2, 5): (3, [14]),\n", + " (2, 4): (3, [13]),\n", + " (2, 3): (3, [12]),\n", + " (2, 2): (3, [11]),\n", + " (2, 1): (3, [10]),\n", + " (2, 0): (3, [9]),\n", + " (1, 3): (8, [281]),\n", + " (1, 4): (8, [280]),\n", + " (1, 5): (8, [279]),\n", + " (1, 6): (8, [278]),\n", + " (1, 7): (8, [277]),\n", + " (1, 8): (8, [276]),\n", + " (1, 9): (8, [275]),\n", + " (1, 10): (8, [274]),\n", + " (1, 11): (8, [273]),\n", + " (1, 12): (8, [272]),\n", + " (1, 13): (8, [271]),\n", + " (1, 14): (8, [270]),\n", + " (1, 15): (8, [269]),\n", + " (1, 16): (8, [268]),\n", + " (1, 17): (8, [267]),\n", + " (1, 18): (8, [266]),\n", + " (1, 19): (8, [265]),\n", + " (1, 20): (8, [264]),\n", + " (0, 20): (8, [263]),\n", + " (0, 19): (8, [262]),\n", + " (0, 18): (8, [261]),\n", + " (0, 17): (8, [260]),\n", + " (0, 16): (8, [259]),\n", + " (0, 15): (8, [258]),\n", + " (0, 14): (8, [257]),\n", + " (0, 13): (8, [256]),\n", + " (0, 12): (8, [255]),\n", + " (0, 11): (8, [254]),\n", + " (0, 10): (8, [253]),\n", + " (0, 9): (8, [252]),\n", + " (0, 8): (8, [251]),\n", + " (0, 7): (8, [250]),\n", + " (0, 6): (8, [249]),\n", + " (0, 5): (8, [248]),\n", + " (0, 4): (8, [247]),\n", + " (0, 3): (8, [246]),\n", + " (0, 2): (8, [245]),\n", + " (0, 23): (9, [302]),\n", + " (0, 22): (9, [301]),\n", + " (0, 21): (9, [300]),\n", + " (1, 21): (9, [299]),\n", + " (1, 22): (9, [298]),\n", + " (1, 23): (9, [297]),\n", + " (1, 24): (9, [296]),\n", + " (1, 25): (9, [295]),\n", + " (1, 26): (9, [294]),\n", + " (1, 27): (9, [293]),\n", + " (1, 28): (9, [292]),\n", + " (1, 29): (9, [291]),\n", + " (1, 30): (9, [290]),\n", + " (1, 31): (9, [289]),\n", + " (1, 32): (9, [288]),\n", + " (1, 33): (9, [287]),\n", + " (1, 34): (9, [286]),\n", + " (1, 35): (9, [285]),\n", + " (1, 36): (9, [284]),\n", + " (1, 37): (9, [283]),\n", + " (1, 38): (9, [282]),\n", + " (5, 9): (10, [325]),\n", + " (5, 10): (10, [324]),\n", + " (5, 11): (10, [323]),\n", + " (5, 12): (10, [322]),\n", + " (5, 13): (10, [321]),\n", + " (5, 14): (10, [320]),\n", + " (5, 15): (10, [319]),\n", + " (5, 16): (10, [318]),\n", + " (5, 17): (10, [317]),\n", + " (5, 18): (10, [316]),\n", + " (5, 19): (10, [315]),\n", + " (5, 20): (10, [314]),\n", + " (5, 21): (10, [313]),\n", + " (5, 22): (10, [312]),\n", + " (5, 23): (10, [311]),\n", + " (5, 24): (10, [310]),\n", + " (5, 25): (10, [309]),\n", + " (5, 26): (10, [308]),\n", + " (5, 27): (10, [307]),\n", + " (0, 27): (10, [306]),\n", + " (0, 26): (10, [305]),\n", + " (0, 25): (10, [304]),\n", + " (0, 24): (10, [303]),\n", + " (0, 38): (11, [348]),\n", + " (0, 37): (11, [347]),\n", + " (0, 36): (11, [346]),\n", + " (0, 35): (11, [345]),\n", + " (0, 34): (11, [344]),\n", + " (0, 33): (11, [343]),\n", + " (0, 32): (11, [342]),\n", + " (0, 31): (11, [341]),\n", + " (0, 30): (11, [340]),\n", + " (0, 29): (11, [339]),\n", + " (0, 28): (11, [338]),\n", + " (5, 28): (11, [337]),\n", + " (5, 29): (11, [336]),\n", + " (5, 30): (11, [335]),\n", + " (5, 31): (11, [334]),\n", + " (5, 32): (11, [333]),\n", + " (5, 33): (11, [332]),\n", + " (5, 34): (11, [331]),\n", + " (5, 35): (11, [330]),\n", + " (5, 36): (11, [329]),\n", + " (5, 37): (11, [328]),\n", + " (5, 38): (11, [327]),\n", + " (5, 39): (11, [326]),\n", + " (3, 0): (12, [381]),\n", + " (3, 1): (12, [380]),\n", + " (3, 2): (12, [379]),\n", + " (3, 3): (12, [378]),\n", + " (3, 4): (12, [377]),\n", + " (3, 5): (12, [376]),\n", + " (3, 6): (12, [375]),\n", + " (3, 7): (12, [374]),\n", + " (3, 8): (12, [373]),\n", + " (3, 9): (12, [372]),\n", + " (3, 10): (12, [371]),\n", + " (3, 11): (12, [370]),\n", + " (3, 12): (12, [369]),\n", + " (3, 13): (12, [368]),\n", + " (3, 14): (12, [367]),\n", + " (3, 15): (12, [366]),\n", + " (3, 16): (12, [365]),\n", + " (3, 17): (12, [364]),\n", + " (3, 18): (12, [363]),\n", + " (3, 19): (12, [362]),\n", + " (3, 20): (12, [361]),\n", + " (4, 20): (12, [360]),\n", + " (4, 19): (12, [359]),\n", + " (4, 18): (12, [358]),\n", + " (4, 17): (12, [357]),\n", + " (4, 16): (12, [356]),\n", + " (4, 15): (12, [355]),\n", + " (4, 14): (12, [354]),\n", + " (4, 13): (12, [353]),\n", + " (4, 12): (12, [352]),\n", + " (4, 11): (12, [351]),\n", + " (4, 10): (12, [350]),\n", + " (4, 9): (12, [349]),\n", + " (4, 39): (13, [414]),\n", + " (4, 38): (13, [413]),\n", + " (4, 37): (13, [412]),\n", + " (4, 36): (13, [411]),\n", + " (4, 35): (13, [410]),\n", + " (4, 34): (13, [409]),\n", + " (4, 33): (13, [408]),\n", + " (4, 32): (13, [407]),\n", + " (4, 31): (13, [406]),\n", + " (4, 30): (13, [405]),\n", + " (4, 29): (13, [404]),\n", + " (4, 28): (13, [403]),\n", + " (4, 27): (13, [402]),\n", + " (4, 26): (13, [401]),\n", + " (4, 25): (13, [400]),\n", + " (4, 24): (13, [399]),\n", + " (4, 23): (13, [398]),\n", + " (4, 22): (13, [397]),\n", + " (4, 21): (13, [396]),\n", + " (3, 21): (13, [395]),\n", + " (3, 22): (13, [394]),\n", + " (3, 23): (13, [393]),\n", + " (3, 24): (13, [392]),\n", + " (3, 25): (13, [391]),\n", + " (3, 26): (13, [390]),\n", + " (3, 27): (13, [389]),\n", + " (3, 28): (13, [388]),\n", + " (3, 29): (13, [387]),\n", + " (3, 30): (13, [386]),\n", + " (3, 31): (13, [385]),\n", + " (3, 32): (13, [384]),\n", + " (3, 33): (13, [383]),\n", + " (3, 34): (13, [382])}" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s=vh_vb.__dict__\n", + "scafs=s[\"_scaf\"]\n", + "staps=s[\"_stap\"]\n", + "staps" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "04da1a1e", + "metadata": {}, + "outputs": [], + "source": [ + "class vstrands (object):\n", + "\n", + " def __init__(self):\n", + " self.vhelices = []\n", + "\n", + " def add_vhelix(self, toadd):\n", + " self.vhelices.append(toadd)\n", + "\n", + " def bbox(self):\n", + " rows = []\n", + " cols = []\n", + " lens = []\n", + " for h in self.vhelices:\n", + " rows.append(h.row)\n", + " cols.append(h.col)\n", + " lens.append(len(h.stap))\n", + "\n", + " dr = DIST_SQUARE * (max(rows) - min(rows) + 2)\n", + " dc = DIST_SQUARE * (max(cols) - min(cols) + 2)\n", + " dl = 0.34 * (max(lens) + 2)\n", + " \n", + " return 2 * max([dr, dc, dl]) * BOX_FACTOR\n", + " \n", + " def __str__(self):\n", + " a = '{\\n\"vstrands\":[\\n'\n", + " if len(self.vhelices) > 0:\n", + " for h in self.vhelices:\n", + " a = a + str(h) + ','\n", + " a = a[0:len(a) - 1]\n", + " a = a + '}\\n'\n", + " return a\n", + "class vhelix (object):\n", + "\n", + " def __init__(self):\n", + " self.stapLoop = []\n", + " self.scafLoop = []\n", + " self.skip = []\n", + " self.loop = []\n", + " self.stap_colors = []\n", + " self.row = 0\n", + " self.col = 0\n", + " self.num = 0\n", + " self.stap = []\n", + " self.scaf = []\n", + " self.cad_index = -1\n", + " self.skiploop_bases = 0\n", + "\n", + " def get_length(self):\n", + " return max (len(self.scaf), len(self.stap))\n", + "\n", + " len = property (get_length)\n", + "\n", + " def add_square(self, toadd, which):\n", + " if which == 'stap':\n", + " self.stap.append(toadd)\n", + " elif which == 'scaf':\n", + " self.scaf.append (toadd)\n", + " else:\n", + " base.Logger.log(\"Cannot add square that is not scaf or stap. Dying now\", base.Logger.CRITICAL)\n", + " sys.exit(1)\n", + " \n", + " def __str__(self):\n", + " a = '{\\n'\n", + "\n", + " a = a + '\"stapLoop\":['\n", + " if len(self.stapLoop) > 0:\n", + " for i in self.stapLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " a = a + '\"skip\":['\n", + " if len(self.skip) > 0:\n", + " for e in self.skip:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"loop\":['\n", + " if len(self.loop) > 0:\n", + " for e in self.loop:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap_colors\":['\n", + " if len (self.stap_colors) > 0:\n", + " for e in self.stap_colors:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + "\n", + " a = a + '\"row\":' + str(self.row) + ',\\n'\n", + " a = a + '\"col\":' + str(self.col) + ',\\n'\n", + " a = a + '\"num\":' + str(self.num) + ',\\n'\n", + " \n", + " a = a + '\"scafLoop\":['\n", + " if len(self.scafLoop) > 0:\n", + " for i in self.scafLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap\":['\n", + " if len(self.stap) > 0:\n", + " for i in self.stap:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"scaf\":['\n", + " if len(self.scaf) > 0:\n", + " for i in self.scaf:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + ']\\n}'\n", + " return a\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c168b2f4", + "metadata": {}, + "outputs": [], + "source": [ + "L=[]\n", + "for i in df[\"vstrands\"]:\n", + " L.append(i)\n", + "\n", + "cadsys = vstrands()\n", + "vh = vhelix()\n", + "for s in L:\n", + " \n", + " vh.stap = [ i for i in s[\"scaf\"]]\n", + " vh.scaf = [i for i in s[\"stap\"]]\n", + " vh.skiploop_bases = len(s[\"skip\"]) + sum(s[\"loop\"]) - sum(s[\"skip\"])\n", + " cadsys.add_vhelix(vh)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1e29e6a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " 'add_square',\n", + " 'cad_index',\n", + " 'col',\n", + " 'get_length',\n", + " 'len',\n", + " 'loop',\n", + " 'num',\n", + " 'row',\n", + " 'scaf',\n", + " 'scafLoop',\n", + " 'skip',\n", + " 'skiploop_bases',\n", + " 'stap',\n", + " 'stapLoop',\n", + " 'stap_colors']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0=cadsys.vhelices[0]\n", + "dir(s0)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "fd26bfad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, 5, 10],\n", + " [5, 9, 5, 11],\n", + " [5, 10, 5, 12],\n", + " [5, 11, 5, 13],\n", + " [5, 12, 5, 14],\n", + " [5, 13, 5, 15],\n", + " [5, 14, 5, 16],\n", + " [5, 15, 5, 17],\n", + " [5, 16, 5, 18],\n", + " [5, 17, 5, 19],\n", + " [5, 18, 5, 20],\n", + " [5, 19, 5, 21],\n", + " [5, 20, 5, 22],\n", + " [5, 21, 5, 23],\n", + " [5, 22, 5, 24],\n", + " [5, 23, 5, 25],\n", + " [5, 24, 5, 26],\n", + " [5, 25, 5, 27],\n", + " [5, 26, 0, 27],\n", + " [0, 28, 5, 29],\n", + " [5, 28, 5, 30],\n", + " [5, 29, 5, 31],\n", + " [5, 30, 5, 32],\n", + " [5, 31, 5, 33],\n", + " [5, 32, 5, 34],\n", + " [5, 33, 5, 35],\n", + " [5, 34, 5, 36],\n", + " [5, 35, 5, 37],\n", + " [5, 36, 5, 38],\n", + " [5, 37, 5, 39],\n", + " [5, 38, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0.scaf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "438d3345", + "metadata": {}, + "outputs": [], + "source": [ + "for s in s0.scaf:\n", + " if s[0]==-1 and s[1]==-1:\n", + " pass\n", + " elif s[2]==len(s0.scaf) and abs(s[3])==1" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "595fa2ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[-1, -1, -1, -1]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0.scaf[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3d614740", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " 'add_vhelix',\n", + " 'bbox',\n", + " 'vhelices']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(cadsys)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "831d5ea6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[5, 1, -1, -1],\n", + " [5, 2, 5, 0],\n", + " [5, 3, 5, 1],\n", + " [-1, -1, 5, 2],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [5, 10, 4, 9],\n", + " [5, 11, 5, 9],\n", + " [5, 12, 5, 10],\n", + " [5, 13, 5, 11],\n", + " [5, 14, 5, 12],\n", + " [5, 15, 5, 13],\n", + " [5, 16, 5, 14],\n", + " [5, 17, 5, 15],\n", + " [5, 18, 5, 16],\n", + " [5, 19, 5, 17],\n", + " [5, 20, 5, 18],\n", + " [5, 21, 5, 19],\n", + " [5, 22, 5, 20],\n", + " [-1, -1, 5, 21],\n", + " [5, 24, -1, -1],\n", + " [5, 25, 5, 23],\n", + " [5, 26, 5, 24],\n", + " [5, 27, 5, 25],\n", + " [5, 28, 5, 26],\n", + " [5, 29, 5, 27],\n", + " [5, 30, 5, 28],\n", + " [5, 31, 5, 29],\n", + " [5, 32, 5, 30],\n", + " [5, 33, 5, 31],\n", + " [5, 34, 5, 32],\n", + " [5, 35, 5, 33],\n", + " [5, 36, 5, 34],\n", + " [5, 37, 5, 35],\n", + " [5, 38, 5, 36],\n", + " [5, 39, 5, 37],\n", + " [4, 39, 5, 38],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vh.stap" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7831eb35", + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'mrdna'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmrdna\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreaders\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcadnano_segments\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Document\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mrdna'" + ] + } + ], + "source": [ + "\n", + "\n", + "from mrdna.readers.cadnano_segments import *\n", + "from cadnano.document import Document\n", + "import cadnano" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28c9253b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a018193b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "json_data=read_json_file(\"test.json\")\n", + "part=decode_cadnano_part(json_data)\n", + "model=cadnano_part(part)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2bc520fd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "doc=Document()\n", + "cadnano.fileio.v2decode.decode(doc, json_data)\n", + "parts = [p for p in doc.getParts()]\n", + "part=parts[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "117520a1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Oligo_(0.1[38])_1328\t23\t'None\n", + "Oligo_(2.1[34])_9584\t35\t'None\n", + "Oligo_(1.1[36])_7488\t188\t'None\n", + "Oligo_(4.1[39])_4384\t33\t'None\n", + "Oligo_(5.0[9])_0240\t23\t'None\n", + "Oligo_(1.0[3])_8256\t37\t'None\n", + "Oligo_(3.0[0])_3296\t33\t'None\n", + "Oligo_(0.1[23])_9088\t21\t'None\n", + "VH0\n", + "\t <fwd_StrandSet(0)> \t [(5, 36)] \n", + "\t\t\t\t ['#0066cc']\n", + "\t <rev_StrandSet(0)> \t [(2, 20), (21, 23), (24, 27), (28, 38)] \n", + "\t\t\t\t ['#cc0000', '#b8056c', '#f74308', '#1700de']\n", + "VH1\n", + "\t <fwd_StrandSet(1)> \t [(3, 20), (21, 38)] \n", + "\t\t\t\t ['#cc0000', '#b8056c']\n", + "\t <rev_StrandSet(1)> \t [(5, 18), (19, 36)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "VH2\n", + "\t <fwd_StrandSet(2)> \t [(2, 18), (19, 32)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "\t <rev_StrandSet(2)> \t [(0, 34)] \n", + "\t\t\t\t ['#888888']\n", + "VH3\n", + "\t <fwd_StrandSet(3)> \t [(0, 20), (21, 34)] \n", + "\t\t\t\t ['#cc0000', '#888888']\n", + "\t <rev_StrandSet(3)> \t [(2, 15), (16, 32)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "VH4\n", + "\t <fwd_StrandSet(4)> \t [(9, 15), (16, 39)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "\t <rev_StrandSet(4)> \t [(9, 20), (21, 39)] \n", + "\t\t\t\t ['#cc0000', '#888888']\n", + "VH5\n", + "\t <fwd_StrandSet(5)> \t [(9, 27), (28, 39)] \n", + "\t\t\t\t ['#f74308', '#1700de']\n", + "\t <rev_StrandSet(5)> \t [(9, 39)] \n", + "\t\t\t\t ['#0066cc']\n" + ] + } + ], + "source": [ + "part.__dict__.keys()\n", + "\n", + "oligos = part.oligos()\n", + "for oligo in oligos:\n", + " print(\"{0}\\t{1}\\t\\'{2}\".format(oligo,\n", + " oligo.length(),\n", + " oligo.sequence()))\n", + "\n", + "vhs = list(part.getIdNums()) # convert set to list\n", + "for vh_id in vhs: # display first 3 vhs\n", + " fwd_ss, rev_ss = part.getStrandSets(vh_id)\n", + " print('VH{0}'.format(vh_id))\n", + " print('\\t', fwd_ss, '\\t', [s.idxs() for s in fwd_ss.strands()], '\\n\\t\\t\\t\\t',\n", + " [s.getColor() for s in fwd_ss.strands()])\n", + " print('\\t', rev_ss, '\\t', [s.idxs() for s in rev_ss.strands()], '\\n\\t\\t\\t\\t',\n", + " [s.getColor() for s in rev_ss.strands()])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d8eb98b3", + "metadata": {}, + "outputs": [], + "source": [ + "strands5 = [o.strand5p() for o in part.oligos()]\n", + "strands3 = [o.strand3p() for o in part.oligos()]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "44db9666", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__slots__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " '_decrementLength',\n", + " '_incrementLength',\n", + " '_is_circular',\n", + " '_parent',\n", + " '_part',\n", + " '_props',\n", + " '_setColor',\n", + " '_setLength',\n", + " '_setLoop',\n", + " '_setProperty',\n", + " '_signals',\n", + " '_strand5p',\n", + " '_strandMergeUpdate',\n", + " '_strandSplitUpdate',\n", + " 'addToPart',\n", + " 'applyAbstractSequences',\n", + " 'applyColor',\n", + " 'applySequence',\n", + " 'applySequenceCMD',\n", + " 'clearAbstractSequences',\n", + " 'connect',\n", + " 'deleteLater',\n", + " 'destroy',\n", + " 'disconnect',\n", + " 'displayAbstractSequences',\n", + " 'dump',\n", + " 'editable_properties',\n", + " 'getAbsolutePositionAtLength',\n", + " 'getColor',\n", + " 'getModelProperties',\n", + " 'getName',\n", + " 'getNumberOfBasesToEachXover',\n", + " 'getOutlineProperties',\n", + " 'getProperty',\n", + " 'getStrandLengths',\n", + " 'isCircular',\n", + " 'length',\n", + " 'locString',\n", + " 'oligoPropertyChangedSignal',\n", + " 'oligoRemovedSignal',\n", + " 'oligoSelectedChangedSignal',\n", + " 'oligoSequenceAddedSignal',\n", + " 'oligoSequenceClearedSignal',\n", + " 'parent',\n", + " 'part',\n", + " 'refreshLength',\n", + " 'remove',\n", + " 'removeFromPart',\n", + " 'sequence',\n", + " 'sequenceExport',\n", + " 'setParent',\n", + " 'setPart',\n", + " 'setProperty',\n", + " 'setStrand5p',\n", + " 'shallowCopy',\n", + " 'shouldHighlight',\n", + " 'signals',\n", + " 'splitAtAbsoluteLengths',\n", + " 'strand3p',\n", + " 'strand5p',\n", + " 'undoStack']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "L=[o for o in part.oligos()]\n", + "dir(L[2])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "52ccbb65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(dict, {0: {}, 1: {}, 2: {}, 3: {}, 4: {}, 5: {}})" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "part.insertions()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "66171e52", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'NaPart1',\n", + " 'color': '#0066cc',\n", + " 'is_visible': True,\n", + " 'active_phos': None,\n", + " 'crossover_span_angle': 45,\n", + " 'max_vhelix_length': 42,\n", + " 'neighbor_active_angle': '',\n", + " 'grid_type': <GridEnum.HONEYCOMB: 2>,\n", + " 'virtual_helix_order': [0, 1, 2, 3, 4, 5],\n", + " 'is_lattice': True,\n", + " <GridEnum.HONEYCOMB: 2>: <GridEnum.NONE: 0>}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "part.getModelProperties()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "34ff0906", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _\n", + " _____ ___ _| |___ ___\n", + "| | _| . | | .'|\n", + "|_|_|_|_| |___|_|_|__,| v1.0a.dev74 \n", + "it/its\n", + "\n" + ] + } + ], + "source": [ + "import pdb\n", + "import numpy as np\n", + "import os,sys\n", + "import scipy\n", + "\n", + "from mrdna import logger, devlogger\n", + "from mrdna.segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment\n", + "from mrdna.arbdmodel.coords import quaternion_from_matrix, rotationAboutAxis, quaternion_slerp\n", + "from mrdna import get_resource_path\n", + "\n", + "ref_stack_position = np.array((-2.41851735, -0.259761333, 3.39999978))\n", + "\n", + "def _three_prime_list_to_five_prime(three_prime):\n", + " five_prime = -np.ones(three_prime.shape, dtype=int)\n", + " has_three_prime = np.where(three_prime >= 0)[0]\n", + " five_prime[three_prime[has_three_prime]] = has_three_prime\n", + " return five_prime \n", + "def _primes_list_to_strands(three_prime, five_prime):\n", + " five_prime_ends = np.where(five_prime < 0)[0]\n", + " strands = []\n", + " strand_is_circular = []\n", + " \n", + " idx_to_strand = -np.ones(three_prime.shape, dtype=int)\n", + "\n", + " def build_strand(nt_idx, conditional):\n", + " strand = [nt_idx]\n", + " idx_to_strand[nt_idx] = len(strands)\n", + " while conditional(nt_idx):\n", + " nt_idx = three_prime[nt_idx]\n", + " strand.append(nt_idx)\n", + " idx_to_strand[nt_idx] = len(strands)\n", + " strands.append( np.array(strand, dtype=int) )\n", + "\n", + " for nt_idx in five_prime_ends:\n", + " build_strand(nt_idx,\n", + " lambda nt: three_prime[nt] >= 0)\n", + " strand_is_circular.append(False)\n", + "\n", + " while True:\n", + " ## print(\"WARNING: working on circular strand {}\".format(len(strands)))\n", + " ids = np.where(idx_to_strand < 0)[0]\n", + " if len(ids) == 0: break\n", + " build_strand(ids[0],\n", + " lambda nt: three_prime[nt] >= 0 and \\\n", + " idx_to_strand[three_prime[nt]] < 0)\n", + " strand_is_circular.append(True)\n", + "\n", + " return strands, strand_is_circular\n", + "\n", + "def find_stacks(centers, transforms):\n", + "\n", + " ## Find orientation and center of each nucleotide\n", + " expected_stack_positions = []\n", + " for R,c in zip(transforms,centers):\n", + " expected_stack_positions.append( c + ref_stack_position.dot(R) )\n", + "\n", + " expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)\n", + "\n", + " dists = scipy.spatial.distance_matrix(expected_stack_positions, centers)\n", + " dists = dists + 5*np.eye(len(dists))\n", + " idx1, idx2 = np.where(dists < 3.5)\n", + "\n", + " ## Convert distances to stacks\n", + " stacks_above = -np.ones(len(centers), dtype=int)\n", + " _z = np.array((0,0,1))\n", + " for i in np.unique(idx1):\n", + " js = idx2[ idx1 == i ]\n", + " with np.errstate(divide='ignore',invalid='ignore'):\n", + " angles = [np.arccos( transforms[j].T.dot( transforms[i].dot(_z) ).dot( _z ) ) for j in js]\n", + " angles = np.array( angles )\n", + " tmp = np.argmin(dists[i][js] + 1.0*angles)\n", + " j = js[tmp]\n", + " stacks_above[i] = j\n", + "\n", + " return stacks_above\n", + "\n", + "def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):\n", + "\n", + " helixmap = -np.ones(basepairs.shape, dtype=int)\n", + " helixrank = -np.ones(basepairs.shape)\n", + " is_fwd = np.ones(basepairs.shape, dtype=int)\n", + " \n", + " ## Remove stacks with nts lacking a basepairs\n", + " nobp = np.where(basepairs < 0)[0]\n", + " stacks_above[nobp] = -1\n", + " stacks_with_nobp = np.in1d(stacks_above, nobp)\n", + " stacks_above[stacks_with_nobp] = -1\n", + "\n", + " end_ids = np.where( (stacks_above < 0)*(basepairs >= 0) )[0]\n", + "\n", + " hid = 0\n", + " for end in end_ids:\n", + " if helixmap[end] >= 0:\n", + " continue\n", + " rank = 0\n", + " nt = basepairs[end]\n", + " bp = basepairs[nt]\n", + " assert( bp == end )\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " continue\n", + " # assert(helixmap[nt] == -1)\n", + " # assert(helixmap[bp] == -1)\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " rank +=1\n", + "\n", + " _tmp = [(nt,bp)]\n", + " \n", + " while stacks_above[nt] >= 0:\n", + " nt = stacks_above[nt]\n", + " if basepairs[nt] < 0: break\n", + " bp = basepairs[nt]\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " break\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " _tmp.append((nt,bp))\n", + " rank +=1\n", + "\n", + " hid += 1\n", + "\n", + " ## Create \"helix\" for each circular segment\n", + " intrahelical = []\n", + " processed = set()\n", + " unclaimed_bases = np.where( (basepairs >= 0)*(helixmap == -1) )[0]\n", + " for nt0 in unclaimed_bases:\n", + " if nt0 in processed: continue\n", + "\n", + " nt = nt0\n", + " all_nts = [nt]\n", + "\n", + " rank = 0\n", + " nt = nt0\n", + " bp = basepairs[nt]\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed cylic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " continue\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " rank +=1\n", + " processed.add(nt)\n", + " processed.add(bp)\n", + "\n", + " counter = 0\n", + " while stacks_above[nt] >= 0:\n", + " lastnt = nt\n", + " nt = stacks_above[nt]\n", + " bp = basepairs[nt]\n", + " if nt == nt0 or nt == basepairs[nt0]:\n", + " intrahelical.append((lastnt,nt0))\n", + " break\n", + " \n", + " assert( bp >= 0 )\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed cyclic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " break\n", + " \n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " processed.add(nt)\n", + " processed.add(bp)\n", + " rank +=1\n", + " hid += 1\n", + "\n", + " return helixmap, helixrank, is_fwd, intrahelical\n", + "\n", + "\n", + "def set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation=None):\n", + " maxrank = np.max( hrank[hmap==hid] )\n", + " if maxrank == 0:\n", + " ids = np.where((hmap == hid))[0]\n", + " pos = np.mean( [coordinate[r,:] for r in ids ], axis=0 )\n", + " coords = [pos,pos]\n", + " contours = [0,1]\n", + " if orientation is not None:\n", + " ids = np.where((hmap == hid) * fwd)[0]\n", + " assert( len(ids) == 1 )\n", + " q = quaternion_from_matrix( orientation[ids[0]] )\n", + " quats = [q, q]\n", + " coords[-1] = pos + orientation[ids[0]].dot(np.array((0,0,1)))\n", + "\n", + " else:\n", + " coords,contours,quats = [[],[],[]]\n", + " last_q = None\n", + " for rank in range(int(maxrank)+1):\n", + " ids = np.where((hmap == hid) * (hrank == rank))[0]\n", + " \n", + " coords.append(np.mean( [coordinate[r,:] for r in ids ], axis=0 ))\n", + " contours.append( float(rank+0.5)/(maxrank+1) )\n", + " if orientation is not None:\n", + " ids = np.where((hmap == hid) * (hrank == rank) * fwd)[0]\n", + " assert(len(ids) == 1)\n", + " q = quaternion_from_matrix( orientation[ids[0]] )\n", + "\n", + " if last_q is not None and last_q.dot(q) < 0:\n", + " q = -q\n", + "\n", + " ## Average quaterion with reverse direction\n", + " bp = basepair[ids[0]]\n", + " if bp >= 0:\n", + " bp_o = orientation[bp].dot(rotationAboutAxis(np.array((1,0,0)),180))\n", + " q2 = quaternion_from_matrix( bp_o )\n", + " if q.dot(q2) < 0:\n", + " q2 = -q2\n", + "\n", + " ## probably good enough, but slerp is better: q = (q + q2)*0.5\n", + " q = quaternion_slerp(q,q2,0.5)\n", + "\n", + " quats.append(q)\n", + " last_q = q\n", + "\n", + " coords = np.array(coords)\n", + " seg.set_splines(contours,coords)\n", + " if orientation is not None:\n", + " quats = np.array(quats)\n", + " seg.set_orientation_splines(contours,quats)\n", + "\n", + " seg.start_position = coords[0,:]\n", + " seg.end_position = coords[-1,:]\n", + "\n", + "\n", + "def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,\n", + " sequence=None, orientation=None,\n", + " max_basepairs_per_bead = 5,\n", + " max_nucleotides_per_bead = 5,\n", + " local_twist = False,\n", + " dimensions=(5000,5000,5000),\n", + " **model_parameters):\n", + " \"\"\" \n", + " Creates a SegmentModel object from lists of each nucleotide's\n", + " basepair, its stack (on 3' side) and its 3'-connected nucleotide\n", + "\n", + " The first argument should be an N-by-3 numpy array containing the\n", + " coordinate of each nucleotide, where N is the number of\n", + " nucleotides. The following three arguments should be integer lists\n", + " where the i-th element corresponds to the i-th nucleotide; the\n", + " list element should the integer index of the corresponding\n", + " basepaired / stacked / phosphodiester-bonded nucleotide. If there\n", + " is no such nucleotide, the value should be -1.\n", + "\n", + " Args:\n", + " basepair: List of each nucleotide's basepair's index\n", + " stack: List containing index of the nucleotide stacked on the 3' of each nucleotide\n", + " three_prime: List of each nucleotide's the 3' end of each nucleotide\n", + "\n", + " Returns:\n", + " SegmentModel\n", + " \"\"\"\n", + "\n", + " \"\"\" Validate Input \"\"\"\n", + " inputs = (basepair,three_prime)\n", + " try:\n", + " basepair,three_prime = [np.array(a,dtype=int) for a in inputs]\n", + " except:\n", + " raise TypeError(\"One or more of the input lists could not be converted into a numpy array\")\n", + " inputs = (basepair,three_prime)\n", + " coordinate = np.array(coordinate)\n", + "\n", + " if np.any( [len(a.shape) > 1 for a in inputs] ):\n", + " raise ValueError(\"One or more of the input lists has the wrong dimensionality\")\n", + "\n", + " if len(coordinate.shape) != 2:\n", + " raise ValueError(\"Coordinate array has the wrong dimensionality\")\n", + "\n", + " inputs = (coordinate,basepair,three_prime)\n", + " if not np.all(np.diff([len(a) for a in inputs]) == 0):\n", + " raise ValueError(\"Inputs are not the same length\")\n", + " \n", + " num_nt = len(basepair)\n", + " if sequence is not None and len(sequence) != num_nt:\n", + " raise ValueError(\"The 'sequence' parameter is the wrong length {} != {}\".format(len(sequence),num_nt))\n", + "\n", + " if orientation is not None:\n", + " orientation = np.array(orientation)\n", + " if len(orientation.shape) != 3:\n", + " raise ValueError(\"The 'orientation' array has the wrong dimensionality (should be Nx3x3)\")\n", + " if orientation.shape != (num_nt,3,3):\n", + " raise ValueError(\"The 'orientation' array is not properly formatted\")\n", + "\n", + " if stack is None:\n", + " if orientation is not None:\n", + " stack = find_stacks(coordinate, orientation)\n", + " else:\n", + " ## Guess stacking based on 3' connectivity\n", + " stack = np.array(three_prime,dtype=int) # Assume nts on 3' ends are stacked\n", + " _stack_below = _three_prime_list_to_five_prime(stack)\n", + " _has_bp = (basepair >= 0)\n", + " _nostack = np.where( (stack == -1)*_has_bp )[0]\n", + " _has_stack_below = _stack_below[basepair[_nostack]] >= 0\n", + " _nostack2 = _nostack[_has_stack_below]\n", + " stack[_nostack2] = basepair[_stack_below[basepair[_nostack2]]]\n", + "\n", + " else:\n", + " try:\n", + " stack = np.array(stack,dtype=int)\n", + " except:\n", + " raise TypeError(\"The 'stack' array could not be converted into a numpy integer array\")\n", + "\n", + " if len(stack.shape) != 1:\n", + " raise ValueError(\"The 'stack' array has the wrong dimensionality\")\n", + "\n", + " if len(stack) != num_nt:\n", + " raise ValueError(\"The length of the 'stack' array does not match other inputs\")\n", + "\n", + " bps = basepair # alias\n", + "\n", + " \"\"\" Fix stacks: require that the stack of a bp of a base's stack is its bp \"\"\"\n", + " _has_bp = (bps >= 0)\n", + " _has_stack = (stack >= 0)\n", + " _stack_has_basepair = (bps[stack] >= 0) * _has_stack\n", + " stack = np.where( (stack[bps[stack]] == bps) * _has_bp * _has_stack * _has_bp,\n", + " stack, -np.ones(len(stack),dtype=int) )\n", + "\n", + " five_prime = _three_prime_list_to_five_prime(three_prime)\n", + "\n", + " \"\"\" Build map of dsDNA helices and strands \"\"\"\n", + " hmap,hrank,fwd,intrahelical = basepairs_and_stacks_to_helixmap(bps,stack)\n", + " double_stranded_helices = np.unique(hmap[hmap >= 0]) \n", + " strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime)\n", + "\n", + " \"\"\" Add ssDNA to hmap \"\"\"\n", + " if len(double_stranded_helices) > 0:\n", + " hid = double_stranded_helices[-1]+1\n", + " else:\n", + " hid = 0\n", + " ss_residues = hmap < 0\n", + " #\n", + " if np.any(bps[ss_residues] != -1):\n", + " logger.warning(f'{np.sum(bps[ss_residues] != -1)} ssDNA nucleotides appear to have basepairs... ignoring')\n", + " \n", + " for s,c in zip(strands, strand_is_circular):\n", + " strand_segment_ends = [i for i in np.where( np.diff(hmap[s]) != 0 )[0]] + [len(s)-1]\n", + " seg_start = 0\n", + " for i in strand_segment_ends:\n", + " if hmap[s[i]] < 0:\n", + " ## Found single-stranded segment\n", + " ids = s[seg_start:i+1]\n", + " assert( np.all(hmap[ids] == -1) )\n", + " hmap[ids] = hid\n", + " hrank[ids] = np.arange(i+1-seg_start)\n", + " hid+=1\n", + " seg_start = i+1\n", + "\n", + " if len(double_stranded_helices) > 0:\n", + " single_stranded_helices = np.arange(double_stranded_helices[-1]+1,hid)\n", + " else:\n", + " single_stranded_helices = np.arange(hid)\n", + "\n", + " ## Create double-stranded segments\n", + " doubleSegments = []\n", + " for hid in double_stranded_helices:\n", + " seg = DoubleStrandedSegment(name=str(hid),\n", + " num_bp = np.sum(hmap==hid)//2)\n", + " set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)\n", + "\n", + " assert(hid == len(doubleSegments))\n", + " doubleSegments.append(seg)\n", + "\n", + " ## Create single-stranded segments\n", + " singleSegments = []\n", + " for hid in single_stranded_helices:\n", + " seg = SingleStrandedSegment(name=str(hid),\n", + " num_nt = np.sum(hmap==hid))\n", + " set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)\n", + "\n", + " assert(hid == len(doubleSegments) + len(singleSegments))\n", + " singleSegments.append(seg)\n", + "\n", + " ## Find crossovers and 5prime/3prime ends\n", + " crossovers,prime5,prime3 = [[],[],[]]\n", + " for s,c in zip(strands,strand_is_circular):\n", + " tmp = np.where(np.diff(hmap[s]) != 0)[0]\n", + " for i in tmp:\n", + " crossovers.append( (s[i],s[i+1]) )\n", + " if c:\n", + " if hmap[s[-1]] != hmap[s[0]]:\n", + " crossovers.append( (s[-1],s[0]) )\n", + " else:\n", + " prime5.append(s[0])\n", + " prime3.append(s[-1])\n", + "\n", + " ## Add connections\n", + " allSegments = doubleSegments+singleSegments\n", + "\n", + " for r1,r2 in crossovers:\n", + " seg1,seg2 = [allSegments[hmap[i]] for i in (r1,r2)]\n", + " nt1,nt2 = [hrank[i] for i in (r1,r2)]\n", + " f1,f2 = [fwd[i] for i in (r1,r2)]\n", + "\n", + " ## Handle connections at the ends\n", + " is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1))\n", + " is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0))\n", + "\n", + " print(seg1,seg2, r1, r2, is_terminal1, is_terminal2)\n", + " if is_terminal1 or is_terminal2:\n", + " \"\"\" Ensure that we don't have three-way dsDNA junctions \"\"\"\n", + " if is_terminal1 and (bps[r1] >= 0) and (five_prime[bps[r1]] >= 0) and (three_prime[r1] >= 0):\n", + " if (bps[five_prime[bps[r1]]] >= 0) and (bps[three_prime[r1]] >= 0):\n", + " # is_terminal1 = (three_prime[r1] == bps[five_prime[bps[r1]]])\n", + " is_terminal1 = hmap[five_prime[bps[r1]]] == hmap[three_prime[r1]]\n", + " if is_terminal2 and (bps[r2] >= 0) and (three_prime[bps[r2]] >= 0) and (five_prime[r2] >= 0):\n", + " if (bps[three_prime[bps[r2]]] >= 0) and (bps[five_prime[r2]] >= 0):\n", + " # is_terminal2 = (five_prime[r2] == bps[three_prime[bps[r2]]])\n", + " is_terminal2 = hmap[three_prime[bps[r2]]] == hmap[five_prime[r2]]\n", + " \n", + " \"\"\" Place connection \"\"\"\n", + " if is_terminal1 and is_terminal2:\n", + " end1 = seg1.end3 if f1 else seg1.start3\n", + " end2 = seg2.start5 if f2 else seg2.end5\n", + " seg1._connect_ends( end1, end2, type_='intrahelical')\n", + " else:\n", + " seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_=\"terminal_crossover\")\n", + " else:\n", + " seg1.add_crossover(nt1,seg2,nt2,[f1,f2])\n", + "\n", + " ## Add 5prime/3prime ends\n", + " for r in prime5:\n", + " seg = allSegments[hmap[r]]\n", + " seg.add_5prime(hrank[r],fwd[r])\n", + " for r in prime3:\n", + " seg = allSegments[hmap[r]]\n", + " seg.add_3prime(hrank[r],fwd[r])\n", + "\n", + " ## Add intrahelical connections to circular helical sections\n", + " for nt0,nt1 in intrahelical:\n", + " seg = allSegments[hmap[nt0]]\n", + " assert( seg is allSegments[hmap[nt1]] )\n", + " if three_prime[nt0] >= 0:\n", + " if hmap[nt0] == hmap[three_prime[nt0]]:\n", + " seg.connect_end3(seg.start5)\n", + "\n", + " bp0,bp1 = [bps[nt] for nt in (nt0,nt1)]\n", + " if three_prime[bp1] >= 0:\n", + " if hmap[bp1] == hmap[three_prime[bp1]]:\n", + " seg.connect_start3(seg.end5)\n", + "\n", + " ## Assign sequence\n", + " if sequence is not None:\n", + " for hid in range(len(allSegments)):\n", + " resids = np.where( (hmap==hid)*(fwd==1) )[0]\n", + " s = allSegments[hid]\n", + " s.sequence = [sequence[r] for r in sorted(resids,key=lambda x: hrank[x])]\n", + "\n", + "\n", + " ## Build model\n", + " model = SegmentModel( allSegments,\n", + " max_basepairs_per_bead = max_basepairs_per_bead,\n", + " max_nucleotides_per_bead = max_nucleotides_per_bead,\n", + " local_twist = local_twist,\n", + " dimensions = dimensions,\n", + " **model_parameters )\n", + "\n", + "\n", + " model._reader_list_coordinates = coordinate\n", + " model._reader_list_basepair = basepair\n", + " model._reader_list_stack = stack\n", + " model._reader_list_three_prime = three_prime\n", + " model._reader_list_five_prime = five_prime\n", + " model._reader_list_sequence = sequence\n", + " model._reader_list_orientation = orientation\n", + " model._reader_list_hmap = hmap\n", + " model._reader_list_fwd = fwd\n", + " model._reader_list_hrank = hrank\n", + "\n", + " if sequence is None:\n", + " for s in model.segments:\n", + " s.randomize_unset_sequence()\n", + "\n", + " return model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "53a3183e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<DoubleStrandedSegment'> 1[1]> <DoubleStrandedSegment'> 0[1]> 5 3 True True\n", + "<SingleStrandedSegment'> 2[2]> <DoubleStrandedSegment'> 0[1]> 1 2 True True\n", + "<DoubleStrandedSegment'> 0[1]> <DoubleStrandedSegment'> 1[1]> 2 4 True True\n", + "<DoubleStrandedSegment'> 1[1]> <SingleStrandedSegment'> 3[1]> 4 6 True True\n", + "<SingleStrandedSegment'> 3[1]> <SingleStrandedSegment'> 2[2]> 6 0 True True\n" + ] + } + ], + "source": [ + "coordinate = [(0,0,3.4*i) for i in range(7)]\n", + "three_prime = [ 1, 2, 4,-1, 6, 3, 0]\n", + "basepair = [-1,-1, 3, 2, 5, 4,-1]\n", + "stack = [-1,-1, -1,-1,-1, -1,-1]\n", + "for i in [3,5]:\n", + " coordinate[i] = (1,0,3.4*i)\n", + "\n", + "model = model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,\n", + " max_basepairs_per_bead=1,\n", + " max_nucleotides_per_bead=1,\n", + " local_twist=False)\n", + "model.writePsf(\"list.psf\")\n", + "model.writePdb(\"list.pdb\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fdcadbe9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[<SegmentParticle DNA on <DoubleStrandedSegment'> 0[1]>[1.00]>], [<SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[0.00]>, <SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.00]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.50]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 3[1]>[0.50]>]]\n", + "[[<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>], [<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>, <Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>], [<Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>], [<Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>]]\n" + ] + } + ], + "source": [ + "print([i.children for i in model.children])\n", + "print([i.connections for i in model.children])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d27fd998", + "metadata": {}, + "outputs": [], + "source": [ + "s=model.children[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "fe094a9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.segname" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5e54cf99", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(0, 0, 0.0),\n", + " (0, 0, 3.4),\n", + " (0, 0, 6.8),\n", + " (1, 0, 10.2),\n", + " (0, 0, 13.6),\n", + " (1, 0, 17.0),\n", + " (0, 0, 20.4)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coordinate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbeb8724", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}