{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Applying a time-reversible nucleotide model\n",
    "\n",
    "We display the set of available nucleotide substitution models."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">Specify a model using 'Abbreviation' (case sensitive).</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>Model Type</th>\n",
       "<th>Abbreviation</th>\n",
       "<th>Description</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>JC69</td>\n",
       "<td>Jukes and Cantor's 1969 model</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>K80</td>\n",
       "<td>Kimura 1980</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>F81</td>\n",
       "<td>Felsenstein's 1981 model</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>HKY85</td>\n",
       "<td>Hasegawa, Kishino and Yanamo 1985 model</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>TN93</td>\n",
       "<td>Tamura and Nei 1993 model</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>GTR</td>\n",
       "<td>General Time Reversible nucleotide substitution model.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>ssGN</td>\n",
       "<td>strand-symmetric general Markov nucleotide (non-stationary, non-reversible). Kaehler, 2017, Journal of Theoretical Biology 420: 144–51</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>GN</td>\n",
       "<td>General Markov Nucleotide (non-stationary, non-reversible). Kaehler, Yap, Zhang, Huttley, 2015, Sys Biol 64 (2): 281–93</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>BH</td>\n",
       "<td>Barry and Hartigan Discrete Time substitution model Barry and Hartigan 1987. Biometrics 43: 261–76.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>nucleotide</td>\n",
       "<td>DT</td>\n",
       "<td>Discrete Time substitution model (non-stationary, non-reversible). motif_length=2 makes this a dinucleotide model, motif_length=3 a trinucleotide model.</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<p>\n",
       "10 rows x 3 columns</p>"
      ],
      "text/plain": [
       "Specify a model using 'Abbreviation' (case sensitive).\n",
       "======================================================================================================================================================================================\n",
       "Model Type    Abbreviation                                                                                                                                                 Description\n",
       "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
       "nucleotide            JC69                                                                                                                               Jukes and Cantor's 1969 model\n",
       "nucleotide             K80                                                                                                                                                 Kimura 1980\n",
       "nucleotide             F81                                                                                                                                    Felsenstein's 1981 model\n",
       "nucleotide           HKY85                                                                                                                     Hasegawa, Kishino and Yanamo 1985 model\n",
       "nucleotide            TN93                                                                                                                                   Tamura and Nei 1993 model\n",
       "nucleotide             GTR                                                                                                      General Time Reversible nucleotide substitution model.\n",
       "nucleotide            ssGN                      strand-symmetric general Markov nucleotide (non-stationary, non-reversible). Kaehler, 2017, Journal of Theoretical Biology 420: 144–51\n",
       "nucleotide              GN                                     General Markov Nucleotide (non-stationary, non-reversible). Kaehler, Yap, Zhang, Huttley, 2015, Sys Biol 64 (2): 281–93\n",
       "nucleotide              BH                                                         Barry and Hartigan Discrete Time substitution model Barry and Hartigan 1987. Biometrics 43: 261–76.\n",
       "nucleotide              DT    Discrete Time substitution model (non-stationary, non-reversible). motif_length=2 makes this a dinucleotide model, motif_length=3 a trinucleotide model.\n",
       "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
       "\n",
       "10 rows x 3 columns"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from cogent3 import available_models\n",
    "\n",
    "available_models(\"nucleotide\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using the GTR model\n",
    "\n",
    "We specify the general time-reversible model ([Lanave et al.](https://www.ncbi.nlm.nih.gov/pubmed/6429346)) by its abbreviation. By default, this model does not optimise the nucleotide frequencies but uses the average estimated from the alignment. Here, we instead configure the model to optimise these frequencies (the root motif probabilities)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">GTR</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>key</th>\n",
       "<th>lnL</th>\n",
       "<th>nfp</th>\n",
       "<th>DLC</th>\n",
       "<th>unique_Q</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td></td>\n",
       "<td style=\"font-family: monospace !important;\">-6992.5741</td>\n",
       "<td style=\"font-family: monospace !important;\">19</td>\n",
       "<td>True</td>\n",
       "<td></td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "GTR\n",
       "============================================\n",
       "key           lnL    nfp     DLC    unique_Q\n",
       "--------------------------------------------\n",
       "       -6992.5741     19    True            \n",
       "--------------------------------------------\n",
       "\n",
       "1 rows x 5 columns"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from cogent3.app import io, evo\n",
    "\n",
    "loader = io.load_aligned(format=\"fasta\", moltype=\"dna\")\n",
    "aln = loader(\"../data/primate_brca1.fasta\")\n",
    "model = evo.model(\"GTR\", \n",
    "                  tree=\"../data/primate_brca1.tree\", \n",
    "                  sm_args=dict(optimise_motif_probs=True))\n",
    "result = model(aln)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<h4>GTR</h4>\n",
       "<p>log-likelihood = -6992.5741</p>\n",
       "<p>number of free parameters = 19</p>\n",
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">Global params</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>A/C</th>\n",
       "<th>A/G</th>\n",
       "<th>A/T</th>\n",
       "<th>C/G</th>\n",
       "<th>C/T</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"font-family: monospace !important;\">1.2296</td>\n",
       "<td style=\"font-family: monospace !important;\">5.2478</td>\n",
       "<td style=\"font-family: monospace !important;\">0.9472</td>\n",
       "<td style=\"font-family: monospace !important;\">2.3389</td>\n",
       "<td style=\"font-family: monospace !important;\">5.9666</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "\n",
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">Edge params</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>edge</th>\n",
       "<th>parent</th>\n",
       "<th>length</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Galago</td>\n",
       "<td>root</td>\n",
       "<td style=\"font-family: monospace !important;\">0.1727</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">HowlerMon</td>\n",
       "<td>root</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0448</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Rhesus</td>\n",
       "<td>edge.3</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0215</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Orangutan</td>\n",
       "<td>edge.2</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0077</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Gorilla</td>\n",
       "<td>edge.1</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0025</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Human</td>\n",
       "<td>edge.0</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0060</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Chimpanzee</td>\n",
       "<td>edge.0</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0028</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">edge.0</td>\n",
       "<td>edge.1</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0000</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">edge.1</td>\n",
       "<td>edge.2</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0034</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">edge.2</td>\n",
       "<td>edge.3</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0119</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">edge.3</td>\n",
       "<td>root</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0076</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "\n",
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">Motif params</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>A</th>\n",
       "<th>C</th>\n",
       "<th>G</th>\n",
       "<th>T</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"font-family: monospace !important;\">0.3792</td>\n",
       "<td style=\"font-family: monospace !important;\">0.1719</td>\n",
       "<td style=\"font-family: monospace !important;\">0.2066</td>\n",
       "<td style=\"font-family: monospace !important;\">0.2423</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "GTR\n",
       "log-likelihood = -6992.5741\n",
       "number of free parameters = 19\n",
       "==============================================\n",
       "   A/C       A/G       A/T       C/G       C/T\n",
       "----------------------------------------------\n",
       "1.2296    5.2478    0.9472    2.3389    5.9666\n",
       "----------------------------------------------\n",
       "==============================\n",
       "      edge    parent    length\n",
       "------------------------------\n",
       "    Galago      root    0.1727\n",
       " HowlerMon      root    0.0448\n",
       "    Rhesus    edge.3    0.0215\n",
       " Orangutan    edge.2    0.0077\n",
       "   Gorilla    edge.1    0.0025\n",
       "     Human    edge.0    0.0060\n",
       "Chimpanzee    edge.0    0.0028\n",
       "    edge.0    edge.1    0.0000\n",
       "    edge.1    edge.2    0.0034\n",
       "    edge.2    edge.3    0.0119\n",
       "    edge.3      root    0.0076\n",
       "------------------------------\n",
       "====================================\n",
       "     A         C         G         T\n",
       "------------------------------------\n",
       "0.3792    0.1719    0.2066    0.2423\n",
       "------------------------------------"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.lf"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:c3dev] *",
   "language": "python",
   "name": "conda-env-c3dev-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
