Unfinished

2023-11-11 22:55:21 +03:00 · 2023-11-11 22:55:21 +03:00 · c4fe35f29b
commit c4fe35f29b
parent bf10a1c9f4
3 changed files with 54813 additions and 250 deletions
--- a/practice4/FinFraud_unknown.csv
+++ b/practice4/FinFraud_unknown.csv
--- a/practice4/main.ipynb
+++ b/practice4/main.ipynb
@ -0,0 +1,755 @@
 {
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Практическая работа №4**\n",
    "\n",
    "\n",
    "# Обнаружение злоумышленников в системе мобильных денежных переводов\n",
    "\n",
    "_Вариант 5_\n",
    "\n",
    "\n",
    "1) настройка окружения"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33mWARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47347ab190>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
      "\u001b[0m\u001b[33mWARNING: Retrying (Retry(total=3, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47345c0610>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
      "\u001b[0m\u001b[33mWARNING: Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47345c0b50>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
      "\u001b[0m\u001b[33mWARNING: Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47345c14d0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
      "\u001b[0m\u001b[33mWARNING: Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47345c1ed0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
      "\u001b[0m\u001b[31mERROR: Could not find a version that satisfies the requirement scipy==1.8.1 (from versions: none)\u001b[0m\u001b[31m\n",
      "\u001b[0m\u001b[31mERROR: No matching distribution found for scipy==1.8.1\u001b[0m\u001b[31m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Requirement already satisfied: networkx==2.7.0 in ./.venv/lib64/python3.11/site-packages (2.7)\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Requirement already satisfied: pyvis in ./.venv/lib64/python3.11/site-packages (0.3.2)\n",
      "Requirement already satisfied: pandas in ./.venv/lib64/python3.11/site-packages (2.0.1)\n",
      "Requirement already satisfied: numpy in ./.venv/lib64/python3.11/site-packages (1.24.3)\n",
      "Requirement already satisfied: plotly in ./.venv/lib64/python3.11/site-packages (5.14.1)\n",
      "Requirement already satisfied: ipython>=5.3.0 in ./.venv/lib64/python3.11/site-packages (from pyvis) (8.13.2)\n",
      "Requirement already satisfied: jinja2>=2.9.6 in ./.venv/lib64/python3.11/site-packages (from pyvis) (3.1.2)\n",
      "Requirement already satisfied: jsonpickle>=1.4.1 in ./.venv/lib64/python3.11/site-packages (from pyvis) (3.0.1)\n",
      "Requirement already satisfied: networkx>=1.11 in ./.venv/lib64/python3.11/site-packages (from pyvis) (2.7)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in ./.venv/lib64/python3.11/site-packages (from pandas) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in ./.venv/lib64/python3.11/site-packages (from pandas) (2023.3)\n",
      "Requirement already satisfied: tzdata>=2022.1 in ./.venv/lib64/python3.11/site-packages (from pandas) (2023.3)\n",
      "Requirement already satisfied: tenacity>=6.2.0 in ./.venv/lib64/python3.11/site-packages (from plotly) (8.2.2)\n",
      "Requirement already satisfied: packaging in ./.venv/lib64/python3.11/site-packages (from plotly) (23.1)\n",
      "Requirement already satisfied: backcall in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.2.0)\n",
      "Requirement already satisfied: decorator in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (5.1.1)\n",
      "Requirement already satisfied: jedi>=0.16 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.18.2)\n",
      "Requirement already satisfied: matplotlib-inline in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.1.6)\n",
      "Requirement already satisfied: pickleshare in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.7.5)\n",
      "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (3.0.38)\n",
      "Requirement already satisfied: pygments>=2.4.0 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (2.15.1)\n",
      "Requirement already satisfied: stack-data in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.6.2)\n",
      "Requirement already satisfied: traitlets>=5 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (5.9.0)\n",
      "Requirement already satisfied: pexpect>4.3 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (4.8.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in ./.venv/lib64/python3.11/site-packages (from jinja2>=2.9.6->pyvis) (2.1.2)\n",
      "Requirement already satisfied: six>=1.5 in ./.venv/lib64/python3.11/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
      "Requirement already satisfied: parso<0.9.0,>=0.8.0 in ./.venv/lib64/python3.11/site-packages (from jedi>=0.16->ipython>=5.3.0->pyvis) (0.8.3)\n",
      "Requirement already satisfied: ptyprocess>=0.5 in ./.venv/lib64/python3.11/site-packages (from pexpect>4.3->ipython>=5.3.0->pyvis) (0.7.0)\n",
      "Requirement already satisfied: wcwidth in ./.venv/lib64/python3.11/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=5.3.0->pyvis) (0.2.6)\n",
      "Requirement already satisfied: executing>=1.2.0 in ./.venv/lib64/python3.11/site-packages (from stack-data->ipython>=5.3.0->pyvis) (1.2.0)\n",
      "Requirement already satisfied: asttokens>=2.1.0 in ./.venv/lib64/python3.11/site-packages (from stack-data->ipython>=5.3.0->pyvis) (2.2.1)\n",
      "Requirement already satisfied: pure-eval in ./.venv/lib64/python3.11/site-packages (from stack-data->ipython>=5.3.0->pyvis) (0.2.2)\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "# %pip install scipy==1.8.1\n",
    "# %pip install networkx==2.7.0\n",
    "# %pip install pyvis pandas numpy plotly\n",
    "\n",
    "from functools import reduce\n",
    "from pyvis import network as net\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import networkx as nx\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "\n",
    "from plotly.offline import iplot\n",
    "from IPython.display import display, HTML\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "#for Jupiter notebooks\n",
    "import plotly.io as pio #comment for Google collab\n",
    "pio.renderers.default='notebook'#comment for Google collab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pyvis_deepnote_show(nt):\n",
    "    tmp_output_filename = tempfile.NamedTemporaryFile(suffix='.html').name\n",
    "    nt.save_graph(tmp_output_filename)\n",
    "\n",
    "    f = open(tmp_output_filename, \"r\")\n",
    "    display(HTML(f.read()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>User ID (sender)</th>\n",
       "      <th>User ID (receiver)</th>\n",
       "      <th>User account ID (sender)</th>\n",
       "      <th>User account ID (receiver)</th>\n",
       "      <th>Amount of transaction</th>\n",
       "      <th>Type of transaction</th>\n",
       "      <th>Transaction timestamp</th>\n",
       "      <th>Sender type</th>\n",
       "      <th>Receiver type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>54030</td>\n",
       "      <td>54030</td>\n",
       "      <td>54030</td>\n",
       "      <td>54030</td>\n",
       "      <td>54030.0</td>\n",
       "      <td>54030</td>\n",
       "      <td>54030</td>\n",
       "      <td>54030</td>\n",
       "      <td>54030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>1861</td>\n",
       "      <td>1562</td>\n",
       "      <td>1861</td>\n",
       "      <td>1562</td>\n",
       "      <td></td>\n",
       "      <td>5</td>\n",
       "      <td>46394</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>PN_Ret4</td>\n",
       "      <td>operator</td>\n",
       "      <td>RAcc4</td>\n",
       "      <td>A0</td>\n",
       "      <td></td>\n",
       "      <td>ArRC</td>\n",
       "      <td>08.07.2011 15:16</td>\n",
       "      <td>EU</td>\n",
       "      <td>operator</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>2256</td>\n",
       "      <td>27901</td>\n",
       "      <td>2256</td>\n",
       "      <td>27901</td>\n",
       "      <td></td>\n",
       "      <td>27901</td>\n",
       "      <td>5</td>\n",
       "      <td>41246</td>\n",
       "      <td>27901</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>53083.47221</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>85834.97052</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>2158.2525</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>6257.375</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>76821.9675</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>1053512.86</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       User ID (sender) User ID (receiver) User account ID (sender)   \n",
       "count             54030              54030                    54030  \\\n",
       "unique             1861               1562                     1861   \n",
       "top             PN_Ret4           operator                    RAcc4   \n",
       "freq               2256              27901                     2256   \n",
       "mean                                                                  \n",
       "std                                                                   \n",
       "min                                                                   \n",
       "25%                                                                   \n",
       "50%                                                                   \n",
       "75%                                                                   \n",
       "max                                                                   \n",
       "\n",
       "       User account ID (receiver) Amount of transaction Type of transaction   \n",
       "count                       54030               54030.0               54030  \\\n",
       "unique                       1562                                         5   \n",
       "top                            A0                                      ArRC   \n",
       "freq                        27901                                     27901   \n",
       "mean                                        53083.47221                       \n",
       "std                                         85834.97052                       \n",
       "min                                                 0.0                       \n",
       "25%                                           2158.2525                       \n",
       "50%                                            6257.375                       \n",
       "75%                                          76821.9675                       \n",
       "max                                          1053512.86                       \n",
       "\n",
       "       Transaction timestamp Sender type Receiver type  \n",
       "count                  54030       54030         54030  \n",
       "unique                 46394           2             4  \n",
       "top         08.07.2011 15:16          EU      operator  \n",
       "freq                       5       41246         27901  \n",
       "mean                                                    \n",
       "std                                                     \n",
       "min                                                     \n",
       "25%                                                     \n",
       "50%                                                     \n",
       "75%                                                     \n",
       "max                                                     "
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('./FinFraud_unknown.csv', sep=',', parse_dates=[15, 16, 21])\n",
    "\n",
    "df.columns = [\n",
    "    'User ID (sender)', \n",
    "    'User ID (receiver)',\n",
    "    'User account ID (sender)',\n",
    "    'User account ID (receiver)',\n",
    "    'Amount of transaction',\n",
    "    'Type of transaction',\n",
    "    'State of operation',\n",
    "    'Balance before (sender)',\n",
    "    'Balance after (sender)',\n",
    "    'Balance after (receiver)',\n",
    "    'Balance before (receiver)',    \n",
    "    'Not used',\n",
    "    'Not used',\n",
    "    'Not used',\n",
    "    'Not used',\n",
    "    'Transaction timestamp (sender)',\n",
    "    'Transaction timestamp (receiver)',\n",
    "    'Sender account ID',\n",
    "    'Not used',\n",
    "    'Not used',\n",
    "    'Not used',\n",
    "    'Transaction timestamp',\n",
    "    'Sender type',\n",
    "    'Receiver type'\n",
    "]\n",
    "df = df.loc[:, ~df.columns.str.contains('^Not used', case=False)].sort_values('Transaction timestamp') \n",
    "df = df.drop('State of operation', axis=1)\n",
    "df = df.drop('Sender account ID', axis=1)\n",
    "df = df.drop('Transaction timestamp (sender)', axis=1)\n",
    "df = df.drop('Transaction timestamp (receiver)', axis=1)\n",
    "df = df.drop('Balance before (sender)', axis=1)\n",
    "df = df.drop('Balance after (sender)', axis=1)\n",
    "df = df.drop('Balance before (receiver)', axis=1)\n",
    "df = df.drop('Balance after (receiver)', axis=1)\n",
    "\n",
    "df[\"Amount of transaction\"] = pd.to_numeric(df[\"Amount of transaction\"], errors='coerce').fillna(0)\n",
    "\n",
    "\n",
    "df.describe(include='all').fillna('')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Описание набора данных"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "| Название столбца                            | Возможные значения                                                                                                                     |Описание                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |\n",
    "|----------------------------------------|----------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n",
    "| User ID (transaction sender)           | Generated ID                                                                                                               |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| User ID (transaction receiver)         | Generated ID                                                                                                               |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| User account ID (transaction sender)   | Generated ID                                                                                                               |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| User account ID (transaction receiver) | Generated ID                                                                                                               |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Amount of transaction                  | Number                                                                                                                     |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Type of transaction                    | `Ind`<br/>`Dt`<br/>`ArRC`<br/>`Wl`<br/>`Merchant`                                                                          | Тип транзакции <br/>`Ind` – денежный перевод между пользователями системы <br/>`Dt` – пополнение электронного кошелька (отправитель агент, а получатель - пользователь системы)<br/>`ArRC` – пополнение счета мобильной связи (перевод от пользователя системы к оператору мобильной связи )<br/>`Wl` – снятие электронных денег  (отправитель - пользователь системы, получатель - оператор)<br/>`Merchant` – перевод от пользователя поставщику услуг или товаров   |\n",
    "| State of operation                     | `SU`                                                                                                                       | `SU` – успешно                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Balance before (transaction sender)    | Number                                                                                                                     |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Balance before (transaction receiver)  | Number                                                                                                                     |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Balance after (transaction sender)     | Number                                                                                                                     |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Balance after (transaction receiver)   | Number                                                                                                                     |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Transaction timestamp (sender)         | Datetime                                                                                                                   |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Transaction timestamp (receiver)       | Datetime                                                                                                                   |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Sender account ID                      | Generated ID                                                                                                               |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Transaction timestamp                  | Datetime                                                                                                                   |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Sender type                            | `EU`<br/>`RET`                                                                                                             |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n",
    "| Receiver type                          | `EU`<br/>`operator`<br/>`RET`<br/>`MER`                                                                                    |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Поскольку поле `State of operation` всегда имеет значение `SU` для всех транзакций, данный столбец предлагается удалить.\n",
    "Столбцы `Sender account ID` и `User ID (transaction sender)` идентичны; кроме того, столбцы `Transaction timestamp (sender)` и `Transaction timestamp (receiver)` идентичны столбцу `Transaction timestamp`, поэтому дублирующие столбцы удаляются (остаётся только `Transaction timestamp`). Также удаляются столбцы с балансом, т.к. в текущей версии набора данных они не задействованы."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "User ID (sender)               object\n",
       "User ID (receiver)             object\n",
       "User account ID (sender)       object\n",
       "User account ID (receiver)     object\n",
       "Amount of transaction         float64\n",
       "Type of transaction            object\n",
       "Transaction timestamp          object\n",
       "Sender type                    object\n",
       "Receiver type                  object\n",
       "dtype: object"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Статистика транзакций для каждого пользователя\n",
    "\n",
    "Традиционно начнем со статистического анализа данных. Рекомендуется расширить число рассчитываемых статистик, например, включив показатели, характеризующие частоту транзакций. Для такого вида мошенничества как кража телефона изменение частоты снятий является характерным признаком."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_stat_dict():\n",
    "    stat_dict = dict()\n",
    "    transaction_types = {\"Ind\", \"Wl\", \"Dt\", \"Merchant\", \"ArRC\"} \n",
    "    for tran_type in transaction_types:\n",
    "          amount_name = f\"Sent_amount_{tran_type}\"\n",
    "          amount_median = f\"Sent_amount_{tran_type}_median\"\n",
    "          amount_min = f\"Sent_amount_{tran_type}_min\"\n",
    "          amount_max = f\"Sent_amount_{tran_type}_max\"\n",
    "          tran_count = f\"Sent_{tran_type}_count\"\n",
    "          rec_amount_name = f\"Received_amount_{tran_type}\"\n",
    "          rec_amount_median = f\"Received_amount_{tran_type}_median\"\n",
    "          rec_amount_min = f\"Received_amount_{tran_type}_min\"\n",
    "          rec_amount_max = f\"Received_amount_{tran_type}_max\"\n",
    "          rec_tran_count = f\"Received_{tran_type}_count\"\n",
    "          \n",
    "          stat_dict[amount_name] = 0\n",
    "          stat_dict[amount_median] = 0\n",
    "          stat_dict[amount_min] = 0\n",
    "          stat_dict[amount_max] = 0\n",
    "          stat_dict[tran_count] = 0\n",
    "          stat_dict[rec_amount_name] = 0\n",
    "          stat_dict[rec_amount_median] = 0\n",
    "          stat_dict[rec_amount_min] = 0\n",
    "          stat_dict[rec_amount_max] = 0\n",
    "          stat_dict[rec_tran_count] = 0\n",
    "\n",
    "    return stat_dict\n",
    "\n",
    "\n",
    "def get_stat_df(df):\n",
    "   sent_unique_users = df[\"User ID (sender)\"].unique()\n",
    "   received_unique_users = df[\"User ID (receiver)\"].unique()\n",
    "   unique_users = np.unique(np.concatenate((sent_unique_users,received_unique_users),0))\n",
    "   print(unique_users)\n",
    "   stat_df = pd.DataFrame()\n",
    "   stat_dict = init_stat_dict()\n",
    "   transaction_types = {\"Ind\", \"Wl\", \"Dt\", \"Merchant\", \"ArRC\"}\n",
    "   for user in unique_users:\n",
    "       stat_dict = init_stat_dict() \n",
    "       stat_dict[\"User ID\"] = user\n",
    "\n",
    "       user_df = df.loc[(df[\"User ID (sender)\"] == user)]\n",
    "       \n",
    "       if (not user_df.empty):\n",
    "          #stat_dict[\"User ID\"] = user\n",
    "          \n",
    "          stat_dict[\"Unique_receivers\"] = len(user_df[\"User ID (receiver)\"].unique())\n",
    "          stat_dict[\"User type\"] = user_df[\"Sender type\"].unique()[0]\n",
    "\n",
    "          for tran_type in transaction_types:\n",
    "              amount_name = f\"Sent_amount_{tran_type}\"\n",
    "              amount_median = f\"Sent_amount_{tran_type}_median\"\n",
    "              amount_min = f\"Sent_amount_{tran_type}_min\"\n",
    "              amount_max = f\"Sent_amount_{tran_type}_max\"\n",
    "              tran_count = f\"Sent_{tran_type}_count\"\n",
    "              stat_dict[amount_name] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].sum()\n",
    "              stat_dict[amount_median] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].mean()\n",
    "              stat_dict[amount_min] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].min()\n",
    "              stat_dict[amount_max] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].max()\n",
    "              stat_dict[tran_count] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].count()\n",
    "       else:\n",
    "          stat_dict[\"User type\"] = (df.loc[(df[\"User ID (receiver)\"]==user)])[\"Receiver type\"].unique()[0]\n",
    "\n",
    "       user_df = df.loc[(df[\"User ID (receiver)\"] == user)]\n",
    "       if (not user_df.empty):\n",
    "          stat_dict[\"Unique_senders\"] = len(user_df[\"User ID (sender)\"].unique())\n",
    "          for tran_type in transaction_types:\n",
    "              rec_amount_name = f\"Received_amount_{tran_type}\"\n",
    "              rec_amount_median = f\"Received_amount_{tran_type}_median\"\n",
    "              rec_amount_min = f\"Received_amount_{tran_type}_min\"\n",
    "              rec_amount_max = f\"Received_amount_{tran_type}_max\"\n",
    "              rec_tran_count = f\"Received_{tran_type}_count\"\n",
    "              stat_dict[rec_amount_name] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].sum()\n",
    "              stat_dict[rec_amount_median] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].median()\n",
    "              stat_dict[rec_amount_min] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].min()\n",
    "              stat_dict[rec_amount_max] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].max()\n",
    "              stat_dict[rec_tran_count] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].count()\n",
    "       \n",
    "       df_temp = pd.DataFrame([stat_dict])\n",
    "       \n",
    "       #df_temp.head()\n",
    "       stat_df = pd.concat([stat_df, df_temp])\n",
    "   stat_df = stat_df.fillna(0)\n",
    "   return stat_df\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Кстати, обратите внимание: уникальных пользователей в системе — 2009. Это больше, чем число уникальных отправителей, и больше, чем число уникальных получателей, значит, какие-то пользователи только отправляют деньги, а какие-то только получают."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['PN_EU_0_0' 'PN_EU_0_1' 'PN_EU_0_10' ... 'PN_Ret5' 'PN_Ret6' 'operator']\n",
      "(2009, 54)\n",
      "   Sent_amount_Wl  Sent_amount_Wl_median  Sent_amount_Wl_min   \n",
      "0             0.0                    0.0                 0.0  \\\n",
      "0             0.0                    0.0                 0.0   \n",
      "0             0.0                    0.0                 0.0   \n",
      "0             0.0                    0.0                 0.0   \n",
      "0             0.0                    0.0                 0.0   \n",
      "\n",
      "   Sent_amount_Wl_max  Sent_Wl_count  Received_amount_Wl   \n",
      "0                 0.0              0                 0.0  \\\n",
      "0                 0.0              0                 0.0   \n",
      "0                 0.0              0                 0.0   \n",
      "0                 0.0              0                 0.0   \n",
      "0                 0.0              0                 0.0   \n",
      "\n",
      "   Received_amount_Wl_median  Received_amount_Wl_min  Received_amount_Wl_max   \n",
      "0                        0.0                     0.0                     0.0  \\\n",
      "0                        0.0                     0.0                     0.0   \n",
      "0                        0.0                     0.0                     0.0   \n",
      "0                        0.0                     0.0                     0.0   \n",
      "0                        0.0                     0.0                     0.0   \n",
      "\n",
      "   Received_Wl_count  ...  Sent_Dt_count  Received_amount_Dt   \n",
      "0                  0  ...              0           686643.36  \\\n",
      "0                  0  ...              0           483467.30   \n",
      "0                  0  ...              0                0.00   \n",
      "0                  0  ...              0                0.00   \n",
      "0                  0  ...              0                0.00   \n",
      "\n",
      "   Received_amount_Dt_median  Received_amount_Dt_min  Received_amount_Dt_max   \n",
      "0                  27845.615                15965.17                41729.94  \\\n",
      "0                  35925.855                 8067.95                86422.48   \n",
      "0                      0.000                    0.00                    0.00   \n",
      "0                      0.000                    0.00                    0.00   \n",
      "0                      0.000                    0.00                    0.00   \n",
      "\n",
      "   Received_Dt_count       User ID  User type  Unique_senders   \n",
      "0                 24     PN_EU_0_0         EU             2.0  \\\n",
      "0                 12     PN_EU_0_1         EU             6.0   \n",
      "0                  0    PN_EU_0_10         EU             2.0   \n",
      "0                  0   PN_EU_0_100         EU             1.0   \n",
      "0                  0  PN_EU_0_1000         EU             0.0   \n",
      "\n",
      "   Unique_receivers  \n",
      "0               0.0  \n",
      "0               0.0  \n",
      "0               2.0  \n",
      "0               1.0  \n",
      "0               1.0  \n",
      "\n",
      "[5 rows x 54 columns]\n"
     ]
    }
   ],
   "source": [
    "# Per-user statistics table; the shape and the first rows are printed as a quick sanity check.\n",
    "stat_df = get_stat_df(df)\n",
    "print(stat_df.shape)\n",
    "print(stat_df.head())"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Была выбрана часть статистик, и по ним построены проекции пользователей. Анализируемые поля были выбраны на основе анализа свойств возможных финансовых аномалий (т.е. просто эвристически :))."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'sklearn'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[47], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mplotting\u001b[39;00m \u001b[39mimport\u001b[39;00m scatter_matrix\n\u001b[0;32m----> 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msklearn\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpreprocessing\u001b[39;00m \u001b[39mimport\u001b[39;00m StandardScaler\n\u001b[1;32m      3\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msklearn\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpreprocessing\u001b[39;00m \u001b[39mimport\u001b[39;00m LabelEncoder\n\u001b[1;32m      4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msklearn\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdecomposition\u001b[39;00m \u001b[39mimport\u001b[39;00m PCA\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sklearn'"
     ]
    }
   ],
   "source": [
    "from pandas.plotting import scatter_matrix\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.decomposition import PCA\n",
    "from matplotlib.ticker import FormatStrFormatter\n",
    "import plotly.express as px"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Мошенничество, связанное с заражением бот-сетью."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "\n",
    "Согласно описанию сценария атаки, есть множество заражённых пользователей, которые переводят деньги какому-то пользователю (\"ослу\" или \"мулу\"), и уже он выполняет операции обналичивания денег. Рассмотрен простейший вариант сценария: цепочка мулов состоит из одного звена."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'StandardScaler' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[48], line 11\u001b[0m\n\u001b[1;32m      8\u001b[0m x \u001b[39m=\u001b[39m stat_df[MobileBot_labels]\u001b[39m.\u001b[39mvalues\n\u001b[1;32m     10\u001b[0m \u001b[39m# нормализуем значения\u001b[39;00m\n\u001b[0;32m---> 11\u001b[0m x \u001b[39m=\u001b[39m StandardScaler()\u001b[39m.\u001b[39mfit_transform(x)\n\u001b[1;32m     13\u001b[0m pca \u001b[39m=\u001b[39m PCA(n_components\u001b[39m=\u001b[39m\u001b[39m3\u001b[39m)\n\u001b[1;32m     14\u001b[0m principalComponents \u001b[39m=\u001b[39m pca\u001b[39m.\u001b[39mfit_transform(x)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'StandardScaler' is not defined"
     ]
    }
   ],
   "source": [
    "\n",
    "# Botnet scenario: keep the transfer / withdrawal counters and add the number of\n",
    "# distinct counterparties in both directions (the original list repeated\n",
    "# 'Unique_receivers' twice, so the sender side was missing).\n",
    "MobileBot_labels = ['Unique_senders', 'Unique_receivers', 'Sent_Ind_count', 'Sent_Wl_count', 'Received_Ind_count']\n",
    "\n",
    "# These fields will be used to look for users whose phone was stolen.\n",
    "MobileTheft_labels = ['Sent_amount_Wl', 'Sent_amount_Wl_median', 'Sent_amount_Wl_min', 'Sent_amount_Wl_max', 'Sent_Wl_count']\n",
    "\n",
    "x = stat_df[MobileBot_labels].values\n",
    "\n",
    "# scale the selected features with StandardScaler before PCA\n",
    "x = StandardScaler().fit_transform(x)\n",
    "\n",
    "# project the users onto the first three principal components\n",
    "pca = PCA(n_components=3)\n",
    "principalComponents = pca.fit_transform(x)\n",
    "print(f'Explained variance: {pca.explained_variance_ratio_}\\tSum: {pca.explained_variance_ratio_.sum()}')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/practice4/Практическая_работа__4.ipynb
+++ b/practice4/Практическая_работа__4.ipynb
@ -60,12 +60,12 @@
    }
   ],
   "source": [
-    "!pip install --user scipy==1.8.1\n",
+    "%pip install --user scipy==1.8.1\n",
-    "!pip install --user networkx==2.7.0\n",
+    "%pip install --user networkx==2.7.0\n",
    "#uncomment when running in Google Collab\n",
    "#!apt install python3-dev graphviz libgraphviz-dev pkg-config\n",
    "#!pip install pygraphviz\n",
-    "!pip install pyvis\n",
+    "%pip install pyvis\n",
    "\n",
    "import zipfile\n",
    "import itertools\n",
@ -108,7 +108,7 @@
       "        require.undef(\"plotly\");\n",
       "        requirejs.config({\n",
       "            paths: {\n",
-       "                'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']\n",
+       "                'plotly': ['https://cdn.plot.ly/plotly-2.20.0.min']\n",
       "            }\n",
       "        });\n",
       "        require(['plotly'], function(Plotly) {\n",
@ -162,7 +162,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
@ -172,10 +172,15 @@
   },
   "outputs": [
    {
-     "name": "stdout",
+     "ename": "FileNotFoundError",
-     "output_type": "stream",
+     "evalue": "[Errno 2] No such file or directory: 'C:\\\\Practice\\\\data.zip'",
-     "text": [
+     "output_type": "error",
-      "['20130619.logDWH.complex.csv']\n"
+     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m zip_filepath\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mC:\u001b[39m\u001b[39m\\\u001b[39m\u001b[39mPractice\u001b[39m\u001b[39m\\\u001b[39m\u001b[39mdata.zip\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m----> 3\u001b[0m \u001b[39mwith\u001b[39;00m zipfile\u001b[39m.\u001b[39;49mZipFile(zip_filepath) \u001b[39mas\u001b[39;00m z:\n\u001b[1;32m      4\u001b[0m     \u001b[39mprint\u001b[39m(z\u001b[39m.\u001b[39mnamelist())\n\u001b[1;32m      5\u001b[0m     \u001b[39mfor\u001b[39;00m name \u001b[39min\u001b[39;00m z\u001b[39m.\u001b[39mnamelist():\n",
      "File \u001b[0;32m/usr/lib64/python3.11/zipfile.py:1283\u001b[0m, in \u001b[0;36mZipFile.__init__\u001b[0;34m(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps, metadata_encoding)\u001b[0m\n\u001b[1;32m   1281\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m   1282\u001b[0m     \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1283\u001b[0m         \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfp \u001b[39m=\u001b[39m io\u001b[39m.\u001b[39;49mopen(file, filemode)\n\u001b[1;32m   1284\u001b[0m     \u001b[39mexcept\u001b[39;00m \u001b[39mOSError\u001b[39;00m:\n\u001b[1;32m   1285\u001b[0m         \u001b[39mif\u001b[39;00m filemode \u001b[39min\u001b[39;00m modeDict:\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'C:\\\\Practice\\\\data.zip'"
     ]
    }
   ],
@ -219,7 +224,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 4,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
@ -229,247 +234,19 @@
   },
   "outputs": [
    {
-     "data": {
+     "ename": "TypeError",
-      "text/html": [
+     "evalue": "NDFrame.describe() got an unexpected keyword argument 'datetime_is_numeric'",
-       "<div>\n",
+     "output_type": "error",
-       "<style scoped>\n",
+     "traceback": [
-       "    .dataframe tbody tr th:only-of-type {\n",
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-       "        vertical-align: middle;\n",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-       "    }\n",
+      "Cell \u001b[0;32mIn[4], line 44\u001b[0m\n\u001b[1;32m     38\u001b[0m df \u001b[39m=\u001b[39m df\u001b[39m.\u001b[39mdrop(\u001b[39m'\u001b[39m\u001b[39mBalance after (receiver)\u001b[39m\u001b[39m'\u001b[39m, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[1;32m     43\u001b[0m df[\u001b[39m'\u001b[39m\u001b[39mGroundtruth\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m df[\u001b[39m'\u001b[39m\u001b[39mGroundtruth\u001b[39m\u001b[39m'\u001b[39m]\u001b[39m.\u001b[39mstr\u001b[39m.\u001b[39mreplace(\u001b[39m'\u001b[39m\u001b[39m-\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39m_\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m---> 44\u001b[0m df\u001b[39m.\u001b[39;49mdescribe(include\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mall\u001b[39;49m\u001b[39m'\u001b[39;49m, datetime_is_numeric\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\u001b[39m.\u001b[39mfillna(\u001b[39m'\u001b[39m\u001b[39m'\u001b[39m)\n",
-       "\n",
+      "\u001b[0;31mTypeError\u001b[0m: NDFrame.describe() got an unexpected keyword argument 'datetime_is_numeric'"
-       "    .dataframe tbody tr th {\n",
+     ]
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Groundtruth</th>\n",
       "      <th>User ID (sender)</th>\n",
       "      <th>User ID (receiver)</th>\n",
       "      <th>User account ID (sender)</th>\n",
       "      <th>User account ID (receiver)</th>\n",
       "      <th>Amount of transaction</th>\n",
       "      <th>Type of transaction</th>\n",
       "      <th>Transaction timestamp</th>\n",
       "      <th>Sender type</th>\n",
       "      <th>Receiver type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>54848</td>\n",
       "      <td>54848</td>\n",
       "      <td>54848</td>\n",
       "      <td>54848</td>\n",
       "      <td>54848</td>\n",
       "      <td>54848.0</td>\n",
       "      <td>54848</td>\n",
       "      <td>54848</td>\n",
       "      <td>54848</td>\n",
       "      <td>54848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>8</td>\n",
       "      <td>1868</td>\n",
       "      <td>1536</td>\n",
       "      <td>1868</td>\n",
       "      <td>1536</td>\n",
       "      <td></td>\n",
       "      <td>5</td>\n",
       "      <td></td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>N_Reg_RC</td>\n",
       "      <td>PN_Ret5</td>\n",
       "      <td>operator</td>\n",
       "      <td>RAcc5</td>\n",
       "      <td>A0</td>\n",
       "      <td></td>\n",
       "      <td>ArRC</td>\n",
       "      <td></td>\n",
       "      <td>EU</td>\n",
       "      <td>operator</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>28312</td>\n",
       "      <td>2265</td>\n",
       "      <td>28312</td>\n",
       "      <td>2265</td>\n",
       "      <td>28312</td>\n",
       "      <td></td>\n",
       "      <td>28312</td>\n",
       "      <td></td>\n",
       "      <td>41981</td>\n",
       "      <td>28312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>55101.369953</td>\n",
       "      <td></td>\n",
       "      <td>2011-07-22 23:58:30.741376256</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>0.32</td>\n",
       "      <td></td>\n",
       "      <td>2011-01-06 00:09:01</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>2320.885</td>\n",
       "      <td></td>\n",
       "      <td>2011-06-20 20:11:10.500000</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>6796.69</td>\n",
       "      <td></td>\n",
       "      <td>2011-07-28 20:56:54</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>82111.76</td>\n",
       "      <td></td>\n",
       "      <td>2011-09-09 22:22:44.500000</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>1148351.48</td>\n",
       "      <td></td>\n",
       "      <td>2011-12-09 23:54:57</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>87307.646401</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Groundtruth User ID (sender) User ID (receiver)  \\\n",
       "count        54848            54848              54848   \n",
       "unique           8             1868               1536   \n",
       "top       N_Reg_RC          PN_Ret5           operator   \n",
       "freq         28312             2265              28312   \n",
       "mean                                                     \n",
       "min                                                      \n",
       "25%                                                      \n",
       "50%                                                      \n",
       "75%                                                      \n",
       "max                                                      \n",
       "std                                                      \n",
       "\n",
       "       User account ID (sender) User account ID (receiver)  \\\n",
       "count                     54848                      54848   \n",
       "unique                     1868                       1536   \n",
       "top                       RAcc5                         A0   \n",
       "freq                       2265                      28312   \n",
       "mean                                                         \n",
       "min                                                          \n",
       "25%                                                          \n",
       "50%                                                          \n",
       "75%                                                          \n",
       "max                                                          \n",
       "std                                                          \n",
       "\n",
       "       Amount of transaction Type of transaction  \\\n",
       "count                54848.0               54848   \n",
       "unique                                         5   \n",
       "top                                         ArRC   \n",
       "freq                                       28312   \n",
       "mean            55101.369953                       \n",
       "min                     0.32                       \n",
       "25%                 2320.885                       \n",
       "50%                  6796.69                       \n",
       "75%                 82111.76                       \n",
       "max               1148351.48                       \n",
       "std             87307.646401                       \n",
       "\n",
       "                Transaction timestamp Sender type Receiver type  \n",
       "count                           54848       54848         54848  \n",
       "unique                                          2             4  \n",
       "top                                            EU      operator  \n",
       "freq                                        41981         28312  \n",
       "mean    2011-07-22 23:58:30.741376256                            \n",
       "min               2011-01-06 00:09:01                            \n",
       "25%        2011-06-20 20:11:10.500000                            \n",
       "50%               2011-07-28 20:56:54                            \n",
       "75%        2011-09-09 22:22:44.500000                            \n",
       "max               2011-12-09 23:54:57                            \n",
       "std                                                              "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "df = pd.read_csv('/practice/FinFraud_Labelled.csv', sep='|', parse_dates=[16, 17, 22])\n",
+    "df = pd.read_csv('./FinFraud_Labelled.csv', sep='|', parse_dates=[16, 17, 22])\n",
    "# в файлах с вариантом задания, разделитель - \";\" \n",
    "df.columns = [\n",
    "    'Groundtruth', \n",
@ -512,7 +289,7 @@
    "\n",
    "\n",
    "df['Groundtruth'] = df['Groundtruth'].str.replace('-', '_')\n",
-    "df.describe(include='all', datetime_is_numeric=True).fillna('')"
+    "df.describe(include='all').fillna('')"
   ]
  },
  {
@ -2840,7 +2617,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.11.3"
  }
 },
 "nbformat": 4,