Merge remote-tracking branch 'practice4/main'
This commit is contained in:
commit
379668d461
2
practice4/.gitignore
vendored
Normal file
2
practice4/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.venv
|
||||||
|
.~lock*
|
54849
practice4/FinFraud_Labelled.csv
Normal file
54849
practice4/FinFraud_Labelled.csv
Normal file
File diff suppressed because it is too large
Load Diff
54031
practice4/FinFraud_unknown.csv
Normal file
54031
practice4/FinFraud_unknown.csv
Normal file
File diff suppressed because it is too large
Load Diff
755
practice4/main.ipynb
Normal file
755
practice4/main.ipynb
Normal file
@ -0,0 +1,755 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"**Практическая работа №4**\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# Обнаружение злоумышленников в системе мобильных денежных переводов\n",
|
||||||
|
"\n",
|
||||||
|
"_Вариант 5_\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"1) настройка окружения"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\u001b[33mWARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47347ab190>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
|
||||||
|
"\u001b[0m\u001b[33mWARNING: Retrying (Retry(total=3, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47345c0610>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
|
||||||
|
"\u001b[0m\u001b[33mWARNING: Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47345c0b50>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
|
||||||
|
"\u001b[0m\u001b[33mWARNING: Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47345c14d0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
|
||||||
|
"\u001b[0m\u001b[33mWARNING: Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f47345c1ed0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /simple/scipy/\u001b[0m\u001b[33m\n",
|
||||||
|
"\u001b[0m\u001b[31mERROR: Could not find a version that satisfies the requirement scipy==1.8.1 (from versions: none)\u001b[0m\u001b[31m\n",
|
||||||
|
"\u001b[0m\u001b[31mERROR: No matching distribution found for scipy==1.8.1\u001b[0m\u001b[31m\n",
|
||||||
|
"\u001b[0m\n",
|
||||||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
|
||||||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Requirement already satisfied: networkx==2.7.0 in ./.venv/lib64/python3.11/site-packages (2.7)\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
|
||||||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||||
|
"Requirement already satisfied: pyvis in ./.venv/lib64/python3.11/site-packages (0.3.2)\n",
|
||||||
|
"Requirement already satisfied: pandas in ./.venv/lib64/python3.11/site-packages (2.0.1)\n",
|
||||||
|
"Requirement already satisfied: numpy in ./.venv/lib64/python3.11/site-packages (1.24.3)\n",
|
||||||
|
"Requirement already satisfied: plotly in ./.venv/lib64/python3.11/site-packages (5.14.1)\n",
|
||||||
|
"Requirement already satisfied: ipython>=5.3.0 in ./.venv/lib64/python3.11/site-packages (from pyvis) (8.13.2)\n",
|
||||||
|
"Requirement already satisfied: jinja2>=2.9.6 in ./.venv/lib64/python3.11/site-packages (from pyvis) (3.1.2)\n",
|
||||||
|
"Requirement already satisfied: jsonpickle>=1.4.1 in ./.venv/lib64/python3.11/site-packages (from pyvis) (3.0.1)\n",
|
||||||
|
"Requirement already satisfied: networkx>=1.11 in ./.venv/lib64/python3.11/site-packages (from pyvis) (2.7)\n",
|
||||||
|
"Requirement already satisfied: python-dateutil>=2.8.2 in ./.venv/lib64/python3.11/site-packages (from pandas) (2.8.2)\n",
|
||||||
|
"Requirement already satisfied: pytz>=2020.1 in ./.venv/lib64/python3.11/site-packages (from pandas) (2023.3)\n",
|
||||||
|
"Requirement already satisfied: tzdata>=2022.1 in ./.venv/lib64/python3.11/site-packages (from pandas) (2023.3)\n",
|
||||||
|
"Requirement already satisfied: tenacity>=6.2.0 in ./.venv/lib64/python3.11/site-packages (from plotly) (8.2.2)\n",
|
||||||
|
"Requirement already satisfied: packaging in ./.venv/lib64/python3.11/site-packages (from plotly) (23.1)\n",
|
||||||
|
"Requirement already satisfied: backcall in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.2.0)\n",
|
||||||
|
"Requirement already satisfied: decorator in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (5.1.1)\n",
|
||||||
|
"Requirement already satisfied: jedi>=0.16 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.18.2)\n",
|
||||||
|
"Requirement already satisfied: matplotlib-inline in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.1.6)\n",
|
||||||
|
"Requirement already satisfied: pickleshare in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.7.5)\n",
|
||||||
|
"Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (3.0.38)\n",
|
||||||
|
"Requirement already satisfied: pygments>=2.4.0 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (2.15.1)\n",
|
||||||
|
"Requirement already satisfied: stack-data in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (0.6.2)\n",
|
||||||
|
"Requirement already satisfied: traitlets>=5 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (5.9.0)\n",
|
||||||
|
"Requirement already satisfied: pexpect>4.3 in ./.venv/lib64/python3.11/site-packages (from ipython>=5.3.0->pyvis) (4.8.0)\n",
|
||||||
|
"Requirement already satisfied: MarkupSafe>=2.0 in ./.venv/lib64/python3.11/site-packages (from jinja2>=2.9.6->pyvis) (2.1.2)\n",
|
||||||
|
"Requirement already satisfied: six>=1.5 in ./.venv/lib64/python3.11/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
|
||||||
|
"Requirement already satisfied: parso<0.9.0,>=0.8.0 in ./.venv/lib64/python3.11/site-packages (from jedi>=0.16->ipython>=5.3.0->pyvis) (0.8.3)\n",
|
||||||
|
"Requirement already satisfied: ptyprocess>=0.5 in ./.venv/lib64/python3.11/site-packages (from pexpect>4.3->ipython>=5.3.0->pyvis) (0.7.0)\n",
|
||||||
|
"Requirement already satisfied: wcwidth in ./.venv/lib64/python3.11/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=5.3.0->pyvis) (0.2.6)\n",
|
||||||
|
"Requirement already satisfied: executing>=1.2.0 in ./.venv/lib64/python3.11/site-packages (from stack-data->ipython>=5.3.0->pyvis) (1.2.0)\n",
|
||||||
|
"Requirement already satisfied: asttokens>=2.1.0 in ./.venv/lib64/python3.11/site-packages (from stack-data->ipython>=5.3.0->pyvis) (2.2.1)\n",
|
||||||
|
"Requirement already satisfied: pure-eval in ./.venv/lib64/python3.11/site-packages (from stack-data->ipython>=5.3.0->pyvis) (0.2.2)\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
|
||||||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# %pip install scipy==1.8.1\n",
|
||||||
|
"# %pip install networkx==2.7.0\n",
|
||||||
|
"# %pip install pyvis pandas numpy plotly\n",
|
||||||
|
"\n",
|
||||||
|
"from functools import reduce\n",
|
||||||
|
"from pyvis import network as net\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import networkx as nx\n",
|
||||||
|
"import plotly.express as px\n",
|
||||||
|
"import plotly.graph_objects as go\n",
|
||||||
|
"\n",
|
||||||
|
"from plotly.offline import iplot\n",
|
||||||
|
"from IPython.display import display, HTML\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# for Jupyter notebooks\n",
|
||||||
|
"import plotly.io as pio  # comment out for Google Colab\n",
|
||||||
|
"pio.renderers.default='notebook'  # comment out for Google Colab"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def pyvis_deepnote_show(nt):\n",
|
||||||
|
" tmp_output_filename = tempfile.NamedTemporaryFile(suffix='.html').name\n",
|
||||||
|
" nt.save_graph(tmp_output_filename)\n",
|
||||||
|
"\n",
|
||||||
|
" f = open(tmp_output_filename, \"r\")\n",
|
||||||
|
" display(HTML(f.read()))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 42,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>User ID (sender)</th>\n",
|
||||||
|
" <th>User ID (receiver)</th>\n",
|
||||||
|
" <th>User account ID (sender)</th>\n",
|
||||||
|
" <th>User account ID (receiver)</th>\n",
|
||||||
|
" <th>Amount of transaction</th>\n",
|
||||||
|
" <th>Type of transaction</th>\n",
|
||||||
|
" <th>Transaction timestamp</th>\n",
|
||||||
|
" <th>Sender type</th>\n",
|
||||||
|
" <th>Receiver type</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>count</th>\n",
|
||||||
|
" <td>54030</td>\n",
|
||||||
|
" <td>54030</td>\n",
|
||||||
|
" <td>54030</td>\n",
|
||||||
|
" <td>54030</td>\n",
|
||||||
|
" <td>54030.0</td>\n",
|
||||||
|
" <td>54030</td>\n",
|
||||||
|
" <td>54030</td>\n",
|
||||||
|
" <td>54030</td>\n",
|
||||||
|
" <td>54030</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>unique</th>\n",
|
||||||
|
" <td>1861</td>\n",
|
||||||
|
" <td>1562</td>\n",
|
||||||
|
" <td>1861</td>\n",
|
||||||
|
" <td>1562</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>46394</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>top</th>\n",
|
||||||
|
" <td>PN_Ret4</td>\n",
|
||||||
|
" <td>operator</td>\n",
|
||||||
|
" <td>RAcc4</td>\n",
|
||||||
|
" <td>A0</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>ArRC</td>\n",
|
||||||
|
" <td>08.07.2011 15:16</td>\n",
|
||||||
|
" <td>EU</td>\n",
|
||||||
|
" <td>operator</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>freq</th>\n",
|
||||||
|
" <td>2256</td>\n",
|
||||||
|
" <td>27901</td>\n",
|
||||||
|
" <td>2256</td>\n",
|
||||||
|
" <td>27901</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>27901</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>41246</td>\n",
|
||||||
|
" <td>27901</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>mean</th>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>53083.47221</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>std</th>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>85834.97052</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>min</th>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>25%</th>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>2158.2525</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>50%</th>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>6257.375</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>75%</th>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>76821.9675</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>max</th>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>1053512.86</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" User ID (sender) User ID (receiver) User account ID (sender) \n",
|
||||||
|
"count 54030 54030 54030 \\\n",
|
||||||
|
"unique 1861 1562 1861 \n",
|
||||||
|
"top PN_Ret4 operator RAcc4 \n",
|
||||||
|
"freq 2256 27901 2256 \n",
|
||||||
|
"mean \n",
|
||||||
|
"std \n",
|
||||||
|
"min \n",
|
||||||
|
"25% \n",
|
||||||
|
"50% \n",
|
||||||
|
"75% \n",
|
||||||
|
"max \n",
|
||||||
|
"\n",
|
||||||
|
" User account ID (receiver) Amount of transaction Type of transaction \n",
|
||||||
|
"count 54030 54030.0 54030 \\\n",
|
||||||
|
"unique 1562 5 \n",
|
||||||
|
"top A0 ArRC \n",
|
||||||
|
"freq 27901 27901 \n",
|
||||||
|
"mean 53083.47221 \n",
|
||||||
|
"std 85834.97052 \n",
|
||||||
|
"min 0.0 \n",
|
||||||
|
"25% 2158.2525 \n",
|
||||||
|
"50% 6257.375 \n",
|
||||||
|
"75% 76821.9675 \n",
|
||||||
|
"max 1053512.86 \n",
|
||||||
|
"\n",
|
||||||
|
" Transaction timestamp Sender type Receiver type \n",
|
||||||
|
"count 54030 54030 54030 \n",
|
||||||
|
"unique 46394 2 4 \n",
|
||||||
|
"top 08.07.2011 15:16 EU operator \n",
|
||||||
|
"freq 5 41246 27901 \n",
|
||||||
|
"mean \n",
|
||||||
|
"std \n",
|
||||||
|
"min \n",
|
||||||
|
"25% \n",
|
||||||
|
"50% \n",
|
||||||
|
"75% \n",
|
||||||
|
"max "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 42,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df = pd.read_csv('./FinFraud_unknown.csv', sep=',', parse_dates=[15, 16, 21])\n",
|
||||||
|
"\n",
|
||||||
|
"df.columns = [\n",
|
||||||
|
" 'User ID (sender)', \n",
|
||||||
|
" 'User ID (receiver)',\n",
|
||||||
|
" 'User account ID (sender)',\n",
|
||||||
|
" 'User account ID (receiver)',\n",
|
||||||
|
" 'Amount of transaction',\n",
|
||||||
|
" 'Type of transaction',\n",
|
||||||
|
" 'State of operation',\n",
|
||||||
|
" 'Balance before (sender)',\n",
|
||||||
|
" 'Balance after (sender)',\n",
|
||||||
|
" 'Balance after (receiver)',\n",
|
||||||
|
" 'Balance before (receiver)', \n",
|
||||||
|
" 'Not used',\n",
|
||||||
|
" 'Not used',\n",
|
||||||
|
" 'Not used',\n",
|
||||||
|
" 'Not used',\n",
|
||||||
|
" 'Transaction timestamp (sender)',\n",
|
||||||
|
" 'Transaction timestamp (receiver)',\n",
|
||||||
|
" 'Sender account ID',\n",
|
||||||
|
" 'Not used',\n",
|
||||||
|
" 'Not used',\n",
|
||||||
|
" 'Not used',\n",
|
||||||
|
" 'Transaction timestamp',\n",
|
||||||
|
" 'Sender type',\n",
|
||||||
|
" 'Receiver type'\n",
|
||||||
|
"]\n",
|
||||||
|
"df = df.loc[:, ~df.columns.str.contains('^Not used', case=False)].sort_values('Transaction timestamp') \n",
|
||||||
|
"df = df.drop('State of operation', axis=1)\n",
|
||||||
|
"df = df.drop('Sender account ID', axis=1)\n",
|
||||||
|
"df = df.drop('Transaction timestamp (sender)', axis=1)\n",
|
||||||
|
"df = df.drop('Transaction timestamp (receiver)', axis=1)\n",
|
||||||
|
"df = df.drop('Balance before (sender)', axis=1)\n",
|
||||||
|
"df = df.drop('Balance after (sender)', axis=1)\n",
|
||||||
|
"df = df.drop('Balance before (receiver)', axis=1)\n",
|
||||||
|
"df = df.drop('Balance after (receiver)', axis=1)\n",
|
||||||
|
"\n",
|
||||||
|
"df[\"Amount of transaction\"] = pd.to_numeric(df[\"Amount of transaction\"], errors='coerce').fillna(0)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"df.describe(include='all').fillna('')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Описание набора данных"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"| Название столбца | Возможные значения |Описание |\n",
|
||||||
|
"|----------------------------------------|----------------------------------------------------|----------------------------------------|\n",
|
||||||
|
"| User ID (transaction sender) | Generated ID | |\n",
|
||||||
|
"| User ID (transaction receiver) | Generated ID | |\n",
|
||||||
|
"| User account ID (transaction sender) | Generated ID | |\n",
|
||||||
|
"| User account ID (transaction receiver) | Generated ID | |\n",
|
||||||
|
"| Amount of transaction | Number | |\n",
|
||||||
|
"| Type of transaction | `Ind`<br/>`Dt`<br/>`ArRC`<br/>`Wl`<br/>`Merchant` | Тип транзакции <br/>`Ind` – денежный перевод между пользователями системы <br/>`Dt` – пополнение электронного кошелька (отправитель агент, а получатель - пользователь системы)<br/>`ArRC` – пополнение счета мобильной связи (перевод от пользователя системы к оператору мобильной связи )<br/>`Wl` – снятие электронных денег (отправитель - пользователь системы, получатель - оператор)<br/>`Merchant` – перевод от пользователя поставщику услуг или товаров |\n",
|
||||||
|
"| State of operation | `SU` | `SU` – успешно |\n",
|
||||||
|
"| Balance before (transaction sender) | Number | |\n",
|
||||||
|
"| Balance before (transaction receiver) | Number | |\n",
|
||||||
|
"| Balance after (transaction sender) | Number | |\n",
|
||||||
|
"| Balance after (transaction receiver) | Number | |\n",
|
||||||
|
"| Transaction timestamp (sender) | Datetime | |\n",
|
||||||
|
"| Transaction timestamp (receiver) | Datetime | |\n",
|
||||||
|
"| Sender account ID | Generated ID | |\n",
|
||||||
|
"| Transaction timestamp | Datetime | |\n",
|
||||||
|
"| Sender type | `EU`<br/>`RET` | |\n",
|
||||||
|
"| Receiver type | `EU`<br/>`operator`<br/>`RET`<br/>`MER` | |\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Поскольку поле `State of operation` всегда имеет значение (`SU`) для всех транзакций, данный столбец предлагается удалить. \n",
|
||||||
|
"Столбцы `Sender account ID` и `User ID (transaction sender)` идентичны, также столбцы `Transaction timestamp (sender)` и `Transaction timestamp (receiver)` идентичны столбцу `Transaction timestamp`, поэтому данные столбцы удаляются (остается только `Transaction timestamp`). Также удаляются столбцы с балансом, т.к. в текущей версии набора данных они не задействованы."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 43,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"User ID (sender) object\n",
|
||||||
|
"User ID (receiver) object\n",
|
||||||
|
"User account ID (sender) object\n",
|
||||||
|
"User account ID (receiver) object\n",
|
||||||
|
"Amount of transaction float64\n",
|
||||||
|
"Type of transaction object\n",
|
||||||
|
"Transaction timestamp object\n",
|
||||||
|
"Sender type object\n",
|
||||||
|
"Receiver type object\n",
|
||||||
|
"dtype: object"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 43,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.dtypes"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Статистика транзакций для каждого пользователя\n",
|
||||||
|
"\n",
|
||||||
|
"Традиционно начнем со статистического анализа данных. Рекомендуется расширить число рассчитываемых статистик, например, включив показатели, характеризующие частоту транзакций. Для такого вида мошенничества, как кража телефона, изменение частоты снятий является характерным признаком."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 44,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def init_stat_dict():\n",
|
||||||
|
" stat_dict = dict()\n",
|
||||||
|
" transaction_types = {\"Ind\", \"Wl\", \"Dt\", \"Merchant\", \"ArRC\"} \n",
|
||||||
|
" for tran_type in transaction_types:\n",
|
||||||
|
" amount_name = f\"Sent_amount_{tran_type}\"\n",
|
||||||
|
" amount_median = f\"Sent_amount_{tran_type}_median\"\n",
|
||||||
|
" amount_min = f\"Sent_amount_{tran_type}_min\"\n",
|
||||||
|
" amount_max = f\"Sent_amount_{tran_type}_max\"\n",
|
||||||
|
" tran_count = f\"Sent_{tran_type}_count\"\n",
|
||||||
|
" rec_amount_name = f\"Received_amount_{tran_type}\"\n",
|
||||||
|
" rec_amount_median = f\"Received_amount_{tran_type}_median\"\n",
|
||||||
|
" rec_amount_min = f\"Received_amount_{tran_type}_min\"\n",
|
||||||
|
" rec_amount_max = f\"Received_amount_{tran_type}_max\"\n",
|
||||||
|
" rec_tran_count = f\"Received_{tran_type}_count\"\n",
|
||||||
|
" \n",
|
||||||
|
" stat_dict[amount_name] = 0\n",
|
||||||
|
" stat_dict[amount_median] = 0\n",
|
||||||
|
" stat_dict[amount_min] = 0\n",
|
||||||
|
" stat_dict[amount_max] = 0\n",
|
||||||
|
" stat_dict[tran_count] = 0\n",
|
||||||
|
" stat_dict[rec_amount_name] = 0\n",
|
||||||
|
" stat_dict[rec_amount_median] = 0\n",
|
||||||
|
" stat_dict[rec_amount_min] = 0\n",
|
||||||
|
" stat_dict[rec_amount_max] = 0\n",
|
||||||
|
" stat_dict[rec_tran_count] = 0\n",
|
||||||
|
"\n",
|
||||||
|
" return stat_dict\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def get_stat_df(df):\n",
|
||||||
|
" sent_unique_users = df[\"User ID (sender)\"].unique()\n",
|
||||||
|
" received_unique_users = df[\"User ID (receiver)\"].unique()\n",
|
||||||
|
" unique_users = np.unique(np.concatenate((sent_unique_users,received_unique_users),0))\n",
|
||||||
|
" print(unique_users)\n",
|
||||||
|
" stat_df = pd.DataFrame()\n",
|
||||||
|
" stat_dict = init_stat_dict()\n",
|
||||||
|
" transaction_types = {\"Ind\", \"Wl\", \"Dt\", \"Merchant\", \"ArRC\"}\n",
|
||||||
|
" for user in unique_users:\n",
|
||||||
|
" stat_dict = init_stat_dict() \n",
|
||||||
|
" stat_dict[\"User ID\"] = user\n",
|
||||||
|
"\n",
|
||||||
|
" user_df = df.loc[(df[\"User ID (sender)\"] == user)]\n",
|
||||||
|
" \n",
|
||||||
|
" if (not user_df.empty):\n",
|
||||||
|
" #stat_dict[\"User ID\"] = user\n",
|
||||||
|
" \n",
|
||||||
|
" stat_dict[\"Unique_receivers\"] = len(user_df[\"User ID (receiver)\"].unique())\n",
|
||||||
|
" stat_dict[\"User type\"] = user_df[\"Sender type\"].unique()[0]\n",
|
||||||
|
"\n",
|
||||||
|
" for tran_type in transaction_types:\n",
|
||||||
|
" amount_name = f\"Sent_amount_{tran_type}\"\n",
|
||||||
|
" amount_median = f\"Sent_amount_{tran_type}_median\"\n",
|
||||||
|
" amount_min = f\"Sent_amount_{tran_type}_min\"\n",
|
||||||
|
" amount_max = f\"Sent_amount_{tran_type}_max\"\n",
|
||||||
|
" tran_count = f\"Sent_{tran_type}_count\"\n",
|
||||||
|
" stat_dict[amount_name] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].sum()\n",
|
||||||
|
" stat_dict[amount_median] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].mean()\n",
|
||||||
|
" stat_dict[amount_min] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].min()\n",
|
||||||
|
" stat_dict[amount_max] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].max()\n",
|
||||||
|
" stat_dict[tran_count] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].count()\n",
|
||||||
|
" else:\n",
|
||||||
|
" stat_dict[\"User type\"] = (df.loc[(df[\"User ID (receiver)\"]==user)])[\"Receiver type\"].unique()[0]\n",
|
||||||
|
"\n",
|
||||||
|
" user_df = df.loc[(df[\"User ID (receiver)\"] == user)]\n",
|
||||||
|
" if (not user_df.empty):\n",
|
||||||
|
" stat_dict[\"Unique_senders\"] = len(user_df[\"User ID (sender)\"].unique())\n",
|
||||||
|
" for tran_type in transaction_types:\n",
|
||||||
|
" rec_amount_name = f\"Received_amount_{tran_type}\"\n",
|
||||||
|
" rec_amount_median = f\"Received_amount_{tran_type}_median\"\n",
|
||||||
|
" rec_amount_min = f\"Received_amount_{tran_type}_min\"\n",
|
||||||
|
" rec_amount_max = f\"Received_amount_{tran_type}_max\"\n",
|
||||||
|
" rec_tran_count = f\"Received_{tran_type}_count\"\n",
|
||||||
|
" stat_dict[rec_amount_name] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].sum()\n",
|
||||||
|
" stat_dict[rec_amount_median] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].median()\n",
|
||||||
|
" stat_dict[rec_amount_min] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].min()\n",
|
||||||
|
" stat_dict[rec_amount_max] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].max()\n",
|
||||||
|
" stat_dict[rec_tran_count] = (user_df.loc[user_df[\"Type of transaction\"]==tran_type])[\"Amount of transaction\"].count()\n",
|
||||||
|
" \n",
|
||||||
|
" df_temp = pd.DataFrame([stat_dict])\n",
|
||||||
|
" \n",
|
||||||
|
" #df_temp.head()\n",
|
||||||
|
" stat_df = pd.concat([stat_df, df_temp])\n",
|
||||||
|
" stat_df = stat_df.fillna(0)\n",
|
||||||
|
" return stat_df\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Кстати, обратите внимание: уникальных пользователей в системе 2009. Это больше, чем число уникальных отправителей и уникальных получателей, значит, какие-то пользователи только отправляют деньги, а какие-то только получают."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 46,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"['PN_EU_0_0' 'PN_EU_0_1' 'PN_EU_0_10' ... 'PN_Ret5' 'PN_Ret6' 'operator']\n",
|
||||||
|
"(2009, 54)\n",
|
||||||
|
" Sent_amount_Wl Sent_amount_Wl_median Sent_amount_Wl_min \n",
|
||||||
|
"0 0.0 0.0 0.0 \\\n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" Sent_amount_Wl_max Sent_Wl_count Received_amount_Wl \n",
|
||||||
|
"0 0.0 0 0.0 \\\n",
|
||||||
|
"0 0.0 0 0.0 \n",
|
||||||
|
"0 0.0 0 0.0 \n",
|
||||||
|
"0 0.0 0 0.0 \n",
|
||||||
|
"0 0.0 0 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" Received_amount_Wl_median Received_amount_Wl_min Received_amount_Wl_max \n",
|
||||||
|
"0 0.0 0.0 0.0 \\\n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" Received_Wl_count ... Sent_Dt_count Received_amount_Dt \n",
|
||||||
|
"0 0 ... 0 686643.36 \\\n",
|
||||||
|
"0 0 ... 0 483467.30 \n",
|
||||||
|
"0 0 ... 0 0.00 \n",
|
||||||
|
"0 0 ... 0 0.00 \n",
|
||||||
|
"0 0 ... 0 0.00 \n",
|
||||||
|
"\n",
|
||||||
|
" Received_amount_Dt_median Received_amount_Dt_min Received_amount_Dt_max \n",
|
||||||
|
"0 27845.615 15965.17 41729.94 \\\n",
|
||||||
|
"0 35925.855 8067.95 86422.48 \n",
|
||||||
|
"0 0.000 0.00 0.00 \n",
|
||||||
|
"0 0.000 0.00 0.00 \n",
|
||||||
|
"0 0.000 0.00 0.00 \n",
|
||||||
|
"\n",
|
||||||
|
" Received_Dt_count User ID User type Unique_senders \n",
|
||||||
|
"0 24 PN_EU_0_0 EU 2.0 \\\n",
|
||||||
|
"0 12 PN_EU_0_1 EU 6.0 \n",
|
||||||
|
"0 0 PN_EU_0_10 EU 2.0 \n",
|
||||||
|
"0 0 PN_EU_0_100 EU 1.0 \n",
|
||||||
|
"0 0 PN_EU_0_1000 EU 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" Unique_receivers \n",
|
||||||
|
"0 0.0 \n",
|
||||||
|
"0 0.0 \n",
|
||||||
|
"0 2.0 \n",
|
||||||
|
"0 1.0 \n",
|
||||||
|
"0 1.0 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 54 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"stat_df = get_stat_df(df)\n",
|
||||||
|
"print(stat_df.shape)\n",
|
||||||
|
"# print(stat_df.head())\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Была выбрана часть статистик, и по ним построены проекции пользователей. Анализируемые поля были выбраны на основе анализа свойств возможных финансовых аномалий (т.е. просто эвристически:))."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 47,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "ModuleNotFoundError",
|
||||||
|
"evalue": "No module named 'sklearn'",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[0;32mIn[47], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mplotting\u001b[39;00m \u001b[39mimport\u001b[39;00m scatter_matrix\n\u001b[0;32m----> 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msklearn\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpreprocessing\u001b[39;00m \u001b[39mimport\u001b[39;00m StandardScaler\n\u001b[1;32m 3\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msklearn\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpreprocessing\u001b[39;00m \u001b[39mimport\u001b[39;00m LabelEncoder\n\u001b[1;32m 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msklearn\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdecomposition\u001b[39;00m \u001b[39mimport\u001b[39;00m PCA\n",
|
||||||
|
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sklearn'"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from pandas.plotting import scatter_matrix\n",
|
||||||
|
"from sklearn.preprocessing import StandardScaler\n",
|
||||||
|
"from sklearn.preprocessing import LabelEncoder\n",
|
||||||
|
"from sklearn.decomposition import PCA\n",
|
||||||
|
"from matplotlib.ticker import FormatStrFormatter\n",
|
||||||
|
"import plotly.express as px"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Мошенничество, связанное с заражением бот-сетью."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"Согласно описанию сценария атаки: есть множество зараженных пользователей, которые переводят деньги какому-то пользователю (\"ослу\" или \"мулу\"), и уже он выполняет операции обналичивания денег. Рассмотрен простейщий вариант сценария: цепочка мулов состоит из одного звена. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 48,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "NameError",
|
||||||
|
"evalue": "name 'StandardScaler' is not defined",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[0;32mIn[48], line 11\u001b[0m\n\u001b[1;32m 8\u001b[0m x \u001b[39m=\u001b[39m stat_df[MobileBot_labels]\u001b[39m.\u001b[39mvalues\n\u001b[1;32m 10\u001b[0m \u001b[39m# нормализуем значения\u001b[39;00m\n\u001b[0;32m---> 11\u001b[0m x \u001b[39m=\u001b[39m StandardScaler()\u001b[39m.\u001b[39mfit_transform(x)\n\u001b[1;32m 13\u001b[0m pca \u001b[39m=\u001b[39m PCA(n_components\u001b[39m=\u001b[39m\u001b[39m3\u001b[39m)\n\u001b[1;32m 14\u001b[0m principalComponents \u001b[39m=\u001b[39m pca\u001b[39m.\u001b[39mfit_transform(x)\n",
|
||||||
|
"\u001b[0;31mNameError\u001b[0m: name 'StandardScaler' is not defined"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"#оставляем поля, связанные с переводами и снятиями и добавили число уникальных пользователей, это же бот сеть.\n",
|
||||||
|
"\n",
|
||||||
|
"MobileBot_labels = ['Unique_receivers','Unique_receivers','Sent_Ind_count' ,'Sent_Wl_count', 'Received_Ind_count']\n",
|
||||||
|
"\n",
|
||||||
|
"# а по этим полям будем пробовать найти пользователей с кражей телефона.\n",
|
||||||
|
"MobileTheft_labels = ['Sent_amount_Wl', 'Sent_amount_Wl_median', 'Sent_amount_Wl_min', 'Sent_amount_Wl_max', 'Sent_Wl_count']\n",
|
||||||
|
"\n",
|
||||||
|
"x = stat_df[MobileBot_labels].values\n",
|
||||||
|
"\n",
|
||||||
|
"# нормализуем значения\n",
|
||||||
|
"x = StandardScaler().fit_transform(x)\n",
|
||||||
|
"\n",
|
||||||
|
"pca = PCA(n_components=3)\n",
|
||||||
|
"principalComponents = pca.fit_transform(x)\n",
|
||||||
|
"print(f'Explained variance: {pca.explained_variance_ratio_}\\tSum: {pca.explained_variance_ratio_.sum()}')\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.3"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 4
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
2625
practice4/Практическая_работа__4.ipynb
Normal file
2625
practice4/Практическая_работа__4.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user