diff --git a/Experiments.ipynb b/Experiments.ipynb index aebe34f88030296b2dcdd58844ea5c3dafd26e33..7087397cb5761564157504bbf26f974d8be049f6 100644 --- a/Experiments.ipynb +++ b/Experiments.ipynb @@ -2,12 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "id": "9d54cd9a", "metadata": { "ExecuteTime": { - "end_time": "2024-02-13T14:34:05.761955065Z", - "start_time": "2024-02-13T14:34:05.757963454Z" + "end_time": "2024-02-14T09:17:51.136290823Z", + "start_time": "2024-02-14T09:17:51.130011719Z" } }, "outputs": [], @@ -26,12 +26,12 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "id": "61c53cb1", "metadata": { "ExecuteTime": { - "end_time": "2024-02-13T16:19:33.814920230Z", - "start_time": "2024-02-13T16:18:58.796355230Z" + "end_time": "2024-02-14T09:39:50.059022516Z", + "start_time": "2024-02-14T09:39:12.027137257Z" } }, "outputs": [ @@ -39,196 +39,51 @@ "name": "stdout", "output_type": "stream", "text": [ - "/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr\n", - "Is CUDA supported by this system? False\n", - "CUDA version: None\n", - "dataTrain: ../../data/assist0910_tkde/train_valid_0.csv\n", - "dataTest: ../../data/assist0910_tkde/test_0.csv\n", - "dataPath: ../../data/\n", - "embPath: ../../results/table_2/\n", - "epochs: 75\n", - "batch_size: 512\n", - "[Epoch 0] loss: 1.362733\n", - "[Epoch 5] loss: 0.946293\n", - "[Epoch 10] loss: 0.843926\n", - "[Epoch 15] loss: 0.752875\n", - "[Epoch 20] loss: 0.675348\n", - "[Epoch 25] loss: 0.609859\n", - "[Epoch 30] loss: 0.555984\n", - "[Epoch 35] loss: 0.511323\n", - "[Epoch 40] loss: 0.475109\n", - "[Epoch 45] loss: 0.445370\n", - "[Epoch 50] loss: 0.421545\n", - "[Epoch 55] loss: 0.402110\n", - "[Epoch 60] loss: 0.386685\n", - "[Epoch 65] loss: 0.374130\n", - "[Epoch 70] loss: 0.364230\n", - "Best iteration 0\n", - "Accuracy train 0\n", - "doa 0.7493086165296035\n", - "Evaluate\n", - "RMSE 0.43184939597901045\n", - "AUC: 0.7797187622578838\n", - "0\n", - "Doa on Train dataset: 0.7493086165296035\n", - "AUC and RMSE on test dataset: 0.7797187622578838 0.43184939597901045\n", - "doa 0.5110034511151385\n", - "Accuracy and Doa on test dataset: 0.734148934531055 0.5110034511151385\n", - "dataTrain: ../../data/assist0910_tkde/train_valid_1.csv\n", - "dataTest: ../../data/assist0910_tkde/test_1.csv\n", - "dataPath: ../../data/\n", - "embPath: ../../results/table_2/\n", - "epochs: 75\n", - "batch_size: 512\n", - "[Epoch 0] loss: 1.370187\n", - "[Epoch 5] loss: 0.940516\n", - "[Epoch 10] loss: 0.839136\n", - "[Epoch 15] loss: 0.751113\n", - "[Epoch 20] loss: 0.674149\n", - "[Epoch 25] loss: 0.609572\n", - "[Epoch 30] loss: 0.555151\n", - "[Epoch 35] loss: 0.510757\n", - "[Epoch 40] loss: 0.473884\n", - "[Epoch 45] loss: 0.444131\n", - "[Epoch 50] loss: 0.419617\n", - "[Epoch 55] loss: 0.400048\n", - "[Epoch 60] loss: 0.383974\n", - "[Epoch 65] loss: 0.371254\n", - "[Epoch 70] loss: 0.360791\n", - "Best iteration 0\n", - "Accuracy train 0\n", - "doa 0.749059681595483\n", - "Evaluate\n", - "RMSE 0.4297057532202306\n", - "AUC: 0.7874179495114682\n", - "0\n", - "Doa on Train dataset: 0.749059681595483\n", - "AUC and RMSE on test dataset: 0.7874179495114682 0.4297057532202306\n", - "doa 0.5106703541023732\n", - "Accuracy and Doa on test dataset: 0.7411206963550168 0.5106703541023732\n", - "dataTrain: ../../data/assist0910_tkde/train_valid_2.csv\n", - "dataTest: ../../data/assist0910_tkde/test_2.csv\n", - "dataPath: ../../data/\n", - "embPath: ../../results/table_2/\n", - "epochs: 75\n", - "batch_size: 512\n", - "[Epoch 0] loss: 1.365342\n", - "[Epoch 5] loss: 0.951919\n", - "[Epoch 10] loss: 0.867441\n", - "[Epoch 15] loss: 0.787939\n", - "[Epoch 20] loss: 0.716660\n", - "[Epoch 25] loss: 0.652943\n", - "[Epoch 30] loss: 0.598784\n", - "[Epoch 35] loss: 0.551974\n", - "[Epoch 40] loss: 0.512592\n", - "[Epoch 45] loss: 0.479014\n", - "[Epoch 50] loss: 0.451044\n", - "[Epoch 55] loss: 0.427375\n", - "[Epoch 60] loss: 0.407843\n", - "[Epoch 65] loss: 0.391357\n", - "[Epoch 70] loss: 0.377866\n", - "Best iteration 0\n", - "Accuracy train 0\n", - "doa 0.7756294591328354\n", - "Evaluate\n", - "RMSE 0.4299590283249851\n", - "AUC: 0.7876359427708508\n", - "0\n", - "Doa on Train dataset: 0.7756294591328354\n", - "AUC and RMSE on test dataset: 0.7876359427708508 0.4299590283249851\n", - "doa 0.49555815191093133\n", - "Accuracy and Doa on test dataset: 0.7399504966005577 0.49555815191093133\n", - "dataTrain: ../../data/assist0910_tkde/train_valid_3.csv\n", - "dataTest: ../../data/assist0910_tkde/test_3.csv\n", - "dataPath: ../../data/\n", - "embPath: ../../results/table_2/\n", - "epochs: 75\n", - "batch_size: 512\n", - "[Epoch 0] loss: 1.361240\n", - "[Epoch 5] loss: 0.917660\n", - "[Epoch 10] loss: 0.810890\n", - "[Epoch 15] loss: 0.716422\n", - "[Epoch 20] loss: 0.636654\n", - "[Epoch 25] loss: 0.570979\n", - "[Epoch 30] loss: 0.518479\n", - "[Epoch 35] loss: 0.476139\n", - "[Epoch 40] loss: 0.442749\n", - "[Epoch 45] loss: 0.416121\n", - "[Epoch 50] loss: 0.395355\n", - "[Epoch 55] loss: 0.378881\n", - "[Epoch 60] loss: 0.366123\n", - "[Epoch 65] loss: 0.356016\n", - "[Epoch 70] loss: 0.348208\n", - "Best iteration 0\n", - "Accuracy train 0\n", - "doa 0.7598394776244297\n", - "Evaluate\n", - "RMSE 0.4295921854657559\n", - "AUC: 0.787839063041027\n", - "0\n", - "Doa on Train dataset: 0.7598394776244297\n", - "AUC and RMSE on test dataset: 0.787839063041027 0.4295921854657559\n", - "doa 0.5066278482467461\n", - "Accuracy and Doa on test dataset: 0.7418728695871734 0.5066278482467461\n", - "dataTrain: ../../data/assist0910_tkde/train_valid_4.csv\n", - "dataTest: ../../data/assist0910_tkde/test_4.csv\n", - "dataPath: ../../data/\n", - "embPath: ../../results/table_2/\n", - "epochs: 75\n", - "batch_size: 512\n", - "[Epoch 0] loss: 1.362248\n", - "[Epoch 5] loss: 0.923156\n", - "[Epoch 10] loss: 0.807012\n", - "[Epoch 15] loss: 0.707919\n", - "[Epoch 20] loss: 0.625367\n", - "[Epoch 25] loss: 0.558423\n", - "[Epoch 30] loss: 0.505917\n", - "[Epoch 35] loss: 0.464502\n", - "[Epoch 40] loss: 0.432523\n", - "[Epoch 45] loss: 0.407566\n", - "[Epoch 50] loss: 0.388625\n", - "[Epoch 55] loss: 0.373885\n", - "[Epoch 60] loss: 0.362804\n", - "[Epoch 65] loss: 0.354112\n", - "[Epoch 70] loss: 0.347627\n", - "Best iteration 0\n", - "Accuracy train 0\n", - "doa 0.7570199245665414\n", - "Evaluate\n", - "RMSE 0.42942382801447393\n", - "AUC: 0.7883277553203483\n", - "0\n", - "Doa on Train dataset: 0.7570199245665414\n", - "AUC and RMSE on test dataset: 0.7883277553203483 0.42942382801447393\n", - "doa 0.4913074020630229\n", - "Accuracy and Doa on test dataset: 0.7404650570983237 0.4913074020630229\n", - "[0.734148934531055, 0.7411206963550168, 0.7399504966005577, 0.7418728695871734, 0.7404650570983237]\n", - "[0.7797187622578838, 0.7874179495114682, 0.7876359427708508, 0.787839063041027, 0.7883277553203483]\n", - "[0.43184939597901045, 0.4297057532202306, 0.4299590283249851, 0.4295921854657559, 0.42942382801447393]\n", - "[0.7493086165296035, 0.749059681595483, 0.7756294591328354, 0.7598394776244297, 0.7570199245665414]\n", - "[0.5110034511151385, 0.5106703541023732, 0.49555815191093133, 0.5066278482467461, 0.4913074020630229]\n", - "acc : 0.7395116108344253 +- 0.002757704724534191\n", - "auc : 0.7861878945803157 +- 0.0032485354538092292\n", - "rmse : 0.4301060382008912 +- 0.0008888860088608456\n", - "doa_train : 0.7581714318897786 +- 0.00970015000339504\n", - "doa_test : 0.5030334414876424 +- 0.008101150566934542\n", - "reo : 0.33651754691705094\n", - " File \"/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr/code/binary_bpr/main.py\", line 393\n", - " \"doa = compute_doa(testFileName)\n", - " ^\n", - "SyntaxError: unterminated string literal (detected at line 393)\n", - " File \"/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr/code/binary_bpr/main.py\", line 393\n", - " \"doa = compute_doa(testFileName)\n", - " ^\n", - "SyntaxError: unterminated string literal (detected at line 393)\n", - " File \"/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr/code/binary_bpr/main.py\", line 393\n", - " \"doa = compute_doa(testFileName)\n", - " ^\n", - "SyntaxError: unterminated string literal (detected at line 393)\n", - " File \"/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr/code/binary_bpr/main.py\", line 393\n", - " \"doa = compute_doa(testFileName)\n", - " ^\n", - "SyntaxError: unterminated string literal (detected at line 393)\n" + "/home/arthurb/Programmation/cd-bpr\n", + "assist0910_tkde\r\n", + "Is CUDA supported by this system? False\r\n", + "CUDA version: None\r\n", + "==========> fold 0\r\n", + "dataTrain: ../../data/assist0910_tkde/train_valid_0.csv\r\n", + "dataTest: ../../data/assist0910_tkde/test_0.csv\r\n", + "embPath: ../../results/table_2/\r\n", + "epochs: 1\r\n", + "batch_size: 512\r\n", + "[Epoch 0] loss: 1.370641\r\n", + "Doa: 0.7126622039356039\r\n", + "AUC and RMSE: 0.7249253060269787 0.4473763215794524\r\n", + "==========> fold 1\r\n", + "dataTrain: ../../data/assist0910_tkde/train_valid_1.csv\r\n", + "dataTest: ../../data/assist0910_tkde/test_1.csv\r\n", + "embPath: ../../results/table_2/\r\n", + "epochs: 1\r\n", + "batch_size: 512\r\n", + "^C\r\n", + "Traceback (most recent call last):\r\n", + " File \"/home/arthurb/Programmation/cd-bpr/code/binary_bpr/main.py\", line 312, in <module>\r\n", + " dico_items, test, y_test = parse_dataframe(dataTest, dico_kc, dico_users, dico_items, False)\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/Programmation/cd-bpr/code/binary_bpr/main.py\", line 105, in parse_dataframe\r\n", + " for row_index, row in df_group.iterrows():\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/core/frame.py\", line 1449, in iterrows\r\n", + " for k, v in zip(self.index, self.values):\r\n", + " ^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/core/frame.py\", line 12281, in values\r\n", + " return self._mgr.as_array()\r\n", + " ^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/core/internals/managers.py\", line 1656, in as_array\r\n", + " arr = self._interleave(dtype=dtype, na_value=na_value)\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/core/internals/managers.py\", line 1682, in _interleave\r\n", + " dtype = interleaved_dtype( # type: ignore[assignment]\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/core/internals/base.py\", line 363, in interleaved_dtype\r\n", + " return find_common_type(dtypes)\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/core/dtypes/cast.py\", line 1428, in find_common_type\r\n", + " types = list(dict.fromkeys(types).keys())\r\n", + " ^^^^^^^^^^^^^^^^^^^^\r\n", + "KeyboardInterrupt\r\n" ] } ], @@ -252,12 +107,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": "790a43dd", "metadata": { "ExecuteTime": { - "end_time": "2024-02-13T16:09:34.105849299Z", - "start_time": "2024-02-13T16:09:33.923825334Z" + "end_time": "2024-02-14T09:17:54.627435200Z", + "start_time": "2024-02-14T09:17:54.189951143Z" } }, "outputs": [ @@ -265,36 +120,43 @@ "name": "stdout", "output_type": "stream", "text": [ - "/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr\n", - "Traceback (most recent call last):\n", - " File \"/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr/./code/binary_bpr_ablation/compute_doa.py\", line 11, in <module>\n", - " doa = compute_doa(data)\n", - " File \"/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr/code/binary_bpr_ablation/utils.py\", line 161, in compute_doa\n", - " F = fromDFtoArray(filename+\"_embed.csv\",False,'f')\n", - " File \"/home/celine/travail.svn/recherche/phd_subject/Arthur/gitlab/cd-bpr/code/binary_bpr_ablation/utils.py\", line 7, in fromDFtoArray\n", - " df = pd.read_csv(name,index_col=None, header=None)\n", - " File \"/home/celine/miniconda3/lib/python3.10/site-packages/pandas/util/_decorators.py\", line 211, in wrapper\n", - " return func(*args, **kwargs)\n", - " File \"/home/celine/miniconda3/lib/python3.10/site-packages/pandas/util/_decorators.py\", line 331, in wrapper\n", - " return func(*args, **kwargs)\n", - " File \"/home/celine/miniconda3/lib/python3.10/site-packages/pandas/io/parsers/readers.py\", line 950, in read_csv\n", - " return _read(filepath_or_buffer, kwds)\n", - " File \"/home/celine/miniconda3/lib/python3.10/site-packages/pandas/io/parsers/readers.py\", line 605, in _read\n", - " parser = TextFileReader(filepath_or_buffer, **kwds)\n", - " File \"/home/celine/miniconda3/lib/python3.10/site-packages/pandas/io/parsers/readers.py\", line 1442, in __init__\n", - " self._engine = self._make_engine(f, self.engine)\n", - " File \"/home/celine/miniconda3/lib/python3.10/site-packages/pandas/io/parsers/readers.py\", line 1735, in _make_engine\n", - " self.handles = get_handle(\n", - " File \"/home/celine/miniconda3/lib/python3.10/site-packages/pandas/io/common.py\", line 856, in get_handle\n", - " handle = open(\n", - "FileNotFoundError: [Errno 2] No such file or directory: '../../results/table_2/users/math1/train_embed.csv'\n" + "/home/arthurb/Programmation/cd-bpr\n", + "Traceback (most recent call last):\r\n", + " File \"/home/arthurb/Programmation/cd-bpr/./code/binary_bpr_ablation/compute_doa.py\", line 11, in <module>\r\n", + " doa = compute_doa(data)\r\n", + " ^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/Programmation/cd-bpr/code/binary_bpr_ablation/utils.py\", line 161, in compute_doa\r\n", + " F = fromDFtoArray(filename+\"_embed.csv\",False,'f')\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/Programmation/cd-bpr/code/binary_bpr_ablation/utils.py\", line 7, in fromDFtoArray\r\n", + " df = pd.read_csv(name,index_col=None, header=None)\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/io/parsers/readers.py\", line 948, in read_csv\r\n", + " return _read(filepath_or_buffer, kwds)\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/io/parsers/readers.py\", line 611, in _read\r\n", + " parser = TextFileReader(filepath_or_buffer, **kwds)\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/io/parsers/readers.py\", line 1448, in __init__\r\n", + " self._engine = self._make_engine(f, self.engine)\r\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/io/parsers/readers.py\", line 1705, in _make_engine\r\n", + " self.handles = get_handle(\r\n", + " ^^^^^^^^^^^\r\n", + " File \"/home/arthurb/anaconda3/envs/cdbpr-env/lib/python3.11/site-packages/pandas/io/common.py\", line 863, in get_handle\r\n", + " handle = open(\r\n", + " ^^^^^\r\n", + "FileNotFoundError: [Errno 2] No such file or directory: '../../results/table_2/users/math_1/train_embed.csv'\r\n" ] } ], "source": [ - "i = 3\n", - "\n", + "import os\n", "embDirPath = \"../../results/table_2/users/\"\n", + "\n", + "i = 3 # dataset index\n", + "\n", + "\n", "print(os.getcwd())\n", "cmd = 'python ./code/binary_bpr_ablation/compute_doa.py --data '+embDirPath+datasets[i]+'/train' \n", "!{cmd}\n", @@ -373,7 +235,7 @@ "# 0 no ablation, 1 ablation L2, 2 ablation init, 3 both\n", "for abla in range(4):\n", " for i in range(5):\n", - " cmd = 'python ./binary_model/main.py --dataTrain '+ path+'data/'+datasets[i]+'/train.csv --dataTest '+path+'data/'+datasets[i]+'/test.csv --ablation '+str(abla)\n", + " cmd = 'python ./binary_model/main.py --dataTrain '+ path+'data/cdbpr_format/'+datasets[i]+'/train.csv --dataTest '+path+'data/'+datasets[i]+'/test.csv --ablation '+str(abla)\n", " os.system(cmd)" ] }, diff --git a/makefile b/makefile index f76ca8a35aab603da3c47ea3aefa4a57b479ecdd..39341c59cdf974917a03bc9ec19d01610e80e0c3 100644 --- a/makefile +++ b/makefile @@ -2,7 +2,7 @@ build: unzip data.zip - mkdir -p results/ results/table_2 results/table_2/users results/table_2/items + unzip results.zip clean: rm -rf data/ diff --git a/results.zip b/results.zip new file mode 100644 index 0000000000000000000000000000000000000000..a8129701eff340ff1dcf8b8e47703606a2126212 Binary files /dev/null and b/results.zip differ