Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • abatel/cd-bpr
1 result
Show changes
Commits on Source (4)
data/
./results/
results/
experiments_logs/*
\ No newline at end of file
This diff is collapsed.
......@@ -17,13 +17,17 @@ dependencies:
- blas=1.0=mkl
- bleach=4.1.0=pyhd3eb1b0_0
- bottleneck=1.3.5=py311hbed6279_0
- brotli=1.0.9=h5eee18b_7
- brotli-bin=1.0.9=h5eee18b_7
- bzip2=1.0.8=h7b6447c_0
- ca-certificates=2023.12.12=h06a4308_0
- certifi=2024.2.2=py311h06a4308_0
- cffi=1.16.0=py311h5eee18b_0
- charset-normalizer=2.0.4=pyhd3eb1b0_0
- comm=0.1.2=py311h06a4308_0
- contourpy=1.2.0=py311hdb19cb5_0
- cpuonly=2.0=0
- cycler=0.11.0=pyhd3eb1b0_0
- cyrus-sasl=2.1.28=h52b45da_1
- dbus=1.13.18=hb2f20db_0
- debugpy=1.6.7=py311h6a678d5_0
......@@ -34,6 +38,7 @@ dependencies:
- ffmpeg=4.3=hf484d3e_0
- filelock=3.13.1=py311h06a4308_0
- fontconfig=2.14.1=h4c34cd2_2
- fonttools=4.25.0=pyhd3eb1b0_0
- freetype=2.12.1=h4a9f257_0
- glib=2.69.1=he621ea3_2
- gmp=6.2.1=h295c915_3
......@@ -66,11 +71,15 @@ dependencies:
- jupyterlab_pygments=0.1.2=py_0
- jupyterlab_server=2.25.1=py311h06a4308_0
- jupyterlab_widgets=3.0.9=py311h06a4308_0
- kiwisolver=1.4.4=py311h6a678d5_0
- krb5=1.20.1=h143b758_1
- lame=3.100=h7b6447c_0
- lcms2=2.12=h3be6417_0
- ld_impl_linux-64=2.38=h1181459_1
- lerc=3.0=h295c915_0
- libbrotlicommon=1.0.9=h5eee18b_7
- libbrotlidec=1.0.9=h5eee18b_7
- libbrotlienc=1.0.9=h5eee18b_7
- libclang=14.0.6=default_hc6dbbc7_1
- libclang13=14.0.6=default_he11475f_1
- libcups=2.4.2=h2d74bed_1
......@@ -100,6 +109,8 @@ dependencies:
- llvm-openmp=14.0.6=h9e868ea_0
- lz4-c=1.9.4=h6a678d5_0
- markupsafe=2.1.3=py311h5eee18b_0
- matplotlib=3.8.0=py311h06a4308_0
- matplotlib-base=3.8.0=py311ha02d727_0
- matplotlib-inline=0.1.6=py311h06a4308_0
- mistune=2.0.4=py311h06a4308_0
- mkl=2023.1.0=h213fc3f_46344
......@@ -109,6 +120,7 @@ dependencies:
- mpc=1.1.0=h10f8cd9_1
- mpfr=4.0.2=hb69a4c5_1
- mpmath=1.3.0=py311h06a4308_0
- munkres=1.1.4=py_0
- mysql=5.7.24=h721c034_2
- nbclient=0.8.0=py311h06a4308_0
- nbconvert=7.10.0=py311h06a4308_0
......@@ -135,6 +147,7 @@ dependencies:
- pillow=10.2.0=py311h5eee18b_0
- pip=23.3.1=py311h06a4308_0
- platformdirs=3.10.0=py311h06a4308_0
- plotly=5.9.0=py311h06a4308_0
- ply=3.11=py311h06a4308_0
- prometheus_client=0.14.1=py311h06a4308_0
- prompt-toolkit=3.0.43=py311h06a4308_0
......@@ -144,6 +157,7 @@ dependencies:
- pure_eval=0.2.2=pyhd3eb1b0_0
- pycparser=2.21=pyhd3eb1b0_0
- pygments=2.15.1=py311h06a4308_1
- pyparsing=3.0.9=py311h06a4308_0
- pyqt=5.15.10=py311h6a678d5_0
- pyqt5-sip=12.13.0=py311h5eee18b_0
- python=3.11.7=h955ad1f_0
......@@ -167,6 +181,7 @@ dependencies:
- rpds-py=0.10.6=py311hb02cf49_0
- scikit-learn=1.2.2=py311h6a678d5_1
- scipy=1.11.4=py311h08b1b3b_0
- seaborn=0.12.2=py311h06a4308_0
- send2trash=1.8.2=py311h06a4308_0
- setuptools=68.2.2=py311h06a4308_0
- sip=6.7.12=py311h6a678d5_0
......@@ -177,6 +192,7 @@ dependencies:
- stack_data=0.2.0=pyhd3eb1b0_0
- sympy=1.12=py311h06a4308_0
- tbb=2021.8.0=hdb19cb5_0
- tenacity=8.2.2=py311h06a4308_0
- terminado=0.17.1=py311h06a4308_0
- threadpoolctl=2.2.0=pyh0d69192_0
- tinycss2=1.2.1=py311h06a4308_0
......
......@@ -4,10 +4,8 @@ import numpy as np
import argparse
import numpy as np
import sys
sys.path.append('../')
sys.path.insert(0, os.path.abspath('../portrait/'))
sys.path.insert(0, os.path.abspath('../../portrait/'))
from Utility import DatasetProcessor
sys.path.append('/')
from utility.DatasetProcessor import DatasetProcessor
def compute_kc_user(name):
fileName = name + "_responses.csv"
......@@ -127,49 +125,27 @@ def fromDFtoArray(name, vector, type_value):
return r
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("-dt", "--datasetName", help="data file")
args = parser.parse_args()
dataset_name = args.datasetName
xp_folder_name = "8-Preprocessing_pipeline"
rel_data_path = "01-Data"
rel_xp_path = "02-Resultats/04-Dataset_preparation_logs"
abs_xp_path = "../Experiments"
rel_data_path = "../data/raw_format"
rel_xp_path = "../experiments_logs"
abs_xp_path = "../experiments_logs"
embs_path = "../results/table_2/users/"
exp = DatasetProcessor("3", data_path=rel_data_path, experiment_path=rel_xp_path)
dataset_name = "bridge"
dataset_file= 'preprocessed_bridge_to_algebra_2006_2007_30-01-2024.csv'
metadata_file = "metadata_bridge_to_algebra_2006_2007_30-01-2024.json"
dataset_file= dataset_name+'.csv'
metadata_file = "metadata_"+dataset_name+".json"
exp.import_dataset(dataset_file_name=dataset_file, metadata_file_name="metadata/"+metadata_file,
dataset_name=dataset_name)
exp.shuffle(dataset_name=dataset_name, attributes=["start_time"], group_attributes=["user_id"], rgn_seed=1)
files_dict = {
"DINA":[
"DINA_gs_DINA_bridge_to_algebra_2006_2007_1706638361__30-01-2024_18h05_55s_706709ms.csv",
"DINA_gs_DINA_bridge_to_algebra_2006_2007_1706638361__30-01-2024_18h11_36s_231506ms.csv",
"DINA_gs_DINA_bridge_to_algebra_2006_2007_1706638361__30-01-2024_18h17_17s_745290ms.csv",
"DINA_gs_DINA_bridge_to_algebra_2006_2007_1706638361__30-01-2024_18h23_07s_203039ms.csv",
"DINA_gs_DINA_bridge_to_algebra_2006_2007_1706638361__30-01-2024_18h28_59s_133353ms.csv"],
"MCD": [
"MCD_gs_MCD_bridge_to_algebra_2006_2007_1706630335__30-01-2024_15h42_34s_350457ms.csv",
"MCD_gs_MCD_bridge_to_algebra_2006_2007_1706630335__30-01-2024_15h44_20s_914592ms.csv",
"MCD_gs_MCD_bridge_to_algebra_2006_2007_1706630335__30-01-2024_15h46_09s_386992ms.csv",
"MCD_gs_MCD_bridge_to_algebra_2006_2007_1706630335__30-01-2024_15h47_58s_712822ms.csv",
"MCD_gs_MCD_bridge_to_algebra_2006_2007_1706630335__30-01-2024_15h49_48s_259017ms.csv"],
"NCDM": [
"NCDM_gs_NCDM_bridge_to_algebra_2006_2007_1706635966__30-01-2024_16h43_34s_649606ms.csv",
"NCDM_gs_NCDM_bridge_to_algebra_2006_2007_1706635966__30-01-2024_16h45_38s_895859ms.csv",
"NCDM_gs_NCDM_bridge_to_algebra_2006_2007_1706635966__30-01-2024_16h47_39s_318338ms.csv",
"NCDM_gs_NCDM_bridge_to_algebra_2006_2007_1706635966__30-01-2024_16h49_43s_359826ms.csv",
"NCDM_gs_NCDM_bridge_to_algebra_2006_2007_1706635966__30-01-2024_16h51_45s_189893ms.csv"],
"BPR" : [
"bpr_bridge_0_embed.csv",
"bpr_bridge_1_embed.csv",
"bpr_bridge_2_embed.csv",
"bpr_bridge_3_embed.csv",
"bpr_bridge_4_embed.csv"
]
}
print("dataset ",dataset_name)
......@@ -179,7 +155,7 @@ if __name__ == '__main__':
for i_fold in range(5):
print("fold", i_fold)
for model in files_dict :
for model in dao_mean_train.keys() :
print("model ", model)
exp.train_test_split(dataset_name, test_proportion=0.2, valid_proportion=0.2, n_folds=5, i_fold=i_fold)
......@@ -202,7 +178,7 @@ if __name__ == '__main__':
data = pivot_df.sort_values('skill_id')
data.to_csv('dataTest_responses.csv', header=False, index=False, na_rep='')
F = fromDFtoArray("./Embeddings/" + str(files_dict[model][i_fold]), False, 'f')
F = fromDFtoArray(embs_path + dataset_name +"_"+str(i_fold)+"_"+model+".csv", False, 'f')
# print(F)
kc_user, kc_user_val, dico_u, num_kc = compute_kc_user("dataTrain")
print("num_kc", num_kc)
......@@ -218,7 +194,7 @@ if __name__ == '__main__':
print("dao train", dao_mean_train)
print("dao test", dao_mean_test)
print("reo", reo_mean)
for model in files_dict:
for model in dao_mean_train.keys():
print("model",model)
print("dao train", np.mean(np.array(dao_mean_train[model])),"+-",np.std(np.array(dao_mean_train[model])))
print("dao test", np.mean(np.array(dao_mean_test[model])),"+-",np.std(np.array(dao_mean_test[model])))
......
......@@ -62,7 +62,7 @@ def read_file(dataTrain, dataTest):
dico_items = { k:v for (k,v) in zip(items, range(num_items))}
return dico_kc, dico_users, dico_items
def save_embeddings(xpName: str, modelName: str, embeddings,userEmbDir : str,itemEmbDir : str):
def save_embeddings(xpName: str, modelName: str, embeddings,userEmbDir : str,itemEmbDir : str) :
"""
Saves all the metrics measured after the training process.
......
......@@ -2,8 +2,8 @@ import os
dPath = "../../data/"
embDirPath = "../../results/table_2/"
datasets = ['assist0910_tkde', 'assist17_tkde', 'algebra','math_1', 'math_2']
epochs = [75 ,75, 95, 5, 90, 90]
batchSize =[ 512, 512,512, 512,512,4000]
epochs = [1, 75 ,75, 95, 5, 90, 90]
batchSize =[4000, 512, 512,512, 512,512]
learningRate = [0.01,0.01,0.01,0.01,0.01]
mode = [1,1,1,1,1]
for i in range(len(datasets)):
......
......@@ -5,5 +5,5 @@ for i in range(4):
print("Ablation (0 no ablation, 1 ablation L2, 2 ablation init, 3 both) ",i)
for a in range(5):
print(name[i])
cmd = "python main.py --dataTrain ../../data/"+name[i]+"/train_0.csv --dataTest ../../data/"+name[i]+"/test_0.csv --ablation "+str(i)
cmd = "python main.py --dataTrain ../../data/cdbpr_format/"+name[i]+"/train_0.csv --dataTest ../../data/cdbpr_format/"+name[i]+"/test_0.csv --ablation "+str(i)
os.system(cmd)
This diff is collapsed.
This diff is collapsed.
File deleted
File deleted
File added