Commit 0aaf8c75 authored by Céline Robardet

binary_bpr

parent 34ebd4ef
@@ -26,8 +26,8 @@ from datetime import datetime
 dtype = torch.float32
-print(f"Is CUDA supported by this system? {torch.cuda.is_available()}")
-print(f"CUDA version: {torch.version.cuda}")
+#print(f"Is CUDA supported by this system? {torch.cuda.is_available()}")
+#print(f"CUDA version: {torch.version.cuda}")
 if torch.cuda.is_available():
     dev = "cuda:0"
 else:
@@ -49,21 +49,21 @@ def read_file(dataTrain, dataTest):
     kc = flattern_arrays(kc.values, kcT.values)
     num_kc = len(kc)
     dico_kc = { k:v for (k,v) in zip(kc, range(len(kc)))}
-    print("NB KC", num_kc)
+    #print("NB KC", num_kc)
     # dico users
     users = df['user_id']
     usersT = dfTest['user_id']
     users = flattern_arrays(users.values, usersT.values)
     num_users = len(users)
     dico_users = { k:v for (k,v) in zip(users, range(num_users))}
-    print("NB Users", num_users)
+    #print("NB Users", num_users)
     # dico items and their associated kc
     itemsDT = df['item_id']
     itemsT = dfTest['item_id']
     items = flattern_arrays(itemsDT.values, itemsT.values)
     num_items = len(items)
     dico_items = { k:v for (k,v) in zip(items, range(num_items))}
-    print("NB Items", num_items, len(dico_items))
+    #print("NB Items", num_items, len(dico_items))
     return dico_kc, dico_users, dico_items
 def save_embeddings(xpName: str, modelName: str, embeddings,userEmbDir : str,itemEmbDir : str, grid_search_id):
     """
@@ -76,7 +76,6 @@ def save_embeddings(xpName: str, modelName: str, embeddings,userEmbDir : str,itemEmbDir : str, grid_search_id):
     """
     u_emb, i_emb = embeddings
     results_name_file = (xpName + modelName + "_" + str(grid_search_id))
     # save embeddings
@@ -90,8 +89,6 @@ def parse_dataframe(data, dico_kc, dico_users, dico_item, is_train = True):
     df = pd.read_csv(data, names=['user_id', 'item_id','correct','knowledge'])
     # Compute table of positive and negative items by KC and Users
     # and the dictionary that associate the KC to a question/answer
-    #num_kc = np.max(np.array(list(dico_kc.keys()))) + 1
-    #print("Parse DF", num_kc)
     num_kc = len(dico_kc)
     num_users = len(dico_users)
     # Find positive items for each kc/user
@@ -112,7 +109,6 @@ def parse_dataframe(data, dico_kc, dico_users, dico_item, is_train = True):
         col = row['item_id']
         if col not in dico_items:
             dico_items[col] = len(dico_items)
-        # Warning, all user's answers are positives!
         q,r = parse_it(col)
         col_neg = q+'_'+str(1-int(r))
         if col_neg not in dico_items:
@@ -155,7 +151,6 @@ def generate_quad(dico_items, triplets, t_trainy, item_users, alpha):
                 uu = item_users[t[2]][u]
                 t_quadriplets.append([t[0], t[1], t[2], uu])
                 t_y.append(t_trainy[k][i])
-                #break
             else:
                 t_quadriplets.append([t[0], t[1], t[2], t[0]])
                 t_y.append(t_trainy[k][i])
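
For readers skimming the hunk above: generate_quad extends each training triplet with a fourth entry, either another user drawn from item_users for the same item (first branch) or the original user repeated (else branch). The branch condition and loop structure are not visible in this diff, so the following is only a minimal illustrative sketch, assuming flat lists and a Bernoulli(alpha) draw; the helper name sample_quadruplets and that assumption are not part of the commit.

import random

def sample_quadruplets(triplets, labels, item_users, alpha):
    # Illustrative only: mirrors the two append branches shown in the hunk.
    # The real condition in generate_quad is not shown; a Bernoulli(alpha)
    # draw over the users of the same item is assumed here.
    quadruplets, y = [], []
    for t, label in zip(triplets, labels):
        users_of_item = item_users.get(t[2], [])
        if users_of_item and random.random() < alpha:
            uu = random.choice(users_of_item)             # another user of item t[2]
            quadruplets.append([t[0], t[1], t[2], uu])
        else:
            quadruplets.append([t[0], t[1], t[2], t[0]])  # fall back to the user itself
        y.append(label)
    return quadruplets, y
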
@@ -278,7 +273,7 @@ if __name__ == '__main__':
     parser.add_argument("-bSize", "--batchSize", help="batch size")
     parser.add_argument("-lr", "--learningRate", help="learning rate")
     parser.add_argument("-mode", "--mode", help="CV mode = 1, GS mode = 0")
-    #parser.add_argument("-a", "--alpha", help="float")
     args = parser.parse_args()
     dataTrain = args.dataTrain
     dataTest = args.dataTest
@@ -307,7 +302,6 @@ if __name__ == '__main__':
            FileNameTest_temp = testFileName[:-1] + str(i_fold)
            dataTrain = FileNameTrain_temp+".csv"
            dataTest = FileNameTest_temp+".csv"
-           # alpha = int(args.alpha)
            print("dataTrain:", dataTrain)
            print("dataTest:", dataTest)
            print("dataPath:", dataPath)
@@ -318,7 +312,6 @@ if __name__ == '__main__':
            dico_kc, dico_users, dico_items = read_file(dataTrain, dataTest)
            embedding_size = len(dico_kc)
            dico_items, t_train, ty_train, item_users = parse_dataframe(dataTrain, dico_kc, dico_users, dico_items, True)
-           # print("alpha", alpha)
            train, y_train = generate_quad(dico_items, t_train, ty_train, item_users, alpha)
            dico_items, test, y_test = parse_dataframe(dataTest, dico_kc, dico_users, dico_items, False)
            num_users = len(dico_users)
@@ -341,7 +334,7 @@ if __name__ == '__main__':
            write_file_doa(FileNameTrain_temp, emb[0], train, dico_kc, dico_users, dico_items)
            doa = compute_doa(FileNameTrain_temp)
            # '''
            # Test
            correctness, acc, users, auc, rmse = bpr_model.evaluate_model(test, len(dico_kc), y_test)
            acc_list.append(acc)
@@ -350,18 +343,13 @@ if __name__ == '__main__':
            doa_train.append(doa)
            print("Doa on Train dataset:", doa)
            print("AUC and RMSE on test dataset:", auc, rmse)
-           # '''
            new_embedding_value = bpr_model.user_embeddings.weight.clone().detach().cpu().numpy()
            write_file_doa_test(FileNameTest_temp, new_embedding_value, test, y_test, dico_kc, dico_users, dico_items)
            doa = compute_doa(FileNameTest_temp)
            doa_test.append(doa)
            print("Accuracy and Doa on test dataset:", acc, doa)
-           # '''
-           ## test oppose
-           # acc, precision = bpr_model.evaluate_model(test1, len(dico_kc), y_test1)
-           # print(f'Accuracy: {acc}')
        print(acc_list)
        print(auc_list)
        print(rmse_list)
@@ -374,7 +362,6 @@ if __name__ == '__main__':
        print("doa_test :", np.mean(doa_test), "+-", np.std(doa_test))
        print("reo :",1- np.mean(doa_test)/np.mean(doa_train))
    else :
-       #alpha = int(args.alpha)
        print("dataTrain:",dataTrain)
        print("epochs:",epochs)
        print("batch_size:",batch_size)
@@ -382,7 +369,6 @@ if __name__ == '__main__':
        dico_kc, dico_users, dico_items = read_file(dataTrain, dataTest)
        embedding_size = len(dico_kc)
        dico_items, t_train, ty_train, item_users = parse_dataframe(dataTrain, dico_kc, dico_users, dico_items, True)
-       #print("alpha", alpha)
        train, y_train = generate_quad(dico_items, t_train, ty_train, item_users, alpha)
        dico_items, test, y_test = parse_dataframe(dataTest, dico_kc, dico_users, dico_items, False)
        num_users = len(dico_users)
@@ -403,16 +389,12 @@ if __name__ == '__main__':
        write_file_doa(trainFileName, emb[0], train, dico_kc, dico_users, dico_items)
        doa = compute_doa(trainFileName)
        print("Doa on train dataset:", doa)
        #'''
        # Test
        correctness, acc, users, auc, rmse = bpr_model.evaluate_model(test, len(dico_kc), y_test)
        print(f'Accuracy: {acc}')
        #'''
        new_embedding_value = bpr_model.user_embeddings.weight.clone().detach().cpu().numpy()
        write_file_doa_test(testFileName, new_embedding_value, test, y_test, dico_kc, dico_users, dico_items)
        doa = compute_doa(testFileName)
        print("Accuracy and Doa on test dataset:", acc, doa)
-       #'''
-       ## test oppose
-       #acc, precision = bpr_model.evaluate_model(test1, len(dico_kc), y_test1)
-       #print(f'Accuracy: {acc}')
 import os
-dPath = "../../data/cdbpr_format/"
+dPath = "../../data/"
 embDirPath = "../../results/table_2/"
 datasets = ['assist0910_tkde', 'assist17_tkde', 'algebra','math_1', 'math_2']
 epochs = [75, 95, 5, 90, 90]
......
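
The fragment above pairs each dataset with a per-dataset epoch budget by list position. Below is a hedged sketch of how such parallel lists are typically consumed when launching runs; the file-naming scheme and the printed command line are assumptions for illustration, not code from this commit.

dPath = "../../data/"
embDirPath = "../../results/table_2/"
datasets = ['assist0910_tkde', 'assist17_tkde', 'algebra', 'math_1', 'math_2']
epochs = [75, 95, 5, 90, 90]

# Walk the two parallel lists together so each dataset keeps its own epoch count.
for name, n_epochs in zip(datasets, epochs):
    train_csv = f"{dPath}{name}_train.csv"   # assumed naming convention
    test_csv = f"{dPath}{name}_test.csv"     # assumed naming convention
    print(f"dataset={name} epochs={n_epochs} train={train_csv} test={test_csv} out={embDirPath}")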