# utils.py

import numpy as np
import pandas as pd
import math
import heapq


def get_train_instances(train, negatives, n_items, n_neg):
    """Build a training set of positives plus randomly sampled negatives.

    Every ``(user, item)`` key of ``train`` yields one row labelled 1,
    followed by ``n_neg`` rows labelled 0 for the same user. A sampled
    negative item is never one of the user's ``train`` items and never one
    of the items in ``negatives[user]`` (the negatives kept for testing).

    Parameters
    ----------
    train : mapping-like object whose keys are ``(user, item)`` pairs
        Only ``keys()`` and ``len()`` are used; the stored values are ignored.
    negatives : container indexed by user
        ``negatives[u]`` is an iterable of item ids that must not be sampled
        for user ``u``.
    n_items : int
        Negative items are drawn uniformly from ``[0, n_items)``.
    n_neg : int
        Number of negatives to sample per positive instance.

    Returns
    -------
    numpy.ndarray
        ``int64`` array with ``len(train) * (1 + n_neg)`` rows and the
        columns ``user, item, label``.

    Raises
    ------
    ValueError
        If negatives are requested for a user for whom every item in
        ``[0, n_items)`` is excluded (the sampling loop could never end).
    """
    # Group the observed items by user so that the rejection test below is a
    # single set lookup.
    seen_by_user = {}
    for u, i in train.keys():
        seen_by_user.setdefault(u, set()).add(i)

    # Per-user exclusion sets (train items + test negatives), built lazily
    # the first time negatives are sampled for that user.
    excluded_by_user = {}

    user, item, labels = [], [], []
    for u, i in train.keys():
        # positive instance
        user.append(u)
        item.append(i)
        labels.append(1)

        if n_neg <= 0:
            continue

        excluded = excluded_by_user.get(u)
        if excluded is None:
            excluded = seen_by_user[u].union(negatives[u])
            # Without at least one free item the rejection loop would spin
            # forever, so fail loudly instead.
            n_blocked = sum(1 for x in excluded if 0 <= x < n_items)
            if n_blocked >= n_items:
                raise ValueError(
                    "no item left to sample as a negative for user "
                    "{}".format(u)
                )
            excluded_by_user[u] = excluded

        # negative instances: rejection sampling, we also need to make sure
        # they are not in the negative examples used for testing
        for _ in range(n_neg):
            j = np.random.randint(n_items)
            while j in excluded:
                j = np.random.randint(n_items)
            user.append(u)
            item.append(j)
            labels.append(0)

    train_w_negative = np.vstack([user, item, labels]).T
    # internal sanity check: one positive plus n_neg negatives per train entry
    assert train_w_negative.shape[0] == (len(train) + len(train) * n_neg)
    return train_w_negative.astype(np.int64)


def get_hitratio(ranklist, gtitem):
    """Return 1 when ``gtitem`` appears in ``ranklist``, otherwise 0."""
    return 1 if gtitem in ranklist else 0


def get_ndcg(ranklist, gtitem):
    """Return the NDCG contribution of ``gtitem`` within ``ranklist``.

    The value is ``log(2) / log(position + 2)`` for the first 0-based
    position at which ``gtitem`` is found, and 0 when it is absent.
    """
    for position, candidate in enumerate(ranklist):
        if candidate == gtitem:
            return math.log(2) / math.log(position + 2)
    return 0


def get_scores(items, preds, topk):
    """Compute hit ratio and NDCG at ``topk`` for one list of candidates.

    Parameters
    ----------
    items : indexable sequence of item ids
        ``items[0]`` is taken as the ground-truth item. Must support
        indexing with an integer index array (e.g. a numpy array).
    preds : indexable sequence of scores
        Scores aligned with ``items``; indexed the same way as ``items``.
    topk : int
        Number of highest-scoring items kept in the ranked list.

    Returns
    -------
    tuple
        ``(hr, ndcg)`` as returned by ``get_hitratio`` and ``get_ndcg`` for
        the top-``topk`` ranked list and the ground-truth item.
    """
    gtitem = items[0]

    # Shuffle items and predictions with one shared permutation so that the
    # fact that the 1st item is gtitem does not affect the final rank. The
    # permutation spans every candidate instead of assuming exactly 100.
    randidx = np.arange(len(items))
    np.random.shuffle(randidx)
    items, preds = items[randidx], preds[randidx]

    map_item_score = dict(zip(items, preds))
    ranklist = heapq.nlargest(topk, map_item_score, key=map_item_score.get)
    hr = get_hitratio(ranklist, gtitem)
    ndcg = get_ndcg(ranklist, gtitem)
    return hr, ndcg