1st assignment (link prediction - friends' recommendation engine)¶

Stratos Gounidellis - DS3517005
Natasa Farmaki - DS3517018

# import necessary libraries
from scipy.sparse.csgraph import floyd_warshall
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import random

Data loading & graph creation¶

# create the undirected graph
g = nx.read_edgelist("facebook_combined.txt",
                     create_using=nx.Graph(), nodetype=int)

# print the graph info
g = g.to_undirected()
print(nx.info(g))

Name: 
Type: Graph
Number of nodes: 4039
Number of edges: 88234
Average degree:  43.6910

# convert also a directed instance of a graph in 
# case it is needed in any of the functionalities later
# and print the graph  info. The number of edges is doubled.
g_dir = g.to_directed()
print(nx.info(g_dir))

Name: 
Type: DiGraph
Number of nodes: 4039
Number of edges: 176468
Average in degree:  43.6910
Average out degree:  43.6910

Algorithm functions¶

# Baseline approach: randomly choose friend recommendations for the
# facebook users. The reccomendations are sorted based on the nodeid
# Obviously, this is a completely naive approach and should not have
# high accuracy.
def baseline(G, node1, k):
    nodeid = random.sample(G.nodes(), k)
    friend_rec = pd.DataFrame({"nodeid": nodeid})
    friend_rec = friend_rec.sort_values(by=['nodeid'], ascending=[True])
    friend_rec = friend_rec.reset_index(drop=True)
    friend_rec.nodeid = friend_rec.nodeid.astype(int)
    return friend_rec

# Common neighbors approach: according to this method the best friend reccomendation
# is the facebook user with whom you have the greatest number
# of mutual friends. In the case of ties in friendship score the node with the smallest
# nodeID is shown first.
def common_neighbors(G, node1, k):
    nodeid = []
    number_of_common_neighbors = []
    for i in G.nodes():
        if not G.has_edge(node1, i) and node1 != i:
            nodeid.append(i)
            number_of_common_neighbors.append(len(list(nx.common_neighbors(G,
                                                                           node1,
                                                                           i))))
    friend_rec = pd.DataFrame({"nodeid": nodeid,
                               "number_of_common_neighbors": number_of_common_neighbors})
    friend_rec = friend_rec.sort_values(by=['number_of_common_neighbors',
                                            'nodeid'], ascending=[False, True])
    friend_rec = friend_rec.head(k)
    friend_rec = friend_rec.reset_index(drop=True)
    return friend_rec

# Return a set with the common friends of two facebook users.
def union_neighbors(G, node1, node2):
    return set(nx.all_neighbors(G, node1)) | set(nx.all_neighbors(G, node2))

# Jaccard coefficient approach: The Jaccard coefficient is a metric used to
# evaluate similarities. The Jaccard coefficient measures the similarity between 
# sets and is defined as the size of the intersection divided by the size 
# of the union. In the case of ties in friendship score the node with the smallest
# nodeID is shown first.
def jaccard_coef(G, node1, k):
    nodeid = []
    jaccard_coef = []
    for i in G.nodes():
        if not G.has_edge(node1, i) and node1 != i:
            nodeid.append(i)
            union_neigh = float(len(union_neighbors(G, node1, i)))
            jaccard_coef.append(len(list(nx.common_neighbors(G, node1, i)))/union_neigh)
    friend_rec = pd.DataFrame({"nodeid": nodeid,
                               "jaccard_coef": jaccard_coef})
    friend_rec = friend_rec.sort_values(by=['jaccard_coef', 'nodeid'],
                                        ascending=[False, True])
    friend_rec = friend_rec.head(k)
    friend_rec = friend_rec.reset_index(drop=True)
    friend_rec.nodeid = friend_rec.nodeid.astype(int)
    cols = friend_rec.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    friend_rec = friend_rec[cols]
    return friend_rec

# Return the number of friends of facebook user.
def find_neighbors(G, common_neighbor):
    return len(list(nx.all_neighbors(G, common_neighbor)))

# Return the required logarithmic sum for the Adamic and Adar
# similarity function.
def calc_log_sum(G, common_neighbors):
    log_sum = 0.0
    for common_neighbor in common_neighbors:
        log_sum = log_sum + 1 / math.log(find_neighbors(G,
                                                        common_neighbor), 10)
    return log_sum

# Adamic and Adar function: Adamic and Adar proposed a weighted function for
# scoring similarity between users (nodes) in a network. The more similar a user
# is to another, the more likely a friendship will occur in the network. In the
# case of ties in friendship score the node with the smallest nodeID is shown first.
def adamic_adar(G, node1, k):
    nodeid = []
    adamic_adar_similarity = []
    for i in G.nodes():
        if not G.has_edge(node1, i) and node1 != i:
            nodeid.append(i)
            common_neighbors = nx.common_neighbors(G, node1, i)
            adamic_adar_similarity.append(calc_log_sum(G, common_neighbors))
    friend_rec = pd.DataFrame({"nodeid": nodeid,
                               "adamic_adar_similarity": adamic_adar_similarity})
    friend_rec = friend_rec.sort_values(by=['adamic_adar_similarity',
                                            'nodeid'],
                                        ascending=[False, True])

    friend_rec = friend_rec.head(k)
    friend_rec = friend_rec.reset_index(drop=True)
    friend_rec.nodeid = friend_rec.nodeid.astype(int)

    cols = friend_rec.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    friend_rec = friend_rec[cols]

    return friend_rec

# compute the adjacency matrix of the undirected graph
A = nx.adjacency_matrix(g, nodelist=sorted(g.nodes()))

# compiute the distance matrix of the graph. The algorithm used
# is floyd_warshall while the distance matrix will be used for the
# metrics presented and implemented below.
distance_array = floyd_warshall(A, directed=False, unweighted=True)

# Graph distance approach: according to this approach the best friend 
# reccomendation is the facebook user that is closer to you. In the
# case of ties in friendship score the node with the smallest nodeID 
# is shown first.
def graph_distance(G, node1, distance_array, k):
    nodeid = []
    graph_distance_similarity = []
    for i in G.nodes():
        if not G.has_edge(node1, i) and node1 != i:
            nodeid.append(i)
            distance = distance_array[node1, i]
            graph_distance_similarity.append(float(distance))

    friend_rec = pd.DataFrame({"nodeid": nodeid,
                               "graph_distance_similarity": graph_distance_similarity})
    friend_rec = friend_rec.sort_values(by=['graph_distance_similarity',
                                            'nodeid'], ascending=[True, True])

    friend_rec = friend_rec.head(k)
    friend_rec = friend_rec.reset_index(drop=True)
    friend_rec.nodeid = friend_rec.nodeid.astype(int)

    cols = friend_rec.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    friend_rec = friend_rec[cols]

    return friend_rec

# Weighted Graph Distance (FoF): this approach takes into account not only
# the distance among two facebook users but also their common friends.
# In that sense, it could be considered a weighted graph distance metric
# with the common neighbors as a weighting factor. In the
# case of ties in friendship score the node with the smallest nodeID 
# is shown first.
def distance_common_neigh(G, node1, distance_array, k):
    nodeid = []
    graph_distance_similarity = []
    for i in G.nodes():
        if not G.has_edge(node1, i) and node1 != i:
            nodeid.append(i)
            common_neighbors = len(list(nx.common_neighbors(G, node1, i)))
            distance = distance_array[node1, i]
            graph_distance_similarity.append(common_neighbors/float(distance))

    friend_rec = pd.DataFrame({"nodeid": nodeid,
                               "graph_distance_similarity": graph_distance_similarity})
    friend_rec = friend_rec.sort_values(by=['graph_distance_similarity',
                                            'nodeid'], ascending=[False, True])

    friend_rec = friend_rec.head(k)
    friend_rec = friend_rec.reset_index(drop=True)
    friend_rec.nodeid = friend_rec.nodeid.astype(int)

    cols = friend_rec.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    friend_rec = friend_rec[cols]

    return friend_rec

# Weighted Graph Distance (Jaccard): this approach takes into account not only
# the distance among two facebook users but also the jaccard coefficient.
# In that sense, it could be considered a weighted graph distance metric
# with the jaccard coefficient as a weighting factor. In the
# case of ties in friendship score the node with the smallest nodeID 
# is shown first.
def distance_jaccard(G, node1, distance_array, k):
    nodeid = []
    distance_jaccard_similarity = []
    for i in G.nodes():
        if not G.has_edge(node1, i) and node1 != i:
            nodeid.append(i)
            union_neigh = float(len(union_neighbors(G, node1, i)))
            common_neighbors = len(list(nx.common_neighbors(G, node1, i)))
            jaccard_coef = common_neighbors / union_neigh
            distance = distance_array[node1, i]
            distance_jaccard_similarity.append(jaccard_coef/distance)

    friend_rec = pd.DataFrame({"nodeid": nodeid,
                               "distance_jaccard_similarity": distance_jaccard_similarity})
    friend_rec = friend_rec.sort_values(by=['distance_jaccard_similarity',
                                            'nodeid'], ascending=[False, True])

    friend_rec = friend_rec.head(k)
    friend_rec = friend_rec.reset_index(drop=True)
    friend_rec.nodeid = friend_rec.nodeid.astype(int)

    cols = friend_rec.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    friend_rec = friend_rec[cols]

    return friend_rec

Indicative Outputs¶

The ouput of the aformentioned metrics is presented for the facebook users with nodeId [107, 1126, 14, 35].

fb_users = [107, 1126, 14, 35]

for fb_user in fb_users:
    print("Current facebook user:", fb_user)
    display(common_neighbors(g, fb_user, 10))

Current facebook user: 107

Current facebook user: 1126

Current facebook user: 14

Current facebook user: 35

for fb_user in fb_users:
    print("Current facebook user:", fb_user)
    display(jaccard_coef(g, fb_user, 10))

Current facebook user: 107

Current facebook user: 1126

Current facebook user: 14

Current facebook user: 35

for fb_user in fb_users:
    print("Current facebook user:", fb_user)
    display(adamic_adar(g, fb_user, 10))

Current facebook user: 107

Current facebook user: 1126

Current facebook user: 14

Current facebook user: 35

for fb_user in fb_users:
    print("Current facebook user:", fb_user)
    display(graph_distance(g, fb_user, distance_array, 10))

Current facebook user: 107

Current facebook user: 1126

Current facebook user: 14

Current facebook user: 35

for fb_user in fb_users:
    print("Current facebook user:", fb_user)
    display(distance_common_neigh(g, fb_user, distance_array, 10))

Current facebook user: 107

Current facebook user: 1126

Current facebook user: 14

Current facebook user: 35

for fb_user in fb_users:
    print("Current facebook user:", fb_user)
    display(distance_jaccard(g, fb_user, distance_array, 10))

Current facebook user: 107

Current facebook user: 1126

Current facebook user: 14

Current facebook user: 35

Measures to evaluate metrics' performance¶

# Examine whether the scoring functions give different
# recommendations for specific users. We consider only the 
# facebook user with a nodeId, which is a multiple of 10, i.e.
# 40 users in total. The distance based approaches seem to be
# completely different from the other.
def compare_algos(G, distance_array, k):
    same_rec = 0
    dif_rec = 0
    
    fof_jaccard = 0.0
    jaccard_adamic = 0.0
    fof_adamic = 0.0
    
    fof_distance = 0.0
    fof_distance_common = 0.0
    fof_distance_jaccard = 0.0
    
    jaccard_distance = 0.0
    jaccard_distance_common = 0.0
    jaccard_distance_jaccard = 0.0
    
    adamic_distance = 0.0
    adamic_distance_common = 0.0
    adamic_distance_jaccard = 0.0

    for user in range(100, len(G.nodes()), 100):
        fof_metric = sorted(common_neighbors(G, user, k).nodeid.values)
        jaccard_metric = sorted(jaccard_coef(G, user, k).nodeid.values)
        adamic_adar_metric = sorted(adamic_adar(G, user, k).nodeid.values)
        distance_metric = sorted(graph_distance(G, user, distance_array, k).nodeid.values)
        distance_common_metric = sorted(distance_common_neigh(G, user, distance_array, k).nodeid.values)
        distance_jaccard_metric = sorted(distance_jaccard(G, user, distance_array, k).nodeid.values)
        
        bool_first = (fof_metric == jaccard_metric) and (jaccard_metric == adamic_adar_metric) and (fof_metric == adamic_adar_metric)
        bool_sec = (fof_metric == distance_metric) and (jaccard_metric == distance_common_metric) and (distance_jaccard_metric == adamic_adar_metric)
        bool_third = (fof_metric == distance_common_metric) and (jaccard_metric == distance_jaccard_metric) and (distance_metric == adamic_adar_metric)
        bool_fourth = (fof_metric == distance_jaccard_metric) and (jaccard_metric == distance_metric) and (distance_common_metric == adamic_adar_metric)

        if bool_first and bool_sec and bool_third and bool_fourth:
            same_rec += 1
        else:
            dif_rec += 1
        fof_jaccard += len(set(fof_metric) & set(jaccard_metric)) / float(len(set(fof_metric) | set(jaccard_metric)))
        jaccard_adamic += len(set(adamic_adar_metric) & set(jaccard_metric)) / float(len(set(adamic_adar_metric) | set(jaccard_metric)))
        fof_adamic += len(set(fof_metric) & set(adamic_adar_metric)) / float(len(set(fof_metric) | set(adamic_adar_metric)))
        
        fof_distance = len(set(fof_metric) & set(distance_metric)) / float(len(set(fof_metric) | set(distance_metric)))
        fof_distance_common = len(set(fof_metric) & set(distance_common_metric)) / float(len(set(fof_metric) | set(distance_common_metric)))
        fof_distance_jaccard = len(set(fof_metric) & set(distance_jaccard_metric)) / float(len(set(fof_metric) | set(distance_jaccard_metric)))
        
        jaccard_distance = len(set(jaccard_metric) & set(distance_metric)) / float(len(set(jaccard_metric) | set(distance_metric)))
        jaccard_distance_common = len(set(jaccard_metric) & set(distance_common_metric)) / float(len(set(jaccard_metric) | set(distance_common_metric)))
        jaccard_distance_jaccard = len(set(jaccard_metric) & set(distance_jaccard_metric)) / float(len(set(jaccard_metric) | set(distance_jaccard_metric)))
        
        adamic_distance = len(set(adamic_adar_metric) & set(distance_metric)) / float(len(set(adamic_adar_metric) | set(distance_metric)))
        adamic_distance_common = len(set(adamic_adar_metric) & set(distance_common_metric)) / float(len(set(adamic_adar_metric) | set(distance_common_metric)))
        adamic_distance_jaccard = len(set(adamic_adar_metric) & set(distance_jaccard_metric)) / float(len(set(adamic_adar_metric) | set(distance_jaccard_metric)))

        
    fof_jaccard /= 40
    jaccard_adamic /= 40
    fof_adamic /= 40
    fof_distance /= 40
    fof_distance_common /= 40
    fof_distance_jaccard /= 40
    jaccard_distance /= 40
    jaccard_distance_common /= 40
    jaccard_distance_jaccard /= 40
    adamic_distance /= 40
    adamic_distance_common /= 40
    adamic_distance_jaccard /= 40

    average_similarity = float(fof_jaccard + jaccard_adamic + fof_adamic + fof_distance + fof_distance_common +
                               fof_distance_jaccard + jaccard_distance + jaccard_distance_common + jaccard_distance_jaccard +
                               adamic_distance + adamic_distance_common + adamic_distance_jaccard) / 12
    
    print("Users with same recommendations:", same_rec)
    print("Users with different recommendations:", dif_rec)
    print()
    
    print("FoF - Jaccard similarity percentage:", fof_jaccard)
    print("Jaccard - Adamic similarity percentage:", jaccard_adamic)
    print("FoF - Adamic similarity percentage:", fof_adamic)
    print()
    
    print("FoF - Distance similarity percentage:", fof_distance)
    print("FoF - Weighted Distance (FoF) similarity percentage:", fof_distance_common)
    print("FoF - Weighted Distance (Jaccard) similarity percentage:", fof_distance_jaccard)
    print()
    
    print("Jaccard - Distance similarity percentage:", jaccard_distance)
    print("Jaccard - Weighted Distance (FoF) similarity percentage:", jaccard_distance_common)
    print("Jaccard - Weighted Distance (Jaccard) similarity percentage:", jaccard_distance_jaccard)
    print()
    
    print("Adamic - Distance similarity percentage:", adamic_distance)
    print("Adamic - Weighted Distance (FoF) similarity percentage:", adamic_distance_common)
    print("Adamic - Weighted Distance (Jaccard) similarity percentage:", adamic_distance_jaccard)
    print()

    print("Average similarity between the algorithms:", average_similarity)
    
compare_algos(g, distance_array, 10)

Users with same recommendations: 0
Users with different recommendations: 40

FoF - Jaccard similarity percentage: 0.4460914880613022
Jaccard - Adamic similarity percentage: 0.45298849327487084
FoF - Adamic similarity percentage: 0.8567927170868345

FoF - Distance similarity percentage: 0.00625
FoF - Weighted Distance (FoF) similarity percentage: 0.025
FoF - Weighted Distance (Jaccard) similarity percentage: 0.020454545454545454

Jaccard - Distance similarity percentage: 0.00625
Jaccard - Weighted Distance (FoF) similarity percentage: 0.020454545454545454
Jaccard - Weighted Distance (Jaccard) similarity percentage: 0.025

Adamic - Distance similarity percentage: 0.00625
Adamic - Weighted Distance (FoF) similarity percentage: 0.025
Adamic - Weighted Distance (Jaccard) similarity percentage: 0.020454545454545454

Average similarity between the algorithms: 0.15924886123222035

# Return a set of random edges of the graph.
# The number of the required edges has to be specified.
def random_choices(G, n):
    edges = random.sample(G.edges(), n)
    return edges

# Return a set of random edges which can be
# used for the evaluation of the six different metrics.
# The same set of random edges is used for the evaluation of the
# algorithms,to prevent different random choices from skewing
# the results.
def random_set_edges(G, k):
    random_edges = random_choices(G, len(G.edges()))
    selected_edges = []
    counter = 0
    for edge in random_edges:
        G.remove_edge(*edge)
    
        metric_f1_fof = common_neighbors(G, edge[0], k).nodeid.values
        metric_f2_fof = common_neighbors(G, edge[1], k).nodeid.values

        metric_f1_jaccard = jaccard_coef(G, edge[0], k).nodeid.values
        metric_f2_jaccard = jaccard_coef(G, edge[1], k).nodeid.values

        metric_f1_adamic = adamic_adar(G, edge[0], k).nodeid.values
        metric_f2_adamic = adamic_adar(G, edge[1], k).nodeid.values

        metric_f1_distance_common = distance_common_neigh(G, edge[0], distance_array, k).nodeid.values
        metric_f2_distance_common = distance_common_neigh(G, edge[1], distance_array, k).nodeid.values
        
        metric_f1_distance_jaccard = distance_jaccard(G, edge[0], distance_array, k).nodeid.values
        metric_f2_distance_jaccard = distance_jaccard(G, edge[1], distance_array, k).nodeid.values

        metric_f1_distance = graph_distance(G, edge[0], distance_array, k).nodeid.values
        metric_f2_distance = graph_distance(G, edge[1], distance_array, k).nodeid.values
            
        if edge[1] not in metric_f1_fof or edge[0] not in metric_f2_fof:
            G.add_edge(*edge)
            continue

        if edge[1] not in metric_f1_jaccard or edge[0] not in metric_f2_jaccard:
            G.add_edge(*edge)
            continue
            
        if edge[1] not in metric_f1_adamic or edge[0] not in metric_f2_adamic:
            G.add_edge(*edge)
            continue

        if edge[1] not in metric_f1_distance or edge[0] not in metric_f2_distance:
            G.add_edge(*edge)
            continue
            
        if edge[1] not in metric_f1_distance_jaccard or edge[0] not in metric_f2_distance_jaccard:
            G.add_edge(*edge)
            continue
            
        if edge[1] not in metric_f1_distance_common or edge[0] not in metric_f2_distance_common:
            G.add_edge(*edge)
            continue
            
        counter += 1
        selected_edges.append(edge)
        G.add_edge(*edge)
        
        if counter == 100:
            break

    return selected_edges


# Evaluate the performance of each algorithm
def eval_algos(G, algorithm, edges, distance_array, k):
    count_fails = 0
    avg_rank = 0
    for edge in edges:
        G.remove_edge(*edge)
    
        if algorithm == "fof":
            metric_f1 = common_neighbors(G, edge[0], k).nodeid.values
            metric_f2 = common_neighbors(G, edge[1], k).nodeid.values
        elif algorithm == "jaccard":
            metric_f1 = jaccard_coef(G, edge[0], k).nodeid.values
            metric_f2 = jaccard_coef(G, edge[1], k).nodeid.values
        elif algorithm == "adamic_adar":
            metric_f1 = adamic_adar(G, edge[0], k).nodeid.values
            metric_f2 = adamic_adar(G, edge[1], k).nodeid.values
        elif algorithm == "baseline":
            metric_f1 = baseline(G, edge[0], k).nodeid.values
            metric_f2 = baseline(G, edge[1], k).nodeid.values
        elif algorithm == "distance_common_neigh":
            metric_f1 = distance_common_neigh(G, edge[0], distance_array, k).nodeid.values
            metric_f2 = distance_common_neigh(G, edge[1], distance_array, k).nodeid.values
        elif algorithm == "distance_jaccard":
            metric_f1 = distance_jaccard(G, edge[0], distance_array, k).nodeid.values
            metric_f2 = distance_jaccard(G, edge[1], distance_array, k).nodeid.values
        elif algorithm == "distance":
            metric_f1 = graph_distance(G, edge[0], distance_array, k).nodeid.values
            metric_f2 = graph_distance(G, edge[1], distance_array, k).nodeid.values
            
        if edge[1] not in metric_f1 or edge[0] not in metric_f2:
            G.add_edge(*edge)
            count_fails += 1
            continue
        
        # add one to the rank to be greater than zero
        rank_f2inf1 = np.where(metric_f1 == edge[1])[0][0] + 1
        rank_f1inf2 = np.where(metric_f2 == edge[0])[0][0] + 1
        # get the avrage rank
        avg_rank += float(rank_f2inf1 + rank_f1inf2) / 2
        G.add_edge(*edge)

    if (100 - count_fails) != 0:
        avg_rank /= float(100 - count_fails)
        
    else:
        avg_rank = k

    return avg_rank, count_fails

# Ecaluate the perfomance of the six algorithm. Obviously, the distance-based
# algorithms peform better. Especially, the plain vanilla version of the algorithm
# has the best performance and also the smallest standart deviation. Howver, the
# distance-based algorithms are more computationally intensive than the other approaches.
def wrapper_eval(G, distance_array, k, rep):
    algorithm = ["Common Neighbors (FoF)", "Jaccard Coefficient", "Adamic and Adar function",
                 "Graph Distance", "Weighted Graph Distance (FoF)", "Weighted Graph Distance (Jaccard)", "Baseline"]
    rank_fof = []
    rank_jaccard = []
    rank_adamic_adar = []
    rank_distance = []
    rank_distance_fof = []
    rank_distance_jaccard = []
    rank_baseline = []
    
    for i in range(rep):
        edges = random_set_edges(G, k)
        rank_fof.append(eval_algos(g, "fof", edges, distance_array, k)[0])
        rank_jaccard.append(eval_algos(g, "jaccard", edges, distance_array, k)[0])
        rank_adamic_adar.append(eval_algos(g, "adamic_adar", edges, distance_array, k)[0])
        rank_distance.append(eval_algos(g, "distance", edges, distance_array, k)[0])
        rank_distance_fof.append(eval_algos(g, "distance_common_neigh", edges, distance_array, k)[0])
        rank_distance_jaccard.append(eval_algos(g, "distance_jaccard", edges, distance_array, k)[0])
        rank_baseline.append(eval_algos(g, "baseline", edges, distance_array, k)[0])

    rank_avg = []
    rank_avg.append(np.mean(rank_fof))
    rank_avg.append(np.mean(rank_jaccard))
    rank_avg.append(np.mean(rank_adamic_adar))
    rank_avg.append(np.mean(rank_distance))
    rank_avg.append(np.mean(rank_distance_fof))
    rank_avg.append(np.mean(rank_distance_jaccard))
    rank_avg.append(np.mean(rank_baseline))
    
    rank_std = []
    rank_std.append(np.std(rank_fof))
    rank_std.append(np.std(rank_jaccard))
    rank_std.append(np.std(rank_adamic_adar))
    rank_std.append(np.std(rank_distance))
    rank_std.append(np.std(rank_distance_fof))
    rank_std.append(np.std(rank_distance_jaccard))
    rank_std.append(np.std(rank_baseline))
    
    
    resultsdf = pd.DataFrame({"algorithm": algorithm, "average_rank": rank_avg, "stdev_rank": rank_std})
    return resultsdf

display(wrapper_eval(g, distance_array, 100, 5))

display(wrapper_eval(g, distance_array, 100, 20))

	nodeid	number_of_common_neighbors
0	513	19
1	400	18
2	559	18
3	373	17
4	492	17
5	500	17
6	378	16
7	436	16
8	431	15
9	514	15

	nodeid	number_of_common_neighbors
0	916	113
1	1238	103
2	1750	99
3	1230	98
4	1004	94
5	1791	94
6	1530	90
7	1172	89
8	1570	85
9	1597	84

	nodeid	number_of_common_neighbors
0	2	9
1	17	9
2	140	9
3	111	8
4	137	7
5	162	7
6	19	6
7	333	6
8	44	5
9	243	4

	nodeid	number_of_common_neighbors
0	46	2
1	68	2
2	99	2
3	131	2
4	175	2
5	177	2
6	225	2
7	227	2
8	278	2
9	321	2

	nodeid	jaccard_coef
0	513	0.017040
1	400	0.016364
2	559	0.016216
3	492	0.015554
4	500	0.015302
5	373	0.015152
6	436	0.014692
7	378	0.014665
8	515	0.013749
9	514	0.013711

	nodeid	number_of_common_neighbors
0	513	19
1	400	18
2	559	18
3	373	17
4	492	17
5	500	17
6	378	16
7	436	16
8	431	15
9	514	15

	nodeid	jaccard_coef
0	916	0.487069
1	1750	0.440000
2	1230	0.435556
3	1530	0.422535
4	1004	0.419643
5	1238	0.417004
6	1172	0.400901
7	1791	0.400000
8	1789	0.374429
9	1597	0.371681

	nodeid	jaccard_coef
0	2	0.562500
1	140	0.529412
2	17	0.473684
3	162	0.437500
4	111	0.380952
5	333	0.352941
6	44	0.312500
7	137	0.291667
8	19	0.240000
9	243	0.210526

	nodeid	jaccard_coef
0	321	0.666667
1	11	0.500000
2	12	0.500000
3	15	0.500000
4	18	0.500000
5	37	0.500000
6	43	0.500000
7	74	0.500000
8	114	0.500000
9	209	0.500000

	nodeid	adamic_adar_similarity
0	513	9.724953
1	400	9.307010
2	559	8.818876
3	500	8.528682
4	492	8.373501
5	373	8.368363
6	378	7.852810
7	436	7.813676
8	524	7.441609
9	514	7.437235

	nodeid	adamic_adar_similarity
0	916	53.556246
1	1238	48.753746
2	1750	46.496906
3	1230	45.811896
4	1004	44.041808
5	1791	43.959211
6	1530	41.808500
7	1172	41.579656
8	1570	40.006415
9	1597	39.146933

	nodeid	adamic_adar_similarity
0	2	6.843324
1	140	6.736628
2	17	6.736628
3	111	5.964252
4	162	5.214517
5	137	5.073642
6	333	4.598281
7	19	4.190559
8	44	3.471818
9	243	2.722615

	nodeid	adamic_adar_similarity
0	46	1.320278
1	68	1.320278
2	99	1.320278
3	131	1.320278
4	175	1.320278
5	177	1.320278
6	225	1.320278
7	227	1.320278
8	278	1.320278
9	321	1.320278

	nodeid	graph_distance_similarity
0	0	2.0
1	58	2.0
2	136	2.0
3	171	2.0
4	348	2.0
5	353	2.0
6	363	2.0
7	366	2.0
8	376	2.0
9	389	2.0

	nodeid	graph_distance_similarity
0	513	9.5
1	400	9.0
2	559	9.0
3	373	8.5
4	492	8.5
5	500	8.5
6	378	8.0
7	436	8.0
8	431	7.5
9	514	7.5

	nodeid	graph_distance_similarity
0	2	4.5
1	17	4.5
2	140	4.5
3	111	4.0
4	137	3.5
5	162	3.5
6	19	3.0
7	333	3.0
8	44	2.5
9	243	2.0

	nodeid	graph_distance_similarity
0	46	1.0
1	68	1.0
2	99	1.0
3	131	1.0
4	175	1.0
5	177	1.0
6	225	1.0
7	227	1.0
8	278	1.0
9	321	1.0

	nodeid	distance_jaccard_similarity
0	2	0.281250
1	140	0.264706
2	17	0.236842
3	162	0.218750
4	111	0.190476
5	333	0.176471
6	44	0.156250
7	137	0.145833
8	19	0.120000
9	243	0.105263

	nodeid	distance_jaccard_similarity
0	321	0.333333
1	11	0.250000
2	12	0.250000
3	15	0.250000
4	18	0.250000
5	37	0.250000
6	43	0.250000
7	74	0.250000
8	114	0.250000
9	209	0.250000

	nodeid	number_of_common_neighbors
0	513	19
1	400	18
2	559	18
3	373	17
4	492	17
5	500	17
6	378	16
7	436	16
8	431	15
9	514	15

	nodeid	graph_distance_similarity
0	0	2.0
1	58	2.0
2	136	2.0
3	171	2.0
4	348	2.0
5	353	2.0
6	363	2.0
7	366	2.0
8	376	2.0
9	389	2.0

	nodeid	graph_distance_similarity
0	513	9.5
1	400	9.0
2	559	9.0
3	373	8.5
4	492	8.5
5	500	8.5
6	378	8.0
7	436	8.0
8	431	7.5
9	514	7.5

	nodeid	graph_distance_similarity
0	916	56.5
1	1238	51.5
2	1750	49.5
3	1230	49.0
4	1004	47.0
5	1791	47.0
6	1530	45.0
7	1172	44.5
8	1570	42.5
9	1597	42.0

	nodeid	graph_distance_similarity
0	2	4.5
1	17	4.5
2	140	4.5
3	111	4.0
4	137	3.5
5	162	3.5
6	19	3.0
7	333	3.0
8	44	2.5
9	243	2.0

	nodeid	graph_distance_similarity
0	46	1.0
1	68	1.0
2	99	1.0
3	131	1.0
4	175	1.0
5	177	1.0
6	225	1.0
7	227	1.0
8	278	1.0
9	321	1.0

	nodeid	distance_jaccard_similarity
0	513	0.008520
1	400	0.008182
2	559	0.008108
3	492	0.007777
4	500	0.007651
5	373	0.007576
6	436	0.007346
7	378	0.007333
8	515	0.006874
9	514	0.006856

	nodeid	distance_jaccard_similarity
0	916	0.243534
1	1750	0.220000
2	1230	0.217778
3	1530	0.211268
4	1004	0.209821
5	1238	0.208502
6	1172	0.200450
7	1791	0.200000
8	1789	0.187215
9	1597	0.185841

	algorithm	average_rank	stdev_rank
0	Common Neighbors (FoF)	13.297	1.331561
1	Jaccard Coefficient	13.411	0.949513
2	Adamic and Adar function	12.481	1.312385
3	Graph Distance	1.001	0.002000
4	Weighted Graph Distance (FoF)	2.347	0.258720
5	Weighted Graph Distance (Jaccard)	2.448	0.187419
6	Baseline	100.000	0.000000

	algorithm	average_rank	stdev_rank
0	Common Neighbors (FoF)	13.85875	1.698487
1	Jaccard Coefficient	13.54875	1.520829
2	Adamic and Adar function	13.03550	1.644221
3	Graph Distance	1.00000	0.000000
4	Weighted Graph Distance (FoF)	2.34900	0.375991
5	Weighted Graph Distance (Jaccard)	2.49900	0.511871
6	Baseline	97.70000	8.845055

	nodeid	number_of_common_neighbors
0	513	19
1	400	18
2	559	18
3	373	17
4	492	17
5	500	17
6	378	16
7	436	16
8	431	15
9	514	15

	nodeid	graph_distance_similarity
0	0	2.0
1	58	2.0
2	136	2.0
3	171	2.0
4	348	2.0
5	353	2.0
6	363	2.0
7	366	2.0
8	376	2.0
9	389	2.0

	nodeid	graph_distance_similarity
0	513	9.5
1	400	9.0
2	559	9.0
3	373	8.5
4	492	8.5
5	500	8.5
6	378	8.0
7	436	8.0
8	431	7.5
9	514	7.5

	nodeid	graph_distance_similarity
0	2	4.5
1	17	4.5
2	140	4.5
3	111	4.0
4	137	3.5
5	162	3.5
6	19	3.0
7	333	3.0
8	44	2.5
9	243	2.0

	nodeid	graph_distance_similarity
0	46	1.0
1	68	1.0
2	99	1.0
3	131	1.0
4	175	1.0
5	177	1.0
6	225	1.0
7	227	1.0
8	278	1.0
9	321	1.0