Commit 2ec82128 authored by LANORE Vincent
Browse files

Added Jensen-Shannon divergence and distance + added progress bar

parent b255b6ba
......@@ -77,27 +77,49 @@ MESSAGE("Profiles file contains " + data(profiles.shape[0]) + " profiles.")
STEP("Picking random pairs")
from numpy.linalg import norm
import matplotlib.pyplot as plt
from scipy.stats import entropy
from math import sqrt
def pick(profiles):
    """Return one randomly chosen profile as a flat (1-D) numpy array.

    ``profiles`` is presumably a pandas DataFrame with one profile per row
    (TODO confirm against the loading code).  ``sample()`` draws a single
    row, ``.values`` exposes it as a ``(1, n_columns)`` array, and the
    trailing ``[0]`` unwraps that to the row itself.
    """
    return profiles.sample().values[0]
def euclidian_distance(p1, p2):
    """Return the Euclidean (L2) distance between two profiles.

    ``p1`` and ``p2`` may be numpy arrays or plain sequences of the same
    shape.  For 2-D inputs ``norm`` defaults to the Frobenius norm, which is
    the Euclidean distance between the flattened arrays.
    """
    import numpy as np  # local import: no module-level numpy alias is visible here

    # Cast to float before subtracting so that list inputs are accepted and
    # unsigned-integer arrays cannot wrap around (e.g. uint8 1 - 2 == 255).
    delta = np.asarray(p1, dtype=float) - np.asarray(p2, dtype=float)
    return norm(delta)
def pick(profiles):
    """Return one randomly chosen profile as a ``(1, n_columns)`` numpy array.

    ``profiles`` is presumably a pandas DataFrame with one profile per row
    (TODO confirm).  ``sample()`` draws a single row and ``.values`` exposes
    it as a 2-D array, so callers that need a flat list must index ``[0]``.

    NOTE(review): a ``pick`` returning the unwrapped row (``.values[0]``) is
    defined a few lines above; being the later definition, this one takes
    precedence.  Confirm which of the two is meant to remain.
    """
    return profiles.sample().values
def jensen_shannon_divergence(p1, p2):  # https://stackoverflow.com/questions/15880133/jensen-shannon-divergence
    """Return the Jensen-Shannon divergence between two profiles.

    Each input is rescaled to sum to one (L1 normalisation), then the
    divergence is the mean of the Kullback-Leibler divergences of both
    distributions from their midpoint ``M``.  ``scipy.stats.entropy`` uses the
    natural logarithm by default, so the result is in nats and lies in
    ``[0, ln 2]``.

    ``p1`` and ``p2`` may be 1-D arrays, ``(1, n)`` rows or plain sequences of
    equal length with non-negative entries.  An all-zero profile is not
    handled: its normalisation divides by zero.
    """
    import numpy as np  # local import: no module-level numpy alias is visible here

    # Flatten first: for a 2-D (1, n) row, norm(..., ord=1) would be the
    # matrix 1-norm (largest column sum) rather than the sum of the entries,
    # and entropy() would reduce along axis 0 and return an array instead of
    # a scalar.
    v1 = np.asarray(p1, dtype=float).ravel()
    v2 = np.asarray(p2, dtype=float).ravel()
    P = v1 / norm(v1, ord=1)
    Q = v2 / norm(v2, ord=1)
    M = 0.5 * (P + Q)
    return 0.5 * (entropy(P, M) + entropy(Q, M))
def jensen_shannon_distance(p1, p2):
    """Return the Jensen-Shannon distance between two profiles.

    This is the square root of ``jensen_shannon_divergence(p1, p2)``.
    """
    divergence = jensen_shannon_divergence(p1, p2)
    # The divergence is non-negative in exact arithmetic, but rounding can
    # leave a tiny negative value for near-identical profiles, on which
    # math.sqrt would raise ValueError.  NaN fails the comparison and is
    # passed through unchanged.
    if divergence < 0:
        divergence = 0.0
    return sqrt(divergence)
MESSAGE("Preparing dataframe...")
# Column layout: 20 "p1_<i>" columns, then 20 "p2_<i>" columns, then the
# distance between the two profiles of the pair.
columns = [prefix + str(i) for prefix in ("p1_", "p2_") for i in range(20)]
columns.append("distance")
pairs = pd.DataFrame(columns=columns)
MESSAGE("Picking profile pairs and computing distances...")
# Draw nb_pairs random pairs of profiles and store, for each pair, both
# profiles followed by their Jensen-Shannon distance; then plot the
# distribution of the distances.
nb_pairs = 1000
try:
    import progressbar
except ImportError:
    # progressbar is an optional dependency: iterate without visual feedback.
    myrange = range(nb_pairs)
    print("-- Progressbar is not installed! Cannot display progress. Please wait for a while...")
else:
    bar = progressbar.ProgressBar()
    myrange = bar(range(nb_pairs))
for i in myrange:
    # ravel() yields a flat vector whether pick() hands back a 1-D row or a
    # (1, n) slice, so the row built below always has one cell per column.
    p1 = pick(profiles).ravel()
    p2 = pick(profiles).ravel()
    # Other metrics available here: jensen_shannon_divergence and
    # euclidian_distance.
    dist = jensen_shannon_distance(p1, p2)
    new_row = p1.tolist() + p2.tolist() + [dist]
    pairs.loc[len(pairs)] = new_row
pairs["distance"].hist(bins=40)
plt.show()
#===================================================================================================
print(step("Writing result to file"))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment