Commit b255b6ba authored by LANORE Vincent
Browse files

generate_pairs now outputs a tsv of profile pairs + distance

parent 4f6531ee
......@@ -76,6 +76,7 @@ MESSAGE("Profiles file contains " + data(profiles.shape[0]) + " profiles.")
#===================================================================================================
STEP("Picking random pairs")
from numpy.linalg import norm
import matplotlib.pyplot as plt
def euclidian_distance(p1, p2):
    """Return the Euclidean distance between two profiles.

    Args:
        p1: first profile, as a NumPy array.
        p2: second profile, as a NumPy array of the same shape as ``p1``.

    Returns:
        The norm of the element-wise difference ``p1 - p2``. With the
        default arguments ``norm`` computes the 2-norm for 1-D input and
        the Frobenius norm for 2-D input; for the single-row arrays
        produced by ``pick`` both are the plain Euclidean distance.
    """
    return norm(p1 - p2)
......@@ -83,4 +84,26 @@ def euclidian_distance(p1, p2):
def pick(profiles):
    """Return one randomly sampled profile as a NumPy array.

    Args:
        profiles: table of profiles (one profile per row) exposing a
            pandas-style ``sample()`` method.

    Returns:
        The ``.values`` of a single-row random sample, i.e. a 2-D array
        of shape ``(1, n_columns)`` rather than a flat vector; callers
        that need a flat list use ``.tolist()[0]``.
    """
    return profiles.sample().values
# NOTE(review): looks like a leftover sanity check (prints the distance of one
# throwaway random pair) -- confirm it is still wanted.
print(euclidian_distance(pick(profiles), pick(profiles)))

MESSAGE("Preparing dataframe...")
# Take the profile width from the data rather than hard-coding 20, so the
# column names always match the length of the rows built below.
profile_width = profiles.shape[1]
columns = (
    ["p1_" + str(i) for i in range(profile_width)]
    + ["p2_" + str(i) for i in range(profile_width)]
    + ["distance"]
)

MESSAGE("Picking profile pairs and computing distances...")
nb_pairs = 1000
# Collect rows in a plain list and build the DataFrame once: appending with
# `pairs.loc[len(pairs)] = row` reallocates the frame on every iteration.
rows = []
for _ in range(nb_pairs):
    p1 = pick(profiles)
    p2 = pick(profiles)
    dist = euclidian_distance(p1, p2)
    # pick() returns a (1, n) array, hence the [0] to get the flat profile.
    rows.append(p1.tolist()[0] + p2.tolist()[0] + [dist])
pairs = pd.DataFrame(rows, columns=columns)

# Optional visual check of the distance distribution:
# pairs["distance"].hist(bins = 40)
# plt.show()
#===================================================================================================
# Final stage: dump the generated pairs to a tab-separated file.
# NOTE(review): the previous section announces itself with STEP(...), this one
# with print(step(...)) -- confirm whether both spellings are intended.
print(step("Writing result to file"))
MESSAGE("Writing pairs to " + param(out))

# The first header field gets a leading '#'; presumably so that readers which
# treat '#' as a comment marker skip the header line -- confirm with consumers.
columns[0] = "#p1_0"

# One row per pair, no index column, header taken from the patched name list.
pairs.to_csv(out, header=columns, index=False, sep="\t")
MESSAGE("Done :)")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment