Commit 4f6531ee authored by LANORE Vincent's avatar LANORE Vincent
Browse files

More work towards generate_pairs. Now able to pick profiles and compute...

More work towards generate_pairs. Now able to pick profiles and compute the Euclidean distance between them.
parent f4d28bc5
......@@ -127,3 +127,10 @@ def SUBMESSAGE(string):
def WARNING(string):
    """Format ``string`` with ``warning_str`` and emit it through ``myprint``."""
    formatted = warning_str(string)
    myprint(formatted)
def SUCCESS(string):
    """Format ``string`` with ``success_str`` and emit it through ``myprint``."""
    formatted = success_str(string)
    myprint(formatted)
def FAILURE(string):
    """Report a fatal error and terminate the script.

    Formats ``string`` with ``failure_str``, emits it through ``myprint``,
    then ends the process with exit status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    # Imported locally so this function does not rely on a top-of-file import.
    import sys

    myprint(failure_str(string))
    # sys.exit() rather than the bare exit() helper: exit() is injected by the
    # ``site`` module for interactive sessions and is absent under ``python -S``.
    sys.exit(1)
......@@ -57,6 +57,30 @@ STEP("Reading profiles from file")
import pandas as pd
MESSAGE("Reading tsv file")
# Read the file exactly once, inside the guard, so that a missing or malformed
# file is reported through FAILURE instead of an unhandled traceback.
# The file is tab-separated with no header row; the table is transposed right
# after loading, so the lines of the file become the columns of ``profiles``.
try:
    profiles = pd.read_csv(profiles_file, sep="\t", header=None).transpose()
except Exception:
    # ``Exception`` (not a bare ``except:``) lets KeyboardInterrupt and
    # SystemExit propagate instead of being reported as a read failure.
    FAILURE("Something went wrong while trying to open profiles file.")
else:
    # Only reached when the read and the transpose both succeeded.
    SUCCESS("Profiles file seems to exist.")
# After the transpose, the column count equals the number of lines in the file;
# exactly 20 are expected.
if profiles.shape[1] == 20:
    SUCCESS("Profiles file contains 20 lines.")
else:
    FAILURE("Profiles file does not seem to contain 20 lines.")
# Sum every row of the table; each sum must lie strictly between 0.98 and 1.02.
profiles_sum = profiles.sum(axis=1)
if not (max(profiles_sum) < 1.02 and min(profiles_sum) > 0.98):
    # At least one row total falls outside the tolerated range: report the extremes.
    FAILURE("Some profiles don't sum to 1 (sum is comprised between " + data(min(profiles_sum)) + " and " + data(max(profiles_sum)) + ").")
else:
    SUCCESS("All profiles sum to 1.")
# The row count of the table is reported as the number of profiles.
MESSAGE("Profiles file contains " + data(profiles.shape[0]) + " profiles.")
#===================================================================================================
# Next stage of the script: draw profiles at random and measure the distance
# between them.
STEP("Picking random pairs")
from numpy.linalg import norm
def euclidian_distance(p1, p2):
    """Return the Euclidean distance between two profiles.

    The result is ``numpy.linalg.norm`` of ``p1 - p2`` with default settings:
    the 2-norm for 1-D arrays and the Frobenius norm for 2-D arrays. For a
    single-row 2-D array both give the same value.
    """
    difference = p1 - p2
    return norm(difference)
def pick(profiles):
    """Draw one random row of ``profiles`` and return it as a numpy array.

    ``DataFrame.sample()`` with default arguments selects a single row, so the
    returned array is 2-D with shape ``(1, number_of_columns)``.
    """
    sampled_row = profiles.sample()
    return sampled_row.values
# Quick check: draw two random profiles and print the distance between them.
first_profile = pick(profiles)
second_profile = pick(profiles)
print(euclidian_distance(first_profile, second_profile))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment