(* profile.ml *)
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open File_formats

(** A profile input together with the workflow derived from it.
    Values of this type are built by [profile_l_of_splitted_profile]. *)
type profile = {
  profile_f : text_file workflow;
  (** The original profile file, wrapped as a workflow input. *)
  profile_c : text_file workflow;
  (** The profile file produced from the split step: either a single
      [profile_N.tsv] or the concatenation of several of them. *)
  profile_n : string;
  (** Name of the profile: base name of the input file without its
      extension, plus a suffix depending on the chosen binning. *)
}

12
(** [split_profile ~nb_sites ~dist_bins profile_f ~seed] builds the
    ["parse_profile.split_profile"] workflow, which runs the embedded
    [generate_pairs.py] script on [profile_f] inside the [Env.env_py]
    docker environment.

    The command sequence, in order:
    - creates [tmp] and [dest] and moves into [tmp];
    - launches [Xvfb] on display [:1] in a background subshell and exports
      [DISPLAY=:1];
    - dumps [Scripts.diffsel_script_utils] and [Scripts.generate_pairs]
      into [tmp] as [diffsel_script_utils.py] and [generate_pairs.py];
    - runs [python generate_pairs.py -o DEST/profile -s nb_sites
      -b dist_bins -r seed PROFILE_F].

    @param nb_sites integer forwarded to the script's [-s] option
    @param dist_bins string forwarded verbatim to the script's [-b] option
    @param seed integer forwarded to the script's [-r] option
    @return a directory workflow rooted at [dest]. NOTE(review): the files
    it contains are decided by [generate_pairs.py], which is not visible
    here; the caller in this file selects [profile_0.tsv] and friends. *)
let split_profile ~nb_sites ~dist_bins profile_f ~seed : text_file directory workflow =
  let utils_copy = tmp // "diffsel_script_utils.py" in
  let script_copy = tmp // "generate_pairs.py" in
  let out_prefix = dest // "profile" in
  let setup = [
    mkdir_p tmp ;
    mkdir_p dest ;
    cd tmp ;
  ] in
  (* Start Xvfb on display :1 in a background subshell, then export DISPLAY
     so that it points at that display. *)
  let virtual_display = [
    cmd "(" [ string "Xvfb :1 -screen 0 1024x768x16 & )" ] ;
    cmd "export" [ string "DISPLAY=:1" ] ;
  ] in
  (* Materialise the two embedded Python sources next to each other in tmp. *)
  let install_scripts = [
    cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; utils_copy ] ;
    cmd "cp" [ file_dump (string Scripts.generate_pairs) ; script_copy ] ;
  ] in
  (* generate_pairs.py [options...] -o <output-prefix> <profiles-file> *)
  let run_script =
    cmd "python" [
      (* Relative name: the script was copied into tmp, which is the
         current directory after [cd tmp]. *)
      string "generate_pairs.py" ;
      opt "-o" ident out_prefix ;
      opt "-s" int nb_sites ;
      opt "-b" string dist_bins ;
      opt "-r" int seed ;
      dep profile_f ;
    ]
  in
  workflow ~descr:"parse_profile.split_profile" [
    docker Env.env_py (
      and_list (List.concat [
          setup ;
          virtual_display ;
          install_scripts ;
          [ run_script ] ;
        ])
    )
  ]


43 44 45 46 47
(** [cat_file ~f_l] builds the ["cat.text_files"] workflow: one [cat]
    command taking the files of [f_l] as arguments, in list order, with its
    standard output redirected to [dest].

    NOTE(review): with an empty [f_l] the command is a bare [cat] with no
    file argument; confirm callers never pass an empty list. *)
let cat_file ~(f_l: text_file workflow list) : text_file workflow =
  workflow ~descr:"cat.text_files" [
    cmd "cat" ~stdout:dest (List.map f_l ~f:dep)
  ]
49

Carine Rey's avatar
Carine Rey committed
50 51 52 53 54
(** Binning choice consumed by [profile_l_of_splitted_profile] (as its
    [~nb_cat] argument) to pick the distance bins handed to
    [split_profile] and the naming of the resulting profile. *)
type profile_choice =
  | All
  (** Single bin ["[0.01,2]"]; the profile name gets no suffix. *)
  | Dist
  (** Single bin ["[0.6,2]"]; the profile name gets the
      ["_1categorie_max_dist"] suffix. *)
  | Unif_3
  (** Three bins ["[0.01,0.4],[0.4,0.6],[0.6,2]"], whose three output files
      are concatenated; the profile name gets the ["_3categories"] suffix. *)
  
55
(** [profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn ~seed]
    wraps the file [profile_fn] as a workflow input, runs [split_profile] on
    it with distance bins chosen from [nb_cat], and packs the outcome into a
    [profile] record.

    - [Unif_3]: bins ["[0.01,0.4],[0.4,0.6],[0.6,2]"]; [profile_c] is the
      [cat_file] concatenation of [profile_0.tsv], [profile_1.tsv] and
      [profile_2.tsv]; the name suffix is ["_3categories"].
    - [Dist]: bins ["[0.6,2]"]; [profile_c] is [profile_0.tsv]; the name
      suffix is ["_1categorie_max_dist"].
    - [All]: bins ["[0.01,2]"]; [profile_c] is [profile_0.tsv]; no suffix.

    @param nb_cat binning choice (a [profile_choice])
    @param nb_sites forwarded unchanged to [split_profile]
    @param profile_fn path of the profile file; its base name without
    extension is the stem of [profile_n]. [Filename.chop_extension] is
    applied to it, so the name is expected to carry an extension.
    @param seed forwarded unchanged to [split_profile] *)
let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn ~seed =
  let profile_f = input profile_fn in
  let prefix = Filename.chop_extension (Filename.basename profile_fn) in
  let dist_bins = match nb_cat with
    | Unif_3 -> "[0.01,0.4],[0.4,0.6],[0.6,2]"
    | Dist -> "[0.6,2]"
    | All -> "[0.01,2]"
  in
  let splitted_profile = split_profile ~nb_sites ~dist_bins profile_f ~seed in
  (* Picks one named file out of the split output directory. *)
  let bin_file name = splitted_profile / selector [ name ] in
  (* Every constructor is listed explicitly and there is no catch-all, so
     the compiler reports this match if [profile_choice] ever grows. *)
  match nb_cat with
  | Unif_3 ->
    let profile_c =
      cat_file
        ~f_l:[
          bin_file "profile_0.tsv" ;
          bin_file "profile_1.tsv" ;
          bin_file "profile_2.tsv" ;
        ]
    in
    { profile_f ; profile_c ; profile_n = prefix ^ "_3categories" }
  | Dist ->
    { profile_f ;
      profile_c = bin_file "profile_0.tsv" ;
      profile_n = prefix ^ "_1categorie_max_dist" }
  | All ->
    { profile_f ;
      profile_c = bin_file "profile_0.tsv" ;
      profile_n = prefix }