Commit 37368a9e authored by boussau
Browse files
parents c086bdd6 0e606449
......@@ -22,4 +22,6 @@ RUN apt-get update && \
RUN pip install --upgrade pip
RUN pip install ete3==3.0.0b35
RUN pip install scipy==0.19.1
RUN pip install biopython==1.72
......@@ -2,9 +2,9 @@
set -e
IMAGE_NAME=ete3
IMAGE_NAME=python_basics
DOCKERFILE_DIR=.
TAG=3.0.0b35
TAG=07172018
REPO=carinerey/$IMAGE_NAME:$TAG
docker build -t $REPO -f ./Dockerfile $DOCKERFILE_DIR
......
(((A:0.707394[&&NHX:Condition=0],(B:0.82502[&&NHX:Condition=1:Transition=1],(C:0.399704[&&NHX:Condition=0],(D:0.130784[&&NHX:Condition=0],E:0.126315[&&NHX:Condition=1:Transition=1]):0.679028[&&NHX:Condition=0]):0.355645[&&NHX:Condition=0]):0.829847[&&NHX:Condition=0]):0.25651[&&NHX:Condition=0],F:0.0179799[&&NHX:Condition=0]):0.487697[&&NHX:Condition=0],(G:0.797879[&&NHX:Condition=0],(H:0.338484[&&NHX:Condition=0],(I:0.526082[&&NHX:Condition=1],J:0.366882[&&NHX:Condition=1]):0.553277[&&NHX:Condition=1:Transition=1]):0.0758827[&&NHX:Condition=0]):0.913262[&&NHX:Condition=0]);
......@@ -16,6 +16,7 @@ type result = [
| `Topological_LG of [`topological] directory workflow
| `Topological_WAG of [`topological] directory workflow
| `Tdg09 of [`tdg09] directory workflow
| `Multinomial of [`multinomial] directory workflow
]
let meth_string_of_result = function
......@@ -29,6 +30,7 @@ let meth_string_of_result = function
| `Topological_LG _ -> "topological_LG"
| `Topological_WAG _ -> "topological_WAG"
| `Tdg09 _ -> "tdg09"
| `Multinomial _ -> "multinomial"
type dataset_res = {
model_prefix : string ;
......@@ -40,7 +42,7 @@ type dataset_res = {
}
let merge_results ~res_by_tools : text_file workflow =
let env = docker_image ~account:"carinerey" ~name:"ete3" ~tag:"3.0.0b35" () in
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07172018" () in
let command = List.map res_by_tools ~f:(fun res ->
let w = match res with
| `Pcoc d -> Pcoc.results d
......@@ -53,6 +55,7 @@ let merge_results ~res_by_tools : text_file workflow =
| `Topological_LG d -> Topological.results d
| `Topological_WAG d -> Topological.results d
| `Tdg09 d -> Tamuri.results d
| `Multinomial d -> Multinomial.results d
in
let opt = match res with
| `Pcoc _ -> string "--pcoc"
......@@ -65,6 +68,7 @@ let merge_results ~res_by_tools : text_file workflow =
| `Topological_LG _ -> string "--topological_LG"
| `Topological_WAG _ -> string "--topological_WAG"
| `Tdg09 _ -> string "--tdg09"
| `Multinomial _ -> string "--multinomial"
in
seq ~sep:" " [opt; dep w]
)
......@@ -79,7 +83,6 @@ let merge_results ~res_by_tools : text_file workflow =
let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflow =
(*let env = docker_image ~account:"carinerey" ~name:"ete3" ~tag:"3.0.0b35" () in*)
let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
(* Use the pcoc image because it has a working X server, needed to draw plots with ete3. *)
let meths = List.map res_by_tools ~f:(fun res ->
......@@ -94,6 +97,7 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
| `Topological_LG _ -> "Topological_LG08"
| `Topological_WAG _ -> "Topological_WAG01"
| `Tdg09 _ -> "Tdg09_1-FDR,Tdg09_prob_post"
| `Multinomial _ -> ""
in
string opt
) |> seq ~sep:","
......@@ -110,6 +114,7 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
| `Topological_LG _ -> "Topological_LG08:0.9"
| `Topological_WAG _ -> "Topological_WAG01:0.9"
| `Tdg09 _ -> "Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
| `Multinomial _ -> ""
in
string opt
) |> seq ~sep:","
......
......@@ -16,6 +16,7 @@ type result = [
| `Topological_LG of [`topological] directory workflow
| `Topological_WAG of [`topological] directory workflow
| `Tdg09 of [`tdg09] directory workflow
| `Multinomial of [`multinomial] directory workflow
]
val meth_string_of_result : result -> string
......
open Core
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
open File_formats
(** [multinomial ~tree_id ~tree_sc ~faa] builds the ["calc_multinomial"]
    workflow: it creates the destination directory, then runs the
    [Scripts.calc_multinomial] Python script inside the
    [carinerey/python_basics:07172018] image with [-t tree_sc], [-a faa]
    and [-o <dest>/out.tsv].

    [tree_id] is accepted for call-site compatibility but is not used by
    this body. *)
let multinomial ~(tree_id:_ workflow) ~(tree_sc:_ workflow) ~(faa:aminoacid_fasta workflow) : [`multinomial] directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07172018" () in
  (* The script is dumped to a file so that "python" can execute it. *)
  let script = file_dump (string Scripts.calc_multinomial) in
  let out_tsv = dest // "out.tsv" in
  let run_script =
    cmd "python" ~env [
      script ;
      opt "-t" dep tree_sc ;
      opt "-a" dep faa ;
      opt "-o" ident out_tsv ;
    ]
  in
  workflow ~descr:"calc_multinomial" [
    mkdir_p dest ;
    run_script ;
  ]
(** [results dir] selects the ["out.tsv"] file inside the directory
    workflow [dir]. *)
let results dir =
  let out_tsv = selector ["out.tsv"] in
  dir / out_tsv
......@@ -125,6 +125,7 @@ let repo_of_detection_result res =
| `Topological_LG w -> item ["Topological_LG.results.tsv"] (Topological.results w)
| `Topological_WAG w -> item ["Topological_WAG.results.tsv"] (Topological.results w)
| `Tdg09 w -> item ["Tdg09.results.tsv"] (Tamuri.results w)
| `Multinomial w -> item ["Multinomial.results.tsv"] (Multinomial.results w)
) ;
(
match res with
......@@ -138,6 +139,7 @@ let repo_of_detection_result res =
| `Topological_LG w -> item ["raw_results"] w
| `Topological_WAG w -> item ["raw_results"] w
| `Tdg09 w -> item ["raw_results"] w
| `Multinomial w -> item ["raw_results"] w
) ;
]
|> Repo.shift det_meth_prefix
......@@ -184,6 +186,7 @@ let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
| `Identical_WAG -> `Identical_WAG (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"WAG01")
| `Topological_LG -> `Topological_LG (Topological.topological ~faa ~tree:tree_id ~tree_conv ~prot_model:"LG08")
| `Topological_WAG -> `Topological_WAG (Topological.topological ~faa ~tree:tree_id ~tree_conv ~prot_model:"WAG01")
| `Multinomial -> `Multinomial (Multinomial.multinomial ~faa ~tree_id ~tree_sc)
let derive_from_dataset ~dataset ~preview ~fast_mode=
......@@ -191,7 +194,8 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
[`Pcoc;
`Tdg09;
`Identical_LG;
`Topological_LG;] ;
`Topological_LG;
`Multinomial] ;
if preview then
[]
else
......@@ -242,14 +246,15 @@ let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn
let repo = Dataset.repo dataset_l ~preview in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let validation_main ~outdir ~indir ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~tree_dir ~profile_fn ~use_concat () =
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~tree_dir ~profile_fn ~use_concat () =
let trees = Array.to_list @@ Sys.readdir tree_dir in
let repo = List.map trees ~f:(fun tree ->
let trees = [tree] in
let tree_prefix = Filename.chop_extension tree in
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l =
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns
@ parse_input_data indir in
@ indir_dataset_l in
let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
let post_analyses = Post_analyses.post_analyses_of_dataset_results_l ~dataset_results_l in
let repo_per_tree = [
......@@ -318,7 +323,7 @@ let validation_command =
let outdir =
flag "--outdir" (required string) ~doc:"PATH Output directory"
and indir =
flag "--indir" (required string) ~doc:"PATH Input directory"
flag "--indir" (optional string) ~doc:"PATH Input directory"
and preview =
flag "--preview-mode" no_arg ~doc:" Preview mode"
and fast_mode =
......@@ -336,5 +341,5 @@ let validation_command =
and profile_fn =
flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
in
validation_main ~outdir ~indir ?ns ?np ?mem ~preview ~fast_mode ~tree_dir ~profile_fn ~use_concat
validation_main ~outdir ?indir ?ns ?np ?mem ~preview ~fast_mode ~tree_dir ~profile_fn ~use_concat
]
......@@ -17,11 +17,13 @@ type t_choices = {
(** Simulation-info table attached to one dataset result, together with
    the prefixes used to name files derived from it. *)
type simu_infos = {
(* Workflow producing the simulation-info text file; [None] when the dataset has none. *)
simu_infos: text_file workflow option ;
(* Model prefix of the dataset; used as a component of generated file names. *)
model_prefix: string ;
(* Tree prefix of the dataset; combined with [model_prefix] when naming per-dataset files. *)
tree_prefix: string ;
}
(** Post-analysis outputs computed from a list of dataset results. *)
type post_analyses = {
(* Value returned by [get_t_choices]; [None] when it returns nothing. *)
t_choices : t_choices option;
(* One [simu_infos] record per dataset result. *)
simu_infos_l : simu_infos list;
(* File selected from the output of [group_simu_infos].
   NOTE(review): the selected file is "out.pdf" although the type says
   [text_file] -- confirm whether a pdf-specific type should be used. *)
simu_infos_plot : text_file workflow ;
}
......@@ -57,6 +59,31 @@ let make_simu_infos ?(descr="") ?(fna_infos) ~faa ~tree_sc : text_file workflow
];
]
(** [group_simu_infos ~simu_infos_l] builds the
    ["post_analyses.plot_simu_infos"] workflow.

    Inside the [carinerey/r_basics:07162018] image it runs, in order:
    - creation of the destination and temporary directories;
    - one [cp] per element of [simu_infos_l] that carries a table, copying
      it to [<tmp>/<tree_prefix>.<model_prefix>.tsv] (elements whose
      [simu_infos] is [None] are skipped);
    - the [Scripts.plot_hyp_simu_validation] R script, reading the
      temporary directory and writing with the output prefix [<dest>/out]. *)
let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"07162018" () in
  let out = dest // "out" in
  (* Each record yields at most one copy command, so filter_map is enough. *)
  let copy_cmds =
    List.filter_map simu_infos_l ~f:(fun s ->
        match s.simu_infos with
        | None -> None
        | Some w ->
          Some (cmd "cp" [dep w ; tmp // (s.tree_prefix ^"."^ s.model_prefix ^ ".tsv")])
      )
  in
  let plot_cmd =
    cmd "Rscript" [
      file_dump (string Scripts.plot_hyp_simu_validation) ;
      opt "--input_dir" ident tmp;
      opt "--out " ident out;
    ]
  in
  let steps = [mkdir_p dest; mkdir_p tmp] @ copy_cmds @ [plot_cmd] in
  workflow ~descr:"post_analyses.plot_simu_infos" [
    docker env (and_list steps)
  ]
let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
let h0_res = List.find dataset_results_l (is_hyp ~hyp: "H0") in
let ha_res = List.find dataset_results_l (is_hyp ~hyp: "HaPCOC") in
......@@ -85,11 +112,20 @@ let get_simu_infos ~dataset_results =
let post_analyses_of_dataset_results_l ~dataset_results_l =
let t_choices = get_t_choices ~dataset_results_l in
let simu_infos_l = List.map dataset_results_l ~f:(fun dataset_results ->
{simu_infos = (get_simu_infos ~dataset_results); model_prefix = dataset_results.model_prefix} )in
{t_choices ; simu_infos_l}
{simu_infos = (get_simu_infos ~dataset_results);
tree_prefix =dataset_results.tree_prefix ;
model_prefix = dataset_results.model_prefix
}
) in
let simu_infos_plot = group_simu_infos ~simu_infos_l / selector ["out.pdf"] in
{t_choices ; simu_infos_l; simu_infos_plot}
let repo_of_post_analyses ~prefix ~post_analyses =
[
Repo.[
item [prefix ^ ".pdf"] post_analyses.simu_infos_plot
] |> Repo.shift "simu_infos"
;
(match post_analyses.t_choices with
| None -> []
| Some w ->
......@@ -99,7 +135,7 @@ let repo_of_post_analyses ~prefix ~post_analyses =
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
] |> Repo.shift "t_choices"
);
List.map post_analyses.simu_infos_l ~f:(fun simu_infos ->
(List.map post_analyses.simu_infos_l ~f:(fun simu_infos ->
match simu_infos.simu_infos with
| None -> []
| Some w ->
......@@ -107,4 +143,5 @@ let repo_of_post_analyses ~prefix ~post_analyses =
item [prefix ^ "." ^ simu_infos.model_prefix ^ ".tsv"] w
] |> Repo.shift "simu_infos"
) |> List.concat
);
] |> List.concat
......@@ -70,6 +70,8 @@ availableOptions.add_argument('--topological_WAG', type=str,
help="topological_WAG output name", default = None)
availableOptions.add_argument('--tdg09', type=str,
help="tdg09 output name", default = None)
availableOptions.add_argument('--multinomial', type=str,
help="multinomial output name", default = None)
##############
......@@ -87,6 +89,7 @@ df_identical_WAG = pd.DataFrame()
df_topological_LG = pd.DataFrame()
df_topological_WAG = pd.DataFrame()
df_tdg09 = pd.DataFrame()
df_multinomial = pd.DataFrame()
OutName = args.output
if args.pcoc :
......@@ -134,12 +137,15 @@ if args.topological_WAG :
if args.tdg09 :
df_tdg09 = pd.read_csv(args.tdg09, sep="\t")
if args.multinomial :
df_multinomial = pd.read_csv(args.multinomial, sep="\t")
df_list = [df for df in [df_pcoc, df_pcoc_gamma, df_pcoc_C60,
df_diffsel, df_diffsel_bis,
df_identical_LG, df_identical_WAG,
df_topological_LG, df_topological_WAG,
df_tdg09
df_tdg09,
df_multinomial
] if not df.empty ]
df_list_len = [df.shape[0] for df in df_list]
......
......@@ -86,7 +86,7 @@ if methods_to_be_plotted:
threshold_by_method = args.threshold_by_method
dic_threshold_by_method = {}
if threshold_by_method:
dic_threshold_by_method = {mt.split(":")[0]:float(mt.split(":")[1]) for mt in threshold_by_method.split(",") }
dic_threshold_by_method = {mt.split(":")[0]:float(mt.split(":")[1]) for mt in threshold_by_method.split(",") if mt}
MESSAGE("Threshold by method: "+", ".join([(param(key)+": "+param(value)) for key,value in dic_threshold_by_method.items()]))
......
#!/usr/bin/env Rscript
library("optparse")
library("reshape2")
library("ggplot2")
library("cowplot")
# Command-line interface:
#   --input_dir   directory holding the input tables (mandatory, no default)
#   -o / --out    output prefix (defaults to "out")
option_list <- list(
  make_option("--input_dir", type = "character", default = NULL,
              help = "Input dir", metavar = "character"),
  make_option(c("-o", "--out"), type = "character", default = "out",
              help = "output prefix [default= %default]", metavar = "character")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# --input_dir has no default: print the usage and abort when it is missing.
if (is.null(opt$input_dir)) {
  print_help(opt_parser)
  stop("At least one argument must be supplied (input_dir)", call. = FALSE)
}

input_dir <- opt$input_dir
## fun...
## program...
# List the input tables found in input_dir and derive, for each file, the
# "tree" and "hyp" labels from the first and second dot-separated components
# of its name.
files = list.files(input_dir)
# Fail early with an explicit message: with no input file, the name parsing
# below yields no V1/V2 columns and the script would otherwise stop later
# with an unrelated-looking error.
if (length(files) == 0) {
  stop(paste0("No input file found in input_dir: ", input_dir), call.=FALSE)
}
files_split = strsplit(files, ".", fixed = TRUE)
files_df = as.data.frame(do.call(rbind, files_split))
files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, hyp = files_df$V2)
# Read one input table and tag its rows with their origin.
#
# x: named character vector with entries "files" (path of a tab-separated
#    file with a header line), "tree" and "hyp".
# Returns the table read from x["files"], with two extra columns "tree" and
# "hyp" filled with the corresponding entries of x.
read_dir <- function(x) {
  table_path <- x["files"]
  tagged <- read.csv(table_path, sep = "\t", header = TRUE)
  tagged$tree <- x["tree"]
  tagged$hyp <- x["hyp"]
  tagged
}
# Read every listed table (one call to read_dir per row of files_df_ok) and
# stack the results into a single data frame.
df <- do.call(rbind, apply(files_df_ok, 1, read_dir))

# Share of NbCommonAA among the three amino-acid counts of each row.
df$CommonRate <- df$NbCommonAA / (df$NbCommonAA + df$NbOnlyConvAA + df$NbOnlyNonConvAA)

x_labs <- "# of common AA between Conv and not Conv Leaves / # of AA"
y_labs <- "# of sites"

# Histogram of CommonRate, one panel per (hyp, tree) pair.
plot <- ggplot(df, aes(x = CommonRate, fill = hyp)) +
  theme_bw() +
  labs(x = x_labs, y = y_labs) +
  geom_histogram(binwidth = 0.05) +
  facet_grid(hyp ~ tree)

# The figure size scales with the number of distinct trees (columns) and
# hypotheses (rows); limitsize = FALSE lets large grids be saved.
output_pdf <- paste0(opt$out, ".pdf")
save_plot(output_pdf,
          plot,
          ncol = 0.4 * length(unique(df$tree)),
          nrow = 0.35 * length(unique(df$hyp)),
          base_aspect_ratio = 2,
          limitsize = FALSE
)

# Also export the merged table next to the figure.
output_tsv <- paste0(opt$out, ".tsv")
write.table(df, file = output_tsv, row.names = FALSE, quote = FALSE, sep = "\t")
......@@ -34,7 +34,7 @@ let tdg09 ~(faa:aminoacid_fasta workflow) ~(tree:_ workflow) : [`tdg09] directo
cmd "python" ~stdout:tdg09_out [
string "try_again.py";
opt "--timeout" int 60;
string "\"tdg09.sh";
string "\"java -cp /tdg09/tdg09-1.1.2/dist/tdg09.jar tdg09.Analyse";
opt "-tree" ident tmp_tree;
opt "-alignment" ident tmp_ali_phy ;
opt "-threads" int 1 ;
......
......@@ -3,7 +3,7 @@ open Bistro.EDSL
open Bistro.Std
open File_formats
let env = docker_image ~account:"carinerey" ~name:"ete3:3.0.0b35" ()
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07172018" ()
let prepare tree =
workflow ~descr:"utils.parse_input_tree" [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment