Commit e5695953 authored by Carine Rey's avatar Carine Rey
Browse files

add pcoc_v2 (V1 implementation with C10/C60 + manual profiles with V1 implementation)

parent 02b0d86b
......@@ -46,8 +46,11 @@ test:
.PHONY: realdata_test
realdata_test:
cd example && \
reviewphiltrans realdata --outdir outdir_realdata_test --indir real_data/online_rodent --np 4 &&\
reviewphiltrans realdata --pcoc --outdir outdir_realdata_test --indir real_data/online_rodent --np 4
../_build/install/default/bin/reviewphiltrans realdata --outdir outdir_realdata_test --indir real_data/online_rodent --np 16 &&\
../_build/install/default/bin/reviewphiltrans realdata --pcoc-v2 --pcoc-pcp --pcoc --outdir outdir_realdata_test --indir real_data/online_rodent --np 16 &&\
../_build/install/default/bin/reviewphiltrans realdata --pcoc-v2 --pcoc-pcp --pcoc --gt --outdir outdir_realdata_test --indir real_data/online_rodent --np 16 &&\
../_build/install/default/bin/reviewphiltrans realdata --pcoc-v2 --pcoc-pcp --pcoc --gt --dnds --outdir outdir_realdata_test --indir real_data/online_rodent --np 16 &&\
../_build/install/default/bin/reviewphiltrans realdata --pcoc-v2 --pcoc-pcp --pcoc --tdg09 --gt --dnds --outdir outdir_realdata_test --indir real_data/online_rodent --np 16
# -----------------------------------------------------------------------
# Realdata
......
......@@ -13,6 +13,9 @@ let realdata_main ~use_diffsel
~use_pcoc
~use_pcoc_c60
~use_pcoc_gamma
~use_pcoc_v2
~use_pcoc_pcp
~use_tdg09
~use_topological
~use_identical
~no_use_multinomial
......@@ -34,6 +37,9 @@ let realdata_main ~use_diffsel
sw use_pcoc `Pcoc ;
sw use_pcoc_c60 `Pcoc_C60 ;
sw use_pcoc_gamma `Pcoc_gamma ;
sw use_pcoc_v2 `PCOC_v2 ;
sw use_pcoc_pcp `PCOC_pcp ;
sw use_tdg09 `Tdg09 ;
sw use_topological `Topological ;
sw use_identical `Identical ;
sw use_multinomial `Multinomial ;
......@@ -67,6 +73,12 @@ let realdata_command =
flag "--pcoc-c60" no_arg ~doc:" use the pcoc method with c60 profils (very_slow)."
and use_pcoc_gamma =
flag "--pcoc-gamma" no_arg ~doc:" use the pcoc method with the gamma option (very_slow)."
and use_pcoc_v2 =
flag "--pcoc-v2" no_arg ~doc:" use the pcoc v2 method with the C10 profiles (slow)."
and use_pcoc_pcp =
flag "--pcoc-pcp" no_arg ~doc:" use the pcoc v2 method with the physico-chemical profiles (slow)."
and use_tdg09 =
flag "--tdg09" no_arg ~doc:" use the tdg09 method (slow)."
and use_topological =
flag "--topological" no_arg ~doc:" use the topological method (fast)."
and use_identical =
......@@ -86,6 +98,9 @@ let realdata_command =
~use_pcoc
~use_pcoc_c60
~use_pcoc_gamma
~use_pcoc_v2
~use_pcoc_pcp
~use_tdg09
~use_topological
~use_identical
~no_use_multinomial
......
......@@ -78,7 +78,7 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkf
] ;
]
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?diffsel ?diffseldsparse () : text_file pworkflow =
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?pcoc_v2 ?pcoc_pcp ?diffsel ?diffseldsparse () : text_file pworkflow =
Workflow.shell ~descr:"convergence_detection.merge_results" [
cmd "python" ~img:Env.env_py [
file_dump (string Scripts.merge_det_results) ;
......@@ -87,6 +87,8 @@ let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topol
option (opt "--tdg09" dep) tdg09 ;
option (opt "--identical_LG" dep) identical ;
option (opt "--topological_LG" dep) topological ;
option (opt "--pcoc_v2" dep) pcoc_v2 ;
option (opt "--pcoc_pcp" dep) pcoc_pcp ;
option (opt "--pcoc" dep) pcoc ;
option (opt "--diffsel" dep) diffsel ;
option (opt "--diffseldsparse" dep) diffseldsparse ;
......@@ -96,7 +98,7 @@ let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topol
]
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg pworkflow =
let img = Pcoc.img in
let img = Env.env_pcoc in
(* use of pcoc env due to its working X server for drawing plots with ete3 *)
let meths = List.map res_by_tools ~f:(fun res ->
let opt = match res with
......@@ -163,7 +165,7 @@ let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~t
let plot_convergent_sites ?(plot_all_sites = true) ~alignment ~detection_results ~tree () =
Workflow.shell ~descr:"plot_convergent_sites.py" [
within_container Pcoc.img (
within_container Env.env_pcoc (
and_list [
mkdir_p dest ;
cmd "python" [
......
......@@ -40,6 +40,8 @@ val merge_result_tables :
?identical:text_file pworkflow ->
?topological:text_file pworkflow ->
?pcoc:text_file pworkflow ->
?pcoc_v2:text_file pworkflow ->
?pcoc_pcp:text_file pworkflow ->
?diffsel:text_file pworkflow ->
?diffseldsparse:text_file pworkflow ->
unit ->
......
......@@ -88,11 +88,17 @@ module Make(D : Dataset) = struct
|> Diffseldsparse.readdiffseldsparse
|> Diffseldsparse.results
let pcoc ?(gamma = true) ?(ncat = 60) d =
let pcoc ?(gamma = true) ?(ncat = 10) d =
let faa = amino_acid_alignment d in
let tree = tree d in
Pcoc.pcoc ~catx_est:ncat ~plot_complete:false ~gamma ~faa ~tree ()
|> Pcoc.results
(* Run the PCOC v2 detection on dataset [d] and return its result table.
   The amino-acid alignment and the tree of [d] are handed to
   [Pcoc.pcoc_v2]; [gamma] defaults to [true] and [aa_profiles] to [`C10]. *)
let pcoc_v2 ?(gamma = true) ?(aa_profiles = `C10) d =
  let alignment = amino_acid_alignment d in
  let phylogeny = tree d in
  let run =
    Pcoc.pcoc_v2 ~aa_profiles ~gamma ~faa:alignment ~tree:phylogeny ()
  in
  Pcoc.results run
let dn_ds_dnds_trees d =
Testnh.dn_ds_trees_real_data ~fna:(nucleotide_alignment d) ~tree:(tree d) ()
......
......@@ -15,3 +15,7 @@ let env_msd = [ docker_image ~account:"carinerey" ~name:"msd" ~tag:"31082018" ()
(* Docker image vlanore/diffsel, tag master_a4b5. *)
let env_diffsel =
  [ docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"master_a4b5" () ]

(* Docker image nanozoo/raxml-ng, tag 0.9.0--435348d. *)
let env_raxml_ng =
  [ docker_image ~account:"nanozoo" ~name:"raxml-ng" ~tag:"0.9.0--435348d" () ]

(* Docker image carinerey/pcoc, tag 08312018: the original PCOC environment. *)
let env_pcoc =
  [ docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"08312018" () ]

(* Docker image carinerey/pcoc, tag v1.1.0.beta: same repository as
   [env_pcoc] but a different tag, kept as a separate environment. *)
let env_pcoc_v2 =
  [ docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"v1.1.0.beta" () ]
......@@ -99,7 +99,7 @@ let identical ?(descr="") ~(tree_id:_ pworkflow) ~(tree_sc:_ pworkflow) ~(faa:am
let proba = Workflow.select run_bppancestor ["sites.tsv"] in
Workflow.shell ~descr:("identical."^prot_model^"."^descr) [
mkdir dest ;
cmd "python" ~img:Pcoc.img [
cmd "python" ~img:Env.env_pcoc [
file_dump (string Scripts.calc_identical) ;
opt "-t" dep tree_sc;
opt "-a" dep faa;
......
......@@ -3,7 +3,7 @@ open Bistro
open Bistro.Shell_dsl
open File_formats
let img = [ docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"08312018" () ]
let img = Env.env_pcoc
let pcoc ?(descr = "") ?plot_complete ?gamma ?catx_est ?max_gap_per_pos ?max_gap_per_conv_leaf ~(faa:aminoacid_fasta pworkflow) ~(tree:_ workflow) (): [`pcoc] dworkflow =
Workflow.shell ~descr:("convergence_detection.pcoc."^descr) [
......@@ -25,3 +25,32 @@ let results run_pcoc : text_file pworkflow =
Workflow.shell ~descr:"convergence_detection.selector_pcoc" [
cmd "cp" [out_pcoc; ident dest] ;
]
(* Container image used for the PCOC v2 runs; alias of [Env.env_pcoc_v2]. *)
let img_v2 = Env.env_pcoc_v2
(** Amino-acid profile set handed to [pcoc_det.py] through [-est_profiles]:
    [`C10] and [`C60] are passed by name, while [`Physic_properties] points
    the tool at a profile table written to a temporary file. *)
type aa_profiles = [
  | `C10
  | `C60
  | `Physic_properties
]
(** [pcoc_v2 ?descr ?gamma ~aa_profiles ~faa ~tree ()] builds the shell
    workflow that runs [pcoc_det.py] in the [img_v2] container.

    The workflow first writes [Scripts.physic_properties_profiles] to a
    temporary TSV file (whatever [aa_profiles] is), then calls [pcoc_det.py]
    with the tree ([-t]), the alignment ([-aa]), the output directory ([-o]),
    ["-m -"], ["--no_mixture --V1"], and [-est_profiles] set to ["C10"],
    ["C60"] or the temporary TSV file depending on [aa_profiles].

    @param descr suffix appended to the workflow description.
    @param gamma forwarded through Bistro's [option]/[flag] combinators;
           presumably [--gamma] is emitted only for [Some true] (TODO confirm
           against the Bistro documentation). *)
let pcoc_v2 ?(descr = "") ?gamma ~(aa_profiles:aa_profiles) ~(faa:aminoacid_fasta pworkflow) ~(tree:_ workflow) (): [`pcoc] dworkflow =
  (* Scratch location of the dumped physico-chemical profile table. *)
  let pcp_table = tmp // "Physic_properties_profiles.tsv" in
  let named_profiles name = opt "-est_profiles" string name in
  let profiles_arg =
    match aa_profiles with
    | `C10 -> named_profiles "C10"
    | `C60 -> named_profiles "C60"
    | `Physic_properties -> opt "-est_profiles" ident pcp_table
  in
  (* Step 1: materialise the embedded profile table in the scratch space. *)
  let dump_profiles =
    cmd "cat" ~stdout:pcp_table
      [ (file_dump (string Scripts.physic_properties_profiles)) ]
  in
  (* Step 2: the detection itself. *)
  let detect =
    cmd "pcoc_det.py" ~img:img_v2 [
      opt "-t" dep tree ;
      opt "-m" string "-" ;
      opt "-aa" dep faa ;
      opt "-o" ident dest ;
      string "--no_mixture --V1" ;
      option (flag string "--gamma") gamma ;
      profiles_arg ;
    ]
  in
  Workflow.shell
    ~descr:("convergence_detection.pcoc_v2." ^ descr)
    [ dump_profiles ; detect ]
open Bistro
open File_formats
val img : Shell_dsl.container_image list
val pcoc :
?descr:string ->
?plot_complete:bool ->
......@@ -18,3 +16,18 @@ val pcoc :
(** [results run] turns the output directory of a PCOC run into a single
    text-file workflow.
    NOTE(review): the implementation copies one file of the run to the
    destination; which file is selected is not visible here -- confirm. *)
val results :
[`pcoc] dworkflow ->
text_file pworkflow
(** Amino-acid profile sets that can be requested from {!pcoc_v2}. *)
type aa_profiles = [
  | `C10
  | `C60
  | `Physic_properties
]
(** [pcoc_v2 ?descr ?gamma ~aa_profiles ~faa ~tree ()] describes a run of
    [pcoc_det.py] on the alignment [faa] and the tree [tree], using the
    profile set selected by [aa_profiles]. The result is a directory
    workflow that can be passed to {!results}.

    @param descr suffix appended to the workflow description.
    @param gamma optional switch for the [--gamma] flag of [pcoc_det.py]. *)
val pcoc_v2 :
?descr:string ->
?gamma:bool ->
aa_profiles:aa_profiles ->
faa:aminoacid_fasta pworkflow ->
tree:_ pworkflow ->
unit ->
[`pcoc] dworkflow
......@@ -173,7 +173,7 @@ let make_t_choices ?(tree_prefix="") ~haPCOC_mr
]
let make_simu_infos ?(descr="") ?(fna_infos) ~faa ~tree_sc : text_file pworkflow =
let img = Pcoc.img in
let img = Env.env_pcoc in
Workflow.shell ~descr:("post_analyses.simu_infos." ^ descr) [
cmd "python" ~img [
file_dump (string Scripts.calc_simu_infos) ;
......
......@@ -12,6 +12,8 @@ type meth =
`Pcoc |
`Pcoc_gamma |
`Pcoc_C60|
`PCOC_v2 |
`PCOC_pcp |
`Tdg09 |
`Topological |
`Identical |
......@@ -82,6 +84,10 @@ let implementation f = function
DP.pcoc ~gamma:true ~ncat:10 f
| `Pcoc_C60 ->
DP.pcoc ~gamma:false ~ncat:60 f
| `PCOC_v2 ->
DP.pcoc_v2 ~gamma:false ~aa_profiles:`C10 f
| `PCOC_pcp ->
DP.pcoc_v2 ~gamma:false ~aa_profiles:`Physic_properties f
| `Tdg09 ->
DP.tdg09 f
| `Topological ->
......@@ -110,6 +116,8 @@ let result_table meths rd =
?multinomial:(maybe_apply_method meths f `Multinomial)
?tdg09:(maybe_apply_method meths f `Tdg09)
?pcoc:(maybe_apply_method meths f `Pcoc)
?pcoc_v2:(maybe_apply_method meths f `PCOC_v2)
?pcoc_pcp:(maybe_apply_method meths f `PCOC_pcp)
?topological:(maybe_apply_method meths f `Topological)
?identical:(maybe_apply_method meths f `Identical)
?diffsel:(maybe_apply_method meths f `Diffsel)
......
......@@ -8,6 +8,8 @@ type meth =
`Pcoc |
`Pcoc_gamma |
`Pcoc_C60|
`PCOC_v2 |
`PCOC_pcp |
`Tdg09 |
`Topological |
`Identical |
......
......@@ -54,6 +54,10 @@ availableOptions.add_argument('--pcoc', type=str,
help="Pcoc output name", default = None)
availableOptions.add_argument('--pcoc_gamma', type=str,
help="Pcoc_gamma output name", default = None)
availableOptions.add_argument('--pcoc_v2', type=str,
help="Pcoc_v2 output name", default = None)
availableOptions.add_argument('--pcoc_pcp', type=str,
help="Pcoc_pcp output name", default = None)
availableOptions.add_argument('--pcoc_C60', type=str,
help="Pcoc_C60 output name", default = None)
availableOptions.add_argument('--diffsel', type=str,
......@@ -89,6 +93,8 @@ args = parser.parse_args()
df_pcoc = pd.DataFrame()
df_pcoc_gamma = pd.DataFrame()
df_pcoc_v2 = pd.DataFrame()
df_pcoc_pcp = pd.DataFrame()
df_pcoc_C60 = pd.DataFrame()
df_diffsel = pd.DataFrame()
df_diffseldsparse = pd.DataFrame()
......@@ -116,6 +122,26 @@ if args.pcoc_gamma :
df_pcoc_gamma.rename(columns={'PCOC': 'PCOC_gamma',
'PC': 'PC_gamma',
'OC': 'OC_gamma'}, inplace=True)
# PCOC v2 table: keep the V1-model scores and expose them under *_v2 names.
if args.pcoc_v2:
    df_pcoc_v2 = pd.read_csv(args.pcoc_v2, sep="\t")
    # The indel columns are taken from this table only when no plain
    # --pcoc table was supplied.
    indel_cols = [] if args.pcoc else ["Indel_prop", "Indel_prop(ConvLeaves)"]
    col = ['Sites', 'PCOC_V1', 'PC_V1', 'OC_V1'] + indel_cols
    df_pcoc_v2 = df_pcoc_v2[col].rename(
        columns={
            'PCOC_V1': 'PCOC_v2',
            'PC_V1': 'PC_v2',
            'OC_V1': 'OC_v2',
        }
    )
# PCOC table obtained with the physico-chemical profiles: keep the V1-model
# scores and expose them under *_pcp names.
if args.pcoc_pcp:
    df_pcoc_pcp = pd.read_csv(args.pcoc_pcp, sep="\t")
    # The indel columns are taken from this table only when no plain
    # --pcoc table was supplied.
    indel_cols = [] if args.pcoc else ["Indel_prop", "Indel_prop(ConvLeaves)"]
    col = ['Sites', 'PCOC_V1', 'PC_V1', 'OC_V1'] + indel_cols
    df_pcoc_pcp = df_pcoc_pcp[col].rename(
        columns={
            'PCOC_V1': 'PCOC_pcp',
            'PC_V1': 'PC_pcp',
            'OC_V1': 'OC_pcp',
        }
    )
if args.pcoc_C60 :
df_pcoc_C60 = pd.read_csv(args.pcoc_C60, sep="\t")
col = ['Sites','PCOC','PC','OC']
......@@ -183,7 +209,7 @@ if args.fna_infos :
df_fna_infos["Sites"] = df_fna_infos.index + 1
#df_fna_infos = df_fna_infos[['Sites','P_distance']]
df_list = [df for df in [df_pcoc, df_pcoc_gamma, df_pcoc_C60,
df_list = [df for df in [df_pcoc, df_pcoc_gamma, df_pcoc_C60, df_pcoc_v2, df_pcoc_pcp,
df_diffsel, df_diffsel_bis, df_diffseldsparse,
df_identical_LG, df_identical_WAG,
df_topological_LG, df_topological_WAG,
......
AA,profil_ATSPMIVFL,profil_WHCYNGDEQKR,profil_G,profil_AV,profil_LIM,profil_P,profil_C,profil_FW,profil_DE,profil_KR,profil_ST,profil_NQ,profil_HY
A,13.614,0.634,2.179,40.772,0.138,1.965,0.125,0.085,0.558,0.063,3.792,0.066,0.025
C,0.19,3.483,0.321,0.014,0.022,0.035,83.09,0.802,0.01,0.276,0.469,0.027,1.609
D,0.223,12.109,1.704,0.168,0.008,0.026,0.051,0.008,36.819,0.048,0.074,1.448,0.425
E,0.311,14.374,3.161,0.313,0.022,0.037,0.004,0.019,55.801,1.407,0.048,1.275,0.06
F,3.453,0.216,0.023,0.14,1.782,0.08,0.928,73.59,0.011,0.006,0.355,0.009,2.289
G,0.56,7.735,83.464,0.672,0.023,0.037,1.478,0.255,1.761,1.071,1,0.161,0.052
H,0.165,9.321,0.042,0.007,0.087,0.345,0.481,0.064,0.152,0.573,0.046,2.116,47.416
I,14.69,0.236,0.048,5.11,37.588,0.064,0.023,0.903,0.027,0.121,0.802,0.106,0.028
K,0.394,13.825,0.23,0.041,0.07,0.035,0.032,0.013,1.73,53.901,0.169,1.598,0.04
L,9.532,0.429,0.053,1.1,38.364,2.828,0.122,7.497,0.028,0.112,0.777,0.378,0.407
M,6.905,0.15,0.03,0.856,12.485,0.018,0.021,0.061,0.015,0.128,0.247,0.013,0.005
N,0.472,8.668,0.318,0.045,0.044,0.034,0.124,0.013,1.491,0.895,2.22,44.947,0.959
P,8.003,0.547,0.063,0.475,0.578,84.213,0.056,0.098,0.031,0.104,2.538,1.043,0.514
Q,0.217,9.977,0.087,0.017,0.205,0.715,0.057,0.146,0.898,2.085,0.041,40.007,2.084
R,0.429,9.891,3.032,0.022,0.138,0.28,4.63,1.152,0.091,37.969,0.314,1.296,1.561
S,9.734,2.055,4.158,1.325,0.713,7.651,4.388,1.364,0.172,0.37,55.373,4.519,0.394
T,16.058,0.753,0.163,3.469,0.945,1.505,0.081,0.033,0.078,0.592,31.418,0.748,0.027
V,14.877,0.4,0.813,45.444,6.698,0.105,0.044,0.945,0.245,0.041,0.212,0.027,0.021
W,0.04,0.877,0.088,0.003,0.068,0.009,0.33,10.68,0.005,0.213,0.011,0.06,0.017
Y,0.13,4.321,0.022,0.006,0.022,0.019,3.934,2.275,0.078,0.025,0.092,0.156,42.068
......@@ -63,7 +63,7 @@ let topological ?(descr="") ~(tree:_ pworkflow) ~(tree_conv:_ pworkflow) ~(faa:a
cmd "cp" [dep bppml_out_conv ; dest // "estimates.bppml_out_conv.tsv" ];
cmd "cp" [dep bppml_config ; dest // "estimates.bppml_config" ];
cmd "cp" [dep bppml_config_conv ; dest // "estimates.bppml_config_conv" ];
cmd "python" ~img:Pcoc.img [
cmd "python" ~img:Env.env_pcoc [
file_dump (string Scripts.calc_topological) ;
opt "-bppml_non_conv" dep bppml_out ;
opt "-bppml_conv" dep bppml_out_conv ;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment