svg2pdf |
12 |
- plots/fdr-control/simulated-bwa.INS.pdf
- plots/fdr-control/simulated-bwa.DEL.pdf
- plots/allelefreqs/simulated-bwa.INS.pdf
- plots/allelefreqs/simulated-bwa.DEL.pdf
- plots/score-dist/simulated-bwa.INS.pdf
- plots/score-dist/simulated-bwa.DEL.pdf
- plots/allelefreq-recall/simulated-bwa.INS.pdf
- plots/allelefreq-recall/simulated-bwa.DEL.pdf
- plots/allelefreq-scatter/simulated-bwa.INS.pdf
- plots/allelefreq-scatter/simulated-bwa.DEL.pdf
- plots/concordance/colo1.INS.concordance.pdf
- plots/concordance/colo1.DEL.concordance.pdf
|
|
|
| cairosvg {input} -o {output}
|
|
plot_precision_recall |
12 |
- plots/precision-recall/simulated-bwa.INS.zoom.pdf
- plots/precision-recall/simulated-bwa.INS.nozoom.pdf
- plots/precision-recall/synthetic-5.INS.zoom.pdf
- plots/precision-recall/synthetic-5.INS.nozoom.pdf
- plots/precision-recall/synthetic-20.INS.zoom.pdf
- plots/precision-recall/synthetic-20.INS.nozoom.pdf
- plots/precision-recall/simulated-bwa.DEL.zoom.pdf
- plots/precision-recall/simulated-bwa.DEL.nozoom.pdf
- plots/precision-recall/synthetic-5.DEL.zoom.pdf
- plots/precision-recall/synthetic-5.DEL.nozoom.pdf
- plots/precision-recall/synthetic-20.DEL.zoom.pdf
- plots/precision-recall/synthetic-20.DEL.nozoom.pdf
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134 | from itertools import product
from functools import partial
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import common
import numpy as np
import math
from matplotlib.lines import Line2D
# Lookup from caller name to plotting colour, built by the project-local
# ``common`` module from the workflow configuration.
colors = common.get_colors(snakemake.config)
# Variant type selected by the rule's ``vartype`` wildcard.
vartype = snakemake.wildcards.vartype
def props(callers):
    """Pair every caller with every configured length range.

    Returns an iterator over ``(caller, len_range)`` tuples; the length
    range varies fastest, as produced by ``itertools.product``.
    """
    len_ranges = snakemake.params.len_ranges
    return product(callers, len_ranges)
def plot_len_range(minlen, maxlen, min_precision=0.0):
    """Plot precision against recall for all callers in one length range.

    Three groups of call tables are drawn onto the current matplotlib axes:
    Varlociraptor calls (solid curve ending in a square marker), default
    caller scores (dotted curve) and ad-hoc calls (a single dot each).

    Args:
        minlen: Lower bound of the length range; forwarded to
            ``common.load_variants`` and used to select the matching inputs.
        maxlen: Upper bound of the length range (same usage).
        min_precision: Lower limit of the y axis.

    Returns:
        ``(ax, handles)``: the current axes and the legend handles collected
        while plotting.
    """
    truth = common.load_variants(
        snakemake.input.truth, minlen, maxlen, vartype=vartype)

    def is_other_range(len_range):
        # An input belongs to this panel only if BOTH bounds match, hence it
        # must be skipped as soon as EITHER bound differs.  (Combining the
        # two inequalities with ``and`` would let ranges that share a single
        # bound with this panel slip through.)
        return len_range[0] != minlen or len_range[1] != maxlen

    def plot(calls,
             label,
             color,
             line=True,
             style="-",
             invert=False,
             markersize=4,
             endmarker=False):
        """Draw one caller, either as a threshold curve or as a single point."""
        calls = pd.read_table(calls, index_col=0)
        if len(calls) < 10:
            # too few calls for a meaningful estimate
            return
        if line:
            # Sweep 50 score quantiles as thresholds.
            thresholds = calls.score.quantile(np.linspace(0.0, 1.0, 50))
            precision = []
            recall = []
            for t in thresholds:
                if invert:
                    # inverted score: keep calls at or above the threshold
                    c = calls[calls.score >= t]
                else:
                    c = calls[calls.score <= t]
                p = common.precision(c)
                r = common.recall(c, truth)
                print(label, t, c.shape[0], p, r)
                if len(c) < 10:
                    print("skipping threshold: too few calls", c)
                    continue
                precision.append(p)
                recall.append(r)
            if len(precision) <= 2:
                print("skipping curve because we have too few values")
                return
        else:
            # No score sweep: a single precision/recall point.
            precision = [common.precision(calls)]
            recall = [common.recall(calls, truth)]
            style = "."
            print(label, calls.shape[0], precision, recall)

        plt.plot(
            recall,
            precision,
            style,
            color=color,
            label=label,
            markersize=markersize
        )
        if endmarker:
            # mark the last point of the curve with a square
            plt.plot(recall[-1], precision[-1], "s", color=color, markersize=markersize)

    handles = []
    for calls, (caller,
                len_range) in zip(snakemake.input.varlociraptor_calls,
                                  props(snakemake.params.varlociraptor_callers)):
        if is_other_range(len_range):
            continue
        label = "varlociraptor+{}".format(caller)
        plot(calls, label, colors[caller], endmarker=True)
        handles.append(Line2D([0], [0], color=colors[caller], label=label))

    for calls, (caller,
                len_range) in zip(snakemake.input.default_calls,
                                  props(snakemake.params.default_callers)):
        if is_other_range(len_range):
            continue
        color = colors[caller]
        plot(
            calls,
            caller,
            color,
            style=":",
            invert=snakemake.config["caller"][caller].get("invert", False))
        if caller in snakemake.params.adhoc_callers:
            # The same caller is also drawn as an ad-hoc dot, so the legend
            # entry combines the dotted line with the dot marker.
            handles.append(Line2D([0], [0], markersize=10, markerfacecolor=color, markeredgecolor=color, color=color, label=caller, marker=".", linestyle=":"))
        else:
            handles.append(Line2D([0], [0], color=color, label=caller, linestyle=":"))

    for calls, (caller, len_range) in zip(snakemake.input.adhoc_calls,
                                          props(snakemake.params.adhoc_callers)):
        if is_other_range(len_range):
            continue
        color = colors[caller]
        plot(calls, caller, color, markersize=10, line=False)
        if caller not in snakemake.params.default_callers:
            # ad-hoc only: legend entry is the bare dot
            handles.append(Line2D([0], [0], markersize=10, markerfacecolor=color, markeredgecolor=color, label=caller, marker=".", lw=0))

    sns.despine()
    ax = plt.gca()
    # leave a little headroom above precision 1.0 (less when zoomed in)
    plt.ylim((min_precision, 1.01 if min_precision == 0.0 else 1.001))
    return ax, handles
# Defaults describe the un-zoomed figure.
fig_height = None
legend_outside = snakemake.params.legend_outside
plot = plot_len_range
if snakemake.wildcards.zoom == "zoom":
    # Zoomed figure: restrict the y axis to the high-precision region (the
    # floor depends on the variant type), fix the height and always place
    # the legend outside.
    plot = partial(
        plot_len_range,
        min_precision=0.99 if vartype == "INS" else 0.95,
    )
    fig_height = 3
    legend_outside = True

# Hand the per-range plotting function to the shared helper, then write the
# resulting figure to the rule's first output file.
common.plot_ranges(
    snakemake.params.len_ranges, plot,
    xlabel="recall", ylabel="precision",
    fig_height=fig_height, legend_outside=legend_outside,
)
plt.savefig(snakemake.output[0], bbox_inches="tight")
|
|
plot_fdr |
2 |
- plots/fdr-control/simulated-bwa.INS.svg
- plots/fdr-control/simulated-bwa.DEL.svg
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 | from itertools import product
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import common
import numpy as np
# Tables with fewer rows than this are ignored when plotting.
MIN_CALLS = 100
colors = common.get_colors(snakemake.config)
# Every (caller, length range, FDR threshold) combination; zipped below with
# the input files, so the inputs are expected to arrive in the same order.
props = product(snakemake.params.callers,
                snakemake.params.len_ranges, snakemake.params.fdrs)
# One row per input table, indexed by caller (the column is kept as well).
calls = pd.DataFrame([
    {"caller": caller, "len_range": len_range, "fdr": float(fdr), "calls": path}
    for path, (caller, len_range, fdr)
    in zip(snakemake.input.varlociraptor_calls, props)
]).set_index("caller", drop=False)
def plot_len_range(minlen, maxlen):
    """Plot the observed FDR against the FDR threshold for one length range.

    For every caller in the module-level ``calls`` table, the call files
    belonging to ``[minlen, maxlen]`` are read in order of increasing FDR
    threshold and ``1 - precision`` is plotted against that threshold.  A grey
    dotted diagonal is added as reference.

    Returns:
        ``(ax, handles)``: the current axes and its legend handles.
    """
    def plot(caller):
        color = colors[caller]
        label = "varlociraptor+{}".format(caller)
        fdrs = []
        alphas = []
        # Select with a list so the result is always a DataFrame; a scalar
        # label would collapse to a Series when the caller has a single row.
        calls_ = calls.loc[[caller]]
        calls_ = calls_[calls_["len_range"].map(lambda r: r == [minlen, maxlen])]
        calls_ = calls_.sort_values("fdr")
        for e in calls_.itertuples():
            c = pd.read_table(e.calls)
            n = c.shape[0]
            if n < MIN_CALLS:
                # too few calls for a stable estimate
                continue
            true_fdr = 1.0 - common.precision(c)
            if fdrs and fdrs[-1] == true_fdr:
                # skip thresholds that do not change the observed FDR
                continue
            fdrs.append(true_fdr)
            alphas.append(e.fdr)
        plt.plot(alphas, fdrs, ".-", color=color, label=label)

    for caller in calls.index.unique():
        plot(caller)

    # reference diagonal: observed FDR equal to the threshold
    plt.plot([0, 1], [0, 1], ":", color="grey")
    sns.despine()
    ax = plt.gca()
    handles, _ = ax.get_legend_handles_labels()
    return ax, handles
# Hand the per-range plotting function to the shared helper (presumably one
# panel per length range), then write the figure to the first output file.
common.plot_ranges(snakemake.params.len_ranges, plot_len_range,
                   xlabel="FDR threshold", ylabel="true FDR")
plt.savefig(snakemake.output[0], bbox_inches="tight")
|
|
plot_allelefreq |
2 |
- plots/allelefreqs/simulated-bwa.INS.svg
- plots/allelefreqs/simulated-bwa.DEL.svg
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86 | from itertools import product
import math
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import common
import numpy as np
MIN_CALLS = 10
colors = common.get_colors(snakemake.config)
# Variant type selected by the rule's ``vartype`` wildcard.
vartype = snakemake.wildcards.vartype
# Truth variants of that type, loaded without any length restriction.
truth = common.load_variants(snakemake.input.truth, vartype=vartype)
def props(callers):
    """Pair every caller with every configured length range.

    Returns an iterator over ``(caller, len_range)`` tuples; the length
    range varies fastest, as produced by ``itertools.product``.
    """
    len_ranges = snakemake.params.len_ranges
    return product(callers, len_ranges)
def plot_len_range(minlen, maxlen):
    """Plot the allele frequency prediction error for one length range.

    Loads the calls for ``[minlen, maxlen]`` via ``load_calls`` and draws, per
    true allele frequency, a strip plot and a box plot of
    ``CASE_AF - true allele frequency`` for the true positive calls.

    Returns:
        ``(ax, handles)`` with the legend handles that follow the first ``n``
        entries (``n`` being the number of distinct callers), or ``None``
        when there are no true positive calls.
    """
    def plot(calls, colors):
        # restrict to true positives and look up their true allele frequency
        tp_calls = calls[calls.is_tp]
        matched_af = truth.loc[tp_calls.MATCHING].reset_index().TAF
        tp_calls = tp_calls.reset_index()
        tp_calls["error"] = tp_calls.CASE_AF - matched_af
        if tp_calls.empty:
            return

        tp_calls["true_af"] = matched_af
        matched_af = pd.Series(tp_calls["true_af"].unique()).sort_values()

        # standard deviation when sampling in binomial process from allele freq
        # this is the expected sampling error within the correctly mapped fragments
        # sd = true_af.apply(lambda af: 1 / 40 * math.sqrt(40 * af * (1 - af)))
        # x = np.arange(len(true_af))
        # offsets = [-0.5, 0.5]
        # y_upper = np.array([v for v in sd for o in offsets])
        # y_lower = np.maximum(-y_upper, [-f for f in true_af for o in offsets])
        # plt.fill_between([v + o for v in x for o in offsets], y_lower, y_upper, color="#EEEEEE", zorder=-5)

        # categorical x axis: format the frequencies as fixed-width strings
        tp_calls["true_af"] = tp_calls["true_af"].apply("{:.3f}".format)

        # smaller dots for the range ending at 30
        if maxlen == 30:
            size = 1
        else:
            size = 2
        sns.stripplot(
            "true_af", "error", hue="caller", data=tp_calls, palette=colors,
            dodge=True, jitter=True, alpha=0.5, size=size, rasterized=True)
        sns.boxplot(
            "true_af", "error", hue="caller", data=tp_calls, color="white",
            fliersize=0, linewidth=1)

        handles, labels = plt.gca().get_legend_handles_labels()
        n = len(tp_calls.caller.unique())

        plt.ylim((-1,1))
        plt.grid(axis="y", linestyle=":", color="grey")
        sns.despine()
        plt.xticks(rotation="vertical")
        ax = plt.gca()
        ax.legend().remove()
        # drop the first n legend entries, keep the rest
        return ax, handles[n:]

    all_calls, all_colors = load_calls(minlen, maxlen)
    return plot(all_calls, all_colors)
def load_calls(minlen, maxlen):
    """Load the Varlociraptor call tables belonging to one length range.

    Each non-empty table gets a ``caller`` column holding the label
    ``"varlociraptor+<caller>"``.

    Args:
        minlen: Lower bound of the wanted length range.
        maxlen: Upper bound of the wanted length range.

    Returns:
        ``(all_calls, all_colors)``: the concatenated tables and the list of
        colours of the callers that contributed, in the same order.

    Raises:
        ValueError: raised by ``pd.concat`` when no non-empty table matches.
    """
    all_calls = []
    all_colors = []
    for calls, (caller, len_range) in zip(snakemake.input.varlociraptor_calls, props(snakemake.params.varlociraptor_callers)):
        # Skip inputs of other ranges: a range only matches if BOTH bounds
        # agree, so it is rejected as soon as either one differs.
        if len_range[0] != minlen or len_range[1] != maxlen:
            continue
        label = "varlociraptor+{}".format(caller)
        calls = pd.read_table(calls)
        calls["caller"] = label
        if not calls.empty:
            all_calls.append(calls)
            all_colors.append(colors[caller])
    all_calls = pd.concat(all_calls)
    return all_calls, all_colors
# Hand the per-range plotting function to the shared helper (presumably one
# panel per length range), then write the figure to the first output file.
common.plot_ranges(snakemake.params.len_ranges, plot_len_range,
                   xlabel="true allele frequency", ylabel="predicted - truth")
plt.savefig(snakemake.output[0], bbox_inches="tight")
|
|
plot_score_dist |
2 |
- plots/score-dist/simulated-bwa.INS.svg
- plots/score-dist/simulated-bwa.DEL.svg
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47 | from itertools import product
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import common
import numpy as np
import math
# Lookup from caller name to plotting colour, built by the project-local
# ``common`` module from the workflow configuration.
colors = common.get_colors(snakemake.config)
# Variant type selected by the rule's ``vartype`` wildcard.
vartype = snakemake.wildcards.vartype
def props(callers):
    """Pair every caller with every configured length range.

    Returns an iterator over ``(caller, len_range)`` tuples; the length
    range varies fastest, as produced by ``itertools.product``.
    """
    len_ranges = snakemake.params.len_ranges
    return product(callers, len_ranges)
# Conversion factors between PHRED-scaled values and natural-log
# probabilities; numerically about -ln(10)/10 and -10/ln(10) respectively.
# Neither name is referenced in the visible remainder of this script.
phred_to_log_factor = -0.23025850929940456
log_to_phred_factor = -4.3429448190325175
def plot_len_range(minlen, maxlen):
    """Plot score densities of true and false positives for one length range.

    For every Varlociraptor input of ``[minlen, maxlen]`` two kernel density
    estimates of ``log(PROB_SOMATIC_TUMOR)`` are drawn in the caller's colour:
    a solid, labelled one for true positives and a dotted, unlabelled one for
    the remaining calls.  Tick labels are rewritten to show ``exp(tick)``.

    Returns:
        ``(ax, handles)``: the current axes and its legend handles.
    """
    for calls, (caller, len_range) in zip(snakemake.input.varlociraptor_calls, props(snakemake.params.varlociraptor_callers)):
        # Skip inputs of other ranges: a range only matches if BOTH bounds
        # agree, so it is rejected as soon as either one differs.
        if len_range[0] != minlen or len_range[1] != maxlen:
            continue
        label = "varlociraptor+{}".format(caller)
        calls = pd.read_table(calls)
        calls["caller"] = label
        if not calls.empty:
            color = colors[caller]
            sns.kdeplot(calls[calls.is_tp].PROB_SOMATIC_TUMOR.map(np.log), color=color, label=label)
            sns.kdeplot(calls[~calls.is_tp].PROB_SOMATIC_TUMOR.map(np.log), color=color, linestyle=":", label="")

    ax = plt.gca()
    # densities were computed on the log scale; label ticks on the linear one
    ax.set_xticklabels(["{:.1g}".format(np.exp(t)) for t in plt.xticks()[0]])
    ax.legend().remove()
    handles, _ = ax.get_legend_handles_labels()
    sns.despine()
    return ax, handles
# Hand the per-range plotting function to the shared helper (presumably one
# panel per length range), then write the figure to the first output file.
# NOTE(review): the x label mentions a -10 log10 scale while the tick labels
# produced by plot_len_range show exp(tick) -- confirm the label is intended.
common.plot_ranges(snakemake.params.len_ranges, plot_len_range,
                   xlabel=r"$-10 \log_{10}$ Pr(somatic)", ylabel="density")
plt.savefig(snakemake.output[0], bbox_inches="tight")
|
|
plot_allelefreq_recall |
2 |
- plots/allelefreq-recall/simulated-bwa.INS.svg
- plots/allelefreq-recall/simulated-bwa.DEL.svg
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96 | from itertools import product
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import common
import numpy as np
import math
from matplotlib.lines import Line2D
MIN_CALLS = 10
# Lookup from caller name to plotting colour, built by the project-local
# ``common`` module from the workflow configuration.
colors = common.get_colors(snakemake.config)
# Variant type selected by the rule's ``vartype`` wildcard.
vartype = snakemake.wildcards.vartype
def props(callers):
    """Pair every caller with every configured length range.

    Returns an iterator over ``(caller, len_range)`` tuples; the length
    range varies fastest, as produced by ``itertools.product``.
    """
    len_ranges = snakemake.params.len_ranges
    return product(callers, len_ranges)
def plot_len_range(minlen, maxlen):
    """Plot recall as a function of the minimum allele frequency.

    For each distinct true allele frequency ``af`` in the truth set, recall
    is computed against the truth variants with ``TAF >= af``.  Varlociraptor
    inputs are drawn as a band between two score cut-offs, ad-hoc callers as
    a dotted line on a white underlay.

    Args:
        minlen: Lower bound of the length range.
        maxlen: Upper bound of the length range.

    Returns:
        ``(ax, handles)``: the current axes and the artists to show in the
        legend.
    """
    truth = common.load_variants(
        snakemake.input.truth, minlen, maxlen, vartype=vartype)
    afs = pd.Series(truth.TAF.unique()).sort_values()

    def plot(calls,
             label,
             color,
             varlociraptor=True,
             style="-.",
             markersize=4):
        """Draw one input; return its artist or ``None`` if it was skipped."""
        calls = pd.read_table(calls, index_col=0)
        if len(calls) < MIN_CALLS:
            return
        if varlociraptor:
            phred = lambda p: -10 * math.log10(p)

            def calc_recall(p):
                # keep calls whose score does not exceed the PHRED value of p
                c = calls[calls.score <= phred(p)]
                return [common.recall(c, truth[truth.TAF >= af]) for af in afs]

            # band between the stricter and the more permissive cut-off
            return plt.fill_between(
                afs,
                calc_recall(0.98 if maxlen > 30 else 0.99),
                calc_recall(0.9),
                color=color,
                label=label,
                alpha=0.6)
        else:
            recall = [common.recall(calls, truth[truth.TAF >= af]) for af in afs]
            # plot a white background first to increase visibility
            plt.plot(afs, recall, "-", color="white", alpha=0.8)
            return plt.plot(
                afs,
                recall,
                style,
                color=color,
                label=label)[0]

    handles = []

    def register_handle(handle):
        # skipped inputs return None and get no legend entry
        if handle is not None:
            handles.append(handle)

    for calls, (caller,
                len_range) in zip(snakemake.input.varlociraptor_calls,
                                  props(snakemake.params.varlociraptor_callers)):
        # Skip inputs of other ranges: a range only matches if BOTH bounds
        # agree, so it is rejected as soon as either one differs.
        if len_range[0] != minlen or len_range[1] != maxlen:
            continue
        label = "varlociraptor+{}".format(caller)
        handle = plot(calls, label, colors[caller], varlociraptor=True)
        register_handle(handle)

    for calls, (caller, len_range) in zip(snakemake.input.adhoc_calls,
                                          props(snakemake.params.adhoc_callers)):
        if len_range[0] != minlen or len_range[1] != maxlen:
            continue
        color = colors[caller]
        handle = plot(calls, caller, color, style=":", varlociraptor=False)
        register_handle(handle)

    sns.despine()
    ax = plt.gca()
    return ax, handles
# Hand the per-range plotting function to the shared helper (presumably one
# panel per length range), then write the figure to the first output file.
common.plot_ranges(snakemake.params.len_ranges, plot_len_range,
                   xlabel="allele frequency", ylabel="recall")
plt.savefig(snakemake.output[0], bbox_inches="tight")
|
|
plot_allelefreq_scatter |
2 |
- plots/allelefreq-scatter/simulated-bwa.INS.svg
- plots/allelefreq-scatter/simulated-bwa.DEL.svg
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 | import math
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import common
import numpy as np
# Depth bins with fewer observations than this are left out of the summary
# lines; the x axis and the expected-error band stop at MAX_DEPTH.
MIN_COUNT = 20
MAX_DEPTH = 60
vartype = snakemake.wildcards.vartype
colors = common.get_colors(snakemake.config)
truth = common.load_variants(snakemake.input.truth, vartype=vartype)
# Read every call table, tag it with its caller and stack all of them.
all_calls = pd.concat([
    pd.read_table(path).assign(caller=caller)
    for caller, path in zip(snakemake.params.callers, snakemake.input.calls)
])
def plot(af, _):
    """Plot the allele frequency error against tumor depth for one true AF.

    True positive calls whose matched truth variant has allele frequency
    ``af`` are shown as a 2D density plus individual points.  The mean and
    the +/- standard deviation per depth are drawn for depths with at least
    ``MIN_COUNT`` calls, and a grey band shows the binomial sampling error
    expected at each depth.

    Args:
        af: True allele frequency of the panel.
        _: Unused second element of the range pair passed by the caller.

    Returns:
        ``(ax, [])``: the current axes and an empty list of legend handles.
    """
    # The error can never leave [-af, 1 - af]; clip the bands accordingly.
    def constrain_lower(error):
        return np.maximum(error, -af)

    def constrain_upper(error):
        return np.minimum(error, 1.0 - af)

    calls = all_calls[all_calls.is_tp]
    true_af = truth.loc[calls.MATCHING].reset_index().TAF
    calls = calls.reset_index()
    calls["true_af"] = true_af
    # copy so that adding the error column does not write into a slice
    calls = calls[calls["true_af"] == af].copy()
    calls["error"] = calls.CASE_AF - calls["true_af"]

    sns.kdeplot(calls["TUMOR_DP"], calls["error"], cmap="Blues", n_levels=50, shade=True, alpha=0.7, shade_lowest=False)
    plt.plot(calls["TUMOR_DP"], calls["error"], ",", color="k", lw=0, alpha=1.0, rasterized=True)

    by_depth = calls.groupby("TUMOR_DP")["error"].describe().reset_index()
    by_depth["-std"] = constrain_lower(-by_depth["std"])
    by_depth["std"] = constrain_upper(by_depth["std"])
    by_depth = by_depth[by_depth["count"] >= MIN_COUNT]
    plt.plot(by_depth.TUMOR_DP, by_depth["std"], "--", color="k")
    plt.plot(by_depth.TUMOR_DP, by_depth["-std"], "--", color="k")
    plt.plot(by_depth.TUMOR_DP, by_depth["mean"], "-", color="k")

    # Start at depth 1: the formula below divides by the depth, so depth 0
    # would only yield NaN (and a division warning), never a drawable point.
    depths = np.arange(1, MAX_DEPTH)
    # standard deviation when sampling in binomial process from allele freq
    # this is the expected sampling error within the correctly mapped fragments
    sd = np.array([1.0 / depth * math.sqrt(depth * af * (1.0 - af)) for depth in depths])
    plt.fill_between(depths, constrain_lower(-sd), constrain_upper(sd), color="grey", alpha=0.5)

    sns.despine()
    plt.xticks(rotation="vertical")
    ax = plt.gca()
    ax.legend().remove()
    plt.ylim((-1.0, 1.0))
    plt.xlim((0, MAX_DEPTH))
    return ax, []
# The shared helper takes a list of pairs; each distinct true allele
# frequency (ascending) is passed as both elements of its pair.
afs = [(freq, freq) for freq in truth.TAF.sort_values().unique()]
common.plot_ranges(afs, plot, "depth", "predicted - truth")
plt.savefig(snakemake.output[0], bbox_inches="tight")
|
|
obtain_tp_fp |
610 |
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 | import pandas as pd
import numpy as np
from common import load_variants
# Script body: annotate a call table with true-positive status and a score.
vartype = snakemake.wildcards.vartype
minlen = int(snakemake.wildcards.minlen)
maxlen = int(snakemake.wildcards.maxlen)

# Pick the column used as score according to the evaluation mode.
mode = snakemake.wildcards.mode
if mode == "varlociraptor":
    score = snakemake.config["caller"]["varlociraptor"]["score"]
    # calls are already filtered by FDR control step
    minlen = maxlen = None
elif mode == "default":
    score = snakemake.config["caller"][snakemake.wildcards.caller]["score"]
else:
    # any other mode: no score column
    score = None

calls = load_variants(snakemake.input.calls, vartype=vartype, minlen=minlen, maxlen=maxlen)

# A call counts as true positive when MATCHING is non-negative.
calls["is_tp"] = calls["MATCHING"] >= 0
if score:
    calls["score"] = calls[score]
else:
    calls["score"] = np.nan

calls.to_csv(snakemake.output[0], sep="\t")
|
|
truth_to_tsv |
2 |
- truth/synthetic-5.annotated.tsv
- truth/synthetic-20.annotated.tsv
|
|
- rust-bio-tools =0.9.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt --genotypes --info SOMATIC SVLEN SVTYPE TAF NAF < {input} > {output}
|
|
plot_concordance |
2 |
- plots/concordance/colo1.INS.concordance.svg
- plots/concordance/colo1.DEL.concordance.svg
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127 | from itertools import product
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import common
import numpy as np
import math
from matplotlib.lines import Line2D
from matplotlib.colors import to_rgba
class NotEnoughObservationsException(Exception):
    """Raised when a call set yields no plottable points."""
# Thresholds with fewer calls than MIN_CALLS are not plotted; MAX_LEN is the
# upper variant length passed to plot_len_range by the driver code.
MIN_CALLS = 20
MAX_LEN = 1000

vartype = snakemake.wildcards.vartype
colors = common.get_colors(snakemake.config)

# One table per input file, in input order.
varlociraptor_calls_low = list(map(pd.read_table, snakemake.input.varlociraptor_calls_low))
varlociraptor_calls_high = list(map(pd.read_table, snakemake.input.varlociraptor_calls_high))
adhoc_calls = list(map(pd.read_table, snakemake.input.adhoc_calls))
def expected_count(af, effective_mutation_rate):
    """Return the expected number of somatic variants above a frequency.

    Computes ``effective_mutation_rate * (1 / af - 1)``, i.e. the expected
    number of somatic variants greater than allele frequency ``af`` for the
    given effective mutation rate, according to the model of Williams et al.,
    Nature Genetics 2016.
    """
    inverse_excess = 1.0 / af - 1.0
    return effective_mutation_rate * inverse_excess
def expected_counts(afs, effective_mutation_rate):
    """Apply ``expected_count`` to each frequency in ``afs``; return a list."""
    counts = []
    for frequency in afs:
        counts.append(expected_count(frequency, effective_mutation_rate))
    return counts
def calc_concordance(calls):
    """Return the fraction of rows whose ``concordance_count`` exceeds 1."""
    total = len(calls)
    concordant = calls["concordance_count"] > 1
    return concordant.sum() / total
def plot_len_range(minlen, maxlen, yfunc=None, yscale=None, upper_bound=None):
    """Plot a statistic of the calls as a function of the minimum case AF.

    For every caller, ``yfunc`` is evaluated on the calls whose
    ``max_case_af`` is at least a given threshold, for each threshold found
    in the data.  Varlociraptor results are drawn as a band between the
    "high" and "low" call sets, ad-hoc results as a dotted line on a white
    underlay.

    Args:
        minlen: Minimum absolute ``SVLEN*`` value a call needs in at least
            one of its ``SVLEN*`` columns.
        maxlen: Maximum absolute value for that same column.
        yfunc: Callable mapping a filtered call table to the y value.
        yscale: Optional y axis scale passed to ``Axes.set_yscale``.
        upper_bound: If given, only calls with ``max_case_af`` up to
            ``threshold + upper_bound`` are considered per threshold.

    Returns:
        ``(ax, handles)``: the current axes and the plotted artists,
        Varlociraptor first, then ad-hoc.
    """
    def get_xy(calls, caseafs=None):
        """Return the thresholds with enough calls and their y values."""
        svlen = calls.loc[:, calls.columns.str.startswith("SVLEN")].abs()
        # at least one of the calls has a valid svlen
        valid = ((svlen >= minlen) & (svlen <= maxlen)).sum(axis=1) >= 1
        calls = calls[valid]
        if caseafs is None:
            caseafs = calls["max_case_af"].dropna().unique()
        y = []
        _caseafs = []
        for caseaf in sorted(caseafs):
            _calls = calls[calls["max_case_af"] >= caseaf]
            if upper_bound is not None:
                _calls = _calls[_calls["max_case_af"] <= caseaf + upper_bound]
            if len(_calls) < MIN_CALLS:
                continue
            _caseafs.append(caseaf)
            y.append(yfunc(_calls))
        return _caseafs, y

    def plot_calls(calls, label, color, style, calls_lower=None):
        """Draw one call set; raise if it has no usable threshold."""
        x, y = get_xy(calls)
        if not x:
            raise NotEnoughObservationsException()
        if calls_lower is not None:
            x_lower, y_lower = get_xy(calls_lower, caseafs=x)
            if len(x_lower) != len(x):
                # get_xy drops thresholds with too few calls, so the lower
                # set may cover fewer thresholds than x.  fill_between needs
                # equally long sequences: keep only the shared thresholds.
                lower_by_af = dict(zip(x_lower, y_lower))
                shared = [(af, val) for af, val in zip(x, y) if af in lower_by_af]
                if not shared:
                    raise NotEnoughObservationsException()
                x = [af for af, _ in shared]
                y = [val for _, val in shared]
                y_lower = [lower_by_af[af] for af in x]
            return plt.fill_between(x, y, y_lower, label=label, edgecolor=color, facecolor=to_rgba(color, alpha=0.2))
        if style != "-":
            # white underlay keeps non-solid lines readable on top of bands
            plt.plot(x, y, "-", color="white", alpha=0.8)
        return plt.plot(x, y, style, label=label, color=color)[0]

    handles_varlociraptor = []
    handles_adhoc = []
    for i, caller in enumerate(snakemake.params.callers):
        color = colors[caller]
        try:
            handles_varlociraptor.append(
                plot_calls(
                    varlociraptor_calls_high[i],
                    "varlociraptor+{}".format(caller),
                    color=color, style="-",
                    calls_lower=varlociraptor_calls_low[i]))
        except NotEnoughObservationsException:
            # skip plot
            pass

        try:
            handles_adhoc.append(plot_calls(adhoc_calls[i], caller, color=color, style=":"))
        except NotEnoughObservationsException:
            # skip plot
            pass

    handles = handles_varlociraptor + handles_adhoc
    sns.despine()
    ax = plt.gca()
    if yscale is not None:
        ax.set_yscale(yscale)
    return ax, handles
# Two panels side by side: concordance (left) and number of calls (right),
# both as a function of the minimum tumor allele frequency.
plt.figure(figsize=(10, 4))

plt.subplot(121)
plot_len_range(1, MAX_LEN, yfunc=calc_concordance)
# raw strings: "\g" is not a valid escape sequence in a normal literal
plt.xlabel(r"$\geq$ tumor allele frequency")
plt.ylabel("concordance")

plt.subplot(122)
# Grey reference curves: expected counts for seven effective mutation rates
# spaced logarithmically between 1e1 and 1e5.  The frequency grid does not
# depend on the rate, so it is built once.
# NOTE(review): the grid starts at 0.0, where expected_count divides by
# zero (infinite with numpy floats) -- confirm that is acceptable.
afs = np.linspace(0.0, 1.0, 100, endpoint=False)
for effective_mutation_rate in 10 ** np.linspace(1, 5, 7):
    plt.semilogy(afs, expected_counts(afs, effective_mutation_rate), "-", color="grey", alpha=0.4)

ax, handles = plot_len_range(1, MAX_LEN, yfunc=lambda calls: len(calls), yscale="log")
plt.xlabel(r"$\geq$ tumor allele frequency")
plt.ylabel("# of calls")

# legend outside the right panel, anchored at its upper right corner
ax.legend(handles=handles, loc="upper left", bbox_to_anchor=(1.0, 1.0))
plt.tight_layout()
plt.savefig(snakemake.output[0], bbox_inches="tight")
|
|
varlociraptor_calls_to_tsv |
122 |
|
|
- rust-bio-tools =0.9.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} --info MATCHING < {input} > {output}
|
|
varlociraptor_calls_to_tsv |
368 |
|
|
- rust-bio-tools =0.5.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} --info MATCHING < {input} > {output}
|
|
other_calls_to_tsv |
23 |
- matched-calls/default-neusomatic/simulated-bwa.all.tsv
- matched-calls/adhoc-neusomatic/simulated-bwa.all.tsv
- matched-calls/adhoc-delly/simulated-bwa.all.tsv
- matched-calls/default-neusomatic/synthetic-5.all.tsv
- matched-calls/default-lancet/synthetic-5.all.tsv
- matched-calls/default-manta/synthetic-5.all.tsv
- matched-calls/default-strelka/synthetic-5.all.tsv
- matched-calls/adhoc-neusomatic/synthetic-5.all.tsv
- matched-calls/adhoc-delly/synthetic-5.all.tsv
- matched-calls/adhoc-lancet/synthetic-5.all.tsv
- matched-calls/adhoc-manta/synthetic-5.all.tsv
- matched-calls/adhoc-strelka/synthetic-5.all.tsv
- matched-calls/adhoc-bpi/synthetic-5.all.tsv
- matched-calls/default-neusomatic/synthetic-20.all.tsv
- matched-calls/default-lancet/synthetic-20.all.tsv
- matched-calls/default-manta/synthetic-20.all.tsv
- matched-calls/default-strelka/synthetic-20.all.tsv
- matched-calls/adhoc-neusomatic/synthetic-20.all.tsv
- matched-calls/adhoc-delly/synthetic-20.all.tsv
- matched-calls/adhoc-lancet/synthetic-20.all.tsv
- matched-calls/adhoc-manta/synthetic-20.all.tsv
- matched-calls/adhoc-strelka/synthetic-20.all.tsv
- matched-calls/adhoc-bpi/synthetic-20.all.tsv
|
|
- rust-bio-tools =0.9.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} --info MATCHING < {input} > {output}
|
|
other_calls_to_tsv |
7 |
- matched-calls/default-lancet/simulated-bwa.all.tsv
- matched-calls/default-manta/simulated-bwa.all.tsv
- matched-calls/default-strelka/simulated-bwa.all.tsv
- matched-calls/adhoc-lancet/simulated-bwa.all.tsv
- matched-calls/adhoc-manta/simulated-bwa.all.tsv
- matched-calls/adhoc-strelka/simulated-bwa.all.tsv
- matched-calls/adhoc-bpi/simulated-bwa.all.tsv
|
|
- rust-bio-tools =0.2.5
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} --info MATCHING < {input} > {output}
|
|
aggregate_concordance |
36 |
- aggregated-concordance/varlociraptor-neusomatic-0.9/colo1.INS.tsv
- aggregated-concordance/varlociraptor-delly-0.9/colo1.INS.tsv
- aggregated-concordance/varlociraptor-lancet-0.9/colo1.INS.tsv
- aggregated-concordance/varlociraptor-manta-0.9/colo1.INS.tsv
- aggregated-concordance/varlociraptor-strelka-0.9/colo1.INS.tsv
- aggregated-concordance/varlociraptor-bpi-0.9/colo1.INS.tsv
- aggregated-concordance/varlociraptor-neusomatic-0.98/colo1.INS.tsv
- aggregated-concordance/varlociraptor-delly-0.98/colo1.INS.tsv
- aggregated-concordance/varlociraptor-lancet-0.98/colo1.INS.tsv
- aggregated-concordance/varlociraptor-manta-0.98/colo1.INS.tsv
- aggregated-concordance/varlociraptor-strelka-0.98/colo1.INS.tsv
- aggregated-concordance/varlociraptor-bpi-0.98/colo1.INS.tsv
- aggregated-concordance/adhoc-neusomatic-default/colo1.INS.tsv
- aggregated-concordance/adhoc-delly-default/colo1.INS.tsv
- aggregated-concordance/adhoc-lancet-default/colo1.INS.tsv
- aggregated-concordance/adhoc-manta-default/colo1.INS.tsv
- aggregated-concordance/adhoc-strelka-default/colo1.INS.tsv
- aggregated-concordance/adhoc-bpi-default/colo1.INS.tsv
- aggregated-concordance/varlociraptor-neusomatic-0.9/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-delly-0.9/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-lancet-0.9/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-manta-0.9/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-strelka-0.9/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-bpi-0.9/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-neusomatic-0.98/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-delly-0.98/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-lancet-0.98/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-manta-0.98/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-strelka-0.98/colo1.DEL.tsv
- aggregated-concordance/varlociraptor-bpi-0.98/colo1.DEL.tsv
- aggregated-concordance/adhoc-neusomatic-default/colo1.DEL.tsv
- aggregated-concordance/adhoc-delly-default/colo1.DEL.tsv
- aggregated-concordance/adhoc-lancet-default/colo1.DEL.tsv
- aggregated-concordance/adhoc-manta-default/colo1.DEL.tsv
- aggregated-concordance/adhoc-strelka-default/colo1.DEL.tsv
- aggregated-concordance/adhoc-bpi-default/colo1.DEL.tsv
|
|
- python =3.6
- pandas =0.23
- matplotlib =3.0
- seaborn =0.9.0
- pysam =0.13.0
- svgutils =0.2
- pybedtools =0.7.10
- networkx =2.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79 | from common import load_variants
import networkx as nx
import pandas as pd
import numpy as np
# Aggregate pairwise concordance matches into one table with one row per
# equivalence class of calls and one group of columns per dataset.
vartype = snakemake.wildcards.vartype

# Columns used to identify a call: indels are keyed by their length (SVLEN),
# every other variant type by its ALT allele.
index_cols = ["CHROM", "POS", "SVLEN"] if vartype == "INS" or vartype == "DEL" else ["CHROM", "POS", "ALT"]

all_variants = [load_variants(f, vartype=vartype) for f in snakemake.input.calls]

# Build a graph whose nodes are (dataset id, call index) pairs. An edge links
# a call of dataset i to the call of dataset j it was matched against.
G = nx.Graph()
for calls, (i, j) in zip(all_variants, snakemake.params.dataset_combinations):
    calls["component"] = None
    for call in calls.itertuples():
        a = (i, call.Index)
        G.add_node(a)
        # NOTE(review): a negative MATCHING presumably means "no match in
        # dataset j" - confirm against the vcf-match output format.
        if call.MATCHING >= 0:
            b = (j, call.MATCHING)
            G.add_node(b)
            G.add_edge(a, b)

# get a set of calls for each dataset (we don't need all pairwise comparisons for that)
representatives = {snakemake.params.dataset_combinations[i][0]: calls for i, calls in enumerate(all_variants)}

if snakemake.wildcards.mode != "varlociraptor":
    # For other callers, additionally load the varlociraptor calls, indexed by
    # the identifying columns, so that CASE_AF can be looked up per call below.
    varlociraptor_variants = [load_variants(f, vartype=vartype) for f in snakemake.input.varlociraptor_calls]
    for calls in varlociraptor_variants:
        calls.set_index(index_cols, inplace=True)
    varlociraptor_representatives = {snakemake.params.dataset_combinations[i][0]: calls for i, calls in enumerate(varlociraptor_variants)}

# annotate calls with their component, i.e. their equivalence class
for component_id, component in enumerate(nx.connected_components(G)):
    for i, k in component:
        representatives[i].loc[k, "component"] = component_id

# Use the component id as index so that the per-dataset tables can be joined on it.
for calls in representatives.values():
    calls["component"] = calls["component"].astype(np.float32)
    calls.set_index("component", inplace=True)

# join calls based on their equivalence class
aggregated = None
suffix = "_{}".format
dataset_name = lambda i: snakemake.params.datasets[i]
is_varlociraptor = False
for dataset_id, calls in representatives.items():
    cols = list(index_cols)
    if "CASE_AF" in calls.columns:
        cols.extend(["CASE_AF", "PROB_SOMATIC_TUMOR"])
        is_varlociraptor = True
    calls = calls[cols]
    if snakemake.wildcards.mode != "varlociraptor":
        # Look up CASE_AF in the varlociraptor calls of the same dataset.
        idx_calls = calls.set_index(cols, drop=False)
        caseaf = idx_calls.join(varlociraptor_representatives[dataset_id][["CASE_AF"]], how="left")["CASE_AF"]
        # Drop repeated keys on both sides so the values line up row by row.
        caseaf = caseaf[~caseaf.index.duplicated()]
        calls = calls[~idx_calls.index.duplicated()]
        calls["CASE_AF"] = caseaf.values
    # Tag every column with the dataset name, e.g. POS_<dataset>.
    calls.columns = [c + suffix(dataset_name(dataset_id)) for c in calls.columns]
    if aggregated is None:
        aggregated = calls
    else:
        aggregated = aggregated.join(calls, how="outer", lsuffix="", rsuffix="")

# Forget the component id. Otherwise, we might run into errors with duplicate elements
# in the index below. These can occur if there are multiple ambiguous calls.
aggregated.reset_index(inplace=True, drop=True)

# A dataset has called a variant if its POS_<dataset> column is non-null.
pos_cols = aggregated.columns[aggregated.columns.str.startswith("POS_")]
is_called = (~aggregated[pos_cols].isnull()).astype(int)
is_called.columns = pos_cols.str.replace("POS_", "")
aggregated = aggregated.join(is_called, lsuffix="", rsuffix="")
aggregated.insert(len(aggregated.columns), "concordance_count", is_called.sum(axis=1))

aggregated["max_case_af"] = aggregated[aggregated.columns[aggregated.columns.str.startswith("CASE_AF")]].max(axis=1)
if is_varlociraptor:
    # NOTE(review): the column is named max_* but takes the row-wise minimum;
    # presumably the probabilities are PHRED-scaled, so the smallest value is
    # the highest probability - confirm.
    aggregated["max_prob_somatic_tumor"] = aggregated[aggregated.columns[aggregated.columns.str.startswith("PROB_SOMATIC")]].min(axis=1)

aggregated.to_csv(snakemake.output[0], sep="\t", index=False)
|
|
match_varlociraptor_calls |
122 |
|
|
- rust-bio-tools =0.9.0
- bedtools =2.27.1
- bcftools =1.8
|
| bcftools view {params.regions} {input.calls} | rbt vcf-match {params.match} {input.truth} > {output}
|
|
match_varlociraptor_calls |
368 |
|
|
- rust-bio-tools =0.5.0
- bedtools =2.27.1
- bcftools =1.8
|
| bcftools view {params.regions} {input.calls} | rbt vcf-match {params.match} {input.truth} > {output}
|
|
match_other_calls |
23 |
- matched-calls/default-neusomatic/simulated-bwa.all.bcf
- matched-calls/adhoc-neusomatic/simulated-bwa.all.bcf
- matched-calls/adhoc-delly/simulated-bwa.all.bcf
- matched-calls/default-neusomatic/synthetic-5.all.bcf
- matched-calls/default-lancet/synthetic-5.all.bcf
- matched-calls/default-manta/synthetic-5.all.bcf
- matched-calls/default-strelka/synthetic-5.all.bcf
- matched-calls/adhoc-neusomatic/synthetic-5.all.bcf
- matched-calls/adhoc-delly/synthetic-5.all.bcf
- matched-calls/adhoc-lancet/synthetic-5.all.bcf
- matched-calls/adhoc-manta/synthetic-5.all.bcf
- matched-calls/adhoc-strelka/synthetic-5.all.bcf
- matched-calls/adhoc-bpi/synthetic-5.all.bcf
- matched-calls/default-neusomatic/synthetic-20.all.bcf
- matched-calls/default-lancet/synthetic-20.all.bcf
- matched-calls/default-manta/synthetic-20.all.bcf
- matched-calls/default-strelka/synthetic-20.all.bcf
- matched-calls/adhoc-neusomatic/synthetic-20.all.bcf
- matched-calls/adhoc-delly/synthetic-20.all.bcf
- matched-calls/adhoc-lancet/synthetic-20.all.bcf
- matched-calls/adhoc-manta/synthetic-20.all.bcf
- matched-calls/adhoc-strelka/synthetic-20.all.bcf
- matched-calls/adhoc-bpi/synthetic-20.all.bcf
|
|
- rust-bio-tools =0.9.0
- bedtools =2.27.1
- bcftools =1.8
|
| bcftools view {params.regions} {input.calls} | rbt vcf-match {params.match} {input.truth} > {output}
|
|
match_other_calls |
7 |
- matched-calls/default-lancet/simulated-bwa.all.bcf
- matched-calls/default-manta/simulated-bwa.all.bcf
- matched-calls/default-strelka/simulated-bwa.all.bcf
- matched-calls/adhoc-lancet/simulated-bwa.all.bcf
- matched-calls/adhoc-manta/simulated-bwa.all.bcf
- matched-calls/adhoc-strelka/simulated-bwa.all.bcf
- matched-calls/adhoc-bpi/simulated-bwa.all.bcf
|
|
|
| bcftools view {params.regions} {input.calls} | rbt vcf-match {params.match} {input.truth} > {output}
|
|
concordance_to_tsv |
18 |
- concordance/varlociraptor-neusomatic-0.9/colo1.0-vs-1.tsv
- concordance/varlociraptor-neusomatic-0.9/colo1.1-vs-2.tsv
- concordance/varlociraptor-neusomatic-0.9/colo1.2-vs-0.tsv
- concordance/varlociraptor-neusomatic-0.9/colo1.2-vs-3.tsv
- concordance/varlociraptor-neusomatic-0.9/colo1.3-vs-0.tsv
- concordance/varlociraptor-neusomatic-0.9/colo1.3-vs-1.tsv
- concordance/varlociraptor-neusomatic-0.98/colo1.0-vs-1.tsv
- concordance/varlociraptor-neusomatic-0.98/colo1.1-vs-2.tsv
- concordance/varlociraptor-neusomatic-0.98/colo1.2-vs-0.tsv
- concordance/varlociraptor-neusomatic-0.98/colo1.2-vs-3.tsv
- concordance/varlociraptor-neusomatic-0.98/colo1.3-vs-0.tsv
- concordance/varlociraptor-neusomatic-0.98/colo1.3-vs-1.tsv
- concordance/adhoc-neusomatic-default/colo1.0-vs-1.tsv
- concordance/adhoc-neusomatic-default/colo1.1-vs-2.tsv
- concordance/adhoc-neusomatic-default/colo1.2-vs-0.tsv
- concordance/adhoc-neusomatic-default/colo1.2-vs-3.tsv
- concordance/adhoc-neusomatic-default/colo1.3-vs-0.tsv
- concordance/adhoc-neusomatic-default/colo1.3-vs-1.tsv
|
|
- rust-bio-tools =0.9.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} --info MATCHING < {input} > {output}
|
|
concordance_to_tsv |
60 |
- concordance/varlociraptor-delly-0.9/colo1.0-vs-1.tsv
- concordance/varlociraptor-delly-0.9/colo1.1-vs-2.tsv
- concordance/varlociraptor-delly-0.9/colo1.2-vs-0.tsv
- concordance/varlociraptor-delly-0.9/colo1.2-vs-3.tsv
- concordance/varlociraptor-delly-0.9/colo1.3-vs-0.tsv
- concordance/varlociraptor-delly-0.9/colo1.3-vs-1.tsv
- concordance/varlociraptor-lancet-0.9/colo1.0-vs-1.tsv
- concordance/varlociraptor-lancet-0.9/colo1.1-vs-2.tsv
- concordance/varlociraptor-lancet-0.9/colo1.2-vs-0.tsv
- concordance/varlociraptor-lancet-0.9/colo1.2-vs-3.tsv
- concordance/varlociraptor-lancet-0.9/colo1.3-vs-0.tsv
- concordance/varlociraptor-lancet-0.9/colo1.3-vs-1.tsv
- concordance/varlociraptor-manta-0.9/colo1.0-vs-1.tsv
- concordance/varlociraptor-manta-0.9/colo1.1-vs-2.tsv
- concordance/varlociraptor-manta-0.9/colo1.2-vs-0.tsv
- concordance/varlociraptor-manta-0.9/colo1.2-vs-3.tsv
- concordance/varlociraptor-manta-0.9/colo1.3-vs-0.tsv
- concordance/varlociraptor-manta-0.9/colo1.3-vs-1.tsv
- concordance/varlociraptor-strelka-0.9/colo1.0-vs-1.tsv
- concordance/varlociraptor-strelka-0.9/colo1.1-vs-2.tsv
- concordance/varlociraptor-strelka-0.9/colo1.2-vs-0.tsv
- concordance/varlociraptor-strelka-0.9/colo1.2-vs-3.tsv
- concordance/varlociraptor-strelka-0.9/colo1.3-vs-0.tsv
- concordance/varlociraptor-strelka-0.9/colo1.3-vs-1.tsv
- concordance/varlociraptor-bpi-0.9/colo1.0-vs-1.tsv
- concordance/varlociraptor-bpi-0.9/colo1.1-vs-2.tsv
- concordance/varlociraptor-bpi-0.9/colo1.2-vs-0.tsv
- concordance/varlociraptor-bpi-0.9/colo1.2-vs-3.tsv
- concordance/varlociraptor-bpi-0.9/colo1.3-vs-0.tsv
- concordance/varlociraptor-bpi-0.9/colo1.3-vs-1.tsv
- concordance/varlociraptor-delly-0.98/colo1.0-vs-1.tsv
- concordance/varlociraptor-delly-0.98/colo1.1-vs-2.tsv
- concordance/varlociraptor-delly-0.98/colo1.2-vs-0.tsv
- concordance/varlociraptor-delly-0.98/colo1.2-vs-3.tsv
- concordance/varlociraptor-delly-0.98/colo1.3-vs-0.tsv
- concordance/varlociraptor-delly-0.98/colo1.3-vs-1.tsv
- concordance/varlociraptor-lancet-0.98/colo1.0-vs-1.tsv
- concordance/varlociraptor-lancet-0.98/colo1.1-vs-2.tsv
- concordance/varlociraptor-lancet-0.98/colo1.2-vs-0.tsv
- concordance/varlociraptor-lancet-0.98/colo1.2-vs-3.tsv
- concordance/varlociraptor-lancet-0.98/colo1.3-vs-0.tsv
- concordance/varlociraptor-lancet-0.98/colo1.3-vs-1.tsv
- concordance/varlociraptor-manta-0.98/colo1.0-vs-1.tsv
- concordance/varlociraptor-manta-0.98/colo1.1-vs-2.tsv
- concordance/varlociraptor-manta-0.98/colo1.2-vs-0.tsv
- concordance/varlociraptor-manta-0.98/colo1.2-vs-3.tsv
- concordance/varlociraptor-manta-0.98/colo1.3-vs-0.tsv
- concordance/varlociraptor-manta-0.98/colo1.3-vs-1.tsv
- concordance/varlociraptor-strelka-0.98/colo1.0-vs-1.tsv
- concordance/varlociraptor-strelka-0.98/colo1.1-vs-2.tsv
- concordance/varlociraptor-strelka-0.98/colo1.2-vs-0.tsv
- concordance/varlociraptor-strelka-0.98/colo1.2-vs-3.tsv
- concordance/varlociraptor-strelka-0.98/colo1.3-vs-0.tsv
- concordance/varlociraptor-strelka-0.98/colo1.3-vs-1.tsv
- concordance/varlociraptor-bpi-0.98/colo1.0-vs-1.tsv
- concordance/varlociraptor-bpi-0.98/colo1.1-vs-2.tsv
- concordance/varlociraptor-bpi-0.98/colo1.2-vs-0.tsv
- concordance/varlociraptor-bpi-0.98/colo1.2-vs-3.tsv
- concordance/varlociraptor-bpi-0.98/colo1.3-vs-0.tsv
- concordance/varlociraptor-bpi-0.98/colo1.3-vs-1.tsv
|
|
- rust-bio-tools =0.5.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} --info MATCHING < {input} > {output}
|
|
concordance_to_tsv |
30 |
- concordance/adhoc-delly-default/colo1.0-vs-1.tsv
- concordance/adhoc-delly-default/colo1.1-vs-2.tsv
- concordance/adhoc-delly-default/colo1.2-vs-0.tsv
- concordance/adhoc-delly-default/colo1.2-vs-3.tsv
- concordance/adhoc-delly-default/colo1.3-vs-0.tsv
- concordance/adhoc-delly-default/colo1.3-vs-1.tsv
- concordance/adhoc-lancet-default/colo1.0-vs-1.tsv
- concordance/adhoc-lancet-default/colo1.1-vs-2.tsv
- concordance/adhoc-lancet-default/colo1.2-vs-0.tsv
- concordance/adhoc-lancet-default/colo1.2-vs-3.tsv
- concordance/adhoc-lancet-default/colo1.3-vs-0.tsv
- concordance/adhoc-lancet-default/colo1.3-vs-1.tsv
- concordance/adhoc-manta-default/colo1.0-vs-1.tsv
- concordance/adhoc-manta-default/colo1.1-vs-2.tsv
- concordance/adhoc-manta-default/colo1.2-vs-0.tsv
- concordance/adhoc-manta-default/colo1.2-vs-3.tsv
- concordance/adhoc-manta-default/colo1.3-vs-0.tsv
- concordance/adhoc-manta-default/colo1.3-vs-1.tsv
- concordance/adhoc-strelka-default/colo1.0-vs-1.tsv
- concordance/adhoc-strelka-default/colo1.1-vs-2.tsv
- concordance/adhoc-strelka-default/colo1.2-vs-0.tsv
- concordance/adhoc-strelka-default/colo1.2-vs-3.tsv
- concordance/adhoc-strelka-default/colo1.3-vs-0.tsv
- concordance/adhoc-strelka-default/colo1.3-vs-1.tsv
- concordance/adhoc-bpi-default/colo1.0-vs-1.tsv
- concordance/adhoc-bpi-default/colo1.1-vs-2.tsv
- concordance/adhoc-bpi-default/colo1.2-vs-0.tsv
- concordance/adhoc-bpi-default/colo1.2-vs-3.tsv
- concordance/adhoc-bpi-default/colo1.3-vs-0.tsv
- concordance/adhoc-bpi-default/colo1.3-vs-1.tsv
|
|
- rust-bio-tools =0.2.5
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} --info MATCHING < {input} > {output}
|
|
varlociraptor_all_calls_to_tsv |
4 |
- varlociraptor-neusomatic/COLO_829-GSC.all.tsv
- varlociraptor-neusomatic/COLO_829-Ill.all.tsv
- varlociraptor-neusomatic/COLO_829-TGen.all.tsv
- varlociraptor-neusomatic/COLO_829-EBI.all.tsv
|
|
- rust-bio-tools =0.9.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} < {input} > {output}
|
|
varlociraptor_all_calls_to_tsv |
20 |
- varlociraptor-delly/COLO_829-GSC.all.tsv
- varlociraptor-delly/COLO_829-Ill.all.tsv
- varlociraptor-delly/COLO_829-TGen.all.tsv
- varlociraptor-delly/COLO_829-EBI.all.tsv
- varlociraptor-lancet/COLO_829-GSC.all.tsv
- varlociraptor-lancet/COLO_829-Ill.all.tsv
- varlociraptor-lancet/COLO_829-TGen.all.tsv
- varlociraptor-lancet/COLO_829-EBI.all.tsv
- varlociraptor-manta/COLO_829-GSC.all.tsv
- varlociraptor-manta/COLO_829-Ill.all.tsv
- varlociraptor-manta/COLO_829-TGen.all.tsv
- varlociraptor-manta/COLO_829-EBI.all.tsv
- varlociraptor-strelka/COLO_829-GSC.all.tsv
- varlociraptor-strelka/COLO_829-Ill.all.tsv
- varlociraptor-strelka/COLO_829-TGen.all.tsv
- varlociraptor-strelka/COLO_829-EBI.all.tsv
- varlociraptor-bpi/COLO_829-GSC.all.tsv
- varlociraptor-bpi/COLO_829-Ill.all.tsv
- varlociraptor-bpi/COLO_829-TGen.all.tsv
- varlociraptor-bpi/COLO_829-EBI.all.tsv
|
|
- rust-bio-tools =0.5.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-to-txt {params.gt} {params.tags} < {input} > {output}
|
|
varlociraptor_control_fdr |
122 |
|
|
- varlociraptor =1.1.1
- bcftools =1.9
|
| varlociraptor filter-calls control-fdr {input} --events SOMATIC_TUMOR --var {wildcards.type} --minlen {wildcards.minlen} --maxlen {wildcards.maxlen} --fdr {wildcards.fdr} > {output}
|
|
varlociraptor_control_fdr |
368 |
|
|
|
| varlociraptor filter-calls control-fdr {input} --events SOMATIC_TUMOR --var {wildcards.type} --minlen {wildcards.minlen} --maxlen {wildcards.maxlen} --fdr {wildcards.fdr} > {output}
|
|
fix_neusomatic |
7 |
- default-neusomatic/simulated-bwa.all.bcf
- default-neusomatic/synthetic-5.all.bcf
- default-neusomatic/synthetic-20.all.bcf
- default-neusomatic/COLO_829-GSC.all.bcf
- default-neusomatic/COLO_829-Ill.all.bcf
- default-neusomatic/COLO_829-TGen.all.bcf
- default-neusomatic/COLO_829-EBI.all.bcf
|
|
- bcftools =1.6
- samtools =1.6
|
| bcftools annotate -Ob -o {output} -h {input.header} {input.vcf}
|
|
index_bcf |
80 |
- default-neusomatic/simulated-bwa.all.bcf.csi
- adhoc-delly/simulated-bwa.all.bcf.csi
- varlociraptor-neusomatic/synthetic-5.INS.1-250.1.0.bcf.csi
- varlociraptor-delly/synthetic-5.INS.1-250.1.0.bcf.csi
- varlociraptor-lancet/synthetic-5.INS.1-250.1.0.bcf.csi
- varlociraptor-manta/synthetic-5.INS.1-250.1.0.bcf.csi
- varlociraptor-strelka/synthetic-5.INS.1-250.1.0.bcf.csi
- default-neusomatic/synthetic-5.all.bcf.csi
- default-lancet/synthetic-5.all.bcf.csi
- default-manta/synthetic-5.all.bcf.csi
- default-strelka/synthetic-5.all.bcf.csi
- adhoc-neusomatic/synthetic-5.all.bcf.csi
- adhoc-delly/synthetic-5.all.bcf.csi
- adhoc-lancet/synthetic-5.all.bcf.csi
- adhoc-manta/synthetic-5.all.bcf.csi
- adhoc-strelka/synthetic-5.all.bcf.csi
- adhoc-bpi/synthetic-5.all.bcf.csi
- varlociraptor-neusomatic/synthetic-20.INS.1-30.1.0.bcf.csi
- varlociraptor-neusomatic/synthetic-20.INS.30-250.1.0.bcf.csi
- varlociraptor-delly/synthetic-20.INS.1-30.1.0.bcf.csi
- varlociraptor-delly/synthetic-20.INS.30-250.1.0.bcf.csi
- varlociraptor-lancet/synthetic-20.INS.1-30.1.0.bcf.csi
- varlociraptor-lancet/synthetic-20.INS.30-250.1.0.bcf.csi
- varlociraptor-manta/synthetic-20.INS.1-30.1.0.bcf.csi
- varlociraptor-manta/synthetic-20.INS.30-250.1.0.bcf.csi
- varlociraptor-strelka/synthetic-20.INS.1-30.1.0.bcf.csi
- varlociraptor-strelka/synthetic-20.INS.30-250.1.0.bcf.csi
- default-neusomatic/synthetic-20.all.bcf.csi
- default-lancet/synthetic-20.all.bcf.csi
- default-manta/synthetic-20.all.bcf.csi
- default-strelka/synthetic-20.all.bcf.csi
- adhoc-neusomatic/synthetic-20.all.bcf.csi
- adhoc-delly/synthetic-20.all.bcf.csi
- adhoc-lancet/synthetic-20.all.bcf.csi
- adhoc-manta/synthetic-20.all.bcf.csi
- adhoc-strelka/synthetic-20.all.bcf.csi
- adhoc-bpi/synthetic-20.all.bcf.csi
- varlociraptor-neusomatic/synthetic-5.DEL.1-250.1.0.bcf.csi
- varlociraptor-delly/synthetic-5.DEL.1-250.1.0.bcf.csi
- varlociraptor-lancet/synthetic-5.DEL.1-250.1.0.bcf.csi
- varlociraptor-manta/synthetic-5.DEL.1-250.1.0.bcf.csi
- varlociraptor-strelka/synthetic-5.DEL.1-250.1.0.bcf.csi
- varlociraptor-neusomatic/synthetic-20.DEL.1-30.1.0.bcf.csi
- varlociraptor-neusomatic/synthetic-20.DEL.30-250.1.0.bcf.csi
- varlociraptor-delly/synthetic-20.DEL.1-30.1.0.bcf.csi
- varlociraptor-delly/synthetic-20.DEL.30-250.1.0.bcf.csi
- varlociraptor-lancet/synthetic-20.DEL.1-30.1.0.bcf.csi
- varlociraptor-lancet/synthetic-20.DEL.30-250.1.0.bcf.csi
- varlociraptor-manta/synthetic-20.DEL.1-30.1.0.bcf.csi
- varlociraptor-manta/synthetic-20.DEL.30-250.1.0.bcf.csi
- varlociraptor-strelka/synthetic-20.DEL.1-30.1.0.bcf.csi
- varlociraptor-strelka/synthetic-20.DEL.30-250.1.0.bcf.csi
- delly/synthetic-5.all.bcf.csi
- delly/synthetic-20.all.bcf.csi
- default-neusomatic/COLO_829-GSC.all.bcf.csi
- default-neusomatic/COLO_829-Ill.all.bcf.csi
- default-neusomatic/COLO_829-TGen.all.bcf.csi
- default-neusomatic/COLO_829-EBI.all.bcf.csi
- delly/COLO_829-GSC.all.bcf.csi
- delly/COLO_829-Ill.all.bcf.csi
- delly/COLO_829-TGen.all.bcf.csi
- delly/COLO_829-EBI.all.bcf.csi
- default-lancet/COLO_829-GSC.all.bcf.csi
- default-lancet/COLO_829-Ill.all.bcf.csi
- default-lancet/COLO_829-TGen.all.bcf.csi
- default-lancet/COLO_829-EBI.all.bcf.csi
- manta/COLO_829-GSC.all.bcf.csi
- manta/COLO_829-Ill.all.bcf.csi
- manta/COLO_829-TGen.all.bcf.csi
- manta/COLO_829-EBI.all.bcf.csi
- default-strelka/COLO_829-GSC.all.bcf.csi
- default-strelka/COLO_829-Ill.all.bcf.csi
- default-strelka/COLO_829-TGen.all.bcf.csi
- default-strelka/COLO_829-EBI.all.bcf.csi
- bpi/COLO_829-GSC.all.bcf.csi
- bpi/COLO_829-Ill.all.bcf.csi
- bpi/COLO_829-TGen.all.bcf.csi
- bpi/COLO_829-EBI.all.bcf.csi
- manta/synthetic-5.all.bcf.csi
- manta/synthetic-20.all.bcf.csi
|
|
- bcftools =1.6
- samtools =1.6
|
|
neusomatic_adhoc |
7 |
- adhoc-neusomatic/simulated-bwa.all.bcf
- adhoc-neusomatic/synthetic-5.all.bcf
- adhoc-neusomatic/synthetic-20.all.bcf
- adhoc-neusomatic/COLO_829-GSC.all.bcf
- adhoc-neusomatic/COLO_829-Ill.all.bcf
- adhoc-neusomatic/COLO_829-TGen.all.bcf
- adhoc-neusomatic/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools view` with the rule's params on the first input file and
# write the result to the first output file.
view_cmd = (
    "bcftools view {snakemake.params} {snakemake.input[0]} "
    "-o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
annotate_truth |
2 |
- truth/synthetic-5.annotated.vcf
- truth/synthetic-20.annotated.vcf
|
|
- cyvcf2 =0.11.5
- python =3.7
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59 | from cyvcf2 import VCF, Writer
import numpy as np
def subclone_vaf(gt):
    """Calculate subclone allele frequency.

    Only the first two entries of ``gt`` (the two alleles) are inspected.

    Returns:
        1.0 if both alleles are 1, 0.5 if exactly one allele is 1 and the
        other is 0 or -1, and 0.0 for every other genotype.
    """
    alleles = tuple(gt[:2])
    if alleles == (1, 1):
        return 1.0
    # One alternative allele, the other one reference (0) or -1.
    if alleles in ((0, 1), (1, 0), (-1, 1), (1, -1)):
        return 0.5
    return 0.0
# Annotate each truth record with the tumor (TAF) and normal (NAF) allele
# frequency and write only the somatic records to the output file.

# Reader
vcf_in = VCF(snakemake.input[0])

# Setup subclone information
subclones = ["Som{}".format(i) for i in range(1, 5)]
# Weight of each subclone in the tumor sample, in the order of `subclones`.
fractions = [1/3, 1/3, 1/4, 1/12]

# Prepare writer
vcf_in.add_info_to_header({"ID": "TAF",
                           "Number": "1",
                           "Description": "True tumor allele frequency",
                           "Type": "Float"})
vcf_in.add_info_to_header({"ID": "NAF",
                           "Number": "1",
                           "Description": "True normal allele frequency",
                           "Type": "Float"})
bcf_out = Writer(snakemake.output[0], vcf_in)

# Sample column positions are resolved lazily, on the first record that lacks
# TAF/NAF, so that files which already carry both tags never need the
# subclone/control samples to be present.
subclone_idx = None
control_idx = None

for rec in vcf_in:
    if len(rec.ALT) > 1:
        raise ValueError("multiallelic sites are not supported at the moment")

    try:
        # get VAFs from VCF
        tumor_vaf = rec.INFO["TAF"]
        normal_vaf = rec.INFO["NAF"]
    except KeyError:
        # calculate VAFs
        if subclone_idx is None:
            subclone_idx = [vcf_in.samples.index(s) for s in subclones]
            control_idx = vcf_in.samples.index("Control")
        genotypes = rec.genotypes
        tumor_vaf = sum(fraction * subclone_vaf(genotypes[idx])
                        for idx, fraction in zip(subclone_idx, fractions))
        normal_vaf = subclone_vaf(genotypes[control_idx])
        rec.INFO["TAF"] = tumor_vaf
        rec.INFO["NAF"] = normal_vaf

    # only keep somatic variants
    if normal_vaf == 0.0 and tumor_vaf > 0.0:
        bcf_out.write_record(rec)

bcf_out.close()
|
|
merge_lancet |
6 |
- default-lancet/synthetic-5.all.bcf
- default-lancet/synthetic-20.all.bcf
- default-lancet/COLO_829-GSC.all.bcf
- default-lancet/COLO_829-Ill.all.bcf
- default-lancet/COLO_829-TGen.all.bcf
- default-lancet/COLO_829-EBI.all.bcf
|
|
- bcftools =1.6
- samtools =1.6
|
| bcftools concat -Ob {input} > {output}
|
|
manta_default |
6 |
- default-manta/synthetic-5.all.bcf
- default-manta/synthetic-20.all.bcf
- default-manta/COLO_829-GSC.all.bcf
- default-manta/COLO_829-Ill.all.bcf
- default-manta/COLO_829-TGen.all.bcf
- default-manta/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools view` with the rule's params on the first input file and
# write the result to the first output file.
view_cmd = (
    "bcftools view {snakemake.params} {snakemake.input[0]} "
    "-o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
strelka_default |
6 |
- default-strelka/synthetic-5.all.bcf
- default-strelka/synthetic-20.all.bcf
- default-strelka/COLO_829-GSC.all.bcf
- default-strelka/COLO_829-Ill.all.bcf
- default-strelka/COLO_829-TGen.all.bcf
- default-strelka/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools concat` with the rule's params over the `calls` inputs and
# write the result to the first output file.
concat_cmd = (
    "bcftools concat {snakemake.params} -o {snakemake.output[0]} "
    "{snakemake.input.calls}"
)
shell(concat_cmd)
|
|
delly_adhoc |
6 |
- adhoc-delly/synthetic-5.all.bcf
- adhoc-delly/synthetic-20.all.bcf
- adhoc-delly/COLO_829-GSC.all.bcf
- adhoc-delly/COLO_829-Ill.all.bcf
- adhoc-delly/COLO_829-TGen.all.bcf
- adhoc-delly/COLO_829-EBI.all.bcf
|
|
- delly =0.7.7
- bcftools =1.6
|
| delly filter -m 0 -r 1.0 --samples {input.samples} -o {params.tmp} {input.bcf}; bcftools view -i INFO/SOMATIC -f PASS -Ob {params.tmp} > {output}
|
|
lancet_adhoc |
6 |
- adhoc-lancet/synthetic-5.all.bcf
- adhoc-lancet/synthetic-20.all.bcf
- adhoc-lancet/COLO_829-GSC.all.bcf
- adhoc-lancet/COLO_829-Ill.all.bcf
- adhoc-lancet/COLO_829-TGen.all.bcf
- adhoc-lancet/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools view` with the rule's params on the first input file and
# write the result to the first output file.
view_cmd = (
    "bcftools view {snakemake.params} {snakemake.input[0]} "
    "-o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
manta_adhoc |
6 |
- adhoc-manta/synthetic-5.all.bcf
- adhoc-manta/synthetic-20.all.bcf
- adhoc-manta/COLO_829-GSC.all.bcf
- adhoc-manta/COLO_829-Ill.all.bcf
- adhoc-manta/COLO_829-TGen.all.bcf
- adhoc-manta/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools view` with the rule's params on the first input file and
# write the result to the first output file.
view_cmd = (
    "bcftools view {snakemake.params} {snakemake.input[0]} "
    "-o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
strelka_adhoc |
6 |
- adhoc-strelka/synthetic-5.all.bcf
- adhoc-strelka/synthetic-20.all.bcf
- adhoc-strelka/COLO_829-GSC.all.bcf
- adhoc-strelka/COLO_829-Ill.all.bcf
- adhoc-strelka/COLO_829-TGen.all.bcf
- adhoc-strelka/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools view` with the rule's params on the first input file and
# write the result to the first output file.
view_cmd = (
    "bcftools view {snakemake.params} {snakemake.input[0]} "
    "-o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
bpi_adhoc |
6 |
- adhoc-bpi/synthetic-5.all.bcf
- adhoc-bpi/synthetic-20.all.bcf
- adhoc-bpi/COLO_829-GSC.all.bcf
- adhoc-bpi/COLO_829-Ill.all.bcf
- adhoc-bpi/COLO_829-TGen.all.bcf
- adhoc-bpi/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools view` with the rule's params on the first input file and
# write the result to the first output file.
view_cmd = (
    "bcftools view {snakemake.params} {snakemake.input[0]} "
    "-o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
concordance_match |
18 |
- concordance/varlociraptor-neusomatic-0.9/colo1.0-vs-1.bcf
- concordance/varlociraptor-neusomatic-0.9/colo1.1-vs-2.bcf
- concordance/varlociraptor-neusomatic-0.9/colo1.2-vs-0.bcf
- concordance/varlociraptor-neusomatic-0.9/colo1.2-vs-3.bcf
- concordance/varlociraptor-neusomatic-0.9/colo1.3-vs-0.bcf
- concordance/varlociraptor-neusomatic-0.9/colo1.3-vs-1.bcf
- concordance/varlociraptor-neusomatic-0.98/colo1.0-vs-1.bcf
- concordance/varlociraptor-neusomatic-0.98/colo1.1-vs-2.bcf
- concordance/varlociraptor-neusomatic-0.98/colo1.2-vs-0.bcf
- concordance/varlociraptor-neusomatic-0.98/colo1.2-vs-3.bcf
- concordance/varlociraptor-neusomatic-0.98/colo1.3-vs-0.bcf
- concordance/varlociraptor-neusomatic-0.98/colo1.3-vs-1.bcf
- concordance/adhoc-neusomatic-default/colo1.0-vs-1.bcf
- concordance/adhoc-neusomatic-default/colo1.1-vs-2.bcf
- concordance/adhoc-neusomatic-default/colo1.2-vs-0.bcf
- concordance/adhoc-neusomatic-default/colo1.2-vs-3.bcf
- concordance/adhoc-neusomatic-default/colo1.3-vs-0.bcf
- concordance/adhoc-neusomatic-default/colo1.3-vs-1.bcf
|
|
- rust-bio-tools =0.9.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-match {params.match} {params.bcfs[1]} < {params.bcfs[0]} > {output}
|
|
concordance_match |
60 |
- concordance/varlociraptor-delly-0.9/colo1.0-vs-1.bcf
- concordance/varlociraptor-delly-0.9/colo1.1-vs-2.bcf
- concordance/varlociraptor-delly-0.9/colo1.2-vs-0.bcf
- concordance/varlociraptor-delly-0.9/colo1.2-vs-3.bcf
- concordance/varlociraptor-delly-0.9/colo1.3-vs-0.bcf
- concordance/varlociraptor-delly-0.9/colo1.3-vs-1.bcf
- concordance/varlociraptor-lancet-0.9/colo1.0-vs-1.bcf
- concordance/varlociraptor-lancet-0.9/colo1.1-vs-2.bcf
- concordance/varlociraptor-lancet-0.9/colo1.2-vs-0.bcf
- concordance/varlociraptor-lancet-0.9/colo1.2-vs-3.bcf
- concordance/varlociraptor-lancet-0.9/colo1.3-vs-0.bcf
- concordance/varlociraptor-lancet-0.9/colo1.3-vs-1.bcf
- concordance/varlociraptor-manta-0.9/colo1.0-vs-1.bcf
- concordance/varlociraptor-manta-0.9/colo1.1-vs-2.bcf
- concordance/varlociraptor-manta-0.9/colo1.2-vs-0.bcf
- concordance/varlociraptor-manta-0.9/colo1.2-vs-3.bcf
- concordance/varlociraptor-manta-0.9/colo1.3-vs-0.bcf
- concordance/varlociraptor-manta-0.9/colo1.3-vs-1.bcf
- concordance/varlociraptor-strelka-0.9/colo1.0-vs-1.bcf
- concordance/varlociraptor-strelka-0.9/colo1.1-vs-2.bcf
- concordance/varlociraptor-strelka-0.9/colo1.2-vs-0.bcf
- concordance/varlociraptor-strelka-0.9/colo1.2-vs-3.bcf
- concordance/varlociraptor-strelka-0.9/colo1.3-vs-0.bcf
- concordance/varlociraptor-strelka-0.9/colo1.3-vs-1.bcf
- concordance/varlociraptor-bpi-0.9/colo1.0-vs-1.bcf
- concordance/varlociraptor-bpi-0.9/colo1.1-vs-2.bcf
- concordance/varlociraptor-bpi-0.9/colo1.2-vs-0.bcf
- concordance/varlociraptor-bpi-0.9/colo1.2-vs-3.bcf
- concordance/varlociraptor-bpi-0.9/colo1.3-vs-0.bcf
- concordance/varlociraptor-bpi-0.9/colo1.3-vs-1.bcf
- concordance/varlociraptor-delly-0.98/colo1.0-vs-1.bcf
- concordance/varlociraptor-delly-0.98/colo1.1-vs-2.bcf
- concordance/varlociraptor-delly-0.98/colo1.2-vs-0.bcf
- concordance/varlociraptor-delly-0.98/colo1.2-vs-3.bcf
- concordance/varlociraptor-delly-0.98/colo1.3-vs-0.bcf
- concordance/varlociraptor-delly-0.98/colo1.3-vs-1.bcf
- concordance/varlociraptor-lancet-0.98/colo1.0-vs-1.bcf
- concordance/varlociraptor-lancet-0.98/colo1.1-vs-2.bcf
- concordance/varlociraptor-lancet-0.98/colo1.2-vs-0.bcf
- concordance/varlociraptor-lancet-0.98/colo1.2-vs-3.bcf
- concordance/varlociraptor-lancet-0.98/colo1.3-vs-0.bcf
- concordance/varlociraptor-lancet-0.98/colo1.3-vs-1.bcf
- concordance/varlociraptor-manta-0.98/colo1.0-vs-1.bcf
- concordance/varlociraptor-manta-0.98/colo1.1-vs-2.bcf
- concordance/varlociraptor-manta-0.98/colo1.2-vs-0.bcf
- concordance/varlociraptor-manta-0.98/colo1.2-vs-3.bcf
- concordance/varlociraptor-manta-0.98/colo1.3-vs-0.bcf
- concordance/varlociraptor-manta-0.98/colo1.3-vs-1.bcf
- concordance/varlociraptor-strelka-0.98/colo1.0-vs-1.bcf
- concordance/varlociraptor-strelka-0.98/colo1.1-vs-2.bcf
- concordance/varlociraptor-strelka-0.98/colo1.2-vs-0.bcf
- concordance/varlociraptor-strelka-0.98/colo1.2-vs-3.bcf
- concordance/varlociraptor-strelka-0.98/colo1.3-vs-0.bcf
- concordance/varlociraptor-strelka-0.98/colo1.3-vs-1.bcf
- concordance/varlociraptor-bpi-0.98/colo1.0-vs-1.bcf
- concordance/varlociraptor-bpi-0.98/colo1.1-vs-2.bcf
- concordance/varlociraptor-bpi-0.98/colo1.2-vs-0.bcf
- concordance/varlociraptor-bpi-0.98/colo1.2-vs-3.bcf
- concordance/varlociraptor-bpi-0.98/colo1.3-vs-0.bcf
- concordance/varlociraptor-bpi-0.98/colo1.3-vs-1.bcf
|
|
- rust-bio-tools =0.5.0
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-match {params.match} {params.bcfs[1]} < {params.bcfs[0]} > {output}
|
|
concordance_match |
30 |
- concordance/adhoc-delly-default/colo1.0-vs-1.bcf
- concordance/adhoc-delly-default/colo1.1-vs-2.bcf
- concordance/adhoc-delly-default/colo1.2-vs-0.bcf
- concordance/adhoc-delly-default/colo1.2-vs-3.bcf
- concordance/adhoc-delly-default/colo1.3-vs-0.bcf
- concordance/adhoc-delly-default/colo1.3-vs-1.bcf
- concordance/adhoc-lancet-default/colo1.0-vs-1.bcf
- concordance/adhoc-lancet-default/colo1.1-vs-2.bcf
- concordance/adhoc-lancet-default/colo1.2-vs-0.bcf
- concordance/adhoc-lancet-default/colo1.2-vs-3.bcf
- concordance/adhoc-lancet-default/colo1.3-vs-0.bcf
- concordance/adhoc-lancet-default/colo1.3-vs-1.bcf
- concordance/adhoc-manta-default/colo1.0-vs-1.bcf
- concordance/adhoc-manta-default/colo1.1-vs-2.bcf
- concordance/adhoc-manta-default/colo1.2-vs-0.bcf
- concordance/adhoc-manta-default/colo1.2-vs-3.bcf
- concordance/adhoc-manta-default/colo1.3-vs-0.bcf
- concordance/adhoc-manta-default/colo1.3-vs-1.bcf
- concordance/adhoc-strelka-default/colo1.0-vs-1.bcf
- concordance/adhoc-strelka-default/colo1.1-vs-2.bcf
- concordance/adhoc-strelka-default/colo1.2-vs-0.bcf
- concordance/adhoc-strelka-default/colo1.2-vs-3.bcf
- concordance/adhoc-strelka-default/colo1.3-vs-0.bcf
- concordance/adhoc-strelka-default/colo1.3-vs-1.bcf
- concordance/adhoc-bpi-default/colo1.0-vs-1.bcf
- concordance/adhoc-bpi-default/colo1.1-vs-2.bcf
- concordance/adhoc-bpi-default/colo1.2-vs-0.bcf
- concordance/adhoc-bpi-default/colo1.2-vs-3.bcf
- concordance/adhoc-bpi-default/colo1.3-vs-0.bcf
- concordance/adhoc-bpi-default/colo1.3-vs-1.bcf
|
|
- rust-bio-tools =0.2.5
- bedtools =2.27.1
- bcftools =1.8
|
| rbt vcf-match {params.match} {params.bcfs[1]} < {params.bcfs[0]} > {output}
|
|
varlociraptor_merge |
39 |
- varlociraptor-neusomatic/COLO_829-GSC.all.bcf
- varlociraptor-neusomatic/COLO_829-Ill.all.bcf
- varlociraptor-neusomatic/COLO_829-TGen.all.bcf
- varlociraptor-neusomatic/COLO_829-EBI.all.bcf
- varlociraptor-delly/COLO_829-GSC.all.bcf
- varlociraptor-delly/COLO_829-Ill.all.bcf
- varlociraptor-delly/COLO_829-TGen.all.bcf
- varlociraptor-delly/COLO_829-EBI.all.bcf
- varlociraptor-lancet/COLO_829-GSC.all.bcf
- varlociraptor-lancet/COLO_829-Ill.all.bcf
- varlociraptor-lancet/COLO_829-TGen.all.bcf
- varlociraptor-lancet/COLO_829-EBI.all.bcf
- varlociraptor-manta/COLO_829-GSC.all.bcf
- varlociraptor-manta/COLO_829-Ill.all.bcf
- varlociraptor-manta/COLO_829-TGen.all.bcf
- varlociraptor-manta/COLO_829-EBI.all.bcf
- varlociraptor-strelka/COLO_829-GSC.all.bcf
- varlociraptor-strelka/COLO_829-Ill.all.bcf
- varlociraptor-strelka/COLO_829-TGen.all.bcf
- varlociraptor-strelka/COLO_829-EBI.all.bcf
- varlociraptor-bpi/COLO_829-GSC.all.bcf
- varlociraptor-bpi/COLO_829-Ill.all.bcf
- varlociraptor-bpi/COLO_829-TGen.all.bcf
- varlociraptor-bpi/COLO_829-EBI.all.bcf
- varlociraptor-neusomatic/simulated-bwa.all.bcf
- varlociraptor-delly/simulated-bwa.all.bcf
- varlociraptor-lancet/simulated-bwa.all.bcf
- varlociraptor-manta/simulated-bwa.all.bcf
- varlociraptor-strelka/simulated-bwa.all.bcf
- varlociraptor-neusomatic/synthetic-5.all.bcf
- varlociraptor-delly/synthetic-5.all.bcf
- varlociraptor-lancet/synthetic-5.all.bcf
- varlociraptor-manta/synthetic-5.all.bcf
- varlociraptor-strelka/synthetic-5.all.bcf
- varlociraptor-neusomatic/synthetic-20.all.bcf
- varlociraptor-delly/synthetic-20.all.bcf
- varlociraptor-lancet/synthetic-20.all.bcf
- varlociraptor-manta/synthetic-20.all.bcf
- varlociraptor-strelka/synthetic-20.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Concatenate every input file into the first declared output with
# `bcftools concat`; additional options are taken from the rule's params.
concat_cmd = (
    "bcftools concat {snakemake.params} "
    "-o {snakemake.output[0]} {snakemake.input}"
)
shell(concat_cmd)
|
|
neusomatic |
7 |
- neusomatic/simulated-bwa
- neusomatic/simulated-bwa.all.vcf
- neusomatic/synthetic-5
- neusomatic/synthetic-5.all.vcf
- neusomatic/synthetic-20
- neusomatic/synthetic-20.all.vcf
- neusomatic/COLO_829-GSC
- neusomatic/COLO_829-GSC.all.vcf
- neusomatic/COLO_829-Ill
- neusomatic/COLO_829-Ill.all.vcf
- neusomatic/COLO_829-TGen
- neusomatic/COLO_829-TGen.all.vcf
- neusomatic/COLO_829-EBI
- neusomatic/COLO_829-EBI.all.vcf
|
docker://msahraeian/neusomatic:0.2.1 |
|
| (preprocess.py --mode call --reference {input.ref} --region_bed {input.bed} --tumor_bam {input.bams[0]} --normal_bam {input.bams[1]} --work {output.workdir} --min_mapq 10 --num_threads {threads} --scan_alignments_binary /opt/neusomatic/neusomatic/bin/scan_alignments
call.py --candidates_tsv {output.workdir}/dataset/*/candidates*.tsv --reference {input.ref} --out {output.workdir} --checkpoint /opt/neusomatic/neusomatic/models/NeuSomatic_v0.1.4_standalone_SEQC-WGS-Spike.pth --num_threads {threads} --batch_size 100
python `which postprocess.py` --reference {input.ref} --tumor_bam {input.bams[0]} --pred_vcf {output.workdir}/pred.vcf --candidates_vcf {output.workdir}/work_tumor/filtered_candidates.vcf --output_vcf {output.vcf} --work {output.workdir}) 2> {log}
|
|
fix_lancet |
150 |
|
|
- bcftools =1.6
- samtools =1.6
|
| sed -r 's/MS\=[0-9]+[ACGT]+/MS/g' {input.vcf} | bcftools annotate -o {output} -h {input.header} -
|
|
manta |
6 |
- manta/synthetic-5/results/variants/candidateSV.vcf.gz
- manta/synthetic-5/results/variants/somaticSV.vcf.gz
- manta/synthetic-5/results/variants/candidateSmallIndels.vcf.gz
- manta/synthetic-20/results/variants/candidateSV.vcf.gz
- manta/synthetic-20/results/variants/somaticSV.vcf.gz
- manta/synthetic-20/results/variants/candidateSmallIndels.vcf.gz
- manta/COLO_829-GSC/results/variants/candidateSV.vcf.gz
- manta/COLO_829-GSC/results/variants/somaticSV.vcf.gz
- manta/COLO_829-GSC/results/variants/candidateSmallIndels.vcf.gz
- manta/COLO_829-Ill/results/variants/candidateSV.vcf.gz
- manta/COLO_829-Ill/results/variants/somaticSV.vcf.gz
- manta/COLO_829-Ill/results/variants/candidateSmallIndels.vcf.gz
- manta/COLO_829-TGen/results/variants/candidateSV.vcf.gz
- manta/COLO_829-TGen/results/variants/somaticSV.vcf.gz
- manta/COLO_829-TGen/results/variants/candidateSmallIndels.vcf.gz
- manta/COLO_829-EBI/results/variants/candidateSV.vcf.gz
- manta/COLO_829-EBI/results/variants/somaticSV.vcf.gz
- manta/COLO_829-EBI/results/variants/candidateSmallIndels.vcf.gz
|
|
|
| rm -rf {params.dir}; (configManta.py {params.extra} --tumorBam {input.samples[0]} --normalBam {input.samples[1]} --referenceFasta {input.ref} --runDir {params.dir}; {params.dir}/runWorkflow.py -m local -j {threads}) > {log} 2>&1
|
|
strelka |
6 |
- strelka/synthetic-5/results/variants/somatic.snvs.vcf.gz
- strelka/synthetic-5/results/variants/somatic.indels.vcf.gz
- strelka/synthetic-20/results/variants/somatic.snvs.vcf.gz
- strelka/synthetic-20/results/variants/somatic.indels.vcf.gz
- strelka/COLO_829-GSC/results/variants/somatic.snvs.vcf.gz
- strelka/COLO_829-GSC/results/variants/somatic.indels.vcf.gz
- strelka/COLO_829-Ill/results/variants/somatic.snvs.vcf.gz
- strelka/COLO_829-Ill/results/variants/somatic.indels.vcf.gz
- strelka/COLO_829-TGen/results/variants/somatic.snvs.vcf.gz
- strelka/COLO_829-TGen/results/variants/somatic.indels.vcf.gz
- strelka/COLO_829-EBI/results/variants/somatic.snvs.vcf.gz
- strelka/COLO_829-EBI/results/variants/somatic.indels.vcf.gz
|
|
|
| rm -rf {params.dir}; (configureStrelkaSomaticWorkflow.py {params.extra} --tumorBam {input.samples[0]} --normalBam {input.samples[1]} --referenceFasta {input.ref} --runDir {params.dir} --indelCandidates {input.manta}; {params.dir}/runWorkflow.py -m local -j {threads}) > {log} 2>&1
|
|
delly_concat |
6 |
- delly/synthetic-5.all.bcf
- delly/synthetic-20.all.bcf
- delly/COLO_829-GSC.all.bcf
- delly/COLO_829-Ill.all.bcf
- delly/COLO_829-TGen.all.bcf
- delly/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools concat` over all inputs, writing to the first declared
# output; the rule's params are spliced in as extra command-line options.
concat_cmd = (
    "bcftools concat {snakemake.params} "
    "-o {snakemake.output[0]} {snakemake.input}"
)
shell(concat_cmd)
|
|
bpi_convert |
6 |
- bpi/synthetic-5.all.bcf
- bpi/synthetic-20.all.bcf
- bpi/COLO_829-GSC.all.bcf
- bpi/COLO_829-Ill.all.bcf
- bpi/COLO_829-TGen.all.bcf
- bpi/COLO_829-EBI.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Pass the first input through `bcftools view` and write the result to the
# first declared output; the rule's params supply any extra options.
view_cmd = (
    "bcftools view {snakemake.params} "
    "{snakemake.input[0]} -o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
adhoc_varlociraptor |
48 |
- varlociraptor-neusomatic/COLO_829-GSC.adhoc.0.9.bcf
- varlociraptor-neusomatic/COLO_829-Ill.adhoc.0.9.bcf
- varlociraptor-neusomatic/COLO_829-TGen.adhoc.0.9.bcf
- varlociraptor-neusomatic/COLO_829-EBI.adhoc.0.9.bcf
- varlociraptor-delly/COLO_829-GSC.adhoc.0.9.bcf
- varlociraptor-delly/COLO_829-Ill.adhoc.0.9.bcf
- varlociraptor-delly/COLO_829-TGen.adhoc.0.9.bcf
- varlociraptor-delly/COLO_829-EBI.adhoc.0.9.bcf
- varlociraptor-lancet/COLO_829-GSC.adhoc.0.9.bcf
- varlociraptor-lancet/COLO_829-Ill.adhoc.0.9.bcf
- varlociraptor-lancet/COLO_829-TGen.adhoc.0.9.bcf
- varlociraptor-lancet/COLO_829-EBI.adhoc.0.9.bcf
- varlociraptor-manta/COLO_829-GSC.adhoc.0.9.bcf
- varlociraptor-manta/COLO_829-Ill.adhoc.0.9.bcf
- varlociraptor-manta/COLO_829-TGen.adhoc.0.9.bcf
- varlociraptor-manta/COLO_829-EBI.adhoc.0.9.bcf
- varlociraptor-strelka/COLO_829-GSC.adhoc.0.9.bcf
- varlociraptor-strelka/COLO_829-Ill.adhoc.0.9.bcf
- varlociraptor-strelka/COLO_829-TGen.adhoc.0.9.bcf
- varlociraptor-strelka/COLO_829-EBI.adhoc.0.9.bcf
- varlociraptor-bpi/COLO_829-GSC.adhoc.0.9.bcf
- varlociraptor-bpi/COLO_829-Ill.adhoc.0.9.bcf
- varlociraptor-bpi/COLO_829-TGen.adhoc.0.9.bcf
- varlociraptor-bpi/COLO_829-EBI.adhoc.0.9.bcf
- varlociraptor-neusomatic/COLO_829-GSC.adhoc.0.98.bcf
- varlociraptor-neusomatic/COLO_829-Ill.adhoc.0.98.bcf
- varlociraptor-neusomatic/COLO_829-TGen.adhoc.0.98.bcf
- varlociraptor-neusomatic/COLO_829-EBI.adhoc.0.98.bcf
- varlociraptor-delly/COLO_829-GSC.adhoc.0.98.bcf
- varlociraptor-delly/COLO_829-Ill.adhoc.0.98.bcf
- varlociraptor-delly/COLO_829-TGen.adhoc.0.98.bcf
- varlociraptor-delly/COLO_829-EBI.adhoc.0.98.bcf
- varlociraptor-lancet/COLO_829-GSC.adhoc.0.98.bcf
- varlociraptor-lancet/COLO_829-Ill.adhoc.0.98.bcf
- varlociraptor-lancet/COLO_829-TGen.adhoc.0.98.bcf
- varlociraptor-lancet/COLO_829-EBI.adhoc.0.98.bcf
- varlociraptor-manta/COLO_829-GSC.adhoc.0.98.bcf
- varlociraptor-manta/COLO_829-Ill.adhoc.0.98.bcf
- varlociraptor-manta/COLO_829-TGen.adhoc.0.98.bcf
- varlociraptor-manta/COLO_829-EBI.adhoc.0.98.bcf
- varlociraptor-strelka/COLO_829-GSC.adhoc.0.98.bcf
- varlociraptor-strelka/COLO_829-Ill.adhoc.0.98.bcf
- varlociraptor-strelka/COLO_829-TGen.adhoc.0.98.bcf
- varlociraptor-strelka/COLO_829-EBI.adhoc.0.98.bcf
- varlociraptor-bpi/COLO_829-GSC.adhoc.0.98.bcf
- varlociraptor-bpi/COLO_829-Ill.adhoc.0.98.bcf
- varlociraptor-bpi/COLO_829-TGen.adhoc.0.98.bcf
- varlociraptor-bpi/COLO_829-EBI.adhoc.0.98.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Invoke `bcftools view` on the first input with the options given in the
# rule's params, writing to the first declared output.
view_cmd = (
    "bcftools view {snakemake.params} "
    "{snakemake.input[0]} -o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
varlociraptor_call |
375 |
|
|
- varlociraptor =1.1.1
- bcftools =1.9
|
| bcftools view -Ou {input.calls} {params.chrom_prefix} | varlociraptor call variants {input.ref} {config[caller][varlociraptor][params]} {params.caller} tumor-normal {input.bams} --purity {params.purity} > {output} 2> {log}
|
|
varlociraptor_call |
600 |
|
|
|
| bcftools view -Ou {input.calls} {params.chrom_prefix} | varlociraptor call variants {input.ref} {config[caller][varlociraptor][params]} {params.caller} tumor-normal {input.bams} --purity {params.purity} > {output} 2> {log}
|
|
get_region_bed |
7 |
- neusomatic/simulated-bwa.region.bed
- neusomatic/synthetic-5.region.bed
- neusomatic/synthetic-20.region.bed
- neusomatic/COLO_829-GSC.region.bed
- neusomatic/COLO_829-Ill.region.bed
- neusomatic/COLO_829-TGen.region.bed
- neusomatic/COLO_829-EBI.region.bed
|
|
|
| faidx --transform bed {input} > {output}
|
|
mark_duplicates |
12 |
- mapped-bwa/synthetic-5.tumor.hg38.sorted.bam
- mapped-bwa/synthetic-5.tumor.hg38.markdup.metrics.txt
- mapped-bwa/synthetic-5.normal.hg38.sorted.bam
- mapped-bwa/synthetic-5.normal.hg38.markdup.metrics.txt
- mapped-bwa/synthetic-20.tumor.hg38.sorted.bam
- mapped-bwa/synthetic-20.tumor.hg38.markdup.metrics.txt
- mapped-bwa/synthetic-20.normal.hg38.sorted.bam
- mapped-bwa/synthetic-20.normal.hg38.markdup.metrics.txt
- mapped-bwa/COLO_829-GSC.tumor.hg38.sorted.bam
- mapped-bwa/COLO_829-GSC.tumor.hg38.markdup.metrics.txt
- mapped-bwa/COLO_829-GSC.normal.hg38.sorted.bam
- mapped-bwa/COLO_829-GSC.normal.hg38.markdup.metrics.txt
- mapped-bwa/COLO_829-Ill.tumor.hg38.sorted.bam
- mapped-bwa/COLO_829-Ill.tumor.hg38.markdup.metrics.txt
- mapped-bwa/COLO_829-Ill.normal.hg38.sorted.bam
- mapped-bwa/COLO_829-Ill.normal.hg38.markdup.metrics.txt
- mapped-bwa/COLO_829-TGen.tumor.hg38.sorted.bam
- mapped-bwa/COLO_829-TGen.tumor.hg38.markdup.metrics.txt
- mapped-bwa/COLO_829-TGen.normal.hg38.sorted.bam
- mapped-bwa/COLO_829-TGen.normal.hg38.markdup.metrics.txt
- mapped-bwa/COLO_829-EBI.tumor.hg38.sorted.bam
- mapped-bwa/COLO_829-EBI.tumor.hg38.markdup.metrics.txt
- mapped-bwa/COLO_829-EBI.normal.hg38.sorted.bam
- mapped-bwa/COLO_829-EBI.normal.hg38.markdup.metrics.txt
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Build the log redirection with Snakemake's helper instead of a hand-written
# `&> {snakemake.log}`: with no log declared on the rule the latter expands to
# a dangling `&> ` and the command fails with a shell syntax error.
# Both stdout and stderr are requested, matching what `&>` captured.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Run Picard MarkDuplicates on the rule's input, writing the BAM to the
# `bam` output and the duplication metrics to the `metrics` output.
# Extra Picard options come from the rule's params.
shell(
    "picard MarkDuplicates {snakemake.params} INPUT={snakemake.input} "
    "OUTPUT={snakemake.output.bam} METRICS_FILE={snakemake.output.metrics} "
    "{log}"
)
|
|
samtools_index |
12 |
- mapped-bwa/synthetic-5.tumor.hg38.sorted.bam.bai
- mapped-bwa/synthetic-5.normal.hg38.sorted.bam.bai
- mapped-bwa/synthetic-20.tumor.hg38.sorted.bam.bai
- mapped-bwa/synthetic-20.normal.hg38.sorted.bam.bai
- mapped-bwa/COLO_829-GSC.tumor.hg38.sorted.bam.bai
- mapped-bwa/COLO_829-GSC.normal.hg38.sorted.bam.bai
- mapped-bwa/COLO_829-Ill.tumor.hg38.sorted.bam.bai
- mapped-bwa/COLO_829-Ill.normal.hg38.sorted.bam.bai
- mapped-bwa/COLO_829-TGen.tumor.hg38.sorted.bam.bai
- mapped-bwa/COLO_829-TGen.normal.hg38.sorted.bam.bai
- mapped-bwa/COLO_829-EBI.tumor.hg38.sorted.bam.bai
- mapped-bwa/COLO_829-EBI.normal.hg38.sorted.bam.bai
|
|
|
| __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Index the first input with `samtools index`, writing the index to the
# first declared output; the rule's params are passed through as options.
index_cmd = (
    "samtools index {snakemake.params} "
    "{snakemake.input[0]} {snakemake.output[0]}"
)
shell(index_cmd)
|
|
lancet |
50 |
- lancet/synthetic-5/chr1.vcf
- lancet/synthetic-5/chr2.vcf
- lancet/synthetic-5/chr3.vcf
- lancet/synthetic-5/chr4.vcf
- lancet/synthetic-5/chr5.vcf
- lancet/synthetic-5/chr6.vcf
- lancet/synthetic-5/chr7.vcf
- lancet/synthetic-5/chr8.vcf
- lancet/synthetic-5/chr9.vcf
- lancet/synthetic-5/chr10.vcf
- lancet/synthetic-5/chr11.vcf
- lancet/synthetic-5/chr12.vcf
- lancet/synthetic-5/chr13.vcf
- lancet/synthetic-5/chr14.vcf
- lancet/synthetic-5/chr15.vcf
- lancet/synthetic-5/chr16.vcf
- lancet/synthetic-5/chr17.vcf
- lancet/synthetic-5/chr18.vcf
- lancet/synthetic-5/chr19.vcf
- lancet/synthetic-5/chr20.vcf
- lancet/synthetic-5/chr21.vcf
- lancet/synthetic-5/chr22.vcf
- lancet/synthetic-5/chrM.vcf
- lancet/synthetic-5/chrX.vcf
- lancet/synthetic-5/chrY.vcf
- lancet/synthetic-20/chr1.vcf
- lancet/synthetic-20/chr2.vcf
- lancet/synthetic-20/chr3.vcf
- lancet/synthetic-20/chr4.vcf
- lancet/synthetic-20/chr5.vcf
- lancet/synthetic-20/chr6.vcf
- lancet/synthetic-20/chr7.vcf
- lancet/synthetic-20/chr8.vcf
- lancet/synthetic-20/chr9.vcf
- lancet/synthetic-20/chr10.vcf
- lancet/synthetic-20/chr11.vcf
- lancet/synthetic-20/chr12.vcf
- lancet/synthetic-20/chr13.vcf
- lancet/synthetic-20/chr14.vcf
- lancet/synthetic-20/chr15.vcf
- lancet/synthetic-20/chr16.vcf
- lancet/synthetic-20/chr17.vcf
- lancet/synthetic-20/chr18.vcf
- lancet/synthetic-20/chr19.vcf
- lancet/synthetic-20/chr20.vcf
- lancet/synthetic-20/chr21.vcf
- lancet/synthetic-20/chr22.vcf
- lancet/synthetic-20/chrM.vcf
- lancet/synthetic-20/chrX.vcf
- lancet/synthetic-20/chrY.vcf
|
|
- gxx_linux-64
- make
- cmake
- bamtools =2.5.1
- htslib =1.3
|
| LD_LIBRARY_PATH=$CONDA_PREFIX/lib resources/lancet --tumor {input.bams[0]} --normal {input.bams[1]} --ref {input.ref} --reg {params.region} --num-threads {threads} {params.extra} > {output} 2> {log}
|
|
lancet |
75 |
- lancet/COLO_829-GSC/chr1.vcf
- lancet/COLO_829-GSC/chr2.vcf
- lancet/COLO_829-GSC/chr3.vcf
- lancet/COLO_829-GSC/chr4.vcf
- lancet/COLO_829-GSC/chr5.vcf
- lancet/COLO_829-GSC/chr6.vcf
- lancet/COLO_829-GSC/chr7.vcf
- lancet/COLO_829-GSC/chr8.vcf
- lancet/COLO_829-GSC/chr9.vcf
- lancet/COLO_829-GSC/chr10.vcf
- lancet/COLO_829-GSC/chr11.vcf
- lancet/COLO_829-GSC/chr12.vcf
- lancet/COLO_829-GSC/chr13.vcf
- lancet/COLO_829-GSC/chr14.vcf
- lancet/COLO_829-GSC/chr15.vcf
- lancet/COLO_829-GSC/chr16.vcf
- lancet/COLO_829-GSC/chr17.vcf
- lancet/COLO_829-GSC/chr18.vcf
- lancet/COLO_829-GSC/chr19.vcf
- lancet/COLO_829-GSC/chr20.vcf
- lancet/COLO_829-GSC/chr21.vcf
- lancet/COLO_829-GSC/chr22.vcf
- lancet/COLO_829-GSC/chrM.vcf
- lancet/COLO_829-GSC/chrX.vcf
- lancet/COLO_829-GSC/chrY.vcf
- lancet/COLO_829-Ill/chr1.vcf
- lancet/COLO_829-Ill/chr2.vcf
- lancet/COLO_829-Ill/chr3.vcf
- lancet/COLO_829-Ill/chr4.vcf
- lancet/COLO_829-Ill/chr5.vcf
- lancet/COLO_829-Ill/chr6.vcf
- lancet/COLO_829-Ill/chr7.vcf
- lancet/COLO_829-Ill/chr8.vcf
- lancet/COLO_829-Ill/chr9.vcf
- lancet/COLO_829-Ill/chr10.vcf
- lancet/COLO_829-Ill/chr11.vcf
- lancet/COLO_829-Ill/chr12.vcf
- lancet/COLO_829-Ill/chr13.vcf
- lancet/COLO_829-Ill/chr14.vcf
- lancet/COLO_829-Ill/chr15.vcf
- lancet/COLO_829-Ill/chr16.vcf
- lancet/COLO_829-Ill/chr17.vcf
- lancet/COLO_829-Ill/chr18.vcf
- lancet/COLO_829-Ill/chr19.vcf
- lancet/COLO_829-Ill/chr20.vcf
- lancet/COLO_829-Ill/chr21.vcf
- lancet/COLO_829-Ill/chr22.vcf
- lancet/COLO_829-Ill/chrM.vcf
- lancet/COLO_829-Ill/chrX.vcf
- lancet/COLO_829-Ill/chrY.vcf
- lancet/COLO_829-TGen/chr1.vcf
- lancet/COLO_829-TGen/chr2.vcf
- lancet/COLO_829-TGen/chr3.vcf
- lancet/COLO_829-TGen/chr4.vcf
- lancet/COLO_829-TGen/chr5.vcf
- lancet/COLO_829-TGen/chr6.vcf
- lancet/COLO_829-TGen/chr7.vcf
- lancet/COLO_829-TGen/chr8.vcf
- lancet/COLO_829-TGen/chr9.vcf
- lancet/COLO_829-TGen/chr10.vcf
- lancet/COLO_829-TGen/chr11.vcf
- lancet/COLO_829-TGen/chr12.vcf
- lancet/COLO_829-TGen/chr13.vcf
- lancet/COLO_829-TGen/chr14.vcf
- lancet/COLO_829-TGen/chr15.vcf
- lancet/COLO_829-TGen/chr16.vcf
- lancet/COLO_829-TGen/chr17.vcf
- lancet/COLO_829-TGen/chr18.vcf
- lancet/COLO_829-TGen/chr19.vcf
- lancet/COLO_829-TGen/chr20.vcf
- lancet/COLO_829-TGen/chr21.vcf
- lancet/COLO_829-TGen/chr22.vcf
- lancet/COLO_829-TGen/chrM.vcf
- lancet/COLO_829-TGen/chrX.vcf
- lancet/COLO_829-TGen/chrY.vcf
|
|
|
| LD_LIBRARY_PATH=$CONDA_PREFIX/lib resources/lancet --tumor {input.bams[0]} --normal {input.bams[1]} --ref {input.ref} --reg {params.region} --num-threads {threads} {params.extra} > {output} 2> {log}
|
|
lancet |
25 |
- lancet/COLO_829-EBI/chr1.vcf
- lancet/COLO_829-EBI/chr2.vcf
- lancet/COLO_829-EBI/chr3.vcf
- lancet/COLO_829-EBI/chr4.vcf
- lancet/COLO_829-EBI/chr5.vcf
- lancet/COLO_829-EBI/chr6.vcf
- lancet/COLO_829-EBI/chr7.vcf
- lancet/COLO_829-EBI/chr8.vcf
- lancet/COLO_829-EBI/chr9.vcf
- lancet/COLO_829-EBI/chr10.vcf
- lancet/COLO_829-EBI/chr11.vcf
- lancet/COLO_829-EBI/chr12.vcf
- lancet/COLO_829-EBI/chr13.vcf
- lancet/COLO_829-EBI/chr14.vcf
- lancet/COLO_829-EBI/chr15.vcf
- lancet/COLO_829-EBI/chr16.vcf
- lancet/COLO_829-EBI/chr17.vcf
- lancet/COLO_829-EBI/chr18.vcf
- lancet/COLO_829-EBI/chr19.vcf
- lancet/COLO_829-EBI/chr20.vcf
- lancet/COLO_829-EBI/chr21.vcf
- lancet/COLO_829-EBI/chr22.vcf
- lancet/COLO_829-EBI/chrM.vcf
- lancet/COLO_829-EBI/chrX.vcf
- lancet/COLO_829-EBI/chrY.vcf
|
|
|
| LD_LIBRARY_PATH=$CONDA_PREFIX/lib resources/lancet --tumor {input.bams[0]} --normal {input.bams[1]} --ref {input.ref} --reg {params.region} --num-threads {threads} {params.extra} > {output} 2> {log}
|
|
delly |
12 |
- delly/synthetic-5.DEL.bcf
- delly/synthetic-5.INS.bcf
- delly/synthetic-20.DEL.bcf
- delly/synthetic-20.INS.bcf
- delly/COLO_829-GSC.DEL.bcf
- delly/COLO_829-GSC.INS.bcf
- delly/COLO_829-Ill.DEL.bcf
- delly/COLO_829-Ill.INS.bcf
- delly/COLO_829-TGen.DEL.bcf
- delly/COLO_829-TGen.INS.bcf
- delly/COLO_829-EBI.DEL.bcf
- delly/COLO_829-EBI.INS.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Optional exclusion file: when the rule declares no `exclude` input the
# attribute lookup raises AttributeError and no `-x` flag is passed.
try:
    exclude = "-x " + snakemake.input.exclude
except AttributeError:
    exclude = ""

# Free-form extra arguments for delly from the rule's params (empty if unset).
extra = snakemake.params.get("extra", "")

# Shell redirection fragment from Snakemake's log helper, requested for both
# stdout and stderr.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Call delly for the variant type given in params, against the reference and
# sample inputs, writing to the first declared output. OMP_NUM_THREADS is set
# from the rule's thread count.
shell(
    "OMP_NUM_THREADS={snakemake.threads} delly call {extra} "
    "{exclude} -t {snakemake.params.vartype} -g {snakemake.input.ref} "
    "-o {snakemake.output[0]} {snakemake.input.samples} {log}")
|
|
bpi |
6 |
- bpi/synthetic-5.all.vcf
- bpi/synthetic-20.all.vcf
- bpi/COLO_829-GSC.all.vcf
- bpi/COLO_829-Ill.all.vcf
- bpi/COLO_829-TGen.all.vcf
- bpi/COLO_829-EBI.all.vcf
|
|
- break-point-inspector =1.5
|
| (break-point-inspector -vcf {input.manta} -ref {input.samples[1]} -tumor {input.samples[0]} -output_vcf {output}) > {log} 2>&1
|
|
manta_raw |
6 |
- manta/COLO_829-GSC.all.bcf
- manta/COLO_829-Ill.all.bcf
- manta/COLO_829-TGen.all.bcf
- manta/COLO_829-EBI.all.bcf
- manta/synthetic-5.all.bcf
- manta/synthetic-20.all.bcf
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
from snakemake.shell import shell
# Run `bcftools view` on the first input and write to the first declared
# output; the options to apply are whatever the rule's params provide.
view_cmd = (
    "bcftools view {snakemake.params} "
    "{snakemake.input[0]} -o {snakemake.output[0]}"
)
shell(view_cmd)
|
|
samtools_sort |
12 |
- mapped-bwa/synthetic-5.tumor.hg38.sorted.pre.bam
- mapped-bwa/synthetic-5.normal.hg38.sorted.pre.bam
- mapped-bwa/synthetic-20.tumor.hg38.sorted.pre.bam
- mapped-bwa/synthetic-20.normal.hg38.sorted.pre.bam
- mapped-bwa/COLO_829-GSC.tumor.hg38.sorted.pre.bam
- mapped-bwa/COLO_829-GSC.normal.hg38.sorted.pre.bam
- mapped-bwa/COLO_829-Ill.tumor.hg38.sorted.pre.bam
- mapped-bwa/COLO_829-Ill.normal.hg38.sorted.pre.bam
- mapped-bwa/COLO_829-TGen.tumor.hg38.sorted.pre.bam
- mapped-bwa/COLO_829-TGen.normal.hg38.sorted.pre.bam
- mapped-bwa/COLO_829-EBI.tumor.hg38.sorted.pre.bam
- mapped-bwa/COLO_829-EBI.normal.hg38.sorted.pre.bam
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 | __author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
import os
from snakemake.shell import shell
# Temporary-file prefix for `samtools sort -T`: the first output path with
# its final extension removed.
prefix, _ext = os.path.splitext(snakemake.output[0])

# Sort the first input with the rule's thread count and params, writing the
# result to the first declared output.
sort_cmd = (
    "samtools sort {snakemake.params} -@ {snakemake.threads} "
    "-o {snakemake.output[0]} -T {prefix} {snakemake.input[0]}"
)
shell(sort_cmd)
|
|
bwa |
4 |
- mapped-bwa/synthetic-5.tumor.hg38.bam
- mapped-bwa/synthetic-5.normal.hg38.bam
- mapped-bwa/synthetic-20.tumor.hg38.bam
- mapped-bwa/synthetic-20.normal.hg38.bam
|
|
- bwa =0.7.17
- samtools =1.10
|
| (resources/bwa mem -t {threads} {params.extra} {params.index} {input.sample} | samtools view -Sb - > {output}) 2> {log}
|
|
bwa |
8 |
- mapped-bwa/COLO_829-GSC.tumor.hg38.bam
- mapped-bwa/COLO_829-GSC.normal.hg38.bam
- mapped-bwa/COLO_829-Ill.tumor.hg38.bam
- mapped-bwa/COLO_829-Ill.normal.hg38.bam
- mapped-bwa/COLO_829-TGen.tumor.hg38.bam
- mapped-bwa/COLO_829-TGen.normal.hg38.bam
- mapped-bwa/COLO_829-EBI.tumor.hg38.bam
- mapped-bwa/COLO_829-EBI.normal.hg38.bam
|
|
- python
- samtools =1.6
- numpy
- scikit-learn
- pandas
- setuptools
|
| (resources/bwa mem -t {threads} {params.extra} {params.index} {input.sample} | samtools view -Sb - > {output}) 2> {log}
|
|