Skip to content

Commit 63366a1

Browse files
authored
Merge pull request #20 from RasmussenLab/dbds_update
Dbds update
2 parents 736ebb9 + 8caae32 commit 63366a1

File tree

9 files changed

+29
-53
lines changed

9 files changed

+29
-53
lines changed

bin/build_matrix.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,6 @@
3535
from pathlib import Path
3636
from typing import Dict, List, Tuple, Optional
3737

38-
def natural_key(s: str):
39-
return [int(t) if t.isdigit() else t.lower() for t in re.findall(r"\d+|\D+", s)]
40-
4138
def read_pools(pools_path: Path) -> Tuple[List[str], List[str]]:
4239
rows, cols = [], []
4340
with pools_path.open() as fh:
@@ -108,8 +105,8 @@ def build_context(rows: List[str],
108105
decode: Dict[Tuple[str,str], str],
109106
pad_width: Optional[int]) -> dict:
110107
# Force deterministic ordering
111-
rows_ord = sorted(rows, key=natural_key)
112-
cols_ord = sorted(cols, key=natural_key)
108+
rows_ord = sorted(rows, key=str.lower)
109+
cols_ord = sorted(cols, key=str.lower)
113110

114111
# Indices and labels
115112
row_index = {pid: i for i, pid in enumerate(rows_ord)}

bin/report.Rmd

Lines changed: 18 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ output_path <- here::here('reports')
6060
pinvars <-
6161
read_tsv(pinpoints_path,
6262
col_types = cols(.default = col_character())) %>%
63-
type_convert(guess_integer = TRUE)
63+
type_convert(guess_integer = TRUE) %>%
64+
mutate(row_label = as.integer(row_label))
6465
6566
poolvars <-
6667
read_tsv(all_variants_path,
@@ -71,35 +72,22 @@ poolvars <-
7172
```{r eval=have(params$rescue)}
7273
rescue <-
7374
read_tsv(rescue_path) %>%
75+
mutate(row_label = as.integer(row_label)) %>%
7476
select(-sample_id)
7577
7678
pinvars <-
7779
pinvars %>%
78-
full_join(rescue, by=c('uvarid','row_id','column_id','varid','row_label','column_label','sample_alias'))
80+
full_join(rescue, by=c('uvarid','row_id','column_id','row_label','column_label','varid','sample_alias')) %>%
81+
mutate(is_pool_pin = if_else(is.na(is_pool_pin),0,is_pool_pin))
7982
```
8083

8184
```{r eval=have(params$annotations)}
8285
83-
annotations <-
84-
read_tsv(annotations_path,
85-
na = '-', col_types = cols(.default = col_character())) %>%
86-
type_convert(guess_integer = TRUE)
87-
88-
pinvars <-
89-
pinvars %>%
90-
left_join(annotations, by='varid')
91-
92-
poolvars <-
93-
poolvars %>%
94-
left_join(annotations, by='varid')
95-
```
96-
97-
98-
```{r eval=F}
9986
annotations <-
10087
read_tsv(annotations_path,
10188
na = '-', col_types = cols(.default = col_character())) %>%
10289
type_convert(guess_integer = TRUE) %>%
90+
mutate(is_lof = if_else(LoF == 'HC',1,0)) %>%
10391
mutate(clinvar_stars = case_when(
10492
str_detect(ClinVar_CLNREVSTAT, "practice_guideline") ~ 4,
10593
str_detect(ClinVar_CLNREVSTAT, "reviewed_by_expert_panel") ~ 3,
@@ -111,23 +99,13 @@ annotations <-
11199
mutate(is_p = if_else(str_detect(ClinVar_CLNSIG, 'Pathogenic') | str_detect(ClinVar_CLNSIG, "Likely_pathogenic"), 1,0)) %>%
112100
mutate(is_lofp = if_else(is_lof == 1 | is_p == 1,1,0))
113101
114-
rescue <-
115-
read_tsv(rescue_path) %>%
116-
select(-sample_id)
117-
118102
pinvars <-
119-
read_tsv(pinpoints_path,
120-
col_types = cols(.default = col_character())) %>%
121-
type_convert(guess_integer = TRUE) %>%
122-
left_join(rescue, by=c('uvarid','row_id','column_id','varid')) %>%
103+
pinvars %>%
123104
left_join(annotations, by='varid')
124105
125106
poolvars <-
126-
read_tsv(all_variants_path,
127-
col_types = cols(.default = col_character())) %>%
128-
type_convert(guess_integer = TRUE) %>%
107+
poolvars %>%
129108
left_join(annotations, by='varid')
130-
131109
```
132110

133111
```{r}
@@ -166,8 +144,10 @@ pinvars <-
166144
```{r}
167145
plot_data <-
168146
pinvars %>%
147+
filter(is_pool_pin == 1) %>%
169148
group_by(sample_id,row_factor, column_factor) %>%
170149
summarise(private_variants = n())
150+
171151
plot_data %>%
172152
ggplot(aes(x = column_factor, y = row_factor, fill = private_variants)) +
173153
geom_point(shape = 22, size = 14, color='black') +
@@ -190,8 +170,8 @@ plot_data %>%
190170
```{r eval=have(params$annotations)}
191171
plot_data <-
192172
pinvars %>%
193-
mutate(row_factor = fct_rev(factor(row_label, levels = 1:2, ordered = TRUE)),
194-
column_factor = factor(column_label, levels = LETTERS[1:2], ordered = TRUE)) %>%
173+
filter(is_pool_pin == 1) %>%
174+
filter(is_lofp == 1) %>%
195175
group_by(sample_id,row_factor, column_factor) %>%
196176
summarise(private_variants = n())
197177
@@ -214,11 +194,15 @@ plot_data %>%
214194
ggtitle('pLoF/p only')
215195
```
216196

197+
198+
199+
200+
217201
```{r eval=have(params$annotations) & have(params$rescue)}
218202
plot_data <-
219203
pinvars %>%
220-
mutate(row_factor = fct_rev(factor(row_label, levels = 1:2, ordered = TRUE)),
221-
column_factor = factor(column_label, levels = LETTERS[1:2], ordered = TRUE)) %>%
204+
filter((probability >= 0.5 & is_single) | is_pool_pin == 1) %>%
205+
filter(is_lofp == 1) %>%
222206
group_by(sample_id,row_factor, column_factor) %>%
223207
summarise(private_variants = n())
224208

conf/container.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ params {
3434
pinpy = "madscort/dswf_pinpy:1.0"
3535
filter_variants = "madscort/dswf_filter:1.0"
3636
vep = "madscort/vep:111.0"
37-
marbl = "madscort/marbl:0.4.0"
37+
marbl = "madscort/marbl:0.6.0"
3838
}
3939
singularity {
4040
gatk = "https://depot.galaxyproject.org-singularity-gatk4-4.5.0.0--py36hdfd78af_0"
@@ -67,7 +67,7 @@ params {
6767
pinpy = "madscort/dswf_pinpy:1.0"
6868
filter_variants = "madscort/dswf_filter:1.0"
6969
vep = "madscort/vep:111.0"
70-
marbl = "madscort/marbl:0.4.0"
70+
marbl = "madscort/marbl:0.6.0"
7171
}
7272
}
7373
}

conf/ngc.config

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,12 +107,6 @@ profiles {
107107
time = 2.hour
108108
}
109109

110-
withName: DUPLICATE_METRICS {
111-
// module = ['tools','oracle_jdk/21.0.2','gatk/4.6.0.0']
112-
cpus = 4
113-
memory = 16.GB
114-
time = 2.hour
115-
}
116110

117111
withName: GROUP_UMI {
118112
// module = ['tools','java/1.8.0','fgbio/1.5.1']

envs/pinpy/build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
docker buildx build --platform linux/amd64,linux/arm64,windows/amd64 -t madscort/dswf_pinpy:1.0 --push .
1+
docker buildx build --platform linux/amd64,linux/arm64 -t madscort/dswf_pinpy:1.0 --push .

envs/pinpy/environment.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ channels:
44
- bioconda
55
- defaults
66
dependencies:
7-
- bcftools=1.19
8-
- python=3.9
7+
- bcftools=1.22
8+
- python=3.9
9+
- samtools=1.22

envs/rescue/build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
docker buildx build --platform linux/amd64,linux/arm64 -t madscort/marbl:0.4.0 --push .
1+
docker buildx build --platform linux/amd64,linux/arm64 -t madscort/marbl:0.6.0 --push .

envs/rescue/environment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ channels:
66
dependencies:
77
- python=3.11
88
- pip:
9-
- marbl-pool==0.4.0
9+
- marbl-pool==0.6.0

next.pbs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# myqsub [-F "optional arguments to Nextflow"] next.pbs
1515
#
1616

17-
#PBS -l nodes=1:ppn=40:thinnode
17+
#PBS -l nodes=1:ppn=40
1818
#PBS -l walltime=24:00:00
1919
#PBS -l mem=160gb
2020
#PBS -j oe

0 commit comments

Comments
 (0)