Read Strelka variants — strelka • readthis

Reads the variant calls produced by the Strelka2 small variant caller.

Usage

read_strelka_somatic_snvs(
  path,
  sample_ids = "drop_first",
  PASS_only = TRUE,
  patient_id_pattern = "(?<=\\/)[:alnum:]*(?=\\.)",
  chrom_convention = "UCSC",
  verbose = TRUE
)

Arguments

path

Can be either:

path to a single file
vector of file paths, element names will be used as patient IDs
directory containing multiple Strelka .vcf files, patient IDs will be guessed from the file names (should follow convention: <patient_ID>.XXX.XXX.vcf)

sample_ids

Either:

"drop_first"
"all"
ID(s) of the selected tumor samples.

Default: "drop_first"

PASS_only

Keep FILTER == PASS variants only?

patient_id_pattern

Used only when path is a directory: the pattern passed to str_extract() to extract the patient_id from each file name

chrom_convention

Chromosome naming convention to use: one of "UCSC", "NCBI" or "keep". Default: "UCSC"

verbose

Print progress messages while reading the files?

Examples

library(readthis)

file1 <- system.file("extdata", "Strelka", "S1.somatic.snvs.vcf.gz", package = "readthis")
read_strelka_somatic_snvs(file1)
#> Scanning file to determine attributes.
#> File attributes:
#>   meta lines: 53
#>   header_line: 54
#>   variant count: 12
#>   column count: 11
#> 
#> Meta line 53 read in.
#> All meta lines processed.
#> gt matrix initialized.
#> Character matrix gt created.
#>   Character matrix gt rows: 12
#>   Character matrix gt cols: 11
#>   skip: 0
#>   nrows: 12
#>   row_num: 0
#> 
#> Processed variant: 12
#> All variants processed
#> Extracting gt element DP
#> Extracting gt element FDP
#> Extracting gt element SDP
#> Extracting gt element SUBDP
#> Extracting gt element AU
#> Extracting gt element CU
#> Extracting gt element GU
#> Extracting gt element TU
#> readthis>Guessing sample_ids: TUMOR
#> # A tibble: 9 × 9
#>   sample_id chrom   pos ref   alt   ref_reads alt_reads    VAF    DP
#>   <chr>     <chr> <int> <chr> <chr>     <int>     <int>  <dbl> <int>
#> 1 TUMOR     chr1      1 T     G            40         2 0.0476  1000
#> 2 TUMOR     chr2      3 A     T            27         3 0.1      554
#> 3 TUMOR     chr3      5 C     A            23         3 0.115    412
#> 4 TUMOR     chr4      7 T     C            39        10 0.204    932
#> 5 TUMOR     chr5      8 A     C            59         3 0.0484   945
#> 6 TUMOR     chr6      9 C     A            25         2 0.0741   500
#> 7 TUMOR     chr7     10 A     C            35         2 0.0541   870
#> 8 TUMOR     chrX     11 A     T            32         1 0.0303   893
#> 9 TUMOR     chrY     12 T     A            62         3 0.0462   740

file2 <- system.file("extdata", "Strelka", "S2.somatic.snvs.vcf.gz", package = "readthis")
files <- c(S1 = file1, S2 = file2)
read_strelka_somatic_snvs(files, verbose = FALSE)
#> # A tibble: 18 × 10
#>    patient_id sample_id chrom   pos ref   alt   ref_reads alt_reads    VAF    DP
#>    <chr>      <chr>     <chr> <int> <chr> <chr>     <int>     <int>  <dbl> <int>
#>  1 S1         TUMOR     chr1      1 T     G            40         2 0.0476  1000
#>  2 S1         TUMOR     chr2      3 A     T            27         3 0.1      554
#>  3 S1         TUMOR     chr3      5 C     A            23         3 0.115    412
#>  4 S1         TUMOR     chr4      7 T     C            39        10 0.204    932
#>  5 S1         TUMOR     chr5      8 A     C            59         3 0.0484   945
#>  6 S1         TUMOR     chr6      9 C     A            25         2 0.0741   500
#>  7 S1         TUMOR     chr7     10 A     C            35         2 0.0541   870
#>  8 S1         TUMOR     chrX     11 A     T            32         1 0.0303   893
#>  9 S1         TUMOR     chrY     12 T     A            62         3 0.0462   740
#> 10 S2         TUMOR     chr1      1 T     G            40         2 0.0476  1000
#> 11 S2         TUMOR     chr2      3 A     T            27         3 0.1      554
#> 12 S2         TUMOR     chr3      5 C     A            23         3 0.115    412
#> 13 S2         TUMOR     chr4      7 T     C            39        10 0.204    932
#> 14 S2         TUMOR     chr5      8 A     C            59         3 0.0484   945
#> 15 S2         TUMOR     chr6      9 C     A            25         2 0.0741   500
#> 16 S2         TUMOR     chr7     10 A     C            35         2 0.0541   870
#> 17 S2         TUMOR     chrX     11 A     T            32         1 0.0303   893
#> 18 S2         TUMOR     chrY     12 T     A            62         3 0.0462   740

dir <- system.file("extdata", "Strelka", package = "readthis")
read_strelka_somatic_snvs(dir)
#> Scanning file to determine attributes.
#> File attributes:
#>   meta lines: 53
#>   header_line: 54
#>   variant count: 12
#>   column count: 11
#> 
#> Meta line 53 read in.
#> All meta lines processed.
#> gt matrix initialized.
#> Character matrix gt created.
#>   Character matrix gt rows: 12
#>   Character matrix gt cols: 11
#>   skip: 0
#>   nrows: 12
#>   row_num: 0
#> 
#> Processed variant: 12
#> All variants processed
#> Extracting gt element DP
#> Extracting gt element FDP
#> Extracting gt element SDP
#> Extracting gt element SUBDP
#> Extracting gt element AU
#> Extracting gt element CU
#> Extracting gt element GU
#> Extracting gt element TU
#> readthis>Guessing sample_ids: TUMOR
#> Scanning file to determine attributes.
#> File attributes:
#>   meta lines: 53
#>   header_line: 54
#>   variant count: 12
#>   column count: 11
#> 
#> Meta line 53 read in.
#> All meta lines processed.
#> gt matrix initialized.
#> Character matrix gt created.
#>   Character matrix gt rows: 12
#>   Character matrix gt cols: 11
#>   skip: 0
#>   nrows: 12
#>   row_num: 0
#> 
#> Processed variant: 12
#> All variants processed
#> Extracting gt element DP
#> Extracting gt element FDP
#> Extracting gt element SDP
#> Extracting gt element SUBDP
#> Extracting gt element AU
#> Extracting gt element CU
#> Extracting gt element GU
#> Extracting gt element TU
#> readthis>Guessing sample_ids: TUMOR
#> # A tibble: 18 × 10
#>    patient_id sample_id chrom   pos ref   alt   ref_reads alt_reads    VAF    DP
#>    <chr>      <chr>     <chr> <int> <chr> <chr>     <int>     <int>  <dbl> <int>
#>  1 S1         TUMOR     chr1      1 T     G            40         2 0.0476  1000
#>  2 S1         TUMOR     chr2      3 A     T            27         3 0.1      554
#>  3 S1         TUMOR     chr3      5 C     A            23         3 0.115    412
#>  4 S1         TUMOR     chr4      7 T     C            39        10 0.204    932
#>  5 S1         TUMOR     chr5      8 A     C            59         3 0.0484   945
#>  6 S1         TUMOR     chr6      9 C     A            25         2 0.0741   500
#>  7 S1         TUMOR     chr7     10 A     C            35         2 0.0541   870
#>  8 S1         TUMOR     chrX     11 A     T            32         1 0.0303   893
#>  9 S1         TUMOR     chrY     12 T     A            62         3 0.0462   740
#> 10 S2         TUMOR     chr1      1 T     G            40         2 0.0476  1000
#> 11 S2         TUMOR     chr2      3 A     T            27         3 0.1      554
#> 12 S2         TUMOR     chr3      5 C     A            23         3 0.115    412
#> 13 S2         TUMOR     chr4      7 T     C            39        10 0.204    932
#> 14 S2         TUMOR     chr5      8 A     C            59         3 0.0484   945
#> 15 S2         TUMOR     chr6      9 C     A            25         2 0.0741   500
#> 16 S2         TUMOR     chr7     10 A     C            35         2 0.0541   870
#> 17 S2         TUMOR     chrX     11 A     T            32         1 0.0303   893
#> 18 S2         TUMOR     chrY     12 T     A            62         3 0.0462   740