Read Mutect variants
mutect.Rd
Reads variant calls produced by the Mutect2 somatic variant caller.
Usage
read_mutect_snvs(
path,
sample_ids = "drop_first",
PASS_only = TRUE,
patient_id_pattern = "(?<=\\/)[:alnum:]*(?=\\.)",
chrom_convention = "UCSC",
extract_VEP_fields = FALSE,
verbose = TRUE
)
Arguments
- path
Can be either:
path to a single file
a named vector of file paths; the element names will be used as patient IDs
path to a directory containing multiple Mutect .vcf files; patient IDs will be guessed from the file names (which should follow the convention <patient_ID>.XXX.XXX.vcf)
- sample_ids
Either:
"drop_first"
"all"
ID(s) of the selected tumor samples.
Default: "drop_first"
- PASS_only
Keep FILTER == PASS variants only?
- patient_id_pattern
Used only when path is a directory: the pattern passed to str_extract() to extract the patient ID from each file name
- chrom_convention
Chromosome naming convention to apply: one of "UCSC", "NCBI" or "keep". Default: "UCSC"
- extract_VEP_fields
If the VCF file contains VEP annotations, the following fields will be extracted: Variant_Classification, impact, gene_symbol and entrez_id (experimental, not tested)
- verbose
Verbose?
Examples
library(readthis)
file1 <- system.file(
"extdata", "Mutect", "S1.Mutect2.filter.pass.phased.annot.vcf",
package = "readthis"
)
file2 <- system.file(
"extdata", "Mutect", "S2.Mutect2.filter.pass.phased.annot.vcf",
package = "readthis"
)
read_mutect_snvs(file1)
#> Scanning file to determine attributes.
#> File attributes:
#> meta lines: 52
#> header_line: 53
#> variant count: 5
#> column count: 12
#>
#> Meta line 52 read in.
#> All meta lines processed.
#> gt matrix initialized.
#> Character matrix gt created.
#> Character matrix gt rows: 5
#> Character matrix gt cols: 12
#> skip: 0
#> nrows: 5
#> row_num: 0
#>
#> Processed variant: 5
#> All variants processed
#> Extracting gt element AD
#> Extracting gt element AF
#> Extracting gt element DP
#> Extracting gt element F1R2
#> Extracting gt element F2R1
#> Extracting gt element FAD
#> Extracting gt element GQ
#> Extracting gt element GT
#> Extracting gt element PGT
#> Extracting gt element PID
#> Extracting gt element PL
#> Extracting gt element PS
#> Extracting gt element SB
#> readthis>Guessing sample_ids: S1_L1, S1_P1
#> # A tibble: 8 × 12
#> sample_id chrom pos ref alt FILTER ref_reads alt_reads VAF AF
#> <chr> <chr> <int> <chr> <chr> <chr> <int> <int> <dbl> <chr>
#> 1 S1_L1 chr1 2 G A PASS 22 2 0.0833 0.104
#> 2 S1_P1 chr1 2 G A PASS 43 2 0.0444 0.049
#> 3 S1_L1 chr2 3 G A PASS 13 2 0.133 0.154
#> 4 S1_P1 chr2 3 G A PASS 117 2 0.0168 0.022
#> 5 S1_L1 chrX 4 C T PASS 28 1 0.0345 0.100
#> 6 S1_P1 chrX 4 C T PASS 149 3 0.0197 0.025
#> 7 S1_L1 chrY 5 C T PASS 8 0 0 0.143
#> 8 S1_P1 chrY 5 C T PASS 18 2 0.1 0.141
#> # ℹ 2 more variables: DP <int>, CSQ <chr>
files <- c(S1 = file1, S2 = file2)
read_mutect_snvs(files, verbose = FALSE)
#> # A tibble: 16 × 13
#> patient_id sample_id chrom pos ref alt FILTER ref_reads alt_reads
#> <chr> <chr> <chr> <int> <chr> <chr> <chr> <int> <int>
#> 1 S1 S1_L1 chr1 2 G A PASS 22 2
#> 2 S1 S1_P1 chr1 2 G A PASS 43 2
#> 3 S1 S1_L1 chr2 3 G A PASS 13 2
#> 4 S1 S1_P1 chr2 3 G A PASS 117 2
#> 5 S1 S1_L1 chrX 4 C T PASS 28 1
#> 6 S1 S1_P1 chrX 4 C T PASS 149 3
#> 7 S1 S1_L1 chrY 5 C T PASS 8 0
#> 8 S1 S1_P1 chrY 5 C T PASS 18 2
#> 9 S2 S2_L1 chr1 2 G A PASS 22 2
#> 10 S2 S2_P1 chr1 2 G A PASS 43 2
#> 11 S2 S2_L1 chr2 3 G A PASS 13 2
#> 12 S2 S2_P1 chr2 3 G A PASS 117 2
#> 13 S2 S2_L1 chrX 4 C T PASS 28 1
#> 14 S2 S2_P1 chrX 4 C T PASS 149 3
#> 15 S2 S2_L1 chrY 5 C T PASS 8 0
#> 16 S2 S2_P1 chrY 5 C T PASS 18 2
#> # ℹ 4 more variables: VAF <dbl>, AF <chr>, DP <int>, CSQ <chr>
mutect_dir <- system.file("extdata", "Mutect", package = "readthis")
read_mutect_snvs(mutect_dir)
#> Scanning file to determine attributes.
#> File attributes:
#> meta lines: 52
#> header_line: 53
#> variant count: 5
#> column count: 12
#>
#> Meta line 52 read in.
#> All meta lines processed.
#> gt matrix initialized.
#> Character matrix gt created.
#> Character matrix gt rows: 5
#> Character matrix gt cols: 12
#> skip: 0
#> nrows: 5
#> row_num: 0
#>
#> Processed variant: 5
#> All variants processed
#> Extracting gt element AD
#> Extracting gt element AF
#> Extracting gt element DP
#> Extracting gt element F1R2
#> Extracting gt element F2R1
#> Extracting gt element FAD
#> Extracting gt element GQ
#> Extracting gt element GT
#> Extracting gt element PGT
#> Extracting gt element PID
#> Extracting gt element PL
#> Extracting gt element PS
#> Extracting gt element SB
#> readthis>Guessing sample_ids: S1_L1, S1_P1
#> Scanning file to determine attributes.
#> File attributes:
#> meta lines: 52
#> header_line: 53
#> variant count: 5
#> column count: 12
#>
#> Meta line 52 read in.
#> All meta lines processed.
#> gt matrix initialized.
#> Character matrix gt created.
#> Character matrix gt rows: 5
#> Character matrix gt cols: 12
#> skip: 0
#> nrows: 5
#> row_num: 0
#>
#> Processed variant: 5
#> All variants processed
#> Extracting gt element AD
#> Extracting gt element AF
#> Extracting gt element DP
#> Extracting gt element F1R2
#> Extracting gt element F2R1
#> Extracting gt element FAD
#> Extracting gt element GQ
#> Extracting gt element GT
#> Extracting gt element PGT
#> Extracting gt element PID
#> Extracting gt element PL
#> Extracting gt element PS
#> Extracting gt element SB
#> readthis>Guessing sample_ids: S2_L1, S2_P1
#> # A tibble: 16 × 13
#> patient_id sample_id chrom pos ref alt FILTER ref_reads alt_reads
#> <chr> <chr> <chr> <int> <chr> <chr> <chr> <int> <int>
#> 1 S1 S1_L1 chr1 2 G A PASS 22 2
#> 2 S1 S1_P1 chr1 2 G A PASS 43 2
#> 3 S1 S1_L1 chr2 3 G A PASS 13 2
#> 4 S1 S1_P1 chr2 3 G A PASS 117 2
#> 5 S1 S1_L1 chrX 4 C T PASS 28 1
#> 6 S1 S1_P1 chrX 4 C T PASS 149 3
#> 7 S1 S1_L1 chrY 5 C T PASS 8 0
#> 8 S1 S1_P1 chrY 5 C T PASS 18 2
#> 9 S2 S2_L1 chr1 2 G A PASS 22 2
#> 10 S2 S2_P1 chr1 2 G A PASS 43 2
#> 11 S2 S2_L1 chr2 3 G A PASS 13 2
#> 12 S2 S2_P1 chr2 3 G A PASS 117 2
#> 13 S2 S2_L1 chrX 4 C T PASS 28 1
#> 14 S2 S2_P1 chrX 4 C T PASS 149 3
#> 15 S2 S2_L1 chrY 5 C T PASS 8 0
#> 16 S2 S2_P1 chrY 5 C T PASS 18 2
#> # ℹ 4 more variables: VAF <dbl>, AF <chr>, DP <int>, CSQ <chr>