Read ASCAT CNA calls
read_ascat_files.Rd
Reads the CNA variant calls and sample statistics produced by the ASCAT CNA caller.
Usage
read_ascat_files(
path,
sample_statistics = NULL,
sample_id = path,
sample_id_pattern = "(?<=\\/)[:alnum:]*(?=\\.)",
chrom_convention = "UCSC"
)
Arguments
- path
Can be either:
path to a single csv file with ASCAT CNA calls
tibble with sample_id, cnas, and (optionally) sample_statistics columns containing sample_ids and paths.
directory containing multiple ASCAT files with ".csv" and ".samplestatistics.*" names.
If path is a single csv file, the sample ID can be passed in the sample_id argument. If not provided, path will be used in the output sample_id column. If path is a directory, sample names will be inferred from the file names using str_extract() and sample_id_pattern.
- sample_statistics
Path to the ".samplestatistics.*" file, used when path leads to a single csv file
- sample_id
Sample ID, used when path is a single file
- sample_id_pattern
Pattern used to extract sample IDs from the filenames when path is a directory
- chrom_convention
Chromosome naming convention; one of "UCSC", "NCBI", or "keep"
Examples
library(readthis)
ASCAT_csv <- system.file("extdata", "ASCAT", "S1.csv", package = "readthis")
read_ascat_files(ASCAT_csv)
#> $cnas
#> # A tibble: 10 × 8
#> sample_id chrom start end total_cn major_cn minor_cn normal_cn
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 /home/runner/work/_… chr1 1.3 e4 1.4 e4 3 3 0 2
#> 2 /home/runner/work/_… chr1 1.5 e4 1.6 e4 1 1 0 2
#> 3 /home/runner/work/_… chr1 1.7 e4 1.80e4 2 2 0 2
#> 4 /home/runner/work/_… chr1 5 e5 2.49e8 3 3 0 2
#> 5 /home/runner/work/_… chr2 1.3 e5 3 e5 2 1 1 2
#> 6 /home/runner/work/_… chr2 1.5 e5 3 e5 1 1 0 2
#> 7 /home/runner/work/_… chr2 1.7 e5 3 e5 2 2 0 2
#> 8 /home/runner/work/_… chr2 2 e5 3 e5 1 1 0 2
#> 9 /home/runner/work/_… chr2 2.20e8 2.40e8 2 2 0 2
#> 10 /home/runner/work/_… chr3 1 e4 2.3 e7 2 2 0 2
#>
#> $sample_statistics
#> # A tibble: 0 × 7
#> # ℹ 7 variables: normal_contamination <dbl>, ploidy <dbl>, rho <dbl>,
#> # psi <dbl>, goodness_of_fit <dbl>, gender_chr <chr>, gender_chr_found <chr>
#>
#> attr(,"class")
#> [1] "cevo_ASCAT"
ASCAT_stats <- system.file("extdata", "ASCAT", "S1.samplestatistics.txt", package = "readthis")
read_ascat_files(ASCAT_csv, ASCAT_stats, sample_id = "S1")
#> $cnas
#> # A tibble: 10 × 8
#> sample_id chrom start end total_cn major_cn minor_cn normal_cn
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 S1 chr1 13000 14000 3 3 0 2
#> 2 S1 chr1 15000 16000 1 1 0 2
#> 3 S1 chr1 17000 18000 2 2 0 2
#> 4 S1 chr1 500000 249250000 3 3 0 2
#> 5 S1 chr2 130000 300000 2 1 1 2
#> 6 S1 chr2 150000 300000 1 1 0 2
#> 7 S1 chr2 170000 300000 2 2 0 2
#> 8 S1 chr2 200000 300000 1 1 0 2
#> 9 S1 chr2 220000000 240000000 2 2 0 2
#> 10 S1 chr3 10000 23000000 2 2 0 2
#>
#> $sample_statistics
#> # A tibble: 1 × 8
#> sample_id normal_contamination ploidy rho psi goodness_of_fit gender_chr
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 S1 0.00678 3.1 0.99 2.9 95.4 Y
#> # ℹ 1 more variable: gender_chr_found <chr>
#>
#> attr(,"class")
#> [1] "cevo_ASCAT"
ASCAT_dir <- system.file("extdata", "ASCAT", package = "readthis")
read_ascat_files(ASCAT_dir)
#> $cnas
#> # A tibble: 20 × 8
#> sample_id chrom start end total_cn major_cn minor_cn normal_cn
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 S1 chr1 13000 14000 3 3 0 2
#> 2 S1 chr1 15000 16000 1 1 0 2
#> 3 S1 chr1 17000 18000 2 2 0 2
#> 4 S1 chr1 500000 249250000 3 3 0 2
#> 5 S1 chr2 130000 300000 2 1 1 2
#> 6 S1 chr2 150000 300000 1 1 0 2
#> 7 S1 chr2 170000 300000 2 2 0 2
#> 8 S1 chr2 200000 300000 1 1 0 2
#> 9 S1 chr2 220000000 240000000 2 2 0 2
#> 10 S1 chr3 10000 23000000 2 2 0 2
#> 11 S2 chr1 13000 14000 3 3 0 2
#> 12 S2 chr1 15000 16000 1 1 0 2
#> 13 S2 chr1 17000 18000 2 2 0 2
#> 14 S2 chr1 500000 249250000 3 3 0 2
#> 15 S2 chr2 130000 300000 2 1 1 2
#> 16 S2 chr2 150000 300000 1 1 0 2
#> 17 S2 chr2 170000 300000 2 2 0 2
#> 18 S2 chr2 200000 300000 1 1 0 2
#> 19 S2 chr2 220000000 240000000 2 2 0 2
#> 20 S2 chr3 10000 23000000 2 2 0 2
#>
#> $sample_statistics
#> # A tibble: 2 × 8
#> sample_id normal_contamination ploidy rho psi goodness_of_fit gender_chr
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 S1 0.00678 3.1 0.99 2.9 95.4 Y
#> 2 S2 0.00678 3.1 0.99 2.9 95.4 Y
#> # ℹ 1 more variable: gender_chr_found <chr>
#>
#> attr(,"class")
#> [1] "cevo_ASCAT"
ASCAT_tbl <- tibble::tibble(
sample_id = c("S1", "S2"),
csv = c(
system.file("extdata", "ASCAT", "S1.csv", package = "readthis"),
system.file("extdata", "ASCAT", "S2.csv", package = "readthis")
),
sample_statistics = c(
system.file("extdata", "ASCAT", "S1.samplestatistics.txt", package = "readthis"),
system.file("extdata", "ASCAT", "S2.samplestatistics.txt", package = "readthis")
)
)
read_ascat_files(ASCAT_tbl)
#> $cnas
#> # A tibble: 20 × 8
#> sample_id chrom start end total_cn major_cn minor_cn normal_cn
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 S1 chr1 13000 14000 3 3 0 2
#> 2 S1 chr1 15000 16000 1 1 0 2
#> 3 S1 chr1 17000 18000 2 2 0 2
#> 4 S1 chr1 500000 249250000 3 3 0 2
#> 5 S1 chr2 130000 300000 2 1 1 2
#> 6 S1 chr2 150000 300000 1 1 0 2
#> 7 S1 chr2 170000 300000 2 2 0 2
#> 8 S1 chr2 200000 300000 1 1 0 2
#> 9 S1 chr2 220000000 240000000 2 2 0 2
#> 10 S1 chr3 10000 23000000 2 2 0 2
#> 11 S2 chr1 13000 14000 3 3 0 2
#> 12 S2 chr1 15000 16000 1 1 0 2
#> 13 S2 chr1 17000 18000 2 2 0 2
#> 14 S2 chr1 500000 249250000 3 3 0 2
#> 15 S2 chr2 130000 300000 2 1 1 2
#> 16 S2 chr2 150000 300000 1 1 0 2
#> 17 S2 chr2 170000 300000 2 2 0 2
#> 18 S2 chr2 200000 300000 1 1 0 2
#> 19 S2 chr2 220000000 240000000 2 2 0 2
#> 20 S2 chr3 10000 23000000 2 2 0 2
#>
#> $sample_statistics
#> # A tibble: 2 × 8
#> sample_id normal_contamination ploidy rho psi goodness_of_fit gender_chr
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 S1 0.00678 3.1 0.99 2.9 95.4 Y
#> 2 S2 0.00678 3.1 0.99 2.9 95.4 Y
#> # ℹ 1 more variable: gender_chr_found <chr>
#>
#> attr(,"class")
#> [1] "cevo_ASCAT"