Read ASCAT CNA calls — read_ascat

Reads the CNA variant calls and sample statistics produced by the ASCAT CNA caller.

Usage

read_ascat_files(
  path,
  sample_statistics = NULL,
  sample_id = path,
  sample_id_pattern = "(?<=\\/)[:alnum:]*(?=\\.)",
  chrom_convention = "UCSC"
)

Arguments

path

Can be either:

path to a single csv file with ASCAT CNA calls
tibble with sample_id, cnas, and (optionally) sample_statistics columns containing sample_ids and paths.
directory containing multiple ASCAT files with ".csv" and ".samplestatistics.*" names.

If path is a single csv file, the sample ID can be passed in the sample_id argument. If it is not provided, path will be used in the output sample_id column. If path is a directory, sample names will be inferred from the file names using str_extract() and sample_id_pattern.

sample_statistics

Path to the ".samplestatistics.*" file, used when path leads to a single csv file

sample_id

Sample ID, used when path is a single file

sample_id_pattern

Pattern used to extract sample IDs from the filenames when path is a directory

chrom_convention

Chromosome naming convention: one of "UCSC", "NCBI", or "keep"

Examples

library(readthis)

ASCAT_csv <- system.file("extdata", "ASCAT", "S1.csv", package = "readthis")
read_ascat_files(ASCAT_csv)
#> $cnas
#> # A tibble: 10 × 8
#>    sample_id            chrom  start    end total_cn major_cn minor_cn normal_cn
#>    <chr>                <chr>  <dbl>  <dbl>    <dbl>    <dbl>    <dbl>     <dbl>
#>  1 /home/runner/work/_… chr1  1.3 e4 1.4 e4        3        3        0         2
#>  2 /home/runner/work/_… chr1  1.5 e4 1.6 e4        1        1        0         2
#>  3 /home/runner/work/_… chr1  1.7 e4 1.80e4        2        2        0         2
#>  4 /home/runner/work/_… chr1  5   e5 2.49e8        3        3        0         2
#>  5 /home/runner/work/_… chr2  1.3 e5 3   e5        2        1        1         2
#>  6 /home/runner/work/_… chr2  1.5 e5 3   e5        1        1        0         2
#>  7 /home/runner/work/_… chr2  1.7 e5 3   e5        2        2        0         2
#>  8 /home/runner/work/_… chr2  2   e5 3   e5        1        1        0         2
#>  9 /home/runner/work/_… chr2  2.20e8 2.40e8        2        2        0         2
#> 10 /home/runner/work/_… chr3  1   e4 2.3 e7        2        2        0         2
#> 
#> $sample_statistics
#> # A tibble: 0 × 7
#> # ℹ 7 variables: normal_contamination <dbl>, ploidy <dbl>, rho <dbl>,
#> #   psi <dbl>, goodness_of_fit <dbl>, gender_chr <chr>, gender_chr_found <chr>
#> 
#> attr(,"class")
#> [1] "cevo_ASCAT"

ASCAT_stats <- system.file("extdata", "ASCAT", "S1.samplestatistics.txt", package = "readthis")
read_ascat_files(ASCAT_csv, ASCAT_stats, sample_id = "S1")
#> $cnas
#> # A tibble: 10 × 8
#>    sample_id chrom     start       end total_cn major_cn minor_cn normal_cn
#>    <chr>     <chr>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>     <dbl>
#>  1 S1        chr1      13000     14000        3        3        0         2
#>  2 S1        chr1      15000     16000        1        1        0         2
#>  3 S1        chr1      17000     18000        2        2        0         2
#>  4 S1        chr1     500000 249250000        3        3        0         2
#>  5 S1        chr2     130000    300000        2        1        1         2
#>  6 S1        chr2     150000    300000        1        1        0         2
#>  7 S1        chr2     170000    300000        2        2        0         2
#>  8 S1        chr2     200000    300000        1        1        0         2
#>  9 S1        chr2  220000000 240000000        2        2        0         2
#> 10 S1        chr3      10000  23000000        2        2        0         2
#> 
#> $sample_statistics
#> # A tibble: 1 × 8
#>   sample_id normal_contamination ploidy   rho   psi goodness_of_fit gender_chr
#>   <chr>                    <dbl>  <dbl> <dbl> <dbl>           <dbl> <chr>     
#> 1 S1                     0.00678    3.1  0.99   2.9            95.4 Y         
#> # ℹ 1 more variable: gender_chr_found <chr>
#> 
#> attr(,"class")
#> [1] "cevo_ASCAT"

ASCAT_dir <- system.file("extdata", "ASCAT", package = "readthis")
read_ascat_files(ASCAT_dir)
#> $cnas
#> # A tibble: 20 × 8
#>    sample_id chrom     start       end total_cn major_cn minor_cn normal_cn
#>    <chr>     <chr>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>     <dbl>
#>  1 S1        chr1      13000     14000        3        3        0         2
#>  2 S1        chr1      15000     16000        1        1        0         2
#>  3 S1        chr1      17000     18000        2        2        0         2
#>  4 S1        chr1     500000 249250000        3        3        0         2
#>  5 S1        chr2     130000    300000        2        1        1         2
#>  6 S1        chr2     150000    300000        1        1        0         2
#>  7 S1        chr2     170000    300000        2        2        0         2
#>  8 S1        chr2     200000    300000        1        1        0         2
#>  9 S1        chr2  220000000 240000000        2        2        0         2
#> 10 S1        chr3      10000  23000000        2        2        0         2
#> 11 S2        chr1      13000     14000        3        3        0         2
#> 12 S2        chr1      15000     16000        1        1        0         2
#> 13 S2        chr1      17000     18000        2        2        0         2
#> 14 S2        chr1     500000 249250000        3        3        0         2
#> 15 S2        chr2     130000    300000        2        1        1         2
#> 16 S2        chr2     150000    300000        1        1        0         2
#> 17 S2        chr2     170000    300000        2        2        0         2
#> 18 S2        chr2     200000    300000        1        1        0         2
#> 19 S2        chr2  220000000 240000000        2        2        0         2
#> 20 S2        chr3      10000  23000000        2        2        0         2
#> 
#> $sample_statistics
#> # A tibble: 2 × 8
#>   sample_id normal_contamination ploidy   rho   psi goodness_of_fit gender_chr
#>   <chr>                    <dbl>  <dbl> <dbl> <dbl>           <dbl> <chr>     
#> 1 S1                     0.00678    3.1  0.99   2.9            95.4 Y         
#> 2 S2                     0.00678    3.1  0.99   2.9            95.4 Y         
#> # ℹ 1 more variable: gender_chr_found <chr>
#> 
#> attr(,"class")
#> [1] "cevo_ASCAT"

ASCAT_tbl <- tibble::tibble(
  sample_id = c("S1", "S2"),
  csv = c(
    system.file("extdata", "ASCAT", "S1.csv", package = "readthis"),
    system.file("extdata", "ASCAT", "S2.csv", package = "readthis")
   ),
  sample_statistics = c(
    system.file("extdata", "ASCAT", "S1.samplestatistics.txt", package = "readthis"),
    system.file("extdata", "ASCAT", "S2.samplestatistics.txt", package = "readthis")
  )
)
read_ascat_files(ASCAT_tbl)
#> $cnas
#> # A tibble: 20 × 8
#>    sample_id chrom     start       end total_cn major_cn minor_cn normal_cn
#>    <chr>     <chr>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>     <dbl>
#>  1 S1        chr1      13000     14000        3        3        0         2
#>  2 S1        chr1      15000     16000        1        1        0         2
#>  3 S1        chr1      17000     18000        2        2        0         2
#>  4 S1        chr1     500000 249250000        3        3        0         2
#>  5 S1        chr2     130000    300000        2        1        1         2
#>  6 S1        chr2     150000    300000        1        1        0         2
#>  7 S1        chr2     170000    300000        2        2        0         2
#>  8 S1        chr2     200000    300000        1        1        0         2
#>  9 S1        chr2  220000000 240000000        2        2        0         2
#> 10 S1        chr3      10000  23000000        2        2        0         2
#> 11 S2        chr1      13000     14000        3        3        0         2
#> 12 S2        chr1      15000     16000        1        1        0         2
#> 13 S2        chr1      17000     18000        2        2        0         2
#> 14 S2        chr1     500000 249250000        3        3        0         2
#> 15 S2        chr2     130000    300000        2        1        1         2
#> 16 S2        chr2     150000    300000        1        1        0         2
#> 17 S2        chr2     170000    300000        2        2        0         2
#> 18 S2        chr2     200000    300000        1        1        0         2
#> 19 S2        chr2  220000000 240000000        2        2        0         2
#> 20 S2        chr3      10000  23000000        2        2        0         2
#> 
#> $sample_statistics
#> # A tibble: 2 × 8
#>   sample_id normal_contamination ploidy   rho   psi goodness_of_fit gender_chr
#>   <chr>                    <dbl>  <dbl> <dbl> <dbl>           <dbl> <chr>     
#> 1 S1                     0.00678    3.1  0.99   2.9            95.4 Y         
#> 2 S2                     0.00678    3.1  0.99   2.9            95.4 Y         
#> # ℹ 1 more variable: gender_chr_found <chr>
#> 
#> attr(,"class")
#> [1] "cevo_ASCAT"