% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/main.R
\name{run_matrix_spma}
\alias{run_matrix_spma}
\title{Matrix-based Spectrum Motif Analysis}
\usage{
run_matrix_spma(
  sorted_transcript_sequences,
  sorted_transcript_values = NULL,
  transcript_values_label = "transcript value",
  motifs = NULL,
  n_bins = 40,
  midpoint = 0,
  x_value_limits = NULL,
  max_model_degree = 1,
  max_cs_permutations = 1e+07,
  min_cs_permutations = 5000,
  max_hits = 5,
  threshold_method = "p_value",
  threshold_value = 0.25^6,
  max_fg_permutations = 1e+06,
  min_fg_permutations = 1000,
  e = 5,
  p_adjust_method = "BH",
  n_cores = 1,
  cache = paste0(tempdir(), "/sc/")
)
}
\arguments{
\item{sorted_transcript_sequences}{named character vector of ranked sequences
(only containing upper case characters A, C, G, T), where the
names are RefSeq identifiers and sequence
type qualifiers (\code{"3UTR"}, \code{"5UTR"} or \code{"mRNA"}), separated by
\code{"|"}, e.g.
\code{"NM_010356|3UTR"}. Names are only used to cache results.
The sequences in \code{sorted_transcript_sequences} must be ranked
(i.e., sorted).
Commonly used sorting criteria are measures of differential expression, such
as fold change or signal-to-noise ratio (e.g., between treatment and control
samples in gene expression profiling experiments).}

\item{sorted_transcript_values}{vector of sorted transcript values, i.e.,
the fold change or signal-to-noise ratio or any other quantity that was used
to sort the transcripts that were passed to \code{run_matrix_spma} or
\code{run_kmer_spma} (default value is \code{NULL}). These values are
displayed as a semi-transparent area over the enrichment value heatmaps
of spectrum plots.}

\item{transcript_values_label}{label of transcript sorting criterion
(e.g., \code{"log fold change"}, default value is \code{"transcript value"}),
only shown if \code{!is.null(sorted_transcript_values)}}

\item{motifs}{a list of motifs that is used to score the specified sequences.
If \code{is.null(motifs)} then all Transite motifs are used.}

\item{n_bins}{specifies the number of bins into which the sequences
will be divided;
valid values are between 7 and 100}

\item{midpoint}{for enrichment values the midpoint should be \code{1},
for log enrichment values \code{0} (defaults to \code{0})}

\item{x_value_limits}{sets limits of the x-value color scale (used to
harmonize color scales of different spectrum plots), see \code{limits}
argument of \code{\link[ggplot2]{continuous_scale}} (defaults to
\code{NULL}, i.e., the data-dependent default scale range)}

\item{max_model_degree}{maximum degree of polynomial}

\item{max_cs_permutations}{maximum number of permutations performed in
Monte Carlo test for consistency score}

\item{min_cs_permutations}{minimum number of permutations performed in
Monte Carlo test for consistency score}

\item{max_hits}{maximum number of putative binding sites per mRNA
that are counted}

\item{threshold_method}{either \code{"p_value"} (default) or
\code{"relative"}.
If \code{threshold_method} equals \code{"p_value"}, the default
\code{threshold_value}
 is \code{0.25^6}, which is the
lowest p-value that can be achieved by hexamer motifs, the shortest
supported motifs.
If \code{threshold_method} equals \code{"relative"}, the default
\code{threshold_value}
is \code{0.9}, which is 90\% of the maximum PWM score.}

\item{threshold_value}{semantics of the \code{threshold_value} depend on
\code{threshold_method} (default is \code{0.25^6})}

\item{max_fg_permutations}{maximum number of foreground permutations
performed in
Monte Carlo test for enrichment score}

\item{min_fg_permutations}{minimum number of foreground permutations
performed in
Monte Carlo test for enrichment score}

\item{e}{integer-valued stop criterion for enrichment score Monte Carlo
test: the
permutation process is aborted after
observing \code{e} random enrichment values that are more extreme than
the actual
enrichment value}

\item{p_adjust_method}{adjustment of p-values from Monte Carlo tests to
avoid alpha error
 accumulation, see \code{\link[stats]{p.adjust}}}

\item{n_cores}{the number of cores that are used}

\item{cache}{either logical or path to a directory where scores are cached.
The scores of each
motif are stored in a
separate file that contains a hash table with RefSeq identifiers and
sequence type
qualifiers as keys and the number of putative binding sites as values.
If \code{cache} is \code{FALSE}, scores will not be cached.}
}
\value{
A list with the following components:
\tabular{rl}{
  \code{foreground_scores} \tab the result of \code{\link{score_transcripts}}
  for the foreground
  sets (the bins)\cr
  \code{background_scores} \tab the result of \code{\link{score_transcripts}}
  for the background
  set\cr
  \code{enrichment_dfs} \tab a list of data frames, returned by
  \code{\link{calculate_motif_enrichment}}\cr
  \code{spectrum_info_df} \tab a data frame with the SPMA results\cr
  \code{spectrum_plots} \tab a list of spectrum plots, as generated by
  \code{\link{score_spectrum}}\cr
  \code{classifier_scores} \tab a list of classifier scores, as returned by
  \code{\link{classify_spectrum}}
}
}
\description{
SPMA helps to illuminate the relationship between RBP binding
evidence and the transcript
sorting criterion, e.g., fold change between treatment and control samples.
}
\details{
In order to investigate how motif targets are distributed across a
spectrum of
transcripts (e.g., all transcripts of a platform, ordered by fold change),
Spectrum Motif Analysis visualizes the gradient of RBP binding evidence
across all transcripts.

The matrix-based approach skips the \emph{k}-merization step of the
\emph{k}-mer-based approach
and instead scores the transcript sequence as a whole with a
position-specific scoring matrix.

For each sequence in foreground and background sets and each sequence motif,
the scoring algorithm evaluates the score for each sequence position.
Positions with
a relative score greater than a certain threshold are considered hits, i.e.,
putative binding sites.

By scoring all sequences in foreground and background sets, a hit count
for each motif and
each set is obtained, which is used to calculate enrichment values and
associated p-values
in the same way in which motif-compatible hexamer enrichment values are
calculated in
the \emph{k}-mer-based approach. P-values are adjusted with one of the
available adjustment methods.

An advantage of the matrix-based approach is the possibility of detecting
clusters of
binding sites. This can be done by counting regions with many hits using
positional
hit information or by simply applying a hit count threshold per
sequence, e.g., only
sequences with more than some number of hits are considered. Homotypic
clusters of RBP
binding sites may play a role similar to that of clusters of transcription
factors.
}
\examples{
# example data set
background_df <- transite:::ge$background_df
# sort sequences by signal-to-noise ratio
background_df <- dplyr::arrange(background_df, value)
# character vector of named and ranked (by signal-to-noise ratio) sequences,
# with every T replaced by U
background_seqs <- gsub("T", "U", background_df$seq)
names(background_seqs) <- paste0(background_df$refseq, "|",
  background_df$seq_type)

results <- run_matrix_spma(background_seqs,
                           sorted_transcript_values = background_df$value,
                           transcript_values_label = "signal-to-noise ratio",
                           motifs = get_motif_by_id("M178_0.6"),
                           n_bins = 20,
                           max_fg_permutations = 10000)

\dontrun{
results <- run_matrix_spma(background_seqs,
                           sorted_transcript_values = background_df$value,
                           transcript_values_label = "SNR") }

}
\seealso{
Other SPMA functions: 
\code{\link{classify_spectrum}()},
\code{\link{run_kmer_spma}()},
\code{\link{score_spectrum}()},
\code{\link{subdivide_data}()}

Other matrix functions: 
\code{\link{calculate_motif_enrichment}()},
\code{\link{run_matrix_tsma}()},
\code{\link{score_transcripts}()},
\code{\link{score_transcripts_single_motif}()}
}
\concept{SPMA functions}
\concept{matrix functions}
