import numpy as np
from ..configuration import DynamoAdataConfig
from ..preprocessing.utils import pca_monocle
from .cell_velocities import cell_velocities
from .connectivity import neighbors, normalize_knn_graph
from .dimension_reduction import reduceDimension
from .dynamics import dynamics
from .moments import moments
from .utils import set_transition_genes
# add recipe_csc_data()
[docs]def recipe_kin_data(
adata,
tkey=None,
reset_X=True,
X_total_layers=False,
splicing_total_layers=False,
n_top_genes=1000,
keep_filtered_cells=None,
keep_filtered_genes=None,
keep_raw_layers=None,
del_2nd_moments=None,
ekey="M_t",
vkey="velocity_T",
basis="umap",
rm_kwargs={},
):
"""An analysis recipe that properly pre-processes different layers for an kinetics experiment with both labeling and
splicing or only labeling data.
Parameters
----------
adata: :class:`~anndata.AnnData`
AnnData object that stores data for the the kinetics experiment, must include `uu, ul, su, sl` four
different layers.
tkey: `str` or None (default: None)
The column key for the labeling time of cells in .obs. Used for labeling based scRNA-seq data (will also
support for conventional scRNA-seq data). Note that `tkey` will be saved to adata.uns['pp']['tkey'] and used
in `dyn.tl.dynamics` in which when `group` is None, `tkey` will also be used for calculating 1st/2st moment
or covariance. We recommend to use hour as the unit of `time`.
reset_X: bool (default: `False`)
Whether do you want to let dynamo reset `adata.X` data based on layers stored in your experiment. One
critical functionality of dynamo is about visualizing RNA velocity vector flows which requires proper data
into which the high dimensional RNA velocity vectors will be projected.
(1) For `kinetics` experiment, we recommend the use of `total` layer as `adata.X`;
(2) For `degradation/conventional` experiment scRNA-seq, we recommend using `splicing` layer as `adata.X`.
Set `reset_X` to `True` to set those default values if you are not sure.
splicing_total_layers: bool (default `False`)
Whether to also normalize spliced / unspliced layers by size factor from total RNA. Paramter to
`recipe_monocle` function.
X_total_layers: bool (default `False`)
Whether to also normalize adata.X by size factor from total RNA. Paramter to `recipe_monocle` function.
n_top_genes: `int` (default: `1000`)
How many top genes based on scoring method (specified by sort_by) will be selected as feature genes.
Arguments required by the `recipe_monocle` function.
keep_filtered_cells: `bool` (default: `False`)
Whether to keep genes that don't pass the filtering in the returned adata object. Used in `recipe_monocle`.
keep_filtered_genes: `bool` (default: `False`)
Whether to keep genes that don't pass the filtering in the returned adata object. Used in `recipe_monocle`.
keep_raw_layers: `bool` (default: `False`)
Whether to keep layers with raw measurements in the returned adata object. Used in `recipe_monocle`.
del_2nd_moments: `bool` (default: `None`)
Whether to remove second moments or covariances. Default it is `None` rgument used for `dynamics` function.
tkey: `str` (default: `time`)
The column key for the time label of cells in .obs. Used for the "kinetic" model.
mode with labeled data. When `group` is None, `tkey` will also be used for calculating 1st/2st moment or
covariance. `{tkey}` column must exist in your adata object and indicates the labeling time period.
Parameters required for `dynamics` function.
ekey: str or None (optional, default None)
The dictionary key that corresponds to the gene expression in the layer attribute. By default, ekey and vkey
will be automatically detected from the adata object. Parameters required by `cell_velocities`.
vkey: str or None (optional, default None)
The dictionary key that corresponds to the estimated velocity values in the layers attribute. Parameters
required by `cell_velocities`
basis: int (optional, default `umap`)
The dictionary key that corresponds to the reduced dimension in `.obsm` attribute. Can be `X_spliced_umap`
or `X_total_umap`, etc. Parameters required by `cell_velocities`
rm_kwargs: `dict` or None (default: `None`)
Other Parameters passed into the pp.recipe_monocle function.
Returns
-------
An updated adata object that went through a proper and typical time-resolved RNA velocity analysis.
"""
from ..preprocessing import recipe_monocle
from ..preprocessing.utils import detect_experiment_datatype, pca_monocle
keep_filtered_cells = DynamoAdataConfig.use_default_var_if_none(
keep_filtered_cells, DynamoAdataConfig.RECIPE_KEEP_FILTERED_CELLS_KEY
)
keep_filtered_genes = DynamoAdataConfig.use_default_var_if_none(
keep_filtered_genes, DynamoAdataConfig.RECIPE_KEEP_FILTERED_GENES_KEY
)
keep_raw_layers = DynamoAdataConfig.use_default_var_if_none(
keep_raw_layers, DynamoAdataConfig.RECIPE_KEEP_RAW_LAYERS_KEY
)
del_2nd_moments = DynamoAdataConfig.use_default_var_if_none(
del_2nd_moments, DynamoAdataConfig.RECIPE_DEL_2ND_MOMENTS_KEY
)
has_splicing, has_labeling, splicing_labeling, _ = detect_experiment_datatype(adata)
if has_splicing and has_labeling and splicing_labeling:
layers = ["X_new", "X_total", "X_uu", "X_ul", "X_su", "X_sl"]
elif has_labeling:
layers = ["X_new", "X_total"]
if not has_labeling:
raise Exception(
"This recipe is only applicable to kinetics experiment datasets that have "
"labeling data (at least either with `'uu', 'ul', 'su', 'sl'` or `'new', 'total'` "
"layers."
)
if has_splicing and has_labeling:
# new, total (and uu, ul, su, sl if existed) layers will be normalized with size factor calculated with total
# layers spliced / unspliced layers will be normalized independently.
recipe_monocle(
adata,
tkey=tkey,
experiment_type="kin",
reset_X=reset_X,
X_total_layers=X_total_layers,
splicing_total_layers=splicing_total_layers,
n_top_genes=n_top_genes,
total_layers=True,
keep_filtered_cells=keep_filtered_cells,
keep_filtered_genes=keep_filtered_genes,
keep_raw_layers=keep_raw_layers,
**rm_kwargs,
)
tkey = adata.uns["pp"]["tkey"]
# first calculate moments for labeling data relevant layers using total based connectivity graph
moments(adata, group=tkey, layers=layers)
# then we want to calculate moments for spliced and unspliced layers based on connectivity graph from spliced
# data.
# first get X_spliced based pca embedding
CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].A)
cm_genesums = CM.sum(axis=0)
valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0)
valid_ind = np.array(valid_ind).flatten()
pca_monocle(adata, CM[:, valid_ind], pca_key="X_spliced_pca")
# then get neighbors graph based on X_spliced_pca
neighbors(adata, X_data=adata.obsm["X_spliced_pca"], layer="X_spliced")
# then normalize neighbors graph so that each row sums up to be 1
conn = normalize_knn_graph(adata.obsp["connectivities"] > 0)
# then calculate moments for spliced related layers using spliced based connectivity graph
moments(adata, conn=conn, layers=["X_spliced", "X_unspliced"])
# then perform kinetic estimations with properly preprocessed layers for either the labeling or the splicing
# data
dynamics(
adata,
model="deterministic",
est_method="twostep",
del_2nd_moments=del_2nd_moments,
)
# then perform dimension reduction
reduceDimension(adata, reduction_method=basis)
# lastly, project RNA velocity to low dimensional embedding.
cell_velocities(adata, enforce=True, vkey=vkey, ekey=ekey, basis=basis)
else:
recipe_monocle(
adata,
tkey=tkey,
experiment_type="kin",
reset_X=reset_X,
X_total_layers=X_total_layers,
splicing_total_layers=splicing_total_layers,
n_top_genes=n_top_genes,
total_layers=True,
keep_filtered_cells=keep_filtered_cells,
keep_filtered_genes=keep_filtered_genes,
keep_raw_layers=keep_raw_layers,
**rm_kwargs,
)
dynamics(
adata,
model="deterministic",
est_method="twostep",
del_2nd_moments=del_2nd_moments,
)
reduceDimension(adata, reduction_method=basis)
cell_velocities(adata, basis=basis)
return adata
[docs]def recipe_deg_data(
adata,
tkey=None,
reset_X=True,
X_total_layers=False,
splicing_total_layers=False,
n_top_genes=1000,
keep_filtered_cells=None,
keep_filtered_genes=None,
keep_raw_layers=None,
del_2nd_moments=True,
fraction_for_deg=False,
ekey="M_s",
vkey="velocity_S",
basis="umap",
rm_kwargs={},
):
"""An analysis recipe that properly pre-processes different layers for a degradation experiment with both
labeling and splicing data or only labeling . Functions need to be updated.
Parameters
----------
adata: :class:`~anndata.AnnData`
AnnData object that stores data for the the kinetics experiment, must include `uu, ul, su, sl` four
different layers.
tkey: `str` or None (default: None)
The column key for the labeling time of cells in .obs. Used for labeling based scRNA-seq data (will also
support for conventional scRNA-seq data). Note that `tkey` will be saved to adata.uns['pp']['tkey'] and used
in `dyn.tl.dynamics` in which when `group` is None, `tkey` will also be used for calculating 1st/2st moment
or covariance. We recommend to use hour as the unit of `time`.
reset_X: bool (default: `False`)
Whether do you want to let dynamo reset `adata.X` data based on layers stored in your experiment. One
critical functionality of dynamo is about visualizing RNA velocity vector flows which requires proper data
into which the high dimensional RNA velocity vectors will be projected.
(1) For `kinetics` experiment, we recommend the use of `total` layer as `adata.X`;
(2) For `degradation/conventional` experiment scRNA-seq, we recommend using `splicing` layer as `adata.X`.
Set `reset_X` to `True` to set those default values if you are not sure.
splicing_total_layers: bool (default `False`)
Whether to also normalize spliced / unspliced layers by size factor from total RNA. Paramter to
`recipe_monocle` function.
X_total_layers: bool (default `False`)
Whether to also normalize adata.X by size factor from total RNA. Paramter to `recipe_monocle` function.
n_top_genes: `int` (default: `1000`)
How many top genes based on scoring method (specified by sort_by) will be selected as feature genes.
Arguments required by the `recipe_monocle` function.
keep_filtered_cells: `bool` (default: `False`)
Whether to keep genes that don't pass the filtering in the returned adata object. Used in `recipe_monocle`.
keep_filtered_genes: `bool` (default: `False`)
Whether to keep genes that don't pass the filtering in the returned adata object. Used in `recipe_monocle`.
keep_raw_layers: `bool` (default: `False`)
Whether to keep layers with raw measurements in the returned adata object. Used in `recipe_monocle`.
del_2nd_moments: `bool` (default: `None`)
Whether to remove second moments or covariances. Default it is `None` rgument used for `dynamics` function.
fraction_for_deg: `bool` (default: `False`)
Whether to use the fraction of labeled RNA instead of the raw labeled RNA to estimate the degradation parameter.
tkey: `str` (default: `time`)
The column key for the time label of cells in .obs. Used for the "kinetic" model.
mode with labeled data. When `group` is None, `tkey` will also be used for calculating 1st/2st moment or
covariance. `{tkey}` column must exist in your adata object and indicates the labeling time period.
Parameters required for `dynamics` function.
ekey: str or None (optional, default None)
The dictionary key that corresponds to the gene expression in the layer attribute. By default, ekey and vkey
will be automatically detected from the adata object. Parameters required by `cell_velocities`.
vkey: str or None (optional, default None)
The dictionary key that corresponds to the estimated velocity values in the layers attribute. Parameters
required by `cell_velocities`
basis: int (optional, default `umap`)
The dictionary key that corresponds to the reduced dimension in `.obsm` attribute. Can be `X_spliced_umap`
or `X_total_umap`, etc. Parameters required by `cell_velocities`
rm_kwargs: `dict` or None (default: `None`)
Other Parameters passed into the pp.recipe_monocle function.
Returns
-------
An updated adata object that went through a proper and typical time-resolved RNA velocity analysis.
"""
from ..preprocessing import recipe_monocle
from ..preprocessing.utils import detect_experiment_datatype, pca_monocle
keep_filtered_cells = DynamoAdataConfig.use_default_var_if_none(
keep_filtered_cells, DynamoAdataConfig.RECIPE_KEEP_FILTERED_CELLS_KEY
)
keep_filtered_genes = DynamoAdataConfig.use_default_var_if_none(
keep_filtered_genes, DynamoAdataConfig.RECIPE_KEEP_FILTERED_GENES_KEY
)
keep_raw_layers = DynamoAdataConfig.use_default_var_if_none(
keep_raw_layers, DynamoAdataConfig.RECIPE_KEEP_RAW_LAYERS_KEY
)
has_splicing, has_labeling, splicing_labeling, _ = detect_experiment_datatype(adata)
if has_splicing and has_labeling and splicing_labeling:
layers = ["X_new", "X_total", "X_uu", "X_ul", "X_su", "X_sl"]
elif has_labeling:
layers = ["X_new", "X_total"]
if not has_labeling:
raise Exception(
"This recipe is only applicable to kinetics experiment datasets that have "
"labeling data (at least either with `'uu', 'ul', 'su', 'sl'` or `'new', 'total'` "
"layers."
)
if has_splicing and has_labeling:
# new, total (and uu, ul, su, sl if existed) layers will be normalized with size factor calculated with total
# layers spliced / unspliced layers will be normalized independently.
recipe_monocle(
adata,
tkey=tkey,
experiment_type="deg",
reset_X=reset_X,
X_total_layers=X_total_layers,
splicing_total_layers=splicing_total_layers,
n_top_genes=n_top_genes,
total_layers=True,
keep_filtered_cells=keep_filtered_cells,
keep_filtered_genes=keep_filtered_genes,
keep_raw_layers=keep_raw_layers,
**rm_kwargs,
)
tkey = adata.uns["pp"]["tkey"]
# first calculate moments for spliced related layers using spliced based connectivity graph
moments(adata, layers=["X_spliced", "X_unspliced"])
# then calculate moments for labeling data relevant layers using total based connectivity graph
# first get X_total based pca embedding
CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_total"].A)
cm_genesums = CM.sum(axis=0)
valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0)
valid_ind = np.array(valid_ind).flatten()
pca_monocle(adata, CM[:, valid_ind], pca_key="X_total_pca")
# then get neighbors graph based on X_spliced_pca
neighbors(adata, X_data=adata.obsm["X_total_pca"], layer="X_total")
# then normalize neighbors graph so that each row sums up to be 1
conn = normalize_knn_graph(adata.obsp["connectivities"] > 0)
moments(adata, conn=conn, group=tkey, layers=layers)
# then perform kinetic estimations with properly preprocessed layers for either the labeling or the splicing
# data
dynamics(
adata,
model="deterministic",
est_method="twostep",
del_2nd_moments=del_2nd_moments,
fraction_for_deg=fraction_for_deg,
)
# then perform dimension reduction
reduceDimension(adata, reduction_method=basis)
# lastly, project RNA velocity to low dimensional embedding.
try:
set_transition_genes(adata)
cell_velocities(adata, enforce=True, vkey=vkey, ekey=ekey, basis=basis)
except BaseException:
cell_velocities(
adata,
min_r2=adata.var.gamma_r2.min(),
enforce=True,
vkey=vkey,
ekey=ekey,
basis=basis,
)
else:
recipe_monocle(
adata,
tkey=tkey,
experiment_type="deg",
reset_X=reset_X,
X_total_layers=X_total_layers,
splicing_total_layers=splicing_total_layers,
n_top_genes=n_top_genes,
total_layers=True,
keep_filtered_cells=keep_filtered_cells,
keep_filtered_genes=keep_filtered_genes,
keep_raw_layers=keep_raw_layers,
**rm_kwargs,
)
dynamics(
adata,
model="deterministic",
del_2nd_moments=del_2nd_moments,
fraction_for_deg=fraction_for_deg,
)
reduceDimension(adata, reduction_method=basis)
return adata
[docs]def recipe_mix_kin_deg_data(
adata,
tkey=None,
reset_X=True,
X_total_layers=False,
splicing_total_layers=False,
n_top_genes=1000,
keep_filtered_cells=None,
keep_filtered_genes=None,
keep_raw_layers=None,
del_2nd_moments=None,
ekey="M_t",
vkey="velocity_T",
basis="umap",
rm_kwargs={},
):
"""An analysis recipe that properly pre-processes different layers for an mixture kinetics and degradation
experiment with both labeling and splicing or only labeling data.
Parameters
----------
adata: :class:`~anndata.AnnData`
AnnData object that stores data for the the kinetics experiment, must include `uu, ul, su, sl` four
different layers.
tkey: `str` or None (default: None)
The column key for the labeling time of cells in .obs. Used for labeling based scRNA-seq data (will also
support for conventional scRNA-seq data). Note that `tkey` will be saved to adata.uns['pp']['tkey'] and used
in `dyn.tl.dynamics` in which when `group` is None, `tkey` will also be used for calculating 1st/2st moment
or covariance. We recommend to use hour as the unit of `time`.
reset_X: bool (default: `False`)
Whether do you want to let dynamo reset `adata.X` data based on layers stored in your experiment. One
critical functionality of dynamo is about visualizing RNA velocity vector flows which requires proper data
into which the high dimensional RNA velocity vectors will be projected.
(1) For `kinetics` experiment, we recommend the use of `total` layer as `adata.X`;
(2) For `degradation/conventional` experiment scRNA-seq, we recommend using `splicing` layer as `adata.X`.
Set `reset_X` to `True` to set those default values if you are not sure.
splicing_total_layers: bool (default `False`)
Whether to also normalize spliced / unspliced layers by size factor from total RNA. Paramter to
`recipe_monocle` function.
X_total_layers: bool (default `False`)
Whether to also normalize adata.X by size factor from total RNA. Paramter to `recipe_monocle` function.
n_top_genes: `int` (default: `1000`)
How many top genes based on scoring method (specified by sort_by) will be selected as feature genes.
Arguments required by the `recipe_monocle` function.
keep_filtered_cells: `bool` (default: `False`)
Whether to keep genes that don't pass the filtering in the returned adata object. Used in `recipe_monocle`.
keep_filtered_genes: `bool` (default: `False`)
Whether to keep genes that don't pass the filtering in the returned adata object. Used in `recipe_monocle`.
keep_raw_layers: `bool` (default: `False`)
Whether to keep layers with raw measurements in the returned adata object. Used in `recipe_monocle`.
del_2nd_moments: `bool` (default: `None`)
Whether to remove second moments or covariances. Default it is `None` rgument used for `dynamics` function.
tkey: `str` (default: `time`)
The column key for the time label of cells in .obs. Used for the "kinetic" model.
mode with labeled data. When `group` is None, `tkey` will also be used for calculating 1st/2st moment or
covariance. `{tkey}` column must exist in your adata object and indicates the labeling time period.
Parameters required for `dynamics` function.
ekey: str or None (optional, default None)
The dictionary key that corresponds to the gene expression in the layer attribute. By default, ekey and vkey
will be automatically detected from the adata object. Parameters required by `cell_velocities`.
vkey: str or None (optional, default None)
The dictionary key that corresponds to the estimated velocity values in the layers attribute. Parameters
required by `cell_velocities`
basis: int (optional, default `umap`)
The dictionary key that corresponds to the reduced dimension in `.obsm` attribute. Can be `X_spliced_umap`
or `X_total_umap`, etc. Parameters required by `cell_velocities`
rm_kwargs: `dict` or None (default: `None`)
Other Parameters passed into the pp.recipe_monocle function.
Returns
-------
An updated adata object that went through a proper and typical time-resolved RNA velocity analysis.
"""
from ..preprocessing import recipe_monocle
from ..preprocessing.utils import detect_experiment_datatype, pca_monocle
keep_filtered_cells = DynamoAdataConfig.use_default_var_if_none(
keep_filtered_cells, DynamoAdataConfig.RECIPE_KEEP_FILTERED_CELLS_KEY
)
keep_filtered_genes = DynamoAdataConfig.use_default_var_if_none(
keep_filtered_genes, DynamoAdataConfig.RECIPE_KEEP_FILTERED_GENES_KEY
)
keep_raw_layers = DynamoAdataConfig.use_default_var_if_none(
keep_raw_layers, DynamoAdataConfig.RECIPE_KEEP_RAW_LAYERS_KEY
)
del_2nd_moments = DynamoAdataConfig.use_default_var_if_none(
del_2nd_moments, DynamoAdataConfig.RECIPE_DEL_2ND_MOMENTS_KEY
)
has_splicing, has_labeling, splicing_labeling, _ = detect_experiment_datatype(adata)
if has_splicing and has_labeling and splicing_labeling:
layers = ["X_new", "X_total", "X_uu", "X_ul", "X_su", "X_sl"]
elif has_labeling:
layers = ["X_new", "X_total"]
if not has_labeling:
raise Exception(
"This recipe is only applicable to kinetics experiment datasets that have "
"labeling data (at least either with `'uu', 'ul', 'su', 'sl'` or `'new', 'total'` "
"layers."
)
if has_splicing and has_labeling:
# new, total (and uu, ul, su, sl if existed) layers will be normalized with size factor calculated with total
# layers spliced / unspliced layers will be normalized independently.
recipe_monocle(
adata,
tkey=tkey,
experiment_type="mix_pulse_chase",
reset_X=reset_X,
X_total_layers=X_total_layers,
splicing_total_layers=splicing_total_layers,
n_top_genes=n_top_genes,
total_layers=True,
keep_filtered_cells=keep_filtered_cells,
keep_filtered_genes=keep_filtered_genes,
keep_raw_layers=keep_raw_layers,
**rm_kwargs,
)
tkey = adata.uns["pp"]["tkey"]
# first calculate moments for labeling data relevant layers using total based connectivity graph
moments(adata, group=tkey, layers=layers)
# then we want to calculate moments for spliced and unspliced layers based on connectivity graph from spliced
# data.
# first get X_spliced based pca embedding
CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].A)
cm_genesums = CM.sum(axis=0)
valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0)
valid_ind = np.array(valid_ind).flatten()
pca_monocle(adata, CM[:, valid_ind], pca_key="X_spliced_pca")
# then get neighbors graph based on X_spliced_pca
neighbors(adata, X_data=adata.obsm["X_spliced_pca"], layer="X_spliced")
# then normalize neighbors graph so that each row sums up to be 1
conn = normalize_knn_graph(adata.obsp["connectivities"] > 0)
# then calculate moments for spliced related layers using spliced based connectivity graph
moments(adata, conn=conn, layers=["X_spliced", "X_unspliced"])
# then perform kinetic estimations with properly preprocessed layers for either the labeling or the splicing
# data
dynamics(
adata,
model="deterministic",
est_method="twostep",
del_2nd_moments=del_2nd_moments,
)
# then perform dimension reduction
reduceDimension(adata, reduction_method=basis)
# lastly, project RNA velocity to low dimensional embedding.
cell_velocities(adata, enforce=True, vkey=vkey, ekey=ekey, basis=basis)
else:
recipe_monocle(
adata,
tkey=tkey,
experiment_type="mix_pulse_chase",
reset_X=reset_X,
X_total_layers=X_total_layers,
splicing_total_layers=splicing_total_layers,
n_top_genes=n_top_genes,
total_layers=True,
keep_filtered_cells=keep_filtered_cells,
keep_filtered_genes=keep_filtered_genes,
keep_raw_layers=keep_raw_layers,
**rm_kwargs,
)
dynamics(
adata,
model="deterministic",
est_method="twostep",
del_2nd_moments=del_2nd_moments,
)
reduceDimension(adata, reduction_method=basis)
cell_velocities(adata, enforce=True, vkey=vkey, ekey=ekey, basis=basis)
return adata
# support using just spliced/unspliced/new/total 4 layers, as well as uu, ul, su, sl layers
[docs]def recipe_one_shot_data(
adata,
tkey=None,
reset_X=True,
X_total_layers=False,
splicing_total_layers=False,
n_top_genes=1000,
keep_filtered_cells=None,
keep_filtered_genes=None,
keep_raw_layers=None,
one_shot_method="sci-fate",
del_2nd_moments=None,
ekey="M_t",
vkey="velocity_T",
basis="umap",
rm_kwargs={},
):
"""An analysis recipe that properly pre-processes different layers for an one-shot experiment with both labeling and
splicing data.
Parameters
----------
adata: :class:`~anndata.AnnData`
AnnData object that stores data for the the kinetics experiment, must include `uu, ul, su, sl` four
different layers.
tkey: `str` or None (default: None)
The column key for the labeling time of cells in .obs. Used for labeling based scRNA-seq data (will also
support for conventional scRNA-seq data). Note that `tkey` will be saved to adata.uns['pp']['tkey'] and used
in `dyn.tl.dynamics` in which when `group` is None, `tkey` will also be used for calculating 1st/2st moment
or covariance. We recommend to use hour as the unit of `time`.
reset_X: bool (default: `False`)
Whether do you want to let dynamo reset `adata.X` data based on layers stored in your experiment. One
critical functionality of dynamo is about visualizing RNA velocity vector flows which requires proper data
into which the high dimensional RNA velocity vectors will be projected.
(1) For `kinetics` experiment, we recommend the use of `total` layer as `adata.X`;
(2) For `degradation/conventional` experiment scRNA-seq, we recommend using `splicing` layer as `adata.X`.
Set `reset_X` to `True` to set those default values if you are not sure.
splicing_total_layers: bool (default `False`)
Whether to also normalize spliced / unspliced layers by size factor from total RNA. Paramter to
`recipe_monocle` function.
X_total_layers: bool (default `False`)
Whether to also normalize adata.X by size factor from total RNA. Paramter to `recipe_monocle` function.
n_top_genes: `int` (default: `1000`)
How many top genes based on scoring method (specified by sort_by) will be selected as feature genes.
Arguments required by the `recipe_monocle` function.
keep_filtered_cells: `bool` (default: `False`)
Whether to keep genes that don't pass the filtering in the returned adata object. Used in `recipe_monocle`.
keep_filtered_genes: `bool` (default: `False`)
Whether to keep genes that don't pass the filtering in the returned adata object. Used in `recipe_monocle`.
keep_raw_layers: `bool` (default: `False`)
Whether to keep layers with raw measurements in the returned adata object. Used in `recipe_monocle`.
one_shot_method: `str` (default: `sci-fate`)
The method to use for calculate the absolute labeling and splicing velocity for the one-shot data of use.
del_2nd_moments: `bool` (default: `None`)
Whether to remove second moments or covariances. Default it is `None` rgument used for `dynamics` function.
tkey: `str` (default: `time`)
The column key for the time label of cells in .obs. Used for the "kinetic" model.
mode with labeled data. When `group` is None, `tkey` will also be used for calculating 1st/2st moment or
covariance. `{tkey}` column must exist in your adata object and indicates the labeling time period.
Parameters required for `dynamics` function.
ekey: str or None (optional, default None)
The dictionary key that corresponds to the gene expression in the layer attribute. By default, ekey and vkey
will be automatically detected from the adata object. Parameters required by `cell_velocities`.
vkey: str or None (optional, default None)
The dictionary key that corresponds to the estimated velocity values in the layers attribute. Parameters
required by `cell_velocities`
basis: int (optional, default `umap`)
The dictionary key that corresponds to the reduced dimension in `.obsm` attribute. Can be `X_spliced_umap`
or `X_total_umap`, etc. Parameters required by `cell_velocities`
rm_kwargs: `dict` or None (default: `None`)
Other Parameters passed into the pp.recipe_monocle function.
Returns
-------
An updated adata object that went through a proper and typical time-resolved RNA velocity analysis.
"""
from ..preprocessing import recipe_monocle
from ..preprocessing.utils import detect_experiment_datatype, pca_monocle
keep_filtered_cells = DynamoAdataConfig.use_default_var_if_none(
keep_filtered_cells, DynamoAdataConfig.RECIPE_KEEP_FILTERED_CELLS_KEY
)
keep_filtered_genes = DynamoAdataConfig.use_default_var_if_none(
keep_filtered_genes, DynamoAdataConfig.RECIPE_KEEP_FILTERED_GENES_KEY
)
keep_raw_layers = DynamoAdataConfig.use_default_var_if_none(
keep_raw_layers, DynamoAdataConfig.RECIPE_KEEP_RAW_LAYERS_KEY
)
del_2nd_moments = DynamoAdataConfig.use_default_var_if_none(
del_2nd_moments, DynamoAdataConfig.RECIPE_DEL_2ND_MOMENTS_KEY
)
has_splicing, has_labeling, splicing_labeling, _ = detect_experiment_datatype(adata)
if has_splicing and has_labeling and splicing_labeling:
layers = ["X_new", "X_total", "X_uu", "X_ul", "X_su", "X_sl"]
elif has_labeling:
layers = ["X_new", "X_total"]
if not has_labeling:
raise Exception(
"This recipe is only applicable to kinetics experiment datasets that have "
"labeling data (at least either with `'uu', 'ul', 'su', 'sl'` or `'new', 'total'` "
"layers."
)
if has_splicing and has_labeling:
# new, total (and uu, ul, su, sl if existed) layers will be normalized with size factor calculated with total
# layers spliced / unspliced layers will be normalized independently.
recipe_monocle(
adata,
tkey=tkey,
experiment_type="one-shot",
reset_X=reset_X,
X_total_layers=X_total_layers,
splicing_total_layers=splicing_total_layers,
n_top_genes=n_top_genes,
total_layers=True,
keep_filtered_cells=keep_filtered_cells,
keep_filtered_genes=keep_filtered_genes,
keep_raw_layers=keep_raw_layers,
**rm_kwargs,
)
tkey = adata.uns["pp"]["tkey"]
# first calculate moments for labeling data relevant layers using total based connectivity graph
moments(adata, group=tkey, layers=layers)
# then we want to calculate moments for spliced and unspliced layers based on connectivity graph from spliced
# data.
# first get X_spliced based pca embedding
CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].A)
cm_genesums = CM.sum(axis=0)
valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0)
valid_ind = np.array(valid_ind).flatten()
pca_monocle(adata, CM[:, valid_ind], pca_key="X_spliced_pca")
# then get neighbors graph based on X_spliced_pca
neighbors(adata, X_data=adata.obsm["X_spliced_pca"], layer="X_spliced")
# then normalize neighbors graph so that each row sums up to be 1
conn = normalize_knn_graph(adata.obsp["connectivities"] > 0)
# then calculate moments for spliced related layers using spliced based connectivity graph
moments(adata, conn=conn, layers=["X_spliced", "X_unspliced"])
# then perform kinetic estimations with properly preprocessed layers for either the labeling or the splicing
# data
dynamics(
adata,
model="deterministic",
one_shot_method=one_shot_method,
del_2nd_moments=del_2nd_moments,
)
# then perform dimension reduction
reduceDimension(adata, reduction_method=basis)
# lastly, project RNA velocity to low dimensional embedding.
cell_velocities(adata, enforce=True, vkey=vkey, ekey=ekey, basis=basis)
else:
recipe_monocle(
adata,
tkey=tkey,
experiment_type="one-shot",
reset_X=reset_X,
X_total_layers=X_total_layers,
splicing_total_layers=splicing_total_layers,
n_top_genes=n_top_genes,
total_layers=True,
keep_filtered_cells=keep_filtered_cells,
keep_filtered_genes=keep_filtered_genes,
keep_raw_layers=keep_raw_layers,
**rm_kwargs,
)
dynamics(
adata,
model="deterministic",
one_shot_method=one_shot_method,
del_2nd_moments=del_2nd_moments,
)
reduceDimension(adata, reduction_method=basis)
cell_velocities(adata, enforce=True, vkey=vkey, ekey=ekey, basis=basis)
return adata
[docs]def velocity_N(
adata,
group=None,
recalculate_pca=True,
recalculate_umap=True,
del_2nd_moments=None,
):
"""use new RNA based pca, umap, for velocity calculation and projection for kinetics or one-shot experiment.
Note that currently velocity_N function only considers labeling data and removes splicing data if they exist.
Parameters
----------
adata: :class:`~anndata.AnnData`
AnnData object that stores data for the the kinetics or one-shot experiment, must include `X_new, X_total`
layers.
group: `str` or None (default: None)
The cell group that will be used to calculate velocity in each separate group. This is useful if your data
comes from different labeling condition, etc.
recalculate_pca: `bool` (default: True)
Whether to recalculate pca with the new RNA data. If setting to be False, you need to make sure the pca is
already generated via new RNA.
recalculate_umap: `bool` (default: True)
Whether to recalculate umap with the new RNA data. If setting to be False, you need to make sure the umap is
already generated via new RNA.
del_2nd_moments: `None` or `bool`
Whether to remove second moments or covariances. Default it is `None` rgument used for `dynamics` function.
Returns
-------
Nothing but the adata object is updated with the low dimensional (umap or pca) velocity projections with the
new RNA or pca based RNA velocities.
"""
del_2nd_moments = DynamoAdataConfig.use_default_var_if_none(
del_2nd_moments, DynamoAdataConfig.RECIPE_DEL_2ND_MOMENTS_KEY
)
var_columns = adata.var.columns
layer_keys = adata.layers.keys()
# check velocity_N, velocity_T, X_new, X_total
if not np.all([i in layer_keys for i in ["X_new", "X_total"]]):
raise Exception(f"The `X_new`, `X_total` has to exist in your data before running velocity_N function.")
# delete the moments and velocities that generated via total RNA
for i in ["M_t", "M_tt", "M_n", "M_tn", "M_nn", "velocity_N", "velocity_T"]:
if i in layer_keys:
del adata.layers[i]
# delete the kinetic paraemters that generated via total RNA
for i in [
"alpha",
"beta",
"gamma",
"half_life",
"alpha_b",
"alpha_r2",
"gamma_b",
"gamma_r2",
"gamma_logLL",
"delta_b",
"delta_r2",
"bs",
"bf",
"uu0",
"ul0",
"su0",
"sl0",
"U0",
"S0",
"total0",
"beta_k",
"gamma_k",
]:
if i in var_columns:
del adata.var[i]
if group is not None:
group_prefixes = [group + "_" + str(i) + "_" for i in adata.obs[group].unique()]
for i in group_prefixes:
for j in [
"alpha",
"beta",
"gamma",
"half_life",
"alpha_b",
"alpha_r2",
"gamma_b",
"gamma_r2",
"gamma_logLL",
"delta_b",
"delta_r2",
"bs",
"bf",
"uu0",
"ul0",
"su0",
"sl0",
"U0",
"S0",
"total0",
"beta_k",
"gamma_k",
]:
if i + j in var_columns:
del adata.var[i + j]
# now let us first run pca with new RNA
if recalculate_pca:
pca_monocle(adata, np.log1p(adata[:, adata.var.use_for_pca].layers["X_new"]), pca_key="X_pca")
# if there are unspliced / spliced data, delete them for now:
for i in ["spliced", "unspliced", "X_spliced", "X_unspliced"]:
if i in layer_keys:
del adata.layers[i]
# now redo the RNA velocity analysis with moments generated with pca space of new RNA
# let us also check whether it is a one-shot or kinetics experiment
if adata.uns["pp"]["experiment_type"] == "one-shot":
dynamics(
adata,
one_shot_method="sci_fate",
model="deterministic",
group=group,
del_2nd_moments=del_2nd_moments,
)
elif adata.uns["pp"]["experiment_type"] == "kin":
dynamics(
adata,
model="deterministic",
est_method="twostep",
group=group,
del_2nd_moments=del_2nd_moments,
)
else:
raise Exception(
f"velocity_N function only supports either the one-shot or kinetics (kin) metabolic labeling "
f"experiment."
)
# umap based on new RNA
if recalculate_umap:
reduceDimension(adata, enforce=True)
# project new RNA velocity to new RNA pca
cell_velocities(
adata,
basis="pca",
X=adata.layers["M_n"],
V=adata.layers["velocity_N"],
enforce=True,
)
# project new RNA velocity to new RNA umap
cell_velocities(
adata,
basis="umap",
X=adata.layers["M_n"],
V=adata.layers["velocity_N"],
enforce=True,
)