Source code for dynamo.plot.dynamics

import sys
import warnings
from typing import List, Optional, Tuple, Union

try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

import numpy as np
import pandas as pd
from anndata import AnnData
from matplotlib.figure import Figure

from ..configuration import _themes
from ..dynamo_logger import main_warning
from ..estimation.csc.velocity import sol_s, sol_u, solve_first_order_deg
from ..estimation.tsc.utils_moments import moments
from ..tools.utils import (
    get_mapper,
    get_valid_bools,
    get_vel_params,
    index_gene,
    log1p_,
    update_dict,
    update_vel_params,
)
from .scatters import scatters
from .utils import (
    _datashade_points,
    _matplotlib_points,
    _select_font_color,
    arrowed_spines,
    deaxis_all,
    default_quiver_args,
    despline,
    despline_all,
    quiver_autoscaler,
    save_show_ret,
)
from .utils_dynamics import *


[docs]def phase_portraits( adata: AnnData, genes: List[str], x: int = 0, y: int = 1, pointsize: Optional[float] = None, vkey: Optional[str] = None, ekey: Optional[str] = None, basis: str = "umap", log1p: bool = True, color: str = "cell_cycle_phase", use_smoothed: bool = True, highlights: Optional[list] = None, discrete_continous_div_themes: Optional[list] = None, discrete_continous_div_cmap: Optional[list] = None, discrete_continous_div_color_key: list = [None, None, None], discrete_continous_div_color_key_cmap: Optional[list] = None, figsize: Tuple[float, float] = (6, 4), ncols: Optional[int] = None, legend: str = "upper left", background: Optional[str] = None, show_quiver: bool = False, quiver_size: Optional[float] = None, quiver_length: Optional[float] = None, no_vel_u: bool = True, frontier: bool = True, q_kwargs_dict: dict = {}, show_arrowed_spines: Optional[bool] = None, save_show_or_return: Literal["save", "show", "return"] = "show", save_kwargs: dict = {}, **kwargs, ) -> Optional[Figure]: """Draw the phase portrait, expression values, velocity on the low dimensional embedding. Note that this function allows to manually set the theme, cmap, color_key and color_key_cmap for the phase portrait, expression and velocity subplots. When the background is 'black', the default themes for each of those subplots are ["glasbey_dark", "inferno", "div_blue_black_red"], respectively. When the background is 'black', the default themes are "glasbey_white", "viridis", "div_blue_red". Args: adata: an AnnData object. genes: a list of gene names that are going to be visualized. x: the column index of the low dimensional embedding for the x-axis. Defaults to 0. y: the column index of the low dimensional embedding for the y-axis. Defaults to 1. pointsize: the scale of the point size. Actual point cell size is calculated as `2500.0 / np.sqrt(adata.shape[0]) * pointsize`. Defaults to None. vkey: the velocity key used for visualizing the magnitude of velocity. Can be either velocity in the layers slot or the keys in the obsm slot. Defaults to None. ekey: the layer of data to represent the gene expression level. Defaults to None. basis: the key of the low dimensional embedding will be used to visualize the cell. Defaults to "umap". log1p: whether to log1p transform the expression so that visualization can be robust to extreme values. Defaults to True. color: the group to be used to color cells. It would only be used for the phase portrait because the other two plots are colored by the velocity magnitude or the gene expression value respectively. Defaults to "cell_cycle_phase". use_smoothed: Whether to use smoothed 1/2-nd moments as gene expression for the first and third columns. This is useful for checking the confidence of transition genes. For example, you may see a very nice linear pattern for some genes with the smoothed expression but this could just be an artificially generated when the number of expressed cells is very low. This raises red flags for the quality of the velocity values we learned for those genes. And we recommend to set the higher values (for example, 10% of all cells) for `min_cell_s`, `min_cell_u` or `shared_count` of the `fg_kwargs` argument of the dyn.pl.receipe_monocle. Note that this is often related to the small single cell datasets (like plate-based scRNA-seq or scSLAM-seq/NASC-seq, etc). Defaults to True. highlights: the color group to be highlighted. If highligts is a list of lists, each list is relate to each color element. Defaults to None. discrete_continous_div_themes: the discrete, continous and divergent color themes to use for plotting, respectively. The description for each element in the list is as following. A small set of predefined themes are provided which have relatively good aesthetics. Available themes are: * 'blue' * 'red' * 'green' * 'inferno' * 'fire' * 'viridis' * 'darkblue' * 'darkred' * 'darkgreen'. Defaults to None. discrete_continous_div_cmap: the names of discrete, continous and divergent matplotlib colormap to use for coloring or shading points. The description for each element in the list is as following. If no labels or values are passed this will be used for shading points according to density (largely only of relevance for very large datasets). If values are passed this will be used for shading according the value. Note that if theme is passed then this value will be overridden by the corresponding option of the theme. Defaults to None. discrete_continous_div_color_key: A list to assign discrete, continous and divergent colors to categoricals. This can either be an explicit dict mapping labels to colors (as strings of form '#RRGGBB'), or an array like object providing one color for each distinct category being provided in `labels`. Either way this mapping will be used to color points according to the label. Note that if theme is passed then this value will be overridden by the corresponding option of the theme. Defaults to [None, None, None]. discrete_continous_div_color_key_cmap: the names of discrete, continous and divergent matplotlib colormap to use for categorical coloring. If an explicit `color_key` is not given a color mapping for categories can be generated from the label list and selecting a matching list of colors from the given colormap. Note that if theme is passed then this value will be overridden by the corresponding option of the theme. Defaults to None. figsize: the width and height of each panel in the figure. Defaults to (6, 4). ncols: number of columns in each facet grid. Defaults to None. legend: the position to draw the legend. Legend is drawn by seaborn with “brief” mode, numeric hue and size v ariables will be represented with a sample of evenly spaced values. By default, legend is drawn on top of cells. Defaults to "upper left". background: the background color. If set to None the face color of the figure would be used. Defaults to None. show_quiver: Whether to show the quiver plot. If velocity for x component (corresponds to either spliced, total RNA, protein, etc.) or y component (corresponds to either unspliced, new RNA, protein, etc.) are both calculated, quiver represents velocity for both components otherwise the uncalculated component (usually y component) will be set to be 0. Defaults to False. quiver_size: the size of quiver. If None, we will use set quiver_size to be 1. Note that quiver quiver_size is used to calculate the head_width (10 x quiver_size), head_length (12 x quiver_size) and headaxislength (8 x quiver_size) of the quiver. This is done via the `default_quiver_args` function which also calculate the scale of the quiver (1 / quiver_length). Defaults to None. quiver_length: the length of quiver. The quiver length which will be used to calculate scale of quiver. Note that before applying `default_quiver_args` velocity values are first rescaled via the quiver_autoscaler function. Scale of quiver indicates the number of data units per arrow length unit, e.g., m/s per plot width; a smaller scale parameter makes the arrow longer. Defaults to None. no_vel_u: whether to not show velocity U (velocity of unspliced RNAs). Defaults to True. frontier: whether to add the frontier. Scatter plots can be enhanced by using transparency (alpha) in order to show area of high density and multiple scatter plots can be used to delineate a frontier. See matplotlib tips & tricks cheatsheet (https://github.com/matplotlib/cheatsheets). Originally inspired by figures from scEU-seq paper: https://science.sciencemag.org/content/367/6482/1151. Defaults to True. q_kwargs_dict: the dictionary of the quiver arguments. The default setting of quiver argument is identical to that used in the cell_wise_velocity and grid_velocity. Defaults to {}. show_arrowed_spines: whether to show a pair of arrowed spines represeenting the basis of the scatter is currently using. If None, only the first panel in the expression / velocity plot will have the arrowed spine. Defaults to None. save_show_or_return: whether to save, show or return the figure. Defaults to "show". save_kwargs: a dictionary that will be passed to the save_show_ret function. By default, it is an empty dictionary and the save_show_ret function will use the { "path": None, "prefix": 'phase_portraits', "dpi": None, "ext": 'pdf', "transparent": True, "close": True, "verbose": True } as its parameters. Otherwise, you can provide a dictionary that properly modify those keys according to your needs. Defaults to {}. **kwargs: additional parameters that will be passed to `plt.scatter` function. Raises: ValueError: missing velocity data in the AnnData object. ValueError: cannot find layer with given `v_key`. ValueError: missing velocity_gamma column in the AnnData object. ValueError: missing velocity_gamma column in the AnnData object. ValueError: corrupted AnnData object missing basic labeled/unlabeled or spliced/unspliced data. NotImplementedError: invalid `save_show_or_return`. Returns: None would be returned in default and the plotted figure would be shown directly. The matplotlib plot would show 1) the phase portrait of each category used in velocity embedding, cells' low dimensional embedding, colored either by 2) the gene expression level or 3) the velocity magnitude values. If set `save_show_or_return='return'` as a kwarg, the axes of the plot would be returned. """ import matplotlib.pyplot as plt from matplotlib import rcParams from matplotlib.colors import to_hex # from matplotlib.colors import DivergingNorm # TwoSlopeNorm has_labeling, has_splicing, splicing_labeling = ( adata.uns["dynamics"]["has_labeling"], adata.uns["dynamics"]["has_splicing"], adata.uns["dynamics"]["splicing_labeling"], ) if background is None: _background = rcParams.get("figure.facecolor") _background = to_hex(_background) if type(_background) is tuple else _background else: _background = background mapper = get_mapper(smoothed=use_smoothed) point_size = ( 500.0 / np.sqrt(adata.shape[0]) * 5 if pointsize is None else 500.0 / np.sqrt(adata.shape[0]) * 5 * pointsize ) scatter_kwargs = dict(alpha=0.2, s=point_size, edgecolor=None, linewidth=0) # (0, 0, 0, 1) if kwargs is not None: scatter_kwargs.update(kwargs) div_scatter_kwargs = scatter_kwargs.copy() # div_scatter_kwargs.update({"norm": DivergingNorm(vcenter=0)}) if type(genes) == str: genes = [genes] _genes = list(set(adata.var.index).intersection(genes)) # avoid object for dtype in the gamma column https://stackoverflow.com/questions/40809503/python-numpy-typeerror-ufunc-isfinite-not-supported-for-the-input-types if adata.uns["dynamics"]["experiment_type"] in [ "one-shot", "kin", "deg", "mix_kin_deg", "mix_pulse_chase", ]: k_name = "gamma_k" else: k_name = "gamma" vel_params_df = get_vel_params(adata) valid_id = np.isfinite(np.array(vel_params_df.loc[_genes, k_name], dtype="float")).flatten() genes = np.array(_genes)[valid_id].tolist() # idx = [adata.var.index.to_list().index(i) for i in genes] if len(genes) == 0: raise ValueError( "adata has no genes listed in your input gene vector or " "velocity estimation for those genes are not performed. " "Please try to run dyn.tl.dynamics(adata, filter_gene_mode='no')" "to estimate velocity for all genes: {}".format(_genes) ) if not "X_" + basis in adata.obsm.keys(): main_warning("{} is not applied to adata.".format(basis)) from ..tools.dimension_reduction import reduceDimension reduceDimension(adata, reduction_method=basis) embedding = pd.DataFrame( { basis + "_0": adata.obsm["X_" + basis][:, x], basis + "_1": adata.obsm["X_" + basis][:, y], } ) embedding.columns = ["dim_1", "dim_2"] if has_labeling and not has_splicing: mode = "labeling" vkey = "T" if vkey is None else vkey ekey = "M_t" if ekey is None else ekey elif has_splicing and not has_labeling: mode = "splicing" vkey = "S" if vkey is None else vkey ekey = "M_s" if ekey is None else ekey elif splicing_labeling: mode = "full" vkey = "S" if vkey is None else vkey ekey = "M_s" if ekey is None else ekey elif has_splicing and has_labeling: mode = "full" vkey = "S" if vkey is None else vkey ekey = "M_s" if ekey is None else ekey layers = list(adata.layers.keys()) layers.extend(["X", "protein", "X_protein"]) if ekey in layers: if ekey == "X": E_vec = ( index_gene(adata, adata.layers[mapper["X"]], genes) if mapper["X"] in adata.layers.keys() else index_gene(adata, adata.X, genes) ) elif ekey in ["protein", "X_protein"]: E_vec = ( index_gene(adata, adata.layers[mapper[ekey]], genes) if (ekey in mapper.keys()) and (mapper[ekey] in adata.obsm_keys()) else index_gene(adata, adata.obsm[ekey], genes) ) else: E_vec = ( index_gene(adata, adata.layers[mapper[ekey]], genes) if (ekey in mapper.keys()) and (mapper[ekey] in adata.layers.keys()) else index_gene(adata, adata.layers[ekey], genes) ) if log1p: E_vec = log1p_(adata, E_vec) n_cells, n_genes = adata.shape[0], len(genes) color_vec = np.repeat(np.nan, n_cells) if color is not None: color_vec = adata.obs[color].to_list() if "velocity_" not in vkey: vkey = "velocity_" + vkey if vkey == "velocity_U": V_vec = index_gene(adata, adata.layers["velocity_U"], genes) if "velocity_P" in adata.obsm.keys(): P_vec = index_gene(adata, adata.layers["velocity_P"], genes) elif vkey == "velocity_S": V_vec = index_gene(adata, adata.layers["velocity_S"], genes) if "velocity_P" in adata.obsm.keys(): P_vec = index_gene(adata, adata.layers["velocity_P"], genes) elif vkey == "velocity_T": V_vec = index_gene(adata, adata.layers["velocity_T"], genes) if "velocity_P" in adata.obsm.keys(): P_vec = index_gene(adata, adata.layers["velocity_P"], genes) elif vkey == "velocity_N": V_vec = index_gene(adata, adata.layers["velocity_N"], genes) if "velocity_P" in adata.obsm.keys(): P_vec = index_gene(adata, adata.layers["velocity_P"], genes) else: if vkey in adata.layers.keys(): V_vec = index_gene(adata, adata.layers[vkey], genes) if "velocity_P" in adata.obsm.keys(): P_vec = index_gene(adata, adata.layers["velocity_P"], genes) else: raise ValueError("adata has no vkey {} in either the layers or the obsm slot".format(vkey)) E_vec, V_vec = ( E_vec.toarray() if issparse(E_vec) else E_vec, V_vec.toarray() if issparse(V_vec) else V_vec, ) if k_name in vel_params_df.columns: if not ("gamma_b" in vel_params_df.columns) or all(vel_params_df.gamma_b.isna()): vel_params_df.loc[:, "gamma_b"] = 0 gamma, velocity_offset = ( index_gene(adata, vel_params_df.loc[:, k_name].values, genes), index_gene(adata, vel_params_df.gamma_b.values, genes), ) ( gamma[~np.isfinite(list(gamma))], velocity_offset[~np.isfinite(list(velocity_offset))], ) = (0, 0) else: raise ValueError( "adata does not seem to have velocity_gamma column. Velocity estimation is required before " "running this function." ) if mode == "labeling": new_mat, tot_mat = ( index_gene(adata, adata.layers[mapper["X_new"]], genes), index_gene(adata, adata.layers[mapper["X_total"]], genes), ) new_mat, tot_mat = (new_mat.toarray(), tot_mat.toarray()) if issparse(new_mat) else (new_mat, tot_mat) vel_u, vel_s = ( index_gene(adata, adata.layers["velocity_N"].toarray(), genes), index_gene(adata, adata.layers["velocity_T"].toarray(), genes), ) df = pd.DataFrame( { "new": new_mat.flatten(), "total": tot_mat.flatten(), "gene": genes * n_cells, "gamma": np.tile(gamma, n_cells), "velocity_offset": np.tile(velocity_offset, n_cells), "expression": E_vec.flatten(), "velocity": V_vec.flatten(), "color": np.repeat(color_vec, n_genes), "vel_u": vel_u.flatten(), "vel_s": vel_s.flatten(), }, index=range(n_cells * n_genes), ) elif mode == "splicing": unspliced_mat, spliced_mat = ( index_gene(adata, adata.layers[mapper["X_unspliced"]], genes), index_gene(adata, adata.layers[mapper["X_spliced"]], genes), ) unspliced_mat, spliced_mat = ( (unspliced_mat.toarray(), spliced_mat.toarray()) if issparse(unspliced_mat) else (unspliced_mat, spliced_mat) ) vel_u, vel_s = ( np.zeros_like(index_gene(adata, adata.layers["velocity_S"].toarray(), genes)), index_gene(adata, adata.layers["velocity_S"].toarray(), genes), ) df = pd.DataFrame( { "U": unspliced_mat.flatten(), "S": spliced_mat.flatten(), "gene": genes * n_cells, "gamma": np.tile(gamma, n_cells), "velocity_offset": np.tile(velocity_offset, n_cells), "expression": E_vec.flatten(), "velocity": V_vec.flatten(), "color": np.repeat(color_vec, n_genes), "vel_u": vel_u.flatten(), "vel_s": vel_s.flatten(), }, index=range(n_cells * n_genes), ) elif mode == "full": U, S, N, T = ( index_gene(adata, adata.layers[mapper["X_unspliced"]], genes), index_gene(adata, adata.layers[mapper["X_spliced"]], genes), index_gene(adata, adata.layers[mapper["X_new"]], genes), index_gene(adata, adata.layers[mapper["X_total"]], genes), ) U, S, N, T = (U.toarray(), S.toarray(), N.toarray(), T.toarray()) if issparse(U) else (U, S, N, T) vel_u, vel_s = ( ( index_gene(adata, adata.layers["velocity_U"].toarray(), genes) if "velocity_U" in adata.layers.keys() else None, index_gene(adata, adata.layers["velocity_S"].toarray(), genes), ) if vkey == "velocity_S" else ( index_gene(adata, adata.layers["velocity_N"].toarray(), genes) if "velocity_U" in adata.layers.keys() else None, index_gene(adata, adata.layers["velocity_T"].toarray(), genes), ) ) if "protein" in adata.obsm.keys(): if "delta" in vel_params_df.columns: gamma_P = vel_params_df.delta[genes].values velocity_offset_P = ( [0] * n_cells if (not ("delta_b" in vel_params_df.columns) or vel_params_df.delta_b.unique() is None) else vel_params_df.delta_b[genes].values ) else: raise ValueError( "adata does not seem to have velocity_gamma column. Velocity estimation is required before " "running this function." ) P = ( index_gene(adata, adata.obsm[mapper["X_protein"]], genes) if (["X_protein"] in adata.obsm.keys() or [mapper["X_protein"]] in adata.obsm.keys()) else index_gene(adata, adata.obsm["protein"], genes) ) P = P.toarray() if issparse(P) else P if issparse(P_vec): P_vec = P_vec.toarray() vel_p = np.zeros_like(adata.obsm["velocity_P"][:, :]) # df = pd.DataFrame({"uu": uu.flatten(), "ul": ul.flatten(), "su": su.flatten(), "sl": sl.flatten(), "P": P.flatten(), # 'gene': genes * n_cells, 'prediction': np.tile(gamma, n_cells) * uu.flatten() + # np.tile(velocity_offset, n_cells), "velocity": genes * n_cells}, index=range(n_cells * n_genes)) df = pd.DataFrame( { "new": N.flatten(), "total": T.flatten(), "S": S.flatten(), "U": U.flatten(), "P": P.flatten(), "gene": genes * n_cells, "gamma": np.tile(gamma, n_cells), "velocity_offset": np.tile(velocity_offset, n_cells), "gamma_P": np.tile(gamma_P, n_cells), "velocity_offset_P": np.tile(velocity_offset_P, n_cells), "expression": E_vec.flatten(), "velocity": V_vec.flatten(), "velocity_protein": P_vec.flatten(), "color": np.repeat(color_vec, n_genes), "vel_u": vel_u.flatten() if vel_u is not None else None, "vel_s": vel_s.flatten(), "vel_p": vel_p.flatten(), }, index=range(n_cells * n_genes), ) else: df = pd.DataFrame( { "new": N.flatten(), "total": T.flatten(), "S": S.flatten(), "U": U.flatten(), "gene": genes * n_cells, "gamma": np.tile(gamma, n_cells), "velocity_offset": np.tile(velocity_offset, n_cells), "expression": E_vec.flatten(), "velocity": V_vec.flatten(), "color": np.repeat(color_vec, n_genes), "vel_u": vel_u.flatten(), "vel_s": vel_s.flatten(), }, index=range(n_cells * n_genes), ) else: raise ValueError( "Your adata is corrupted. Make sure that your layer has keys new, old for the labelling mode, " "spliced, ambiguous, unspliced for the splicing model and uu, ul, su, sl for the full mode" ) num_per_gene = 6 if ("protein" in adata.obsm.keys() and mode == "full") else 3 ncols = min([num_per_gene, ncols]) if ncols is not None else num_per_gene nrow, ncol = int(np.ceil(num_per_gene * n_genes / ncols)), ncols if figsize is None: g = plt.figure(None, (3 * ncol, 3 * nrow), facecolor=_background) # , dpi=160 else: g = plt.figure(None, (figsize[0] * ncol, figsize[1] * nrow), facecolor=_background) # , dpi=160 if discrete_continous_div_themes is None: if _background in ["#ffffff", "black"]: discrete_theme, continous_theme, divergent_theme = ( "glasbey_dark", "inferno", "div_blue_black_red", ) else: discrete_theme, continous_theme, divergent_theme = ( "glasbey_white", "viridis", "div_blue_red", ) else: ( discrete_theme, continous_theme, divergent_theme, ) = discrete_continous_div_themes discrete_cmap, discrete_color_key_cmap, discrete_background = ( _themes[discrete_theme]["cmap"] if discrete_continous_div_cmap is None else discrete_continous_div_cmap[0], ( _themes[discrete_theme]["color_key_cmap"] if discrete_continous_div_color_key_cmap is None else discrete_continous_div_color_key_cmap[0] ), _themes[discrete_theme]["background"], ) continous_cmap, continous_color_key_cmap, continous_background = ( _themes[continous_theme]["cmap"] if discrete_continous_div_cmap is None else discrete_continous_div_cmap[1], ( _themes[continous_theme]["color_key_cmap"] if discrete_continous_div_color_key_cmap is None else discrete_continous_div_color_key_cmap[1] ), _themes[continous_theme]["background"], ) divergent_cmap, divergent_color_key_cmap, divergent_background = ( _themes[divergent_theme]["cmap"] if discrete_continous_div_cmap is None else discrete_continous_div_cmap[2], ( _themes[divergent_theme]["color_key_cmap"] if discrete_continous_div_color_key_cmap is None else discrete_continous_div_color_key_cmap[2] ), _themes[divergent_theme]["background"], ) font_color = _select_font_color(discrete_background) # the following code is inspired by https://github.com/velocyto-team/velocyto-notebooks/blob/master/python/DentateGyrus.ipynb gs = plt.GridSpec(nrow, ncol, wspace=0.5, hspace=0.36) for i, gn in enumerate(genes): if num_per_gene == 3: ax1, ax2, ax3 = ( plt.subplot(gs[i * 3]), plt.subplot(gs[i * 3 + 1]), plt.subplot(gs[i * 3 + 2]), ) elif num_per_gene == 6: ax1, ax2, ax3, ax4, ax5, ax6 = ( plt.subplot(gs[i * 3]), plt.subplot(gs[i * 3 + 1]), plt.subplot(gs[i * 3 + 2]), plt.subplot(gs[i * 3 + 3]), plt.subplot(gs[i * 3 + 4]), plt.subplot(gs[i * 3 + 5]), ) try: ix = np.where(adata.var.index == gn)[0][0] except: continue cur_pd = df.loc[df.gene == gn, :] if cur_pd.color.isna().all(): if cur_pd.shape[0] <= figsize[0] * figsize[1] * 1000000: ax1, color = _matplotlib_points( ( cur_pd.loc[:, ["S", "U"]].values if vkey == "velocity_S" else cur_pd.loc[:, ["total", "new"]].values ), ax=ax1, labels=None, values=cur_pd.loc[:, "expression"].values, highlights=highlights, cmap=continous_cmap, color_key=discrete_continous_div_color_key[1], color_key_cmap=continous_color_key_cmap, background=continous_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) else: ax1, color = _datashade_points( ( cur_pd.loc[:, ["S", "U"]].values if vkey == "velocity_S" else cur_pd.loc[:, ["total", "new"]].values ), ax=ax1, labels=None, values=cur_pd.loc[:, "expression"].values, highlights=highlights, cmap=continous_cmap, color_key=discrete_continous_div_color_key[1], color_key_cmap=continous_color_key_cmap, background=continous_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) else: if cur_pd.shape[0] <= figsize[0] * figsize[1] * 1000000: ax1, color = _matplotlib_points( ( cur_pd.loc[:, ["S", "U"]].values if vkey == "velocity_S" else cur_pd.loc[:, ["total", "new"]].values ), ax=ax1, labels=cur_pd.loc[:, "color"], values=None, highlights=highlights, cmap=discrete_cmap, color_key=discrete_continous_div_color_key[0], color_key_cmap=discrete_color_key_cmap, background=discrete_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) else: ax1, color = _datashade_points( ( cur_pd.loc[:, ["S", "U"]].values if vkey == "velocity_S" else cur_pd.loc[:, ["total", "new"]].values ), ax=ax1, labels=cur_pd.loc[:, "color"], values=None, highlights=highlights, cmap=discrete_cmap, color_key=discrete_continous_div_color_key[0], color_key_cmap=discrete_color_key_cmap, background=discrete_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) ax1.set_title(gn) if vkey == "velocity_S": ax1.set_xlabel("spliced (1st moment)") ax1.set_ylabel("unspliced (1st moment)") elif vkey == "velocity_T": ax1.set_xlabel("total (1st moment)") ax1.set_ylabel("new (1st moment)") # only linear regression fitting of extreme cells will be plotted together with U-S phase plane. if vkey in ["velocity_S", "velocity_T"]: xnew = ( np.linspace(cur_pd.loc[:, "S"].min(), cur_pd.loc[:, "S"].max() * 0.80) if vkey == "velocity_S" else np.linspace( cur_pd.loc[:, "total"].min(), cur_pd.loc[:, "total"].max() * 0.80, ) ) ax1.plot( xnew, xnew * cur_pd.loc[:, "gamma"].unique() + cur_pd.loc[:, "velocity_offset"].unique(), dashes=[6, 2], c=font_color, ) X_array, V_array = ( cur_pd.loc[:, ["S", "U"]].values if vkey == "velocity_S" else cur_pd.loc[:, ["total", "new"]].values, cur_pd.loc[:, ["vel_s", "vel_u"]].values, ) if no_vel_u and vkey == "velocity_S": V_array[:, 1] = 0 # add quiver: if show_quiver: V_array /= 3 * quiver_autoscaler(X_array, V_array) if background is None: background = rcParams.get("figure.facecolor") if quiver_size is None: quiver_size = 1 if background == "black": edgecolors = "white" else: edgecolors = "black" head_w, head_l, ax_l, scale = default_quiver_args(quiver_size, quiver_length) quiver_kwargs = { "angles": "xy", "scale": scale, "scale_units": "xy", "width": 0.0005, "headwidth": head_w, "headlength": head_l, "headaxislength": ax_l, "minshaft": 1, "minlength": 1, "pivot": "tail", "edgecolors": edgecolors, "linewidth": 0.2, "color": color, "alpha": 1, "zorder": 10, } quiver_kwargs = update_dict(quiver_kwargs, q_kwargs_dict) ax1.quiver( X_array[:, 0], X_array[:, 1], V_array[:, 0], V_array[:, 1], **quiver_kwargs, ) ax1.set_xlim(np.min(X_array[:, 0]), np.max(X_array[:, 0]) * 1.02) ax1.set_ylim(np.min(X_array[:, 1]), np.max(X_array[:, 1]) * 1.02) despline(ax1) # sns.despline() df_embedding = pd.concat([cur_pd, embedding], axis=1) V_vec = cur_pd.loc[:, "velocity"] # limit = np.nanmax( # np.abs(np.nanpercentile(V_vec, [1, 99])) # ) # upper and lowe limit / saturation # # V_vec = V_vec + limit # that is: tmp_colorandum - (-limit) # V_vec = V_vec / (2 * limit) # that is: tmp_colorandum / (limit - (-limit)) # V_vec = np.clip(V_vec, 0, 1) if show_arrowed_spines is None: show_arrowed_spines_ = True if i == 0 else False else: show_arrowed_spines_ = show_arrowed_spines if cur_pd.shape[0] <= figsize[0] * figsize[1] * 1000000: ax2, _ = _matplotlib_points( embedding.iloc[:, :2].values, ax=ax2, labels=None, values=cur_pd.loc[:, "expression"].values, highlights=highlights, cmap=continous_cmap, color_key=discrete_continous_div_color_key[1], color_key_cmap=continous_color_key_cmap, background=continous_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) else: ax2, _ = _datashade_points( embedding.iloc[:, :2].values, ax=ax2, labels=None, values=cur_pd.loc[:, "expression"].values, highlights=highlights, cmap=continous_cmap, color_key=discrete_continous_div_color_key[1], color_key_cmap=continous_color_key_cmap, background=continous_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) ax2.set_title(gn + " (" + ekey + ")") if show_arrowed_spines_: ax2 = arrowed_spines(ax2, basis) else: despline_all(ax2) deaxis_all(ax2) v_max = 0.01 if min(V_vec) + max(V_vec) == 0 else np.max(np.abs(V_vec.values)) div_scatter_kwargs.update({"vmin": -v_max, "vmax": v_max}) if cur_pd.shape[0] <= figsize[0] * figsize[1] * 1000000: ax3, _ = _matplotlib_points( embedding.iloc[:, :2].values, ax=ax3, labels=None, values=V_vec.values, highlights=highlights, cmap=divergent_cmap, color_key=discrete_continous_div_color_key[2], color_key_cmap=divergent_color_key_cmap, background=divergent_background, width=figsize[0], height=figsize[1], show_legend=legend, sort="abs", frontier=frontier, **div_scatter_kwargs, ) else: ax3, _ = _datashade_points( embedding.iloc[:, :2].values, ax=ax3, labels=None, values=V_vec.values, highlights=highlights, cmap=divergent_cmap, color_key=discrete_continous_div_color_key[2], color_key_cmap=divergent_color_key_cmap, background=divergent_background, width=figsize[0], height=figsize[1], show_legend=legend, sort="abs", frontier=frontier, **div_scatter_kwargs, ) ax3.set_title(gn + " (" + vkey + ")") if show_arrowed_spines_: ax3 = arrowed_spines(ax3, basis) else: despline_all(ax3) deaxis_all(ax3) if ( "protein" in adata.obsm.keys() and mode == "full" and all([i in adata.layers.keys() for i in ["uu", "ul", "su", "sl"]]) ): if cur_pd.color.unique() != np.nan: if cur_pd.shape[0] <= figsize[0] * figsize[1] * 1000000: ax4, color = _matplotlib_points( cur_pd.loc[:, ["P", "S"]].values, ax=ax4, labels=None, values=cur_pd.loc[:, "expression"].values, highlights=highlights, cmap=continous_cmap, color_key=discrete_continous_div_color_key[1], color_key_cmap=continous_color_key_cmap, background=continous_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) else: ax4, color = _datashade_points( cur_pd.loc[:, ["P", "S"]].values, ax=ax4, labels=None, values=cur_pd.loc[:, "expression"].values, highlights=highlights, cmap=continous_cmap, color_key=discrete_continous_div_color_key[1], color_key_cmap=continous_color_key_cmap, background=continous_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) else: if cur_pd.shape[0] <= figsize[0] * figsize[1] * 1000000: ax4, color = _matplotlib_points( cur_pd.loc[:, ["P", "S"]].values, ax=ax4, labels=cur_pd.loc[:, "color"].values, values=None, highlights=highlights, cmap=discrete_cmap, color_key=discrete_continous_div_color_key[0], color_key_cmap=discrete_color_key_cmap, background=discrete_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) else: ax4, color = _datashade_points( cur_pd.loc[:, ["P", "S"]].values, ax=ax4, labels=cur_pd.loc[:, "color"].values, values=None, highlights=highlights, cmap=discrete_cmap, color_key=discrete_continous_div_color_key[0], color_key_cmap=discrete_color_key_cmap, background=discrete_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) ax4.set_title(gn) ax4.set_xlabel("spliced (1st moment)") ax4.set_ylabel("protein (1st moment)") xnew = np.linspace(cur_pd.loc[:, "P"].min(), cur_pd.loc[:, "P"].max()) ax4.plot( xnew, xnew * cur_pd.loc[:, "gamma_P"].unique() + cur_pd.loc[:, "velocity_offset_P"].unique(), dashes=[6, 2], c=font_color, ) X_array, V_array = ( cur_pd.loc[:, ["P", "S"]].values, cur_pd.loc[:, ["vel_p", "vel_s"]].values, ) # add quiver: if show_quiver: V_array /= 3 * quiver_autoscaler(X_array, V_array) if background is None: background = rcParams.get("figure.facecolor") if quiver_size is None: quiver_size = 1 if background == "black": edgecolors = "white" else: edgecolors = "black" head_w, head_l, ax_l, scale = default_quiver_args(quiver_size, quiver_length) quiver_kwargs = { "angles": "xy", "scale": scale, "scale_units": "xy", "width": 0.0005, "headwidth": head_w, "headlength": head_l, "headaxislength": ax_l, "minshaft": 1, "minlength": 1, "pivot": "tail", "edgecolors": edgecolors, "linewidth": 0.2, "color": color, "alpha": 1, "zorder": 10, } quiver_kwargs = update_dict(quiver_kwargs, q_kwargs_dict) ax4.quiver( X_array[:, 0], X_array[:, 1], V_array[:, 0], V_array[:, 1], **quiver_kwargs, ) ax4.set_ylim(np.min(X_array[:, 0]), np.max(X_array[:, 0]) * 1.02) ax4.set_xlim(np.min(X_array[:, 1]), np.max(X_array[:, 1]) * 1.02) despline(ax1) # sns.despline() V_vec = df_embedding.loc[:, "velocity_p"] limit = np.nanmax(np.abs(np.nanpercentile(V_vec, [1, 99]))) # upper and lowe limit / saturation V_vec = V_vec + limit # that is: tmp_colorandum - (-limit) V_vec = V_vec / (2 * limit) # that is: tmp_colorandum / (limit - (-limit)) V_vec = np.clip(V_vec, 0, 1) if cur_pd.shape[0] <= figsize[0] * figsize[1] * 1000000: ax5, _ = _matplotlib_points( embedding.iloc[:, :2], ax=ax5, labels=None, values=embedding.loc[:, "P"].values, highlights=highlights, cmap=continous_cmap, color_key=discrete_continous_div_color_key[1], color_key_cmap=continous_color_key_cmap, background=continous_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) else: ax5, _ = _datashade_points( embedding.iloc[:, :2], ax=ax5, labels=None, values=embedding.loc[:, "P"].values, highlights=highlights, cmap=continous_cmap, color_key=discrete_continous_div_color_key[1], color_key_cmap=continous_color_key_cmap, background=continous_background, width=figsize[0], height=figsize[1], show_legend=legend, **scatter_kwargs, ) ax5.set_title(gn + " (protein expression)") if show_arrowed_spines_: ax5 = arrowed_spines(ax5, basis) else: despline_all(ax5) deaxis_all(ax5) v_max = np.max(np.abs(V_vec.values)) div_scatter_kwargs.update({"vmin": -v_max, "vmax": v_max}) if cur_pd.shape[0] <= figsize[0] * figsize[1] * 1000000: ax6, _ = _matplotlib_points( embedding.iloc[:, :2], ax=ax6, labels=None, values=V_vec.values, highlights=highlights, cmap=divergent_cmap, color_key=discrete_continous_div_color_key[2], color_key_cmap=divergent_color_key_cmap, background=divergent_background, width=figsize[0], height=figsize[1], show_legend=legend, sort="abs", frontier=frontier, **div_scatter_kwargs, ) else: ax6, _ = _datashade_points( embedding.iloc[:, :2], ax=ax6, labels=None, values=V_vec.values, highlights=highlights, cmap=divergent_cmap, color_key=discrete_continous_div_color_key[2], color_key_cmap=divergent_color_key_cmap, background=divergent_background, width=figsize[0], height=figsize[1], show_legend=legend, sort="abs", frontier=frontier, **div_scatter_kwargs, ) ax6.set_title(gn + " (protein velocity)") if show_arrowed_spines_: ax6 = arrowed_spines(ax6, basis) else: despline_all(ax6) deaxis_all(ax6) update_vel_params(adata, params_df=vel_params_df) return save_show_ret("phase_portraits", save_show_or_return, save_kwargs, g)
[docs]def dynamics( adata: AnnData, genes: List[str], unit: str = "hours", log_unnormalized: bool = True, y_log_scale: bool = False, ci: Optional[str] = None, ncols: Optional[int] = None, figsize: Union[List[float], Tuple[float, float], None] = None, dpi: Optional[float] = None, boxwidth: Optional[float] = None, barwidth: Optional[float] = None, true_param_prefix: Optional[str] = None, show_moms_fit: bool = False, show_variance: bool = True, show_kin_parameters: bool = True, gene_order: Literal["column", "row"] = "column", font_size_scale: float = 1, save_show_or_return: Literal["save", "show", "return"] = "show", save_kwargs: dict = {}, ) -> Optional[Figure]: """Plot the data and fitting of different metabolic labeling experiments. Note that if non-smoothed data was used for kinetic fitting, you often won't see boxplot but only the triangles for the mean values. This is because the raw data has a lot of dropouts, thus the median is outside of the whiskers of the boxplot and the boxplot is then not drawn by default. Args: adata: an Annodata object. genes: the key for variable or gene names. unit: the unit of the labeling time, for example, `hours` or `minutes`. Defaults to "hours". log_unnormalized: whether the data has logged value. Defaults to True. y_log_scale: whether to use log scale for y-axis. Defaults to False. ci: the confidence interval to be drawn for the parameter fitting. Currently not used. Defaults to None. ncols: the number of columns in the plot. Defaults to None. figsize: the size of the each panel in the figure. Defaults to None. dpi: the resolution of the figure. Defaults to None. boxwidth: the width of the box of the boxplot. Defaults to None. barwidth: the width of the bar of the barplot. Defaults to None. true_param_prefix: the prefix for the column names of true parameters in the .var attributes. Useful for the simulation data. Defaults to None. show_moms_fit: whether to show fitting curves associated with the stochastic models, only works for non-deterministic models. Defaults to False. show_variance: whether to add a boxplot to show the variance at each time point. Defaults to True. show_kin_parameters: whether to include the estimated kinetic parameter values on the plot. Defaults to True. gene_order: the order of genes to present on the figure, either row-major or column major. Defaults to "column". font_size_scale: the scale factor of fonts. Defaults to 1. save_show_or_return: whether to save, show or return the figure. Defaults to "show". save_kwargs: a dictionary that will be passed to the save_show_ret function. By default, it is an empty dictionary and the save_show_ret function will use the { "path": None, "prefix": 'dynamics', "dpi": None, "ext": 'pdf', "transparent": True, "close": True, "verbose": True } as its parameters. Otherwise, you can provide a dictionary that properly modify those keys according to your needs. Defaults to {}. Raises: ValueError: the gene specified does not have fitted kinetic parameters. Returns: None would be returned in default and the plotted metabolic fitting figure would be shown directly. If set `save_show_or_return='return'` as a kwarg, the axes of the plot would be returned. """ import matplotlib import matplotlib.pyplot as plt params = { "font.size": 4 * font_size_scale, "legend.fontsize": 4 * font_size_scale, "legend.handlelength": 0.5, "axes.labelsize": 6 * font_size_scale, "axes.titlesize": 6 * font_size_scale, "xtick.labelsize": 6 * font_size_scale, "ytick.labelsize": 6 * font_size_scale, "axes.titlepad": 1, "axes.labelpad": 1, "axes.spines.right": False, "axes.spines.top": False, } matplotlib.rcParams.update(params) show_kin_parameters = True if true_param_prefix else show_kin_parameters uns_keys = np.array(adata.uns_keys()) group = next((i.split("_dynamics")[0] for i in uns_keys if i.endswith("_dynamics")), None) if group is not None: uns_key = group + "_dynamics" _group, grp_len = ( np.unique(adata.obs[group]), len(np.unique(adata.obs[group])), ) else: uns_key = "dynamics" _group, grp_len = ["_all_cells"], 1 experiment_type = adata.uns[uns_key]["experiment_type"] T = adata.uns[uns_key]["t"] filter_gene_mode = adata.uns[uns_key]["filter_gene_mode"] X_data = adata.uns[uns_key]["X_data"] X_fit_data = adata.uns[uns_key]["X_fit_data"] has_splicing = adata.uns[uns_key]["has_splicing"] and adata.uns[uns_key]["splicing_labeling"] model = adata.uns[uns_key]["model"] use_smoothed = adata.uns[uns_key]["use_smoothed"] est_method = adata.uns[uns_key]["est_method"] # for key, val in adata.uns[uns_key].items(): # exec(key + '=val') if experiment_type == "conventional": # run the phase_portraits plot main_warning( "dynamics plot doesn't support conventional experiment type, using phase_portraits function instead." ) phase_portraits(adata, genes=genes, save_show_or_return=save_show_or_return) return T_uniq = np.unique(T) t = np.linspace(0, T_uniq[-1], 1000) valid_genes = adata.var_names[get_valid_bools(adata, filter_gene_mode)] valid_gene_names = valid_genes.intersection(genes) if len(valid_gene_names) == 0: raise ValueError( f"no kinetic parameter fitting has performed for the gene {genes} you provided. Try " f"using dyn.tl.dynamics(adata, filter_gene_mode='final', ...) or use only fitted genes." ) valid_adata = adata[:, valid_gene_names] gene_idx = np.array([np.where(valid_genes == gene)[0][0] for gene in valid_gene_names]) X_data, X_fit_data = ( [X_data[i] for i in gene_idx], [X_fit_data[i] for i in gene_idx], ) if boxwidth is None and len(T_uniq) > 1: boxwidth = 0.8 * (np.diff(T_uniq).min() / 2) if barwidth is None and len(T_uniq) > 1: barwidth = 0.8 * (np.diff(T_uniq).min() / 2) if has_splicing: if model == "moment": sub_plot_n = 4 # number of subplots for each gene elif experiment_type == "kin": if model == "deterministic": sub_plot_n = 2 if show_variance else 1 elif model == "stochastic": mom_n = 3 if show_moms_fit else 0 sub_plot_n = 2 + mom_n if show_variance else 1 + mom_n elif model == "mixture": sub_plot_n = 4 if show_variance else 2 elif model == "mixture_deterministic_stochastic": mom_n = 3 if show_moms_fit else 0 sub_plot_n = 4 + mom_n if show_variance else 2 + mom_n elif model == "mixture_stochastic_stochastic": mom_n = 6 if show_moms_fit else 0 sub_plot_n = 4 + mom_n if show_variance else 2 + mom_n elif experiment_type == "deg": if model == "deterministic": sub_plot_n = 2 if show_variance else 1 elif model == "stochastic": mom_n = 3 if show_moms_fit else 0 sub_plot_n = 2 + mom_n if show_variance else 1 + mom_n elif experiment_type == "one_shot": # just the labeled RNA sub_plot_n = 1 elif experiment_type == "mix_std_stm": sub_plot_n = 5 elif experiment_type in ["mix_pulse_chase", "mix_kin_deg"]: sub_plot_n = 2 else: if model == "moment": sub_plot_n = 2 # number of subplots for each gene elif experiment_type == "kin": if model == "deterministic": sub_plot_n = 1 if est_method != "twostep" else 2 elif model == "stochastic": mom_n = 1 if show_moms_fit else 0 sub_plot_n = 1 + mom_n if show_variance else 1 + mom_n elif model == "mixture": sub_plot_n = 2 if show_variance else 1 elif model == "mixture_deterministic_stochastic": mom_n = 1 if show_moms_fit else 0 sub_plot_n = 2 + mom_n if show_variance else 1 + mom_n elif model == "mixture_stochastic_stochastic": mom_n = 2 if show_moms_fit else 0 sub_plot_n = 2 + mom_n if show_variance else 1 + mom_n elif experiment_type == "deg": if model == "deterministic": sub_plot_n = 1 elif model == "stochastic": mom_n = 1 if show_moms_fit else 0 sub_plot_n = 1 + mom_n elif experiment_type == "one_shot": # just the labeled RNA sub_plot_n = 1 elif experiment_type == "mix_std_stm": sub_plot_n = 3 elif experiment_type in ["mix_pulse_chase", "mix_kin_deg"]: sub_plot_n = 2 ncols = ( # each column correspond to one gene len(gene_idx) * grp_len if ncols is None else min(len(gene_idx) * grp_len, ncols) ) nrows = int(np.ceil(len(gene_idx) * sub_plot_n * grp_len / ncols)) figsize = [7, 5] if figsize is None else figsize g = plt.figure(None, (figsize[0] * ncols, figsize[1] * nrows), dpi=dpi) gs = plt.GridSpec( nrows, ncols, g, wspace=0.12, ) # there are bugs when fig is not a symmetric matrix fig_mat = np.arange(nrows * ncols).reshape((nrows, ncols)) # we need to visualize gene in row-wise mode for grp_idx, cur_grp in enumerate(_group): if cur_grp == "_all_cells": prefix = "" # kinetic_parameter_ else: prefix = group + "_" + cur_grp + "_" true_p = None true_params = [None, None, None] vel_params_df = get_vel_params(valid_adata) logLL = vel_params_df.loc[valid_gene_names, prefix + "logLL"] est_params_df = vel_params_df.loc[ valid_gene_names, [ prefix + "alpha", prefix + "beta", prefix + "gamma", "half_life", ], ] est_params = [ est_params_df.loc[:, "alpha"].values, est_params_df.loc[:, "beta"].values, est_params_df.loc[:, "gamma"].values, ] if experiment_type in ["mix_pulse_chase", "mix_kin_deg"]: if est_method == "twostep": gs = plot_kin_deg_twostep( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, T, T_uniq, unit, X_data, X_fit_data, logLL, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_variance, show_kin_parameters, ) elif experiment_type == "kin": if est_method == "twostep": gs = plot_kin_twostep( valid_adata, valid_gene_names, has_splicing, use_smoothed, t, T, T_uniq, unit, X_data, X_fit_data, logLL, grp_len, sub_plot_n, ncols, gs, fig_mat, gene_order, true_param_prefix, true_params, est_params, show_kin_parameters, ) elif model == "deterministic": gs = plot_kin_det( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, t, T, T_uniq, unit, X_data, X_fit_data, logLL, true_p, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_variance, show_kin_parameters, ) elif model == "stochastic": gs = plot_kin_sto( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, t, T, T_uniq, unit, X_data, X_fit_data, logLL, true_p, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_moms_fit, show_variance, show_kin_parameters, ) elif model == "mixture": gs = plot_kin_mix( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, t, T, T_uniq, unit, X_data, X_fit_data, logLL, true_p, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_variance, show_kin_parameters, ) elif model == "mixture_deterministic_stochastic": gs = plot_kin_mix_det_sto( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, t, T, T_uniq, unit, X_data, X_fit_data, logLL, true_p, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_moms_fit, show_variance, show_kin_parameters, ) elif model == "mixture_stochastic_stochastic": gs = plot_kin_mix_sto_sto( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, t, T, T_uniq, unit, X_data, X_fit_data, logLL, true_p, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_moms_fit, show_variance, show_kin_parameters, ) elif experiment_type == "deg": if model == "deterministic": gs = plot_deg_det( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, t, T, T_uniq, unit, X_data, X_fit_data, logLL, true_p, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_variance, show_kin_parameters, ) elif model == "stochastic": gs = plot_deg_sto( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, t, T, T_uniq, unit, X_data, X_fit_data, logLL, true_p, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_moms_fit, show_variance, show_kin_parameters, ) else: for i, gene_name in enumerate(valid_genes): if model == "moment": a, b, alpha_a, alpha_i, beta, gamma = vel_params_df.loc[ gene_name, [ prefix + "a", prefix + "b", prefix + "alpha_a", prefix + "alpha_i", prefix + "beta", prefix + "gamma", ], ] params = { "a": a, "b": b, "alpha_a": alpha_a, "alpha_i": alpha_i, "beta": beta, "gamma": gamma, } # "la": 1, "si": 0, mom = moments(*list(params.values())) mom.integrate(t) mom_data = mom.get_all_central_moments() if has_splicing else mom.get_nosplice_central_moments() if true_param_prefix is not None: (true_a, true_b, true_alpha_a, true_alpha_i, true_beta, true_gamma,) = ( ( vel_params_df.loc[gene_name, true_param_prefix + "a"] if true_param_prefix + "a" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "b"] if true_param_prefix + "b" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "alpha_a"] if true_param_prefix + "alpha_a" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "alpha_i"] if true_param_prefix + "alpha_i" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "beta"] if true_param_prefix + "beta" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "gamma"] if true_param_prefix + "gamma" in vel_params_df.columns else -np.inf ), ) true_params = { "a": true_a, "b": true_b, "alpha_a": true_alpha_a, "alpha_i": true_alpha_i, "beta": true_beta, "gamma": true_gamma, } # "la": 1, "si": 0, true_mom = moments(*list(true_params.values())) true_mom.integrate(t) true_mom_data = ( true_mom.get_all_central_moments() if has_splicing else true_mom.get_nosplice_central_moments() ) # n_mean, n_var = x_data[:2, :], x_data[2:, :] if has_splicing: tmp = ( [ valid_adata[:, gene_name].layers["M_ul"].toarray().T, valid_adata.layers["M_sl"].toarray().T, ] if "M_ul" in valid_adata.layers.keys() else [ valid_adata[:, gene_name].layers["ul"].toarray().T, valid_adata.layers["sl"].toarray().T, ] ) x_data = [tmp[0].toarray(), tmp[1].toarray()] if issparse(tmp[0]) else tmp if log_unnormalized and "X_ul" not in valid_adata.layers.keys(): x_data = [np.log1p(tmp[0]), np.log1p(tmp[1])] title_ = [ "(unspliced labeled)", "(spliced labeled)", "(unspliced labeled)", "(spliced labeled)", ] Obs_m = [ valid_adata.uns["M_ul"], valid_adata.uns["M_sl"], ] Obs_v = [ valid_adata.uns["V_ul"], valid_adata.uns["V_sl"], ] j_species = 2 # number of species else: tmp = ( valid_adata[:, gene_name].layers["X_new"].T if "X_new" in valid_adata.layers.keys() else valid_adata[:, gene_name].layers["new"].T ) x_data = [tmp.toarray()] if issparse(tmp) else [tmp] if log_unnormalized and "X_new" not in valid_adata.layers.keys(): x_data = [np.log1p(x_data[0])] # only use new key for calculation, so we only have M, V title_ = [" (labeled)", " (labeled)"] Obs_m, Obs_v = ( [valid_adata.uns["M"]], [valid_adata.uns["V"]], ) j_species = 1 for j in range(sub_plot_n): row_ind = int( np.floor(i / ncols) ) # make sure all related plots for the same gene in the same column. col_loc = (row_ind * sub_plot_n + j) * ncols * grp_len + (i % ncols - 1) * grp_len + 1 row_i, col_i = np.where(fig_mat == col_loc) ax = ( plt.subplot(gs[col_loc]) if gene_order == "column" else plt.subplot(gs[fig_mat[col_i, row_i]]) ) if true_param_prefix is not None and j == 0: if has_splicing: ax.text( 0.05, 0.92, r"$a$" + ": {0:.2f}; ".format(true_a) + r"$\hat a$" + ": {0:.2f} \n".format(a) + r"$b$" + ": {0:.2f}; ".format(true_b) + r"$\hat b$" + ": {0:.2f} \n".format(b) + r"$\alpha_a$" + ": {0:.2f}; ".format(true_alpha_a) + r"$\hat \alpha_a$" + ": {0:.2f} \n".format(alpha_a) + r"$\alpha_i$" + ": {0:.2f}; ".format(true_alpha_i) + r"$\hat \alpha_i$" + ": {0:.2f} \n".format(alpha_i) + r"$\beta$" + ": {0:.2f}; ".format(true_beta) + r"$\hat \beta$" + ": {0:.2f} \n".format(beta) + r"$\gamma$" + ": {0:.2f}; ".format(true_gamma) + r"$\hat \gamma$" + ": {0:.2f} \n".format(gamma), ha="left", va="top", transform=ax.transAxes, ) else: ax.text( 0.05, 0.92, r"$a$" + ": {0:.2f}; ".format(true_a) + r"$\hat a$" + ": {0:.2f} \n".format(a) + r"$b$" + ": {0:.2f}; ".format(true_b) + r"$\hat b$" + ": {0:.2f} \n".format(b) + r"$\alpha_a$" + ": {0:.2f}; ".format(true_alpha_a) + r"$\hat \alpha_a$" + ": {0:.2f} \n".format(alpha_a) + r"$\alpha_i$" + ": {0:.2f}; ".format(true_alpha_i) + r"$\hat \alpha_i$" + ": {0:.2f} \n".format(alpha_i) + r"$\gamma$" + ": {0:.2f}; ".format(true_gamma) + r"$\hat \gamma$" + ": {0:.2f} \n".format(gamma), ha="left", va="top", transform=ax.transAxes, ) if j < j_species: ax.boxplot( x=[x_data[j][i][T == std] for std in T_uniq], positions=T_uniq, widths=boxwidth, showfliers=False, showmeans=True, ) # x=T.values, y= # ax1.plot(T, u.T, linestyle='None', marker='o', markersize=10) # ax.scatter(T_uniq, Obs_m[j][i], c='r') # ax1.plot(T, u.T, linestyle='None', marker='o', markersize=10) if y_log_scale: ax.set_yscale("log") if log_unnormalized: ax.set_ylabel("Expression (log)") # else: ax.set_ylabel("Expression") ax.plot(t, mom_data[j], "k--") if true_param_prefix is not None: ax.plot(t, true_mom_data[j], "r--") else: ax.scatter(T_uniq, Obs_v[j - j_species][i]) # , c='r' if y_log_scale: ax.set_yscale("log") if log_unnormalized: ax.set_ylabel("Variance (log expression)") # else: ax.set_ylabel("Variance") ax.plot(t, mom_data[j], "k--") if true_param_prefix is not None: ax.plot(t, true_mom_data[j], "r--") ax.set_xlabel("time (" + unit + ")") ax.set_title(gene_name + " " + title_[j]) elif experiment_type == "deg": if has_splicing: layers = ( ["X_uu", "X_ul", "X_su", "X_sl"] if "X_ul" in valid_adata.layers.keys() else ["uu", "ul", "su", "sl"] ) uu, ul, su, sl = ( valid_adata[:, gene_name].layers[layers[0]], valid_adata[:, gene_name].layers[layers[1]], valid_adata[:, gene_name].layers[layers[2]], valid_adata[:, gene_name].layers[layers[3]], ) uu, ul, su, sl = ( ( uu.toarray().squeeze(), ul.toarray().squeeze(), su.toarray().squeeze(), sl.toarray().squeeze(), ) if issparse(uu) else ( uu.squeeze(), ul.squeeze(), su.squeeze(), sl.squeeze(), ) ) if log_unnormalized and layers == [ "uu", "ul", "su", "sl", ]: uu, ul, su, sl = ( np.log1p(uu), np.log1p(ul), np.log1p(su), np.log1p(sl), ) (alpha, beta, gamma, ul0, sl0, uu0, half_life,) = vel_params_df.loc[ gene_name, [ prefix + "alpha", prefix + "beta", prefix + "gamma", prefix + "ul0", prefix + "sl0", prefix + "uu0", "half_life", ], ] # $u$ - unlabeled, unspliced # $s$ - unlabeled, spliced # $w$ - labeled, unspliced # $l$ - labeled, spliced # beta = sys.float_info.epsilon if beta == 0 else beta gamma = sys.float_info.epsilon if gamma == 0 else gamma u = sol_u(t, uu0, alpha, beta) su0 = np.mean(su[T == np.min(T)]) # this should also be estimated s = sol_s(t, su0, uu0, alpha, beta, gamma) w = sol_u(t, ul0, 0, beta) l = sol_s(t, sl0, ul0, 0, beta, gamma) if true_param_prefix is not None: true_alpha, true_beta, true_gamma = ( ( vel_params_df.loc[gene_name, true_param_prefix + "alpha"] if true_param_prefix + "alpha" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "beta"] if true_param_prefix + "beta" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "gamma"] if true_param_prefix + "gamma" in vel_params_df.columns else -np.inf ), ) true_u = sol_u(t, uu0, true_alpha, true_beta) true_s = sol_s(t, su0, uu0, true_alpha, true_beta, true_gamma) true_w = sol_u(t, ul0, 0, true_beta) true_l = sol_s(t, sl0, ul0, 0, true_beta, true_gamma) true_p = np.vstack((true_u, true_w, true_s, true_l)) title_ = [ "(unspliced unlabeled)", "(unspliced labeled)", "(spliced unlabeled)", "(spliced labeled)", ] Obs, Pred = ( np.vstack((uu, ul, su, sl)), np.vstack((u, w, s, l)), ) else: layers = ["X_new", "X_total"] if "X_new" in valid_adata.layers.keys() else ["new", "total"] uu, ul = ( valid_adata[:, gene_name].layers[layers[1]] - valid_adata[:, gene_name].layers[layers[0]], valid_adata[:, gene_name].layers[layers[0]], ) uu, ul = ( (uu.toarray().squeeze(), ul.toarray().squeeze()) if issparse(uu) else (uu.squeeze(), ul.squeeze()) ) if log_unnormalized and layers == ["new", "total"]: uu, ul = np.log1p(uu), np.log1p(ul) alpha, gamma, uu0, ul0, half_life = vel_params_df.loc[ gene_name, [ prefix + "alpha", prefix + "gamma", prefix + "uu0", prefix + "ul0", "half_life", ], ] # require no beta functions u = sol_u(t, uu0, alpha, gamma) s = None # sol_s(t, su0, uu0, 0, 1, gamma) w = sol_u(t, ul0, 0, gamma) l = None # sol_s(t, 0, 0, alpha, 1, gamma) title_ = ["(unlabeled)", "(labeled)"] if true_param_prefix is not None: true_alpha, true_gamma = ( ( vel_params_df.loc[gene_name, true_param_prefix + "alpha"] if true_param_prefix + "alpha" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "gamma"] if true_param_prefix + "gamma" in vel_params_df.columns else -np.inf ), ) true_u = sol_u(t, uu0, true_alpha, true_gamma) true_w = sol_u(t, ul0, 0, true_gamma) true_p = np.vstack((true_u, true_w)) Obs, Pred = np.vstack((uu, ul)), np.vstack((u, w)) for j in range(sub_plot_n): row_ind = int(np.floor(i / ncols)) # make sure unlabled and labeled are in the same column. col_loc = (row_ind * sub_plot_n + j) * ncols * grp_len + (i % ncols - 1) * grp_len + 1 row_i, col_i = np.where(fig_mat == col_loc) ax = ( plt.subplot(gs[col_loc]) if gene_order == "column" else plt.subplot(gs[fig_mat[col_i, row_i]]) ) if true_param_prefix is not None and j == 0: if has_splicing: ax.text( 0.75, 0.50, r"$\alpha$" + ": {0:.2f}; ".format(true_alpha) + r"$\hat \alpha$" + ": {0:.2f} \n".format(alpha) + r"$\beta$" + ": {0:.2f}; ".format(true_beta) + r"$\hat \beta$" + ": {0:.2f} \n".format(beta) + r"$\gamma$" + ": {0:.2f}; ".format(true_gamma) + r"$\hat \gamma$" + ": {0:.2f} \n".format(gamma), ha="right", va="top", transform=ax.transAxes, ) else: ax.text( 0.75, 0.50, r"$\alpha$" + ": {0:.2f}; ".format(true_alpha) + r"$\hat \alpha$" + ": {0:.2f} \n".format(alpha) + r"$\gamma$" + ": {0:.2f}; ".format(true_gamma) + r"$\hat \gamma$" + ": {0:.2f} \n".format(gamma), ha="right", va="top", transform=ax.transAxes, ) ax.boxplot( x=[Obs[j][T == std] for std in T_uniq], positions=T_uniq, widths=boxwidth, showfliers=False, showmeans=True, ) ax.plot(t, Pred[j], "k--") if true_param_prefix is not None: ax.plot(t, true_p[j], "r--") if j == sub_plot_n - 1: ax.text( 0.8, 0.8, r"$t_{1/2} = $" + "{0:.2f}".format(half_life) + unit[0], ha="right", va="top", transform=ax.transAxes, ) ax.set_xlabel("time (" + unit + ")") if y_log_scale: ax.set_yscale("log") if log_unnormalized: ax.set_ylabel("Expression (log)") else: ax.set_ylabel("Expression") ax.set_title(gene_name + " " + title_[j]) elif experiment_type == "kin": if model == "deterministic": logLL = vel_params_df.loc[valid_gene_names, prefix + "logLL"] alpha, beta, gamma, half_life = vel_params_df.loc[ gene_name, [ prefix + "alpha", prefix + "beta", prefix + "gamma", "half_life", ], ] est_params = [alpha, beta, gamma] gs = plot_kin_det( valid_adata, valid_gene_names, has_splicing, use_smoothed, log_unnormalized, t, T, T_uniq, unit, X_data, X_fit_data, logLL, true_p, grp_len, sub_plot_n, ncols, boxwidth, gs, fig_mat, gene_order, y_log_scale, true_param_prefix, true_params, est_params, show_variance, show_kin_parameters, ) elif model == "stochastic": if has_splicing: pass else: pass elif model == "mixture": if has_splicing: pass else: pass elif model == "mixture_deterministic_stochastic": if has_splicing: pass else: pass elif model == "mixture_stochastic_stochastic": if has_splicing: pass else: pass else: if has_splicing: layers = ( ["X_uu", "X_ul", "X_su", "X_sl"] if "X_ul" in valid_adata.layers.keys() else ["uu", "ul", "su", "sl"] ) uu, ul, su, sl = ( valid_adata[:, gene_name].layers[layers[0]], valid_adata[:, gene_name].layers[layers[1]], valid_adata[:, gene_name].layers[layers[2]], valid_adata[:, gene_name].layers[layers[3]], ) uu, ul, su, sl = ( ( uu.toarray().squeeze(), ul.toarray().squeeze(), su.toarray().squeeze(), sl.toarray().squeeze(), ) if issparse(uu) else ( uu.squeeze(), ul.squeeze(), su.squeeze(), sl.squeeze(), ) ) if log_unnormalized and layers == [ "uu", "ul", "su", "sl", ]: uu, ul, su, sl = ( np.log1p(uu), np.log1p(ul), np.log1p(su), np.log1p(sl), ) alpha, beta, gamma, uu0, su0 = vel_params_df.loc[ gene_name, [ prefix + "alpha", prefix + "beta", prefix + "gamma", prefix + "uu0", prefix + "su0", ], ] # $u$ - unlabeled, unspliced # $s$ - unlabeled, spliced # $w$ - labeled, unspliced # $l$ - labeled, spliced # beta = sys.float_info.epsilon if beta == 0 else beta gamma = sys.float_info.epsilon if gamma == 0 else gamma u = sol_u(t, uu0, 0, beta) s = sol_s(t, su0, uu0, 0, beta, gamma) w = sol_u(t, 0, alpha, beta) l = sol_s(t, 0, 0, alpha, beta, gamma) if true_param_prefix is not None: true_alpha, true_beta, true_gamma = ( ( vel_params_df.loc[gene_name, true_param_prefix + "alpha"] if true_param_prefix + "alpha" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "beta"] if true_param_prefix + "beta" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "gamma"] if true_param_prefix + "gamma" in vel_params_df.columns else -np.inf ), ) true_u = sol_u(t, uu0, 0, true_beta) true_s = sol_s(t, su0, uu0, 0, true_beta, true_gamma) true_w = sol_u(t, 0, true_alpha, true_beta) true_l = sol_s(t, 0, 0, true_alpha, true_beta, true_gamma) true_p = np.vstack((true_u, true_w, true_s, true_l)) title_ = [ "(unspliced unlabeled)", "(unspliced labeled)", "(spliced unlabeled)", "(spliced labeled)", ] Obs, Pred = ( np.vstack((uu, ul, su, sl)), np.vstack((u, w, s, l)), ) else: layers = ["X_new", "X_total"] if "X_new" in valid_adata.layers.keys() else ["new", "total"] uu, ul = ( valid_adata[:, gene_name].layers[layers[1]] - valid_adata[:, gene_name].layers[layers[0]], valid_adata[:, gene_name].layers[layers[0]], ) uu, ul = ( (uu.toarray().squeeze(), ul.toarray().squeeze()) if issparse(uu) else (uu.squeeze(), ul.squeeze()) ) if log_unnormalized and layers == ["new", "total"]: uu, ul = np.log1p(uu), np.log1p(ul) alpha, gamma, uu0 = vel_params_df.loc[ gene_name, [ prefix + "alpha", prefix + "gamma", prefix + "uu0", ], ] # require no beta functions u = sol_u(t, uu0, 0, gamma) s = None # sol_s(t, su0, uu0, 0, 1, gamma) w = sol_u(t, 0, alpha, gamma) l = None # sol_s(t, 0, 0, alpha, 1, gamma) if true_param_prefix is not None: true_alpha, true_gamma = ( ( vel_params_df.loc[gene_name, true_param_prefix + "alpha"] if true_param_prefix + "alpha" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "gamma"] if true_param_prefix + "gamma" in vel_params_df.columns else -np.inf ), ) true_u = sol_u(t, uu0, 0, true_gamma) true_w = sol_u(t, 0, true_alpha, true_gamma) true_p = np.vstack((true_u, true_w)) title_ = ["(unlabeled)", "(labeled)"] Obs, Pred = np.vstack((uu, ul)), np.vstack((u, w)) for j in range(sub_plot_n): row_ind = int(np.floor(i / ncols)) # make sure unlabled and labeled are in the same column. col_loc = (row_ind * sub_plot_n + j) * ncols * grp_len + (i % ncols - 1) * grp_len + 1 row_i, col_i = np.where(fig_mat == col_loc) ax = ( plt.subplot(gs[col_loc]) if gene_order == "column" else plt.subplot(gs[fig_mat[col_i, row_i]]) ) if true_param_prefix is not None and j == 0: if has_splicing: ax.text( 0.75, 0.90, r"$\alpha$" + ": {0:.2f}; ".format(true_alpha) + r"$\hat \alpha$" + ": {0:.2f} \n".format(alpha) + r"$\beta$" + ": {0:.2f}; ".format(true_beta) + r"$\hat \beta$" + ": {0:.2f} \n".format(beta) + r"$\gamma$" + ": {0:.2f}; ".format(true_gamma) + r"$\hat \gamma$" + ": {0:.2f} \n".format(gamma), ha="right", va="top", transform=ax.transAxes, ) else: ax.text( 0.75, 0.90, r"$\alpha$" + ": {0:.2f}; ".format(true_alpha) + r"$\hat \alpha$" + ": {0:.2f} \n".format(alpha) + r"$\gamma$" + ": {0:.2f}; ".format(true_gamma) + r"$\hat \gamma$" + ": {0:.2f} \n".format(gamma), ha="right", va="top", transform=ax.transAxes, ) ax.boxplot( x=[Obs[j][T == std] for std in T_uniq], positions=T_uniq, widths=boxwidth, showfliers=False, showmeans=True, ) ax.plot(t, Pred[j], "k--") if true_param_prefix is not None: ax.plot(t, true_p[j], "k--") ax.set_xlabel("time (" + unit + ")") if y_log_scale: ax.set_yscale("log") if log_unnormalized: ax.set_ylabel("Expression (log)") else: ax.set_ylabel("Expression") ax.set_title(gene_name + " " + title_[j]) elif experiment_type == "one_shot": if has_splicing: layers = ( ["X_uu", "X_ul", "X_su", "X_sl"] if "X_ul" in valid_adata.layers.keys() else ["uu", "ul", "su", "sl"] ) uu, ul, su, sl = ( valid_adata[:, gene_name].layers[layers[0]], valid_adata[:, gene_name].layers[layers[1]], valid_adata[:, gene_name].layers[layers[2]], valid_adata[:, gene_name].layers[layers[3]], ) uu, ul, su, sl = ( ( uu.toarray().squeeze(), ul.toarray().squeeze(), su.toarray().squeeze(), sl.toarray().squeeze(), ) if issparse(uu) else ( uu.squeeze(), ul.squeeze(), su.squeeze(), sl.squeeze(), ) ) if log_unnormalized and layers == [ "uu", "ul", "su", "sl", ]: uu, ul, su, sl = ( np.log1p(uu), np.log1p(ul), np.log1p(su), np.log1p(sl), ) alpha, beta, gamma, U0, S0 = vel_params_df.loc[ gene_name, [ prefix + "alpha", prefix + "beta", prefix + "gamma", prefix + "U0", prefix + "S0", ], ] # $u$ - unlabeled, unspliced # $s$ - unlabeled, spliced # $w$ - labeled, unspliced # $l$ - labeled, spliced # beta = sys.float_info.epsilon if beta == 0 else beta gamma = sys.float_info.epsilon if gamma == 0 else gamma U_sol = sol_u(t, U0, 0, beta) S_sol = sol_u(t, S0, 0, gamma) l = sol_u(t, 0, alpha, beta) + sol_s(t, 0, 0, alpha, beta, gamma) L = sl + ul if true_param_prefix is not None: true_alpha, true_beta, true_gamma = ( ( vel_params_df.loc[gene_name, true_param_prefix + "alpha"] if true_param_prefix + "alpha" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "beta"] if true_param_prefix + "beta" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "gamma"] if true_param_prefix + "gamma" in vel_params_df.columns else -np.inf ), ) true_l = sol_u(t, 0, true_alpha, true_beta) + sol_s( t, 0, 0, true_alpha, true_beta, true_gamma ) title_ = ["labeled"] else: layers = ["X_new", "X_total"] if "X_new" in valid_adata.layers.keys() else ["new", "total"] uu, ul = ( valid_adata[:, gene_name].layers[layers[1]] - valid_adata[:, gene_name].layers[layers[0]], valid_adata[:, gene_name].layers[layers[0]], ) uu, ul = ( (uu.toarray().squeeze(), ul.toarray().squeeze()) if issparse(uu) else (uu.squeeze(), ul.squeeze()) ) if log_unnormalized and layers == ["new", "total"]: uu, ul = np.log1p(uu), np.log1p(ul) alpha, gamma, total0 = vel_params_df.loc[ gene_name, [ prefix + "alpha", prefix + "gamma", prefix + "total0", ], ] # require no beta functions old = sol_u(t, total0, 0, gamma) s = None # sol_s(t, su0, uu0, 0, 1, gamma) w = None l = sol_u(t, 0, alpha, gamma) # sol_s(t, 0, 0, alpha, 1, gamma) L = ul if true_param_prefix is not None: true_alpha, true_gamma = ( ( vel_params_df.loc[gene_name, true_param_prefix + "alpha"] if true_param_prefix + "alpha" in vel_params_df.columns else -np.inf ), ( vel_params_df.loc[gene_name, true_param_prefix + "gamma"] if true_param_prefix + "gamma" in vel_params_df.columns else -np.inf ), ) true_l = sol_u(t, 0, true_alpha, true_gamma) # sol_s(t, 0, 0, alpha, 1, gamma) title_ = ["labeled"] Obs, Pred = np.hstack((np.zeros(L.shape), L)), np.hstack(l) if true_param_prefix is not None: true_p = np.hstack(true_l) row_ind = int(np.floor(i / ncols)) # make sure unlabled and labeled are in the same column. col_loc = (row_ind * sub_plot_n) * ncols * grp_len + (i % ncols - 1) * grp_len + 1 row_i, col_i = np.where(fig_mat == col_loc) ax = plt.subplot(gs[col_loc]) if gene_order == "column" else plt.subplot(gs[fig_mat[col_i, row_i]]) if true_param_prefix is not None: if has_splicing: ax.text( 0.05, 0.90, r"$\alpha$ " + ": {0:.2f}; ".format(true_alpha) + r"$\hat \alpha$" + ": {0:.2f} \n".format(alpha) + r"$\beta$" + ": {0:.2f}; ".format(true_beta) + r"$\hat \beta$" + ": {0:.2f} \n".format(beta) + r"$\gamma$" + ": {0:.2f}; ".format(true_gamma) + r"$\hat \gamma$" + ": {0:.2f} \n".format(gamma), ha="left", va="top", transform=ax.transAxes, ) else: ax.text( 0.05, 0.90, r"$\alpha$" + ": {0:.2f}; ".format(true_alpha) + r"$\hat \alpha$" + ": {0:.2f} \n".format(alpha) + r"$\gamma$" + ": {0:.2f}; ".format(true_gamma) + r"$\hat \gamma$" + ": {0:.2f} \n".format(gamma), ha="left", va="top", transform=ax.transAxes, ) ax.boxplot( x=[Obs[np.hstack((np.zeros_like(T), T)) == std] for std in [0, T_uniq[0]]], positions=[0, T_uniq[0]], widths=boxwidth, showfliers=False, showmeans=True, ) ax.plot(t, Pred, "k--") if true_param_prefix is not None: ax.plot(t, true_p, "r--") ax.set_xlabel("time (" + unit + ")") if y_log_scale: ax.set_yscale("log") if log_unnormalized: ax.set_ylabel("Expression (log)") else: ax.set_ylabel("Expression") ax.set_title(gene_name + " " + title_[0]) elif experiment_type == "mix_std_stm": if has_splicing: layers = ( ["X_uu", "X_ul", "X_su", "X_sl"] if "X_ul" in valid_adata.layers.keys() else ["uu", "ul", "su", "sl"] ) uu, ul, su, sl = ( valid_adata[:, gene_name].layers[layers[0]], valid_adata[:, gene_name].layers[layers[1]], valid_adata[:, gene_name].layers[layers[2]], valid_adata[:, gene_name].layers[layers[3]], ) uu, ul, su, sl = ( ( uu.toarray().squeeze(), ul.toarray().squeeze(), su.toarray().squeeze(), sl.toarray().squeeze(), ) if issparse(uu) else ( uu.squeeze(), ul.squeeze(), su.squeeze(), sl.squeeze(), ) ) if log_unnormalized and layers == [ "uu", "ul", "su", "sl", ]: uu, ul, su, sl = ( np.log1p(uu), np.log1p(ul), np.log1p(su), np.log1p(sl), ) beta, gamma, alpha_std = vel_params_df.loc[ gene_name, [ prefix + "beta", prefix + "gamma", prefix + "alpha_std", ], ] alpha_stm = valid_adata[:, gene_name].varm[prefix + "alpha"].flatten()[1:] alpha_stm0, k, _ = solve_first_order_deg(T_uniq[1:], alpha_stm) # $u$ - unlabeled, unspliced # $s$ - unlabeled, spliced # $w$ - labeled, unspliced # $l$ - labeled, spliced # # calculate labeled unspliced and spliced mRNA amount u1, s1, u1_, s1_ = ( np.zeros(len(t) - 1), np.zeros(len(t) - 1), np.zeros(len(T_uniq) - 1), np.zeros(len(T_uniq) - 1), ) for ind in np.arange(1, len(t)): t_i = t[ind] u0 = sol_u(np.max(t) - t_i, 0, alpha_std, beta) alpha_stm_t_i = alpha_stm0 * np.exp(-k * t_i) u1[ind - 1], s1[ind - 1] = ( sol_u(t_i, u0, alpha_stm_t_i, beta), sol_u(np.max(t), 0, beta, gamma), ) for ind in np.arange(1, len(T_uniq)): t_i = T_uniq[ind] u0 = sol_u(np.max(T_uniq) - t_i, 0, alpha_std, beta) alpha_stm_t_i = alpha_stm[ind - 1] u1_[ind - 1], s1_[ind - 1] = ( sol_u(t_i, u0, alpha_stm_t_i, beta), sol_u(np.max(T_uniq), 0, beta, gamma), ) Obs, Pred, Pred_ = ( np.vstack((ul, sl, uu, su)), np.vstack((u1.reshape(1, -1), s1.reshape(1, -1))), np.vstack((u1_.reshape(1, -1), s1_.reshape(1, -1))), ) j_species, title_ = ( 4, [ "unspliced labeled (new)", "spliced labeled (new)", "unspliced unlabeled (old)", "spliced unlabeled (old)", "alpha (steady state vs. stimulation)", ], ) else: layers = ["X_new", "X_total"] if "X_new" in valid_adata.layers.keys() else ["new", "total"] uu, ul = ( valid_adata[:, gene_name].layers[layers[1]] - valid_adata[:, gene_name].layers[layers[0]], valid_adata[:, gene_name].layers[layers[0]], ) uu, ul = ( (uu.toarray().squeeze(), ul.toarray().squeeze()) if issparse(uu) else (uu.squeeze(), ul.squeeze()) ) if log_unnormalized and layers == ["new", "total"]: uu, ul = np.log1p(uu), np.log1p(ul) gamma, alpha_std = vel_params_df.loc[gene_name, [prefix + "gamma", prefix + "alpha_std"]] alpha_stm = valid_adata[:, gene_name].varm[prefix + "alpha"].flatten()[1:] alpha_stm0, k, _ = solve_first_order_deg(T_uniq[1:], alpha_stm) # require no beta functions u1, u1_ = ( np.zeros(len(t) - 1), np.zeros(len(T_uniq) - 1), ) # interpolation or original time point for ind in np.arange(1, len(t)): t_i = t[ind] u0 = sol_u(np.max(t) - t_i, 0, alpha_std, gamma) alpha_stm_t_i = alpha_stm0 * np.exp(-k * t_i) u1[ind - 1] = sol_u(t_i, u0, alpha_stm_t_i, gamma) for ind in np.arange(1, len(T_uniq)): t_i = T_uniq[ind] u0 = sol_u(np.max(T_uniq) - t_i, 0, alpha_std, gamma) alpha_stm_t_i = alpha_stm[ind - 1] u1_[ind - 1] = sol_u(t_i, u0, alpha_stm_t_i, gamma) Obs, Pred, Pred_ = ( np.vstack((ul, uu)), np.vstack((u1.reshape(1, -1))), np.vstack((u1_.reshape(1, -1))), ) j_species, title_ = ( 2, [ "labeled (new)", "unlabeled (old)", "alpha (steady state vs. stimulation)", ], ) group_list = [ np.repeat(alpha_std, len(alpha_stm)), alpha_stm, ] for j in range(sub_plot_n): row_ind = int( np.floor(i / ncols) ) # make sure all related plots for the same gene in the same column. col_loc = (row_ind * sub_plot_n + j) * ncols * grp_len + (i % ncols - 1) * grp_len + 1 row_i, col_i = np.where(fig_mat == col_loc) ax = ( plt.subplot(gs[col_loc]) if gene_order == "column" else plt.subplot(gs[fig_mat[col_i, row_i]]) ) if j < j_species / 2: ax.boxplot( x=[Obs[j][T == std] for std in T_uniq[1:]], positions=T_uniq[1:], widths=boxwidth, showfliers=False, showmeans=True, ) ax.plot(t[1:], Pred[j], "k--") ax.scatter(T_uniq[1:], Pred_[j], s=20, c="red") ax.set_xlabel("time (" + unit + ")") ax.set_title(gene_name + ": " + title_[j]) if y_log_scale: ax.set_yscale("log") if log_unnormalized: ax.set_ylabel("Expression (log)") else: ax.set_ylabel("Expression") elif j < j_species: ax.boxplot( x=[Obs[j][T == std] for std in T_uniq], positions=T_uniq, widths=boxwidth, showfliers=False, showmeans=True, ) ax.set_xlabel("time (" + unit + ")") ax.set_title(gene_name + ": " + title_[j]) if y_log_scale: ax.set_yscale("log") if log_unnormalized: ax.set_ylabel("Expression (log)") else: ax.set_ylabel("Expression") else: x = T_uniq[1:] # the label locations group_width = barwidth / 2 bar_coord, group_name, group_ind = ( [-1, 1], ["steady state", "stimulation"], 0, ) for group_ind in range(len(group_list)): cur_group = group_list[group_ind] ax.bar( x + bar_coord[group_ind] * group_width, cur_group, barwidth, label=group_name[group_ind], ) # Add gene name, experimental type, etc. ax.set_xlabel("time (" + unit + ")") ax.set_ylabel("alpha (translation rate)") ax.set_xticks(x) ax.set_xticklabels(x) group_ind += 1 ax.legend() ax.set_xlabel("time (" + unit + ")") ax.set_title(gene_name + ": " + title_[j]) elif experiment_type == "multi_time_series": pass # group by different groups elif experiment_type == "coassay": pass # show protein velocity (steady state and the Gamma distribution model) # g.autofmt_xdate(rotation=-30, ha='right') return save_show_ret("dynamics", save_show_or_return, save_kwargs, g)
def dynamics_( adata, gene_names, color, dims=[0, 1], current_layer="spliced", use_raw=False, Vkey="S", Ekey="spliced", basis="umap", mode="all", cmap=None, gs=None, **kwargs, ): """ Parameters ---------- adata basis mode: `str` (default: all) Support mode includes: phase, expression, velocity, all Returns ------- """ import matplotlib.pyplot as plt genes = list(set(gene_names).intersection(adata.var.index)) for i, gn in enumerate(genes): ax = plt.subplot(gs[i * 3]) try: ix = np.where(adata.var["Gene"] == gn)[0][0] except: continue scatters( adata, gene_names, color, dims=[0, 1], current_layer="spliced", use_raw=False, Vkey="S", Ekey="spliced", basis="umap", mode="all", cmap=None, gs=None, **kwargs, ) scatters( adata, gene_names, color, dims=[0, 1], current_layer="spliced", use_raw=False, Vkey="S", Ekey="spliced", basis="umap", mode="all", cmap=None, gs=None, **kwargs, ) scatters( adata, gene_names, color, dims=[0, 1], current_layer="spliced", use_raw=False, Vkey="S", Ekey="spliced", basis="umap", mode="all", cmap=None, gs=None, **kwargs, ) plt.tight_layout()