Source code for dynamo.vectorfield.stochastic_process

from tqdm import tqdm
import numpy as np
from sklearn.neighbors import NearestNeighbors
from ..tools.utils import log1p_
from .utils_vecCalc import vecfld_from_adata, vector_field_function


[docs]def diffusionMatrix(adata, X_data=None, V_data=None, genes=None, layer=None, basis="umap", dims=None, n=30, VecFld=None, residual='vector_field'): """"Calculate the diffusion matrix from the estimated velocity vector and the reconstructed vector field. Parameters ---------- adata: :class:`~anndata.AnnData` an Annodata object. X_data: `np.ndarray` (default: `None`) The user supplied expression (embedding) data that will be used for calculating diffusion matrix directly. V_data: `np.ndarray` (default: `None`) The user supplied velocity data that will be used for calculating diffusion matrix directly. genes: `list` or None (default: `None`) The list of genes that will be used to subset the data. If `None`, all genes will be used. layer: `str` or None (default: None) Which layer of the data will be used for diffusion matrix calculation. basis: `str` (default: `umap`) Which basis of the data will be used for diffusion matrix calculation. dims: `list` or None (default: `None`) The list of dimensions that will be selected for diffusion matrix calculation. If `None`, all dimensions will be used. n: `int` (default: `10`) Number of nearest neighbors when the nearest neighbor graph is not included. VecFld: `dictionary` or None (default: None) The reconstructed vector field function. residual: `str` or None (default: `vector_field`) Method to calculate residual velocity vectors for diffusion matrix calculation. If `average`, all velocity of the nearest neighbor cells will be minused by its average velocity; if `vector_field`, all velocity will be minused by the predicted velocity from the reconstructed deterministic velocity vector field. Returns ------- adata: :class:`~anndata.AnnData` `AnnData` object that is updated with the `diffusion_matrix` key in the `uns` attribute which is a list of the diffusion matrix for each cell. A column `diffusion` corresponds to the square root of the sum of all elements for each cell's diffusion matrix will also be added. """ if X_data is None or V_data is not None: if genes is not None: genes = adata.var_name.intersection(genes).to_list() if len(genes) == 0: raise ValueError(f'no genes from your genes list appear in your adata object.') if layer is not None: if layer not in adata.layers.keys(): raise ValueError(f'the layer {layer} you provided is not included in the adata object!') if basis is None: vkey = 'velocity_' + layer[0].upper() if vkey not in adata.obsm.keys(): raise ValueError( f'the data corresponds to the velocity key {vkey} is not included in the adata object!') if VecFld is None: VecFld, func = vecfld_from_adata(adata, basis) else: func = lambda x: vector_field_function(x, VecFld) prefix = 'X_' if layer is None else layer + '_' if basis is not None: if basis.split(prefix)[-1] not in ['pca', 'umap', 'trimap', 'tsne', 'diffmap']: raise ValueError(f"basis (or the suffix of basis) can only be one of " f"['pca', 'umap', 'trimap', 'tsne', 'diffmap'].") if basis.startswith(prefix): basis = basis vkey = "velocity_" + basis.split(prefix)[-1] else: vkey = "velocity_" + basis basis = prefix + basis if vkey not in adata.obsm_keys(): raise ValueError( f'the data corresponds to the velocity key {vkey} is not included in the adata object!') if basis is None: if layer is None: vkey = "velocity_S" if vkey not in adata.uns_keys(): raise ValueError( f'the data corresponds to the velocity key {vkey} is not included in the adata object!') if genes is not None: X_data, V_data = adata[:, genes].X, adata[:, genes].uns[vkey] else: if 'use_for_dynamics' not in adata.var.keys(): X_data, V_data = adata.X, adata.uns[vkey] else: X_data, V_data = adata[:, adata.var.use_for_dynamics].X, \ adata[:, adata.var.use_for_dynamics].uns[vkey] else: vkey = "velocity_" + layer[0].upper() if vkey not in adata.uns_keys(): raise ValueError( f'the data corresponds to the velocity key {vkey} is not included in the adata object!') if genes is not None: X_data, V_data = adata[:, genes].layers[layer], adata[:, genes].uns[vkey] else: if 'use_for_dynamics' not in adata.var.keys(): X_data, V_data = adata.layers[layer], adata.uns[vkey] else: X_data, V_data = adata[:, adata.var.use_for_dynamics].layers[layer], \ adata[:, adata.var.use_for_dynamics].uns[vkey] X_data = log1p_(adata, X_data) else: X_data, V_data = adata.obsm[basis], adata.obsm[vkey] if dims is not None: X_data, V_data = X_data[:, dims], V_data[:, dims] neighbor_key = "neighbors" if layer is None else layer + "_neighbors" if neighbor_key not in adata.uns_keys() or (X_data is not None and V_data is not None): if X_data.shape[0] > 200000 and X_data.shape[1] > 2: from pynndescent import NNDescent nbrs = NNDescent(X_data, metric='euclidean', n_neighbors=n, n_jobs=-1, random_state=19491001) Idx, _ = nbrs.query(X_data, k=n) else: alg = 'ball_tree' if X_data.shape[1] > 10 else 'kd_tree' nbrs = NearestNeighbors(n_neighbors=n, algorithm=alg, n_jobs=-1).fit(X_data) _, Idx = nbrs.kneighbors(X_data) else: conn_key = "connectivities" if layer is None else layer + "_connectivities" neighbors = adata.obsp[conn_key] Idx = neighbors.tolil().rows if residual == 'average': V_ave = np.zeros_like(V_data) for i in range(X_data.shape[0]): vv = V_data[Idx[i]] V_ave[i] = vv.mean(0) elif residual == 'vector_field': V_ave = func(X_data) else: raise ValueError(f'The method for calculate residual {residual} is not supported. ' f'Currently only {"average", "vector_field"} supported.') V_diff = V_data - V_ave val = np.zeros((V_data.shape[0], 1)) dmatrix = [None] * V_data.shape[0] for i in tqdm(range(X_data.shape[0]), "calculating diffusion matrix for each cell."): vv = V_diff[Idx[i]] d = np.cov(vv.T) val[i] = np.sqrt(sum(sum(d))) dmatrix[i] = d adata.obs['diffusion'] = val adata.uns['diffusion_matrix'] = dmatrix
def diffusionMatrix2D(V_mat): """Function to calculate cell-specific diffusion matrix for based on velocity vectors of neighbors. This function works for two dimension. See :func:`diffusionMatrix` for generalization to arbitrary dimensions. Parameters ---------- V_mat: `np.ndarray` velocity vectors of neighbors Returns ------- Return the cell-specific diffusion matrix See also:: :func:`diffusionMatrix` """ D = np.zeros( (V_mat.shape[0], 2, 2) ) D[:, 0, 0] = np.mean( (V_mat[:, :, 0] - np.mean(V_mat[:, :, 0], axis=1)[:, None]) ** 2, axis=1 ) D[:, 1, 1] = np.mean( (V_mat[:, :, 1] - np.mean(V_mat[:, :, 1], axis=1)[:, None]) ** 2, axis=1 ) D[:, 0, 1] = np.mean( (V_mat[:, :, 0] - np.mean(V_mat[:, :, 0], axis=1)[:, None]) * (V_mat[:, :, 1] - np.mean(V_mat[:, :, 1], axis=1)[:, None]), axis=1, ) D[:, 1, 0] = D[:, 0, 1] return D / 2