| Title: | Single-Cell Meta-Path Based Omic Embedding |
|---|---|
| Description: | Provide a workflow to jointly embed chromatin accessibility peaks and expressed genes into a shared low-dimensional space using paired single-cell 'ATAC-seq' ('scATAC-seq') and single-cell 'RNA-seq' ('scRNA-seq') data. It integrates regulatory relationships among peak-peak interactions (via 'Cicero'), peak-gene interactions (via Lasso, random forest, and 'XGBoost'), and gene-gene interactions (via principal component regression). With the input of paired 'scATAC-seq' and 'scRNA-seq' data matrices, 'scPOEM' assigns a low-dimensional feature vector to each gene and peak. Additionally, it supports the reconstruction of gene-gene network with low-dimensional projections (via 'epsilon-NN') and then the comparison of the networks of two conditions through manifold alignment implemented in 'scTenifoldNet'. |
| Authors: | Yuntong Hou [aut, cre] (https://orcid.org/0009-0005-0587-4692), Yan Zhong [aut, ctb] (https://orcid.org/0000-0003-2412-043X), Yongjian Yang [ctb] (https://orcid.org/0000-0002-4135-5014), Xinyue Zheng [ctb], James Cai [ctb] (https://orcid.org/0000-0002-8081-6725) |
| Maintainer: | Yuntong Hou <[email protected]> |
| License: | GPL (>= 2) |
| Version: | 0.1.2 |
| Built: | 2026-05-28 14:53:09 UTC |
| Source: | https://github.com/houyt23/scpoem |
Reconstruct gene networks via epsilon-NN and compare conditions using manifold alignment implemented in scTenifoldNet.
align_embedding( gene_data1, gene_node1, E1, gene_data2, gene_node2, E2, dirpath, d = 100 )align_embedding( gene_data1, gene_node1, E1, gene_data2, gene_node2, E2, dirpath, d = 100 )
gene_data1 |
The information for genes in state1; must have a column named 'gene_name'. |
gene_node1 |
Gene ids that are associated with other peaks or genes in state1. |
E1 |
Embedding representations of peaks and genes in state1. |
gene_data2 |
The information for genes in state2; must have a column named 'gene_name'. |
gene_node2 |
Gene ids that are associated with other peaks or genes in state2. |
E2 |
Embedding representations of peaks and genes in state2. |
dirpath |
The folder path to read or write file. |
d |
The dimension of latent space. |
A list containing the following:
E_g2: Embedding representations of genes in two conditions.
diffRegulation: A list of differential regulation information for each gene.
## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download compare mode example data data(example_data_compare) data_S1 <- example_data_compare$S1 data_S2 <- example_data_compare$S2 gg_net1 <- GGN(data_S1$Y, file.path(dirpath, "compare/S1")) pp_net1 <- PPN(data_S1$X, data_S1$peak_data, data_S1$cell_data, data_S1$genome, file.path(dirpath, "compare/S1")) net_Lasso1 <- PGN_Lasso(data_S1$X, data_S1$Y, data_S1$gene_data, data_S1$neibor_peak, file.path(dirpath, "compare/S1")) net_RF1 <- PGN_RF(data_S1$X, data_S1$Y, data_S1$gene_data, data_S1$neibor_peak, file.path(dirpath, "compare/S1")) net_XGB1 <- PGN_XGBoost(data_S1$X, data_S1$Y, data_S1$gene_data, data_S1$neibor_peak, file.path(dirpath, "compare/S1")) E_result_S1 <- pg_embedding(gg_net1, pp_net1, list(net_Lasso1, net_RF1, net_XGB1), file.path(dirpath, "compare/S1")) gg_net2 <- GGN(data_S2$Y, file.path(dirpath, "compare/S2")) pp_net2 <- PPN(data_S2$X, data_S2$peak_data, data_S2$cell_data, data_S2$genome, file.path(dirpath, "compare/S2")) net_Lasso2 <- PGN_Lasso(data_S2$X, data_S2$Y, data_S2$gene_data, data_S2$neibor_peak, file.path(dirpath, "compare/S2")) net_RF2 <- PGN_RF(data_S2$X, data_S2$Y, data_S2$gene_data, data_S2$neibor_peak, file.path(dirpath, "compare/S2")) net_XGB2 <- PGN_XGBoost(data_S2$X, data_S2$Y, data_S2$gene_data, data_S2$neibor_peak, file.path(dirpath, "compare/S2")) E_result_S2 <- pg_embedding(gg_net2, pp_net2, list(net_Lasso2, net_RF2, net_XGB2), file.path(dirpath, "compare/S2")) compare_result <- align_embedding(data_S1$gene_data, E_result_S1$gene_node, E_result_S1$E, data_S2$gene_data, E_result_S2$gene_node, E_result_S2$E, file.path(dirpath, "compare/compare")) ## End(Not run)## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download compare mode example data data(example_data_compare) data_S1 <- example_data_compare$S1 data_S2 <- example_data_compare$S2 gg_net1 <- GGN(data_S1$Y, file.path(dirpath, "compare/S1")) pp_net1 <- PPN(data_S1$X, data_S1$peak_data, 
data_S1$cell_data, data_S1$genome, file.path(dirpath, "compare/S1")) net_Lasso1 <- PGN_Lasso(data_S1$X, data_S1$Y, data_S1$gene_data, data_S1$neibor_peak, file.path(dirpath, "compare/S1")) net_RF1 <- PGN_RF(data_S1$X, data_S1$Y, data_S1$gene_data, data_S1$neibor_peak, file.path(dirpath, "compare/S1")) net_XGB1 <- PGN_XGBoost(data_S1$X, data_S1$Y, data_S1$gene_data, data_S1$neibor_peak, file.path(dirpath, "compare/S1")) E_result_S1 <- pg_embedding(gg_net1, pp_net1, list(net_Lasso1, net_RF1, net_XGB1), file.path(dirpath, "compare/S1")) gg_net2 <- GGN(data_S2$Y, file.path(dirpath, "compare/S2")) pp_net2 <- PPN(data_S2$X, data_S2$peak_data, data_S2$cell_data, data_S2$genome, file.path(dirpath, "compare/S2")) net_Lasso2 <- PGN_Lasso(data_S2$X, data_S2$Y, data_S2$gene_data, data_S2$neibor_peak, file.path(dirpath, "compare/S2")) net_RF2 <- PGN_RF(data_S2$X, data_S2$Y, data_S2$gene_data, data_S2$neibor_peak, file.path(dirpath, "compare/S2")) net_XGB2 <- PGN_XGBoost(data_S2$X, data_S2$Y, data_S2$gene_data, data_S2$neibor_peak, file.path(dirpath, "compare/S2")) E_result_S2 <- pg_embedding(gg_net2, pp_net2, list(net_Lasso2, net_RF2, net_XGB2), file.path(dirpath, "compare/S2")) compare_result <- align_embedding(data_S1$gene_data, E_result_S1$gene_node, E_result_S1$E, data_S2$gene_data, E_result_S2$gene_node, E_result_S2$E, file.path(dirpath, "compare/compare")) ## End(Not run)
Reconstruction of the gene-gene network from low-dimensional projections (via epsilon-NN).
eNN(E_g)eNN(E_g)
E_g |
Embedding representations of genes. |
The epsilon-NN network.
A list containing example single-cell multi-omics data used in "compare" mode of the scPOEM package.
data(example_data_compare)data(example_data_compare)
A named list of length 2. Each element is itself a named list with the following components:
X: The scATAC-seq data, sparse matrix.
Y: The scRNA-seq data, sparse matrix.
peak_data: A data.frame containing peak information.
gene_data: A data.frame containing gene information (must contain column "gene_name").
cell_data: A data.frame containing cell metadata.
neibor_peak: The peak IDs within a certain range of each gene; must have columns c("gene_name", "start_use", "end_use"). The ID numbers in "start_use" and "end_use" start from 0.
genome: The genome length for the species.
data(example_data_compare)data(example_data_compare)
A list containing example single-cell multi-omics data used in "single" mode of the scPOEM package.
data(example_data_single)data(example_data_single)
A named list with 7 elements:
X: The scATAC-seq data, sparse matrix.
Y: The scRNA-seq data, sparse matrix.
peak_data: A data.frame containing peak information.
gene_data: A data.frame containing gene information (must contain column "gene_name").
cell_data: A data.frame containing cell metadata.
neibor_peak: The peak IDs within a certain range of each gene; must have columns c("gene_name", "start_use", "end_use"). The ID numbers in "start_use" and "end_use" start from 0.
genome: The genome length for the species.
data(example_data_single)data(example_data_single)
Construct the gene-gene network via principal component regression.
GGN( Y, dirpath, count_device = 1, nComp = 5, rebuild_GGN = T, python_env = "scPOEM_env" )GGN( Y, dirpath, count_device = 1, nComp = 5, rebuild_GGN = T, python_env = "scPOEM_env" )
Y |
The scRNA-seq data, sparse matrix. |
dirpath |
The folder path to read or write file. |
count_device |
The number of CPUs used for the principal component regression. |
nComp |
The number of PCs used for regression. |
rebuild_GGN |
Logical. Whether to rebuild the gene-gene network (GGN) from scratch. If FALSE, the function will attempt to read from 'GGN.mtx' under `dirpath`. |
python_env |
Name or path of the Python environment to be used. |
The GGN network.
## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct GGN net. gg_net <- GGN(example_data_single$Y, file.path(dirpath, "single")) ## End(Not run)## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct GGN net. gg_net <- GGN(example_data_single$Y, file.path(dirpath, "single")) ## End(Not run)
Learn the low-dimensional representations for peaks and genes with a meta-path based method.
pg_embedding( gg_net, pp_net, pg_net_list, dirpath, relearn_pg_embedding = T, d = 100, numwalks = 5, walklength = 3, epochs = 100, neg_sample = 5, batch_size = 32, weighted = TRUE, exclude_pos = FALSE, seed = 0, python_env = "scPOEM_env" )pg_embedding( gg_net, pp_net, pg_net_list, dirpath, relearn_pg_embedding = T, d = 100, numwalks = 5, walklength = 3, epochs = 100, neg_sample = 5, batch_size = 32, weighted = TRUE, exclude_pos = FALSE, seed = 0, python_env = "scPOEM_env" )
gg_net |
The gene-gene network. |
pp_net |
The peak-peak network. |
pg_net_list |
A list of peak-gene networks, constructed via different methods. |
dirpath |
The folder path to read or write file. |
relearn_pg_embedding |
Logical. Whether to relearn the low-dimensional representations for peaks and genes from scratch. If FALSE, the function will attempt to read previously saved results under `dirpath`. |
d |
Dimension of the latent space. Default is 100. |
numwalks |
Number of random walks per node. Default is 5. |
walklength |
Length of walk depth. Default is 3. |
epochs |
Number of training epochs. Default is 100. |
neg_sample |
Number of negative samples per positive sample. Default is 5. |
batch_size |
Batch size for training. Default is 32. |
weighted |
Whether the sampling network is weighted. Default is TRUE. |
exclude_pos |
Whether to exclude positive samples from negative sampling. Default is FALSE. |
seed |
An integer specifying the random seed to ensure reproducible results. |
python_env |
Name or path of the Python environment to be used. |
A list containing the following:
E: Low-dimensional representations of peaks and genes.
peak_node: Peak ids that are associated with other peaks or genes.
gene_node: Gene ids that are associated with other peaks or genes.
## Not run: library(scPOEM) library(Matrix) library(data.table) dirpath <- "./example_data" # Download single mode example data data(example_data_single) gg_net <- GGN(example_data_single$Y, file.path(dirpath, "single"), 1, 5, T) pp_net <- PPN(example_data_single$X, example_data_single$peak_data, example_data_single$cell_data, example_data_single$genome, file.path(dirpath, "single")) net_Lasso <- PGN_Lasso(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) net_RF <- PGN_RF(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) net_XGB <- PGN_XGBoost(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) E_result <- pg_embedding(gg_net, pp_net, list(net_Lasso, net_RF, net_XGB), file.path(dirpath, "single")) ## End(Not run)## Not run: library(scPOEM) library(Matrix) library(data.table) dirpath <- "./example_data" # Download single mode example data data(example_data_single) gg_net <- GGN(example_data_single$Y, file.path(dirpath, "single"), 1, 5, T) pp_net <- PPN(example_data_single$X, example_data_single$peak_data, example_data_single$cell_data, example_data_single$genome, file.path(dirpath, "single")) net_Lasso <- PGN_Lasso(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) net_RF <- PGN_RF(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) net_XGB <- PGN_XGBoost(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) E_result <- pg_embedding(gg_net, pp_net, list(net_Lasso, net_RF, net_XGB), file.path(dirpath, "single")) ## End(Not run)
Construct the peak-gene network via Lasso.
PGN_Lasso( X, Y, gene_data, neibor_peak, dirpath, count_device = 1, rebuild_PGN_Lasso = T )PGN_Lasso( X, Y, gene_data, neibor_peak, dirpath, count_device = 1, rebuild_PGN_Lasso = T )
X |
The scATAC-seq data, sparse matrix. |
Y |
The scRNA-seq data, sparse matrix. |
gene_data |
The information for genes; must have a column named 'gene_name'. |
neibor_peak |
The peak IDs within a certain range of each gene; must have columns c("gene_name", "start_use", "end_use"). The ID numbers in "start_use" and "end_use" start from 0. |
dirpath |
The folder path to read or write file. |
count_device |
The number of cpus used to train the Lasso model. |
rebuild_PGN_Lasso |
Logical. Whether to rebuild the peak-gene network via Lasso from scratch. If FALSE, the function will attempt to read from 'PGN_Lasso.mtx' under `dirpath`. |
The PGN_Lasso network.
## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct PGN net via Lasso. net_Lasso <- PGN_Lasso(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) ## End(Not run)## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct PGN net via Lasso. net_Lasso <- PGN_Lasso(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) ## End(Not run)
Construct the peak-gene network via random forest.
PGN_RF( X, Y, gene_data, neibor_peak, dirpath, count_device = 1, rebuild_PGN_RF = T, seed = 0, python_env = "scPOEM_env" )PGN_RF( X, Y, gene_data, neibor_peak, dirpath, count_device = 1, rebuild_PGN_RF = T, seed = 0, python_env = "scPOEM_env" )
X |
The scATAC-seq data, sparse matrix. |
Y |
The scRNA-seq data, sparse matrix. |
gene_data |
The information for genes; must have a column named 'gene_name'. |
neibor_peak |
The peak IDs within a certain range of each gene; must have columns c("gene_name", "start_use", "end_use"). The ID numbers in "start_use" and "end_use" start from 0. |
dirpath |
The folder path to read or write file. |
count_device |
The number of CPUs used to train the random forest model. |
rebuild_PGN_RF |
Logical. Whether to rebuild the peak-gene network via random forest from scratch. If FALSE, the function will attempt to read from 'PGN_RF.mtx' under `dirpath`. |
seed |
An integer specifying the random seed to ensure reproducible results. |
python_env |
Name or path of the Python environment to be used. |
The PGN_RF network.
## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct PGN net via random forest (RF). net_RF <- PGN_RF(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) ## End(Not run)## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct PGN net via random forest (RF). net_RF <- PGN_RF(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) ## End(Not run)
Construct the peak-gene network via XGBoost.
PGN_XGBoost( X, Y, gene_data, neibor_peak, dirpath, count_device = 1, rebuild_PGN_XGB = T )PGN_XGBoost( X, Y, gene_data, neibor_peak, dirpath, count_device = 1, rebuild_PGN_XGB = T )
X |
The scATAC-seq data, sparse matrix. |
Y |
The scRNA-seq data, sparse matrix. |
gene_data |
The information for genes; must have a column named 'gene_name'. |
neibor_peak |
The peak IDs within a certain range of each gene; must have columns c("gene_name", "start_use", "end_use"). The ID numbers in "start_use" and "end_use" start from 0. |
dirpath |
The folder path to read or write file. |
count_device |
The number of CPUs used to train the XGBoost model. |
rebuild_PGN_XGB |
Logical. Whether to rebuild the peak-gene network via XGBoost from scratch. If FALSE, the function will attempt to read from 'PGN_XGB.mtx' under `dirpath`. |
The PGN_XGBoost network.
## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct PGN net via XGBoost. net_XGB <- PGN_XGBoost(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) ## End(Not run)## Not run: library(scPOEM) library(Matrix) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct PGN net via XGBoost. net_XGB <- PGN_XGBoost(example_data_single$X, example_data_single$Y, example_data_single$gene_data, example_data_single$neibor_peak, file.path(dirpath, "single")) ## End(Not run)
Construct peak-peak network.
PPN(X, peak_data, cell_data, genome, dirpath, rebuild_PPN = T, seed = 0)PPN(X, peak_data, cell_data, genome, dirpath, rebuild_PPN = T, seed = 0)
X |
The scATAC-seq data, sparse matrix. |
peak_data |
The information for peaks; must have a column named 'peak_name'. |
cell_data |
The information for cells; must have a column named 'cell_name'. |
genome |
The genome length for the species. |
dirpath |
The folder path to read or write file. |
rebuild_PPN |
Logical. Whether to rebuild the peak-peak network (PPN) from scratch. If FALSE, the function will attempt to read from 'PPN.mtx' under `dirpath`. |
seed |
An integer specifying the random seed to ensure reproducible results. |
The PPN network.
## Not run: library(scPOEM) library(Matrix) library(data.table) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct PPN net. pp_net <- PPN(example_data_single$X, example_data_single$peak_data, example_data_single$cell_data, example_data_single$genome, file.path(dirpath, "single")) ## End(Not run)## Not run: library(scPOEM) library(Matrix) library(data.table) dirpath <- "./example_data" # Download single mode example data data(example_data_single) # Construct PPN net. pp_net <- PPN(example_data_single$X, example_data_single$peak_data, example_data_single$cell_data, example_data_single$genome, file.path(dirpath, "single")) ## End(Not run)
This function takes paired single-cell ATAC-seq (scATAC-seq) and RNA-seq (scRNA-seq) data to embed peaks and genes into a shared low-dimensional space. It integrates regulatory relationships from peak-peak interactions (via Cicero), peak-gene interactions (via Lasso, random forest, and XGBoost), and gene-gene interactions (via principal component regression). Additionally, it supports gene-gene network reconstruction using epsilon-NN projections and compares networks across conditions through manifold alignment (scTenifoldNet).
scPOEM( mode = c("single", "compare"), input_data, dirpath, count_device = 1, nComp = 5, seed = 0, numwalks = 5, walklength = 3, epochs = 100, neg_sample = 5, batch_size = 32, weighted = TRUE, exclude_pos = FALSE, d = 100, rebuild_GGN = T, rebuild_PPN = T, rebuild_PGN_Lasso = T, rebuild_PGN_RF = T, rebuild_PGN_XGB = T, relearn_pg_embedding = T, pg_method = c("Lasso", "RF", "XGBoost"), python_env = "scPOEM_env" )scPOEM( mode = c("single", "compare"), input_data, dirpath, count_device = 1, nComp = 5, seed = 0, numwalks = 5, walklength = 3, epochs = 100, neg_sample = 5, batch_size = 32, weighted = TRUE, exclude_pos = FALSE, d = 100, rebuild_GGN = T, rebuild_PPN = T, rebuild_PGN_Lasso = T, rebuild_PGN_RF = T, rebuild_PGN_XGB = T, relearn_pg_embedding = T, pg_method = c("Lasso", "RF", "XGBoost"), python_env = "scPOEM_env" )
mode |
The mode indicating whether to analyze data from a single condition or to compare two conditions. |
input_data |
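A list of input data. If `mode = "single"`, a named list with the components X, Y, peak_data, gene_data, cell_data, neibor_peak and genome (same layout as `example_data_single`).
If `mode = "compare"`, a named list of length 2, one element per condition, where each element is itself a named list with those same components (same layout as `example_data_compare`). |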
dirpath |
The folder path to read or write file. |
count_device |
The number of cpus used to train models. |
nComp |
The number of PCs used for regression in constructing GGN. |
seed |
An integer specifying the random seed to ensure reproducible results. |
numwalks |
Number of random walks per node. Default is 5. |
walklength |
Length of walk depth. Default is 3. |
epochs |
Number of training epochs. Default is 100. |
neg_sample |
Number of negative samples per positive sample. Default is 5. |
batch_size |
Batch size for training. Default is 32. |
weighted |
Whether the sampling network is weighted. Default is TRUE. |
exclude_pos |
Whether to exclude positive samples from negative sampling. Default is FALSE. |
d |
The dimension of latent space. Default is 100. |
rebuild_GGN |
Logical. Whether to rebuild the gene-gene network from scratch. If FALSE, the function will attempt to read from 'GGN.mtx' under `dirpath`. |
rebuild_PPN |
Logical. Whether to rebuild the peak-peak network from scratch. If FALSE, the function will attempt to read from 'PPN.mtx' under `dirpath`. |
rebuild_PGN_Lasso |
Logical. Whether to rebuild the peak-gene network via Lasso from scratch. If FALSE, the function will attempt to read from 'PGN_Lasso.mtx' under `dirpath`. |
rebuild_PGN_RF |
Logical. Whether to rebuild the peak-gene network via random forest from scratch. If FALSE, the function will attempt to read from 'PGN_RF.mtx' under `dirpath`. |
rebuild_PGN_XGB |
Logical. Whether to rebuild the peak-gene network via XGBoost from scratch. If FALSE, the function will attempt to read from 'PGN_XGB.mtx' under `dirpath`. |
relearn_pg_embedding |
Logical. Whether to relearn the low-dimensional representations for peaks and genes from scratch. If FALSE, the function will attempt to read previously saved results under `dirpath`. |
pg_method |
The vector of methods used to construct peak-gene net. Default is c("Lasso", "RF", "XGBoost"). |
python_env |
Name or path of the Python environment to be used. |
The scPOEM result.
## Not run: library(scPOEM) library(Matrix) library(data.table) dirpath <- "./example_data" # An example for analysing a single dataset. # Download and read data. data(example_data_single) single_result <- scPOEM(mode = "single", input_data=example_data_single, dirpath=file.path(dirpath, "single")) # An example for analysing and comparing datasets from two conditions. # Download compare mode example data data(example_data_compare) compare_result <- scPOEM(mode = "compare", input_data=example_data_compare, dirpath=file.path(dirpath, "compare")) ## End(Not run)## Not run: library(scPOEM) library(Matrix) library(data.table) dirpath <- "./example_data" # An example for analysing a single dataset. # Download and read data. data(example_data_single) single_result <- scPOEM(mode = "single", input_data=example_data_single, dirpath=file.path(dirpath, "single")) # An example for analysing and comparing datasets from two conditions. # Download compare mode example data data(example_data_compare) compare_result <- scPOEM(mode = "compare", input_data=example_data_compare, dirpath=file.path(dirpath, "compare")) ## End(Not run)