Load Libraries

# Gene Expression Analysis
library(airway)      # Analysis of single-cell RNA-seq datasets
library(DESeq2)     # Differential gene expression analysis
library(iSEE)       # Interactive visualization of multi-dimensional data
library(iSEEde)     # iSEE extension for differential expression analysis
library(scater)     # Single-cell analysis toolkit for expression data
library(scuttle)    # Utilities for single-cell RNA-seq analysis

# Visualization and Plotting
library(gridExtra)      # Arrange multiple plots in a grid
library(cowplot)        # Arrange and annotate multiple plots
library(pheatmap)       # Creation of heatmaps
library(RColorBrewer)   # Color palettes for plots
library(EnhancedVolcano)# Volcano plots for differential expression analysis
library(ggvenn)         # Venn diagrams using ggplot2
library(ggupset)        # UpSet plots for visualizing set intersections
library(ggstance)       # Horizontal versions of ggplot2 geoms

# Statistical Analysis and Testing
library(qvalue)     # Estimation of q-values
library(vsn)        # Variance stabilization and normalization
library(apeglm)     # Adaptive shrinkage for differential expression
library(ashr)       # Empirical Bayes shrinkage estimation

# Data Management and Manipulation
library(conflicted) # Conflict resolution for function names
library(tidyverse)  # Collection of packages for data manipulation and visualization

# Biological Databases and Annotation
library(org.Mm.eg.db)  # Annotation package for the mouse genome
library(org.Hs.eg.db)  # Annotation package for the human genome
library(AnnotationHub) # Access to various biological annotation resources
library(ensembldb)     # Annotation package for Ensembl databases
library(biomaRt)       # Interface to BioMart biological databases

# Pathway and Functional Analysis
library(DOSE)           # Disease ontology and pathway analysis
library(pathview)       # Pathway-based data integration and visualization
library(clusterProfiler) # Gene set enrichment analysis
library(enrichplot)     # Visualization of enrichment analysis results
library(ReactomePA)     # Pathway analysis using Reactome database

# Other Topics
library(GEOquery)       # Access to Gene Expression Omnibus (GEO) datasets
library(europepmc)      # Access to European PubMed Central literature data
library(bigmemory)      # Management and analysis of large datasets in memory
library(bigalgebra)     # Linear and generalized linear algebra operations on large datasets

Conflicted functions

# Set function preferences
conflicts_prefer(dplyr::select)

conflicts_prefer(dplyr::filter)

conflicts_prefer(dplyr::slice)

conflicts_prefer(dplyr::count)

conflicts_prefer(SummarizedExperiment::rowRanges)

Load data

RangedSummarizedExperiment

#load airway dataset as rse object
data(airway)

rse <- airway

rse

## class: RangedSummarizedExperiment 
## dim: 63677 8 
## metadata(1): ''
## assays(1): counts
## rownames(63677): ENSG00000000003 ENSG00000000005 ... ENSG00000273492
##   ENSG00000273493
## rowData names(10): gene_id gene_name ... seq_coord_system symbol
## colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
## colData names(9): SampleName cell ... Sample BioSample

#extract counts
counts <- as.data.frame(assays(rse)$counts)

#extract metadata
colData <- as.data.frame(colData(rse))

Background

Title

rse@metadata[[1]]@title

## [1] "RNA-Seq transcriptome profiling identifies CRISPLD2 as a glucocorticoid responsive gene that modulates cytokine function in airway smooth muscle cells."

Experiment information

metadata(rse)

## [[1]]
## Experiment data
##   Experimenter name: Himes BE 
##   Laboratory: NA 
##   Contact information:  
##   Title: RNA-Seq transcriptome profiling identifies CRISPLD2 as a glucocorticoid responsive gene that modulates cytokine function in airway smooth muscle cells. 
##   URL: http://www.ncbi.nlm.nih.gov/pubmed/24926665 
##   PMIDs: 24926665 
## 
##   Abstract: A 226 word abstract is available. Use 'abstract' method.

Abstract

rse@metadata[[1]]@abstract

## [1] "Asthma is a chronic inflammatory respiratory disease that affects over 300 million people worldwide. Glucocorticoids are a mainstay therapy for asthma because they exert anti-inflammatory effects in multiple lung tissues, including the airway smooth muscle (ASM). However, the mechanism by which glucocorticoids suppress inflammation in ASM remains poorly understood. Using RNA-Seq, a high-throughput sequencing method, we characterized transcriptomic changes in four primary human ASM cell lines that were treated with dexamethasone--a potent synthetic glucocorticoid (1 microM for 18 hours). Based on a Benjamini-Hochberg corrected p-value <0.05, we identified 316 differentially expressed genes, including both well known (DUSP1, KLF15, PER1, TSC22D3) and less investigated (C7, CCDC69, CRISPLD2) glucocorticoid-responsive genes. CRISPLD2, which encodes a secreted protein previously implicated in lung development and endotoxin regulation, was found to have SNPs that were moderately associated with inhaled corticosteroid resistance and bronchodilator response among asthma patients in two previously conducted genome-wide association studies. Quantitative RT-PCR and Western blotting showed that dexamethasone treatment significantly increased CRISPLD2 mRNA and protein expression in ASM cells. CRISPLD2 expression was also induced by the inflammatory cytokine IL1beta, and small interfering RNA-mediated knockdown of CRISPLD2 further increased IL1beta-induced expression of IL6 and IL8. Our findings offer a comprehensive view of the effect of a glucocorticoid on the ASM transcriptome and identify CRISPLD2 as an asthma pharmacogenetics candidate gene that regulates anti-inflammatory effects of glucocorticoids in the ASM."

Samples

rse@colData@rownames

## [1] "SRR1039508" "SRR1039509" "SRR1039512" "SRR1039513" "SRR1039516"
## [6] "SRR1039517" "SRR1039520" "SRR1039521"

Number of samples: 8

as.data.frame(colData(rse))

##            SampleName    cell   dex albut        Run avgLength Experiment
## SRR1039508 GSM1275862  N61311 untrt untrt SRR1039508       126  SRX384345
## SRR1039509 GSM1275863  N61311   trt untrt SRR1039509       126  SRX384346
## SRR1039512 GSM1275866 N052611 untrt untrt SRR1039512       126  SRX384349
## SRR1039513 GSM1275867 N052611   trt untrt SRR1039513        87  SRX384350
## SRR1039516 GSM1275870 N080611 untrt untrt SRR1039516       120  SRX384353
## SRR1039517 GSM1275871 N080611   trt untrt SRR1039517       126  SRX384354
## SRR1039520 GSM1275874 N061011 untrt untrt SRR1039520       101  SRX384357
## SRR1039521 GSM1275875 N061011   trt untrt SRR1039521        98  SRX384358
##               Sample    BioSample
## SRR1039508 SRS508568 SAMN02422669
## SRR1039509 SRS508567 SAMN02422675
## SRR1039512 SRS508571 SAMN02422678
## SRR1039513 SRS508572 SAMN02422670
## SRR1039516 SRS508575 SAMN02422682
## SRR1039517 SRS508576 SAMN02422673
## SRR1039520 SRS508579 SAMN02422683
## SRR1039521 SRS508580 SAMN02422677

Number of Genes: 509416

head(as.data.frame(assays(rse)$counts))

##                 SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
## ENSG00000000003        679        448        873        408       1138
## ENSG00000000005          0          0          0          0          0
## ENSG00000000419        467        515        621        365        587
## ENSG00000000457        260        211        263        164        245
## ENSG00000000460         60         55         40         35         78
## ENSG00000000938          0          0          2          0          1
##                 SRR1039517 SRR1039520 SRR1039521
## ENSG00000000003       1047        770        572
## ENSG00000000005          0          0          0
## ENSG00000000419        799        417        508
## ENSG00000000457        331        233        229
## ENSG00000000460         63         76         60
## ENSG00000000938          0          0          0

Genome Information

metadata(rowRanges(rse))

## $genomeInfo
## $genomeInfo$`Db type`
## [1] "TranscriptDb"
## 
## $genomeInfo$`Supporting package`
## [1] "GenomicFeatures"
## 
## $genomeInfo$`Data source`
## [1] "BioMart"
## 
## $genomeInfo$Organism
## [1] "Homo sapiens"
## 
## $genomeInfo$`Resource URL`
## [1] "www.biomart.org:80"
## 
## $genomeInfo$`BioMart database`
## [1] "ensembl"
## 
## $genomeInfo$`BioMart database version`
## [1] "ENSEMBL GENES 75 (SANGER UK)"
## 
## $genomeInfo$`BioMart dataset`
## [1] "hsapiens_gene_ensembl"
## 
## $genomeInfo$`BioMart dataset description`
## [1] "Homo sapiens genes (GRCh37.p13)"
## 
## $genomeInfo$`BioMart dataset version`
## [1] "GRCh37.p13"
## 
## $genomeInfo$`Full dataset`
## [1] "yes"
## 
## $genomeInfo$`miRBase build ID`
## [1] NA
## 
## $genomeInfo$transcript_nrow
## [1] "215647"
## 
## $genomeInfo$exon_nrow
## [1] "745593"
## 
## $genomeInfo$cds_nrow
## [1] "537555"
## 
## $genomeInfo$`Db created by`
## [1] "GenomicFeatures package from Bioconductor"
## 
## $genomeInfo$`Creation time`
## [1] "2014-07-10 14:55:55 -0400 (Thu, 10 Jul 2014)"
## 
## $genomeInfo$`GenomicFeatures version at creation time`
## [1] "1.17.9"
## 
## $genomeInfo$`RSQLite version at creation time`
## [1] "0.11.4"
## 
## $genomeInfo$DBSCHEMAVERSION
## [1] "1.0"

Define functions

MA Plots

# MA Plots
# Arranged into 2 rows
# Color & translucency by DEG
# Plot titles fed in as arguments
# Mean counts (x-axis) Log10 transformed

plotMA_arrange_iterative <- function(res_list) {
  plots <- list()

  for (i in seq_along(res_list)) {
    result <- res_list[[i]]
    res_name <- names(res_list)[i] # Get the name of the resDxC1 object
    res_sum <- sum(result$padj < 0.05, na.rm=TRUE) # Get sum of DEG's
    p <- plotMA(result, returnData = TRUE) # DESeq2's PlotMA function
    p_plot <- ggplot(p, aes(x = mean, y = lfc, color = isDE)) +
      geom_point(aes(alpha = ifelse(isDE == "TRUE", 1, 0.1)), show.legend = FALSE) + # Reduce non-DEG transperancy
      scale_x_continuous(trans = "log10") + # Scale mean counts
      ylim(-2, 2) +   
      labs(title = res_name, 
           subtitle = (gsub(".*\\(MAP\\):\\s*", "",
                            result@elementMetadata@listData[["description"]][2])), # workaround to label plots with resDxC1 contrasts
           caption = paste("Total genes:", res_sum))

    plots[[i]] <- p_plot
  }

  do.call(grid.arrange, c(plots, nrow = 3))
}

pvalue histograms

plotStats <- function(res_list) {
  for (i in seq_along(res_list)) {
    result <- res_list[[i]]
    res_name <- names(res_list)[i]
    qobjs <- qvalue(result$pvalue) # Save qvalue object
    hist <- hist(qobjs) # save pvalue histogram as gg object
    titled <- hist + labs(title = paste(res_name, "|", sub(".*: ", "", result@elementMetadata@listData[["description"]][2]))) # label plot with contrast 
    print(titled) # plot pvalue histogram
    plot(qobjs) # plot various qvalue plots
    summary(qobjs) # summarize p & q value comparisons
  }
}

SD Plots

plot_Transformations <- function(trans_list) {
  plots <- list()
  for (i in seq_along(trans_list)) {
    trans <- trans_list[[i]]
    name <- names(trans_list)[i]
    sd <- meanSdPlot(trans, plot  = FALSE, bins = 100) # save mean/SD plot output as gg object
    plots[[i]] <- sd[["gg"]] + labs(title = name) + theme(legend.position = "none", 
                                                          axis.title.x = element_blank(),
                                                          axis.title.y = element_blank(),
                                                          axis.text.x = element_blank(),
                                                          axis.text.y = element_blank(),
                                                          axis.ticks = element_blank())
  }
  grid.arrange(grobs = plots, ncol = 2, top = "Gene Count Transforms", left = "SD", bottom = "rank(mean)")
}

Construct DESeq Objects

Dex variable only

#Ensure matching sample names & order
all(colnames(counts) %in% row.names(colData))

## [1] TRUE

all(colnames(counts) == row.names(colData))

## [1] TRUE

# Define design matrix for DESeq2
ddsDex <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = colData,
  design = ~ dex
)

# normalization by estimating size factor
ddsDex <- estimateSizeFactors(ddsDex)

# remove low expressed genes
keep <- rowSums(counts(ddsDex)) >= 10

ddsDex <- ddsDex[keep, ]

# set factor level (reference group)
ddsDex$dex <- relevel(ddsDex$dex, ref = "untrt")

# Run DEG analysis
ddsDex <- DESeq(ddsDex)

resDex <- results(ddsDex)

# Log fold shrink
resLFC1 <- lfcShrink(ddsDex, coef = "dex_trt_vs_untrt", type="apeglm")

Cell variable only

# Define design matrix for DESeq2
ddsCell <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = colData,
  design = ~ cell)

# normalization by estimating size factor
ddsCell <- estimateSizeFactors(ddsCell)

# remove low expressed genes
keep <- rowSums(counts(ddsCell)) >= 10

ddsCell <- ddsCell[keep, ]

# set factor level (reference group)
ddsCell$cell <- relevel(ddsCell$cell, ref = "N61311")

# Run DEG analysis
ddsCell <- DESeq(ddsCell)

resultsNames(ddsCell)

## [1] "Intercept"              "cell_N052611_vs_N61311" "cell_N061011_vs_N61311"
## [4] "cell_N080611_vs_N61311"

# Extract results & Log fold shrink
resCell1 <- results(ddsCell, contrast = c("cell", "N052611", "N61311"))
resCellLFC1 <- lfcShrink(ddsCell, coef = "cell_N052611_vs_N61311", type="apeglm")

resCell2 <- results(ddsCell, contrast = c("cell", "N061011", "N61311"))
resCellLFC2 <- lfcShrink(ddsCell, coef = "cell_N061011_vs_N61311", type="apeglm")

resCell3 <- results(ddsCell, contrast = c("cell", "N080611", "N61311"))
resCellLFC3 <- lfcShrink(ddsCell, coef = "cell_N080611_vs_N61311", type="apeglm")

Dex & Cell variables

# Define design matrix for DESeq2
ddsDxC <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = colData,
  design = ~ dex + cell)

# normalization by estimating size factor
ddsDxC <- estimateSizeFactors(ddsDxC)

# remove low expressed genes
keep <- rowSums(counts(ddsDxC)) >= 10
ddsDxC <- ddsDxC[keep, ]

# set factor level (reference group)
ddsDxC$dex <- relevel(ddsDxC$dex, ref = "untrt")
ddsDxC$cell <- relevel(ddsDxC$cell, ref = "N61311")

# Run DEG analysis
ddsDxC <- DESeq(ddsDxC)

resultsNames(ddsDxC)

## [1] "Intercept"              "dex_trt_vs_untrt"       "cell_N052611_vs_N61311"
## [4] "cell_N061011_vs_N61311" "cell_N080611_vs_N61311"

# Extract results & Log fold shrink
resDxC1 <- results(ddsDxC, contrast = c("dex", "trt", "untrt"))
resDxCLFC1 <- lfcShrink(ddsDxC, coef = "dex_trt_vs_untrt", type="apeglm")

resDxC2 <- results(ddsDxC, contrast = c("cell", "N052611", "N61311"))
resDxCLFC2 <- lfcShrink(ddsDxC, coef = "cell_N052611_vs_N61311", type="apeglm")

resDxC3 <- results(ddsDxC, contrast = c("cell", "N061011", "N61311"))
resDxCLFC3 <- lfcShrink(ddsDxC, coef = "cell_N061011_vs_N61311", type="apeglm")

resDxC4 <- results(ddsDxC, contrast = c("cell", "N080611", "N61311"))
resDxCLFC4 <- lfcShrink(ddsDxC, coef = "cell_N080611_vs_N61311", type="apeglm")

Dex & Cell Interaction : Not possible in this case

# Define design matrix for DESeq2
dds3 <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = colData,
  design = ~ dex + cell + dex:cell)

# normalization by estimating size factor
dds3 <- estimateSizeFactors(dds3)

# remove low expressed genes
keep <- rowSums(counts(dds3)) >= 10
dds3 <- dds3[keep, ]

# set factor level (reference group)
dds3$dex <- relevel(dds3$dex, ref = "untrt")
dds3$cell <- relevel(dds3$cell, ref = "N61311")

# Run DEG analysis
# dds3 <- DESeq(dds3)

running DESeq(dds3) returns the following

Error in checkForExperimentalReplicates(object, modelMatrix) :

The design matrix has the same number of samples and coefficients to fit,so estimation of dispersion is not possible. Treating samples as replicates was deprecated in v1.20 and no longer supported since v1.22.

Visualize

MA PLots: all Contrasts

# Prep results list for predefined plotting function
res_list <- list(
  "~ dex" = resLFC1,
  "~ cell" = resCellLFC1,
  "~ cell" = resCellLFC2,
  "~ cell" = resCellLFC3,
  "~ dex + cell" = resDxCLFC1,
  "~ dex + cell" = resDxCLFC2,
  "~ dex + cell" = resDxCLFC3,
  "~ dex + cell" = resDxCLFC4
)

# Call plotting function
plotMA_arrange_iterative(res_list)

Volcano Plot

# Create DF for ggplot volcano plot
resDxCLFC1_DF <- as.data.frame(resDxCLFC1)

resDxCLFC1_DF <- resDxCLFC1_DF |>
  mutate(`-log10(pvalue)` = -log10(pvalue)) |> 
   rownames_to_column(var = "ENSEMBL")


# Query database for gene Symbols
GeneSymbols = bitr(resDxCLFC1_DF$ENSEMBL, 
                  fromType="ENSEMBL", 
                  toType="SYMBOL", 
                  OrgDb="org.Hs.eg.db")

## 'select()' returned 1:many mapping between keys and columns

## Warning in bitr(resDxCLFC1_DF$ENSEMBL, fromType = "ENSEMBL", toType = "SYMBOL",
## : 23.34% of input gene IDs are fail to map...

# Merge two DF's
resDxCLFC1_DF <- resDxCLFC1_DF |> 
  left_join(GeneSymbols, by = "ENSEMBL")

# plot enchanced volcano
EnhancedVolcano(resDxCLFC1_DF,
  lab = resDxCLFC1_DF$SYMBOL,
  x = "log2FoldChange",
  y = "pvalue",
  title = "Treated vs Untreated",
  pCutoff = 1e-05,
  FCcutoff = 1,
  pointSize = 2,
  labSize = 4,
  colAlpha = 0.3
)

# gg volcano plot
ggplot(resDxCLFC1_DF, aes(x = log2FoldChange, y = `-log10(pvalue)`)) +
  geom_point(aes(colour = padj < 0.05), size = 1) +
  geom_text_repel(data = ~ top_n(.x, 20, wt = -padj), aes(label = SYMBOL)) +
  labs(title = "Treated vs Untreated")

FDR & qvalue

# Call previously defined function
plotStats(res_list)

## 
## Call:
## qvalue(p = result$pvalue)
## 
## pi0: 0.8251033   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1    <1
## p-value     1201   1747  2910   3715  4643 5956 22318
## q-value      789   1115  1733   2198  2720 3447 22318
## local FDR    602    806  1196   1442  1716 2110  8855

## 
## Call:
## qvalue(p = result$pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1    <1
## p-value      251    405   819   1161  1654 2486 22369
## q-value      109    164   256    313   386  506 22369
## local FDR     90    114   171    210   252  300  1081

## 
## Call:
## qvalue(p = result$pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1    <1
## p-value      262    409   787   1120  1560 2368 22369
## q-value      127    179   272    323   395  494 22369
## local FDR    104    131   186    211   269  313   958

## 
## Call:
## qvalue(p = result$pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1    <1
## p-value      433    662  1204   1662  2230 3256 22369
## q-value      227    345   503    613   743  908 22369
## local FDR    176    248   356    407   485  578  2131

## 
## Call:
## qvalue(p = result$pvalue)
## 
## pi0: 0.7477744   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1    <1
## p-value     2048   2701  3935   4790  5689 6960 22369
## q-value     1599   2094  2938   3471  4113 5017 22369
## local FDR   1287   1593  2132   2450  2783 3232 11330

## 
## Call:
## qvalue(p = result$pvalue)
## 
## pi0: 0.9574035   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1    <1
## p-value      680   1007  1785   2395  3139 4292 22369
## q-value      395    550   849   1062  1354 1683 22369
## local FDR    306    402   587    714   836 1030  4258

## 
## Call:
## qvalue(p = result$pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1    <1
## p-value      645    972  1692   2322  2990 4072 22369
## q-value      362    503   807    993  1210 1513 22369
## local FDR    288    365   549    680   792  942  3423

## 
## Call:
## qvalue(p = result$pvalue)
## 
## pi0: 0.8484637   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1    <1
## p-value      985   1422  2341   3064  3865 5083 22369
## q-value      650    875  1340   1667  2018 2542 22369
## local FDR    493    656   936   1138  1310 1576  7449

Transformations & Variance

With higher levels of RNA expression comes higher degree of variability. In order to more easily visualize and rank gene expression, this mean-variance relationship needs to be controlled through transformation. Log2 was a common route of transformation but tends to lead to overestimation in the low gene expression range.

Plotting mean by standard deviation, we want to see a flat line for the range of expressions.

# Raw counts Mean-SD plot
# ranked mean
meanSdPlot(assay(ddsDxC), ranks = FALSE, bins = 100)

# original scale
meanSdPlot(assay(ddsDxC), bins = 100)

# Log 2 Transformation
logTrans <- log2(assay(ddsDxC) + 1)

# Variance stabilizing transformation (preferred for speed)
vsdDxC <- vst(ddsDxC, blind = FALSE)

vsdTrans <- assay(vsdDxC)

# Regularized log transformation (preferred for variable size factors)
rldDxC <- rlog(ddsDxC, blind = FALSE)

rldTrans <- assay(rldDxC)

# Normalized counts transformation
ntdDxC <- normTransform(ddsDxC)

ntdTrans <- assay(ntdDxC)


# Prepare list of transformations for predefined plotting function
trans_list <- list(
  "log2 + 1" = logTrans,
  "vst" = vsdTrans,
  "rlog" = rldTrans,
  "norm" = ntdTrans
)

# Call plotting function
plot_Transformations(trans_list)

Quality Assesment

Quality assesment with Heatmaps

# Select top 20 DEG's
selectDex <- order(rowMeans(counts(ddsDex, normalized = TRUE)), decreasing = TRUE)[1:20]

selectCell <- order(rowMeans(counts(ddsCell, normalized = TRUE)), decreasing = TRUE)[1:20]

selectDxC <- order(rowMeans(counts(ddsDxC, normalized = TRUE)), decreasing = TRUE)[1:20]

# Extract clustering variables
dfDex <- as.data.frame(colData(ddsDex)[, c("dex")])

dfCell <- as.data.frame(colData(ddsCell)[, c("cell")])

dfDxC <- as.data.frame(colData(ddsDxC)[, c("dex", "cell")])


# Heatmaps
pheatmap(assay(ntdDxC)[selectDxC, ],
  cluster_rows = TRUE, show_rownames = FALSE,
  cluster_cols = TRUE, annotation_col = dfDxC, show_colnames = FALSE,
  main = "ntd", scale = "row"
)

pheatmap(assay(vsdDxC)[selectDxC, ],
  cluster_rows = TRUE, show_rownames = FALSE,
  cluster_cols = TRUE, annotation_col = dfDxC, show_colnames = FALSE,
  main = "vsd", scale = "row"
)

pheatmap(assay(rldDxC)[selectDxC, ],
  cluster_rows = TRUE, show_rownames = FALSE,
  cluster_cols = TRUE, annotation_col = dfDxC, show_colnames = FALSE,
  main = "rld", scale = "row"
)

Sample distance

# Determine between group variety
sampleDists <- dist(t(assay(vsdDxC)))

sampleDistMatrix <- as.matrix(sampleDists)

rownames(sampleDistMatrix) <- paste(vsdDxC$dex, vsdDxC$cell, sep = "-")

colnames(sampleDistMatrix) <- NULL

colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)

pheatmap(sampleDistMatrix,
  clustering_distance_rows = sampleDists,
  clustering_distance_cols = sampleDists,
  col = colors, main = "Between Group Variability"
)

PCA plots

plotPCA(vsdDxC, intgroup = c("dex", "cell")) + labs(title = "vst(~Dex+Cell): PCA by Dex & Cell")

## using ntop=500 top features by variance

plotPCA(vsdDxC, intgroup = c("cell")) + labs(title = "vst(~Dex+Cell): PCA by Cell")

## using ntop=500 top features by variance

plotPCA(vsdDxC, intgroup = c("dex")) + labs(title = "vst(~Dex+Cell): PCA by Dex")

## using ntop=500 top features by variance

pcaData <- plotPCA(vsdDxC,
  intgroup = c("dex", "cell"),
  returnData = TRUE
)

## using ntop=500 top features by variance

percentVar <- round(100 * attr(pcaData, "percentVar"))

ggplot(pcaData, aes(PC1, PC2, color = dex, shape = cell)) +
  geom_point(size = 3) +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance")) +
  coord_fixed() +
  labs(title = "vst(~Dex+Cell): PCA by Dex & Cell")

Visualize Results

Plot counts

# plot counts for most significant gene for treated vs untreated contract in 2 variable DGE analysis
p <- plotCounts(ddsDxC,
  gene = which.min(resDxC1$padj),
  intgroup = c("dex", "cell"),
  returnData = TRUE
)

ggplot(p, aes(x = dex, y = count, color = cell)) +
  geom_point(position = position_jitter(w = 0.1, h = 0.1), size = 4) +
  scale_y_log10(breaks = c(25, 100, 400)) +
  labs(title = rownames(resDxC1)[which.min(resDxC1$padj)])

Venn Diagram

# # Plot venn diagram
# vennTest <- tibble(ENSEMBLEID = rownames(resDxC1)) |>
#   mutate(Treated = resDxC1$padj < 0.05 &
#     !is.na(resDxC1$padj) &
#     resDxC1$log2FoldChange > 0) |>
#   mutate(Untreated = resDxC1$padj < 0.05 &
#     !is.na(resDxC1$padj) &
#     resDxC1$log2FoldChange < 0) |>
#   mutate(N052611 = resDxC2$padj < 0.05 &
#     !is.na(resDxC2$padj) &
#     resDxC2$log2FoldChange > 0) |>
#   mutate(N61311 = resDxC2$padj < 0.05 &
#     !is.na(resDxC2$padj) &
#     resDxC2$log2FoldChange < 0)
# 
# ggvenn(vennTest, set_name_size = 4)

Enrichment/Over-representation analysis

ORA & Enrichment

# Create background dataset for hypergeometric testing 
all_genes <- as.character(rownames(resDxC1))

# Extract significant genes
signif_res <- resDxC1[resDxC1$padj < 0.05 & !is.na(resDxC1$padj), ]

signif_genes <- as.character(rownames(signif_res))

# Setup ranked genelist
geneSet <- as.data.frame(signif_res)

geneSetmin <- geneSet |> 
  select(log2FoldChange) |> 
  arrange(desc(log2FoldChange)) |> 
  rownames_to_column(var = "ENSEMBL") |> 
  select(ENSEMBL, everything())

# # Determine invalid SYMBOL names and remove
# valid_symbols <- keys(org.Hs.eg.db, keytype = "SYMBOL")
# 
# invalid_genes <- setdiff(geneSetmin$SYMBOL, valid_symbols)
# 
# print(invalid_genes)
# 
# geneSetmin <- geneSetmin |> 
#   filter(SYMBOL %in% valid_symbols)
# 
# geneSetminCOPD <- geneSetminCOPD |> 
#   filter(SYMBOL %in% valid_symbols)
# 
# geneSetminWild <- geneSetminWild |> 
#   filter(SYMBOL %in% valid_symbols)

# ID options
columns(org.Hs.eg.db)

##  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT"  "ENSEMBLTRANS"
##  [6] "ENTREZID"     "ENZYME"       "EVIDENCE"     "EVIDENCEALL"  "GENENAME"    
## [11] "GENETYPE"     "GO"           "GOALL"        "IPI"          "MAP"         
## [16] "OMIM"         "ONTOLOGY"     "ONTOLOGYALL"  "PATH"         "PFAM"        
## [21] "PMID"         "PROSITE"      "REFSEQ"       "SYMBOL"       "UCSCKG"      
## [26] "UNIPROT"

# Translate gene Symbol to Entrez ID
geneSetmin2 = bitr(geneSetmin$ENSEMBL, 
                  fromType="ENSEMBL", 
                  toType=c("SYMBOL","GENENAME", "GENETYPE", "ENTREZID"), 
                  OrgDb="org.Hs.eg.db")

## 'select()' returned 1:many mapping between keys and columns

## Warning in bitr(geneSetmin$ENSEMBL, fromType = "ENSEMBL", toType = c("SYMBOL",
## : 6.18% of input gene IDs are fail to map...

geneSetFinal <- geneSetmin |> 
  right_join(geneSetmin2, by = "ENSEMBL") |> 
  select(SYMBOL, ENSEMBL, ENTREZID, log2FoldChange)


# write.csv(geneSetFinal, file="TreatedVSUntreated_GeneSet.csv")

GO enrichment

# Run GO enrichment analysis
# Biological Process
egoBP <- enrichGO(
  gene = signif_genes,
  universe = all_genes,
  keyType = "ENSEMBL",
  OrgDb = org.Hs.eg.db,
  ont = "BP",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05,
  readable = TRUE
)

head(egoBP)

##                    ID                                   Description GeneRatio
## GO:0001525 GO:0001525                                  angiogenesis  193/3395
## GO:0003013 GO:0003013                    circulatory system process  187/3395
## GO:0030198 GO:0030198             extracellular matrix organization  125/3395
## GO:0043062 GO:0043062          extracellular structure organization  125/3395
## GO:0045229 GO:0045229 external encapsulating structure organization  125/3395
## GO:0003012 GO:0003012                         muscle system process  145/3395
##              BgRatio       pvalue     p.adjust       qvalue
## GO:0001525 447/13421 4.261653e-17 2.406555e-13 1.793483e-13
## GO:0003013 458/13421 9.309176e-14 2.628446e-10 1.958847e-10
## GO:0030198 274/13421 1.470355e-13 2.767698e-10 2.062624e-10
## GO:0043062 275/13421 2.033858e-13 2.834178e-10 2.112168e-10
## GO:0045229 276/13421 2.804832e-13 2.834178e-10 2.112168e-10
## GO:0003012 335/13421 3.011345e-13 2.834178e-10 2.112168e-10
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 geneID
## GO:0001525 TBXA2R/ADIPOR2/PIK3C2A/BRCA1/ADGRA2/TYMP/GRN/SARS1/HDAC9/HDAC7/HIPK2/FGFR2/PKM/ROCK1/RORA/FGF10/JMJD6/EPN2/PTGS2/NOTCH3/MCAM/B4GALT1/MMP2/TGFB2/HMOX1/MYH9/HIF1A/NFATC4/CD40/ISM1/JAG1/KLF5/RGCC/TJP1/SFRP1/ITGB8/CAV1/HOXA3/HSPB1/ENG/SHB/UNC5B/HDAC5/GAB1/MDK/MAPK14/SRF/VEGFA/SEMA5A/SPARC/NPR3/FGF1/PDGFRB/AMOTL2/PDCD10/WNT5A/STAT1/EPAS1/AKT3/PIK3R3/F3/NRP2/TNFAIP3/CCN2/TCF21/ITGB1BP1/VPS4B/TGFBI/PTK2B/ADGRB2/PIK3CA/CALD1/MMP19/NR4A1/AGO2/PTGIS/HIF3A/AMOT/RRAS/RHOJ/AAMP/ADM2/ARHGAP22/DLL4/SAT1/PXDN/NINJ1/PPARG/EMILIN2/STARD13/EPHB2/BTG1/LOXL2/ROCK2/PDGFRA/COL4A2/ANXA1/CEMIP2/GLUL/SPRY2/GPNMB/HS6ST1/KLF4/NIBAN2/SLC31A1/CCN3/SULF1/THBS1/ACVRL1/WARS1/MFGE8/GATA6/CCN1/VASH2/RHOB/COL8A1/SLIT2/SFRP2/PLK2/MTDH/NOTCH1/ADAM12/ADM/ITGB1/VEGFC/EDNRA/DDAH1/NUS1/THY1/ANGPT1/ADAMTS1/CREB3L1/WASF2/RUNX1/PLXDC1/FMNL3/FZD5/TGFBR2/TLR3/BMPER/VEGFD/VSTM4/E2F7/SPRED1/TMEM100/MAPK7/ANPEP/GREM1/STIM1/ANGPTL4/RBPJ/STAT3/IL12A/COL4A3/MECP2/NPR1/CLIC4/ROBO1/SMAD1/S1PR1/PIK3CD/HPSE/NDNF/DAG1/PTPRM/CSPG4/LEP/FZD4/SPHK1/FZD8/APOLD1/EGR3/EPHB3/ANXA2/COL18A1/CCBE1/ACTG1/PRKD1/TNFAIP2/THBS2/COL4A1/APOD/EPHB4/COL27A1/FLNA/PARVA/DLL1/MAP3K3/COL15A1/EMP2/ANG/GPX1/NOX5
## GO:0003013         CACNA1G/TBXA2R/PIK3C2A/EHD3/SLC38A5/ATP1A2/GCLM/MAP2K3/CELF2/ELN/NEDD4L/PKP2/ROCK1/MEF2A/ABCC9/SLC44A1/ATP2B1/RPS6KA2/SREBF1/FERMT2/PTGS2/SRI/SLC4A4/HSP90AA1/ABCB1/MMP2/DNM1L/SNX5/TBX5/TGFB2/PTGS1/DSP/PCSK5/KCNK6/BDKRB1/HTR2A/ABCC1/SLC7A5/CORO2B/TJP1/DMPK/SLC1A5/SCN1B/CAV1/PLOD3/SLC1A1/CXCL12/ACTA2/ASIC2/ABCC3/MAP2K6/TRPV4/SLC38A1/BVES/VEGFA/TBX18/NPR3/LNPEP/ABCC5/STAT1/EPAS1/RGS4/ECE1/SGK1/TJP2/ITGB1BP1/TBX2/KCNJ8/PIK3CA/RAMP3/ABCC4/ADM2/DLL4/FGF13/TRPM4/LRP3/AKAP12/SLC6A6/GCH1/PPARG/EMILIN2/PER2/POSTN/SLC38A2/ROCK2/MDM2/AGT/EDNRB/BIN1/IER3/YAP1/SLC5A6/MYOF/PDE5A/ACVRL1/TPM1/GATA6/ATP1B1/SLIT2/CORIN/CAMK2D/ANK2/GNA12/SLC16A2/DOCK5/GSN/GSTO1/ADM/SERPING1/PTPRJ/SCN2B/ADRA2A/VEGFC/CACNA1C/THRB/SLC2A13/EDNRA/GPD1L/SLC16A12/BMP6/DDAH1/ANGPT1/CLIC2/KCNMA1/STC1/CBS/SPTBN4/ATP1A1/ADORA1/GUCY1A1/ERAP2/SLC29A4/COL1A2/GPER1/SVEP1/VSTM4/ARHGAP42/CACNB2/SMAD3/SLC27A4/BDKRB2/COL4A3/MECP2/NPR1/ZEB2/ADRA1B/SGCD/AKAP13/INSR/CHD7/KCND3/HSPB7/HEG1/ATP2A2/LEP/KCNE3/HSD11B2/SCN4B/AGTRAP/PLEC/OXTR/CHRM2/EXT1/SLC8A1/ABAT/GAS6/KCNJ12/KCND2/SLC24A3/PRKG1/NR2F2/PPARA/SLC6A9/HRH1/ADA/FLNA/SLC2A10/ADH5/CALM1/DLL1/MTOR/DMD/E2F4/GSTM2/EMP2/ITGA1/GPX1
## GO:0030198                                                                                                                                                                                                                                                                                                                                                    CFLAR/ST7/PHLDB1/TNFRSF1B/GPM6B/COL9A2/ELN/COL11A1/BCL3/NTN4/FSCN1/ITGA8/COL5A3/COL4A4/COL16A1/B4GALT1/ADAMTS2/MMP2/NID2/LAMB1/TGFB2/MMP11/PAPLN/CST3/RGCC/MMP15/CRISPLD2/APLP1/CAV2/CAV1/DNAJB6/PLOD3/ENG/KAZALD1/COL1A1/COL12A1/LOX/COL7A1/NID1/TGFBI/SERAC1/MMP19/SOX9/LOXL1/COLGALT1/PXDN/COL5A1/POSTN/LARGE1/LOXL2/PDGFRA/COL4A2/AGT/LAMC1/ADAMTS7/FOXF2/SULF1/MMP7/ADAMTS14/LUM/RB1/FBLN5/FURIN/CCN1/DPT/HMCN1/ADAMTSL4/COL8A1/PHLDB2/SFRP2/CSGALNACT1/NOTCH1/SERPINH1/ITGB1/ADAMTS1/ADAMTS5/ADAMTS3/MYO1E/CREB3L1/RUNX1/FGFR4/PDPN/OLFML2B/CTSS/PTX3/COL1A2/TNFRSF11B/ATP7A/MFAP4/MMP10/GREM1/SMAD3/SPINT2/TNXB/COL3A1/AXIN2/COL4A3/COL24A1/RXFP1/ANGPTL7/NDNF/DAG1/SH3PXD2B/ADAMTSL1/EXT1/ANXA2/COL18A1/GAS6/FLRT2/OLFML2A/COL4A1/THSD4/COL14A1/COL4A5/SULF2/LAMA2/COL27A1/SLC2A10/DPP4/ERO1A/COL5A2/COL15A1/COLQ/COL28A1/ATXN1L
## GO:0043062                                                                                                                                                                                                                                                                                                                                                    CFLAR/ST7/PHLDB1/TNFRSF1B/GPM6B/COL9A2/ELN/COL11A1/BCL3/NTN4/FSCN1/ITGA8/COL5A3/COL4A4/COL16A1/B4GALT1/ADAMTS2/MMP2/NID2/LAMB1/TGFB2/MMP11/PAPLN/CST3/RGCC/MMP15/CRISPLD2/APLP1/CAV2/CAV1/DNAJB6/PLOD3/ENG/KAZALD1/COL1A1/COL12A1/LOX/COL7A1/NID1/TGFBI/SERAC1/MMP19/SOX9/LOXL1/COLGALT1/PXDN/COL5A1/POSTN/LARGE1/LOXL2/PDGFRA/COL4A2/AGT/LAMC1/ADAMTS7/FOXF2/SULF1/MMP7/ADAMTS14/LUM/RB1/FBLN5/FURIN/CCN1/DPT/HMCN1/ADAMTSL4/COL8A1/PHLDB2/SFRP2/CSGALNACT1/NOTCH1/SERPINH1/ITGB1/ADAMTS1/ADAMTS5/ADAMTS3/MYO1E/CREB3L1/RUNX1/FGFR4/PDPN/OLFML2B/CTSS/PTX3/COL1A2/TNFRSF11B/ATP7A/MFAP4/MMP10/GREM1/SMAD3/SPINT2/TNXB/COL3A1/AXIN2/COL4A3/COL24A1/RXFP1/ANGPTL7/NDNF/DAG1/SH3PXD2B/ADAMTSL1/EXT1/ANXA2/COL18A1/GAS6/FLRT2/OLFML2A/COL4A1/THSD4/COL14A1/COL4A5/SULF2/LAMA2/COL27A1/SLC2A10/DPP4/ERO1A/COL5A2/COL15A1/COLQ/COL28A1/ATXN1L
## GO:0045229                                                                                                                                                                                                                                                                                                                                                    CFLAR/ST7/PHLDB1/TNFRSF1B/GPM6B/COL9A2/ELN/COL11A1/BCL3/NTN4/FSCN1/ITGA8/COL5A3/COL4A4/COL16A1/B4GALT1/ADAMTS2/MMP2/NID2/LAMB1/TGFB2/MMP11/PAPLN/CST3/RGCC/MMP15/CRISPLD2/APLP1/CAV2/CAV1/DNAJB6/PLOD3/ENG/KAZALD1/COL1A1/COL12A1/LOX/COL7A1/NID1/TGFBI/SERAC1/MMP19/SOX9/LOXL1/COLGALT1/PXDN/COL5A1/POSTN/LARGE1/LOXL2/PDGFRA/COL4A2/AGT/LAMC1/ADAMTS7/FOXF2/SULF1/MMP7/ADAMTS14/LUM/RB1/FBLN5/FURIN/CCN1/DPT/HMCN1/ADAMTSL4/COL8A1/PHLDB2/SFRP2/CSGALNACT1/NOTCH1/SERPINH1/ITGB1/ADAMTS1/ADAMTS5/ADAMTS3/MYO1E/CREB3L1/RUNX1/FGFR4/PDPN/OLFML2B/CTSS/PTX3/COL1A2/TNFRSF11B/ATP7A/MFAP4/MMP10/GREM1/SMAD3/SPINT2/TNXB/COL3A1/AXIN2/COL4A3/COL24A1/RXFP1/ANGPTL7/NDNF/DAG1/SH3PXD2B/ADAMTSL1/EXT1/ANXA2/COL18A1/GAS6/FLRT2/OLFML2A/COL4A1/THSD4/COL14A1/COL4A5/SULF2/LAMA2/COL27A1/SLC2A10/DPP4/ERO1A/COL5A2/COL15A1/COLQ/COL28A1/ATXN1L
## GO:0003012                                                                                                                                                                                                                                                                                              CFLAR/CACNA1G/TBXA2R/MYH13/JARID2/PIK3C2A/EHD3/IGF1/ATP1A2/TNFRSF1B/MAP2K3/NEDD4L/CYBA/PKP2/KDM4A/ROCK1/MEF2A/ABCC9/ATP2B1/LMCD1/PTGS2/SRI/PPP1R12B/HSP90AA1/MYL6/DSP/P2RX6/HMOX1/MYL9/HTR2A/DMPK/SCN1B/CAV1/EZH2/ACTA2/MAP2K6/CRYAB/TRPV4/SRF/CERT1/FOXP1/RGS4/FOXO3/NR4A3/TBX2/KCNJ8/PIK3CA/CALD1/NR4A1/PARP2/FGF13/GAMT/TRPM4/CKMT2/PPARG/LARGE1/ROCK2/DTNA/TBX3/AGT/EDNRB/SCN7A/BIN1/SULF1/MYOF/PLCE1/MSTN/PDE5A/TPM1/GATA6/ATP1B1/TPM3/CAMK2D/ANK2/DOCK5/GSN/NOTCH1/GSTO1/SCN2B/ADRA2A/FOXO1/CACNA1C/EDNRA/GPD1L/UTRN/CLIC2/KCNMA1/FBXO32/STC1/G6PD/CCDC78/TPCN2/PDLIM5/ATP1A1/LMOD1/ADORA1/SLMAP/APBB2/KLF15/GUCY1A1/ITGA2/PI16/GPER1/ARHGAP42/CACNB2/SMAD3/TPM4/DAPK3/BDKRB2/GDNF/ADRA1B/SGCD/KCND3/SNTB1/MYOZ2/DAG1/ATP2A2/LEP/KCNE3/SPHK1/SCN4B/OXTR/CHRM2/KCNB2/SLC8A1/ABAT/SMTN/KCNJ12/PRKD1/KCND2/PRKG1/PPARA/MYL6B/SULF2/HDAC2/ADA/FLNA/ANXA6/ASPH/TPM2/CALM1/MTOR/DMD/CHUK/GSTM2
##            Count
## GO:0001525   193
## GO:0003013   187
## GO:0030198   125
## GO:0043062   125
## GO:0045229   125
## GO:0003012   145

# Cellular Component
egoCC <- enrichGO(
  gene = signif_genes,
  universe = all_genes,
  keyType = "ENSEMBL",
  OrgDb = org.Hs.eg.db,
  ont = "CC",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05,
  readable = TRUE
)

head(egoCC)

##                    ID                              Description GeneRatio
## GO:0030312 GO:0030312         external encapsulating structure  191/3537
## GO:0031012 GO:0031012                     extracellular matrix  191/3537
## GO:0062023 GO:0062023 collagen-containing extracellular matrix  155/3537
## GO:0015629 GO:0015629                       actin cytoskeleton  170/3537
## GO:0030055 GO:0030055                  cell-substrate junction  163/3537
## GO:0005925 GO:0005925                           focal adhesion  159/3537
##              BgRatio       pvalue     p.adjust       qvalue
## GO:0030312 421/14047 5.933258e-20 1.978742e-17 1.730013e-17
## GO:0031012 421/14047 5.933258e-20 1.978742e-17 1.730013e-17
## GO:0062023 331/14047 5.720082e-18 1.271765e-15 1.111904e-15
## GO:0015629 430/14047 2.100930e-11 3.503302e-09 3.062935e-09
## GO:0030055 416/14047 1.234508e-10 1.646833e-08 1.439826e-08
## GO:0005925 409/14047 4.149422e-10 4.612774e-08 4.032947e-08
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              geneID
## GO:0030312 SEMA3B/SERPINB1/VCAN/TNC/COL9A2/LTBP1/ELN/LAMA3/LAMC2/COL11A1/GPC1/CCN5/CDON/NTN1/FGFR2/PKM/FGF10/NTN4/GPC4/COL5A3/HSP90AA1/COL4A4/COL16A1/ADAMTS2/MMP2/NID2/LAMB1/TGFB2/HNRNPM/MMP11/TIMP3/MXRA5/SRPX/TIMP1/MMP15/CRISPLD2/SFRP1/CCN4/APLP1/WNT2/PCOLCE/PLOD3/RARRES2/MEGF9/OGN/CXCL12/ACTA2/KAZALD1/COL1A1/CTSC/MDK/MGP/COL12A1/VEGFA/ERBIN/LOX/SPARC/FGF1/WNT5A/COL7A1/ANGPTL1/PRG4/NID1/MFAP2/P3H1/F3/CCN2/TGFBI/SERAC1/ANXA11/ITIH5/MMP19/FLRT3/OMD/FGL2/LRRC17/LOXL1/PXDN/GDF15/COL5A1/LGALS3/EMILIN2/POSTN/LOXL2/COL4A2/ANXA1/ADAM19/AGT/LAMC1/SERPINE2/ADAMTS7/ANGPTL2/CCN3/SULF1/MMP7/THBS1/ADAMTS14/CILP/FBN2/LRIG3/INHBE/LUM/FBLN5/HAPLN3/MFGE8/TGFB1I1/CCN1/TINAGL1/DPT/HMCN1/ADAMTSL4/FLG/COL8A1/SFRP2/HAPLN1/CASK/SERPING1/SERPINH1/DST/SPARCL1/ANGPT1/ADAMTS1/ADAMTS5/ADAMTS3/TIMP4/SPON2/CSTB/SDC3/OLFML2B/CTSS/COL6A3/PTX3/TLR3/BMPER/COL1A2/TNFRSF11B/CTHRC1/OTOGL/SERPINB8/MFAP4/MMP10/GREM1/ANGPTL4/VASN/TNXB/COL3A1/COL4A3/SDC2/SERPINB9/COL24A1/ANGPTL7/LRRC15/HPSE/LRRN3/NDNF/DAG1/CSPG4/PODN/CD248/LRRN1/SSC5D/BGN/ANXA2/COL18A1/CCBE1/DGCR6/FLRT2/OLFML2A/THBS2/C17orf58/RTN4RL2/BCAM/COL4A1/THSD4/COL14A1/COL4A5/S100A4/LAMA2/COL27A1/LAMB3/ANXA4/ANXA6/MFAP5/TGM2/COL5A2/COL15A1/COL6A6/COLQ/GPC2/ANG/COL28A1
## GO:0031012 SEMA3B/SERPINB1/VCAN/TNC/COL9A2/LTBP1/ELN/LAMA3/LAMC2/COL11A1/GPC1/CCN5/CDON/NTN1/FGFR2/PKM/FGF10/NTN4/GPC4/COL5A3/HSP90AA1/COL4A4/COL16A1/ADAMTS2/MMP2/NID2/LAMB1/TGFB2/HNRNPM/MMP11/TIMP3/MXRA5/SRPX/TIMP1/MMP15/CRISPLD2/SFRP1/CCN4/APLP1/WNT2/PCOLCE/PLOD3/RARRES2/MEGF9/OGN/CXCL12/ACTA2/KAZALD1/COL1A1/CTSC/MDK/MGP/COL12A1/VEGFA/ERBIN/LOX/SPARC/FGF1/WNT5A/COL7A1/ANGPTL1/PRG4/NID1/MFAP2/P3H1/F3/CCN2/TGFBI/SERAC1/ANXA11/ITIH5/MMP19/FLRT3/OMD/FGL2/LRRC17/LOXL1/PXDN/GDF15/COL5A1/LGALS3/EMILIN2/POSTN/LOXL2/COL4A2/ANXA1/ADAM19/AGT/LAMC1/SERPINE2/ADAMTS7/ANGPTL2/CCN3/SULF1/MMP7/THBS1/ADAMTS14/CILP/FBN2/LRIG3/INHBE/LUM/FBLN5/HAPLN3/MFGE8/TGFB1I1/CCN1/TINAGL1/DPT/HMCN1/ADAMTSL4/FLG/COL8A1/SFRP2/HAPLN1/CASK/SERPING1/SERPINH1/DST/SPARCL1/ANGPT1/ADAMTS1/ADAMTS5/ADAMTS3/TIMP4/SPON2/CSTB/SDC3/OLFML2B/CTSS/COL6A3/PTX3/TLR3/BMPER/COL1A2/TNFRSF11B/CTHRC1/OTOGL/SERPINB8/MFAP4/MMP10/GREM1/ANGPTL4/VASN/TNXB/COL3A1/COL4A3/SDC2/SERPINB9/COL24A1/ANGPTL7/LRRC15/HPSE/LRRN3/NDNF/DAG1/CSPG4/PODN/CD248/LRRN1/SSC5D/BGN/ANXA2/COL18A1/CCBE1/DGCR6/FLRT2/OLFML2A/THBS2/C17orf58/RTN4RL2/BCAM/COL4A1/THSD4/COL14A1/COL4A5/S100A4/LAMA2/COL27A1/LAMB3/ANXA4/ANXA6/MFAP5/TGM2/COL5A2/COL15A1/COL6A6/COLQ/GPC2/ANG/COL28A1
## GO:0062023                                                                                                                                                                                                                                  SEMA3B/SERPINB1/VCAN/TNC/COL9A2/LTBP1/ELN/LAMA3/LAMC2/COL11A1/GPC1/CDON/NTN1/FGFR2/PKM/FGF10/NTN4/GPC4/COL5A3/HSP90AA1/COL4A4/COL16A1/ADAMTS2/MMP2/NID2/LAMB1/TGFB2/HNRNPM/TIMP3/MXRA5/SRPX/TIMP1/SFRP1/APLP1/WNT2/PCOLCE/PLOD3/RARRES2/MEGF9/OGN/CXCL12/ACTA2/KAZALD1/COL1A1/CTSC/MDK/MGP/COL12A1/ERBIN/LOX/SPARC/WNT5A/COL7A1/ANGPTL1/PRG4/NID1/MFAP2/P3H1/F3/CCN2/TGFBI/ANXA11/ITIH5/FGL2/LOXL1/PXDN/GDF15/COL5A1/LGALS3/EMILIN2/POSTN/LOXL2/COL4A2/ANXA1/ADAM19/AGT/LAMC1/SERPINE2/ANGPTL2/CCN3/SULF1/THBS1/CILP/FBN2/INHBE/LUM/FBLN5/HAPLN3/MFGE8/TGFB1I1/CCN1/TINAGL1/DPT/HMCN1/ADAMTSL4/FLG/COL8A1/SFRP2/HAPLN1/CASK/SERPING1/SERPINH1/DST/SPARCL1/ANGPT1/ADAMTS1/ADAMTS5/ADAMTS3/CSTB/SDC3/CTSS/COL6A3/COL1A2/CTHRC1/SERPINB8/MFAP4/GREM1/ANGPTL4/TNXB/COL3A1/COL4A3/SDC2/SERPINB9/COL24A1/ANGPTL7/LRRC15/DAG1/CSPG4/PODN/SSC5D/BGN/ANXA2/COL18A1/THBS2/C17orf58/BCAM/COL4A1/THSD4/COL14A1/COL4A5/S100A4/LAMA2/COL27A1/LAMB3/ANXA4/ANXA6/MFAP5/TGM2/COL5A2/COL15A1/COL6A6/COLQ/GPC2/ANG/COL28A1
## GO:0015629                                                                                                                                                                 LASP1/MYH13/GAS7/VCL/MYO16/PPP1R12A/NCKAP1/CNN2/NTN1/WDR1/ACTN1/FERMT2/FSCN1/ACTB/CTTNBP2/CAPZB/AMPH/NEBL/CARMIL1/EPB41L2/STK17B/MPP4/EFR3B/TRIP6/PXN/MYL6/WHRN/SORBS1/ABLIM1/SPECC1L/TRIOBP/MYH9/DAAM1/MYL9/MYL12A/DGKH/COTL1/CORO2B/TJP1/BMF/VPS18/DCTN6/WASL/PDLIM1/ACTA2/CRYAB/LPXN/CORO1C/TRPV4/ARPC3/WASF1/DUSP22/DPYSL3/ACTR3/MLPH/CNN3/FILIP1/MED28/MYL12B/CD274/SHLD2/SEPTIN7/CALD1/AHNAK/VASP/DSTN/AMOT/AIF1L/PTPN12/SPECC1/PALLD/SNX9/ACTN4/LSP1/CAP1/PDLIM4/DIAPH1/LLGL1/FLOT2/MTSS2/MYH10/SWAP70/MICAL2/ARHGAP32/ANXA1/MICAL1/FHOD1/FLNB/SPRY2/RAC1/BIN1/ACTR2/SHROOM3/GAS2L3/DIAPH3/TLNRD1/TPM1/ABL2/TPM3/ACKR2/LPP/CORIN/SHROOM2/DENND2A/CASK/DIAPH2/DOCK5/GSN/ADAM17/BAG3/PLEKHH2/ASAP1/ADCY8/MYO1E/AFAP1L1/WASF2/AUTS2/ZYX/KALRN/SPTBN4/FBLIM1/NEXN/VCAM1/ARPC5/PDLIM5/CDC42EP3/LMOD1/PKNOX2/CRK/TPM4/FAM107A/CLIC4/RAC3/AKAP13/MTSS1/TLN2/MYOZ2/SYNPO2/RND1/CORO1B/CFL1/PEAK1/HSPB7/SH3PXD2B/PAWR/MYADM/RFLNB/SMTN/ACTG1/SEPTIN9/ZNF74/ROR1/NF2/SMTNL2/MYL6B/AFAP1/MYO18A/PDLIM7/FLNA/SVIL/SIPA1L1/SPTAN1/PARVA/MYO1C/TPM2/MSRB1/INF2/STK38L/ANG/ARPC4
## GO:0030055                                                                                                                                                                                                                                              LASP1/ABCB4/ITGA3/RALA/CD9/MRC2/TSPAN9/EHD3/CAPN1/FHL1/VIM/ARHGAP31/VCL/TNC/LAMA3/PPP1R12A/NCKAP1/CNN2/TLE2/ACTN1/LIMS2/FERMT2/ACTB/MCAM/USP33/ITGA8/TNS1/EPB41L2/CPNE3/TRIP6/PXN/ITGA6/SORBS1/JAK2/TRIOBP/PACSIN2/MYH9/EFS/MAPRE1/TNFSF13B/MAPK3/CORO2B/ITGB8/CAV2/CAV1/HSPB1/LIMK1/ENG/PDLIM1/GIT1/HSPA8/LPXN/CBL/CORO1C/TNS2/TRPV4/ARPC3/NEDD9/WASF1/ERBIN/PDGFRB/ACTR3/ITGA4/FHL2/RND3/ARHGEF2/CNN3/SORBS3/PTK2B/CAT/PLAU/SDC4/AHNAK/VASP/FLRT3/RRAS/AIF1L/PTPN12/ARHGAP22/PALLD/NECTIN2/ACTN4/AKAP12/CAP1/FLOT2/RRAS2/ANXA1/FLNB/RAC1/TLN1/HMGA1/ATAT1/ITGA11/ACTR2/GIT2/TGFB1I1/SSH2/DCAF6/HMCN1/PIP5K1A/ARF1/RHOB/PHLDB2/LPP/G3BP1/GNA12/EGFR/CASK/GSN/RSU1/CAPN5/ITGB1/DIXDC1/ADAM17/DST/THY1/ZYX/FBLIM1/NEXN/ARPC5/ITGA2/SLC4A2/ARF6/MAPRE2/TPM4/FAM107A/IRF2/TM4SF20/MAP2K1/LIMS1/ALCAM/TLN2/SNTB1/IL16/GNB2/SYNPO2/CORO1B/CFL1/DAG1/PEAK1/CSPG4/PLEC/ALOX15B/ACTG1/FLRT2/ATP6V0A2/CHP1/NHS/EVL/AFAP1/PDLIM7/FLNA/ANXA6/SVIL/DPP4/PARVA/MPZL1/ITGBL1/DMD/TGM2/PPP1CB/ITGA1/SCARF2
## GO:0005925                                                                                                                                                                                                                                                                    LASP1/ABCB4/ITGA3/RALA/CD9/MRC2/TSPAN9/EHD3/CAPN1/FHL1/VIM/ARHGAP31/VCL/TNC/PPP1R12A/NCKAP1/CNN2/TLE2/ACTN1/LIMS2/FERMT2/ACTB/MCAM/USP33/ITGA8/TNS1/EPB41L2/CPNE3/TRIP6/PXN/ITGA6/SORBS1/JAK2/TRIOBP/PACSIN2/MYH9/EFS/MAPRE1/TNFSF13B/MAPK3/CORO2B/ITGB8/CAV2/CAV1/HSPB1/LIMK1/ENG/PDLIM1/GIT1/HSPA8/LPXN/CBL/CORO1C/TNS2/TRPV4/ARPC3/NEDD9/WASF1/PDGFRB/ACTR3/ITGA4/FHL2/RND3/ARHGEF2/CNN3/SORBS3/PTK2B/CAT/PLAU/SDC4/AHNAK/VASP/FLRT3/RRAS/AIF1L/PTPN12/ARHGAP22/PALLD/NECTIN2/ACTN4/AKAP12/CAP1/FLOT2/RRAS2/ANXA1/FLNB/RAC1/TLN1/HMGA1/ATAT1/ITGA11/ACTR2/GIT2/TGFB1I1/SSH2/DCAF6/PIP5K1A/ARF1/RHOB/PHLDB2/LPP/G3BP1/GNA12/EGFR/CASK/GSN/RSU1/CAPN5/ITGB1/DIXDC1/ADAM17/DST/THY1/ZYX/FBLIM1/NEXN/ARPC5/ITGA2/SLC4A2/ARF6/MAPRE2/TPM4/FAM107A/IRF2/TM4SF20/MAP2K1/LIMS1/ALCAM/TLN2/SNTB1/IL16/GNB2/SYNPO2/CORO1B/CFL1/DAG1/PEAK1/CSPG4/PLEC/ALOX15B/ACTG1/FLRT2/ATP6V0A2/CHP1/NHS/EVL/AFAP1/PDLIM7/FLNA/ANXA6/SVIL/DPP4/PARVA/MPZL1/ITGBL1/TGM2/PPP1CB/ITGA1/SCARF2
##            Count
## GO:0030312   191
## GO:0031012   191
## GO:0062023   155
## GO:0015629   170
## GO:0030055   163
## GO:0005925   159

# Molecular Function
egoMF <- enrichGO(
  gene = signif_genes,
  universe = all_genes,
  keyType = "ENSEMBL",
  OrgDb = org.Hs.eg.db,
  ont = "MF",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05,
  readable = TRUE
)

head(egoMF)

##                    ID
## GO:0005201 GO:0005201
## GO:0003779 GO:0003779
## GO:0043236 GO:0043236
## GO:0050840 GO:0050840
## GO:0030020 GO:0030020
## GO:0050839 GO:0050839
##                                                                        Description
## GO:0005201                             extracellular matrix structural constituent
## GO:0003779                                                           actin binding
## GO:0043236                                                         laminin binding
## GO:0050840                                            extracellular matrix binding
## GO:0030020 extracellular matrix structural constituent conferring tensile strength
## GO:0050839                                          cell adhesion molecule binding
##            GeneRatio   BgRatio       pvalue     p.adjust       qvalue
## GO:0005201   70/3475 137/13749 6.750697e-11 6.793346e-08 6.117984e-08
## GO:0003779  150/3475 374/13749 1.258027e-10 6.793346e-08 6.117984e-08
## GO:0043236   20/3475  25/13749 1.469406e-08 4.579459e-06 4.124191e-06
## GO:0050840   32/3475  51/13749 1.696096e-08 4.579459e-06 4.124191e-06
## GO:0030020   26/3475  40/13749 1.334489e-07 2.882496e-05 2.595932e-05
## GO:0050839  165/3475 491/13749 1.633133e-05 2.939639e-03 2.647394e-03
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      geneID
## GO:0005201                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                VCAN/TNC/COL9A2/LTBP1/ELN/LAMA3/LAMC2/COL11A1/COL5A3/COL4A4/COL16A1/NID2/LAMB1/MXRA5/SRPX/PCOLCE/OGN/COL1A1/MGP/COL12A1/SPARC/COL7A1/PRG4/NID1/MFAP2/TGFBI/FGL2/PXDN/COL5A1/EMILIN2/POSTN/COL4A2/LAMC1/THBS1/CILP/FBN2/LUM/FBLN5/MFGE8/CCN1/TINAGL1/DPT/HMCN1/COL8A1/HAPLN1/COL6A3/COL1A2/CTHRC1/MFAP4/TNXB/COL3A1/COL4A3/COL24A1/PODN/BGN/COL18A1/THBS2/COL4A1/THSD4/COL14A1/COL4A5/LAMA2/COL27A1/LAMB3/MFAP5/COL5A2/COL15A1/COL6A6/COLQ/COL28A1
## GO:0003779                                                                                              LASP1/MYH13/GAS7/VCL/RAI14/MYO16/CNN2/PFN2/WDR1/ACTN1/FERMT2/FSCN1/CAPZB/NEBL/TNS1/EPB41L2/EPB41L3/SORBS1/ABLIM1/TRIOBP/MYH9/DAAM1/COTL1/CORO2B/VPS18/WASL/PDLIM1/PANX1/CORO1C/TRPV4/ARPC3/PHACTR1/WASF1/PHACTR2/ACTR3/MLPH/CNN3/MED28/CTNNAL1/KIF18A/CALD1/VASP/DSTN/AIF1L/HIP1/INO80/PALLD/ACTN4/MAP1S/LSP1/CAP1/PDLIM4/DIAPH1/MAP1B/MTSS2/WASF3/MYH10/MICAL2/MICAL1/FHOD1/FLNB/BIN1/TLN1/UACA/ACTR2/SHROOM3/FGD4/GAS2L3/DIAPH3/TLNRD1/TPM1/SSH2/ABL2/VASH2/TPM3/DAAM2/EGFR/SHROOM2/DIAPH2/GSN/LRRC27/TAGLN/ITGB1/DIXDC1/EPS8/DST/PLEKHH2/UTRN/ADCY8/KCNMA1/WHAMM/MYO1E/FMNL2/WASF2/TMSB15B/EPB41/SPTBN4/FMNL3/NEXN/ARPC5/PDLIM5/LMOD1/PKNOX2/CACNB2/MAP1A/TPM4/CORO6/FAM107A/MTSS1/PRKCE/ENC1/TLN2/SNTB1/MYOZ2/SYNPO2/CORO1B/CFL1/DAG1/MARCKSL1/PAWR/PLEC/SMTN/FMNL1/ADSS1/NF2/MAPT/LRRK2/S100A4/EVL/AFAP1/MYO18A/MRTFA/PDLIM7/FLNA/ANXA6/GMFB/SVIL/SPTAN1/PARVA/MYO1C/TPM2/MSRB1/DMD/INF2/SPIRE2/STK38L/DNASE1/ANG/ARPC4/MICAL3
## GO:0043236                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          ITGA3/GPC1/NTN4/ITGA6/ADGRG6/NID1/LYPD3/PXDN/LGALS3/THBS1/TINAGL1/SLIT2/ITGB1/CTSS/ITGA2/PLEKHA2/LRRC15/DAG1/SSC5D/BCAM
## GO:0050840                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             ITGA3/ELN/COL11A1/GPC1/NTN4/ITGA6/ADGRG6/VEGFA/SPARC/NID1/TGFBI/LYPD3/PXDN/LGALS3/THBS1/CCN1/TINAGL1/SLIT2/ITGB1/SPARCL1/ADAMTS5/OLFML2B/CTSS/ITGA2/PLEKHA2/LRRC15/DAG1/CD248/SSC5D/BGN/OLFML2A/BCAM
## GO:0030020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     COL9A2/COL11A1/COL5A3/COL4A4/COL16A1/COL1A1/COL12A1/COL7A1/COL5A1/COL4A2/COL8A1/COL6A3/COL1A2/COL3A1/COL4A3/COL24A1/COL18A1/COL4A1/COL14A1/COL4A5/COL27A1/COL5A2/COL15A1/COL6A6/COLQ/COL28A1
## GO:0050839 LASP1/ITGA3/CD9/NISCH/IGF1/NRXN3/VCL/CDH10/PKP2/CCN5/CNN2/NEO1/PKM/ADGRL1/ACTN1/EPN2/FERMT2/PICALM/NOTCH3/FSCN1/CAPZB/ITGA8/PTPRH/PSEN1/BZW1/COL16A1/SNX5/ITGA6/TBC1D2/HSP90AB1/DSP/SH3GLB1/PACSIN2/MYH9/RANGAP1/MAPRE1/VAPA/UBFD1/EHD4/TJP1/CCN4/ITGB8/PTN/PDLIM1/CXCL12/LRRC59/CPE/HSPA8/EHD1/CBL/NECTIN1/CDH6/FGF1/ADAM23/ITGA4/STAT1/CNN3/PRDX6/ESYT2/CCN2/TJP2/ITGB1BP1/CTNNAL1/PCMT1/TGFBI/TMPO/SEPTIN7/CALD1/VAPB/LRRFIP1/AHNAK/VASP/TSPAN8/PALLD/NECTIN2/SNX9/ACTN4/COL5A1/NINJ1/DNAJB1/POSTN/SWAP70/LDHA/ANXA1/CEMIP2/ITGA7/FLNB/GPNMB/BZW2/NIBAN2/CCN3/TLN1/THBS1/ITGA11/FNBP1L/DIAPH3/CDH24/FBLN5/MFGE8/EFHD2/SERBP1/CCN1/ITGA10/PHLDB2/SFRP2/EGFR/TNKS1BP1/PTPRJ/TENM4/ITGB1/ADAM17/GFRA1/DST/BAG3/UTRN/CAST/ASAP1/PTPRD/THY1/ROBO3/ADAMTS5/LARP1/CCT8/NPTN/NRG1/FMNL2/WASF2/PLPP3/NEXN/NTNG1/VCAM1/PDLIM5/ITGA2/DCHS1/VASN/TNXB/COL3A1/STXBP6/COL4A3/KIF5B/FRMD5/TLN2/PPP1CA/CORO1B/PTPRM/ARHGAP1/NECTIN3/PLEC/CDH4/PTPN11/MB21D2/ANXA2/CADM1/SEPTIN9/MRTFB/NF2/BCAM/FLNA/SPTAN1/PARVA/ITGBL1/SNX2/EMP2/ITGA1/DDX3X
##            Count
## GO:0005201    70
## GO:0003779   150
## GO:0043236    20
## GO:0050840    32
## GO:0030020    26
## GO:0050839   165

# Output results from GO analysis to a table
clusterBP_summary <- data.frame(egoBP)

clusterCC_summary <- data.frame(egoCC)

clusterMF_summary <- data.frame(egoMF)

Plot pathways and dotplots

# Visualize Biological Process
dotplot(egoBP, showCategory = 12)

goplot(egoBP)

# Cellular Component
dotplot(egoCC, showCategory = 12)

goplot(egoCC)

# Molecular Function
dotplot(egoMF, showCategory = 12)

goplot(egoMF)

Gene-Concept Network

# To color genes by log2 fold changes
signif_res_lFC <- signif_res$log2FoldChange

cnetplot(egoBP,
  categorySize = "pvalue",
  showCategory = 5,
  foldChange = signif_res_lFC,
  vertex.label.font = 6
)

## Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
##  The foldChange parameter will be removed in the next version.

## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.

## Warning: ggrepel: 271 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

cnetplot(egoCC,
  categorySize = "pvalue",
  showCategory = 5,
  foldChange = signif_res_lFC,
  vertex.label.font = 6
)

## Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
##  The foldChange parameter will be removed in the next version.

## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.

## Warning: ggrepel: 336 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

cnetplot(egoMF,
  categorySize = "pvalue",
  showCategory = 5,
  foldChange = signif_res_lFC,
  vertex.label.font = 6
)

## Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
##  The foldChange parameter will be removed in the next version.

## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.

## Warning: ggrepel: 92 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Gene Set Enrichment Analysis

GSEA analysis requires a ranked gene list, which contains:

- numeric vector: fold change or other type of numerical variable
- named vector: every number has a name, the corresponding gene ID
- sorted vector: number should be sorted in decreasing order

A csv fileshould contains two columns, one for gene ID (no duplicated ID allowed) and another one for fold change.

## feature 1: numeric vector
geneList <- geneSetFinal |> 
  select(ENTREZID, SYMBOL, log2FoldChange) |> 
  arrange(desc(log2FoldChange)) 

## feature 2: named vector
names(geneList$ENTREZID) = as.character(geneList$ENTREZID)

Dotplot

x <- enrichDO(geneList$ENTREZID[abs(geneList$log2FoldChange) > 1])

## show most significant pathways 
dotplot(x, showCategory = x$Description[1:12])

GSEA

# geneListMat <- geneList |> 
#   select(SYMBOL, log2FoldChange) |> 
#   arrange(desc(log2FoldChange))
# 
# geneListMat <- as.matrix(geneListMat)
# 
# geneList_vector <- geneListMat[, "log2FoldChange"]
# 
# geneList_sorted <- sort(geneList_vector, decreasing = TRUE)
# 
# # Perform the GSEA using KEGG gene sets:
# gseaKEGG <- gseKEGG(
#   geneList = geneListMat,
#   organism = "hsa",
#   nPerm = 1000, # default number permutations
#   minGSSize = 5, # minimum gene set size
#   pvalueCutoff = 0.1, # padj cutoff value
#   verbose = FALSE
# )
# 
# # Extract the GSEA results
# gseaKEGG_results <- gseaKEGG@result

Bar Plot

# Bar Plot
edo <- enrichDGN(geneList$ENTREZID[abs(geneList$log2FoldChange) > 2])

barplot(edo, showCategory = 12)

mutate(edo, qscore = -log(p.adjust, base = 10)) |>
  barplot(x = "qscore")

edo <- pairwise_termsim(edo)

# UpSet Plot
upsetplot(edo)

# # Dot plot
# edo2 <- gseDO(geneList)
# 
# ridgeplot(edo)
# 
# gseaplot(edo2,
#   geneSetID = 1,
#   by = "runningScore",
#   title = edo2$Description[1]
# )
# 
# 
# # Gseaplot2 for GSEA result.
# gseaplot2(edo2, geneSetID = 1, title = edo2$Description[1])
# 
# # Gseaplot2 for GSEA result of multile gene sets.
# gseaplot2(edo2, geneSetID = 1:3)
# 
# # Gseaplot2 for GSEA result of multile gene sets(add pvalue_table)
# gseaplot2(edo2,
#   geneSetID = 1:3, pvalue_table = TRUE,
#   color = c("#E495A5", "#86B875", "#7DB0DD"), ES_geom = "dot"
# )
# 
# # Ranked list of genes belong to the specific gene set.
# gsearank(edo2, 1, title = edo2[1, "Description"])
# 
# # Gsearank for multiple gene sets
# pp <- lapply(1:3, function(i) {
#   anno <- edo2[i, c("NES", "pvalue", "p.adjust")]
#   lab <- paste0(names(anno), "=", round(anno, 3), collapse = "\n")
# 
#   gsearank(edo2, i, edo2[i, 2]) + xlab(NULL) + ylab(NULL) +
#     annotate("text", 10000, edo2[i, "enrichmentScore"] * .75, label = lab, hjust = 0, vjust = 0)
# })
# 
# plot_grid(plotlist = pp, ncol = 1)

Session Info

sessionInfo()

## R version 4.3.3 (2024-02-29 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] bigalgebra_1.1.1            bigmemory_4.6.4            
##  [3] europepmc_0.4.3             GEOquery_2.70.0            
##  [5] ReactomePA_1.46.0           enrichplot_1.22.0          
##  [7] clusterProfiler_4.10.1      pathview_1.42.0            
##  [9] DOSE_3.28.2                 biomaRt_2.58.2             
## [11] ensembldb_2.26.1            AnnotationFilter_1.26.0    
## [13] GenomicFeatures_1.54.4      AnnotationHub_3.10.1       
## [15] BiocFileCache_2.10.2        dbplyr_2.5.0               
## [17] org.Hs.eg.db_3.18.0         org.Mm.eg.db_3.18.0        
## [19] AnnotationDbi_1.64.1        lubridate_1.9.3            
## [21] forcats_1.0.0               stringr_1.5.1              
## [23] purrr_1.0.2                 readr_2.1.5                
## [25] tidyr_1.3.1                 tibble_3.2.1               
## [27] tidyverse_2.0.0             conflicted_1.2.0           
## [29] ashr_2.2-63                 apeglm_1.24.0              
## [31] vsn_3.70.0                  qvalue_2.34.0              
## [33] ggstance_0.3.7              ggupset_0.3.0              
## [35] ggvenn_0.1.10               dplyr_1.1.4                
## [37] EnhancedVolcano_1.20.0      ggrepel_0.9.5              
## [39] RColorBrewer_1.1-3          pheatmap_1.0.12            
## [41] cowplot_1.1.3               gridExtra_2.3              
## [43] scater_1.30.1               ggplot2_3.5.0              
## [45] scuttle_1.12.0              iSEEde_1.0.0               
## [47] iSEE_2.14.0                 SingleCellExperiment_1.24.0
## [49] DESeq2_1.42.1               airway_1.22.0              
## [51] SummarizedExperiment_1.32.0 Biobase_2.62.0             
## [53] GenomicRanges_1.54.1        GenomeInfoDb_1.38.8        
## [55] IRanges_2.36.0              S4Vectors_0.40.2           
## [57] BiocGenerics_0.48.1         MatrixGenerics_1.14.0      
## [59] matrixStats_1.3.0          
## 
## loaded via a namespace (and not attached):
##   [1] progress_1.2.3                DT_0.33                      
##   [3] Biostrings_2.70.3             vctrs_0.6.5                  
##   [5] digest_0.6.35                 png_0.1-8                    
##   [7] shape_1.4.6.1                 mixsqp_0.3-54                
##   [9] MASS_7.3-60.0.1               reshape2_1.4.4               
##  [11] SQUAREM_2021.1                httpuv_1.6.15                
##  [13] foreach_1.5.2                 withr_3.0.0                  
##  [15] xfun_0.43                     ggfun_0.1.4                  
##  [17] memoise_2.0.1                 hexbin_1.28.3                
##  [19] ggbeeswarm_0.7.2              gson_0.1.0                   
##  [21] tidytree_0.4.6                GlobalOptions_0.1.2          
##  [23] KEGGgraph_1.62.0              prettyunits_1.2.0            
##  [25] KEGGREST_1.42.0               promises_1.3.0               
##  [27] httr_1.4.7                    restfulr_0.0.15              
##  [29] rstudioapi_0.16.0             shinyAce_0.4.2               
##  [31] miniUI_0.1.1.1                generics_0.1.3               
##  [33] reactome.db_1.86.2            curl_5.2.1                   
##  [35] zlibbioc_1.48.2               ScaledMatrix_1.10.0          
##  [37] ggraph_2.2.1                  polyclip_1.10-6              
##  [39] GenomeInfoDbData_1.2.11       SparseArray_1.2.4            
##  [41] interactiveDisplayBase_1.40.0 xtable_1.8-4                 
##  [43] doParallel_1.0.17             evaluate_0.23                
##  [45] S4Arrays_1.2.1                preprocessCore_1.64.0        
##  [47] hms_1.1.3                     irlba_2.3.5.1                
##  [49] colorspace_2.1-0              filelock_1.0.3               
##  [51] shinyWidgets_0.8.5            magrittr_2.0.3               
##  [53] Rgraphviz_2.46.0              later_1.3.2                  
##  [55] viridis_0.6.5                 ggtree_3.10.1                
##  [57] lattice_0.22-6                XML_3.99-0.16.1              
##  [59] shadowtext_0.1.3              triebeard_0.4.1              
##  [61] pillar_1.9.0                  nlme_3.1-164                 
##  [63] iterators_1.0.14              compiler_4.3.3               
##  [65] beachmat_2.18.1               stringi_1.8.3                
##  [67] GenomicAlignments_1.38.2      plyr_1.8.9                   
##  [69] crayon_1.5.2                  abind_1.4-5                  
##  [71] BiocIO_1.12.0                 truncnorm_1.0-9              
##  [73] gridGraphics_0.5-1            emdbook_1.3.13               
##  [75] locfit_1.5-9.9                graphlayouts_1.1.1           
##  [77] bit_4.0.5                     fastmatch_1.1-4              
##  [79] codetools_0.2-20              BiocSingular_1.18.0          
##  [81] bslib_0.7.0                   GetoptLong_1.0.5             
##  [83] mime_0.12                     splines_4.3.3                
##  [85] circlize_0.4.16               Rcpp_1.0.12                  
##  [87] sparseMatrixStats_1.14.0      HDO.db_0.99.1                
##  [89] knitr_1.46                    blob_1.2.4                   
##  [91] utf8_1.2.4                    clue_0.3-65                  
##  [93] BiocVersion_3.18.1            fs_1.6.3                     
##  [95] DelayedMatrixStats_1.24.0     ggplotify_0.1.2              
##  [97] Matrix_1.6-5                  statmod_1.5.0                
##  [99] tzdb_0.4.0                    tweenr_2.0.3                 
## [101] pkgconfig_2.0.3               tools_4.3.3                  
## [103] cachem_1.0.8                  RSQLite_2.3.6                
## [105] viridisLite_0.4.2             DBI_1.2.2                    
## [107] numDeriv_2016.8-1.1           graphite_1.48.0              
## [109] fastmap_1.1.1                 rmarkdown_2.26               
## [111] scales_1.3.0                  shinydashboard_0.7.2         
## [113] Rsamtools_2.18.0              sass_0.4.9                   
## [115] patchwork_1.2.0               coda_0.19-4.1                
## [117] BiocManager_1.30.22           graph_1.80.0                 
## [119] farver_2.1.1                  tidygraph_1.3.1              
## [121] scatterpie_0.2.2              mgcv_1.9-1                   
## [123] yaml_2.3.8                    rtracklayer_1.62.0           
## [125] cli_3.6.2                     lifecycle_1.0.4              
## [127] mvtnorm_1.2-4                 rintrojs_0.3.4               
## [129] BiocParallel_1.36.0           timechange_0.3.0             
## [131] gtable_0.3.4                  rjson_0.2.21                 
## [133] parallel_4.3.3                ape_5.8                      
## [135] limma_3.58.1                  jsonlite_1.8.8               
## [137] colourpicker_1.3.0            edgeR_4.0.16                 
## [139] bitops_1.0-7                  bigmemory.sri_0.1.8          
## [141] bit64_4.0.5                   yulab.utils_0.1.4            
## [143] BiocNeighbors_1.20.2          urltools_1.7.3               
## [145] bdsmatrix_1.3-7               jquerylib_0.1.4              
## [147] highr_0.10                    GOSemSim_2.28.1              
## [149] shinyjs_2.1.0                 lazyeval_0.2.2               
## [151] shiny_1.8.1.1                 htmltools_0.5.8.1            
## [153] affy_1.80.0                   GO.db_3.18.0                 
## [155] rappdirs_0.3.3                glue_1.7.0                   
## [157] XVector_0.42.0                RCurl_1.98-1.14              
## [159] treeio_1.26.0                 igraph_2.0.3                 
## [161] invgamma_1.1                  R6_2.5.1                     
## [163] labeling_0.4.3                cluster_2.1.6                
## [165] bbmle_1.0.25.1                aplot_0.2.2                  
## [167] DelayedArray_0.28.0           tidyselect_1.2.1             
## [169] vipor_0.4.7                   ProtGenerics_1.34.0          
## [171] ggforce_0.4.2                 xml2_1.3.6                   
## [173] rsvd_1.0.5                    munsell_0.5.1                
## [175] affyio_1.72.0                 data.table_1.15.4            
## [177] htmlwidgets_1.6.4             fgsea_1.28.0                 
## [179] ComplexHeatmap_2.18.0         rlang_1.1.3                  
## [181] uuid_1.2-0                    ggnewscale_0.4.10            
## [183] fansi_1.0.6                   beeswarm_0.4.0

airway_GSE52778

Mikias HW

2024-04-26