These are the figures we will reproduce in this section.
Figure 1a, 1b
Figure 1a, 1b


To find the overlapping peaks of these 2 TFs, there are many ways, such as:
1. Using the intersect command of bedtools : This way, the Unix command line will be used.
2. Using the GenomicRanges package in R: We will use this way.

R.version.string # show R version 
## [1] "R version 4.4.1 (2024-06-14)"

Loading the library.

library(GenomicRanges)
library(rtracklayer) # for reading in bed file
library(here) # for file path

Importing the peak files, which are the output from macs3 where narrowPeak mode was used for ChIP-seq of transcription factors.

TAZ_peaks<- import(here("data/HPC/bowtie2/macs3/TAZ_REP1_peaks.narrowPeak"))
YAP_peaks<- import(here("data/HPC/bowtie2/macs3/YAP_REP1_peaks.narrowPeak"))

These are the peak data of TAZ and YAP, respectively.

TAZ_peaks # 9509 peaks
## GRanges object with 9509 ranges and 6 metadata columns:
##          seqnames              ranges strand |               name     score
##             <Rle>           <IRanges>  <Rle> |        <character> <numeric>
##      [1]     chr1          9878-10089      * |    TAZ_REP1_peak_1        90
##      [2]     chr1         10150-10379      * |    TAZ_REP1_peak_2       101
##      [3]     chr1       180750-180982      * |    TAZ_REP1_peak_3        40
##      [4]     chr1     1024619-1025059      * |    TAZ_REP1_peak_4       507
##      [5]     chr1     1264837-1265166      * |    TAZ_REP1_peak_5       161
##      ...      ...                 ...    ... .                ...       ...
##   [9505]     chrX 154733159-154733366      * | TAZ_REP1_peak_9505        23
##   [9506]     chrX 154735685-154735906      * | TAZ_REP1_peak_9506        39
##   [9507]     chrX 154797063-154797321      * | TAZ_REP1_peak_9507        39
##   [9508]     chrX 155888248-155888490      * | TAZ_REP1_peak_9508       108
##   [9509]     chrX 156030285-156030782      * | TAZ_REP1_peak_9509       318
##          signalValue    pValue    qValue      peak
##            <numeric> <numeric> <numeric> <integer>
##      [1]     2.51884  12.68540   9.04597       181
##      [2]     2.94682  13.87440  10.16240        44
##      [3]     2.50179   7.33767   4.07683        41
##      [4]    16.61940  55.66760  50.71850       195
##      [5]     9.57991  20.15700  16.16140       191
##      ...         ...       ...       ...       ...
##   [9505]     3.99163   5.37437   2.32820       170
##   [9506]     4.78995   7.14985   3.91207       146
##   [9507]     4.78995   7.14985   3.91207       136
##   [9508]     6.70607  14.55380  10.80710       159
##   [9509]     2.47456  36.37840  31.87160       260
##   -------
##   seqinfo: 26 sequences from an unspecified genome; no seqlengths
YAP_peaks # 10403 peaks
## GRanges object with 10403 ranges and 6 metadata columns:
##           seqnames              ranges strand |                name     score
##              <Rle>           <IRanges>  <Rle> |         <character> <numeric>
##       [1]     chr1     1024621-1025009      * |     YAP_REP1_peak_1       482
##       [2]     chr1     1079578-1079776      * |     YAP_REP1_peak_2        71
##       [3]     chr1     1264872-1265612      * |     YAP_REP1_peak_3       237
##       [4]     chr1     1360601-1360960      * |     YAP_REP1_peak_4       330
##       [5]     chr1     1724526-1724742      * |     YAP_REP1_peak_5        50
##       ...      ...                 ...    ... .                 ...       ...
##   [10399]     chrX 154600480-154600861      * | YAP_REP1_peak_10399       133
##   [10400]     chrX 154604096-154604413      * | YAP_REP1_peak_10400        53
##   [10401]     chrX 154732664-154733351      * | YAP_REP1_peak_10401        81
##   [10402]     chrX 154805064-154805275      * | YAP_REP1_peak_10402        27
##   [10403]     chrX 155888194-155888500      * | YAP_REP1_peak_10403       126
##           signalValue    pValue    qValue      peak
##             <numeric> <numeric> <numeric> <integer>
##       [1]    15.16740  52.67000  48.25070       193
##       [2]     6.07276  10.57180   7.16736       135
##       [3]    11.27270  27.72950  23.72220       189
##       [4]    14.42280  37.29910  33.09730       175
##       [5]     4.73602   8.26824   5.01273       158
##       ...         ...       ...       ...       ...
##   [10399]     8.35005  17.03310  13.33410       184
##   [10400]     5.31367   8.61245   5.32944       167
##   [10401]     6.45231  11.59140   8.13184       507
##   [10402]     4.09966   5.78000   2.75210        95
##   [10403]     6.91847  16.30110  12.62580       158
##   -------
##   seqinfo: 28 sequences from an unspecified genome; no seqlengths

Let’s find overlapping peaks between TAZ and YAP.

YAP_overlap_TAZ_peaks<- subsetByOverlaps(YAP_peaks, TAZ_peaks)
length(YAP_overlap_TAZ_peaks) 
## [1] 6903

There are 6903 out of 9807 YAP1 peaks overlapping with TAZ peaks.

Next, we will use a Venn diagram to visualize the intersection data. Here, we will use the Vennerable package to do so.

Fig 1a: Venn diagram showing the overlapping peak number of YAP/TAZ

# devtools::install_github("js229/Vennerable")

library(Vennerable)

n_YAP <- length(YAP_peaks)  # peak count of YAP
n_TAZ <- length(TAZ_peaks)  # peak count of TAZ 
n_overlap1 <- length(YAP_overlap_TAZ_peaks) # peak count of YAP overlapping with TAZ 

# Calculate unique peaks
n_YAP_unique <- n_YAP - n_overlap1 # unique peaks to YAP
n_TAZ_unique <- n_TAZ - n_overlap1 # unique peaks to TAZ

venn_data1_unique <- Venn(SetNames = c("YAP=10403", "TAZ=9506"),
                  Weight = c(
                    "10" = n_YAP_unique, # unique to YAP 
                    "01" = n_TAZ_unique, # unique to TAZ
                    "11" = n_overlap1 
                  ))

# Plot the Venn diagram
plot(venn_data1_unique)

NOTE:
10403 = count of all YAP peaks (including the one overlapping with TAZ), not the unique peak of YAP.
Similarly, 9506 = count of all TAZ peaks(including the one overlapping with YAP), not the unique peak of TAZ.

Fig 1b: Venn-diagram showing the overlapping peak number of YAP/TAZ/TEAD4

Import TEAD4 peak

TEAD4_peaks <- import(here("data/HPC/bowtie2/macs3/TEAD4_REP1_peaks.narrowPeak"))

TEAD4_peaks # 10668 peaks
## GRanges object with 10668 ranges and 6 metadata columns:
##           seqnames              ranges strand |                  name     score
##              <Rle>           <IRanges>  <Rle> |           <character> <numeric>
##       [1]     chr1     1024676-1025014      * |     TEAD4_REP1_peak_1       353
##       [2]     chr1     1264836-1265252      * |     TEAD4_REP1_peak_2       432
##       [3]     chr1     1360563-1360967      * |     TEAD4_REP1_peak_3       592
##       [4]     chr1     1659224-1659603      * |     TEAD4_REP1_peak_4        68
##       [5]     chr1     2061314-2061683      * |     TEAD4_REP1_peak_5       301
##       ...      ...                 ...    ... .                   ...       ...
##   [10664]     chrX 154732882-154733284      * | TEAD4_REP1_peak_10664        84
##   [10665]     chrX 154735777-154736046      * | TEAD4_REP1_peak_10665       113
##   [10666]     chrX 154967582-154967937      * | TEAD4_REP1_peak_10666        76
##   [10667]     chrX 155057299-155057659      * | TEAD4_REP1_peak_10667        44
##   [10668]     chrX 155888225-155888537      * | TEAD4_REP1_peak_10668        92
##           signalValue    pValue    qValue      peak
##             <numeric> <numeric> <numeric> <integer>
##       [1]    12.14300   39.4676  35.34760       144
##       [2]    16.19070   47.4950  43.23410       213
##       [3]    20.72730   63.7883  59.26370       205
##       [4]     5.80713   10.1234   6.84313        81
##       [5]    12.75490   34.1251  30.11540       177
##       ...         ...       ...       ...       ...
##   [10664]     6.49252  11.79220   8.44084       204
##   [10665]     7.57341  14.84510  11.36370       176
##   [10666]     5.40363  10.92850   7.61277       181
##   [10667]     4.82624   7.53305   4.40812       193
##   [10668]     5.80006  12.68390   9.29092       124
##   -------
##   seqinfo: 25 sequences from an unspecified genome; no seqlengths

Finding the overlapping peaks between YAP_overlap_TAZ_peaks and TEAD4_peaks

YAP_overlap_TAZ_peaks_overlap_TEAD4 <- subsetByOverlaps(YAP_overlap_TAZ_peaks, TEAD4_peaks)

YAP_overlap_TAZ_peaks_overlap_TEAD4 # 5656 peaks
## GRanges object with 5656 ranges and 6 metadata columns:
##          seqnames              ranges strand |                name     score
##             <Rle>           <IRanges>  <Rle> |         <character> <numeric>
##      [1]     chr1     1024621-1025009      * |     YAP_REP1_peak_1       482
##      [2]     chr1     1264872-1265612      * |     YAP_REP1_peak_3       237
##      [3]     chr1     1360601-1360960      * |     YAP_REP1_peak_4       330
##      [4]     chr1     2061303-2061685      * |     YAP_REP1_peak_6       462
##      [5]     chr1     2140061-2140338      * |     YAP_REP1_peak_7        81
##      ...      ...                 ...    ... .                 ...       ...
##   [5652]     chrX 152896063-152896692      * | YAP_REP1_peak_10393       143
##   [5653]     chrX 154596539-154596884      * | YAP_REP1_peak_10398       291
##   [5654]     chrX 154600480-154600861      * | YAP_REP1_peak_10399       133
##   [5655]     chrX 154732664-154733351      * | YAP_REP1_peak_10401        81
##   [5656]     chrX 155888194-155888500      * | YAP_REP1_peak_10403       126
##          signalValue    pValue    qValue      peak
##            <numeric> <numeric> <numeric> <integer>
##      [1]    15.16740   52.6700  48.25070       193
##      [2]    11.27270   27.7295  23.72220       189
##      [3]    14.42280   37.2991  33.09730       175
##      [4]    17.26140   50.6053  46.21160       187
##      [5]     6.45231   11.5914   8.13184       165
##      ...         ...       ...       ...       ...
##   [5652]     8.69052   18.1155  14.38090       481
##   [5653]    13.28420   33.2336  29.10470       154
##   [5654]     8.35005   17.0331  13.33410       184
##   [5655]     6.45231   11.5914   8.13184       507
##   [5656]     6.91847   16.3011  12.62580       158
##   -------
##   seqinfo: 28 sequences from an unspecified genome; no seqlengths

Peak and the overlapping peaks data

n_YAP_TAZ <- length(YAP_overlap_TAZ_peaks) # YAP and TAZ overlapping peak count
n_TEAD4 <- length(TEAD4_peaks) # TEAD4 peak count
n_overlap2<- length(YAP_overlap_TAZ_peaks_overlap_TEAD4) # YAP, TAZ, and TEAD4 overlapping peak count

Venn diagram represented YAP/TAZ overlap with TEAD4.

# Calculate unique peaks
n_YAP_TAZ_unique <- n_YAP_TAZ - n_overlap2 # unique peaks to YAP/TAZ
n_TEAD4_unique <- n_TEAD4 - n_overlap2 # unique peaks to TEAD4

venn_data2_unique <- Venn(SetNames = c("YAP/TAZ=6903", "TEAD4=10668"),
                  Weight = c(
                    "10" = n_YAP_TAZ_unique, # unique to YAP 
                    "01" = n_TEAD4_unique, # unique to TAZ
                    "11" = n_overlap2 
                  ))

# Plot the Venn diagram
plot(venn_data2_unique)

NOTE 1:
6903 = count of all YAP/TAZ(including the one overlap with TEAD4), not the unique peak of YAP/TAZ.
Similarly, 10668 = count of all TEAD4 peaks(including the one overlap with YAP/TAZ), not the unique peak of TEAD4.

NOTE 2: Other ways to create a Venn diagram.
1. makeVennDiagram in the ChIPpeakAnno package.
2. ggVennDiagram
3. For customized color, you save the file as a .pdf or other vector file, then customize it in Adobe Illustrator.