an analytical & musical journey
Hans van Leeuwen © November 2024
“We thought that we had the answers
It was the questions we had wrong”
∼ 11 O’Clock Tick Tock, U2, 1980
Project development, code & data repositories, deployment and publishing
Swipe / arrow down for more detail on the captured data
Swipe / arrow down to see the R code that generated the distance tree
# required libraries
library(dplyr)
library(seqinr)
library(ape)
library(ggplot2)
library(ggtree)
# Load my custom library. Install it from GitHub only when it is not already
# available, so that re-running the script does not contact GitHub (or
# require devtools) on every run.
if (!requireNamespace("concertData", quietly = TRUE)) {
  devtools::install_github("vanleeuwen-hans/concertData")
}
library(concertData)
# Read the U2 concert data
u2data <- read_concertData_csv("u2data/u2data_all_shows_clean_final.csv")
# Keep only the shows of one tour. `%in%` never returns NA, so rows whose
# tour is missing are dropped; subsetting with `==` would instead turn each
# of them into a row consisting entirely of NA values.
tour_data <- u2data[u2data$tour %in% "U2 Vertigo Tour", ]
# Remove snippets
no_snippets_data <- concertData_remove_snippets(tour_data)
# Remove shows with no setlist
filtered_data <- concertData_remove_showsNoSetlist(no_snippets_data)
# Load the MAFFT Clustal alignment as a data frame (the alignment file itself
# is produced by code that lives elsewhere)
alignment_data <- read_mafft_clustal_alignment(
  "mafft/u2_setlists_mafft_alignment.ASCII"
)
# The distance step works on a named list: one aligned sequence per show,
# keyed by "showID<id>"
sequences <- as.list(alignment_data$sequence)
names(sequences) <- paste0("showID", alignment_data$showID)
# Pairwise distances between the aligned setlists
dist_matrix <- calculate_distance_matrix(sequences)
# nj() is fed a dist object rather than the raw matrix
setlist_dist <- as.dist(dist_matrix)
# Build the neighbour-joining tree
setlist_tree <- nj(setlist_dist)
# One row of descriptive info per show
show_columns <- c("showID", "city", "date", "country", "leg")
show_info <- unique(filtered_data[, show_columns])
# Combined "city - YYYY-MM-DD" column
show_dates <- format(show_info$date, "%Y-%m-%d")
show_info$city_date <- paste(show_info$city, show_dates, sep = " - ")
# Ask the package helper for the city/date labels of the tree tips
new_labels <- create_city_date_tree_labels(
  filtered_data,
  setlist_tree,
  show_info
)
# Swap the tree's tip labels for the new ones
setlist_tree$tip.label <- new_labels
# Standard tree plot, created and then displayed
tree_plot <- create_ggtree_plot(setlist_tree)
print(tree_plot)
# Tree plot coloured by country
plot_country <- create_ggtree_plot_colored(
  setlist_tree,
  show_info,
  color_by = "country"
)
print(plot_country)
# Tree plot coloured by tour leg
plot_leg <- create_ggtree_plot_colored(
  setlist_tree,
  show_info,
  color_by = "leg"
)
print(plot_leg)
Swipe / arrow down for my project links