This report provides a comprehensive analysis of clinical trial data. Key highlights include:
# Interactive table of the trial records (filterable, searchable, 10 rows per
# page).
#
# The status palette is computed once here. Previously the style callback
# called get_colors() and unique() again for every cell of the column.
# NOTE(review): colours are indexed by first-appearance order of the statuses
# (unique()), whereas the ggplot charts in this report appear to assign the
# same palette by sorted level order -- confirm whether table and chart colours
# are meant to agree.
table_status_levels <- unique(ctg_data$`Study Status`)
table_status_palette <- get_colors(length(table_status_levels))

reactable(
  ctg_data,
  filterable = TRUE,
  searchable = TRUE,
  bordered = TRUE,
  striped = TRUE,
  highlight = TRUE,
  compact = TRUE,
  defaultPageSize = 10,
  columns = list(
    # Colour each status cell with its palette entry; text is forced to white.
    `Study Status` = colDef(
      style = function(value) {
        color <- table_status_palette[match(value, table_status_levels)]
        list(background = color, color = "white")
      }
    ),
    # Thousands separators for enrollment counts.
    Enrollment = colDef(
      format = colFormat(separators = TRUE)
    ),
    `Start Date` = colDef(
      format = colFormat(date = TRUE)
    ),
    `Completion Date` = colDef(
      format = colFormat(date = TRUE)
    )
  ),
  defaultColDef = colDef(
    # Character values go through truncate_text(); other values are shown
    # unchanged.
    cell = function(value) {
      if (is.character(value)) {
        value <- truncate_text(value)
      }
      value
    },
    minWidth = 100
  )
)
# Number of NA values in each column of ctg_data.
# vapply() pins the result to exactly one integer per column, so the type is
# stable even if ctg_data has no columns (sapply() returns an empty list there,
# which breaks the data.frame() call below).
missing_data <- vapply(ctg_data, function(x) sum(is.na(x)), integer(1))

# One row per variable, with the absolute and percentage missing counts.
missing_data_ctg_data <- data.frame(
  Variable = names(missing_data),
  MissingCount = missing_data,
  PercentMissing = missing_data / nrow(ctg_data) * 100
)

# Most incomplete variables first.
missing_data_ctg_data <-
  missing_data_ctg_data[order(-missing_data_ctg_data$PercentMissing), ]

ggplot(
  missing_data_ctg_data,
  aes(x = reorder(Variable, -PercentMissing), y = PercentMissing)
) +
  geom_col(fill = color_palette[1]) +
  theme_minimal() +
  labs(
    title = "Percentage of Missing Data by Variable",
    x = "Variable",
    y = "Percent Missing"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # PercentMissing is already on a 0-100 scale, hence scale = 1.
  scale_y_continuous(labels = scales::percent_format(scale = 1))
The chart above shows the percentage of missing data for each variable. Variables with high percentages of missing data may require further investigation or imputation techniques.
# Tally the studies per status and show the tallies as an interactive bar
# chart, tallest bar first.
status_counts <- table(ctg_data$`Study Status`)
status_ctg_data <- data.frame(
  status = names(status_counts),
  count = as.numeric(status_counts)
)

# One palette entry per status.
n_colors <- nrow(status_ctg_data)
status_colors <- get_colors(n_colors)

p <- ggplot(
  status_ctg_data,
  aes(x = reorder(status, -count), y = count, fill = status)
) +
  geom_bar(stat = "identity") +
  # Print each count just above its bar.
  geom_text(aes(label = count), vjust = -0.5) +
  scale_fill_manual(values = status_colors) +
  labs(
    title = "Distribution of Study Statuses",
    x = "Study Status",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

ggplotly(p)
This chart shows the distribution of study statuses. The most common status is ‘COMPLETED’ with 443 studies.
# Interactive boxplot of enrollment per study phase, on a log10 y-axis.
# One palette entry per phase found by table().
phase_counts <- table(ctg_data$Phases)
n_colors <- length(phase_counts)
phase_colors <- get_colors(n_colors)

p <- ggplot(
  ctg_data,
  aes(x = Phases, y = Enrollment, fill = Phases)
) +
  # Outliers are drawn as open red circles.
  geom_boxplot(outlier.colour = "red", outlier.shape = 1) +
  scale_fill_manual(values = phase_colors) +
  # NOTE(review): an Enrollment of 0 or NA has no position on a log axis;
  # ggplot is expected to drop those rows with a warning -- confirm acceptable.
  scale_y_log10(labels = scales::comma_format()) +
  labs(
    title = "Enrollment by Study Phase",
    x = "Study Phase",
    y = "Enrollment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

ggplotly(p)
This boxplot visualizes the distribution of enrollment numbers across different study phases. Note the logarithmic scale on the y-axis to better show the wide range of enrollment numbers.
# Parse the two date columns (ISO "YYYY-MM-DD") and derive each study's
# duration in years (day difference divided by 365.25).
ctg_data[["start_date"]] <- as.Date(
  ctg_data[["Start Date"]],
  format = "%Y-%m-%d"
)
ctg_data[["completion_date"]] <- as.Date(
  ctg_data[["Completion Date"]],
  format = "%Y-%m-%d"
)
ctg_data[["duration"]] <- as.numeric(
  difftime(
    ctg_data[["completion_date"]],
    ctg_data[["start_date"]],
    units = "days"
  )
) / 365.25

# One palette entry per status found by table().
status_counts <- table(ctg_data$`Study Status`)
n_colors <- length(status_counts)
status_colors <- get_colors(n_colors)

# Interactive scatter plot: start date against duration, coloured by status.
p <- ggplot(
  ctg_data,
  aes(x = start_date, y = duration, color = `Study Status`)
) +
  geom_point(alpha = 0.6) +
  scale_color_manual(values = status_colors) +
  # One tick per calendar year, labelled with the four-digit year.
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  labs(
    title = "Study Duration Timeline",
    x = "Start Date",
    y = "Study Duration (Years)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplotly(p)
This scatter plot shows the relationship between study start dates and durations. Each point represents a study, colored by its status.
# Share of each study type within every funder type.
# `prop` is computed per funder, so the proportions for one funder sum to 1.
# The grouping is dropped afterwards so that ctg_data_summary does not stay
# grouped for any later use.
ctg_data_summary <- ctg_data %>%
  count(`Funder Type`, `Study Type`) %>%
  group_by(`Funder Type`) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup()

# One palette entry per study type found by table().
study_type_counts <- table(ctg_data$`Study Type`)
n_colors <- length(study_type_counts)
study_type_colors <- get_colors(n_colors)

# Interactive bar chart with side-by-side (dodged) bars: one bar per study
# type within each funder type.
p <- ggplot(
  ctg_data_summary,
  aes(x = `Funder Type`, y = prop, fill = `Study Type`)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(
    title = "Funding Sources and Study Types",
    x = "Funder Type",
    y = "Proportion"
  ) +
  scale_fill_manual(values = study_type_colors) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # prop is a 0-1 fraction; percent_format() shows it as a percentage.
  scale_y_continuous(labels = scales::percent_format())

ggplotly(p)
This grouped bar chart shows, for each funder type, the proportion of its studies that fall into each study type.
This report provides a comprehensive overview of the clinical trial data, highlighting key trends in study status, enrollment, duration, and funding. The visualizations offer insights into the distribution and relationships within the data, which can be valuable for decision-making and further analysis.
For any questions or further analysis requests, please contact the report author.