library(zoomstudentengagement)
library(dplyr)
library(ggplot2)
This vignette shows how to create visualizations and analyze student
engagement patterns using the zoomstudentengagement
package.
First, let’s prepare some sample data for analysis:
# Create sample data for demonstration.
# In practice, you would load actual transcript and roster data.

# Sample transcript metrics: one row per speaker. Scalar values (for example
# `course_section`) are recycled to every row by tibble().
transcripts_metrics_df <- tibble::tibble(
  name = c(
    "Alice Johnson", "Bob Smith", "Carol Davis", "David Wilson", "Eva Brown"
  ),
  n = c(8, 12, 5, 15, 3),
  duration = c(45.2, 67.8, 23.1, 89.4, 12.5),
  wordcount = c(1200, 1800, 650, 2200, 400),
  # List-column: one element per speaker
  comments = list(
    "Good point", "Interesting question", "I agree", "Follow-up question",
    "Brief comment"
  ),
  n_perc = c(18.6, 27.9, 11.6, 34.9, 7.0),
  duration_perc = c(19.2, 28.8, 9.8, 38.0, 5.3),
  wordcount_perc = c(19.2, 28.8, 10.4, 35.2, 6.4),
  wpm = c(26.5, 26.6, 28.1, 24.6, 32.0),
  course_section = "LTF.201.1",
  course = 201,
  section = 1,
  day = "Thursday",
  time = "18:30",
  # tibble() evaluates its arguments in order, so the `name` column defined
  # above can be reused here.
  name_raw = name,
  start_time_local = as.POSIXct(
    "2024-01-24 18:30:00",
    tz = "America/Los_Angeles"
  ),
  dept = "LTF",
  session_num = 1
)
# Sample roster sessions: one row per student; the `first_last` values are the
# same five names used in the sample transcript metrics.
roster_sessions <- tibble::tibble(
  student_id = c("12345", "12346", "12347", "12348", "12349"),
  first_last = c(
    "Alice Johnson", "Bob Smith", "Carol Davis", "David Wilson", "Eva Brown"
  ),
  preferred_name = c("Alice", "Bob", "Carol", "David", "Eva"),
  dept = "LTF",
  course = 201,
  section = 1,
  session_num = 1,
  start_time_local = as.POSIXct(
    "2024-01-24 18:30:00",
    tz = "America/Los_Angeles"
  ),
  course_section = "LTF.201.1"
)
# Create clean names dataframe.
# Use a temporary directory since we're working with sample data.
temp_dir <- tempdir()

# The lookup file does not exist in the temporary directory, so the call
# emits the two warnings shown below.
clean_names_df <- make_clean_names_df(
  data_folder = temp_dir,
  section_names_lookup_file = "section_names_lookup.csv",
  transcripts_metrics_df,
  roster_sessions
)
#> Warning in load_section_names_lookup(data_folder = data_folder,
#> names_lookup_file = section_names_lookup_file, : File does not exist:
#> /var/folders/gm/wnk5gljx6yd_ffmqb8vf48qh0000gn/T//Rtmpux9gKP/section_names_lookup.csv
#> Warning in load_section_names_lookup(data_folder = data_folder,
#> names_lookup_file = section_names_lookup_file, : Creating empty lookup table.
# Create summary dataframes: the session summary is built from the clean
# names data, and the overall summary is built from the session summary.
transcripts_session_summary_df <-
  make_transcripts_session_summary_df(clean_names_df)
transcripts_summary_df <-
  make_transcripts_summary_df(transcripts_session_summary_df)

# View the summary data
head(transcripts_summary_df)
#> # A tibble: 5 × 10
#> section preferred_name session_ct n duration wordcount wpm perc_n
#> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 David 1 1 89.4 2200 24.6 20
#> 2 1 Bob 1 1 67.8 1800 26.5 20
#> 3 1 Alice 1 1 45.2 1200 26.5 20
#> 4 1 Carol 1 1 23.1 650 28.1 20
#> 5 1 Eva 1 1 12.5 400 32 20
#> # ℹ 2 more variables: perc_duration <dbl>, perc_wordcount <dbl>
The plot_users_by_metric() function creates visualizations for different
engagement metrics:
# One plot per raw engagement metric, each drawn from the overall summary.

# Sessions attended
transcripts_summary_df %>%
  plot_users_by_metric(metric = "session_ct")

# Number of comments
transcripts_summary_df %>%
  plot_users_by_metric(metric = "n")

# Time spent speaking
transcripts_summary_df %>%
  plot_users_by_metric(metric = "duration")

# Words spoken
transcripts_summary_df %>%
  plot_users_by_metric(metric = "wordcount")
The package provides several engagement metrics:
Visualize relative participation:
# Relative participation: the same plots using the percentage columns.

# Share of comments
transcripts_summary_df %>%
  plot_users_by_metric(metric = "perc_n")

# Share of speaking time
transcripts_summary_df %>%
  plot_users_by_metric(metric = "perc_duration")

# Share of words
transcripts_summary_df %>%
  plot_users_by_metric(metric = "perc_wordcount")
Focus analysis on enrolled students only:
# Create students-only summary from the session-level summary
students_only_summary <- make_students_only_transcripts_summary_df(
  transcripts_session_summary_df
)

# Plot students-only metrics
plot_users_by_metric(students_only_summary, metric = "session_ct")
Use masked names for privacy-conscious analysis:
# Privacy-conscious plots of the students-only summary, using masked names.

# Comment counts
plot_users_masked_section_by_metric(df = students_only_summary, metric = "n")

# Speaking duration
plot_users_masked_section_by_metric(
  df = students_only_summary, metric = "duration"
)
Filter data for specific analysis:
# Filter for specific sections
section_data <- transcripts_summary_df %>%
  filter(section == 1) # Use the actual section number from our sample data

# Plot filtered data
plot_users_by_metric(section_data, metric = "wpm")
Compare different metrics:
# Create comparison plots on a 2 x 2 grid.
# par() returns the previous settings, which are saved so the layout can be
# restored afterwards instead of leaking into later plots.
# NOTE(review): par(mfrow = ...) only arranges base-graphics plots. If
# plot_users_by_metric() returns ggplot objects, the four plots are still
# drawn one per page — confirm, and use a grid-based layout if so.
old_par <- par(mfrow = c(2, 2))
plot_users_by_metric(transcripts_summary_df, metric = "n")
plot_users_by_metric(transcripts_summary_df, metric = "duration")
plot_users_by_metric(transcripts_summary_df, metric = "wordcount")
plot_users_by_metric(transcripts_summary_df, metric = "wpm")
par(old_par)
Look for patterns in participation:
# Analyze participation distribution per section.
# Inside summarise(), n() is the group row count, while the bare `n` refers to
# the comment-count column of the summary data.
participation_summary <- transcripts_summary_df %>%
  group_by(section) %>%
  summarise(
    total_students = n(),
    active_students = sum(n > 0),
    avg_comments = mean(n),
    median_comments = median(n),
    participation_rate = active_students / total_students
  )

participation_summary
#> # A tibble: 1 × 6
#> section total_students active_students avg_comments median_comments
#> <chr> <int> <int> <dbl> <int>
#> 1 1 5 5 1 1
#> # ℹ 1 more variable: participation_rate <dbl>
Identify different types of engagement:
# Categorize students by engagement type, then count students per category.
# case_when() uses the first condition that matches, so the thresholds must
# stay in ascending order; TRUE is the catch-all for everything above 5.
engagement_categories <- transcripts_summary_df %>%
  mutate(
    engagement_type = case_when(
      n == 0 ~ "No participation",
      n <= 2 ~ "Low participation",
      n <= 5 ~ "Moderate participation",
      TRUE ~ "High participation"
    )
  ) %>%
  count(engagement_type)

engagement_categories
#> # A tibble: 1 × 2
#> engagement_type n
#> <chr> <int>
#> 1 Low participation 5
Use the data to create custom visualizations:
# Custom participation distribution: histogram of comment counts with one bin
# per integer count.
ggplot(transcripts_summary_df, aes(x = n)) +
  geom_histogram(binwidth = 1, fill = "steelblue", alpha = 0.7) +
  labs(
    title = "Distribution of Comment Counts",
    x = "Number of Comments",
    y = "Number of Students"
  ) +
  theme_minimal()
# Custom duration vs word count: scatter plot with a linear trend line drawn
# without its confidence band (se = FALSE).
ggplot(transcripts_summary_df, aes(x = duration, y = wordcount)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Speaking Duration vs Word Count",
    x = "Duration (seconds)",
    y = "Word Count"
  ) +
  theme_minimal()
#> `geom_smooth()` using formula = 'y ~ x'
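Engagement metrics available in the summary data:
- session_ct: session count
- n or perc_n: comment count, or percentage of comments
- duration or perc_duration: speaking duration, or percentage of speaking time
- wordcount or perc_wordcount: word count, or percentage of words
- wpm: speaking rate (words per minute)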