Plotting and Analysis with zoomstudentengagement

zoomstudentengagement package

2025-08-03

library(zoomstudentengagement)
library(dplyr)
library(ggplot2)

Plotting and Analysis

This vignette shows how to create visualizations and analyze student engagement patterns using the zoomstudentengagement package.

Preparing Data for Analysis

Loading Sample Data

First, let’s prepare some sample data for analysis:

# Create sample data for demonstration
# In practice, you would load actual transcript and roster data

# Sample transcript metrics: one row per speaker for a single class session.
# Only the raw measures (n, duration, wordcount) are typed in by hand. The
# share and pace columns are derived from them rather than hard-coded, so the
# percentages add up to ~100 and wpm always equals wordcount / duration.
# NOTE(review): wpm = wordcount / duration treats duration as minutes, while
# the vignette text labels duration in seconds -- confirm the intended unit.
transcripts_metrics_df <- tibble::tibble(
  name = c(
    "Alice Johnson", "Bob Smith", "Carol Davis", "David Wilson", "Eva Brown"
  ),
  n = c(8, 12, 5, 15, 3),
  duration = c(45.2, 67.8, 23.1, 89.4, 12.5),
  wordcount = c(1200, 1800, 650, 2200, 400),
  comments = list(
    "Good point",
    "Interesting question",
    "I agree",
    "Follow-up question",
    "Brief comment"
  ),
  # Each speaker's share of the session totals, in percent (one decimal)
  n_perc = round(100 * n / sum(n), 1),
  duration_perc = round(100 * duration / sum(duration), 1),
  wordcount_perc = round(100 * wordcount / sum(wordcount), 1),
  # Speaking pace derived from the raw columns above
  wpm = round(wordcount / duration, 1),
  # Session-level constants, recycled to every row
  course_section = "LTF.201.1",
  course = 201,
  section = 1,
  day = "Thursday",
  time = "18:30",
  name_raw = name,
  start_time_local = as.POSIXct(
    "2024-01-24 18:30:00",
    tz = "America/Los_Angeles"
  ),
  dept = "LTF",
  session_num = 1
)

# Sample roster: five students in the same department, course, section and
# session as the sample transcript metrics
roster_sessions <- tibble::tibble(
  student_id = as.character(12345:12349),
  first_last = c(
    "Alice Johnson", "Bob Smith", "Carol Davis", "David Wilson", "Eva Brown"
  ),
  preferred_name = c("Alice", "Bob", "Carol", "David", "Eva"),
  # Constants below are recycled to every row
  dept = "LTF",
  course = 201,
  section = 1,
  session_num = 1,
  start_time_local = as.POSIXct(
    "2024-01-24 18:30:00",
    tz = "America/Los_Angeles"
  ),
  course_section = "LTF.201.1"
)

# Create clean names dataframe from the sample transcript metrics and roster
# Use a temporary directory since we're working with sample data.
# No section_names_lookup.csv exists in that directory, so this call emits two
# warnings ("File does not exist" and "Creating empty lookup table"); both are
# expected in this demonstration.
temp_dir <- tempdir()
clean_names_df <- make_clean_names_df(
  data_folder = temp_dir,
  section_names_lookup_file = "section_names_lookup.csv",
  transcripts_metrics_df,
  roster_sessions
)
#> Warning in load_section_names_lookup(data_folder = data_folder,
#> names_lookup_file = section_names_lookup_file, : File does not exist:
#> /var/folders/gm/wnk5gljx6yd_ffmqb8vf48qh0000gn/T//Rtmpux9gKP/section_names_lookup.csv
#> Warning in load_section_names_lookup(data_folder = data_folder,
#> names_lookup_file = section_names_lookup_file, : Creating empty lookup table.

# Create summary dataframes (the session-level summary feeds the overall one)
transcripts_session_summary_df <- clean_names_df %>%
  make_transcripts_session_summary_df()
transcripts_summary_df <- transcripts_session_summary_df %>%
  make_transcripts_summary_df()

# Preview the first rows of the summary data
transcripts_summary_df %>%
  head()
#> # A tibble: 5 × 10
#>   section preferred_name session_ct     n duration wordcount   wpm perc_n
#>   <chr>   <chr>               <int> <int>    <dbl>     <dbl> <dbl>  <dbl>
#> 1 1       David                   1     1     89.4      2200  24.6     20
#> 2 1       Bob                     1     1     67.8      1800  26.5     20
#> 3 1       Alice                   1     1     45.2      1200  26.5     20
#> 4 1       Carol                   1     1     23.1       650  28.1     20
#> 5 1       Eva                     1     1     12.5       400  32       20
#> # ℹ 2 more variables: perc_duration <dbl>, perc_wordcount <dbl>

Basic Visualization

Plotting by Metric

The plot_users_by_metric() function creates visualizations for different engagement metrics:

# Sessions attended per student
transcripts_summary_df %>%
  plot_users_by_metric(metric = "session_ct")

# Comments made per student
transcripts_summary_df %>%
  plot_users_by_metric(metric = "n")

# Speaking duration per student
transcripts_summary_df %>%
  plot_users_by_metric(metric = "duration")

# Words spoken per student
transcripts_summary_df %>%
  plot_users_by_metric(metric = "wordcount")

Understanding the Metrics

Available Metrics

The package provides several engagement metrics:

  • session_ct: Number of sessions attended
  • n: Number of comments made
  • perc_n: Percentage of total comments
  • duration: Total speaking time (seconds)
  • perc_duration: Percentage of total speaking time
  • wordcount: Total words spoken
  • perc_wordcount: Percentage of total words
  • wpm: Words per minute

Percentage Metrics

Visualize relative participation:

# Share of all comments
transcripts_summary_df %>%
  plot_users_by_metric(metric = "perc_n")


# Share of total speaking time
transcripts_summary_df %>%
  plot_users_by_metric(metric = "perc_duration")


# Share of all words spoken
transcripts_summary_df %>%
  plot_users_by_metric(metric = "perc_wordcount")

Students-Only Analysis

Filtering for Enrolled Students

Focus analysis on enrolled students only:

# Build the students-only summary from the session-level summary
students_only_summary <- transcripts_session_summary_df %>%
  make_students_only_transcripts_summary_df()

# Plot a metric for the students-only summary
students_only_summary %>%
  plot_users_by_metric(metric = "session_ct")

Masked Names for Privacy

Use masked names for privacy-conscious analysis:

# Masked-name plot of comment counts
students_only_summary %>%
  plot_users_masked_section_by_metric(df = ., metric = "n")


# Masked-name plot of speaking duration
students_only_summary %>%
  plot_users_masked_section_by_metric(df = ., metric = "duration")

Advanced Analysis

Custom Filtering

Filter data for specific analysis:

# Filter for specific sections
# `section` is a character column in the summary (it prints as <chr> in the
# preview earlier in this vignette), so compare against a string instead of
# relying on implicit number-to-string coercion.
section_data <- transcripts_summary_df %>%
  filter(section == "1") # Use the actual section value from our sample data

# Plot filtered data
plot_users_by_metric(section_data, metric = "wpm")

Comparative Analysis

Compare different metrics:

# Create comparison plots
# Each call below draws its own figure. par(mfrow = ...) is deliberately not
# used: it only arranges base-graphics output, has no effect on ggplot2 plots,
# and setting it without restoring it would leak into later base plots.
# NOTE(review): assumes plot_users_by_metric() returns ggplot2 objects -- to
# show the four plots in one grid, combine the returned objects with a
# grid-based layout helper instead.
plot_users_by_metric(transcripts_summary_df, metric = "n")

plot_users_by_metric(transcripts_summary_df, metric = "duration")

plot_users_by_metric(transcripts_summary_df, metric = "wordcount")

plot_users_by_metric(transcripts_summary_df, metric = "wpm")

Interpreting the Results

Participation Equity

Look for patterns in participation:

# Per-section participation overview: headcount, number of students with at
# least one comment, mean/median comment counts, and the active share
participation_summary <- summarise(
  group_by(transcripts_summary_df, section),
  total_students = n(),
  active_students = sum(n > 0),
  avg_comments = mean(n),
  median_comments = median(n),
  participation_rate = active_students / total_students,
  .groups = "drop"
)

participation_summary
#> # A tibble: 1 × 6
#>   section total_students active_students avg_comments median_comments
#>   <chr>            <int>           <int>        <dbl>           <int>
#> 1 1                    5               5            1               1
#> # ℹ 1 more variable: participation_rate <dbl>

Engagement Patterns

Identify different types of engagement:

# Categorize students by engagement type, then count students per category.
# Every branch is an explicit condition on `n`: a catch-all `TRUE ~ ...` would
# label students whose comment count is NA as "High participation", whereas
# here they stay NA and show up as their own row in the count.
engagement_categories <- transcripts_summary_df %>%
  mutate(
    engagement_type = case_when(
      n == 0 ~ "No participation",
      n <= 2 ~ "Low participation",
      n <= 5 ~ "Moderate participation",
      n > 5 ~ "High participation"
    )
  ) %>%
  count(engagement_type)

engagement_categories
#> # A tibble: 1 × 2
#>   engagement_type       n
#>   <chr>             <int>
#> 1 Low participation     5

Custom Visualizations

Creating Your Own Plots

Use the data to create custom visualizations:

# Histogram of how many comments each student made (one bin per count)
transcripts_summary_df %>%
  ggplot(aes(x = n)) +
  geom_histogram(fill = "steelblue", alpha = 0.7, binwidth = 1) +
  labs(
    x = "Number of Comments",
    y = "Number of Students",
    title = "Distribution of Comment Counts"
  ) +
  theme_minimal()


# Scatter plot of speaking duration against word count with a linear trend
# line (no confidence band)
transcripts_summary_df %>%
  ggplot(aes(x = duration, y = wordcount)) +
  geom_point(alpha = 0.6) +
  geom_smooth(se = FALSE, method = "lm") +
  labs(
    x = "Duration (seconds)",
    y = "Word Count",
    title = "Speaking Duration vs Word Count"
  ) +
  theme_minimal()
#> `geom_smooth()` using formula = 'y ~ x'

Best Practices

Choosing the Right Metric

  • For attendance: Use session_ct
  • For participation frequency: Use n or perc_n
  • For speaking time: Use duration or perc_duration
  • For content contribution: Use wordcount or perc_wordcount
  • For speaking pace: Use wpm

Privacy Considerations

  • Use masked names when sharing results
  • Consider aggregating data for small groups
  • Be mindful of FERPA compliance requirements

Visualization Tips

  • Use consistent color schemes
  • Include clear titles and labels
  • Consider log scales for skewed distributions
  • Add context (e.g., class averages, benchmarks)

Next Steps