library(zoomstudentengagement)
library(dplyr)
library(ggplot2)
This vignette shows how to use session mapping to handle complex scenarios where you have multiple courses, sections, or instructors with overlapping Zoom recordings.
Session mapping is useful when:
First, define your course information:
# Create course information for multiple courses.
# Every argument is a vector with one element per course section (four here).
course_info <- create_course_info(
  dept = c("CS", "CS", "MATH", "LTF"),
  course = c("101", "101", "250", "201"),
  section = c(1, 2, 1, 1),
  instructor = c("Dr. Smith", "Dr. Smith", "Dr. Johnson", "Dr. Smith"),
  session_length_hours = c(1.5, 1.5, 2.0, 1.5),
  session_days = c("Mon", "Wed", "Tue", "Thu"),
  session_times = c("10:00", "14:00", "09:00", "15:00")
)

# View course information
course_info
#> # A tibble: 4 × 11
#> dept course section instructor session_length_hours semester_start
#> <chr> <chr> <chr> <chr> <dbl> <date>
#> 1 CS 101 1 Dr. Smith 1.5 2024-01-01
#> 2 CS 101 2 Dr. Smith 1.5 2024-01-01
#> 3 LTF 201 1 Dr. Smith 1.5 2024-01-01
#> 4 MATH 250 1 Dr. Johnson 2 2024-01-01
#> # ℹ 5 more variables: semester_end <date>, session_days <chr>,
#> # session_times <chr>, course_id <chr>, course_name <chr>
Set up configuration with session mapping enabled:
# Create configuration with session mapping.
# `use_session_mapping` and `session_mapping_file` are read back below via
# `config$session_mapping`.
config <- create_analysis_config(
  dept = "CS", # Primary department (can be overridden by mapping)
  semester_start_mdy = "Jan 15, 2024",
  scheduled_session_length_hours = 1.5,
  instructor_name = "Dr. Smith",
  data_folder = system.file("extdata", package = "zoomstudentengagement"),
  transcripts_folder = "transcripts",
  names_to_exclude = c("dead_air"),
  use_session_mapping = TRUE,
  session_mapping_file = "session_mapping.csv"
)

# View configuration
cat("Session mapping enabled:", config$session_mapping$use_session_mapping, "\n")
#> Session mapping enabled: TRUE
cat("Session mapping file:", config$session_mapping$session_mapping_file, "\n")
#> Session mapping file: session_mapping.csv
Load Zoom recordings without automatic parsing:
# Load raw Zoom recordings (without regex parsing).
# All paths, patterns and course settings come from the `config` object.
zoom_recordings_raw <- load_zoom_recorded_sessions_list(
  data_folder = config$paths$data_folder,
  transcripts_folder = config$paths$transcripts_folder,
  zoom_recorded_sessions_csv_names_pattern = config$patterns$zoom_recordings_csv,
  zoom_recorded_sessions_csv_col_names = config$patterns$zoom_recordings_csv_col_names,
  dept = NULL, # Don't filter by department initially
  semester_start_mdy = config$course$semester_start,
  scheduled_session_length_hours = config$course$session_length_hours
)
#> [1] "CSV files to process:"
#> [1] "zoomus_recordings__20240124.csv"
#> [1] "After reading CSV:"
#> # A tibble: 4 × 9
#> filepath Topic ID `Start Time` `File Size (MB)` `File Count` `Total Views`
#> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
#> 1 /private… LTF … 996 … Jan 24, 202… 147.0676 11 0
#> 2 /private… LTF … 960 … Jan 18, 202… 1200.6551 24 0
#> 3 /private… LTF … 960 … Jan 11, 202… 2317.0028 36 0
#> 4 /private… Cono… 901 … Jan 03, 202… 2819.0706 9 1
#> # ℹ 2 more variables: `Total Downloads` <dbl>, `Last Accessed` <chr>
#> [1] "After summarise:"
#> Topic ID Start Time
#> 1 Conor Healy's Personal Meeting Room 901 075 7783 Jan 03, 2024 19:16:16
#> 2 LTF 23.24 - Thurs 6:30PM (Healy) 960 8322 8914 Jan 11, 2024 18:22:24
#> 3 LTF 23.24 - Thurs 6:30PM (Healy) 960 8322 8914 Jan 18, 2024 18:26:14
#> 4 LTF 23.24 - Thurs 6:30PM (Healy) 996 6354 4011 Jan 24, 2024 12:25:59
#> File Size (MB) File Count Total Views Total Downloads Last Accessed
#> 1 2819.0706 9 1 4 Jan 10, 2024 20:52:26
#> 2 2317.0028 36 0 6 Jan 18, 2024 11:43:46
#> 3 1200.6551 24 0 6 Jan 19, 2024 04:59:50
#> 4 147.0676 11 0 6 Jan 24, 2024 13:39:20
#> Warning in load_zoom_recorded_sessions_list(data_folder =
#> config$paths$data_folder, : Some Topic entries did not match the expected
#> pattern and section could not be extracted.
#> [1] "After topic parsing:"
#> Topic ID Start Time
#> 1 Conor Healy's Personal Meeting Room 901 075 7783 Jan 03, 2024 19:16:16
#> 2 LTF 23.24 - Thurs 6:30PM (Healy) 960 8322 8914 Jan 11, 2024 18:22:24
#> 3 LTF 23.24 - Thurs 6:30PM (Healy) 960 8322 8914 Jan 18, 2024 18:26:14
#> 4 LTF 23.24 - Thurs 6:30PM (Healy) 996 6354 4011 Jan 24, 2024 12:25:59
#> File Size (MB) File Count Total Views Total Downloads Last Accessed
#> 1 2819.0706 9 1 4 Jan 10, 2024 20:52:26
#> 2 2317.0028 36 0 6 Jan 18, 2024 11:43:46
#> 3 1200.6551 24 0 6 Jan 19, 2024 04:59:50
#> 4 147.0676 11 0 6 Jan 24, 2024 13:39:20
#> dept course_section course section
#> 1 <NA> <NA> NA NA
#> 2 LTF 23.24 23 24
#> 3 LTF 23.24 23 24
#> 4 LTF 23.24 23 24
#> [1] "Start Time values:"
#> [1] "Jan 03, 2024 19:16:16" "Jan 11, 2024 18:22:24" "Jan 18, 2024 18:26:14"
#> [4] "Jan 24, 2024 12:25:59"
#> [1] "After date parsing:"
#> Topic ID Start Time
#> 1 Conor Healy's Personal Meeting Room 901 075 7783 Jan 03, 2024 19:16:16
#> 2 LTF 23.24 - Thurs 6:30PM (Healy) 960 8322 8914 Jan 11, 2024 18:22:24
#> 3 LTF 23.24 - Thurs 6:30PM (Healy) 960 8322 8914 Jan 18, 2024 18:26:14
#> 4 LTF 23.24 - Thurs 6:30PM (Healy) 996 6354 4011 Jan 24, 2024 12:25:59
#> File Size (MB) File Count Total Views Total Downloads Last Accessed
#> 1 2819.0706 9 1 4 Jan 10, 2024 20:52:26
#> 2 2317.0028 36 0 6 Jan 18, 2024 11:43:46
#> 3 1200.6551 24 0 6 Jan 19, 2024 04:59:50
#> 4 147.0676 11 0 6 Jan 24, 2024 13:39:20
#> dept course_section course section match_start_time match_end_time
#> 1 <NA> <NA> NA NA 2024-01-03 19:16:16 2024-01-03 21:16:16
#> 2 LTF 23.24 23 24 2024-01-11 18:22:24 2024-01-11 20:22:24
#> 3 LTF 23.24 23 24 2024-01-18 18:26:14 2024-01-18 20:26:14
#> 4 LTF 23.24 23 24 2024-01-24 12:25:59 2024-01-24 14:25:59
#> [1] "Final result after filtering:"
#> Topic ID Start Time
#> 3 LTF 23.24 - Thurs 6:30PM (Healy) 960 8322 8914 Jan 18, 2024 18:26:14
#> 4 LTF 23.24 - Thurs 6:30PM (Healy) 996 6354 4011 Jan 24, 2024 12:25:59
#> File Size (MB) File Count Total Views Total Downloads Last Accessed
#> 3 1200.6551 24 0 6 Jan 19, 2024 04:59:50
#> 4 147.0676 11 0 6 Jan 24, 2024 13:39:20
#> dept course_section course section match_start_time match_end_time
#> 3 LTF 23.24 23 24 2024-01-18 18:26:14 2024-01-18 20:26:14
#> 4 LTF 23.24 23 24 2024-01-24 12:25:59 2024-01-24 14:25:59
# View raw recordings
zoom_recordings_raw
#> # A tibble: 2 × 14
#> Topic ID `Start Time` `File Size (MB)` `File Count` `Total Views`
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 LTF 23.24 - Th… 960 … Jan 18, 202… 1201. 24 0
#> 2 LTF 23.24 - Th… 996 … Jan 24, 202… 147. 11 0
#> # ℹ 8 more variables: `Total Downloads` <dbl>, `Last Accessed` <chr>,
#> # dept <chr>, course_section <chr>, course <int>, section <int>,
#> # match_start_time <dttm>, match_end_time <dttm>
Use patterns to automatically assign recordings to courses:
# Create session mapping with automatic patterns.
# Each list name identifies a course and each value is the regular expression
# used to auto-assign recordings to it. The resulting mapping is also written
# to the file named in the configuration.
session_mapping <- create_session_mapping(
  zoom_recordings_df = zoom_recordings_raw,
  course_info_df = course_info,
  output_file = config$session_mapping$session_mapping_file,
  auto_assign_patterns = list(
    "CS 101" = "CS.*101",
    "MATH 250" = "MATH.*250",
    "LTF 201" = "LTF.*201"
  ),
  interactive = FALSE # Set to TRUE for interactive mode
)
#> Warning in create_session_mapping(zoom_recordings_df = zoom_recordings_raw, : 2
#> recordings need manual assignment

# View session mapping
session_mapping
#> # A tibble: 2 × 11
#> recording_id topic start_time dept course section course_section
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 960 8322 8914 LTF 23.24 - Thur… Jan 18, 2… <NA> <NA> <NA> NA.NA.NA
#> 2 996 6354 4011 LTF 23.24 - Thur… Jan 24, 2… <NA> <NA> <NA> NA.NA.NA
#> # ℹ 4 more variables: session_date <dttm>, session_time <chr>,
#> # instructor <chr>, notes <chr>
For more control, use interactive mode:
# Interactive session mapping (commented out for vignette)
# session_mapping_interactive <- create_session_mapping(
# zoom_recordings_df = zoom_recordings_raw,
# course_info_df = course_info,
# output_file = "session_mapping_interactive.csv",
# interactive = TRUE
# )
Load the mapped recordings:
# Load mapped recordings.
# The mapping file path is passed positionally as the first argument; the raw
# recordings are passed by name.
zoom_recorded_sessions_df <- load_session_mapping(
  config$session_mapping$session_mapping_file,
  zoom_recordings_df = zoom_recordings_raw
)
#> Warning in load_session_mapping(config$session_mapping$session_mapping_file, :
#> Found 2 unmapped recordings in session mapping file
#> Unmapped recordings:
#> # A tibble: 2 × 3
#> zoom_recording_id topic notes
#> <chr> <chr> <chr>
#> 1 960 8322 8914 LTF 23.24 - Thurs 6:30PM (Healy) NEEDS MANUAL ASSIGNMENT
#> 2 996 6354 4011 LTF 23.24 - Thurs 6:30PM (Healy) NEEDS MANUAL ASSIGNMENT
# View mapped sessions
zoom_recorded_sessions_df
#> # A tibble: 2 × 24
#> Topic ID `Start Time` `File Size (MB)` `File Count` `Total Views`
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 LTF 23.24 - Th… 960 … Jan 18, 202… 1201. 24 0
#> 2 LTF 23.24 - Th… 996 … Jan 24, 202… 147. 11 0
#> # ℹ 18 more variables: `Total Downloads` <dbl>, `Last Accessed` <chr>,
#> # dept.x <chr>, course_section <chr>, course.x <int>, section.x <int>,
#> # match_start_time <dttm>, match_end_time <dttm>, topic <chr>,
#> # start_time <chr>, dept.y <lgl>, course.y <lgl>, section.y <lgl>,
#> # instructor <lgl>, notes <chr>, dept <chr>, course <chr>, section <chr>
# Load transcript files found under the configured data/transcripts folders
transcript_files_df <- load_transcript_files_list(
  data_folder = config$paths$data_folder,
  transcripts_folder = config$paths$transcripts_folder
)
transcript_files_df
#>   date_extract     recording_start    start_time_local
#> 1 20240124 2024-01-24 20:29:01 2024-01-24 12:29:01
#> closed_caption_file
#> 1 GMT20240124-202901_Recording.cc.vtt
#> transcript_file
#> 1 GMT20240124-202901_Recording.transcript.vtt
#> chat_file
#> 1 GMT20240124-202901_RecordingnewChat.txt
# Load cancelled classes.
# NOTE(review): `write_blank_cancelled_classes = TRUE` presumably writes an
# empty template when no cancelled-classes file exists; confirm against the
# package documentation.
cancelled_classes_df <- load_cancelled_classes(
  data_folder = config$paths$data_folder,
  cancelled_classes_file = config$paths$cancelled_classes_file,
  write_blank_cancelled_classes = TRUE
)
cancelled_classes_df
#> # A tibble: 0 × 23
#> # ℹ 23 variables: dept <chr>, course_section <chr>, course <chr>,
#> # section <chr>, day <chr>, time <chr>, instructor <chr>, Topic <chr>,
#> # ID <dbl>, Start Time <dbl>, File Size (MB) <dbl>, File Count <dbl>,
#> # Total Views <chr>, Total Downloads <dttm>, Last Accessed <dttm>,
#> # match_start_time <chr>, match_end_time <dttm>, date_extract <dttm>,
#> # recording_start <chr>, start_time_local <chr>, transcript_file <chr>,
#> # chat_file <int>, closed_caption_file <chr>
# Join all transcript data: mapped recordings, transcript files and
# cancelled classes
transcripts_list_df <- join_transcripts_list(
  df_zoom_recorded_sessions = zoom_recorded_sessions_df,
  df_transcript_files = transcript_files_df,
  df_cancelled_classes = cancelled_classes_df
)
transcripts_list_df
#> # A tibble: 0 × 5
#> # ℹ 5 variables: section <chr>, match_start_time <dttm>, match_end_time <dttm>,
#> # start_time_local <dttm>, session_num <int>
# Process transcripts
# Extract transcript file names from the joined data, dropping missing values.
# In this run the joined table has no `transcript_file` column, so tibble
# warns once per `$` access and the metrics result below is NULL.
transcript_file_names <- transcripts_list_df$transcript_file[
  !is.na(transcripts_list_df$transcript_file)
]
#> Warning: Unknown or uninitialised column: `transcript_file`.
#> Unknown or uninitialised column: `transcript_file`.
transcripts_metrics_df <- summarize_transcript_files(
  transcript_file_names = transcript_file_names,
  data_folder = config$paths$data_folder,
  transcripts_folder = config$paths$transcripts_folder,
  names_to_exclude = config$analysis$names_to_exclude
)
head(transcripts_metrics_df)
#> NULL
Create custom assignment patterns:
# Example of custom patterns.
# Names identify a course (optionally a section); values are regular
# expressions, with `|` separating alternative spellings.
custom_patterns <- list(
  "CS 101 Section 1" = "CS.*101.*Section.*1|CS.*101.*Monday",
  "CS 101 Section 2" = "CS.*101.*Section.*2|CS.*101.*Wednesday",
  "MATH 250" = "MATH.*250|Mathematics.*250",
  "LTF 201" = "LTF.*201|Language.*201"
)
# Use custom patterns in session mapping; the result is written to a
# separate CSV file
session_mapping_custom <- create_session_mapping(
  zoom_recordings_df = zoom_recordings_raw,
  course_info_df = course_info,
  output_file = "session_mapping_custom.csv",
  auto_assign_patterns = custom_patterns,
  interactive = FALSE
)
#> Warning in create_session_mapping(zoom_recordings_df = zoom_recordings_raw, : 2
#> recordings need manual assignment
Edit the session mapping file manually for fine-grained control:
# The session mapping CSV file can be edited manually
# Columns: recording_id, course_section, dept, course, section, instructor
# NOTE(review): the mapping printed earlier in this vignette also shows
# topic, start_time, session_date, session_time and notes columns, and its
# recording_id values look like Zoom meeting IDs (e.g. "960 8322 8914")
# rather than recording file names. Confirm the expected format before editing.
# Example:
# recording_id,course_section,dept,course,section,instructor
# GMT20240124-202901_Recording,CS.101.1,CS,101,1,Dr. Smith
# GMT20240125-143000_Recording,CS.101.2,CS,101,2,Dr. Smith
Issue: No recordings matched automatically
# Solution: Check your patterns
auto_assign_patterns <- list(
  "CS 101" = "CS.*101", # Make sure pattern matches actual recording names
  "MATH 250" = "MATH.*250"
)
Issue: Recordings assigned to wrong courses
# Solution: Use more specific patterns or manual assignment
auto_assign_patterns <- list(
  "CS 101 Section 1" = "CS.*101.*Section.*1",
  "CS 101 Section 2" = "CS.*101.*Section.*2"
)
Issue: Missing recordings
# Solution: Check the raw recordings list.
# The raw recordings table has no `recording_name` column; the recording
# title is stored in `Topic`.
print(zoom_recordings_raw$Topic)
Validate your session mapping:
# Check mapping coverage: number of mapping rows and of distinct recordings
# per course section
mapping_summary <- session_mapping %>%
  group_by(course_section) %>%
  summarise(
    recordings_count = n(),
    unique_recordings = n_distinct(recording_id)
  )
mapping_summary
#> # A tibble: 1 × 3
#> course_section recordings_count unique_recordings
#> <chr> <int> <int>
#> 1 NA.NA.NA 2 2
# Check for unmapped recordings.
# NOTE(review): this anti-join only finds recordings whose ID is absent from
# the mapping. Rows that are present in the mapping but still lack a course
# assignment are not reported here.
unmapped <- zoom_recordings_raw %>%
  anti_join(session_mapping, by = c("ID" = "recording_id"))
if (nrow(unmapped) > 0) {
  cat("Unmapped recordings found:\n")
  print(unmapped$Topic)
} else {
  cat("All recordings mapped successfully!\n")
}
#> All recordings mapped successfully!