# Load data ----

# Scripture table; the analysis below filters it by `volume_id` and uses the
# `scripture_text` column.
scrips <- read_csv("lds-scriptures.csv")

# Table of references to the Savior (presumably names/titles, judging by the
# file name -- TODO confirm). The URL connection is wrapped in gzcon() before
# being handed to read_rds().
names <- "https://byuistats.github.io/M335/data/BoM_SaviorNames.rds" %>%
  url() %>%
  gzcon() %>%
  read_rds()

# Lines of a single text file (presumably 2 Nephi 25:16 -- TODO confirm).
verse <- read_lines("https://byuistats.github.io/M335/data/2nephi2516.txt")

# Background ----
#
# In 1978, Susan Easton Black calculated the average number of verses per
# mention of Christ’s name by each book in the Book of Mormon. She found that
# Christ’s name is mentioned about every 1.7 verses. But what is the average
# number of words between each reference of Christ outside the context of
# books, chapters, and verses?
# Data Analysis ----

# Sort the references longest-first so that, in the regex alternation built
# below, a long reference is tried before any shorter reference it contains
# (this prevents splitting inside of larger references).
# Assumes `names` has an `nchar` column holding each reference's length --
# TODO confirm.
# NOTE(review): `names` shadows base::names(); the name is kept unchanged in
# case later code relies on it.
names <- names %>%
  arrange(desc(nchar))

# Character vector of the references themselves.
names2 <- names$name

# One pattern with all references separated by "or": ref1|ref2|...
# NOTE(review): the references are used as a regular expression as-is; if any
# contain regex metacharacters they would need escaping -- verify.
names3 <- str_c(names2, collapse = "|")

# Build the whole Book of Mormon as ONE string, then split it at every
# reference. pull() is used instead of select(): select() returns a one-column
# tibble (a list), which str_c() would coerce as a whole instead of joining
# the individual verses with spaces.
BoM <- scrips %>%
  filter(volume_id == 3) %>%    # keep just the Book of Mormon
  pull(scripture_text) %>%      # verse text as a character vector
  str_c(collapse = " ") %>%     # one string for the whole book
  str_split(names3)             # pieces of text between references

# str_split() on a single string returns a list of length one, so its only
# element holds every piece; count the words in each piece directly rather
# than looping over the list.
count <- str_count(BoM[[1]], "\\w+")

# Turn the vector of word counts into a tibble with an index column.
count_tbl <- tibble(
  y = count,
  x = seq_along(y)
) %>%
  filter(x !