Lower Risk Guidelines for Online Wagering (Analysis Document)

Author

Rob Heirene

Published

June 3, 2025

Study pre-registration: https://osf.io/bx43k

Load Required Packages

Load in packages using groundhog to ensure consistency of the versions used here:

Show code
# Install and load the groundhog package to ensure consistency of the package versions used here:
# install.packages("groundhog") # Install

library(groundhog) # Load

# List desired packages:
packages <- c('tidyverse', # Clean, organise, and visualise data
              'haven',# read spsss files
               "readxl", #  load Excel files
               "survey", # weight study data
              "weights", # Weighted statistics 
              "wCorr", # Weighted correlations (https://cran.r-project.org/web/packages/wCorr/wCorr.pdf)
              'kableExtra', # Make tables
              'formattable', #  Add visualisations to tables
              'gt', # Alternative table options
              'gtExtras', # Add colours to gt tables
              'gtsummary', # Create summary tables
              'scales', # Allows for the removal of scientific notation in axis labels
              'plotly', # Add interactive elements to figures
              'htmlwidgets', # Make plotly plots HTML format
              'ggrain', # Make rain cloud plots
              'waffle', # make waffle plots for proportions
              'networkD3', # Make Sankey plots to show relationships
              'patchwork', # Join plots in multipanel layouts
              'pwr', # Check statistical power
              'car', # Perform ANCOVA stats tests
              'rstatix', # Perform ANCOVA stats tests
              'ggpubr', # Plots for linearity checks 
              'broom', # Print summaries of statistical test outputs
              'psych', # get detailed summary figures to Supplement statistical tests
              'sysfonts', # Special fonts for figures
              'showtext', # Special fonts for figures
              'ggstatsplot', # Plots with statistical outputs
              'janitor', # Make column names consistent format
              'caret', # Compute model performance indices
              'sessioninfo', # Detailed session info for reproducibility
              "osfr",
              "readxl",
              "googlesheets4",# Access data from Google sheets
              "Gmisc", # Produce prisma flow diagram
              'grid', # Produce prisma flow diagram
              "glue", # Produce prisma flow diagram
              "httpuv", # supports access to Google sheets
              "irr", # Compute interrater reliability stats
              "apa", # print test results in apa format
              "apaTables", # print test results in apa format
              "ggh4x", # truncate graph axis lines
              "remotes",
              "misty", # read sav files
              "truncnorm", # Generate normally distributed data with limits
              "ggplot2",
              "readr", # Load data
              'fmsb', # Compute NagelkerkeR2
              "performance", # Check model assumptions
              "see",# supports above package
              "robustbase", # Robust linear models
              "pscl", # Zero inflation model
              "cutpointr", # AUC/ROC
              "bestglm" # subset of limits modelling
              
)
# Load desired package with versions specific to project start date:
groundhog.library(packages, "2025-02-28")

Setup Presentation & Graph Specifications

Set up a standard theme for plots/data visualisations:

Show code
# Load new font for figures/graphs
font_add_google("Poppins")
font_add_google("Reem Kufi", "Reem Kufi")
font_add_google("Share Tech Mono", "techmono")
windowsFonts(`Segoe UI` = windowsFont('Segoe UI'))
showtext_auto()
showtext_auto(enable = TRUE)

# Save new theme for figures/graphs.This will determine the layout, presentation, font type and font size used in all data visualisations presented here:
plot_theme<- theme_classic() +
  theme(
    text=element_text(family="Poppins"),
    plot.title = element_text(hjust = 0.5, size = 16),
          plot.subtitle = element_text(hjust = 0.5, size = 13),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
    plot.caption = element_text(size = 12),
    legend.title=element_text(size=12), 
    legend.text=element_text(size=10)
        ) 

Load Data

Load in master data set containing all customer details (code hidden):

Process the data for analysis by removing individuals who didn`t pass the attention checks and those who did not complete the survey:

Show code
master_dataset <-  master_dataset_initial %>%
  as_tibble() %>%
    filter(SURVEY_STATUS_PGSI == "Complete") %>%
filter(PAS_6M_BET_FREQ_DAYS != 0) %>% # Remove anybody who didn't bet in the window of interest
     filter(
            (ATTENTION_CHECK_1 == "PASSED" | is.na(ATTENTION_CHECK_1)) &
            (ATTENTION_CHECK_2 == "PASSED" | is.na(ATTENTION_CHECK_2))
             ) %>%
  mutate(GHM_OVER_PRIORITISE = as.numeric(GHM_OVER_PRIORITISE),
    GHM_STRAIN = as.numeric(GHM_STRAIN),
    GHM_SEVERE = as.numeric(GHM_SEVERE)
  ) %>%
  # Enforce the order of factors so that they present properly in tables and graphs:
  mutate(PGSI_CATEGORY = factor(PGSI_CATEGORY, levels = c('No risk',
                                                              'Low risk',
                                                              'Moderate risk',
                                                              'High risk')))
# colnames(master_dataset)

Analyses - Survey

Recruitment

Provide some key figures for the participant section in the methods.

How many people were invited to the survey?

Show code
 master_dataset_initial %>%
  nrow()
[1] 24829

How many people completed up to the PGSI?

Show code
 master_dataset_initial %>%
   filter(SURVEY_STATUS_PGSI == "Complete") %>%
  nrow()
[1] 1959

How many people were removed for not passing the attention check?

Show code
 master_dataset_initial %>%
  filter(SURVEY_STATUS_PGSI == "Complete") %>%
  filter(ATTENTION_CHECK_1 == "NOT PASSED" | ATTENTION_CHECK_2 == "NOT PASSED")  %>%
  nrow()
[1] 309

And how many remaining people didn’t place a bet in the 6 month before survey?

Show code
master_dataset_initial %>%
  as_tibble() %>%
  filter(SURVEY_STATUS_PGSI == "Complete") %>%
  filter(ATTENTION_CHECK_1 == "PASSED" | ATTENTION_CHECK_2 == "PASSED")  %>%
filter(PAS_6M_BET_FREQ_DAYS == 0) %>% 
  nrow()
[1] 3

What’s left is how many participants in our sample used for analyses:

Show code
 master_dataset_initial %>%
    as_tibble() %>%
  filter(SURVEY_STATUS_PGSI == "Complete") %>%
     filter(
            (ATTENTION_CHECK_1 == "PASSED" | is.na(ATTENTION_CHECK_1)) &
            (ATTENTION_CHECK_2 == "PASSED" | is.na(ATTENTION_CHECK_2))
             ) %>%
  filter(PAS_6M_BET_FREQ_DAYS != 0) %>% 
  nrow()
[1] 1647

Analysis

The remainder of this document follows our pre-registered analysis plan (Study 1; ADD LINK ROB) until the “Exploratory Analyses” section.

Phase 1: Labelling & Counting

Labelling & recoding

Recode Behavioural Risk Indicators (Predictor/independent variables)

All behavioural indicator variables have been previously created in a separate script. For the interest of transparency, we will summarise all of these variables here:

Show code
master_dataset %>%
  select(PAS_6M_MONTHLY_BET_FREQ_DAYS, 
         PAS_6M_MONTHLY_SPEND,
         PAS_6M_N_ACTIVITIES,
         PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M,
         PAS_6M_MONTHLY_DEPOSIT_FREQ,
         PAS_6M_MONTHLY_DEPOSIT_AMOUNT,
         PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
         GAM_ACCOUNTS # Including this account, how many online gambling accounts have you used in the past 12 months?
         )  %>%
 gt_plt_summary()
.
1647 rows x 8 cols
Column Plot Overview Missing Mean Median SD
PAS_6M_MONTHLY_BET_FREQ_DAYS 0.0% 9.1 6.6 8.2
PAS_6M_MONTHLY_SPEND 0.0% 8,040.2 540.8 24,719.4
PAS_6M_N_ACTIVITIES 0.0% 5.1 4.0 3.8
PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M 35.9% 67.2 4.7 202.9
PAS_6M_MONTHLY_DEPOSIT_FREQ 0.0% 26.8 4.0 60.6
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 0.0% 2,931.7 184.2 7,413.6
PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M 35.9% 25.2 1.6 70.7
GAM_ACCOUNTS 2, 3, 1, 5+ and 4
0.0%

We do, however, need to recode the ‘number of accounts’ variable to be consistent with our preregistration by converting it to a numeric format and replacing the ‘5 or more’ category with the value 5. We also need to limit people’s percentage of income deposited values to 100% to be consistent with our pre-registration and previous literature. Note that we will not look to restrict the percentage of income spent on gambling as it is theoretically possible for people to spend more than their total income if they are turning over winnings. By contrast, it would be unlikely for somebody to be able to deposit more than 100% of their income, unless they were reinvesting substantial winnings already withdrawn.

Let’s check how many people deposited more than 100% of their stated income:

Show code
master_dataset  %>%
filter(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M > 100) %>%
  nrow()
[1] 74

74 people. Okay, let’s restrict these values to 100% when we recode the demographic control variables below.

Recode Other gambling control variables

Show code
master_dataset <- master_dataset %>%
  # Create Variable with dichotomous response options for engaging with gambling:
      mutate(Engages_in_other_forms = ifelse(GAM_ACTIVITIES != "None of the above" & !is.na(GAM_ACTIVITIES), TRUE, FALSE)) %>% 
# Convert number of gambling accounts to numeric:
  mutate(GAM_ACCOUNTS_NUM = case_when(GAM_ACCOUNTS == "1" ~ 1,
                                      GAM_ACCOUNTS == "2" ~ 2,
                                      GAM_ACCOUNTS == "3" ~ 3,
                                      GAM_ACCOUNTS == "4" ~ 4,
                                      GAM_ACCOUNTS == "5+" ~ 5
                                      )) 

  # count(GAM_ACCOUNTS, GAM_ACCOUNTS_NUM) # Check the coding

Recode Demographic Control Variables

Some work will be needed to code the demographic control variables listed in the preregistration:

  • Age: A continuous variable provided by the sites. [No recoding required]

  • Gender: A categorical variable with 3 groups: male, female, and unknown. The male value will be set as the reference value. [No recoding required]

  • Employment status: Participants in the survey are asked to state their current employment status. Consistent with the approach used by Brosowki et al. (2015), we will simply recode all categories so that each person is assigned one of two labels: “Employed” (which will include the options for retirement and a full-time student) or “Unemployed” (which will also include being principally engaged in domestic duties), with the former being the reference value. [Recoded in code chunk below]

  • Education level: Participants in the survey were asked to state their highest level of educational achievement, selecting from 8 possible options of increasing obtainment. We will convert these categories into numerical values, with lower values indicating less education and vice versa. [Recoded in code chunk below]

Show code
# Education level:
master_dataset$EDUCATION <- factor(master_dataset$EDUCATION, levels = rev(c(
  "Doctoral degree",
  "Master's degree",
  "Graduate diploma or graduate level certificate",
  "Bachelor's degree",
  "Advanced diploma/diploma",
  "Certificate III/IV",
  "Year 12",
  "Year 11 or below (includes Certificate I/II/n)"
)))

# levels(master_dataset$EDUCATION) # Check

# Define the numeric levels:
education_levels <- c(
  "Year 11 or below (includes Certificate I/II/n)" = 1,
  "Year 12" = 2,
  "Certificate III/IV" = 3,
  "Advanced diploma/diploma" = 4,
  "Bachelor's degree" = 5,
  "Graduate diploma or graduate level certificate" = 6,
  "Master's degree" = 7,
  "Doctoral degree" = 8
)

# Convert to numeric:
master_dataset$EDUCATION_NUM <- as.numeric(factor(master_dataset$EDUCATION, levels = names(education_levels), labels = education_levels))


# Add missing values:

# First check median value of each PgSI group
# master_dataset  %>%
#   group_by(PGSI_CATEGORY) %>%
#   summarise(median(EDUCATION_NUM, na.rm = TRUE))

# Okay, they're all the same, which is probably a good thing.

# Now we come to impute group-specific median value for missing responses:
master_dataset_w_demographics <- master_dataset  %>%
group_by(PGSI_CATEGORY) %>%
  mutate(
    EDUCATION_NUM = if_else(
      is.na(EDUCATION_NUM),
      median(EDUCATION_NUM, na.rm = TRUE),
      EDUCATION_NUM
    )
  ) %>%
  ungroup()

# master_dataset_w_demographics%>%
#  count(EDUCATION_NUM)




# Employment status:

# Check levels:
# master_dataset %>%
# count(EMPLOYMENT)

master_dataset_w_demographics <- master_dataset_w_demographics %>%
  mutate(
    EMPLOYMENT_RECODED = case_when(
      EMPLOYMENT %in% c("Employed full time", 
                               "Employed full time,Employed part time or casual", 
                               "Employed full time,Full-time student", 
                               "Employed full time,Retired", 
                               "Employed part time or casual", 
                               "Employed part time or casual,Full-time student", 
                               "Employed part time or casual,Retired",
                               "Employed part time or casual,Principally engaged in domestic duties", 
                               "Full-time student", 
                               "Retired",
                               "Not currently employed,Retired") ~ "Employed",
      # Categories for "Unemployed"
      EMPLOYMENT %in% c("Employed part time or casual,Not currently employed", 
                               "Not currently employed", 
                               "Not currently employed,Principally engaged in domestic duties", 
                               "Principally engaged in domestic duties") ~ "Unemployed",
      is.na(EMPLOYMENT) ~ "Unknown",
      # Default case to handle any unforeseen values (will work with any new data then!):
      TRUE ~ "Other"
    )
  ) %>%
    # Per our preregistration, cap percentage of income deposited to 100%
    mutate(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M = case_when(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M >100 ~ 100,
                                                             TRUE ~ PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M))

# master_dataset_w_demographics %>%
#  count(EMPLOYMENT_RECODED, PGSI_CATEGORY)

Recode Outcome Variables

Below is the description of the two outcome measures we will be using in this study taken from our preregistration.

  • PGSI harm index: Consistent with previous investigations of low-risk guidelines (e.g., Dowling et al., 2021), we will focus on the seven items relating to gambling harms within the PGSI (i.e., feeling guilty, losing more than one can afford, recognition of the problem, health problems, financial problems, criticism from others, and borrowing money). For each of these items, the participant will be scored as 0, representing a response of “never” on the PGSI, or 1, representing a score of “sometimes”, “most of the time”, or “almost always”. A score of 2 or more on these 7 items will be used to classify a participant as harmed.

For the purposes of accurate recoding, let’s list out the items in the PGSI and code them as to whether they are relevant or not:

  1. Have you bet more than you could really afford to lose? [losing more than one can afford]

  2. Have you needed to gamble with larger amounts to get the same feeling of excitement? [not scored]

  3. When you gambled, did you go back another day to try to win back the money you lost? [not scored]

  4. Have you borrowed money or sold anything to get money to gamble? [borrowing money]

  5. Have you felt you might have a problem with gambling? [recognition of the problem]

  6. Has gambling caused you any health problems, including stress or anxiety? [physical health problems]

  7. Have people criticised your betting or told you that you had a gambling problem, regardless of whether or not you thought it was true? [criticism from others]

  8. Has your gambling caused any financial problems for you or your household? [financial problems]

  9. Have you felt guilty about the way you gamble or what happens when you gamble? [feeling guilty]

  • GHM harm index: For this index, we will use the Gambling Harm Measure (O’Neil et al., 2021), a 16-item scale measuring the impact of gambling across six life domains (financial, psychological, relationship, physical health, work/study, and legal). Dichotomous responses (yes/no) are provided to all questions which are asked in relation to the last 12 months (e.g., In the last 12 MONTHS, have you experienced any serious relationship consequences because of your gambling? For example, have you lost friends or family; or experienced separation or divorce; or engaged in physically violent arguments?). Questions are asked in a graded manner with three questions for each domain, the first asking about prioritising gambling over an activity/responsibility, the second asking about strains or pressures caused by gambling in that domain, and the third asking about serious consequences of gambling in that domain (the third level of questioning is only presented to participants if they say they have experienced harm at the strain/pressure level [2nd question] in that domain). For example, see below the three questions for financial harm:

    • In the last 12 MONTHS, has gambling led you to prioritise or put gambling ahead of other important financial expenditures? For example, has your gambling reduced money available for household or other important expenses?

    • In the last 12 MONTHS, have you experienced any financial pressures due to your gambling? For example, have you been building up debt; or found it hard to pay bills; or had to borrow money; or taken on extra work to finance gambling?

    • [Asked if participant responded “yes” to above question] In the last 12 MONTHS, have you experienced any serious financial consequences because of your gambling? For example, have you had to sell important assets; or been unable to pay rent or meet essential daily expenses; or had utilities disconnected; or lost your home; or filed for bankruptcy?

      For this index, any participant who responds “yes” to a question at the second level of any domain (i.e., strain/pressure level of harm) will be classed as harmed. Note that the legal domain only has one question (”In the last 12 MONTHS, have you done anything illegal due to your gambling? For example, have you stolen money or valuables, or committed fraud or embezzlement, etc.?“). A positive response to this question will also result in a participant being classed as harmed.

    Using the above descriptions for guidance, let’s now recode the outcome variables:

Show code
master_dataset_recoded <- master_dataset_w_demographics %>%
  rowwise() %>% 
mutate(across(c(PGSI_Q1, PGSI_Q2, PGSI_Q3, PGSI_Q4, PGSI_Q5, PGSI_Q6, PGSI_Q7, PGSI_Q8, PGSI_Q9), 
                ~case_when(
                  . == "Never" ~ "0",
                  . == "Sometimes" ~ "1",
                  . == "Most of the Time" ~ "1",
                  . == "Always" ~ "1"))) %>%
  mutate(across(c(PGSI_Q1, # Convert PGSI questions from character to numeric
                  PGSI_Q2,
                  PGSI_Q3,
                  PGSI_Q4,
                  PGSI_Q5,
                  PGSI_Q6,
                  PGSI_Q7,
                  PGSI_Q8,
                  PGSI_Q9), as.numeric)) %>%
  mutate(PGSI_COUNT_STATUS = sum(PGSI_Q1, #
                                 PGSI_Q4, #
                                 PGSI_Q5, #
                                 PGSI_Q6, #
                                 PGSI_Q7, #
                                 PGSI_Q8, #
                                 PGSI_Q9, #
                              na.rm = TRUE
)) %>%
  mutate(PGSI_STATUS = case_when(PGSI_COUNT_STATUS <2 ~ "No harm",
                                PGSI_COUNT_STATUS >=2 ~ "Harm",
                                 is.na(PGSI_COUNT_STATUS) ~ NA
         )) %>%
  # Do some checks to see if coding worked correctly:
  # count(PGSI_COUNT_STATUS, PGSI_STATUS)
  # count(PGSI_STATUS)

  # GHM outcome measure:
  mutate(GHM_L1 = if_else(GHM_L1 == 3,
                          1,
                          GHM_L1)) %>%
mutate(
  GHM_COUNT_STATUS = if_else(
    !is.na(GHM_F2) & !is.na(GHM_P2) & !is.na(GHM_R2) & !is.na(GHM_PH2) & !is.na(GHM_WS2) & !is.na(GHM_L1), # Only compute this value if all scores are present  (i.e., someone completed the measure)
    sum(
      GHM_F2, # Financial 
      GHM_P2, # Psychological health
      GHM_R2, # Relationships
      GHM_PH2, # Physical health
      GHM_WS2, # Work or study
      GHM_L1, # Legal
      na.rm = TRUE
    ),
    NA # Set to NA if any values are missing
  )
) %>%
  mutate(GHM_STATUS = case_when(GHM_COUNT_STATUS == 0 ~ "No harm",
                                GHM_COUNT_STATUS >0 ~ "Harm",
                                 is.na(GHM_COUNT_STATUS) ~ NA
         )) %>%
  ungroup() # Remove rowise grouping!

  
  # Do some checks to see if coding worked correctly:
  # count(GHM_COUNT_STATUS, GHM_STATUS)
#   select(GHM_F2,
# GHM_P2,
# GHM_R2,
# GHM_PH2,
# GHM_WS2,
# GHM_L1,
#   GHM_COUNT_STATUS,
# GHM_STATUS) %>%
# print(n=1000)

# master_dataset_recoded %>%
#   count(GHM_STATUS)

Weighting

Many of the results presented from now are weighted outcomes. Let’s create the data frame required for weighting:

Show code
# Create survey design object:
survey_design <- survey::svydesign(
  id = ~1,
  data = master_dataset_recoded,
  weights = ~ELIGIBLE_SURVEY_SAMPLE_WEIGHTS
)

Sample summary

Produce weighted summary variables that describe the entire survey sample.

Show code
# Gender percentages
q_table_GENDER <- svytable(~GENDER, survey_design)
prop_table_GENDER <- prop.table(q_table_GENDER)
GENDER_Percentages <- prop_table_GENDER %>% 
  as.data.frame() %>%
  mutate('Percent' = round(Freq*100, 2)) %>%
  select(-Freq) %>%
  mutate(Characteristic = paste("Gender:", GENDER),
         Value = paste0(Percent, "%")) %>%
  select(Characteristic, Value)

# Age mean and SD
Age_mean <- svymean(~AGE, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  filter(rowname == "mean")
Age_sd <- svyvar(~AGE, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
   rownames_to_column() %>%
  mutate(SD = sqrt(variance)) %>%
  select(SD)

Age_summary <- bind_cols(Age_mean, Age_sd) %>%
  mutate(Characteristic = "Age",
         Value = paste0(round(AGE, 2), " (", round(SD, 2), ")")) %>%
  select(Characteristic, Value)

# PGSI mean and SD
PGSI_TOTAL_mean <- svymean(~PGSI_TOTAL, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame() %>%
  
  rownames_to_column() %>%
  filter(rowname == "mean") 
PGSI_TOTAL_sd <- svyvar(~PGSI_TOTAL, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  mutate(SD = sqrt(variance)) %>%
  select(SD)

PGSI_summary <- bind_cols(PGSI_TOTAL_mean, PGSI_TOTAL_sd) %>%
  mutate(Characteristic = "PGSI Total Score",
         Value = paste0(round(PGSI_TOTAL, 2), " (", round(SD, 2), ")")) %>%
  select(Characteristic, Value)

# Education percentages
q_table_EDUCATION <- svytable(~EDUCATION, survey_design)
prop_table_EDUCATION <- prop.table(q_table_EDUCATION)
EDUCATION_Percentages <- prop_table_EDUCATION %>%
  as.data.frame()

# Check column names to see the correct variable name
# colnames(EDUCATION_Percentages)

EDUCATION_Percentages <- EDUCATION_Percentages %>%
  mutate('Percent' = round(Freq*100, 2)) %>%
  select(-Freq) %>%
  mutate(Characteristic = paste("Education:", EDUCATION),
         Value = paste0(Percent, "%")) %>%
  select(Characteristic, Value)

# Betting intensity mean and SD
PAS_30_BET_INTENSITY_mean <- svymean(~PAS_30_BET_INTENSITY, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  filter(rowname == "mean") 
PAS_30_BET_INTENSITY_sd <- svyvar(~PAS_30_BET_INTENSITY, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  
  rownames_to_column() %>%
  mutate(SD = sqrt(variance)) %>%
  select(SD)

Betting_intensity_summary <- bind_cols(PAS_30_BET_INTENSITY_mean, PAS_30_BET_INTENSITY_sd) %>%
  mutate(Characteristic = "Past 30 Day Betting Intensity",
         Value = paste0(round(PAS_30_BET_INTENSITY, 2), " (", round(SD, 2), ")")) %>%
  select(Characteristic, Value)

# Past 30 day net outcome mean and SD
PAS_30_NET_OUTCOME_mean <- svymean(~PAS_30_NET_OUTCOME, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame() %>%
  
  rownames_to_column() %>%
  filter(rowname == "mean")
PAS_30_NET_OUTCOME_sd <- svyvar(~PAS_30_NET_OUTCOME, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  mutate(SD = sqrt(variance)) %>%
  select(SD)

Net_outcome_summary <- bind_cols(PAS_30_NET_OUTCOME_mean, PAS_30_NET_OUTCOME_sd) %>%
  mutate(Characteristic = "Past 30 Day Net Outcome",
         Value = paste0(round(PAS_30_NET_OUTCOME, 2), " (", round(SD, 2), ")")) %>%
  select(Characteristic, Value)

# Past 30 day deposit frequency mean and SD
PAS_30_TOTAL_DEPOSIT_FREQ_mean <- svymean(~PAS_30_TOTAL_DEPOSIT_FREQ, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  filter(rowname == "mean") 

PAS_30_TOTAL_DEPOSIT_FREQ_sd <- svyvar(~PAS_30_TOTAL_DEPOSIT_FREQ, survey_design, na.rm = TRUE) %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  mutate(SD = sqrt(variance)) %>%
  select(SD)

Deposit_freq_summary <- bind_cols(PAS_30_TOTAL_DEPOSIT_FREQ_mean, PAS_30_TOTAL_DEPOSIT_FREQ_sd) %>%
  mutate(Characteristic = "Past 30 Day Deposit Frequency",
         Value = paste0(round(PAS_30_TOTAL_DEPOSIT_FREQ, 2), " (", round(SD, 2), ")")) %>%
  select(Characteristic, Value)

# Active deposit limit percentages
q_table_ACTIVE_DEPOSIT_LIMIT <- svytable(~ACTIVE_DEPOSIT_LIMIT, survey_design)
prop_table_ACTIVE_DEPOSIT_LIMIT <- prop.table(q_table_ACTIVE_DEPOSIT_LIMIT)
Deposit_limit_Percentages <- prop_table_ACTIVE_DEPOSIT_LIMIT %>%
  as.data.frame() %>%
  mutate('Percent' = round(Freq*100, 2)) %>%
  select(-Freq) %>%
  mutate(Characteristic = paste("Active Deposit Limit:", ACTIVE_DEPOSIT_LIMIT),
         Value = paste0(Percent, "%")) %>%
  select(Characteristic, Value)

# Combine all summaries
combined_summary <- bind_rows(GENDER_Percentages,
                              Age_summary,
                              PGSI_summary,
                              EDUCATION_Percentages,
                              Betting_intensity_summary,
                              Net_outcome_summary,
                              Deposit_freq_summary,
                              Deposit_limit_Percentages)

# Display as a table
combined_summary %>%
  gt()%>%
  tab_options(data_row.padding = px(3)) %>%
  tab_header("Supplemental Table. Sample characteristics")
Supplemental Table. Sample characteristics
Characteristic Value
Gender: Female 13.79%
Gender: Male 84.62%
Gender: Unknown 1.59%
Age 44.06 (15.5)
PGSI Total Score 3.64 (4.12)
Education: Year 11 or below (includes Certificate I/II/n) 16.95%
Education: Year 12 24.07%
Education: Certificate III/IV 19.23%
Education: Advanced diploma/diploma 9.05%
Education: Bachelor's degree 17.85%
Education: Graduate diploma or graduate level certificate 8.47%
Education: Master's degree 4.05%
Education: Doctoral degree 0.33%
Past 30 Day Betting Intensity 7.87 (15.54)
Past 30 Day Net Outcome -181.01 (1226.21)
Past 30 Day Deposit Frequency 9.41 (28.48)
Active Deposit Limit: 0 90.45%
Active Deposit Limit: 1 9.55%

Confirm sample size for above:

Show code
nrow(master_dataset)
[1] 1647

Rates by harm status

Show code
survey_design %>%
tbl_svysummary(include = c(PGSI_STATUS,GHM_STATUS),
                 statistic = list(all_categorical() ~ "{n_unweighted} ({p}%)"),
                                  missing = "no"
) %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Rates of harm for GHM & PGSI",
    subtitle = "Unweighted count  (weighted %)"
  ) 
Rates of harm for GHM & PGSI
Unweighted count (weighted %)
Characteristic Overall1
PGSI_STATUS
    Harm 893 (50%)
    No harm 754 (50%)
GHM_STATUS
    Harm 472 (25%)
    No harm 1,108 (75%)
1 n (unweighted) (%)
Show code
survey_design %>%
tbl_svysummary(include = c(GHM_STATUS),
               by = PGSI_STATUS,
                 statistic = list(all_categorical() ~ "{n_unweighted} ({p}%)"),
               missing = "no"
)  %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  # show_header_names() 
modify_header(stat_1 = "PGSI: Harm",
              stat_2 = "PGSI: No harm") %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Rates of harm for GHM grouped by PGSI status",
    subtitle = "Unweighted count (weighted %)"
  ) 
Rates of harm for GHM grouped by PGSI status
Unweighted count (weighted %)
Characteristic PGSI: Harm1 PGSI: No harm1
GHM_STATUS

    Harm 448 (48%) 24 (2.7%)
    No harm 406 (52%) 702 (97%)
1 n (unweighted) (%)

Correlation: PGSI & GHM

Show code
master_dataset_recoded_corr <- master_dataset_recoded %>%
  filter(!is.na(GHM_TOTAL))

weightedCorr(x = master_dataset_recoded_corr$PGSI_TOTAL, 
             y = master_dataset_recoded_corr$GHM_TOTAL, 
              method = "Spearman",
             weights = master_dataset_recoded_corr$ELIGIBLE_SURVEY_SAMPLE_WEIGHTS)
[1] 0.710879

Phase 2: Identifying Optimal Limits for Behavioural Indicators

Now we are going to see if we can identify optimal limit/cut off/thresholds for each of the behavioural indicators against both the PGSI and the GHM harm index.

We’re going to need to extract a lot of outcomes from this process and summarise them into a table. Hence, we will create some functions to help with this process here:

Show code
# Youden optimisation function: 
# Start by defining key  variables/inputs for the cutpoint function:
summarize_auc_results_youden <- function(data, predictor_var, pgsi_status, GHM_status, pos_class = "Harm", neg_class = "No harm", boot_runs = 2000) {
  
  # Calculate Threshold value & AUC results for PGSI:
  youden_cutpoint_pgsi <- cutpointr(data, {{ predictor_var }}, {{ pgsi_status }},
                                    pos_class = pos_class, 
                                    direction = ">=",
                                    neg_class = neg_class,
                                    method = maximize_boot_metric, metric = youden,
                                    boot_runs = boot_runs, na.rm = TRUE)
  
  # Calculate CI's for this:  
  pgsi_auc_cis <- boot_ci(youden_cutpoint_pgsi, AUC, in_bag = TRUE, alpha = 0.05) %>%
    select(values) %>%
    t() %>%
    as_tibble() %>%
    mutate(across(everything(), ~ round(as.numeric(.), 3))) %>% 
    unite("95%CIs_PGSI", V1, V2, sep = ", ")
  
  # Join these results so far:
  auc_results_pgsi <- youden_cutpoint_pgsi %>%
    select(predictor,
           optimal_cutpoint_PGSI = optimal_cutpoint,
           AUC_PGSI = AUC,
           sensitivity_PGSI = sensitivity,
           specificity_PGSI = specificity,   
           acc_PGSI = acc) %>%
    mutate(Metric_maximised = "Youden")
  
  
  # Calculate Threshold value & AUC results for GHM:
  youden_cutpoint_ghm <- cutpointr(data, {{ predictor_var }}, {{ GHM_status }},
                                   pos_class = pos_class, 
                                   neg_class = neg_class,
                                   direction = ">=",
                                   method = maximize_boot_metric, 
                                   metric = youden,
                                   boot_runs = boot_runs, 
                                   na.rm = TRUE)
  
  # Calculate CI's for this:
  GHM_auc_cis <- boot_ci(youden_cutpoint_ghm, AUC, in_bag = TRUE, alpha = 0.05) %>%
    select(values) %>%
    t() %>%
    as_tibble() %>%
    mutate(across(everything(), ~ round(as.numeric(.),3))) %>% 
    unite("95%CIs_GHM", V1, V2, sep = ", ")
  
  # Join these results so far:
  auc_results_ghm <- youden_cutpoint_ghm %>%
    select(optimal_cutpoint_GHM = optimal_cutpoint,
           AUC_GHM = AUC,
           sensitivity_GHM = sensitivity,
           specificity_GHM = specificity,   
           acc_GHM = acc)
  
  # Combine all results into a single summary table:
  auc_summary <- bind_cols(auc_results_pgsi, pgsi_auc_cis, auc_results_ghm, GHM_auc_cis)
  
  return(auc_summary)
}

# I use the below code to check we get similar results for the first variable of interest (they won't be identical because of bootstrapping) and indeed we do. 
# Youden_cutpoint_PAS_6M_MONTHLY_BET_FREQ_DAYS_PGSI<- cutpointr(master_dataset_recoded, PAS_6M_MONTHLY_BET_FREQ_DAYS, PGSI_STATUS, pos_class = "Harm", neg_class = "No harm", method = maximize_boot_metric, metric = youden, boot_runs = 1000)
# 
# Youden_cutpoint_PAS_6M_MONTHLY_BET_FREQ_DAYS_PGSI
# 
# PAS_6M_MONTHLY_BET_FREQ_DAYS_PGSI_AUC_CIS_PGSI<- boot_ci(Youden_cutpoint_PAS_6M_MONTHLY_BET_FREQ_DAYS_PGSI, AUC, in_bag = TRUE, alpha = 0.05) # This should compute 95% CIs for AUC values
# 
# # plot(Youden_cutpoint_PAS_6M_MONTHLY_BET_FREQ_DAYS_PGSI)
# 
# Youden_cutpoint_PAS_6M_MONTHLY_BET_FREQ_DAYS_GHM<- cutpointr(master_dataset_recoded, PAS_6M_MONTHLY_BET_FREQ_DAYS, GHM_STATUS, pos_class = "Harm", neg_class = "No harm", method = maximize_boot_metric, metric = youden, boot_runs = 1000)
# 
# PAS_6M_MONTHLY_BET_FREQ_DAYS_PGSI_AUC_CIS_GHM<-boot_ci(Youden_cutpoint_PAS_6M_MONTHLY_BET_FREQ_DAYS_GHM, AUC, in_bag = TRUE, alpha = 0.05) # This should compute 95% CIs for AUC values
# 
# # plot(Youden_cutpoint_PAS_6M_MONTHLY_BET_FREQ_DAYS_GHM)





# Sensitivity optimisation function: 
# Start by defining key  variables/inputs for the cutpoint function, This time noting that we are maximising sensitivity and constraining specificity to a minimum of 0.5:
summarize_auc_results_max_sensitivity <- function(data, predictor_var, pgsi_status, GHM_status, pos_class = "Harm", neg_class = "No harm", boot_runs = 2000) {
  
  # Calculate Threshold value & AUC results for PGSI:
  youden_cutpoint_pgsi <- cutpointr(data, {{ predictor_var }}, {{ pgsi_status }},
                                    pos_class = pos_class,
                                    neg_class = neg_class,
                                    direction = ">=",
                                    method = maximize_boot_metric, 
                                    metric = sens_constrain,
                                    constrain_metric = specificity, 
                                    min_constrain = 0.5,
                                    boot_runs = boot_runs,
                                    na.rm = TRUE)
  
  # Calculate CI's for this:  
  pgsi_auc_cis <- boot_ci(youden_cutpoint_pgsi, AUC, in_bag = TRUE, alpha = 0.05) %>%
    select(values) %>%
    t() %>%
    as_tibble() %>%
    mutate(across(everything(), ~ round(as.numeric(.),3))) %>% 
    unite("95%CIs_PGSI", V1, V2, sep = ", ")
  
  # Join these results so far:
  auc_results_pgsi <- youden_cutpoint_pgsi %>%
    select(predictor,
           optimal_cutpoint_PGSI = optimal_cutpoint,
           AUC_PGSI = AUC,
           sensitivity_PGSI = sensitivity,
           specificity_PGSI = specificity,   
           acc_PGSI = acc) %>%
    mutate(Metric_maximised = "Sensitivity")
  
  
  # Calculate Threshold value & AUC results for GHM:
  youden_cutpoint_ghm <- cutpointr(data, {{ predictor_var }}, {{ GHM_status }},
                                   pos_class = pos_class,
                                   neg_class = neg_class,
                                    direction = ">=",
                                   method = maximize_boot_metric, 
                                   metric = sens_constrain,
                                    constrain_metric = specificity,
                                   min_constrain = 0.5,
                                    boot_runs = boot_runs, na.rm = TRUE)
  
  # Calculate CI's for this:
  GHM_auc_cis <- boot_ci(youden_cutpoint_ghm, AUC, in_bag = TRUE, alpha = 0.05) %>%
    select(values) %>%
    t() %>%
    as_tibble() %>%
    mutate(across(everything(), ~ round(as.numeric(.),3))) %>% 
    unite("95%CIs_GHM", V1, V2, sep = ", ")
  
  # Join these results so far:
  auc_results_ghm <- youden_cutpoint_ghm %>%
    select(optimal_cutpoint_GHM = optimal_cutpoint,
           AUC_GHM = AUC,
           sensitivity_GHM = sensitivity,
           specificity_GHM = specificity,   
           acc_GHM = acc)
  
  # Combine all results into a single summary table:
  auc_summary <- bind_cols(auc_results_pgsi, pgsi_auc_cis, auc_results_ghm, GHM_auc_cis)
  
  return(auc_summary)
}




# Specificity optimisation function: 
# Start by defining key  variables/inputs for the cutpoint function, This time noting that we are maximising Specificity and constraining Sensitivity to a minimum of 0.5:
summarize_auc_results_max_specificity <- function(data, predictor_var, pgsi_status, GHM_status, pos_class = "Harm", neg_class = "No harm", boot_runs = 2000) {
  
  # Calculate Threshold value & AUC results for PGSI:
  youden_cutpoint_pgsi <- cutpointr(data, {{ predictor_var }}, {{ pgsi_status }},
                                    pos_class = pos_class,
                                    neg_class = neg_class,
                                    direction = ">=",
                                    method = maximize_boot_metric,
                                    metric = spec_constrain,
                                    constrain_metric = sensitivity,
                                    min_constrain = 0.5,
                                    boot_runs = boot_runs, 
                                    na.rm = TRUE)
  
  # Calculate CI's for this:  
  pgsi_auc_cis <- boot_ci(youden_cutpoint_pgsi, AUC, in_bag = TRUE, alpha = 0.05) %>%
    select(values) %>%
    t() %>%
    as_tibble() %>%
    mutate(across(everything(), ~ round(as.numeric(.),3))) %>% 
    unite("95%CIs_PGSI", V1, V2, sep = ", ")
  
  # Join these results so far:
  auc_results_pgsi <- youden_cutpoint_pgsi %>%
    select(predictor,
           method,
           optimal_cutpoint_PGSI = optimal_cutpoint,
           AUC_PGSI = AUC,
           sensitivity_PGSI = sensitivity,
           specificity_PGSI = specificity,   
           acc_PGSI = acc) %>%
    mutate(Metric_maximised = "Specificity")
  
  
  # Calculate Threshold value & AUC results for GHM:
  youden_cutpoint_ghm <- cutpointr(data, {{ predictor_var }}, {{ GHM_status }},
                                   pos_class = pos_class, 
                                   neg_class = neg_class,
                                    direction = ">=",
                                   method = maximize_boot_metric,
                                   metric = spec_constrain,
                                    constrain_metric = sensitivity,
                                    min_constrain = 0.5,
                                    boot_runs = boot_runs, na.rm = TRUE)
  
  
  # Calculate CI's for this:
  GHM_auc_cis <- boot_ci(youden_cutpoint_ghm, AUC, in_bag = TRUE, alpha = 0.05) %>%
    select(values) %>%
    t() %>%
    as_tibble() %>%
    mutate(across(everything(), ~ round(as.numeric(.),3))) %>% 
    unite("95%CIs_GHM", V1, V2, sep = ", ")
  
  # Join these results so far:
  auc_results_ghm <- youden_cutpoint_ghm %>%
    select(optimal_cutpoint_GHM = optimal_cutpoint,
           AUC_GHM = AUC,
           sensitivity_GHM = sensitivity,
           specificity_GHM = specificity,   
           acc_GHM = acc)
  
  # Combine all results into a single summary table:
  auc_summary <- bind_cols(auc_results_pgsi, pgsi_auc_cis, auc_results_ghm, GHM_auc_cis)
  
  return(auc_summary)
}

Because of the extensive bootstrapping required for the three approaches across eight different behavioural indicators, the code below takes quite a bit of time to run. Hence, it has been commented out to save time each time the script runs or document knits. The outcomes from the bootstrapping were saved to a data file which is loaded in at the end of this process.

Betting Frequency

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_BET_FREQ_DAYS,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_BET_FREQ_DAYS,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_BET_FREQ_DAYS,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Specificity

Amount Spent

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_MONTHLY_SPEND_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_SPEND,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_SPEND_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_MONTHLY_SPEND_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_SPEND,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_SPEND_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_MONTHLY_SPEND_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_SPEND,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_SPEND_AUC_Specificity

Percentage of Income Spent

Show code
# set.seed(12345)
# 
# # Youden Index:
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded,
#   predictor_var = PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded,
#   predictor_var = PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded,
#   predictor_var = PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Specificity

Number of Different Sports/races

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_N_ACTIVITIES_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_N_ACTIVITIES,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_N_ACTIVITIES_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_N_ACTIVITIES_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_N_ACTIVITIES,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_N_ACTIVITIES_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_N_ACTIVITIES_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_N_ACTIVITIES,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_N_ACTIVITIES_AUC_Specificity

Deposit Frequency

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_FREQ,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_FREQ,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_FREQ,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Specificity

Deposit Amount

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_AMOUNT,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_AMOUNT,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_AMOUNT,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Specificity

Percentage of Income Deposited

Show code
# set.seed(12345)
# 
# # Youden Index:
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded,
#   predictor_var = PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded,
#   predictor_var = PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded,
#   predictor_var = PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Specificity

Number of Wagering Accounts

Show code
# set.seed(12345)
# 
# # Youden Index:
# GAM_ACCOUNTS_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded,
#   predictor_var = GAM_ACCOUNTS_NUM,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# GAM_ACCOUNTS_AUC_Youden
# 
# 
# # Maximise sensitivity:
# GAM_ACCOUNTS_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded,
#   predictor_var = GAM_ACCOUNTS_NUM,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# GAM_ACCOUNTS_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# GAM_ACCOUNTS_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded,
#   predictor_var = GAM_ACCOUNTS_NUM,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# GAM_ACCOUNTS_AUC_Specificity

Table All Limit Cut-offs

Show code
# First join and then save all outcomes as a dataset for easy retrieval later on:
# AUC_limit_outcome_data <- bind_rows(
#           PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Youden,
#           PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Sensitivity,
#           PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Specificity,
#           PAS_6M_MONTHLY_SPEND_AUC_Youden,
#           PAS_6M_MONTHLY_SPEND_AUC_Sensitivity,
#           PAS_6M_MONTHLY_SPEND_AUC_Specificity,
#           PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Youden,
#           PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Sensitivity,
#           PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Specificity,
#           PAS_6M_N_ACTIVITIES_AUC_Youden,
#           PAS_6M_N_ACTIVITIES_AUC_Sensitivity,
#           PAS_6M_N_ACTIVITIES_AUC_Specificity,
#           PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Youden,
#           PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Sensitivity,
#           PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Specificity,
#           PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Youden,
#           PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Sensitivity,
#           PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Specificity,
#           PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Youden,
#           PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Sensitivity,
#           PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Specificity,
#           GAM_ACCOUNTS_AUC_Youden,
#           GAM_ACCOUNTS_AUC_Sensitivity,
#           GAM_ACCOUNTS_AUC_Specificity
# ) %>%
#   mutate_if(is.numeric, round, 3)
# 
# write.csv(AUC_limit_outcome_data,
#           "R Data files/AUC_limit_outcome_data.csv",
#           row.names=FALSE)


# Read in outcome data:
AUC_limit_outcome_data<- read.csv("R Data files/AUC_limit_outcome_data.csv")

# Table them:
AUC_limit_outcome_data %>%
  gt() %>%
  tab_options(data_row.padding = px(1.5))%>%
tab_header(
    title = "Supplemental Table 1. Performance metrics for all cut-off points for all behavioural indicators"
  ) 
Supplemental Table 1. Performance metrics for all cut-off points for all behavioural indicators
predictor optimal_cutpoint_PGSI AUC_PGSI sensitivity_PGSI specificity_PGSI acc_PGSI Metric_maximised X95.CIs_PGSI optimal_cutpoint_GHM AUC_GHM sensitivity_GHM specificity_GHM acc_GHM X95.CIs_GHM method
PAS_6M_MONTHLY_BET_FREQ_DAYS 9.051 0.560 0.443 0.646 0.536 Youden 0.533, 0.587 7.447 0.573 0.553 0.565 0.561 0.542, 0.604 NA
PAS_6M_MONTHLY_BET_FREQ_DAYS 5.681 0.560 0.579 0.507 0.546 Sensitivity 0.532, 0.587 6.285 0.573 0.587 0.510 0.533 0.542, 0.606 NA
PAS_6M_MONTHLY_BET_FREQ_DAYS 7.861 0.560 0.496 0.602 0.545 Specificity 0.533, 0.589 8.563 0.573 0.498 0.605 0.573 0.544, 0.603 maximize_boot_metric
PAS_6M_MONTHLY_SPEND 932.621 0.621 0.514 0.688 0.594 Youden 0.593, 0.648 899.123 0.648 0.602 0.649 0.635 0.619, 0.678 NA
PAS_6M_MONTHLY_SPEND 251.420 0.621 0.685 0.497 0.599 Sensitivity 0.594, 0.648 332.348 0.648 0.712 0.514 0.573 0.619, 0.679 NA
PAS_6M_MONTHLY_SPEND 1062.813 0.621 0.497 0.700 0.590 Specificity 0.594, 0.649 2108.328 0.648 0.494 0.737 0.665 0.619, 0.678 maximize_boot_metric
PERCENT_MON_INCOME_SPENT_MONTHLY 6.380 0.655 0.580 0.686 0.627 Youden 0.622, 0.688 8.923 0.667 0.635 0.692 0.674 0.631, 0.703 NA
PERCENT_MON_INCOME_SPENT_MONTHLY 2.119 0.655 0.711 0.511 0.621 Sensitivity 0.624, 0.69 2.749 0.667 0.739 0.508 0.582 0.633, 0.701 NA
PERCENT_MON_INCOME_SPENT_MONTHLY 11.280 0.655 0.499 0.745 0.609 Specificity 0.623, 0.687 16.941 0.667 0.501 0.751 0.671 0.632, 0.702 maximize_boot_metric
PAS_6M_N_ACTIVITIES 4.080 0.611 0.499 0.650 0.568 Youden 0.583, 0.637 4.580 0.618 0.551 0.623 0.601 0.588, 0.647 NA
PAS_6M_N_ACTIVITIES 4.100 0.611 0.499 0.650 0.568 Sensitivity 0.584, 0.638 4.820 0.618 0.551 0.623 0.601 0.59, 0.648 NA
PAS_6M_N_ACTIVITIES 4.680 0.611 0.499 0.650 0.568 Specificity 0.584, 0.639 5.000 0.618 0.551 0.623 0.601 0.588, 0.649 maximize_boot_metric
PAS_6M_MONTHLY_DEPOSIT_FREQ 3.806 0.651 0.623 0.630 0.626 Youden 0.624, 0.676 6.415 0.669 0.633 0.651 0.646 0.64, 0.698 NA
PAS_6M_MONTHLY_DEPOSIT_FREQ 1.776 0.651 0.719 0.513 0.625 Sensitivity 0.624, 0.677 2.299 0.669 0.754 0.504 0.578 0.64, 0.699 NA
PAS_6M_MONTHLY_DEPOSIT_FREQ 8.841 0.651 0.493 0.741 0.607 Specificity 0.626, 0.679 14.005 0.669 0.492 0.756 0.677 0.642, 0.699 maximize_boot_metric
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 216.240 0.648 0.591 0.647 0.617 Youden 0.62, 0.674 259.193 0.667 0.676 0.634 0.647 0.638, 0.695 NA
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 80.171 0.648 0.717 0.504 0.619 Sensitivity 0.621, 0.674 109.794 0.667 0.765 0.504 0.582 0.637, 0.696 NA
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 471.498 0.648 0.496 0.725 0.601 Specificity 0.622, 0.674 904.495 0.667 0.494 0.742 0.668 0.637, 0.696 maximize_boot_metric
PERCENT_MON_INCOME_DEPOSIT_MONTHLY 2.129 0.691 0.604 0.709 0.651 Youden 0.66, 0.724 3.393 0.690 0.638 0.698 0.679 0.654, 0.724 NA
PERCENT_MON_INCOME_DEPOSIT_MONTHLY 0.652 0.691 0.766 0.502 0.647 Sensitivity 0.659, 0.724 0.925 0.690 0.786 0.506 0.595 0.656, 0.723 NA
PERCENT_MON_INCOME_DEPOSIT_MONTHLY 5.122 0.691 0.504 0.785 0.630 Specificity 0.66, 0.723 9.617 0.690 0.496 0.788 0.695 0.656, 0.723 maximize_boot_metric
GAM_ACCOUNTS_NUM 3.000 0.610 0.606 0.576 0.592 Youden 0.585, 0.636 3.240 0.612 0.375 0.764 0.647 0.581, 0.64 NA
GAM_ACCOUNTS_NUM 3.000 0.610 0.606 0.576 0.592 Sensitivity 0.583, 0.636 3.100 0.612 0.375 0.764 0.647 0.583, 0.641 NA
GAM_ACCOUNTS_NUM 3.000 0.610 0.606 0.576 0.592 Specificity 0.585, 0.637 3.000 0.612 0.633 0.526 0.558 0.581, 0.641 maximize_boot_metric

Find Optimal Limits

Isolate the optimal limits/cut-off values for PGSI and GHM classification based on our preregistered plan:

Show code
PGSI_optimal_limits <- AUC_limit_outcome_data %>%
  group_by(predictor) %>%
  filter(acc_PGSI == max(acc_PGSI)) %>%
  slice(1) %>%
    mutate(
    AUC_with_CI_PGSI = paste0(
      round(AUC_PGSI, 3), " [", 
      gsub(",", ",", gsub("\\[|\\]", "", X95.CIs_PGSI)), "]"
    )
  ) %>%
  select(predictor,
         AUC_with_CI_PGSI,
         Metric_maximised,
         optimal_cutpoint_PGSI,
         acc_PGSI,
         sensitivity_PGSI,
         specificity_PGSI)

GHM_optimal_limits <- AUC_limit_outcome_data %>%
  group_by(predictor) %>%
  filter(acc_GHM == max(acc_GHM)) %>%
  slice(1) %>%
    mutate(
    AUC_with_CI_GHM = paste0(
      round(AUC_GHM, 3), " [", 
      gsub(",", ",", gsub("\\[|\\]", "", X95.CIs_GHM)), "]"
    )
  ) %>%
  select(predictor,
         AUC_with_CI_GHM,
         Metric_maximised,
         optimal_cutpoint_GHM,
         acc_GHM,
         sensitivity_GHM,
         specificity_GHM)

optimal_limits<- bind_cols(PGSI_optimal_limits,
          GHM_optimal_limits) %>%
rename(Predictor = 1,
       'Metric maximised PGSI' = Metric_maximised...3,
       'Metric maximised GHM' = Metric_maximised...10) %>%
select(-predictor...8) %>%
  mutate(Predictor = case_when(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY" ~ "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M",
                               Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY" ~ "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M",
                               TRUE ~ Predictor))

# Sort order:
predictor_order <- c(
  "PAS_6M_MONTHLY_BET_FREQ_DAYS",
  "PAS_6M_MONTHLY_SPEND",
  "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M",
  "PAS_6M_N_ACTIVITIES",
  "PAS_6M_MONTHLY_DEPOSIT_FREQ",
  "PAS_6M_MONTHLY_DEPOSIT_AMOUNT",
  "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M",
  "GAM_ACCOUNTS_NUM"
)

optimal_limits <- optimal_limits %>%
  mutate(Predictor = factor(Predictor, levels = predictor_order)) %>%
  arrange(Predictor) %>%
  mutate(optimal_cutpoint_PGSI = round(optimal_cutpoint_PGSI,2),
         optimal_cutpoint_GHM = round(optimal_cutpoint_GHM,2))

# Make table:
gt(optimal_limits) %>%
   tab_row_group(
    label = "Guidelines examined in previous studies", rows = 5:8
  ) %>% 
  tab_row_group(
    label = "Online wagering specific guidelines not previously examined", rows = 1:4
  ) %>%
    #### Example from other table:
     cols_label(
    Predictor = "Behavioural indicator",
    AUC_with_CI_PGSI = "AUC [95% CIs]",
    'Metric maximised PGSI' = "Metric maximised",
    optimal_cutpoint_PGSI = "Optimal cut point",
    acc_PGSI = "Accuracy",
    sensitivity_PGSI = "Sensitivity",
    specificity_PGSI = "Specificity",
    AUC_with_CI_GHM = "AUC [95% CIs]",
    'Metric maximised GHM' = "Metric maximised",
    optimal_cutpoint_GHM = "Optimal cut point",
    acc_GHM = "Accuracy",
    sensitivity_GHM = "Sensitivity",
    specificity_GHM = "Specificity",
  ) %>%
  tab_options(data_row.padding = px(2.5)) %>%
tab_header(
    title = "Table 2. Optimal limit values for low-risk guidelines for online wagering with classification performance metrics"
  )  %>%
  tab_footnote( "Note: PGSI = Problem Gambling Severity Index; GHM = Gambling Harm Measure; AUC = Area under the curve; CIs = 95% Confidence intervals. Metric maximised = the approach used to reach the limit with the highest accuracy value for each behavioural indicator."
  )
Table 2. Optimal limit values for low-risk guidelines for online wagering with classification performance metrics
Behavioural indicator AUC [95% CIs] Metric maximised Optimal cut point Accuracy Sensitivity Specificity AUC [95% CIs] Metric maximised Optimal cut point Accuracy Sensitivity Specificity
Online wagering specific guidelines not previously examined
PAS_6M_MONTHLY_BET_FREQ_DAYS 0.56 [0.532, 0.587] Sensitivity 5.68 0.546 0.579 0.507 0.573 [0.544, 0.603] Specificity 8.56 0.573 0.498 0.605
PAS_6M_MONTHLY_SPEND 0.621 [0.594, 0.648] Sensitivity 251.42 0.599 0.685 0.497 0.648 [0.619, 0.678] Specificity 2108.33 0.665 0.494 0.737
PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M 0.655 [0.622, 0.688] Youden 6.38 0.627 0.580 0.686 0.667 [0.631, 0.703] Youden 8.92 0.674 0.635 0.692
PAS_6M_N_ACTIVITIES 0.611 [0.583, 0.637] Youden 4.08 0.568 0.499 0.650 0.618 [0.588, 0.647] Youden 4.58 0.601 0.551 0.623
Guidelines examined in previous studies
PAS_6M_MONTHLY_DEPOSIT_FREQ 0.651 [0.624, 0.676] Youden 3.81 0.626 0.623 0.630 0.669 [0.642, 0.699] Specificity 14.01 0.677 0.492 0.756
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 0.648 [0.621, 0.674] Sensitivity 80.17 0.619 0.717 0.504 0.667 [0.637, 0.696] Specificity 904.50 0.668 0.494 0.742
PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M 0.691 [0.66, 0.724] Youden 2.13 0.651 0.604 0.709 0.69 [0.656, 0.723] Specificity 9.62 0.695 0.496 0.788
GAM_ACCOUNTS_NUM 0.61 [0.585, 0.636] Youden 3.00 0.592 0.606 0.576 0.612 [0.581, 0.64] Youden 3.24 0.647 0.375 0.764
Note: PGSI = Problem Gambling Severity Index; GHM = Gambling Harm Measure; AUC = Area under the curve; CIs = 95% Confidence intervals. Metric maximised = the approach used to reach the limit with the highest accuracy value for each behavioural indicator.

Interestingly, when using the PGSI, maximising either the Youden index or sensitivity led to the best accuracy, when using the GHM, maximising Youden or specificity was most effective.

Phase 3: Demographic-adjusted Predictive Ability of New Limits

Here we started focus on testing H2. From our pre-registration:

H2: Participants who surpass each of our newly derived low-risk limits for the eight different behavioural indicators in the six months preceding survey participation will have a greater odds of reporting the experience of harm than who do not surpass the limits after accounting for the impact of demographic factors (i.e., age, gender, employment status, and education level) on harm. Specifically, the odds ratios produced via logistic regression models[1] for each of the eight limits will be ≥1.10[2] and 95% confidence intervals for these values will not cross 1, both when assessing each limit in isolation [H2.1] and when all limits are included in one statistical model to adjust for their relative contributions to the experience of harm [H2.2]. [Limit evaluation: demographic and other limit adjusted ability of limit breaches to predict harm status]

[1] All logistic regression models used to test hypotheses will include age and gender as predictor variables to account for their impact on gambling harm.

[2] This value represents our minimum effect size of interest in this context. We view a 10% increased odds of harm resulting from passing a low-risk limit as the minimum amount for a guideline to have practical value in informing about and identifying at-risk play.

First, to do this we need to add some new variables to a data set that label whether somebody passed our new limits or not.

Let’s now add new variables which represent whether somebody passed or did not pass the limits for PGSI and GHM. We will make each of the optimal limits rounded to the nearest whole number so that they are more actionable and representative of what we would recommend. Let’s also create a variable that sums the total number of limits someone passes:

Show code
master_dataset_recoded_w_new_limits <- master_dataset_recoded %>%
  
  # Betting frequency:
  mutate(NEW_LIMIT_PGSI_BET_FREQ = case_when(
    PAS_6M_MONTHLY_BET_FREQ_DAYS > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_BET_FREQ_DAYS") %>% 
      pull(optimal_cutpoint_PGSI),0) ~ 1,
    PAS_6M_MONTHLY_BET_FREQ_DAYS <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_BET_FREQ_DAYS") %>% 
      pull(optimal_cutpoint_PGSI), 0) ~ 0,
    TRUE ~ NA_real_
  )) %>%
   mutate(NEW_LIMIT_GHM_BET_FREQ = case_when(
    PAS_6M_MONTHLY_BET_FREQ_DAYS > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_BET_FREQ_DAYS") %>% 
      pull(optimal_cutpoint_GHM),0) ~ 1,
    PAS_6M_MONTHLY_BET_FREQ_DAYS <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_BET_FREQ_DAYS") %>% 
      pull(optimal_cutpoint_GHM), 0) ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(PAS_6M_MONTHLY_BET_FREQ_DAYS,
  #        NEW_LIMIT_PGSI_BET_FREQ)
  
  # Spend/turnover:
    mutate(NEW_LIMIT_PGSI_SPEND = case_when(
    PAS_6M_MONTHLY_SPEND > 250 ~ 1,
    PAS_6M_MONTHLY_SPEND <= 250  ~ 0,
    TRUE ~ NA_real_
  )) %>%
      mutate(NEW_LIMIT_GHM_SPEND = case_when(
    PAS_6M_MONTHLY_SPEND > 2100 ~ 1,
    PAS_6M_MONTHLY_SPEND <= 2100  ~ 0,
    TRUE ~ NA_real_
  )) %>%
   # Check coding has worked:
  # select(PAS_6M_MONTHLY_SPEND,
  #        NEW_LIMIT_PGSI_SPEND)
  
  # Percentage of income spent:
      mutate(NEW_LIMIT_PGSI_INCOME_SPENT = case_when(
    PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M > round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M <= round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_INCOME_SPENT = case_when(
    PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M > round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M <= round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M,
  #        NEW_LIMIT_PGSI_INCOME_SPENT)

  # Number of sports/races bet on:
      mutate(NEW_LIMIT_PGSI_N_ACTIVITIES = case_when(
    PAS_6M_N_ACTIVITIES > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_N_ACTIVITIES") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    PAS_6M_N_ACTIVITIES <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_N_ACTIVITIES") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_N_ACTIVITIES = case_when(
    PAS_6M_N_ACTIVITIES > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_N_ACTIVITIES") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    PAS_6M_N_ACTIVITIES <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_N_ACTIVITIES") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(PAS_6M_N_ACTIVITIES,
  #        NEW_LIMIT_PGSI_N_ACTIVITIES)
  
  # Deposit frequency:
      mutate(NEW_LIMIT_PGSI_DEPOSIT_FREQ = case_when(
    PAS_6M_MONTHLY_DEPOSIT_FREQ > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_FREQ") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    PAS_6M_MONTHLY_DEPOSIT_FREQ <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_FREQ") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_DEPOSIT_FREQ = case_when(
    PAS_6M_MONTHLY_DEPOSIT_FREQ > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_FREQ") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    PAS_6M_MONTHLY_DEPOSIT_FREQ <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_FREQ") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(PAS_6M_MONTHLY_DEPOSIT_FREQ,
  #        NEW_LIMIT_PGSI_DEPOSIT_FREQ)
  
  # Deposit amount:
      mutate(NEW_LIMIT_PGSI_DEPOSIT_AMOUNT = case_when(
    PAS_6M_MONTHLY_DEPOSIT_AMOUNT > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_AMOUNT") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    PAS_6M_MONTHLY_DEPOSIT_AMOUNT <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_AMOUNT") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  mutate(NEW_LIMIT_GHM_DEPOSIT_AMOUNT = case_when(
    PAS_6M_MONTHLY_DEPOSIT_AMOUNT >=900  ~ 1,
    PAS_6M_MONTHLY_DEPOSIT_AMOUNT < 900  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(PAS_6M_MONTHLY_DEPOSIT_AMOUNT,
  #        NEW_LIMIT_PGSI_DEPOSIT_AMOUNT)
  
  # Percentage of income deposited:
      mutate(NEW_LIMIT_PGSI_INCOME_DEPOSITED = case_when(
    PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M > round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M <= round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_INCOME_DEPOSITED = case_when(
    PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M > round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M <= round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
  #        NEW_LIMIT_PGSI_INCOME_DEPOSITED)

# Number of gambling accounts:
      mutate(NEW_LIMIT_PGSI_GAM_ACCOUNTS = case_when(
    GAM_ACCOUNTS_NUM >= round(optimal_limits %>% 
      filter(Predictor == "GAM_ACCOUNTS_NUM") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    GAM_ACCOUNTS_NUM < round(optimal_limits %>% 
      filter(Predictor == "GAM_ACCOUNTS_NUM") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_GAM_ACCOUNTS = case_when(
    GAM_ACCOUNTS_NUM > round(optimal_limits %>% 
      filter(Predictor == "GAM_ACCOUNTS_NUM") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    GAM_ACCOUNTS_NUM <= round(optimal_limits %>% 
      filter(Predictor == "GAM_ACCOUNTS_NUM") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(GAM_ACCOUNTS_NUM,
  #        NEW_LIMIT_PGSI_GAM_ACCOUNTS) %>%

# recode outcome variables for simplicity of interpretation in regression models:
mutate(PGSI_STATUS = case_when(PGSI_STATUS == "Harm" ~ 1,
                               PGSI_STATUS == "No harm" ~ 0,
                    TRUE ~ NA)) %>%
mutate(GHM_STATUS = case_when(GHM_STATUS == "Harm" ~ 1,
                               GHM_STATUS == "No harm" ~ 0,
                    TRUE ~ NA)) %>%
  # Calculate the total no. of limits reached:
  mutate(TOTAL_LIMITS_REACHED_PGSI = rowSums(select(., starts_with("NEW_LIMIT_PGSI_")), na.rm = TRUE)) %>%
  mutate(TOTAL_LIMITS_REACHED_PGSI = relevel(as.factor(TOTAL_LIMITS_REACHED_PGSI), ref = "0")) %>%
    mutate(TOTAL_LIMITS_REACHED_GHM = rowSums(select(., starts_with("NEW_LIMIT_GHM_")), na.rm = TRUE)) %>%
  mutate(TOTAL_LIMITS_REACHED_GHM = relevel(as.factor(TOTAL_LIMITS_REACHED_GHM), ref = "0")) %>%
#Compute total limits passed other than gamb freq as our analyses do not support the value of this limit:
mutate(TOTAL_LIMITS_REACHED_PGSI_SANS_BET_FREQ = rowSums(select(., starts_with("NEW_LIMIT_PGSI_") & !matches("NEW_LIMIT_PGSI_BET_FREQ")), na.rm = TRUE)) %>%
  mutate(TOTAL_LIMITS_REACHED_PGSI_SANS_BET_FREQ = relevel(as.factor(TOTAL_LIMITS_REACHED_PGSI_SANS_BET_FREQ), ref = "0")) %>%
  mutate(TOTAL_LIMITS_REACHED_GHM_SANS_BET_FREQ = rowSums(select(., starts_with("NEW_LIMIT_GHM_")& !matches("NEW_LIMIT_GHM_BET_FREQ")), na.rm = TRUE)) %>%
  mutate(TOTAL_LIMITS_REACHED_GHM_SANS_BET_FREQ = relevel(as.factor(TOTAL_LIMITS_REACHED_GHM_SANS_BET_FREQ), ref = "0"))

  # Check coding:


# master_dataset_recoded_w_new_limits %>%
# select(NEW_LIMIT_PGSI_BET_FREQ,
# NEW_LIMIT_PGSI_SPEND,
# NEW_LIMIT_PGSI_INCOME_SPENT,
# NEW_LIMIT_PGSI_N_ACTIVITIES,
# NEW_LIMIT_PGSI_DEPOSIT_FREQ,
# NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
# NEW_LIMIT_PGSI_INCOME_DEPOSITED,
# NEW_LIMIT_PGSI_GAM_ACCOUNTS,
# TOTAL_LIMITS_REACHED_PGSI,
# TOTAL_LIMITS_REACHED_GHM,
# TOTAL_LIMITS_REACHED_PGSI_SANS_BET_FREQ,
# TOTAL_LIMITS_REACHED_GHM_SANS_BET_FREQ)

# master_dataset_recoded_w_new_limits %>%
# count(NEW_LIMIT_PGSI_BET_FREQ)
# 
# master_dataset_recoded_w_new_limits %>%
# count(NEW_LIMIT_PGSI_SPEND)
# 
# master_dataset_recoded_w_new_limits %>%
# count(NEW_LIMIT_PGSI_INCOME_SPENT)
# 
# master_dataset_recoded_w_new_limits %>%
# count(NEW_LIMIT_PGSI_N_ACTIVITIES)
# 
# master_dataset_recoded_w_new_limits %>%
# count(NEW_LIMIT_PGSI_DEPOSIT_FREQ)
# 
# master_dataset_recoded_w_new_limits %>%
# count(NEW_LIMIT_PGSI_DEPOSIT_AMOUNT)
# 
# master_dataset_recoded_w_new_limits %>%
# count(NEW_LIMIT_PGSI_INCOME_DEPOSITED)
# 
# master_dataset_recoded_w_new_limits %>%
# count(NEW_LIMIT_PGSI_GAM_ACCOUNTS)

Re-weighting

We need to update the survey design object with new variables:

Show code
# Create survey design object:
survey_design_w_new_limits <- survey::svydesign(
  id = ~1,
  data = master_dataset_recoded_w_new_limits,
  weights = ~ELIGIBLE_SURVEY_SAMPLE_WEIGHTS
)

Logistic Regressions

Next, perform logistic regressions to examine how well exceeding each limit predicts harm status, controlling for demographic variables. For each analysis, two models will be run: one for the PGSI and one for the GHM.

For each model, we will run a function to produce plots showing diagnostic checks and then table regression outcomes together for the PGSI and GHM models, exponentiating betas into odds ratios.

Betting Frequency

Show code
# PGSI model:
weighted_lr_freq_new_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_BET_FREQ,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_freq_new_limit_PGSI <- tbl_regression(weighted_lr_freq_new_limit_PGSI, 
                 exponentiate = TRUE) %>% 
 add_glance_table() # View outcomes
  # add_glance_source_note()   

check_model(weighted_lr_freq_new_limit_PGSI)

Show code
# GHM model:
weighted_lr_freq_new_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_BET_FREQ,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_freq_new_limit_GHM<- tbl_regression(weighted_lr_freq_new_limit_GHM, 
                 exponentiate = TRUE) %>% # View outcomes
  add_glance_table()
  #add_glance_source_note()   


check_model(weighted_lr_freq_new_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_freq_new_limit_PGSI, tbl_weighted_lr_freq_new_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal gambling frequency limit for online wagering as a predictor of harm status (demographic-adjusted models)"
  ) 
New optimal gambling frequency limit for online wagering as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.74 1.22, 2.49 0.002 2.28 1.36, 3.81 0.002
    Unknown 2.24 0.82, 6.12 0.12 2.32 0.71, 7.62 0.2
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.38 0.77, 2.47 0.3 2.72 1.48, 4.99 0.001
    Unknown 1.17 0.88, 1.56 0.3 0.98 0.69, 1.40 >0.9
EDUCATION_NUM 0.90 0.83, 0.97 0.007 0.97 0.89, 1.06 0.5
GAM_ACCOUNTS_NUM 1.36 1.23, 1.49 <0.001 1.34 1.20, 1.48 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.88 1.38, 2.56 <0.001 1.74 1.20, 2.52 0.004
NEW_LIMIT_PGSI_BET_FREQ 1.37 1.07, 1.74 0.012


Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,103

1,647

BIC 2,154

1,699

Deviance 2,080

1,625

Residual df 1,637

1,570

No. Obs. 1,647

1,580

NEW_LIMIT_GHM_BET_FREQ


1.37 1.01, 1.85 0.046
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_bet_freq_PGSI<- NagelkerkeR2(weighted_lr_freq_new_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1647 0.1548543
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 15.4854331% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_bet_freq_GHM<- NagelkerkeR2(weighted_lr_freq_new_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1580 0.1437819

The model accounts for 14.3781926% of variance in PGSI harm rates.

Risk Curve

Let’s plot some risk curves for the relationship between this predictor and the two outcomes. We didn’t pre-register this, but it’s purpose is solely for visual exploration/supplementation and will not be used for inference.

Show code
# Set up a good theme for these plots going forward:
risk_curve_theme<-  theme_classic() +
  theme(
    text = element_text(family = "Poppins"),
    plot.title = element_text(hjust = 0.5, size = 10, face = "plain"),
    plot.subtitle = element_text(hjust = 0.5, size = 13),
    axis.text.x = element_text(size = 9, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 9),
    axis.title = element_text(size = 10),
    plot.caption = element_text(size = 12),
    legend.title = element_text(size = 12), 
    legend.text = element_text(size = 10)
  ) 

# Create a summary dataset:
summary_data <- master_dataset_recoded_w_new_limits %>%
  mutate(decile = ntile(PAS_6M_MONTHLY_BET_FREQ_DAYS, 10)) %>%
  group_by(decile) %>%
  summarise(
    PGSI_pct = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100,
    min_val = min(PAS_6M_MONTHLY_BET_FREQ_DAYS, na.rm = TRUE),
    max_val = max(PAS_6M_MONTHLY_BET_FREQ_DAYS, na.rm = TRUE)
  ) %>%
  mutate(decile_label = paste(round(min_val, 1), round(max_val, 1), sep = "-"))

long_data <- summary_data %>%
  pivot_longer(
    cols = c(PGSI_pct, GHM_pct),
    names_to = "HarmMeasure",
    values_to = "pct_harmed"
  ) %>%
  mutate(HarmMeasure = case_when(HarmMeasure == "PGSI_pct" ~"PGSI",
                                 HarmMeasure == "GHM_pct" ~"GHM"))

# Plot both curves on the same plot for ease:
combined_risk_curve_betting_freq <- ggplot(long_data, aes(x = as.numeric(decile), y = pct_harmed, 
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size = 1) +
  geom_point() +
  scale_x_continuous(breaks = 1:10, labels = summary_data$decile_label) +
  labs(title = "No. betting days",
       x = "",
       y = "") +
    ylim(0, 100) +
  # PGSI LIMIT:
   geom_vline(xintercept = 4, linetype = "dashed", color = "#0B8AAD", size = 1, alpha = 0.7) +
  # GHM LIMIT:
   geom_vline(xintercept = 5, linetype = "dashed", color = "orange", size = 1, alpha = 0.7) +
   scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
    risk_curve_theme +
     theme(legend.position = "none",
        plot.margin = margin(0,0,-1,0)) 

combined_risk_curve_betting_freq

Amount Spent

Show code
# PGSI model:
weighted_lr_spend_new_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_SPEND,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_spend_new_limit_PGSI <- tbl_regression(weighted_lr_spend_new_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_spend_new_limit_PGSI)

Show code
# GHM model:
weighted_lr_spend_new_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_SPEND,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_spend_new_limit_GHM<- tbl_regression(weighted_lr_spend_new_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_spend_new_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_spend_new_limit_PGSI, tbl_weighted_lr_spend_new_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal gambling spending amount (turnover) limit for online wagering as a predictor of harm status (demographic-adjusted models)"
  ) 
New optimal gambling spending amount (turnover) limit for online wagering as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.63 1.14, 2.34 0.008 2.11 1.25, 3.55 0.005
    Unknown 1.95 0.68, 5.58 0.2 1.94 0.51, 7.37 0.3
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.38 0.77, 2.50 0.3 2.78 1.51, 5.12 0.001
    Unknown 1.14 0.85, 1.53 0.4 0.90 0.62, 1.30 0.6
EDUCATION_NUM 0.90 0.84, 0.98 0.012 0.97 0.89, 1.06 0.5
GAM_ACCOUNTS_NUM 1.34 1.22, 1.48 <0.001 1.36 1.22, 1.51 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.91 1.40, 2.62 <0.001 1.84 1.26, 2.68 0.002
NEW_LIMIT_PGSI_SPEND 2.00 1.56, 2.55 <0.001


Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,070

1,605

BIC 2,121

1,655

Deviance 2,047

1,582

Residual df 1,637

1,570

No. Obs. 1,647

1,580

NEW_LIMIT_GHM_SPEND


3.26 2.25, 4.74 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_spend_lr_PGSI<- NagelkerkeR2(weighted_lr_spend_new_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1647 0.1781409
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 17.8140946% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_spend_lr_GHM<- NagelkerkeR2(weighted_lr_spend_new_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1580 0.1797802

The model accounts for 17.9780248% of variance in PGSI harm rates.

Risk Curve

Plot some risk curves for the relationship between this predictor and the two outcomes.

Show code
# Create a summary dataset:
summary_data <- master_dataset_recoded_w_new_limits %>%
  mutate(decile = ntile(PAS_6M_MONTHLY_SPEND, 10)) %>%
  group_by(decile) %>%
  summarise(
    PGSI_pct = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100,
    min_val = min(PAS_6M_MONTHLY_SPEND, na.rm = TRUE),
    max_val = max(PAS_6M_MONTHLY_SPEND, na.rm = TRUE)
  ) %>%
  mutate(decile_label = paste(round(min_val, 0), round(max_val, 0), sep = "-")) %>%
  mutate(decile_label = paste0("$", round(min_val, 0), "-", round(max_val, 0))) 

long_data <- summary_data %>%
  pivot_longer(
    cols = c(PGSI_pct, GHM_pct),
    names_to = "HarmMeasure",
    values_to = "pct_harmed"
  ) %>%
  mutate(HarmMeasure = case_when(HarmMeasure == "PGSI_pct" ~"PGSI",
                                 HarmMeasure == "GHM_pct" ~"GHM"))

# Plot both curves on the same plot for ease:
combined_risk_curve_spend <- ggplot(long_data, aes(x = as.numeric(decile), y = pct_harmed, 
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size = 1) +
  geom_point() +
  scale_x_continuous(breaks = 1:10, labels = summary_data$decile_label) +
  labs(title = "Mean spend",
       x = "",
       y = "") +
    ylim(0, 100) +
    # PGSI LIMIT:
   geom_vline(xintercept = 3.95, linetype = "dashed", color = "#0B8AAD", size = 1, alpha = 0.7) +
  # GHM LIMIT:
   geom_vline(xintercept = 6.7, linetype = "dashed", color = "orange", size = 1, alpha = 0.7) +
  scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
   theme(legend.position = "none",
        plot.margin = margin(0,0,-1,0)) +
  risk_curve_theme 
 

combined_risk_curve_spend

Percentage of Income Spent

Show code
# PGSI model:
weighted_lr_income_spent_new_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_INCOME_SPENT,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_income_spent_new_limit_PGSI <- tbl_regression(weighted_lr_income_spent_new_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_income_spent_new_limit_PGSI)

Show code
# GHM model:
weighted_lr_income_spent_new_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_INCOME_SPENT,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_income_spent_new_limit_GHM<- tbl_regression(weighted_lr_income_spent_new_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_income_spent_new_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_income_spent_new_limit_PGSI, tbl_weighted_lr_income_spent_new_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal percentage of income spent limit for online wagering as a predictor of harm status (demographic-adjusted models)"
  ) 
New optimal percentage of income spent limit for online wagering as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.73 1.07, 2.80 0.026 2.22 1.16, 4.24 0.015
    Unknown 3.82 1.02, 14.3 0.047 1.71 0.44, 6.62 0.4
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.06 0.52, 2.16 0.9 1.58 0.72, 3.46 0.3
EDUCATION_NUM 0.91 0.83, 1.0 0.037 0.99 0.90, 1.09 0.9
GAM_ACCOUNTS_NUM 1.34 1.19, 1.50 <0.001 1.38 1.20, 1.57 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.93 1.32, 2.82 <0.001 1.97 1.25, 3.10 0.003
NEW_LIMIT_PGSI_INCOME_SPENT 2.83 2.05, 3.93 <0.001


Null deviance 1,527

1,288

Null df 1,054

1,054

AIC 1,362

1,122

BIC 1,404

1,165

Deviance 1,342

1,103

Residual df 1,046

1,046

No. Obs. 1,055

1,055

NEW_LIMIT_GHM_INCOME_SPENT


4.49 3.11, 6.49 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_income_spent_lr_PGSI<- NagelkerkeR2(weighted_lr_income_spent_new_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1055 0.2101737
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 21.0173697% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_income_spent_lr_GHM<- NagelkerkeR2(weighted_lr_income_spent_new_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1055 0.2288529

The model accounts for 22.8852894% of variance in PGSI harm rates.

Risk curve

Plot some risk curves for the relationship between this predictor and the two outcomes.

Show code
# Create a summary dataset:
summary_data <- master_dataset_recoded_w_new_limits %>%
  mutate(decile = ntile(PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M, 10)) %>%
  group_by(decile) %>%
  summarise(
    PGSI_pct = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100,
    min_val = min(PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M, na.rm = TRUE),
    max_val = max(PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M, na.rm = TRUE)
  ) %>%
  mutate(decile_label = paste(round(min_val, 1), round(max_val, 1), sep = "-")) %>%
  filter(!is.na(decile))

long_data <- summary_data %>%
  pivot_longer(
    cols = c(PGSI_pct, GHM_pct),
    names_to = "HarmMeasure",
    values_to = "pct_harmed"
  ) %>%
  mutate(HarmMeasure = case_when(HarmMeasure == "PGSI_pct" ~"PGSI",
                                 HarmMeasure == "GHM_pct" ~"GHM"))

# Plot both curves on the same plot for ease:
combined_risk_curve_income_spent <- ggplot(long_data, aes(x = as.numeric(decile), y = pct_harmed, 
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size = 1) +
  geom_point() +
  scale_x_continuous(breaks = 1:10, labels = summary_data$decile_label) +
  labs(title = "% of income spent",
       x = "",
       y = "") +
    ylim(0, 100) +
    # PGSI LIMIT:
   geom_vline(xintercept = 5.5, linetype = "dashed", color = "#0B8AAD", size = 1, alpha = 0.7) +
  # GHM LIMIT:
   geom_vline(xintercept = 6.3, linetype = "dashed", color = "orange", size = 1, alpha = 0.7) +
  scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
    risk_curve_theme +
     theme(legend.position = "none",
        plot.margin = margin(0,0,-1,0)) 

combined_risk_curve_income_spent

Number of Different Sports/races

Show code
# PGSI model:
weighted_lr_n_activities_new_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_N_ACTIVITIES,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_n_activities_new_limit_PGSI <- tbl_regression(weighted_lr_n_activities_new_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_n_activities_new_limit_PGSI)

Show code
# GHM model:
weighted_lr_n_activities_new_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_N_ACTIVITIES,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_n_activities_new_limit_GHM<- tbl_regression(weighted_lr_n_activities_new_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_n_activities_new_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_n_activities_new_limit_PGSI, tbl_weighted_lr_n_activities_new_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal number of different sport and race types limit for online wagering as a predictor of harm status (demographic-adjusted models)"
  ) 
New optimal number of different sport and race types limit for online wagering as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.97, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.68 1.17, 2.40 0.005 2.25 1.34, 3.78 0.002
    Unknown 2.19 0.84, 5.70 0.11 2.36 0.73, 7.64 0.2
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.40 0.78, 2.51 0.3 2.80 1.51, 5.18 0.001
    Unknown 1.16 0.87, 1.55 0.3 0.99 0.70, 1.41 >0.9
EDUCATION_NUM 0.90 0.83, 0.97 0.007 0.97 0.89, 1.06 0.5
GAM_ACCOUNTS_NUM 1.34 1.22, 1.48 <0.001 1.33 1.19, 1.47 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.92 1.40, 2.63 <0.001 1.78 1.22, 2.60 0.003
NEW_LIMIT_PGSI_N_ACTIVITIES 1.55 1.20, 2.00 <0.001


Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,096

1,649

BIC 2,147

1,700

Deviance 2,073

1,626

Residual df 1,637

1,570

No. Obs. 1,647

1,580

NEW_LIMIT_GHM_N_ACTIVITIES


1.30 0.95, 1.77 0.10
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_n_activities_lr_PGSI<- NagelkerkeR2(weighted_lr_n_activities_new_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1647 0.1594645
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 15.9464454% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_n_activities_lr_GHM<- NagelkerkeR2(weighted_lr_n_activities_new_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1580 0.1425208

The model accounts for 14.2520775% of variance in PGSI harm rates.

Risk Curve

Plot some risk curves for the relationship between this predictor and the two outcomes.

Show code
# Create a summary dataset:
summary_data <- master_dataset_recoded_w_new_limits %>%
  mutate(decile = ntile(PAS_6M_N_ACTIVITIES, 10)) %>%
  group_by(decile) %>%
  summarise(
    PGSI_pct = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100,
    min_val = min(PAS_6M_N_ACTIVITIES, na.rm = TRUE),
    max_val = max(PAS_6M_N_ACTIVITIES, na.rm = TRUE)
  ) %>%
  mutate(decile_label = paste(round(min_val, 2), round(max_val, 2), sep = "-")) %>%
  filter(!is.na(decile))

long_data <- summary_data %>%
  pivot_longer(
    cols = c(PGSI_pct, GHM_pct),
    names_to = "HarmMeasure",
    values_to = "pct_harmed"
  ) %>%
  mutate(HarmMeasure = case_when(HarmMeasure == "PGSI_pct" ~"PGSI",
                                 HarmMeasure == "GHM_pct" ~"GHM"))

# Plot both curves on the same plot for ease:
combined_risk_curve_n_activities <- ggplot(long_data, aes(x = as.numeric(decile), y = pct_harmed, 
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size = 1) +
  geom_point() +
  scale_x_continuous(breaks = 1:10, labels = summary_data$decile_label) +
  labs(title = "No. of different activities bet on",
       x = "",
       y = "") +
    ylim(0, 100) +
      # PGSI LIMIT:
   geom_vline(xintercept = 5.5, linetype = "dashed", color = "#0B8AAD", size = 1, alpha = 0.7) +
  # GHM LIMIT:
   geom_vline(xintercept = 6.5, linetype = "dashed", color = "orange", size = 1, alpha = 0.7) +
  scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
    risk_curve_theme +
     theme(legend.position = "none",
        plot.margin = margin(0,0,-1,0)) 

combined_risk_curve_n_activities

Deposit Frequency

Show code
# PGSI model:
weighted_lr_deposit_freq_new_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_DEPOSIT_FREQ,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_deposit_freq_new_limit_PGSI <- tbl_regression(weighted_lr_deposit_freq_new_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_deposit_freq_new_limit_PGSI)

Show code
# GHM model:
weighted_lr_deposit_freq_new_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_DEPOSIT_FREQ,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_deposit_freq_new_limit_GHM<- tbl_regression(weighted_lr_deposit_freq_new_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_deposit_freq_new_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_deposit_freq_new_limit_PGSI, tbl_weighted_lr_deposit_freq_new_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal deposit frequency limit for online wagering as a predictor of harm status (demographic-adjusted models)"
  ) 
New optimal deposit frequency limit for online wagering as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.97, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.60 1.11, 2.29 0.011 2.12 1.27, 3.54 0.004
    Unknown 2.02 0.72, 5.67 0.2 1.74 0.49, 6.19 0.4
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.38 0.76, 2.50 0.3 2.63 1.45, 4.79 0.002
    Unknown 1.06 0.79, 1.43 0.7 0.90 0.62, 1.31 0.6
EDUCATION_NUM 0.91 0.84, 0.98 0.015 0.98 0.89, 1.07 0.6
GAM_ACCOUNTS_NUM 1.36 1.24, 1.50 <0.001 1.37 1.23, 1.53 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.94 1.41, 2.66 <0.001 1.75 1.19, 2.56 0.004
NEW_LIMIT_PGSI_DEPOSIT_FREQ 2.60 2.01, 3.36 <0.001


Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,041

1,590

BIC 2,091

1,641

Deviance 2,017

1,567

Residual df 1,637

1,570

No. Obs. 1,647

1,580

NEW_LIMIT_GHM_DEPOSIT_FREQ


3.39 2.42, 4.75 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_deposit_freq_lr_PGSI<- NagelkerkeR2(weighted_lr_deposit_freq_new_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N       R2
1 1647 0.198684
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 19.8684028% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_deposit_freq_lr_GHM<- NagelkerkeR2(weighted_lr_deposit_freq_new_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1580 0.1915685

The model accounts for 19.1568532% of variance in PGSI harm rates.

Risk Curve

Plot some risk curves for the relationship between this predictor and the two outcomes.

Show code
# Create a summary dataset:
summary_data <- master_dataset_recoded_w_new_limits %>%
  mutate(decile = ntile(PAS_6M_MONTHLY_DEPOSIT_FREQ, 10)) %>%
  group_by(decile) %>%
  summarise(
    PGSI_pct = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100,
    min_val = min(PAS_6M_MONTHLY_DEPOSIT_FREQ, na.rm = TRUE),
    max_val = max(PAS_6M_MONTHLY_DEPOSIT_FREQ, na.rm = TRUE)
  ) %>%
  mutate(decile_label = paste(round(min_val, 1), round(max_val, 1), sep = "-")) %>%
  filter(!is.na(decile))

long_data <- summary_data %>%
  pivot_longer(
    cols = c(PGSI_pct, GHM_pct),
    names_to = "HarmMeasure",
    values_to = "pct_harmed"
  ) %>%
  mutate(HarmMeasure = case_when(HarmMeasure == "PGSI_pct" ~"PGSI",
                                 HarmMeasure == "GHM_pct" ~"GHM"))

# Plot both curves on the same plot for ease:
combined_risk_curve_deposit_frequency<- ggplot(long_data, aes(x = as.numeric(decile), y = pct_harmed, 
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size = 1) +
  geom_point() +
  scale_x_continuous(breaks = 1:10, labels = summary_data$decile_label) +
  labs(title = "Mean monthly deposit frequency",
       x = "",
       y = "") +
    ylim(0, 100) +
        # PGSI LIMIT:
   geom_vline(xintercept = 5.5, linetype = "dashed", color = "#0B8AAD", size = 1, alpha = 0.7) +
  # GHM LIMIT:
   geom_vline(xintercept = 7.3, linetype = "dashed", color = "orange", size = 1, alpha = 0.7) +
  scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
    risk_curve_theme +
     theme(legend.position = "none",
        plot.margin = margin(0,0,-1,0)) 

combined_risk_curve_deposit_frequency

Deposit Amount

Show code
# PGSI model:
weighted_lr_deposit_amount_new_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_deposit_amount_new_limit_PGSI <- tbl_regression(weighted_lr_deposit_amount_new_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_deposit_amount_new_limit_PGSI)

Show code
# GHM model:
weighted_lr_deposit_amount_new_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_deposit_amount_new_limit_GHM<- tbl_regression(weighted_lr_deposit_amount_new_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_deposit_amount_new_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_deposit_amount_new_limit_PGSI, tbl_weighted_lr_deposit_amount_new_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal deposit amount limit for online wagering as a predictor of harm status (demographic-adjusted models)"
  ) 
New optimal deposit amount limit for online wagering as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.65 1.15, 2.37 0.007 2.15 1.28, 3.61 0.004
    Unknown 1.70 0.61, 4.70 0.3 2.08 0.55, 7.92 0.3
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.45 0.79, 2.66 0.2 2.79 1.53, 5.09 <0.001
    Unknown 1.11 0.83, 1.49 0.5 0.89 0.62, 1.30 0.6
EDUCATION_NUM 0.90 0.83, 0.98 0.010 0.96 0.88, 1.05 0.4
GAM_ACCOUNTS_NUM 1.33 1.21, 1.46 <0.001 1.35 1.21, 1.51 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.90 1.38, 2.61 <0.001 1.86 1.27, 2.72 0.001
NEW_LIMIT_PGSI_DEPOSIT_AMOUNT 2.27 1.78, 2.89 <0.001


Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,053

1,601

BIC 2,104

1,652

Deviance 2,030

1,578

Residual df 1,637

1,570

No. Obs. 1,647

1,580

NEW_LIMIT_GHM_DEPOSIT_AMOUNT


3.46 2.39, 5.01 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_deposit_amount_lr_PGSI<- NagelkerkeR2(weighted_lr_deposit_amount_new_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1647 0.1901446
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 19.0144593% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_deposit_amount_lr_GHM<- NagelkerkeR2(weighted_lr_deposit_amount_new_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1580 0.1824433

The model accounts for 18.2443336% of variance in PGSI harm rates.

Risk curve

Plot some risk curves for the relationship between this predictor and the two outcomes.

Show code
# Create a summary dataset:
summary_data <- master_dataset_recoded_w_new_limits %>%
  mutate(decile = ntile(PAS_6M_MONTHLY_DEPOSIT_AMOUNT, 10)) %>%
  group_by(decile) %>%
  summarise(
    PGSI_pct = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100,
    min_val = min(PAS_6M_MONTHLY_DEPOSIT_AMOUNT, na.rm = TRUE),
    max_val = max(PAS_6M_MONTHLY_DEPOSIT_AMOUNT, na.rm = TRUE)
  ) %>%
  mutate(decile_label = paste(round(min_val, 0), round(max_val, 0), sep = "-")) %>%
  mutate(decile_label = paste0("$", round(min_val, 0), "-", round(max_val, 0))) %>%
  filter(!is.na(decile))

long_data <- summary_data %>%
  pivot_longer(
    cols = c(PGSI_pct, GHM_pct),
    names_to = "HarmMeasure",
    values_to = "pct_harmed"
  ) %>%
  mutate(HarmMeasure = case_when(HarmMeasure == "PGSI_pct" ~"PGSI",
                                 HarmMeasure == "GHM_pct" ~"GHM"))

# Plot both curves on the same plot for ease:
combined_risk_curve_deposit_amount<- ggplot(long_data, aes(x = as.numeric(decile), y = pct_harmed, 
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size = 1) +
  geom_point() +
  scale_x_continuous(breaks = 1:10, labels = summary_data$decile_label) +
  labs(title = "Mean monthly deposit amount",
       x = "",
       y = "") +
    ylim(0, 100) +
          # PGSI LIMIT:
   geom_vline(xintercept = 4.6, linetype = "dashed", color = "#0B8AAD", size = 1, alpha = 0.7) +
  # GHM LIMIT:
   geom_vline(xintercept = 7.3, linetype = "dashed", color = "orange", size = 1, alpha = 0.7) +
  scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
    risk_curve_theme +
     theme(legend.position = "none",
        plot.margin = margin(0,0,-1,0)) 

combined_risk_curve_deposit_amount

Percentage of income deposited

Show code
# PGSI model:
weighted_lr_income_deposited_new_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_INCOME_DEPOSITED,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_income_deposited_new_limit_PGSI <- tbl_regression(weighted_lr_income_deposited_new_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_income_deposited_new_limit_PGSI)

Show code
# GHM model:
weighted_lr_income_deposited_new_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_INCOME_DEPOSITED,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_income_deposited_new_limit_GHM<- tbl_regression(weighted_lr_income_deposited_new_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_income_deposited_new_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_income_deposited_new_limit_PGSI, tbl_weighted_lr_income_deposited_new_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal amount of income deposited limit for online wagering as a predictor of harm status (demographic-adjusted models)"
  ) 
New optimal amount of income deposited limit for online wagering as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.71 1.06, 2.76 0.028 2.53 1.28, 4.97 0.007
    Unknown 3.53 0.92, 13.6 0.067 2.09 0.32, 13.8 0.4
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.06 0.52, 2.17 0.9 1.92 0.84, 4.38 0.12
EDUCATION_NUM 0.91 0.84, 1.00 0.045 0.96 0.87, 1.06 0.4
GAM_ACCOUNTS_NUM 1.35 1.20, 1.52 <0.001 1.40 1.23, 1.60 <0.001
Engages_in_other_forms





    FALSE

    TRUE 2.15 1.44, 3.19 <0.001 1.96 1.20, 3.18 0.007
NEW_LIMIT_PGSI_INCOME_DEPOSITED 3.78 2.70, 5.30 <0.001


Null deviance 1,527

1,288

Null df 1,054

1,054

AIC 1,330

1,122

BIC 1,373

1,164

Deviance 1,310

1,101

Residual df 1,046

1,046

No. Obs. 1,055

1,055

NEW_LIMIT_GHM_INCOME_DEPOSITED


7.10 4.50, 11.2 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_income_deposited_lr_PGSI<- NagelkerkeR2(weighted_lr_income_deposited_new_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1055 0.2426181
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 24.2618149% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_income_deposited_lr_GHM<- NagelkerkeR2(weighted_lr_income_deposited_new_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1055 0.2307804

The model accounts for 23.0780376% of variance in PGSI harm rates.

Risk curve

Plot some risk curves for the relationship between this predictor and the two outcomes.

Show code
# Create a summary dataset:
summary_data <- master_dataset_recoded_w_new_limits %>%
  mutate(decile = ntile(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M, 10)) %>%
  group_by(decile) %>%
  summarise(
    PGSI_pct = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100,
    min_val = min(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M, na.rm = TRUE),
    max_val = max(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M, na.rm = TRUE)
  ) %>%
  mutate(decile_label = paste(round(min_val, 1), round(max_val, 1), sep = "-")) %>%
  filter(!is.na(decile))

long_data <- summary_data %>%
  pivot_longer(
    cols = c(PGSI_pct, GHM_pct),
    names_to = "HarmMeasure",
    values_to = "pct_harmed"
  ) %>%
  mutate(HarmMeasure = case_when(HarmMeasure == "PGSI_pct" ~"PGSI",
                                 HarmMeasure == "GHM_pct" ~"GHM"))

# Plot both curves on the same plot for ease:
combined_risk_curve_income_deposited<- ggplot(long_data, aes(x = as.numeric(decile), y = pct_harmed, 
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size = 1) +
  geom_point() +
  scale_x_continuous(breaks = 1:10, labels = summary_data$decile_label) +
  labs(title = "% of monthly income deposited",
       x = "",
       y = "") +
    ylim(0, 100) +
  risk_curve_theme +
            # PGSI LIMIT:
   geom_vline(xintercept = 6, linetype = "dashed", color = "#0B8AAD", size = 1, alpha = 0.7) +
  # GHM LIMIT:
   geom_vline(xintercept = 7.8, linetype = "dashed", color = "orange", size = 1, alpha = 0.7) +
  scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
    theme(legend.position = "none")

combined_risk_curve_income_deposited

Number of wagering accounts

Show code
# PGSI model:
weighted_lr_gam_accounts_new_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_GAM_ACCOUNTS,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_gam_accounts_new_limit_PGSI <- tbl_regression(weighted_lr_gam_accounts_new_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_gam_accounts_new_limit_PGSI)

Show code
# GHM model:
weighted_lr_gam_accounts_new_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_GAM_ACCOUNTS,
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_gam_accounts_new_limit_GHM<- tbl_regression(weighted_lr_gam_accounts_new_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_gam_accounts_new_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_gam_accounts_new_limit_PGSI, tbl_weighted_lr_gam_accounts_new_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal number of wagering accounts limit for online wagering as a predictor of harm status (demographic-adjusted models)"
  ) 
New optimal number of wagering accounts limit for online wagering as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.82 1.27, 2.61 0.001 2.62 1.57, 4.38 <0.001
    Unknown 2.51 0.98, 6.39 0.054 2.96 0.91, 9.57 0.070
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.36 0.77, 2.39 0.3 2.74 1.49, 5.05 0.001
    Unknown 1.16 0.87, 1.54 0.3 0.96 0.68, 1.36 0.8
EDUCATION_NUM 0.90 0.83, 0.97 0.007 0.97 0.89, 1.06 0.5
Engages_in_other_forms





    FALSE

    TRUE 1.88 1.38, 2.57 <0.001 1.77 1.22, 2.57 0.003
NEW_LIMIT_PGSI_GAM_ACCOUNTS 2.17 1.71, 2.76 <0.001


Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,108

1,665

BIC 2,154

1,711

Deviance 2,087

1,645

Residual df 1,638

1,571

No. Obs. 1,647

1,580

NEW_LIMIT_GHM_GAM_ACCOUNTS


1.83 1.38, 2.44 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_gam_accounts_lr_PGSI<- NagelkerkeR2(weighted_lr_gam_accounts_new_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1647 0.1493899
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 14.9389916% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_gam_accounts_lr_GHM<- NagelkerkeR2(weighted_lr_gam_accounts_new_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1580 0.1266771

The model accounts for 12.6677066% of variance in PGSI harm rates.

Risk curve

Plot some risk curves for the relationship between this predictor and the two outcomes. We won’t use deciles here as the values are so low in range:

Show code
# Create a summary dataset:
summary_data <- master_dataset_recoded_w_new_limits %>%
  group_by(GAM_ACCOUNTS_NUM) %>%
  summarise(
    PGSI_pct = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100
  ) 

long_data <- summary_data %>%
  pivot_longer(
    cols = c(PGSI_pct, GHM_pct),
    names_to = "HarmMeasure",
    values_to = "pct_harmed"
  ) %>%
  mutate(HarmMeasure = case_when(HarmMeasure == "PGSI_pct" ~"PGSI",
                                 HarmMeasure == "GHM_pct" ~"GHM"))

# Plot both curves on the same plot for ease:
combined_risk_curve_n_accounts<- ggplot(long_data, aes(x = GAM_ACCOUNTS_NUM, y = pct_harmed,
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size = 1) +
  geom_point() +
  ylim(0, 100) +
  scale_x_continuous(breaks = 1:5) +
  labs(title = "No. of wagering accounts", 
       x = "",
       y = "") +
              # PGSI LIMIT:
   geom_vline(xintercept = 2, linetype = "dashed", color = "#0B8AAD", size = 1, alpha = 0.7) +
  # GHM LIMIT:
   geom_vline(xintercept = 3, linetype = "dashed", color = "orange", size = 1, alpha = 0.7) +
  scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
    risk_curve_theme +
     theme(legend.position = "none",
        plot.margin = margin(0,0,-1,0)) 

combined_risk_curve_n_accounts

All-limit adjusted model

And now perform the models that incorporate all of the new limits to determine the independent predictive ability of each limit:

Show code
# PGSI model:
weighted_lr_all_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
Engages_in_other_forms +
NEW_LIMIT_PGSI_BET_FREQ +
NEW_LIMIT_PGSI_SPEND +
NEW_LIMIT_PGSI_INCOME_SPENT +
NEW_LIMIT_PGSI_N_ACTIVITIES +
NEW_LIMIT_PGSI_DEPOSIT_FREQ +
NEW_LIMIT_PGSI_DEPOSIT_AMOUNT +
NEW_LIMIT_PGSI_INCOME_DEPOSITED +
NEW_LIMIT_PGSI_GAM_ACCOUNTS, 
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_all_limit_PGSI <- tbl_regression(weighted_lr_all_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_all_limit_PGSI)

Show code
# GHM model:
weighted_lr_all_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
Engages_in_other_forms +
NEW_LIMIT_GHM_BET_FREQ +
NEW_LIMIT_GHM_SPEND +
NEW_LIMIT_GHM_INCOME_SPENT +
NEW_LIMIT_GHM_N_ACTIVITIES +
NEW_LIMIT_GHM_DEPOSIT_FREQ +
NEW_LIMIT_GHM_DEPOSIT_AMOUNT +
NEW_LIMIT_GHM_INCOME_DEPOSITED +
NEW_LIMIT_GHM_GAM_ACCOUNTS, 
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_all_limit_GHM<- tbl_regression(weighted_lr_all_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_all_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_all_limit_PGSI, tbl_weighted_lr_all_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "New optimal limits for online wagering as a predictor of harm status (demographic- and limit-adjusted models)"
  ) 
New optimal limits for online wagering as a predictor of harm status (demographic- and limit-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.98 0.96, 0.99 <0.001 0.97 0.96, 0.99 <0.001
GENDER





    Female

    Male 1.65 1.01, 2.71 0.047 2.64 1.32, 5.26 0.006
    Unknown 3.01 0.83, 10.9 0.094 2.18 0.45, 10.7 0.3
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.16 0.55, 2.43 0.7 1.58 0.72, 3.44 0.3
EDUCATION_NUM 0.93 0.85, 1.02 0.12 0.99 0.90, 1.10 >0.9
Engages_in_other_forms





    FALSE

    TRUE 2.22 1.46, 3.36 <0.001 2.11 1.29, 3.45 0.003
NEW_LIMIT_PGSI_BET_FREQ 0.36 0.22, 0.60 <0.001


NEW_LIMIT_PGSI_SPEND 1.18 0.68, 2.04 0.6


NEW_LIMIT_PGSI_INCOME_SPENT 1.23 0.68, 2.22 0.5


NEW_LIMIT_PGSI_N_ACTIVITIES 1.25 0.87, 1.81 0.2


NEW_LIMIT_PGSI_DEPOSIT_FREQ 2.25 1.36, 3.73 0.002


NEW_LIMIT_PGSI_DEPOSIT_AMOUNT 1.48 0.91, 2.40 0.12


NEW_LIMIT_PGSI_INCOME_DEPOSITED 2.21 1.25, 3.90 0.007


NEW_LIMIT_PGSI_GAM_ACCOUNTS 1.94 1.41, 2.65 <0.001


Null deviance 1,527

1,288

Null df 1,054

1,054

AIC 1,311

1,100

BIC 1,379

1,168

Deviance 1,274

1,063

Residual df 1,040

1,040

No. Obs. 1,055

1,055

NEW_LIMIT_GHM_BET_FREQ


0.27 0.14, 0.51 <0.001
NEW_LIMIT_GHM_SPEND


1.31 0.57, 3.01 0.5
NEW_LIMIT_GHM_INCOME_SPENT


3.50 1.94, 6.34 <0.001
NEW_LIMIT_GHM_N_ACTIVITIES


0.92 0.59, 1.43 0.7
NEW_LIMIT_GHM_DEPOSIT_FREQ


3.12 1.63, 5.98 <0.001
NEW_LIMIT_GHM_DEPOSIT_AMOUNT


0.63 0.25, 1.55 0.3
NEW_LIMIT_GHM_INCOME_DEPOSITED


3.22 1.48, 6.98 0.003
NEW_LIMIT_GHM_GAM_ACCOUNTS


2.04 1.42, 2.95 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_all_limit_lr_PGSI<- NagelkerkeR2(weighted_lr_all_limit_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1055 0.2780255
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 27.8025472% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_all_limit_lr_GHM<- NagelkerkeR2(weighted_lr_all_limit_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1055 0.2724964

The model accounts for 27.2496388% of variance in PGSI harm rates.

Show code
# First, extract the model coefficients:
model_coefs <- coef(weighted_lr_all_limit_PGSI)

# Define the NEW_LIMIT variables in the order you want to accumulate them.
# (Change the order below if you wish to follow a different sequence.)
new_limit_vars <- c( "NEW_LIMIT_PGSI_DEPOSIT_FREQ",
                    "NEW_LIMIT_PGSI_INCOME_DEPOSITED",
                    "NEW_LIMIT_PGSI_GAM_ACCOUNTS")

# Extract the intercept (baseline for a reference person, with demographics at reference)
baseline <- model_coefs["(Intercept)"]

# Initialize a vector to store the cumulative linear predictors (z)
cumulative_z <- numeric(length(new_limit_vars))
# And a vector to store probabilities
cumulative_prob <- numeric(length(new_limit_vars))

# Loop through the NEW_LIMIT variables in the specified order
for(i in seq_along(new_limit_vars)) {
  # For the cumulative effect, add the coefficient for the current indicator.
  # For the first indicator, add it to the intercept;
  # for subsequent ones, add the new coefficient to the previous cumulative sum.
  if(i == 1) {
    cumulative_z[i] <- baseline + model_coefs[new_limit_vars[i]]
  } else {
    cumulative_z[i] <- cumulative_z[i - 1] + model_coefs[new_limit_vars[i]]
  }
  
  # Compute the predicted probability using the logistic function:
  cumulative_prob[i] <- exp(cumulative_z[i]) / (1 + exp(cumulative_z[i]))
}

# Create a summary table:
cumulative_summary <- data.frame(
  "Number_of_Indicators" = 1:length(new_limit_vars),
  "Cumulative_z" = round(cumulative_z, 3),
  "Predicted_Probability" = round(cumulative_prob, 3),
  "Predicted_Probability_Percent" = round(cumulative_prob * 100, 1)
)

print(cumulative_summary)
  Number_of_Indicators Cumulative_z Predicted_Probability
1                    1        0.145                 0.536
2                    2        0.937                 0.718
3                    3        1.598                 0.832
  Predicted_Probability_Percent
1                          53.6
2                          71.8
3                          83.2

No. Limits Model

Finally, the test H3, we need to look at how the accumulation of limits past relates to harm. From our preregistration:

H3: Participants who surpass successively more of the eight newly derived low-risk in the six months preceding survey participation will have an increasingly greater odds of reporting the experience of harm after accounting for the impact of demographic factors (i.e., age, gender, employment status, and education level) on harm. That is, using logistic regression models with harm status as the outcome and the number of limits passed as an ordered categorical predictor variable, there will be a monotonic relationship between the number of limits passed and odds of harm (i.e., a larger odds ratio with each additional limit passed). [Limit evaluation: cumulative impact of limit breaches on harm status]

Show code
# PGSI model:
weighted_lr_limit_count_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
Engages_in_other_forms +
TOTAL_LIMITS_REACHED_PGSI, 
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_limit_count_PGSI <- tbl_regression(weighted_lr_limit_count_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_limit_count_PGSI)

Show code
# GHM model:
weighted_lr_limit_count_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
  Engages_in_other_forms +
TOTAL_LIMITS_REACHED_GHM, 
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_limit_count_GHM<- tbl_regression(weighted_lr_limit_count_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_limit_count_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_limit_count_PGSI, tbl_weighted_lr_limit_count_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Cumulative number of limits passed as a predictor of harm status (demographic-adjusted models)"
  ) 
Cumulative number of limits passed as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.71 1.19, 2.44 0.003 2.06 1.23, 3.47 0.006
    Unknown 2.42 0.82, 7.17 0.11 2.03 0.56, 7.37 0.3
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.37 0.74, 2.54 0.3 2.50 1.32, 4.72 0.005
    Unknown 1.28 0.95, 1.71 0.10 0.96 0.66, 1.39 0.8
EDUCATION_NUM 0.92 0.85, 0.99 0.035 0.99 0.90, 1.08 0.8
Engages_in_other_forms





    FALSE

    TRUE 2.06 1.49, 2.85 <0.001 1.92 1.30, 2.82 <0.001
TOTAL_LIMITS_REACHED_PGSI





    0



    1 1.48 1.01, 2.18 0.046


    2 1.39 0.86, 2.23 0.2


    3 2.13 1.33, 3.39 0.002


    4 1.74 1.08, 2.79 0.022


    5 2.76 1.68, 4.52 <0.001


    6 4.78 2.87, 7.97 <0.001


    7 4.77 2.76, 8.24 <0.001


    8 8.09 3.96, 16.5 <0.001


Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,068

1,599

BIC 2,147

1,680

Deviance 2,029

1,562

Residual df 1,631

1,564

No. Obs. 1,647

1,580

TOTAL_LIMITS_REACHED_GHM





    0



    1


2.12 1.46, 3.08 <0.001
    2


1.58 0.96, 2.60 0.071
    3


2.44 1.36, 4.37 0.003
    4


4.04 2.27, 7.16 <0.001
    5


6.51 3.67, 11.6 <0.001
    6


5.84 2.54, 13.4 <0.001
    7


8.85 4.35, 18.0 <0.001
    8


8.92 2.52, 31.7 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Exponentiate coefficients and interpret them as odds-ratios in a data frame:

Show code
sumarisedORs_weighted_lr_limit_count_PGSI<- exp(cbind(OR = coef(weighted_lr_limit_count_PGSI), confint(weighted_lr_limit_count_PGSI)))

sumarisedORs_weighted_lr_limit_count_PGSI %>% as.data.frame() %>%
  rownames_to_column() %>%
  mutate(across(c(OR, `2.5 %`, `97.5 %`), ~round(., 4))) %>%
  filter(str_detect(rowname, "TOTAL")) %>%
  mutate(rowname = str_remove(rowname, "TOTAL_LIMITS_REACHED_PGSI")) %>%
   gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
  tab_header(md("**Odds ratios with 95% CIs: Total number of limits passed (PGSI)**"), subtitle = NULL) %>%
   cols_label('2.5 %' = "Lower CI: 2.5%",
              '97.5 %' = "Upper CI: 97.5%") 
Odds ratios with 95% CIs: Total number of limits passed (PGSI)
OR Lower CI: 2.5% Upper CI: 97.5%
1 1.4815 1.0077 2.1781
2 1.3868 0.8621 2.2309
3 2.1277 1.3338 3.3942
4 1.7375 1.0830 2.7876
5 2.7577 1.6837 4.5167
6 4.7812 2.8700 7.9652
7 4.7652 2.7551 8.2420
8 8.0913 3.9599 16.5330
Show code
sumarisedORs_weighted_lr_limit_count_GHM<- exp(cbind(OR = coef(weighted_lr_limit_count_GHM), confint(weighted_lr_limit_count_GHM)))

sumarisedORs_weighted_lr_limit_count_GHM %>% as.data.frame() %>%
  rownames_to_column() %>%
  mutate(across(c(OR, `2.5 %`, `97.5 %`), ~round(., 4))) %>%
  filter(str_detect(rowname, "TOTAL")) %>%
  mutate(rowname = str_remove(rowname, "TOTAL_LIMITS_REACHED_GHM")) %>%
   gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
  tab_header(md("**Odds ratios with 95% CIs: Total number of limits passed (GHM)**"), subtitle = NULL) %>%
   cols_label('2.5 %' = "Lower CI: 2.5%",
              '97.5 %' = "Upper CI: 97.5%") 
Odds ratios with 95% CIs: Total number of limits passed (GHM)
OR Lower CI: 2.5% Upper CI: 97.5%
1 2.1239 1.4644 3.0804
2 1.5828 0.9620 2.6044
3 2.4383 1.3604 4.3703
4 4.0363 2.2742 7.1637
5 6.5143 3.6728 11.5541
6 5.8445 2.5416 13.4397
7 8.8496 4.3491 18.0070
8 8.9248 2.5163 31.6549

Now present them as plots:

Show code
odds_plot_theme <- theme_classic() +
  theme(axis.text.x = element_text(hjust = 1),
        plot.title = element_text(hjust= 0.5))

PGSI_total_limits_plot<- sumarisedORs_weighted_lr_limit_count_PGSI %>% as.data.frame() %>%
  rownames_to_column() %>%
  mutate(across(c(OR, `2.5 %`, `97.5 %`), ~round(., 4))) %>%
  filter(str_detect(rowname, "TOTAL")) %>%
  mutate(rowname = str_remove(rowname, "TOTAL_LIMITS_REACHED_PGSI")) %>%
  ggplot(aes(x = rowname, y = OR)) +
  scale_y_log10() +
  geom_hline(yintercept = 1, linetype = "dashed", color = "grey70", size = 0.6) +
  geom_point(size = 3, colour = "dodgerblue") +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`),size = 0.7, width = 0.2,
                colour = "black") +
  labs(
    x = "Total limits surpassed",
    y = "Adjusted Odds Ratio (log-10 scale)",
    title = "PGSI Harm Prediction"
  ) +
  odds_plot_theme

GHM_total_limits_plot<- sumarisedORs_weighted_lr_limit_count_GHM %>% as.data.frame() %>%
  rownames_to_column() %>%
  mutate(across(c(OR, `2.5 %`, `97.5 %`), ~round(., 4))) %>%
  filter(str_detect(rowname, "TOTAL")) %>%
  mutate(rowname = str_remove(rowname, "TOTAL_LIMITS_REACHED_GHM")) %>%
  ggplot(aes(x = rowname, y = OR)) +
  scale_y_log10() +
  geom_hline(yintercept = 1, linetype = "dashed", color = "grey70", size = 0.6) +
  geom_point(size = 3, colour = "dodgerblue") +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`),size = 0.7, width = 0.2,
                colour = "black") +
  labs(
    x = "Total limits surpassed",
    y = "Adjusted Odds Ratio (log-10 scale)",
    title = "GHM Harm Prediction"
  ) +
  odds_plot_theme

total_limits_OR_plot<- PGSI_total_limits_plot + GHM_total_limits_plot

total_limits_OR_plot

Show code
ggsave("Figures/total_limits_OR_plot.pdf", 
       plot = total_limits_OR_plot, 
       height = 80, 
       width = 170, 
       units = "mm")

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_lr_limit_count_GHM<- NagelkerkeR2(weighted_lr_limit_count_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1647 0.1908373
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 19.0837288% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_lr_limit_count_PGSI<- NagelkerkeR2(weighted_lr_limit_count_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1580 0.1954103

The model accounts for 19.5410337% of variance in GHM harm rates.

Plot risk curves for this (again, no deciles):

Show code
# Create a summary dataset:
summary_data_PGSI <- master_dataset_recoded_w_new_limits %>%
  group_by(TOTAL_LIMITS_REACHED_PGSI) %>%
  summarise(
    pct_harmed = sum(PGSI_STATUS == 1, na.rm = TRUE) / n() * 100,
    # GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100
  ) %>%
  mutate(HarmMeasure = 'PGSI') %>%
  rename('TOTAL_LIMITS_REACHED' = TOTAL_LIMITS_REACHED_PGSI)

summary_data_GHM <- master_dataset_recoded_w_new_limits %>%
  group_by(TOTAL_LIMITS_REACHED_GHM) %>%
  summarise(
    pct_harmed = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100,
    # GHM_pct = sum(GHM_STATUS == 1, na.rm = TRUE) / n() * 100
  ) %>%
  mutate(HarmMeasure = 'GHM') %>%
  rename('TOTAL_LIMITS_REACHED' = TOTAL_LIMITS_REACHED_GHM)


long_data <- bind_rows(summary_data_PGSI,
            summary_data_GHM)

# Plot both curves on the same plot for ease:
combined_risk_curve_total_limits<- ggplot(long_data, aes(x = TOTAL_LIMITS_REACHED, y = pct_harmed,
                                               color = HarmMeasure, group = HarmMeasure)) +
  geom_line(size=1) +
  geom_point() +
  ylim(0, 100) +
  labs(title = "Total no. of limits surpassed",
       x = "",
       y = "") +
  scale_color_manual(name = "", values = c("PGSI" = "#0B8AAD", "GHM" = "orange")) +
  risk_curve_theme

combined_risk_curve_total_limits

No. limits sans frequency

Let’s repeat the analyses from above, but exclude betting frequency from the count of limits passed, given its limited predictive utility.

Show code
# PGSI model:
weighted_lr_limit_count_sans_freq_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
Engages_in_other_forms +
TOTAL_LIMITS_REACHED_PGSI_SANS_BET_FREQ, 
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_limit_count_sans_freq_PGSI <- tbl_regression(weighted_lr_limit_count_sans_freq_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_limit_count_sans_freq_PGSI)

Show code
# GHM model:
weighted_lr_limit_count_sans_freq_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
  Engages_in_other_forms +
TOTAL_LIMITS_REACHED_GHM_SANS_BET_FREQ, 
design = survey_design_w_new_limits, 
family = quasibinomial()) 

tbl_weighted_lr_limit_count_sans_freq_GHM<- tbl_regression(weighted_lr_limit_count_sans_freq_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_limit_count_sans_freq_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_limit_count_sans_freq_PGSI, tbl_weighted_lr_limit_count_sans_freq_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Cumulative number of limits passed as a predictor of harm status (demographic-adjusted models)"
  ) 
Cumulative number of limits passed as a predictor of harm status (demographic-adjusted models)
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.63 1.14, 2.33 0.008 1.99 1.18, 3.36 0.010
    Unknown 2.22 0.77, 6.38 0.14 1.84 0.49, 6.92 0.4
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.33 0.72, 2.47 0.4 2.59 1.38, 4.89 0.003
    Unknown 1.28 0.95, 1.72 0.11 0.99 0.68, 1.44 >0.9
EDUCATION_NUM 0.92 0.85, 1.00 0.040 0.98 0.90, 1.08 0.7
Engages_in_other_forms





    FALSE

    TRUE 2.05 1.48, 2.85 <0.001 1.96 1.32, 2.91 <0.001
TOTAL_LIMITS_REACHED_PGSI_SANS_BET_FREQ





    0



    1 1.51 1.03, 2.20 0.033


    2 1.47 0.94, 2.30 0.087


    3 2.13 1.38, 3.30 <0.001


    4 2.82 1.75, 4.55 <0.001


    5 4.32 2.64, 7.07 <0.001


    6 6.19 3.66, 10.5 <0.001


    7 7.53 3.85, 14.7 <0.001


Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,050

1,572

BIC 2,125

1,649

Deviance 2,014

1,538

Residual df 1,632

1,565

No. Obs. 1,647

1,580

TOTAL_LIMITS_REACHED_GHM_SANS_BET_FREQ





    0



    1


2.40 1.67, 3.46 <0.001
    2


1.86 1.11, 3.11 0.018
    3


4.21 2.49, 7.11 <0.001
    4


6.60 3.72, 11.7 <0.001
    5


8.59 4.14, 17.8 <0.001
    6


10.0 4.98, 20.3 <0.001
    7


12.2 3.83, 38.8 <0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Exponentiate coefficients and interpret them as odds-ratios in a data frame:

Show code
sumarisedORs_weighted_lr_limit_count_sans_freq_PGSI<- exp(cbind(OR = coef(weighted_lr_limit_count_sans_freq_PGSI), confint(weighted_lr_limit_count_sans_freq_PGSI)))

sumarisedORs_weighted_lr_limit_count_sans_freq_PGSI %>% as.data.frame() %>%
  rownames_to_column() %>%
  mutate(across(c(OR, `2.5 %`, `97.5 %`), ~round(., 4))) %>%
  filter(str_detect(rowname, "TOTAL")) %>%
  mutate(rowname = str_remove(rowname, "TOTAL_LIMITS_REACHED_PGSI_SANS_BET_FREQ")) %>%
   gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
  tab_header(md("**Odds ratios with 95% CIs: Total number of limits passed (PGSI)**"), subtitle = NULL) %>%
   cols_label('2.5 %' = "Lower CI: 2.5%",
              '97.5 %' = "Upper CI: 97.5%") 
Odds ratios with 95% CIs: Total number of limits passed (PGSI)
OR Lower CI: 2.5% Upper CI: 97.5%
1 1.5091 1.0344 2.2019
2 1.4748 0.9447 2.3024
3 2.1318 1.3772 3.2998
4 2.8233 1.7532 4.5463
5 4.3227 2.6440 7.0671
6 6.1910 3.6586 10.4762
7 7.5273 3.8530 14.7055
Show code
sumarisedORs_weighted_lr_limit_count_sans_freq_GHM<- exp(cbind(OR = coef(weighted_lr_limit_count_sans_freq_GHM), confint(weighted_lr_limit_count_sans_freq_GHM)))

sumarisedORs_weighted_lr_limit_count_sans_freq_GHM %>% as.data.frame() %>%
  rownames_to_column() %>%
  mutate(across(c(OR, `2.5 %`, `97.5 %`), ~round(., 4))) %>%
  filter(str_detect(rowname, "TOTAL")) %>%
  mutate(rowname = str_remove(rowname, "TOTAL_LIMITS_REACHED_GHM_SANS_BET_FREQ")) %>%
   gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
  tab_header(md("**Odds ratios with 95% CIs: Total number of limits passed (GHM)**"), subtitle = NULL) %>%
   cols_label('2.5 %' = "Lower CI: 2.5%",
              '97.5 %' = "Upper CI: 97.5%") 
Odds ratios with 95% CIs: Total number of limits passed (GHM)
OR Lower CI: 2.5% Upper CI: 97.5%
1 2.4047 1.6733 3.4556
2 1.8602 1.1129 3.1095
3 4.2090 2.4934 7.1050
4 6.6001 3.7212 11.7060
5 8.5947 4.1410 17.8381
6 10.0450 4.9812 20.2568
7 12.1923 3.8297 38.8151

Now present them as plots:

Show code
odds_plot_theme <- theme_classic() +
  theme(axis.text.x = element_text(hjust = 1),
        plot.title = element_text(hjust= 0.5))

PGSI_total_limits_plot<- sumarisedORs_weighted_lr_limit_count_sans_freq_PGSI %>% as.data.frame() %>%
  rownames_to_column() %>%
  mutate(across(c(OR, `2.5 %`, `97.5 %`), ~round(., 4))) %>%
  filter(str_detect(rowname, "TOTAL")) %>%
  mutate(rowname = str_remove(rowname, "TOTAL_LIMITS_REACHED_PGSI_SANS_BET_FREQ")) %>%
  ggplot(aes(x = rowname, y = OR)) +
  scale_y_log10() +
  geom_hline(yintercept = 1, linetype = "dashed", color = "grey70", size = 0.6) +
  geom_point(size = 3, colour = "dodgerblue") +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`),size = 0.7, width = 0.2,
                colour = "black") +
  labs(
    x = "Total limits surpassed",
    y = "Adjusted Odds Ratio (log-10 scale)",
    title = "PGSI Harm Prediction"
  ) +
  odds_plot_theme

GHM_total_limits_plot<- sumarisedORs_weighted_lr_limit_count_sans_freq_GHM %>% as.data.frame() %>%
  rownames_to_column() %>%
  mutate(across(c(OR, `2.5 %`, `97.5 %`), ~round(., 4))) %>%
  filter(str_detect(rowname, "TOTAL")) %>%
  mutate(rowname = str_remove(rowname, "TOTAL_LIMITS_REACHED_GHM_SANS_BET_FREQ")) %>%
  ggplot(aes(x = rowname, y = OR)) +
  scale_y_log10() +
  geom_hline(yintercept = 1, linetype = "dashed", color = "grey70", size = 0.6) +
  geom_point(size = 3, colour = "dodgerblue") +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`),size = 0.7, width = 0.2,
                colour = "black") +
  labs(
    x = "Total limits surpassed",
    y = "Adjusted Odds Ratio (log-10 scale)",
    title = "GHM Harm Prediction"
  ) +
  odds_plot_theme

total_limits_OR_plot<- PGSI_total_limits_plot + GHM_total_limits_plot

total_limits_OR_plot

Show code
ggsave("Figures/total_limits_OR_plot_sans_frequency.pdf", 
       plot = total_limits_OR_plot, 
       height = 80, 
       width = 170, 
       units = "mm")

Calculate Nagelkerke’s R squared for the PGSI model:

Show code
NagelkerkeR2_weighted_lr_limit_count_sans_freq_GHM<- NagelkerkeR2(weighted_lr_limit_count_sans_freq_PGSI) %>% 
  as.data.frame() %>%
  print()
     N        R2
1 1647 0.2008997
Show code
# NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value

The model accounts for 20.0899743% of variance in PGSI harm rates.

Calculate Nagelkerke’s R squared for the GHM model:

Show code
NagelkerkeR2_weighted_lr_limit_count_sans_freq_PGSI<- NagelkerkeR2(weighted_lr_limit_count_sans_freq_GHM) %>%   
  as.data.frame() %>%   
  print() # NOTE: R2 is calculate as a proportion here and so in the paper we have multiplied it by 100 to form a percentage value
     N        R2
1 1580 0.2150508

The model accounts for 21.5050806% of variance in GHM harm rates.

Risk curves (exploratory)

Show code
risk_curves_combined<- (combined_risk_curve_betting_freq + combined_risk_curve_spend)/
(combined_risk_curve_income_spent + combined_risk_curve_n_activities)/
(combined_risk_curve_deposit_frequency + combined_risk_curve_deposit_amount)/
(combined_risk_curve_income_deposited + combined_risk_curve_n_accounts) + plot_layout(guides = 'collect') 
   


risk_curves_combined

Show code
ggsave("Figures/risk_curves_combined.pdf", 
       plot = risk_curves_combined, 
       height = 240, 
       width = 170, 
       units = "mm")

Phase 4: Demographic-adjusted predictive ability of old limits

First, to do this, we need to add new variables to the dataset indicating whether each participant exceeded the previous limits or not. The limits are taken from:

  • Dowling, N. A., Youssef, G. J., Greenwood, C., Merkouris, S. S., Suomi, A., & Room, R. (2021). The Identification of Low-risk Gambling Limits for Specific Gambling Activities. Journal of Gambling Studies, 38(2), Article 2. https://doi.org/10.1007/s10899-021-10036-z

These were the old limits:

  • Racing: No more than $288 spent per year [$24 per month]
  • Sports: No more than $400 spent per year [$33.3 per month]
  • Racing: No more than 0.55% of income spent on gambling
  • Sports: No more than 0.55% of income spent on gambling
  • Racing: No more than 15 active gambling days per year [1.25 per month]
  • Sports: No more than 14 active gambling days per year [1.17 per month]

Now add new variables for each of these which represent whether somebody passed or did not pass these limits:

Show code
master_dataset_recoded_w_old_limits <- master_dataset_recoded %>%
        mutate(OLD_LIMIT_RACING_SPEND = case_when(PAS_6M_MONTHLY_SPEND > 24 ~ 1,
                                                       PAS_6M_MONTHLY_SPEND <= 24 ~ 0,
                                                       TRUE ~ PAS_6M_MONTHLY_SPEND
                                                       )) %>%
        mutate(OLD_LIMIT_SPORT_SPEND = case_when(PAS_6M_MONTHLY_SPEND > 33.3 ~ 1,
                                                       PAS_6M_MONTHLY_SPEND <= 33.3 ~ 0,
                                                       TRUE ~ PAS_6M_MONTHLY_SPEND
                                                       ))  %>%
mutate(OLD_LIMIT_RACING_SPORT_INCOME_SPENT = case_when(PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M > 0.55 ~ 1,
                                                       PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M <= 0.55 ~ 0,
                                                       TRUE ~ PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M
                                                       )) %>%
  mutate(OLD_LIMIT_RACING_FREQ = case_when(PAS_6M_MONTHLY_BET_FREQ_DAYS > 1.25 ~ 1,
                                                       PAS_6M_MONTHLY_BET_FREQ_DAYS <= 1.25 ~ 0,
                                                       TRUE ~ PAS_6M_MONTHLY_BET_FREQ_DAYS
                                                       )) %>%
    mutate(OLD_LIMIT_SPORT_FREQ = case_when(PAS_6M_MONTHLY_BET_FREQ_DAYS > 1.17 ~ 1,
                                                       PAS_6M_MONTHLY_BET_FREQ_DAYS <= 1.17 ~ 0,
                                                       TRUE ~ PAS_6M_MONTHLY_BET_FREQ_DAYS
                                                       ))  %>%
mutate(PGSI_STATUS = case_when(PGSI_STATUS == "Harm" ~ 1,
                               PGSI_STATUS == "No harm" ~ 0,
                    TRUE ~ NA)) %>%
mutate(GHM_STATUS = case_when(GHM_STATUS == "Harm" ~ 1,
                               GHM_STATUS == "No harm" ~ 0,
                    TRUE ~ NA))

Accuracy computation

For each of the old limit values, we need to compute the accuracy level if these were used to screen for harm status in our sample, as per H4.1 in our pre-reg:

  • “Our limits will be better at differentiating between harmed and unharmed participants in our sample compared to when limits for both horse/dog racing and sports/other events betting identified by Dowling et al. (2021) are used. Specifically, the accuracy (i.e., overall proportion of cases correctly classified) will be at least 5% greater when using our limits compared to the existing Dowling et al. limits [H4.1].”

We can compute many different types of performance metrics, including sensitivity (recall) and specificity as well as accuracy. First, we need to be able to compute all of the values that would go into a confusion matrix:

False positives, False negatives, True negatives, True positives

Let’s engineer a data set that contains these values for each of the old limits and compute the relevant metrics:

Show code
master_dataset_recoded_w_old_limits_predictive_categories  <- master_dataset_recoded_w_old_limits %>%
  mutate(PGSI_predictive_category_OLD_LIMIT_RACING_SPEND = case_when(
    PGSI_STATUS == 0 & OLD_LIMIT_RACING_SPEND == 0 ~ "True_negative",
    PGSI_STATUS == 0 & OLD_LIMIT_RACING_SPEND == 1 ~ "False_positive",
    PGSI_STATUS == 1 & OLD_LIMIT_RACING_SPEND == 0 ~ "False_negative",
    PGSI_STATUS == 1 & OLD_LIMIT_RACING_SPEND == 1 ~ "True_positive"
    ))  %>%
mutate(PGSI_predictive_category_OLD_LIMIT_SPORT_SPEND = case_when(
    PGSI_STATUS == 0 & OLD_LIMIT_SPORT_SPEND == 0 ~ "True_negative",
    PGSI_STATUS == 0 & OLD_LIMIT_SPORT_SPEND == 1 ~ "False_positive",
    PGSI_STATUS == 1 & OLD_LIMIT_SPORT_SPEND == 0 ~ "False_negative",
    PGSI_STATUS == 1 & OLD_LIMIT_SPORT_SPEND == 1 ~ "True_positive"
    ))  %>%
  mutate(PGSI_predictive_category_OLD_LIMIT_RACING_SPORT_INCOME_SPENT = case_when(
    PGSI_STATUS == 0 & OLD_LIMIT_RACING_SPORT_INCOME_SPENT == 0 ~ "True_negative",
    PGSI_STATUS == 0 & OLD_LIMIT_RACING_SPORT_INCOME_SPENT == 1 ~ "False_positive",
    PGSI_STATUS == 1 & OLD_LIMIT_RACING_SPORT_INCOME_SPENT == 0 ~ "False_negative",
    PGSI_STATUS == 1 & OLD_LIMIT_RACING_SPORT_INCOME_SPENT == 1 ~ "True_positive"
    ))  %>%
  mutate(PGSI_predictive_category_OLD_LIMIT_RACING_FREQ = case_when(
    PGSI_STATUS == 0 & OLD_LIMIT_RACING_FREQ == 0 ~ "True_negative",
    PGSI_STATUS == 0 & OLD_LIMIT_RACING_FREQ == 1 ~ "False_positive",
    PGSI_STATUS == 1 & OLD_LIMIT_RACING_FREQ == 0 ~ "False_negative",
    PGSI_STATUS == 1 & OLD_LIMIT_RACING_FREQ == 1 ~ "True_positive"
    ))  %>%
  mutate(PGSI_predictive_category_OLD_LIMIT_SPORT_FREQ = case_when(
    PGSI_STATUS == 0 & OLD_LIMIT_SPORT_FREQ == 0 ~ "True_negative",
    PGSI_STATUS == 0 & OLD_LIMIT_SPORT_FREQ == 1 ~ "False_positive",
    PGSI_STATUS == 1 & OLD_LIMIT_SPORT_FREQ == 0 ~ "False_negative",
    PGSI_STATUS == 1 & OLD_LIMIT_SPORT_FREQ == 1 ~ "True_positive"
    ))%>%
  # GMH:
  mutate(GHM_predictive_category_OLD_LIMIT_RACING_SPEND = case_when(
    GHM_STATUS == 0 & OLD_LIMIT_RACING_SPEND == 0 ~ "True_negative",
    GHM_STATUS == 0 & OLD_LIMIT_RACING_SPEND == 1 ~ "False_positive",
    GHM_STATUS == 1 & OLD_LIMIT_RACING_SPEND == 0 ~ "False_negative",
    GHM_STATUS == 1 & OLD_LIMIT_RACING_SPEND == 1 ~ "True_positive"
    ))  %>%
mutate(GHM_predictive_category_OLD_LIMIT_SPORT_SPEND = case_when(
    GHM_STATUS == 0 & OLD_LIMIT_SPORT_SPEND == 0 ~ "True_negative",
    GHM_STATUS == 0 & OLD_LIMIT_SPORT_SPEND == 1 ~ "False_positive",
    GHM_STATUS == 1 & OLD_LIMIT_SPORT_SPEND == 0 ~ "False_negative",
    GHM_STATUS == 1 & OLD_LIMIT_SPORT_SPEND == 1 ~ "True_positive"
    ))  %>%
  mutate(GHM_predictive_category_OLD_LIMIT_RACING_SPORT_INCOME_SPENT = case_when(
    GHM_STATUS == 0 & OLD_LIMIT_RACING_SPORT_INCOME_SPENT == 0 ~ "True_negative",
    GHM_STATUS == 0 & OLD_LIMIT_RACING_SPORT_INCOME_SPENT == 1 ~ "False_positive",
    GHM_STATUS == 1 & OLD_LIMIT_RACING_SPORT_INCOME_SPENT == 0 ~ "False_negative",
    GHM_STATUS == 1 & OLD_LIMIT_RACING_SPORT_INCOME_SPENT == 1 ~ "True_positive"
    ))  %>%
  mutate(GHM_predictive_category_OLD_LIMIT_RACING_FREQ = case_when(
    GHM_STATUS == 0 & OLD_LIMIT_RACING_FREQ == 0 ~ "True_negative",
    GHM_STATUS == 0 & OLD_LIMIT_RACING_FREQ == 1 ~ "False_positive",
    GHM_STATUS == 1 & OLD_LIMIT_RACING_FREQ == 0 ~ "False_negative",
    GHM_STATUS == 1 & OLD_LIMIT_RACING_FREQ == 1 ~ "True_positive"
    ))  %>%
  mutate(GHM_predictive_category_OLD_LIMIT_SPORT_FREQ = case_when(
    GHM_STATUS == 0 & OLD_LIMIT_SPORT_FREQ == 0 ~ "True_negative",
    GHM_STATUS == 0 & OLD_LIMIT_SPORT_FREQ == 1 ~ "False_positive",
    GHM_STATUS == 1 & OLD_LIMIT_SPORT_FREQ == 0 ~ "False_negative",
    GHM_STATUS == 1 & OLD_LIMIT_SPORT_FREQ == 1 ~ "True_positive"
    ))



# Define the function
compute_performance_metrics <- function(data, category_var, limit_label) {
  # Count the predictive categories for the specified variable
  data %>%
    dplyr::count(!!sym(category_var)) %>%
    pivot_wider(names_from = !!sym(category_var), values_from = n, values_fill = 0) %>%
    mutate(total_n = False_negative + False_positive + True_negative + True_positive) %>%
    mutate(
      Accuracy = round(((True_negative + True_positive) / total_n * 100), 3),
      Sensitivity = round(True_positive / (True_positive + False_negative) * 100, 3),
      Specificity = round(True_negative / (True_negative + False_positive) * 100, 3),
      Limit = limit_label
    ) %>%
    select(Limit, everything(), -total_n)
}

# Apply function to PGSI classification:
PGSI_results_racing_spend <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "PGSI_predictive_category_OLD_LIMIT_RACING_SPEND",
  limit_label = "Racing spend"
)

PGSI_results_sport_spend <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "PGSI_predictive_category_OLD_LIMIT_SPORT_SPEND",
  limit_label = "Sport spend"
)

PGSI_results_income_spent <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "PGSI_predictive_category_OLD_LIMIT_RACING_SPORT_INCOME_SPENT",
  limit_label = "Racing & sport percentage of income spend"
)

PGSI_results_racing_freq <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "PGSI_predictive_category_OLD_LIMIT_RACING_FREQ",
  limit_label = "Racing betting frequency"
)

PGSI_results_sport_freq <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "PGSI_predictive_category_OLD_LIMIT_SPORT_FREQ",
  limit_label = "Sport betting frequency"
)


# Apply function to GHM classification:
GHM_results_racing_spend <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "GHM_predictive_category_OLD_LIMIT_RACING_SPEND",
  limit_label = "Racing spend"
)

GHM_results_sport_spend <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "GHM_predictive_category_OLD_LIMIT_SPORT_SPEND",
  limit_label = "Sport spend"
)

GHM_results_income_spent <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "GHM_predictive_category_OLD_LIMIT_RACING_SPORT_INCOME_SPENT",
  limit_label = "Racing & sport % of income spend"
)

GHM_results_racing_freq <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "GHM_predictive_category_OLD_LIMIT_RACING_FREQ",
  limit_label = "Racing betting frequency"
)

GHM_results_sport_freq <- compute_performance_metrics(
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  category_var = "GHM_predictive_category_OLD_LIMIT_SPORT_FREQ",
  limit_label = "Sport betting frequency"
)

Now that we have all of the metrics, we can produce a table that gives us quite a bit of detail to start:

Show code
# Combine all results into a table:
bind_rows(
    PGSI_results_racing_freq,
  PGSI_results_sport_freq,
  PGSI_results_racing_spend,
  PGSI_results_sport_spend,
  PGSI_results_income_spent,
  GHM_results_racing_freq,
  GHM_results_sport_freq,
  GHM_results_racing_spend,
  GHM_results_sport_spend,
  GHM_results_income_spent,

) %>%
  gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Accuracy of Dowling et al.'s (2021) limits when classifying harm status"
  ) %>%
   tab_row_group(
    label = "PGSI harm classification",
    rows = c(1,2,3,4,5)
  ) %>%
   tab_row_group(
    label = "GHM harm classification",
    rows = c(6,7,8,9,10)
  ) %>%
  row_group_order(groups = c("PGSI harm classification",
                             "GHM harm classification"))
Accuracy of Dowling et al.'s (2021) limits when classifying harm status
Limit False_negative False_positive True_negative True_positive Accuracy Sensitivity Specificity NA
PGSI harm classification
Racing betting frequency 143 599 155 750 54.948 83.987 20.557 NA
Sport betting frequency 143 600 154 750 54.888 83.987 20.424 NA
Racing spend 87 622 132 806 56.952 90.258 17.507 NA
Sport spend 101 598 156 792 57.559 88.690 20.690 NA
Racing & sport percentage of income spend 85 329 145 496 60.758 85.370 30.591 592
GHM harm classification
Racing betting frequency 63 889 219 409 39.747 86.653 19.765 67
Sport betting frequency 63 890 218 409 39.684 86.653 19.675 67
Racing spend 36 939 169 436 38.291 92.373 15.253 67
Sport spend 41 907 201 431 40.000 91.314 18.141 67
Racing & sport % of income spend 38 526 192 299 46.540 88.724 26.741 592

Now we can make a fancy table for the manuscript:

Show code
PGSI_accuracy_old<- bind_rows(
    PGSI_results_racing_freq,
  PGSI_results_sport_freq,
  PGSI_results_racing_spend,
  PGSI_results_sport_spend,
  PGSI_results_income_spent
) %>%
select(Limit, Accuracy, Sensitivity, Specificity)

GHM_accuracy_old<- bind_rows(
    GHM_results_racing_freq,
  GHM_results_sport_freq,
  GHM_results_racing_spend,
  GHM_results_sport_spend,
  GHM_results_income_spent
 ) %>%
select(Limit, Accuracy, Sensitivity, Specificity)
 
accuracy_outcomes_old_limits <- bind_cols(PGSI_accuracy_old,
          GHM_accuracy_old) %>%
    select(-Limit...5,
           Limit = Limit...1,
           "Accuracy (PGSI)" = Accuracy...2,
          "Sensitivity (PGSI)"  = Sensitivity...3,
           "Specificity (PGSI)" = Specificity...4,
          "Accuracy (GHM)" = Accuracy...6,
          "Sensitivity (GHM)"  = Sensitivity...7,
           "Specificity (GHM)" = Specificity...8,
           ) %>%
 mutate_if(is.numeric, round, 2) 


  gt(accuracy_outcomes_old_limits) %>%
tab_header(
    title = "Accuracy of Dowling et al.'s (2021) limits for classifying harm status"
  ) 
Accuracy of Dowling et al.'s (2021) limits for classifying harm status
Limit Accuracy (PGSI) Sensitivity (PGSI) Specificity (PGSI) Accuracy (GHM) Sensitivity (GHM) Specificity (GHM)
Racing betting frequency 54.95 83.99 20.56 39.75 86.65 19.76
Sport betting frequency 54.89 83.99 20.42 39.68 86.65 19.68
Racing spend 56.95 90.26 17.51 38.29 92.37 15.25
Sport spend 57.56 88.69 20.69 40.00 91.31 18.14
Racing & sport percentage of income spend 60.76 85.37 30.59 46.54 88.72 26.74

Re-weighting

We need to update the the survey design object with new variables:

Show code
# Create survey design object:
survey_design_w_old_limits <- survey::svydesign(
  id = ~1,
  data = master_dataset_recoded_w_old_limits_predictive_categories,
  weights = ~ELIGIBLE_SURVEY_SAMPLE_WEIGHTS
)

Logistic regressions

Now, perform logistic regressions to look at how well passing each limit predicts harm status after coding for demographic variables. We will need to run two models each time, one for the PGSI and one for the GHM.

For each model, we will run a function to produce plots showing diagnostic checks and then table regression outcomes together for the PGSI and GHM models, exponentiating betas into odds ratios.

Betting frequency

Racing

Show code
# PGSI model:
weighted_lr_freq_old_limit_PGSI_racing <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_RACING_FREQ,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_freq_old_limit_PGSI_racing <- tbl_regression(weighted_lr_freq_old_limit_PGSI_racing, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_freq_old_limit_PGSI_racing)

Show code
# GHM model:
weighted_lr_freq_old_limit_GHM_racing <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_RACING_FREQ,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_freq_old_limit_GHM_racing<- tbl_regression(weighted_lr_freq_old_limit_GHM_racing, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_freq_old_limit_GHM_racing)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_freq_old_limit_PGSI_racing, tbl_weighted_lr_freq_old_limit_GHM_racing),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Dowling et al.'s (2021) gambling frequency limit for racing as a predictor of harm status"
  ) 
Dowling et al.'s (2021) gambling frequency limit for racing as a predictor of harm status
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.80 1.26, 2.56 0.001 2.35 1.40, 3.93 0.001
    Unknown 2.39 0.92, 6.22 0.075 2.46 0.76, 7.99 0.13
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.41 0.79, 2.51 0.2 2.87 1.56, 5.29 <0.001
    Unknown 1.17 0.88, 1.56 0.3 0.99 0.70, 1.41 >0.9
EDUCATION_NUM 0.90 0.83, 0.97 0.007 0.97 0.89, 1.06 0.5
GAM_ACCOUNTS_NUM 1.35 1.23, 1.48 <0.001 1.33 1.19, 1.47 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.86 1.36, 2.54 <0.001 1.74 1.19, 2.52 0.004
OLD_LIMIT_RACING_FREQ 1.19 0.89, 1.58 0.2 1.29 0.92, 1.81 0.15
Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,109

1,650

BIC 2,160

1,701

Deviance 2,086

1,627

Residual df 1,637

1,570

No. Obs. 1,647

1,580

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Sports

Show code
# PGSI model:
weighted_lr_freq_old_limit_PGSI_sport <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_SPORT_FREQ,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_freq_old_limit_PGSI_sport <- tbl_regression(weighted_lr_freq_old_limit_PGSI_sport, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_freq_old_limit_PGSI_sport)

Show code
# GHM model:
weighted_lr_freq_old_limit_GHM_sport <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_SPORT_FREQ,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_freq_old_limit_GHM_sport<- tbl_regression(weighted_lr_freq_old_limit_GHM_sport, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_freq_old_limit_GHM_sport)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_freq_old_limit_PGSI_sport, tbl_weighted_lr_freq_old_limit_GHM_sport),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Dowling et al.'s (2021)  gambling frequency limit for sport as a predictor of harm status"
  ) 
Dowling et al.'s (2021) gambling frequency limit for sport as a predictor of harm status
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.80 1.26, 2.56 0.001 2.35 1.40, 3.93 0.001
    Unknown 2.39 0.92, 6.22 0.074 2.46 0.76, 7.99 0.13
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.41 0.79, 2.51 0.2 2.87 1.56, 5.28 <0.001
    Unknown 1.17 0.88, 1.56 0.3 0.99 0.70, 1.42 >0.9
EDUCATION_NUM 0.90 0.83, 0.97 0.006 0.97 0.89, 1.06 0.5
GAM_ACCOUNTS_NUM 1.35 1.23, 1.48 <0.001 1.33 1.19, 1.47 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.86 1.36, 2.54 <0.001 1.74 1.19, 2.52 0.004
OLD_LIMIT_SPORT_FREQ 1.17 0.88, 1.56 0.3 1.28 0.91, 1.80 0.2
Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,109

1,650

BIC 2,160

1,701

Deviance 2,086

1,627

Residual df 1,637

1,570

No. Obs. 1,647

1,580

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Interpretation in the context of H4.2: The odds ratio for PGSI harm was more than 0.1 larger for our limit for race and sports betting. For GHM harm, the odds ratios for our limits were larger, but only by 0.08 (Race betting) and 0.09 (Sports betting).

Amount spent

Racing

Show code
# PGSI model:
weighted_lr_spend_old_limit_PGSI_racing <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_RACING_SPEND,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_spend_old_limit_PGSI_racing <- tbl_regression(weighted_lr_spend_old_limit_PGSI_racing, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_spend_old_limit_PGSI_racing)

Show code
# GHM model:
weighted_lr_spend_old_limit_GHM_racing <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_RACING_SPEND,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_spend_old_limit_GHM_racing<- tbl_regression(weighted_lr_spend_old_limit_GHM_racing, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_spend_old_limit_GHM_racing)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_spend_old_limit_PGSI_racing, tbl_weighted_lr_spend_old_limit_GHM_racing),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Dowling et al.'s (2021) spend limit for racing as a predictor of harm status"
  ) 
Dowling et al.'s (2021) spend limit for racing as a predictor of harm status
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.74 1.22, 2.49 0.002 2.27 1.36, 3.79 0.002
    Unknown 2.24 0.86, 5.85 0.10 2.32 0.72, 7.40 0.2
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.46 0.81, 2.63 0.2 2.92 1.58, 5.39 <0.001
    Unknown 1.16 0.87, 1.54 0.3 0.97 0.68, 1.39 0.9
EDUCATION_NUM 0.90 0.83, 0.97 0.007 0.97 0.89, 1.06 0.5
GAM_ACCOUNTS_NUM 1.34 1.22, 1.48 <0.001 1.32 1.19, 1.47 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.85 1.35, 2.53 <0.001 1.73 1.19, 2.51 0.004
OLD_LIMIT_RACING_SPEND 1.72 1.25, 2.37 <0.001 1.70 1.14, 2.52 0.009
Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,096

1,643

BIC 2,147

1,694

Deviance 2,073

1,621

Residual df 1,637

1,570

No. Obs. 1,647

1,580

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Sports

Show code
# PGSI model:
weighted_lr_spend_old_limit_PGSI_sport <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_SPORT_SPEND,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_spend_old_limit_PGSI_sport <- tbl_regression(weighted_lr_spend_old_limit_PGSI_sport, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_spend_old_limit_PGSI_sport)

Show code
# GHM model:
weighted_lr_spend_old_limit_GHM_sport <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_SPORT_SPEND,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_spend_old_limit_GHM_sport<- tbl_regression(weighted_lr_spend_old_limit_GHM_sport, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_spend_old_limit_GHM_sport)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_spend_old_limit_PGSI_sport, tbl_weighted_lr_spend_old_limit_GHM_sport),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Dowling et al.'s (2021) spend limit for sport as a predictor of harm status"
  ) 
Dowling et al.'s (2021) spend limit for sport as a predictor of harm status
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.97 0.96, 0.98 <0.001
GENDER





    Female

    Male 1.72 1.21, 2.45 0.003 2.22 1.33, 3.71 0.002
    Unknown 2.19 0.83, 5.73 0.11 2.23 0.70, 7.15 0.2
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.49 0.82, 2.68 0.2 2.97 1.59, 5.54 <0.001
    Unknown 1.14 0.86, 1.52 0.4 0.95 0.67, 1.36 0.8
EDUCATION_NUM 0.90 0.83, 0.97 0.007 0.97 0.89, 1.06 0.5
GAM_ACCOUNTS_NUM 1.34 1.21, 1.47 <0.001 1.31 1.18, 1.46 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.87 1.36, 2.56 <0.001 1.75 1.20, 2.54 0.004
OLD_LIMIT_SPORT_SPEND 1.79 1.32, 2.43 <0.001 1.84 1.26, 2.69 0.002
Null deviance 2,283

1,787

Null df 1,646

1,579

AIC 2,092

1,639

BIC 2,143

1,690

Deviance 2,069

1,616

Residual df 1,637

1,570

No. Obs. 1,647

1,580

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Interpretation in the context of H4.2: The odds ratio for both PGSI harm and GHM harm was higher for our limits than these limits(both sport and race betting) by substantially more than 0.1 (10%).

Percentage of income spent

Racing & sports

The cut-off value is identical for sports and racing so there’s no need to do this twice:

Show code
# PGSI model:
weighted_lr_income_percent_spend_old_limit_PGSI <- svyglm(PGSI_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_RACING_SPORT_INCOME_SPENT,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_income_percent_spend_old_limit_PGSI <- tbl_regression(weighted_lr_income_percent_spend_old_limit_PGSI, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_income_percent_spend_old_limit_PGSI)

Show code
# GHM model:
weighted_lr_income_percent_spend_old_limit_GHM <- svyglm(GHM_STATUS ~ AGE + 
GENDER +
EMPLOYMENT_RECODED +
EDUCATION_NUM +
GAM_ACCOUNTS_NUM +
Engages_in_other_forms +
OLD_LIMIT_RACING_SPORT_INCOME_SPENT,
design = survey_design_w_old_limits, 
family = quasibinomial()) 

tbl_weighted_lr_income_percent_spend_old_limit_GHM<- tbl_regression(weighted_lr_income_percent_spend_old_limit_GHM, 
                 exponentiate = TRUE) %>%   add_glance_table() # View outcomes

check_model(weighted_lr_income_percent_spend_old_limit_GHM)

Show code
tbl_merge(tbls = list(tbl_weighted_lr_income_percent_spend_old_limit_PGSI, tbl_weighted_lr_income_percent_spend_old_limit_GHM),
          tab_spanner = c("**PGSI Status**", "**GHM Status**")) %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Dowling et al.'s (2021) percentage of income limit for racing and sport as a predictor of harm status"
  ) 
Dowling et al.'s (2021) percentage of income limit for racing and sport as a predictor of harm status
Characteristic
PGSI Status
GHM Status
OR 95% CI p-value OR 95% CI p-value
AGE 0.97 0.96, 0.98 <0.001 0.98 0.96, 0.99 <0.001
GENDER





    Female

    Male 1.93 1.22, 3.04 0.005 2.76 1.46, 5.23 0.002
    Unknown 3.99 1.10, 14.5 0.036 2.35 0.56, 9.79 0.2
EMPLOYMENT_RECODED





    Employed

    Unemployed 1.26 0.63, 2.51 0.5 2.14 1.05, 4.38 0.037
EDUCATION_NUM 0.91 0.83, 0.99 0.026 0.97 0.89, 1.07 0.6
GAM_ACCOUNTS_NUM 1.32 1.18, 1.49 <0.001 1.33 1.17, 1.51 <0.001
Engages_in_other_forms





    FALSE

    TRUE 1.89 1.30, 2.75 <0.001 1.76 1.15, 2.70 0.009
OLD_LIMIT_RACING_SPORT_INCOME_SPENT 2.04 1.47, 2.84 <0.001 2.19 1.47, 3.25 <0.001
Null deviance 1,527

1,288

Null df 1,054

1,054

AIC 1,391

1,189

BIC 1,433

1,232

Deviance 1,371

1,169

Residual df 1,046

1,046

No. Obs. 1,055

1,055

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Interpretation in the context of H4.2: The odds ratio for both PGSI harm and GHM harm was higher for our limits than these limits by substantially more than 0.1 (10%).

Phase 5: Rates of limit passing in different groups

Do some recoding of the data to get us the most simple categories for passing/not passing limits:

Show code
master_dataset_recoded_w_new_limits_post_survey <- master_dataset_recoded_w_new_limits %>%
  
  # Betting frequency:
  mutate(NEW_LIMIT_PGSI_BET_FREQ_POST_SURVEY = case_when(
    NEX_6M_MONTHLY_BET_FREQ_DAYS > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_BET_FREQ_DAYS") %>% 
      pull(optimal_cutpoint_PGSI),0) ~ 1,
    NEX_6M_MONTHLY_BET_FREQ_DAYS <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_BET_FREQ_DAYS") %>% 
      pull(optimal_cutpoint_PGSI), 0) ~ 0,
    TRUE ~ NA_real_
  )) %>%
   mutate(NEW_LIMIT_GHM_BET_FREQ_POST_SURVEY = case_when(
    NEX_6M_MONTHLY_BET_FREQ_DAYS > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_BET_FREQ_DAYS") %>% 
      pull(optimal_cutpoint_GHM),0) ~ 1,
    NEX_6M_MONTHLY_BET_FREQ_DAYS <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_BET_FREQ_DAYS") %>% 
      pull(optimal_cutpoint_GHM), 0) ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(NEX_6M_MONTHLY_BET_FREQ_DAYS,
  #        NEW_LIMIT_PGSI_BET_FREQ_POST_SURVEY,
  #        NEW_LIMIT_GHM_BET_FREQ_POST_SURVEY)
  # 
  # Spend/turnover:
    mutate(NEW_LIMIT_PGSI_SPEND_POST_SURVEY = case_when(
    NEX_6M_MONTHLY_SPEND > 250 ~ 1,
    NEX_6M_MONTHLY_SPEND <= 250  ~ 0,
    TRUE ~ NA_real_
  )) %>%
      mutate(NEW_LIMIT_GHM_SPEND_POST_SURVEY = case_when(
    NEX_6M_MONTHLY_SPEND > 2100 ~ 1,
    NEX_6M_MONTHLY_SPEND <= 2100  ~ 0,
    TRUE ~ NA_real_
  )) %>%
   # Check coding has worked:
  # select(NEX_6M_MONTHLY_SPEND,
  # NEW_LIMIT_PGSI_SPEND_POST_SURVEY,
  #          NEW_LIMIT_GHM_SPEND_POST_SURVEY)
  
  # Percentage of income spent:
      mutate(NEW_LIMIT_PGSI_INCOME_SPENT_POST_SURVEY = case_when(
    PERCENT_MON_INCOME_SPENT_MONTHLY_NEX_6M > round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    PERCENT_MON_INCOME_SPENT_MONTHLY_NEX_6M <= round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_INCOME_SPENT_POST_SURVEY = case_when(
    PERCENT_MON_INCOME_SPENT_MONTHLY_NEX_6M > round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    PERCENT_MON_INCOME_SPENT_MONTHLY_NEX_6M <= round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(PERCENT_MON_INCOME_SPENT_MONTHLY_NEX_6M,
  #          NEW_LIMIT_GHM_INCOME_SPENT_POST_SURVEY,
  # NEW_LIMIT_PGSI_INCOME_SPENT_POST_SURVEY) %>%
  # print(n=100)


  # Number of sports/races bet on:
      mutate(NEW_LIMIT_PGSI_N_ACTIVITIES_POST_SURVEY = case_when(
    NEX_6M_N_ACTIVITIES > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_N_ACTIVITIES") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    NEX_6M_N_ACTIVITIES <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_N_ACTIVITIES") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_N_ACTIVITIES_POST_SURVEY = case_when(
    NEX_6M_N_ACTIVITIES > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_N_ACTIVITIES") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    NEX_6M_N_ACTIVITIES <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_N_ACTIVITIES") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(NEX_6M_N_ACTIVITIES,
  #        NEW_LIMIT_PGSI_N_ACTIVITIES_POST_SURVEY,
  #        NEW_LIMIT_GHM_N_ACTIVITIES_POST_SURVEY) %>%
  # print(n=100)
  
  
  # Deposit frequency:
      mutate(NEW_LIMIT_PGSI_DEPOSIT_FREQ_POST_SURVEY = case_when(
    NEX_6M_MONTHLY_DEPOSIT_FREQ > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_FREQ") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    NEX_6M_MONTHLY_DEPOSIT_FREQ <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_FREQ") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_DEPOSIT_FREQ_POST_SURVEY = case_when(
    NEX_6M_MONTHLY_DEPOSIT_FREQ > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_FREQ") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    NEX_6M_MONTHLY_DEPOSIT_FREQ <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_FREQ") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(NEX_6M_MONTHLY_DEPOSIT_FREQ,
  # NEW_LIMIT_PGSI_DEPOSIT_FREQ_POST_SURVEY,
  #        NEW_LIMIT_GHM_DEPOSIT_FREQ_POST_SURVEY) %>%
  # print(n=100)
  

  # Deposit amount:
      mutate(NEW_LIMIT_PGSI_DEPOSIT_AMOUNT_POST_SURVEY = case_when(
    NEX_6M_MONTHLY_DEPOSIT_AMOUNT > round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_AMOUNT") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    NEX_6M_MONTHLY_DEPOSIT_AMOUNT <= round(optimal_limits %>% 
      filter(Predictor == "PAS_6M_MONTHLY_DEPOSIT_AMOUNT") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  mutate(NEW_LIMIT_GHM_DEPOSIT_AMOUNT_POST_SURVEY = case_when(
    NEX_6M_MONTHLY_DEPOSIT_AMOUNT >=900  ~ 1,
    NEX_6M_MONTHLY_DEPOSIT_AMOUNT < 900  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(NEX_6M_MONTHLY_DEPOSIT_AMOUNT,
  # NEW_LIMIT_PGSI_DEPOSIT_AMOUNT_POST_SURVEY,
  #        NEW_LIMIT_GHM_DEPOSIT_AMOUNT_POST_SURVEY) %>%
  # print(n=100)
  

  # Percentage of income deposited:
      mutate(NEW_LIMIT_PGSI_INCOME_DEPOSITED_POST_SURVEY = case_when(
    PERCENT_MON_INCOME_DEPOSIT_MONTHLY_NEX_6M > round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 1,
    PERCENT_MON_INCOME_DEPOSIT_MONTHLY_NEX_6M <= round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_PGSI),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
        mutate(NEW_LIMIT_GHM_INCOME_DEPOSITED_POST_SURVEY = case_when(
    PERCENT_MON_INCOME_DEPOSIT_MONTHLY_NEX_6M > round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 1,
    PERCENT_MON_INCOME_DEPOSIT_MONTHLY_NEX_6M <= round(optimal_limits %>% 
      filter(Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M") %>% 
      pull(optimal_cutpoint_GHM),0)  ~ 0,
    TRUE ~ NA_real_
  )) %>%
  # Check coding has worked:
  # select(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_NEX_6M,
  # NEW_LIMIT_PGSI_INCOME_DEPOSITED_POST_SURVEY,
  #        NEW_LIMIT_GHM_INCOME_DEPOSITED_POST_SURVEY)%>%
  # print(n=100)
  
        
# Number of gambling accounts:
      mutate(NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY = case_when( # this is obviously the same as pre-survey, but re-doing it here just helps with totals computed below)
    GAM_ACCOUNTS_NUM > 3 ~ 1,
    GAM_ACCOUNTS_NUM <= 3  ~ 0,
    TRUE ~ NA_real_
  )) %>%
# Number of gambling accounts proposed:
      mutate(NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED = case_when( # this is obviously the same as pre-survey, but re-doing it here just helps with totals computed below)
    GAM_ACCOUNTS_NUM > 2 ~ 1,
    GAM_ACCOUNTS_NUM <= 2  ~ 0,
    TRUE ~ NA_real_
  )) %>%

# recode outcome variables for simplicity of interpretation in regression models:
mutate(PGSI_STATUS = case_when(PGSI_STATUS ==  1 ~ "Harm",
                               PGSI_STATUS ==  0 ~  "No harm",
                    TRUE ~ NA)) %>%
mutate(GHM_STATUS = case_when(GHM_STATUS ==  1 ~ "Harm",
                               GHM_STATUS == 0 ~  "No harm",
                    TRUE ~ NA)) %>%
# Calculate the total no. of limits reached:
mutate(TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI = rowSums(select(., matches("PGSI.*_POST_SURVEY$")), na.rm = TRUE)) %>%
  # Make it so that  the variable recognises zero values, even though there are none yet.
# mutate(
#   TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI = factor(TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI),
#   TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI = fct_expand(TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI, "0"),
#   TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI = relevel(TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI, ref = "0")
# ) %>%
# # now set the reference level to 0:
#   mutate(TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI = relevel(as.factor(TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI), ref = "0")) %>%
select(CLIENT_ID,
       ELIGIBLE_SURVEY_SAMPLE_WEIGHTS,
       PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
       GHM_STATUS,
       PGSI_STATUS,
       NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_BET_FREQ_POST_SURVEY,
       NEW_LIMIT_GHM_BET_FREQ_POST_SURVEY,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_SPEND_POST_SURVEY,
       NEW_LIMIT_GHM_SPEND_POST_SURVEY,
        NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_INCOME_SPENT_POST_SURVEY,
       NEW_LIMIT_GHM_INCOME_SPENT_POST_SURVEY,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_N_ACTIVITIES_POST_SURVEY,
       NEW_LIMIT_GHM_N_ACTIVITIES_POST_SURVEY,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ_POST_SURVEY,
       NEW_LIMIT_GHM_DEPOSIT_FREQ_POST_SURVEY,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT_POST_SURVEY,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT_POST_SURVEY,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED_POST_SURVEY,
       NEW_LIMIT_GHM_INCOME_DEPOSITED_POST_SURVEY,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED,
       TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI
       ) %>%
  filter(!is.na(TOTAL_LIMITS_REACHED_POST_SURVEY_PGSI)) %>%
filter(!is.na(PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M)) 

How many customers are in this dataset:

Show code
# nrow(master_dataset_recoded_w_new_limits_post_survey)

master_dataset_recoded_w_new_limits_post_survey %>%
  distinct(CLIENT_ID) %>%
  nrow()
[1] 1055

Note that this sample purposely excludes anyone who has any missing data in any of our relevant variables to ensure a consistent sample is used to compute rates across the different behavioural indicators. ### Re-weighting

We need to update the the survey design object with new variables:

Show code
# Create survey design object:
survey_design_w_post_survey_limit_passing <- survey::svydesign(
  id = ~1,
  data = master_dataset_recoded_w_new_limits_post_survey,
  weights = ~ELIGIBLE_SURVEY_SAMPLE_WEIGHTS
)

Summary figures

Unweighted figures:

PRE-SURVEY:

Show code
pre_survey_rates_overall<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
       NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p_unweighted}%)"),
     digits = all_continuous() ~ 1
) %>%
 
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**")

pre_survey_rates_by_PGSI_status<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
      NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
      NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
  by = PGSI_STATUS,
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p_unweighted}%)"),
     digits = all_continuous() ~ 1
) %>%
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**")
  

pre_survey_rates_by_GHM_status<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
      NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
  by = GHM_STATUS,
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
    all_categorical() ~ "{n_unweighted} ({p_unweighted}%)"),
     digits = all_continuous() ~ 1
) %>%
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**") 

pre_survey_passing_rates <- tbl_merge(
  tbls = list(pre_survey_rates_overall,
              pre_survey_rates_by_PGSI_status, 
              pre_survey_rates_by_GHM_status),
  tab_spanner = c("",
                  "**PGSI Status**",
                  "**GHM Status**")
)  

# pre_survey_passing_rates

POST-SURVEY:

Show code
# First update the survey design objects we can make the names the same for pre-and post-limit passing variables:

master_dataset_recoded_w_new_limits_post_survey_2 <- master_dataset_recoded_w_new_limits_post_survey %>%
  select(-c(NEW_LIMIT_PGSI_BET_FREQ, # Need to 1st remove the  pre-survey variables otherwise they will duplicate
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED)) %>%
  rename_with(~ str_replace(., "_POST_SURVEY$", ""), 
              .cols = c(
                NEW_LIMIT_PGSI_BET_FREQ_POST_SURVEY,
                NEW_LIMIT_GHM_BET_FREQ_POST_SURVEY,
                NEW_LIMIT_PGSI_SPEND_POST_SURVEY,
                NEW_LIMIT_GHM_SPEND_POST_SURVEY,
                NEW_LIMIT_PGSI_INCOME_SPENT_POST_SURVEY,
                NEW_LIMIT_GHM_INCOME_SPENT_POST_SURVEY,
                NEW_LIMIT_PGSI_N_ACTIVITIES_POST_SURVEY,
                NEW_LIMIT_GHM_N_ACTIVITIES_POST_SURVEY,
                NEW_LIMIT_PGSI_DEPOSIT_FREQ_POST_SURVEY,
                NEW_LIMIT_GHM_DEPOSIT_FREQ_POST_SURVEY,
                NEW_LIMIT_PGSI_DEPOSIT_AMOUNT_POST_SURVEY,
                NEW_LIMIT_GHM_DEPOSIT_AMOUNT_POST_SURVEY,
                NEW_LIMIT_PGSI_INCOME_DEPOSITED_POST_SURVEY,
                NEW_LIMIT_GHM_INCOME_DEPOSITED_POST_SURVEY,
              ))

survey_design_w_post_survey_limit_passing <- survey::svydesign(
  id = ~1,
  data = master_dataset_recoded_w_new_limits_post_survey_2,
  weights = ~ELIGIBLE_SURVEY_SAMPLE_WEIGHTS
)

# now create our post-survey limit passing rates:

post_survey_rates_overall<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
       NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p_unweighted}%)"),
     digits = all_continuous() ~ 1
) %>%
 
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**")

post_survey_rates_by_PGSI_status<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
      NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
      NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
  by = PGSI_STATUS,
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p_unweighted}%)"),
     digits = all_continuous() ~ 1
) %>%
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**")

post_survey_rates_by_GHM_status<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
      NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
      NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED
  ),
  by = GHM_STATUS,
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p_unweighted}%)"),
     digits = all_continuous() ~ 1
) %>%
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**") 


post_survey_passing_rates<- tbl_merge(
  tbls = list(post_survey_rates_overall,
              post_survey_rates_by_PGSI_status, 
              post_survey_rates_by_GHM_status),
  tab_spanner = c("",
                  "**PGSI Status**",
                  "**GHM Status**")
)  

# post_survey_passing_rates

Join them all!

Show code
tbl_merge(tbls = list(pre_survey_passing_rates, post_survey_passing_rates),
  tab_spanner = c(
                  "**Six months pre-survey**",
                  "**Six months post-survey**")) %>% 
  as_gt() %>%
              tab_row_group(
    label = "Number of active gambling accounts (12 months pre-survey)", rows = 15:16
  ) %>%   
            tab_row_group(
    label = "% monthly income deposited per month", rows = 13:14
  ) %>%   
  tab_row_group(
    label = "Monthly deposit amount", rows = 11:12
  ) %>%   
       tab_row_group(
    label = "Monthly deposit frequency", rows = 9:10
  ) %>%   
     tab_row_group(
    label = "Number of different activities bet on", rows = 7:8
  ) %>%   
   tab_row_group(
    label = "% monthly income spent per month", rows = 5:6
  ) %>%   
   tab_row_group(
    label = "Spend per month", rows = 3:4
  ) %>% 
  tab_row_group(
    label = "Betting days per month", rows = 1:2
  ) %>%
  tab_options(data_row.padding = px(2),
               row_group.padding = px(2)) %>%
  tab_header("Table 5. Number and percentage of customers above each new optimal limit in the six months before and after participation: Overall rates and grouped by PGSI and GHM status") %>%
  tab_footnote( ""
  )
Table 5. Number and percentage of customers above each new optimal limit in the six months before and after participation: Overall rates and grouped by PGSI and GHM status
Characteristic
Six months pre-survey
Six months post-survey
Overall1 Harm1 No harm1 Harm1 No harm1 Overall1 Harm1 No harm1 Harm1 No harm1
Betting days per month
NEW_LIMIT_PGSI_BET_FREQ 521 (49%) 316 (54%) 205 (43%) 190 (56%) 331 (46%) 475 (45%) 275 (47%) 200 (42%) 171 (51%) 304 (42%)
NEW_LIMIT_GHM_BET_FREQ 383 (36%) 237 (41%) 146 (31%) 141 (42%) 242 (34%) 342 (32%) 209 (36%) 133 (28%) 124 (37%) 218 (30%)
Spend per month
NEW_LIMIT_PGSI_SPEND 609 (58%) 396 (68%) 213 (45%) 252 (75%) 357 (50%) 549 (52%) 352 (61%) 197 (42%) 224 (66%) 325 (45%)
NEW_LIMIT_GHM_SPEND 314 (30%) 223 (38%) 91 (19%) 153 (45%) 161 (22%) 256 (24%) 179 (31%) 77 (16%) 116 (34%) 140 (19%)
% monthly income spent per month
NEW_LIMIT_PGSI_INCOME_SPENT 494 (47%) 341 (59%) 153 (32%) 228 (68%) 266 (37%) 439 (42%) 293 (50%) 146 (31%) 193 (57%) 246 (34%)
NEW_LIMIT_GHM_INCOME_SPENT 434 (41%) 307 (53%) 127 (27%) 213 (63%) 221 (31%) 383 (36%) 262 (45%) 121 (26%) 174 (52%) 209 (29%)
Number of different activities bet on
NEW_LIMIT_PGSI_N_ACTIVITIES 470 (45%) 306 (53%) 164 (35%) 191 (57%) 279 (39%) 422 (40%) 279 (48%) 143 (30%) 175 (52%) 247 (34%)
NEW_LIMIT_GHM_N_ACTIVITIES 366 (35%) 246 (42%) 120 (25%) 150 (45%) 216 (30%) 317 (30%) 215 (37%) 102 (22%) 138 (41%) 179 (25%)
Monthly deposit frequency
NEW_LIMIT_PGSI_DEPOSIT_FREQ 473 (45%) 340 (59%) 133 (28%) 224 (66%) 249 (35%) 390 (37%) 276 (48%) 114 (24%) 181 (54%) 209 (29%)
NEW_LIMIT_GHM_DEPOSIT_FREQ 297 (28%) 226 (39%) 71 (15%) 153 (45%) 144 (20%) 222 (21%) 166 (29%) 56 (12%) 107 (32%) 115 (16%)
Monthly deposit amount
NEW_LIMIT_PGSI_DEPOSIT_AMOUNT 623 (59%) 419 (72%) 204 (43%) 266 (79%) 357 (50%) 514 (49%) 344 (59%) 170 (36%) 215 (64%) 299 (42%)
NEW_LIMIT_GHM_DEPOSIT_AMOUNT 312 (30%) 226 (39%) 86 (18%) 155 (46%) 157 (22%) 209 (20%) 149 (26%) 60 (13%) 94 (28%) 115 (16%)
% monthly income deposited per month
NEW_LIMIT_PGSI_INCOME_DEPOSITED 495 (47%) 354 (61%) 141 (30%) 233 (69%) 262 (36%) 393 (37%) 275 (47%) 118 (25%) 182 (54%) 211 (29%)
NEW_LIMIT_GHM_INCOME_DEPOSITED 313 (30%) 231 (40%) 82 (17%) 167 (50%) 146 (20%) 213 (20%) 157 (27%) 56 (12%) 102 (30%) 111 (15%)
Number of active gambling accounts (12 months pre-survey)
NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY 321 (30%) 208 (36%) 113 (24%) 134 (40%) 187 (26%) 321 (30%) 208 (36%) 113 (24%) 134 (40%) 187 (26%)
NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED 589 (56%) 369 (64%) 220 (46%) 229 (68%) 360 (50%) 589 (56%) 369 (64%) 220 (46%) 229 (68%) 360 (50%)
1 n (unweighted) (% (unweighted))

Weighted percentages added

Remake tables but with unweighted and weighted percentages included:

PRE-SURVEY:

Show code
post_survey_rates_overall<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
       NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p_unweighted}%; {p}%)"),
     digits = all_continuous() ~ 1
) %>%
 
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**")

post_survey_rates_by_PGSI_status<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
      NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
      NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
  by = PGSI_STATUS,
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
            all_categorical() ~ "{n_unweighted} ({p_unweighted}%; {p}%)"),
     digits = all_continuous() ~ 1
) %>%
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**")
  

post_survey_rates_by_GHM_status<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
      NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
      NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED
  ),
  by = GHM_STATUS,
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
            all_categorical() ~ "{n_unweighted} ({p_unweighted}%; {p}%)"),
     digits = all_continuous() ~ 1
) %>%
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**") 

pre_survey_passing_rates <- tbl_merge(
  tbls = list(post_survey_rates_overall,
              post_survey_rates_by_PGSI_status, 
              post_survey_rates_by_GHM_status),
  tab_spanner = c("",
                  "**PGSI Status**",
                  "**GHM Status**")
)  

# pre_survey_passing_rates

POST-SURVEY:

Show code
# First update the survey design objects we can make the names the same for pre-and post-limit passing variables:

master_dataset_recoded_w_new_limits_post_survey_2 <- master_dataset_recoded_w_new_limits_post_survey %>%
  select(-c(NEW_LIMIT_PGSI_BET_FREQ, # Need to 1st remove the  pre-survey variables otherwise they will duplicate
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED)) %>%
  rename_with(~ str_replace(., "_POST_SURVEY$", ""), 
              .cols = c(
                NEW_LIMIT_PGSI_BET_FREQ_POST_SURVEY,
                NEW_LIMIT_GHM_BET_FREQ_POST_SURVEY,
                NEW_LIMIT_PGSI_SPEND_POST_SURVEY,
                NEW_LIMIT_GHM_SPEND_POST_SURVEY,
                NEW_LIMIT_PGSI_INCOME_SPENT_POST_SURVEY,
                NEW_LIMIT_GHM_INCOME_SPENT_POST_SURVEY,
                NEW_LIMIT_PGSI_N_ACTIVITIES_POST_SURVEY,
                NEW_LIMIT_GHM_N_ACTIVITIES_POST_SURVEY,
                NEW_LIMIT_PGSI_DEPOSIT_FREQ_POST_SURVEY,
                NEW_LIMIT_GHM_DEPOSIT_FREQ_POST_SURVEY,
                NEW_LIMIT_PGSI_DEPOSIT_AMOUNT_POST_SURVEY,
                NEW_LIMIT_GHM_DEPOSIT_AMOUNT_POST_SURVEY,
                NEW_LIMIT_PGSI_INCOME_DEPOSITED_POST_SURVEY,
                NEW_LIMIT_GHM_INCOME_DEPOSITED_POST_SURVEY
              ))

survey_design_w_post_survey_limit_passing <- survey::svydesign(
  id = ~1,
  data = master_dataset_recoded_w_new_limits_post_survey_2,
  weights = ~ELIGIBLE_SURVEY_SAMPLE_WEIGHTS
)

# now create our post-surveylimit passing rates:

post_survey_rates_overall<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
       NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p}%)"),
     digits = all_continuous() ~ 1
) %>%
 
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**")

post_survey_rates_by_PGSI_status<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
      NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
      NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED),
  by = PGSI_STATUS,
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p}%)"),
     digits = all_continuous() ~ 1
) %>%
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**")
  

post_survey_rates_by_GHM_status<- tbl_svysummary(survey_design_w_post_survey_limit_passing,
  include = c(
      NEW_LIMIT_PGSI_BET_FREQ,
       NEW_LIMIT_GHM_BET_FREQ,
       NEW_LIMIT_PGSI_SPEND,
       NEW_LIMIT_GHM_SPEND,
       NEW_LIMIT_PGSI_INCOME_SPENT,
       NEW_LIMIT_GHM_INCOME_SPENT,
       NEW_LIMIT_PGSI_N_ACTIVITIES,
       NEW_LIMIT_GHM_N_ACTIVITIES,
       NEW_LIMIT_PGSI_DEPOSIT_FREQ,
       NEW_LIMIT_GHM_DEPOSIT_FREQ,
       NEW_LIMIT_PGSI_DEPOSIT_AMOUNT,
       NEW_LIMIT_GHM_DEPOSIT_AMOUNT,
       NEW_LIMIT_PGSI_INCOME_DEPOSITED,
       NEW_LIMIT_GHM_INCOME_DEPOSITED,
       NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY,
      NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED
  ),
  by = GHM_STATUS,
                   # type = list(c(EDUCATION_NUM,
                   #               AGE,
                   #               IRSAD_SCORE) ~ "continuous"),
    statistic = list(all_continuous() ~ c(
      "{mean} ({sd}), {median}"),
      all_categorical() ~ "{n_unweighted} ({p}%)"),
     digits = all_continuous() ~ 1
) %>%
  italicize_levels() %>%
  modify_header(all_stat_cols() ~ "**{level}**") 


post_survey_passing_rates<- tbl_merge(
  tbls = list(post_survey_rates_overall,
              post_survey_rates_by_PGSI_status, 
              post_survey_rates_by_GHM_status),
  tab_spanner = c("",
                  "**PGSI Status**",
                  "**GHM Status**")
)  

# post_survey_passing_rates

Join them all!

Show code
tbl_merge(tbls = list(pre_survey_passing_rates, post_survey_passing_rates),
  tab_spanner = c(
                  "**Six months pre-survey**",
                  "**Six months post-survey**")) %>% 
  as_gt() %>%
              tab_row_group(
    label = "Number of active gambling accounts (12 months pre-survey)", rows = 15
  ) %>%   
            tab_row_group(
    label = "% monthly income deposited per month", rows = 13:14
  ) %>%   
  tab_row_group(
    label = "Monthly deposit amount", rows = 11:12
  ) %>%   
       tab_row_group(
    label = "Monthly deposit frequency", rows = 9:10
  ) %>%   
     tab_row_group(
    label = "Number of different activities bet on", rows = 7:8
  ) %>%   
   tab_row_group(
    label = "% monthly income spent per month", rows = 5:6
  ) %>%   
   tab_row_group(
    label = "Spend per month", rows = 3:4
  ) %>% 
  tab_row_group(
    label = "Betting days per month", rows = 1:2
  ) %>%
  tab_options(data_row.padding = px(2),
               row_group.padding = px(2)) %>%
  tab_header("Supplemental Table 2. Number and percentage (unweighted; weighted) of customers above each new optimal limit in the six months before and after participation: Overall rates and grouped by PGSI and GHM status") %>%
  tab_footnote( ""
  )
Supplemental Table 2. Number and percentage (unweighted; weighted) of customers above each new optimal limit in the six months before and after participation: Overall rates and grouped by PGSI and GHM status
Characteristic
Six months pre-survey
Six months post-survey
Overall1 Harm1 No harm1 Harm1 No harm1 Overall2 Harm2 No harm2 Harm2 No harm2
Betting days per month
NEW_LIMIT_PGSI_BET_FREQ 475 (45%; 37%) 275 (47%; 38%) 200 (42%; 36%) 171 (51%; 43%) 304 (42%; 35%) 475 (37%) 275 (38%) 200 (36%) 171 (43%) 304 (35%)
NEW_LIMIT_GHM_BET_FREQ 342 (32%; 24%) 209 (36%; 27%) 133 (28%; 22%) 124 (37%; 29%) 218 (30%; 23%) 342 (24%) 209 (27%) 133 (22%) 124 (29%) 218 (23%)
Spend per month
NEW_LIMIT_PGSI_SPEND 549 (52%; 40%) 352 (61%; 48%) 197 (42%; 33%) 224 (66%; 55%) 325 (45%; 35%) 549 (40%) 352 (48%) 197 (33%) 224 (55%) 325 (35%)
NEW_LIMIT_GHM_SPEND 256 (24%; 9.7%) 179 (31%; 14%) 77 (16%; 5.7%) 116 (34%; 18%) 140 (19%; 6.5%) 256 (9.7%) 179 (14%) 77 (5.7%) 116 (18%) 140 (6.5%)
% monthly income spent per month
NEW_LIMIT_PGSI_INCOME_SPENT 439 (42%; 29%) 293 (50%; 36%) 146 (31%; 21%) 193 (57%; 44%) 246 (34%; 23%) 439 (29%) 293 (36%) 146 (21%) 193 (44%) 246 (23%)
NEW_LIMIT_GHM_INCOME_SPENT 383 (36%; 22%) 262 (45%; 30%) 121 (26%; 15%) 174 (52%; 38%) 209 (29%; 17%) 383 (22%) 262 (30%) 121 (15%) 174 (38%) 209 (17%)
Number of different activities bet on
NEW_LIMIT_PGSI_N_ACTIVITIES 422 (40%; 33%) 279 (48%; 42%) 143 (30%; 25%) 175 (52%; 47%) 247 (34%; 28%) 422 (33%) 279 (42%) 143 (25%) 175 (47%) 247 (28%)
NEW_LIMIT_GHM_N_ACTIVITIES 317 (30%; 24%) 215 (37%; 31%) 102 (22%; 17%) 138 (41%; 35%) 179 (25%; 19%) 317 (24%) 215 (31%) 102 (17%) 138 (35%) 179 (19%)
Monthly deposit frequency
NEW_LIMIT_PGSI_DEPOSIT_FREQ 390 (37%; 27%) 276 (48%; 38%) 114 (24%; 15%) 181 (54%; 47%) 209 (29%; 19%) 390 (27%) 276 (38%) 114 (15%) 181 (47%) 209 (19%)
NEW_LIMIT_GHM_DEPOSIT_FREQ 222 (21%; 11%) 166 (29%; 18%) 56 (12%; 4.1%) 107 (32%; 23%) 115 (16%; 6.6%) 222 (11%) 166 (18%) 56 (4.1%) 107 (23%) 115 (6.6%)
Monthly deposit amount
NEW_LIMIT_PGSI_DEPOSIT_AMOUNT 514 (49%; 39%) 344 (59%; 50%) 170 (36%; 28%) 215 (64%; 56%) 299 (42%; 32%) 514 (39%) 344 (50%) 170 (28%) 215 (56%) 299 (32%)
NEW_LIMIT_GHM_DEPOSIT_AMOUNT 209 (20%; 6.5%) 149 (26%; 10%) 60 (13%; 2.8%) 94 (28%; 13%) 115 (16%; 4.1%) 209 (6.5%) 149 (10%) 60 (2.8%) 94 (13%) 115 (4.1%)
% monthly income deposited per month
NEW_LIMIT_PGSI_INCOME_DEPOSITED 393 (37%; 25%) 275 (47%; 36%) 118 (25%; 15%) 182 (54%; 45%) 211 (29%; 18%) 393 (25%) 275 (36%) 118 (15%) 182 (45%) 211 (18%)
NEW_LIMIT_GHM_INCOME_DEPOSITED 213 (20%; 7.4%) 157 (27%; 12%) 56 (12%; 2.5%) 102 (30%; 17%) 111 (15%; 3.8%) 213 (7.4%) 157 (12%) 56 (2.5%) 102 (17%) 111 (3.8%)
Number of active gambling accounts (12 months pre-survey)
NEW_LIMIT_PGSI_GAM_ACCOUNTS_POST_SURVEY 321 (30%; 30%) 208 (36%; 37%) 113 (24%; 22%) 134 (40%; 40%) 187 (26%; 26%) 321 (30%) 208 (37%) 113 (22%) 134 (40%) 187 (26%)
NEW_LIMIT_PGSI_GAM_ACCOUNTS_PROPOSED 589 (56%; 55%) 369 (64%; 65%) 220 (46%; 45%) 229 (68%; 70%) 360 (50%; 50%) 589 (55%) 369 (65%) 220 (45%) 229 (70%) 360 (50%)
1 n (unweighted) (% (unweighted); %)
2 n (unweighted) (%)

EXPLORATORY ANALYSES

Subset of predictors

Which model most effectively and efficiently predicts PGSI status (i.e., which combination of the variables are most useful; noting that this doesn’t consider demographic or other gambling controls):

Show code
master_dataset_recoded_w_new_limits_filtered_NAs <- master_dataset_recoded_w_new_limits %>%
  filter(!is.na(NEW_LIMIT_PGSI_INCOME_SPENT)) %>%
  as.data.frame()

data_subset <- master_dataset_recoded_w_new_limits_filtered_NAs[, c("PGSI_STATUS", "NEW_LIMIT_PGSI_SPEND", "NEW_LIMIT_PGSI_INCOME_SPENT", "NEW_LIMIT_PGSI_N_ACTIVITIES", "NEW_LIMIT_PGSI_DEPOSIT_FREQ", "NEW_LIMIT_PGSI_DEPOSIT_AMOUNT", "NEW_LIMIT_PGSI_INCOME_DEPOSITED", "NEW_LIMIT_PGSI_GAM_ACCOUNTS")]

best_model <- bestglm(data_subset, 
                      IC = "AIC",
                      family = binomial)
# The output provides the best model based on AIC
print(best_model$BestModel)

Call:  glm(formula = y ~ ., family = family, data = Xi, weights = weights)

Coefficients:
                  (Intercept)                    PGSI_STATUS  
                      -0.2720                         0.6763  
  NEW_LIMIT_PGSI_DEPOSIT_FREQ  NEW_LIMIT_PGSI_DEPOSIT_AMOUNT  
                      -0.4973                         0.6205  

Degrees of Freedom: 1054 Total (i.e. Null);  1051 Residual
Null Deviance:      1448 
Residual Deviance: 1405     AIC: 1413

SPECIFIC LIMITS FOR YOUNG PEOPLE

Phase 2 young people: Identifying optimal limits for behavioural indicators

Isolate young people in the sample:

Show code
master_dataset_recoded_under26s <-  master_dataset_recoded %>%
  filter(AGE<26)

How many people does give us:

Show code
nrow(master_dataset_recoded_under26s)
[1] 239

Rates by harm status

Show code
master_dataset_recoded_under26s %>%
tbl_summary(include = c(PGSI_STATUS,GHM_STATUS),

                                  missing = "no"
) %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Rates of harm for GHM & PGSI",
    subtitle = "25s and under only (unweighted)"
  ) 
Rates of harm for GHM & PGSI
25s and under only (unweighted)
Characteristic Overall1
PGSI_STATUS
    Harm 159 (67%)
    No harm 80 (33%)
GHM_STATUS
    Harm 87 (41%)
    No harm 125 (59%)
1 n (%)
Show code
master_dataset_recoded_under26s %>%
tbl_summary(include = c(GHM_STATUS),
               by = PGSI_STATUS,
               missing = "no"
)  %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  # show_header_names() 
modify_header(stat_1 = "PGSI: Harm",
              stat_2 = "PGSI: No harm") %>%
  as_gt() %>%
  tab_options(data_row.padding = px(1.5)) %>%
tab_header(
    title = "Rates of harm for GHM grouped by PGSI status",
    subtitle = "25s and under only (unweighted)"
  ) 
Rates of harm for GHM grouped by PGSI status
25s and under only (unweighted)
Characteristic PGSI: Harm1 PGSI: No harm1
GHM_STATUS

    Harm 83 (57%) 4 (6.0%)
    No harm 62 (43%) 63 (94%)
1 n (%)

Select limits

Because of the extensive bootstrapping required for the three approaches for eight different behavioural indicators, the code below takes quite some time to run and has been commented out to save time each time the script runs and document knits. The outcomes from the bootstrapping were saved to a data file which is loaded in at the end of this process.

Betting frequency

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_BET_FREQ_DAYS,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_BET_FREQ_DAYS,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_BET_FREQ_DAYS,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# # PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Specificity

Amount spent

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_MONTHLY_SPEND_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_SPEND,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_SPEND_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_MONTHLY_SPEND_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_SPEND,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_SPEND_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_MONTHLY_SPEND_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_SPEND,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_SPEND_AUC_Specificity

Percentage of income spent

Show code
# set.seed(12345)
# 
# # Youden Index:
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Specificity

Number of different sports/races

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_N_ACTIVITIES_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_N_ACTIVITIES,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_N_ACTIVITIES_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_N_ACTIVITIES_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_N_ACTIVITIES,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_N_ACTIVITIES_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_N_ACTIVITIES_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_N_ACTIVITIES,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_N_ACTIVITIES_AUC_Specificity

Deposit frequency

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_FREQ,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_FREQ,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_FREQ,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Specificity

Deposit amount

Show code
# set.seed(12345)
# 
# # Youden Index:
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_AMOUNT,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_AMOUNT,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PAS_6M_MONTHLY_DEPOSIT_AMOUNT,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Specificity

Percentage of income deposited

Show code
# set.seed(12345)
# 
# # Youden Index:
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Youden
# 
# 
# # Maximise sensitivity:
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Specificity

Number of wagering accounts

Show code
# set.seed(12345)
# 
# # Youden Index:
# GAM_ACCOUNTS_AUC_Youden <- summarize_auc_results_youden(
#   data = master_dataset_recoded_under26s,
#   predictor_var = GAM_ACCOUNTS_NUM,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# GAM_ACCOUNTS_AUC_Youden
# 
# 
# # Maximise sensitivity:
# GAM_ACCOUNTS_AUC_Sensitivity <- summarize_auc_results_max_sensitivity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = GAM_ACCOUNTS_NUM,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# GAM_ACCOUNTS_AUC_Sensitivity
# 
# 
# # Maximise specificity:
# GAM_ACCOUNTS_AUC_Specificity <- summarize_auc_results_max_specificity(
#   data = master_dataset_recoded_under26s,
#   predictor_var = GAM_ACCOUNTS_NUM,
#   pgsi_status = PGSI_STATUS,
#   GHM_status = GHM_STATUS
# )
# 
# GAM_ACCOUNTS_AUC_Specificity

Table all limit cut-offs

Show code
# 
# # First join and then save all outcomes as a dataset for easy retrieval later on:
# AUC_limit_outcome_data <- bind_rows(
#           PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Youden,
#           PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Sensitivity,
#           PAS_6M_MONTHLY_BET_FREQ_DAYS_AUC_Specificity,
#           PAS_6M_MONTHLY_SPEND_AUC_Youden,
#           PAS_6M_MONTHLY_SPEND_AUC_Sensitivity,
#           PAS_6M_MONTHLY_SPEND_AUC_Specificity,
#           PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Youden,
#           PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Sensitivity,
#           PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M_AUC_Specificity,
#           PAS_6M_N_ACTIVITIES_AUC_Youden,
#           PAS_6M_N_ACTIVITIES_AUC_Sensitivity,
#           PAS_6M_N_ACTIVITIES_AUC_Specificity,
#           PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Youden,
#           PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Sensitivity,
#           PAS_6M_MONTHLY_DEPOSIT_FREQ_AUC_Specificity,
#           PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Youden,
#           PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Sensitivity,
#           PAS_6M_MONTHLY_DEPOSIT_AMOUNT_AUC_Specificity,
#           PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Youden,
#           PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Sensitivity,
#           PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M_AUC_Specificity,
#           GAM_ACCOUNTS_AUC_Youden,
#           GAM_ACCOUNTS_AUC_Sensitivity,
#           GAM_ACCOUNTS_AUC_Specificity
# ) %>%
#   mutate_if(is.numeric, round, 3)
# 
# write.csv(AUC_limit_outcome_data,
#           "R Data files/AUC_limit_outcome_data_young_people.csv",
#           row.names=FALSE)


# Read in outcome data:
AUC_limit_outcome_data_young_people<- read.csv("R Data files/AUC_limit_outcome_data_young_people.csv")

# Table them:
AUC_limit_outcome_data_young_people %>%
  gt() %>%
  tab_options(data_row.padding = px(1.5))%>%
tab_header(
    title = "Supplemental Table 3. Performance metrics for all cut-off points for all behavioural indicators: Specific to those aged under 26"
  ) 
Supplemental Table 3. Performance metrics for all cut-off points for all behavioural indicators: Specific to those aged under 26
predictor optimal_cutpoint_PGSI AUC_PGSI sensitivity_PGSI specificity_PGSI acc_PGSI Metric_maximised X95.CIs_PGSI optimal_cutpoint_GHM AUC_GHM sensitivity_GHM specificity_GHM acc_GHM X95.CIs_GHM method
PAS_6M_MONTHLY_BET_FREQ_DAYS 2.653 0.575 0.686 0.475 0.615 Youden 0.497, 0.651 3.461 0.589 0.713 0.488 0.580 0.514, 0.664 NA
PAS_6M_MONTHLY_BET_FREQ_DAYS 3.624 0.575 0.623 0.525 0.590 Sensitivity 0.496, 0.654 3.817 0.589 0.678 0.512 0.580 0.512, 0.668 NA
PAS_6M_MONTHLY_BET_FREQ_DAYS 5.101 0.575 0.509 0.613 0.544 Specificity 0.493, 0.654 6.081 0.589 0.494 0.648 0.585 0.516, 0.666 maximize_boot_metric
PAS_6M_MONTHLY_SPEND 116.285 0.597 0.692 0.500 0.628 Youden 0.516, 0.677 354.345 0.599 0.598 0.616 0.608 0.518, 0.673 NA
PAS_6M_MONTHLY_SPEND 135.708 0.597 0.679 0.512 0.623 Sensitivity 0.514, 0.678 191.601 0.599 0.678 0.528 0.590 0.524, 0.677 NA
PAS_6M_MONTHLY_SPEND 369.924 0.597 0.522 0.662 0.569 Specificity 0.508, 0.676 696.569 0.599 0.471 0.696 0.604 0.521, 0.674 maximize_boot_metric
PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M 3.257 0.611 0.591 0.614 0.599 Youden 0.508, 0.706 8.653 0.578 0.528 0.702 0.635 0.48, 0.685 NA
PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M 1.749 0.611 0.699 0.477 0.628 Sensitivity 0.509, 0.712 3.263 0.578 0.623 0.536 0.569 0.482, 0.681 NA
PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M 5.828 0.611 0.505 0.727 0.577 Specificity 0.506, 0.715 11.349 0.578 0.472 0.714 0.620 0.479, 0.677 maximize_boot_metric
PAS_6M_N_ACTIVITIES 3.960 0.547 0.660 0.425 0.582 Youden 0.469, 0.628 3.320 0.545 0.701 0.416 0.533 0.468, 0.618 NA
PAS_6M_N_ACTIVITIES 5.240 0.547 0.409 0.637 0.485 Sensitivity 0.466, 0.623 5.510 0.545 0.368 0.600 0.505 0.466, 0.619 NA
PAS_6M_N_ACTIVITIES 4.860 0.547 0.528 0.537 0.531 Specificity 0.469, 0.628 4.600 0.545 0.506 0.496 0.500 0.465, 0.621 maximize_boot_metric
PAS_6M_MONTHLY_DEPOSIT_FREQ 2.311 0.592 0.692 0.512 0.632 Youden 0.515, 0.67 4.357 0.617 0.621 0.560 0.585 0.542, 0.691 NA
PAS_6M_MONTHLY_DEPOSIT_FREQ 2.672 0.592 0.642 0.537 0.607 Sensitivity 0.509, 0.671 3.021 0.617 0.690 0.536 0.599 0.539, 0.694 NA
PAS_6M_MONTHLY_DEPOSIT_FREQ 5.834 0.592 0.509 0.625 0.548 Specificity 0.51, 0.67 9.494 0.617 0.494 0.712 0.623 0.54, 0.691 maximize_boot_metric
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 71.424 0.592 0.667 0.537 0.623 Youden 0.513, 0.672 146.335 0.607 0.621 0.600 0.608 0.528, 0.681 NA
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 72.402 0.592 0.667 0.537 0.623 Sensitivity 0.509, 0.67 81.967 0.607 0.724 0.520 0.604 0.532, 0.685 NA
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 215.908 0.592 0.503 0.662 0.556 Specificity 0.506, 0.672 400.522 0.607 0.494 0.696 0.613 0.529, 0.682 maximize_boot_metric
PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M 1.145 0.623 0.677 0.591 0.650 Youden 0.514, 0.719 3.652 0.587 0.566 0.667 0.628 0.488, 0.693 NA
PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M 0.736 0.623 0.763 0.500 0.679 Sensitivity 0.522, 0.726 1.586 0.587 0.623 0.548 0.577 0.491, 0.691 NA
PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M 3.330 0.623 0.516 0.682 0.569 Specificity 0.515, 0.726 6.593 0.587 0.491 0.714 0.628 0.487, 0.687 maximize_boot_metric
GAM_ACCOUNTS_NUM 3.020 0.686 0.447 0.787 0.561 Youden 0.613, 0.753 3.700 0.645 0.529 0.712 0.637 0.572, 0.717 NA
GAM_ACCOUNTS_NUM 3.000 0.686 0.686 0.650 0.674 Sensitivity 0.616, 0.755 3.520 0.645 0.529 0.712 0.637 0.571, 0.718 NA
GAM_ACCOUNTS_NUM 3.120 0.686 0.447 0.787 0.561 Specificity 0.608, 0.756 3.820 0.645 0.529 0.712 0.637 0.571, 0.718 maximize_boot_metric

Find optimal limits

Isolate the optimal limits/cut-off values for PGSI and GHM classification for this subsample:

Show code
PGSI_optimal_limits <- AUC_limit_outcome_data_young_people %>%
  group_by(predictor) %>%
  filter(acc_PGSI == max(acc_PGSI)) %>%
  slice(1) %>%
    mutate(
    AUC_with_CI_PGSI = paste0(
      round(AUC_PGSI, 3), " [", 
      gsub(",", ",", gsub("\\[|\\]", "", X95.CIs_PGSI)), "]"
    )
  ) %>%
  select(predictor,
         AUC_with_CI_PGSI,
         Metric_maximised,
         optimal_cutpoint_PGSI,
         acc_PGSI,
         sensitivity_PGSI,
         specificity_PGSI)

GHM_optimal_limits <- AUC_limit_outcome_data_young_people %>%
  group_by(predictor) %>%
  filter(acc_GHM == max(acc_GHM)) %>%
  slice(1) %>%
    mutate(
    AUC_with_CI_GHM = paste0(
      round(AUC_GHM, 3), " [", 
      gsub(",", ",", gsub("\\[|\\]", "", X95.CIs_GHM)), "]"
    )
  ) %>%
  select(predictor,
         AUC_with_CI_GHM,
         Metric_maximised,
         optimal_cutpoint_GHM,
         acc_GHM,
         sensitivity_GHM,
         specificity_GHM)

optimal_limits<- bind_cols(PGSI_optimal_limits,
          GHM_optimal_limits) %>%
rename(Predictor = 1,
       'Metric maximised PGSI' = Metric_maximised...3,
       'Metric maximised GHM' = Metric_maximised...10) %>%
select(-predictor...8) %>%
  mutate(Predictor = case_when(Predictor == "PERCENT_MON_INCOME_SPENT_MONTHLY" ~ "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M",
                               Predictor == "PERCENT_MON_INCOME_DEPOSIT_MONTHLY" ~ "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M",
                               TRUE ~ Predictor))

# Sort order:
predictor_order <- c(
  "PAS_6M_MONTHLY_BET_FREQ_DAYS",
  "PAS_6M_MONTHLY_SPEND",
  "PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M",
  "PAS_6M_N_ACTIVITIES",
  "PAS_6M_MONTHLY_DEPOSIT_FREQ",
  "PAS_6M_MONTHLY_DEPOSIT_AMOUNT",
  "PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M",
  "GAM_ACCOUNTS_NUM"
)

optimal_limits <- optimal_limits %>%
  mutate(Predictor = factor(Predictor, levels = predictor_order)) %>%
  arrange(Predictor) %>%
  mutate(optimal_cutpoint_PGSI = round(optimal_cutpoint_PGSI,2),
         optimal_cutpoint_GHM = round(optimal_cutpoint_GHM,2))

# Make table:
gt(optimal_limits) %>%
   tab_row_group(
    label = "Guidelines examined in previous studies", rows = 5:8
  ) %>% 
  tab_row_group(
    label = "Online wagering specific guidelines not previously examined", rows = 1:4
  ) %>%
    #### Example from other table:
     cols_label(
    Predictor = "Behavioural indicator",
    AUC_with_CI_PGSI = "AUC [95% CIs]",
    'Metric maximised PGSI' = "Metric maximised",
    optimal_cutpoint_PGSI = "Optimal cut point",
    acc_PGSI = "Accuracy",
    sensitivity_PGSI = "Sensitivity",
    specificity_PGSI = "Specificity",
    AUC_with_CI_GHM = "AUC [95% CIs]",
    'Metric maximised GHM' = "Metric maximised",
    optimal_cutpoint_GHM = "Optimal cut point",
    acc_GHM = "Accuracy",
    sensitivity_GHM = "Sensitivity",
    specificity_GHM = "Specificity",
  ) %>%
  tab_options(data_row.padding = px(2.5)) %>%
tab_header(
    title = "Supplemental Table 4. Optimal limit values for low-risk guidelines for online wagering with classification performance metrics (young people sub-sample; aged 25 and under)"
  )  %>%
  tab_footnote( "Note: PGSI = Problem Gambling Severity Index; GHM = Gambling Harm Measure; AUC = Area under the curve; CIs = 95% Confidence intervals. Metric maximised = the approach used to reach the limit with the highest accuracy value for each behavioural indicator."
  )
Supplemental Table 4. Optimal limit values for low-risk guidelines for online wagering with classification performance metrics (young people sub-sample; aged 25 and under)
Behavioural indicator AUC [95% CIs] Metric maximised Optimal cut point Accuracy Sensitivity Specificity AUC [95% CIs] Metric maximised Optimal cut point Accuracy Sensitivity Specificity
Online wagering specific guidelines not previously examined
PAS_6M_MONTHLY_BET_FREQ_DAYS 0.575 [0.497, 0.651] Youden 2.65 0.615 0.686 0.475 0.589 [0.516, 0.666] Specificity 6.08 0.585 0.494 0.648
PAS_6M_MONTHLY_SPEND 0.597 [0.516, 0.677] Youden 116.28 0.628 0.692 0.500 0.599 [0.518, 0.673] Youden 354.35 0.608 0.598 0.616
PERCENT_MON_INCOME_SPENT_MONTHLY_PAS_6M 0.611 [0.509, 0.712] Sensitivity 1.75 0.628 0.699 0.477 0.578 [0.48, 0.685] Youden 8.65 0.635 0.528 0.702
PAS_6M_N_ACTIVITIES 0.547 [0.469, 0.628] Youden 3.96 0.582 0.660 0.425 0.545 [0.468, 0.618] Youden 3.32 0.533 0.701 0.416
Guidelines examined in previous studies
PAS_6M_MONTHLY_DEPOSIT_FREQ 0.592 [0.515, 0.67] Youden 2.31 0.632 0.692 0.512 0.617 [0.54, 0.691] Specificity 9.49 0.623 0.494 0.712
PAS_6M_MONTHLY_DEPOSIT_AMOUNT 0.592 [0.513, 0.672] Youden 71.42 0.623 0.667 0.537 0.607 [0.529, 0.682] Specificity 400.52 0.613 0.494 0.696
PERCENT_MON_INCOME_DEPOSIT_MONTHLY_PAS_6M 0.623 [0.522, 0.726] Sensitivity 0.74 0.679 0.763 0.500 0.587 [0.488, 0.693] Youden 3.65 0.628 0.566 0.667
GAM_ACCOUNTS_NUM 0.686 [0.616, 0.755] Sensitivity 3.00 0.674 0.686 0.650 0.645 [0.572, 0.717] Youden 3.70 0.637 0.529 0.712
Note: PGSI = Problem Gambling Severity Index; GHM = Gambling Harm Measure; AUC = Area under the curve; CIs = 95% Confidence intervals. Metric maximised = the approach used to reach the limit with the highest accuracy value for each behavioural indicator.

Reproducibility of this analysis

Package stability

To ensure that every time this script runs it will use the same versions of the packages used during the script development, all packages are loaded using the groundhog.library() function from the groundhog package.

Rendering & publishing

This document is rendered in HTML format and published online using Quarto Pub by using the following command in the terminal: quarto publish quarto-pub Low-Risk-Wagering-Guidelines-Analysis-Script.qmd

Session details

Warning in system2("quarto", "-V", stdout = TRUE, env = paste0("TMPDIR=", :
running command '"quarto"
TMPDIR=C:/Users/rhei4496/AppData/Local/Temp/RtmpkrmvRT/file64c012dace2 -V' had
status 1
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.4.1 (2024-06-14 ucrt)
 os       Windows 11 x64 (build 26100)
 system   x86_64, mingw32
 ui       RTerm
 language (EN)
 collate  English_Australia.utf8
 ctype    English_Australia.utf8
 tz       Australia/Sydney
 date     2025-06-03
 pandoc   3.4 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
 quarto   NA @ C:\\PROGRA~1\\RStudio\\RESOUR~1\\app\\bin\\quarto\\bin\\quarto.exe

─ Packages ───────────────────────────────────────────────────────────────────
 package       * version  date (UTC) lib source
 apa           * 0.3.4    2023-10-06 [1] CRAN (R 4.4.1)
 apaTables     * 2.0.8    2021-01-04 [1] CRAN (R 4.4.1)
 bestglm       * 0.37.3   2020-03-13 [1] CRAN (R 4.4.3)
 broom         * 1.0.7    2024-09-26 [1] CRAN (R 4.4.2)
 car           * 3.1-3    2024-09-27 [1] CRAN (R 4.4.3)
 carData       * 3.0-5    2022-01-06 [1] CRAN (R 4.4.1)
 caret         * 7.0-1    2024-12-10 [1] CRAN (R 4.4.3)
 cutpointr     * 1.2.0    2024-12-10 [1] CRAN (R 4.4.3)
 dplyr         * 1.1.4    2023-11-17 [1] CRAN (R 4.4.1)
 fmsb          * 0.7.6    2024-01-19 [1] CRAN (R 4.4.1)
 forcats       * 1.0.0    2023-01-29 [1] CRAN (R 4.4.1)
 formattable   * 0.2.1    2021-01-07 [1] CRAN (R 4.4.1)
 ggh4x         * 0.3.0    2024-12-15 [1] CRAN (R 4.4.3)
 ggplot2       * 3.5.1    2024-04-23 [1] CRAN (R 4.4.1)
 ggpubr        * 0.6.0    2023-02-10 [1] CRAN (R 4.4.1)
 ggrain        * 0.0.4    2024-01-23 [1] CRAN (R 4.4.1)
 ggstatsplot   * 0.13.0   2024-12-04 [1] CRAN (R 4.4.3)
 glue          * 1.8.0    2024-09-30 [1] CRAN (R 4.4.3)
 Gmisc         * 3.0.3    2023-08-25 [1] CRAN (R 4.4.1)
 googlesheets4 * 1.1.1    2023-06-11 [1] CRAN (R 4.4.1)
 groundhog     * 3.2.1    2024-09-29 [2] CRAN (R 4.4.1)
 gt            * 0.11.1   2024-10-04 [1] CRAN (R 4.4.3)
 gtExtras      * 0.5.0    2023-09-15 [1] CRAN (R 4.4.1)
 gtsummary     * 2.1.0    2025-02-19 [1] CRAN (R 4.4.3)
 haven         * 2.5.4    2023-11-30 [1] CRAN (R 4.4.1)
 Hmisc         * 5.2-2    2025-01-10 [1] CRAN (R 4.4.2)
 htmlTable     * 2.4.3    2024-07-21 [1] CRAN (R 4.4.3)
 htmlwidgets   * 1.6.4    2023-12-06 [1] CRAN (R 4.4.1)
 httpuv        * 1.6.15   2024-03-26 [1] CRAN (R 4.4.1)
 irr           * 0.84.1   2019-01-26 [1] CRAN (R 4.4.1)
 janitor       * 2.2.1    2024-12-22 [1] CRAN (R 4.4.3)
 kableExtra    * 1.4.0    2024-01-24 [1] CRAN (R 4.4.1)
 lattice       * 0.22-6   2024-03-20 [1] CRAN (R 4.4.1)
 leaps         * 3.2      2024-06-10 [1] CRAN (R 4.4.3)
 lpSolve       * 5.6.23   2024-12-14 [1] CRAN (R 4.4.2)
 lubridate     * 1.9.4    2024-12-08 [1] CRAN (R 4.4.3)
 Matrix        * 1.7-2    2025-01-23 [1] CRAN (R 4.4.2)
 misty         * 0.7.0    2025-02-02 [1] CRAN (R 4.4.2)
 networkD3     * 0.4      2017-03-18 [1] CRAN (R 4.4.1)
 osfr          * 0.2.9    2022-09-25 [1] CRAN (R 4.4.1)
 patchwork     * 1.3.0    2024-09-16 [1] CRAN (R 4.4.3)
 performance   * 0.13.0   2025-01-15 [1] CRAN (R 4.4.3)
 plotly        * 4.10.4   2024-01-13 [1] CRAN (R 4.4.1)
 pscl          * 1.5.9    2024-01-31 [1] CRAN (R 4.4.1)
 psych         * 2.4.12   2024-12-23 [1] CRAN (R 4.4.2)
 purrr         * 1.0.4    2025-02-05 [1] CRAN (R 4.4.3)
 pwr           * 1.3-0    2020-03-17 [1] CRAN (R 4.4.1)
 Rcpp          * 1.0.14   2025-01-12 [1] CRAN (R 4.4.3)
 readr         * 2.1.5    2024-01-10 [1] CRAN (R 4.4.1)
 readxl        * 1.4.4    2025-02-27 [1] CRAN (R 4.4.3)
 remotes       * 2.5.0    2024-03-17 [1] CRAN (R 4.4.1)
 robustbase    * 0.99-4-1 2024-09-27 [1] CRAN (R 4.4.3)
 rstatix       * 0.7.2    2023-02-01 [1] CRAN (R 4.4.1)
 scales        * 1.3.0    2023-11-28 [1] CRAN (R 4.4.1)
 see           * 0.10.0   2025-01-22 [1] CRAN (R 4.4.2)
 sessioninfo   * 1.2.3    2025-02-05 [1] CRAN (R 4.4.3)
 showtext      * 0.9-7    2024-03-02 [1] CRAN (R 4.4.1)
 showtextdb    * 3.0      2020-06-04 [1] CRAN (R 4.4.1)
 stringr       * 1.5.1    2023-11-14 [1] CRAN (R 4.4.1)
 survey        * 4.4-2    2024-03-20 [1] CRAN (R 4.4.1)
 survival      * 3.8-3    2024-12-17 [1] CRAN (R 4.4.3)
 sysfonts      * 0.8.9    2024-03-02 [1] CRAN (R 4.4.1)
 tibble        * 3.2.1    2023-03-20 [1] CRAN (R 4.4.1)
 tidyr         * 1.3.1    2024-01-24 [1] CRAN (R 4.4.1)
 tidyverse     * 2.0.0    2023-02-22 [1] CRAN (R 4.4.1)
 truncnorm     * 1.0-9    2023-03-20 [1] CRAN (R 4.4.1)
 waffle        * 1.0.2    2023-09-30 [1] CRAN (R 4.4.1)
 wCorr         * 1.9.8    2023-08-19 [1] CRAN (R 4.4.1)
 weights       * 1.0.4    2021-06-10 [1] CRAN (R 4.4.1)

 [1] C:/Users/rhei4496/AppData/Local/R/win-library/4.4
 [2] C:/Users/rhei4496/AppData/Local/Programs/R/R-4.4.1/library
 * ── Packages attached to the search path.

──────────────────────────────────────────────────────────────────────────────
R version 4.4.1 (2024-06-14 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)

Matrix products: default


locale:
[1] LC_COLLATE=English_Australia.utf8  LC_CTYPE=English_Australia.utf8   
[3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C                      
[5] LC_TIME=English_Australia.utf8    

time zone: Australia/Sydney
tzcode source: internal

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] bestglm_0.37.3      leaps_3.2           cutpointr_1.2.0    
 [4] pscl_1.5.9          robustbase_0.99-4-1 see_0.10.0         
 [7] performance_0.13.0  fmsb_0.7.6          truncnorm_1.0-9    
[10] misty_0.7.0         remotes_2.5.0       ggh4x_0.3.0        
[13] apaTables_2.0.8     apa_0.3.4           irr_0.84.1         
[16] lpSolve_5.6.23      httpuv_1.6.15       glue_1.8.0         
[19] Gmisc_3.0.3         htmlTable_2.4.3     Rcpp_1.0.14        
[22] googlesheets4_1.1.1 osfr_0.2.9          sessioninfo_1.2.3  
[25] caret_7.0-1         lattice_0.22-6      janitor_2.2.1      
[28] ggstatsplot_0.13.0  showtext_0.9-7      showtextdb_3.0     
[31] sysfonts_0.8.9      psych_2.4.12        broom_1.0.7        
[34] ggpubr_0.6.0        rstatix_0.7.2       car_3.1-3          
[37] carData_3.0-5       pwr_1.3-0           patchwork_1.3.0    
[40] networkD3_0.4       waffle_1.0.2        ggrain_0.0.4       
[43] htmlwidgets_1.6.4   plotly_4.10.4       scales_1.3.0       
[46] gtsummary_2.1.0     gtExtras_0.5.0      gt_0.11.1          
[49] formattable_0.2.1   kableExtra_1.4.0    wCorr_1.9.8        
[52] weights_1.0.4       Hmisc_5.2-2         survey_4.4-2       
[55] survival_3.8-3      Matrix_1.7-2        readxl_1.4.4       
[58] haven_2.5.4         lubridate_1.9.4     forcats_1.0.0      
[61] stringr_1.5.1       dplyr_1.1.4         purrr_1.0.4        
[64] readr_2.1.5         tidyr_1.3.1         tibble_3.2.1       
[67] ggplot2_3.5.1       tidyverse_2.0.0     groundhog_3.2.1    

loaded via a namespace (and not attached):
  [1] fs_1.6.5               fontawesome_0.5.3      gghalves_0.1.4        
  [4] httr_1.4.7             RColorBrewer_1.1-3     insight_1.0.2         
  [7] grpreg_3.5.0           tools_4.4.1            backports_1.5.0       
 [10] R6_2.6.1               DT_0.33                statsExpressions_1.6.2
 [13] mgcv_1.9-1             lazyeval_0.2.2         jomo_2.7-6            
 [16] withr_3.0.2            gridExtra_2.3          textshaping_1.0.0     
 [19] cli_3.6.4              sandwich_3.1-0         labeling_0.4.3        
 [22] sass_0.4.9             mvtnorm_1.3-3          commonmark_1.9.2      
 [25] systemfonts_1.2.1      foreign_0.8-88         svglite_2.1.3         
 [28] parallelly_1.42.0      labelled_2.13.0        rstudioapi_0.17.1     
 [31] httpcode_0.3.0         generics_0.1.3         shape_1.4.6.1         
 [34] gtools_3.9.5           abind_1.4-8            lifecycle_1.0.4       
 [37] multcomp_1.4-25        yaml_2.3.10            snakecase_0.11.1      
 [40] recipes_1.1.1          paletteer_1.6.0        pls_2.8-5             
 [43] promises_1.3.2         gdata_3.0.1            mitml_0.4-5           
 [46] zeallot_0.1.0          pillar_1.10.1          knitr_1.49            
 [49] boot_1.3-31            estimability_1.5.1     future.apply_1.11.3   
 [52] codetools_0.2-20       pan_1.9                broom.helpers_1.20.0  
 [55] data.table_1.17.0      vctrs_0.6.5            Rdpack_2.6.2          
 [58] cellranger_1.1.0       gtable_0.3.6           rematch2_2.1.2        
 [61] datawizard_1.0.0       ggpp_0.5.8-1           cachem_1.1.0          
 [64] gower_1.0.2            xfun_0.51              rbibutils_2.3         
 [67] prodlim_2024.06.25     correlation_0.8.6      coda_0.19-4.1         
 [70] reformulas_0.4.0       gargle_1.5.2           timeDate_4041.110     
 [73] iterators_1.0.14       hardhat_1.4.1          lava_1.8.1            
 [76] TH.data_1.1-2          ipred_0.9-15           nlme_3.1-167          
 [79] MBESS_4.9.3            rpart_4.1.24           colorspace_2.1-1      
 [82] DBI_1.2.3              nnet_7.3-20            mnormt_2.1.1          
 [85] tidyselect_1.2.1       emmeans_1.10.2         compiler_4.4.1        
 [88] extrafontdb_1.0        curl_6.2.1             glmnet_4.1-8          
 [91] mice_3.17.0            xml2_1.3.6             bayestestR_0.15.2     
 [94] checkmate_2.3.2        DEoptimR_1.1-3-1       digest_0.6.37         
 [97] minqa_1.2.8            rmarkdown_2.29         htmltools_0.5.8.1     
[100] pkgconfig_2.0.3        base64enc_0.1-3        extrafont_0.19        
[103] lme4_1.1-36            fastmap_1.2.0          rlang_1.1.5           
[106] farver_2.1.2           zoo_1.8-13             jsonlite_1.9.0        
[109] ModelMetrics_1.2.2.2   magrittr_2.0.3         polynom_1.4-1         
[112] Formula_1.2-5          parameters_0.24.1      munsell_0.5.1         
[115] stringi_1.8.4          pROC_1.18.5            MASS_7.3-64           
[118] plyr_1.8.9             ggrepel_0.9.6          parallel_4.4.1        
[121] listenv_0.9.1          splines_4.4.1          hms_1.1.3             
[124] igraph_2.1.4           markdown_1.13          ggsignif_0.6.4        
[127] effectsize_1.0.0       reshape2_1.4.4         stats4_4.4.1          
[130] rstantools_2.4.0       crul_1.5.0             XML_3.99-0.18         
[133] evaluate_1.0.3         mitools_2.4            RcppParallel_5.1.10   
[136] nloptr_2.1.1           tzdb_0.4.0             foreach_1.5.2         
[139] cards_0.5.0            Rttf2pt1_1.3.12        future_1.34.0         
[142] cardx_0.2.3            xtable_1.8-4           later_1.4.1           
[145] ragg_1.3.3             viridisLite_0.4.2      class_7.3-23          
[148] googledrive_2.1.1      forestplot_3.1.6       memoise_2.0.1         
[151] cluster_2.1.8          timechange_0.3.0       globals_0.16.3        

END