Video poker simulator

This video poker simulator seeks to illustrate the spread (variance) of wins and losses between multiple hypothetical players and different levels of game volatility. The simulator may be helpful for players who are unfamiliar with the logic of gambling, and who may be at risk of becoming problem gamblers. In the simulations…

The bet size is 1 euro each round
5 rounds are played per minute
The spread of wins and their probabilities (under optimal strategy) in the low / medium / high volatility version of the game are
- Royal flush: 2000e / 500e / 100e (0.002%)
- Straight flush: 50e / 55e / 50e (0.011%)
- Four of a kind: 33e / 45e / 30e (0.236%)
- Full house: 14e / 13e / 15e (1.151%)
- Flush: 7e / 7e / 8e (1.101%)
- Straight: 5e / 6e / 7e (1.123%)
- Three of a kind: 3e / 3e / 3e (7.445%)
- Two pair: 2e / 2e / 2e (12.928%)
The return to player (RTP) rates on optimal strategy are 89.955% (low volatility), 89.934% (medium volatility), and 89.965% (high volatility)
Optimal playing strategy is used
The “luckiest” and “unluckiest” players are visualized based on their results at the end of each simulation

These wins and their probabilities are based on information provided in https://www.pinnacle.com/en/betting-articles/casino/video-poker-odds-and-strategy/8RP2WGBBQFJ498LD , but may significantly differ from other video poker games (and other similar games) elsewhere

See also https://link.springer.com/article/10.1007/s10899-011-9238-0 for simulations and analyses on volatility, house edge and prize structures in different gambling games

library(tidyverse)
library(gganimate)
library(transformr)
library(truncnorm)


casino_simulator <- function(rounds, players, topbottom = F, volatility = "low") {
  
  simulations = data.frame(nrow = 0)
  
  #For first win simulations (Nigel Turner's idea)
  first_win = data.frame(nrow = 0)
  
  if (volatility=="low") {
    payout <- c(100, 50, 30, 15, 8, 7, 3, 2, 0) - 1
    probability <- c(0.002, 0.011, 0.236, 1.151, 1.101, 1.123, 7.445, 12.928, 76.003)/100
    ev <- sum(payout*probability) #ev = -0.10045
    sd <- sd(sample(payout, 1000000, replace = TRUE, prob = probability))
    cumEV = cumsum(rep(ev, rounds))
    cumSD = sqrt(cumsum(rep(sd^2, rounds))) #first convert to variance, then convert the sum back to SD
    lower <- qnorm(0.025, cumEV, cumSD)
    upper <- qnorm(0.975, cumEV, cumSD)
    confidence <- data.frame(cbind(lower, upper))
    confidence$ID <- seq.int(nrow(confidence))
  }
  
  else if (volatility=="med") {
    payout <- c(500, 65, 45, 13, 7, 6, 3, 2, 0) - 1
    probability <- c(0.002, 0.011, 0.236, 1.151, 1.101, 1.123, 7.445, 12.928, 76.003)/100
    ev <- sum(payout*probability) #ev = -0.10066
    sd <- sd(sample(payout, 1000000, replace = TRUE, prob = probability))
    cumEV = cumsum(rep(ev, rounds))
    cumSD = sqrt(cumsum(rep(sd^2, rounds))) #first convert to variance, then convert the sum back to SD
    lower <- qnorm(0.025, cumEV, cumSD)
    upper <- qnorm(0.975, cumEV, cumSD)
    confidence <- data.frame(cbind(lower, upper))
    confidence$ID <- seq.int(nrow(confidence))
  }
  
  else { 
    payout <- c(2000, 50, 33, 14, 7, 5, 3, 2, 0) - 1
    probability <- c(0.002, 0.011, 0.236, 1.151, 1.101, 1.123, 7.445, 12.928, 76.003)/100
    ev <- sum(payout*probability) #ev = -0.10035
    sd <- sd(sample(payout, 1000000, replace = TRUE, prob = probability))
    cumEV = cumsum(rep(ev, rounds))
    cumSD = sqrt(cumsum(rep(sd^2, rounds))) #first convert to variance, then convert the sum back to SD
    lower <- qnorm(0.025, cumEV, cumSD)
    upper <- qnorm(0.975, cumEV, cumSD)
    confidence <- data.frame(cbind(lower, upper))
    confidence$ID <- seq.int(nrow(confidence))
  }

  for (i in 1:players) {
    
    test <- sample(payout, rounds, replace=TRUE, prob=probability)
    test <- data.frame(run = test)
    names(test)[1] <- i #replaces run with simple integer (for simplicity). Code needs tidying a bit.
  
    simulations <- cbind(simulations, cumsum(test))
    simulations$nrow <- NULL
    
    #Locate first win among all bets
    first_win_temp <- which(test > 0)[1]
    first_win_temp <- data.frame(run = first_win_temp)
    names(first_win_temp)[1] <- i
    first_win <- cbind(first_win, first_win_temp)
    first_win$nrow <- NULL
    
  }
  
  max_value <- which(tail(simulations, 1) == max(tail(simulations, 1))) # Among the last row, which column has the highest value
  min_value <- which(tail(simulations, 1) == min(tail(simulations, 1))) # Among the last row, which column has the lowest value
  
  
  #How many are winning at the end
  winners <- length(which(tail(simulations, 1) > 0))
  
  
  simulations$ID <- seq.int(nrow(simulations))

  simulations.long <- simulations %>% 
    gather(key, value, -ID) %>%
    group_by(key) %>%
    dplyr::mutate(profit = factor(ifelse(value > 0, 1, 0)), # Is the player winning or not
                  profitriskcumsum = cumsum(as.numeric(profit)-1), # cumulative number of rounds in profit
                  randomrisk = which(sample(c(0,1), n(), prob=c(.9999603, 1-.9999603), replace=T) == 1)[1], #among n(), produce a moment when problem gambling starts. Probability: 84 (one full week) hours of playing one has 50% chance of becoming "problem gambler" (or "high risk gambler"). 84 * 60 minutes * 5 rounds = 25200 (1- 1/25200 =  .9999603)
                  randomrisk = ifelse(is.na(randomrisk), n(), randomrisk), #if prob gambling doesn't start, replace NA with total length of sample
                  randomrisk = c(rep(0, randomrisk[1]), rep(1, n() - randomrisk[1])), #vector with 0s until problem gambling starts (1s); have to index at 1 since rep-counts argument is a vector of identical values
                  problem = factor(case_when(profitriskcumsum > 900 | randomrisk == 1 ~ sample(c("Not at risk", "At risk"), 1, prob=c(.5, .5)), # when winning too much, 50% chance of becoming probl. gambl. OR random risk of becoming problem gambler
                                             TRUE ~ "Not at risk"))) %>%
    ungroup()
  
  
  #Upswing calculations using custom sliding-window regression (width-300 i.e. 1 hour)
  simulations.long <- simulations.long %>% group_by(key) %>% dplyr::mutate(id = seq(1:n()))
  
  upswing_detector <- function(window_width = 300, fidelity=1) { #window_width = 300 = 1 hour as a definition of upswing range
  estimates_slope <- list()
  counter = 0
  for (x in 1:length(unique(simulations.long$key))) { #iterate through all players
    for (i in seq(1, length(unique(simulations.long$id))+1-window_width, by = fidelity)) { #how many sliding windows fit within the sample, fidelity to speed calculation
      temp <- subset(simulations.long, key==x & id >= i & id < i+window_width) #subset based on selected window width
      
      temp_model <- lm(value ~ id, data=temp)
      counter = counter + 1
      estimates_slope[[counter]] <- coef(summary(temp_model))[2,1]
    }
  }
  
  estimates_slope <- as.data.frame(unlist(estimates_slope)) 
  estimates_slope <- estimates_slope %>% 
    dplyr::mutate(ID=rep(1:players, each=(length(unique(simulations.long$id))-window_width) / fidelity + 1)) # +1 before -window_width removed since otherwise doesn't work with fidelity=1 for some reason
  names(estimates_slope)[1] <- "slope"

  
  # Calculate proportion of upswings (vs. downswings) for all players, then obtain mean and sd across players, as well as maximum streak length with sd
  output1 <- estimates_slope %>%
    group_by(ID) %>%
    dplyr::mutate(positive = ifelse(slope > 0, 1, 0),
                  run_length = rep(rle(positive)$lengths, rle(positive)$lengths)) %>%
    dplyr::summarize(pos_prop = mean(positive),
                     max_run = max(run_length)) %>%
    ungroup() %>%
    dplyr::summarize(mean_grand_prop = mean(pos_prop),
                     sd_grand_prop = sd(pos_prop),
                     mean_run = mean(max_run),
                     sd_run = sd(max_run))
  
  output2 <- estimates_slope %>%
    group_by(ID) %>%
    dplyr::mutate(numb = 1:n(), upswing = factor(ifelse(slope > 0, "Upswing", "Downswing"))) %>%
    ggplot(aes(numb, slope, color=upswing)) + 
    geom_line(aes(group=1)) + 
    #facet_wrap("ID", scales="free") + 
    labs(color=NULL, x = "Hours played", y = "Slope") +
    scale_color_manual(values=c("red", "blue")) +
    scale_x_continuous(breaks = seq(0, (rounds-window_width)/10, by = 60),
                       labels=function(x)round(x/5/6 +1, 1)) + #+1 jotta saadaan tasattua normiplotin kanssa
    theme_minimal(base_size=14) +
    theme(legend.position="bottom")
  
  output3 <- estimates_slope %>%
    rename(key = ID) %>%
    group_by(key) %>%
    dplyr::mutate(key = as.character(key), ID = 1:n(), upswing = factor(ifelse(lag(slope, window_width/2) > 0, "Upswing", "Downswing")),
                  slope_corrected = lag(slope, window_width/2),
                  upswing_prop = mean(as.numeric(upswing)-1, na.rm=T)) #difficult to visualize upswings!! (this is not optimal, using lag() since the last 300 (window length) observations are missing by design)
  
  
  #return(ggarrange(output2, plot3))
  return(output3) #for colored visulaization of upswing
  #return(output1) #for upswing proportion calculations
}
  
  # !!!!!uncomment below to be able to use plot4!!!!!
  
  #simulations.long <- simulations.long %>% left_join(upswing_detector())
  
  #This was for testing whether the upswing detector works (un-comment below, and add "labs(title = output)" to plot3)
#output <- upswing_detector()
  
  #THIS IS AN ALPHA VERSION: Illustrations of "at risk" players based on time as a winner and length of play
  # plot <- simulations.long %>%
  #   dplyr::mutate(problem = fct_relevel(problem, "Not at risk")) %>% #force factor level order (simulations mess this up every now and then...)
  #   ggplot(aes(ID, value, group=key)) + # group=key here required for complex animation
  #   geom_line(aes(colour = problem, size = problem)) +
  #   geom_abline(intercept = 0, slope = ev, linetype="dashed", size=0.5, color="red", alpha=.5) +
  #   geom_hline(yintercept = 0, size=0.5, alpha=.5) +
  #   xlab("Hours played") + ylab("Money (euros)") +
  #   theme_minimal(base_size=14) +
  #   scale_x_continuous(breaks = seq(0, rounds, by = 600), #by = 600 on by 2 hours (i.e. 600 rounds / 5 / 60)
  #                      labels=function(x)round(x/5/60, 1)) +
  #   theme(legend.title = element_blank(),
  #         legend.position = "top") +
  #   scale_color_manual(values=c("lightblue", "red")) +
  #   scale_size_manual(values = c(.3, 1))
  #   # guides(color="none")

  # Plot only the top and bottom "players"
  # Note, for animations, the coloring is sensitive to alphabetical order of the key-factor labels!
  plot2 <- simulations.long %>%
    dplyr::filter(key == max_value | key == min_value) %>%
    dplyr::mutate(key = factor(key, levels = c(max_value, min_value),
                               labels = c("Luckiest\nplayer", "Unluckiest\nplayer"))) %>%
    ggplot(aes(ID, value, group=key)) +
    geom_line(aes(colour = key)) +
    geom_abline(intercept = 0, slope = ev, linetype="dashed", size=0.5, color="red", alpha=.5) +
    geom_hline(yintercept = 0, size=0.5, alpha=.5) +
    xlab("Hours played") + ylab("Money (euros)") +
    theme_minimal(base_size=14) +
    scale_x_continuous(breaks = seq(0, rounds, by = 600), # If <0 hour is visualized, the x-axis will not show correctly
                       labels=function(x)round(x/5/60, 1)) +
    guides(color="none")
  
  # Combined plot
  # (code may fail if there are 3 or more identical values (shared bottom or top))
  plot3 <- simulations.long %>%
    dplyr::mutate(maxmin = case_when(key == max_value ~ "Luckiest\nplayer",
                                     key == min_value ~ "Unluckiest\nplayer",
                                     TRUE ~ "Others"),
                  maxmin = factor(maxmin, levels = c("Luckiest\nplayer", "Unluckiest\nplayer", "Others"))) %>%
    ggplot(aes(ID, value, group=key)) +
    geom_line(aes(colour = maxmin, alpha = maxmin, size = maxmin)) +
    geom_abline(intercept = 0, slope = ev, linetype="dashed", size=0.5, color="red", alpha=.5) +
    geom_hline(yintercept = 0, size=0.5, alpha=.5) +
    
    #uncomment these if we want confidence interval:
    # geom_line(data = confidence, aes(x = ID, y = lower), linetype="dashed", color="black", size=0.5, inherit.aes=F) +
    # geom_line(data = confidence, aes(x = ID, y = upper), linetype="dashed", color="black", size=0.5, inherit.aes=F) +
    
    xlab("Hours played") + ylab("Money (euros)") +
    scale_color_manual(values=c("blue", "red", "grey")) +
    theme_minimal(base_size=14) +
    theme(legend.title = element_blank(),
          legend.position = "top") +
    scale_x_continuous(breaks = seq(0, rounds, by = 600),
                       labels=function(x)round(x/5/60, 1)) +
    scale_alpha_manual(values = c(1, 1, .3)) +
    scale_size_manual(values = c(1, 1, .5))
    #labs(title = paste("Upswing proportion (SD): ", round(output[1], 3), "(", round(output[2], 3), ")"))
  
  #Upswing visualization (not optimal!)
  plot4 <- simulations.long %>%
    # dplyr::mutate(slope_corrected = ifelse(is.na(slope_corrected), 0, slope_corrected)) %>% ## I'm switching back to numeric for testing purposes, fix later
    na.omit() %>%
    ggplot(aes(ID, value, group=key)) +
    geom_line(aes(colour = slope_corrected), size=0.9) +
    geom_abline(intercept = 0, slope = ev, linetype="dashed", size=0.5, color="red", alpha=.5) +
    geom_hline(yintercept = 0, size=0.5, alpha=.5) +
    xlab("Hours played") + ylab("Money (euros)") +
    labs(color = "Streak strength") +
    theme_minimal(base_size=14) +
    theme(legend.position = "top") +
    scale_alpha_manual(values = c(1, 1, .3)) +
    scale_size_manual(values = c(1, 1, .5)) +
    scale_color_gradient2(low="red", mid = "lightblue", high="blue") +
    scale_x_continuous(breaks = seq(0, rounds, by = 600),
                       labels=function(x)round(x/5/60, 1))
  
  #Add the below only if upswing_detector is used!
  
    # annotate("text", x = 550, y = 60, 
    #          label = paste("Winning streaks:", round(simulations.long$upswing_prop[1], 3)*100, "%"))
    # # annotate("text", x = 550, y = 60, 
    # #          label = paste("Winning streaks:", round(simulations.long$upswing_prop[1], 3)*100, "%", 
    # #                        round(simulations.long$upswing_prop[rounds+1], 3)*100, "%")) #This is an ugly fix, not for long-term use

  
  
  if (topbottom == F) {
    return(plot3) #winners for number of winners
    }
  else {
    return(plot2) #or upswing_detector()
    
    #plot for problem gambling risk plot (ALPHA VERSION!), plot2 for top and bottom, winners for number of winners at end. Visualizations work with "plot2". Note that the code may break (update to fix this is pending) if two players have idential wins or losses at the top or bottom (this is quite rare but may happen). upswing_detector() for mean length and sd of average upswings (defined as upwards trending streak of 2 hours)
    }
}

#for basic calculations e.g. mean(replicate(5, casino_simulator(300, 100, F, "low"))); FOR THIS USE "winners" as the return output!

#Histograms of first win distributions. To use these, return must be at "first_win"

# gather(casino_simulator(500, 1000, F, "high")) %>%
#   ggplot(aes(value)) +
#   geom_histogram(color = "grey", fill ="lightblue", bins = 25) +
#   theme_classic() +
#   labs(y = "Observations", x = "First win occurrence", color = NULL, title = "Volatility: High") +
#   geom_vline(aes(xintercept=median(as.numeric(value), na.rm=T), color="Median"), size=1) +
#   geom_vline(aes(xintercept=mean(as.numeric(value), na.rm=T), color="Mean"), size=1) +
#   geom_text(aes(label = mean(value), y = 200, x = mean(value)), color = "red") +
#   geom_text(aes(label = median(value), y = 250, x = median(value)), color = "blue") +
#   scale_color_manual(values = c("red", "blue")) +
#   theme(legend.position = c(.5, .5))

Low volatility (10 hours)

Animation

If you and 99 of your friends all played the low volatility video poker game for 10 hours, the luckiest and unluckiest one of you might have the following results:

set.seed(1)
animation <- casino_simulator(3000, 100, T, "low") + 
  geom_segment(aes(xend = 3000, yend = value), linetype = 2, colour = 'grey') + 
  geom_point(size=2) +
  geom_text(aes(x = 3000, label = key, color=profit), hjust = 0) +
  geom_text(aes(x = ID, label = round(value, -1), color = profit), vjust=2) +
  scale_color_manual(values=c("red", "darkgreen", "lightblue", "salmon")) +
  transition_reveal(ID) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = margin(5.5, 50, 5.5, 5.5))

animate(animation, duration = 15, fps = 10)

All players

set.seed(1)
casino_simulator(3000, 100, F, "low")

Medium volatility (10 hours)

Animation

If you and 99 of your friends all played the medium volatility video poker game for 10 hours, the luckiest and unluckiest one of you might have the following results:

set.seed(1)
animation <- casino_simulator(3000, 100, T, "med") + 
  geom_segment(aes(xend = 3000, yend = value), linetype = 2, colour = 'grey') + 
  geom_point(size=2) +
  geom_text(aes(x = 3000, label = key, color=profit), hjust = 0) +
  geom_text(aes(x = ID, label = round(value, -1), color = profit), vjust=2) +
  scale_color_manual(values=c("red", "darkgreen", "lightblue", "salmon")) +
  transition_reveal(ID) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = margin(5.5, 50, 5.5, 5.5))

animate(animation, duration = 15, fps = 10)

All players

set.seed(1)
casino_simulator(3000, 100, F, "med")

Medium volatility (30 hours)

Animation

If you and 99 of your friends all played the medium volatility video poker game for 30 hours, the luckiest and unluckiest one of you would have the following results:

set.seed(1)
animation <- casino_simulator(9000, 100, T, "med") + 
  geom_segment(aes(xend = 9000, yend = value), linetype = 2, colour = 'grey') + 
  geom_point(size=2) +
  geom_text(aes(x = 9000, label = key, color=profit), hjust = 0) +
  geom_text(aes(x = ID, label = round(value, -1), color = profit), vjust=2) +
  scale_color_manual(values=c("red", "darkgreen", "lightblue", "salmon")) +
  transition_reveal(ID) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = margin(5.5, 50, 5.5, 5.5))

animate(animation, duration = 25, fps = 10)

All players

set.seed(1)
casino_simulator(9000, 100, F, "med")

High volatility (30 hours)

When the “main prize” is very large, there are proportionally, and perhaps even deceptively, more players in profit (“lucky winners”).

set.seed(1)
casino_simulator(9000, 100, F, "high")

Proportion of winners across volatility levels

Below I’ve calculated the mean- and standard deviation values for proportion of winning players after a specific number of rounds played for all three levels of volatility. This is done by rerunning the simulations multiple times (500, 100, 75 or 50 times depending on the number of rounds being simulated and computing time requirements – note that this part of the code was run externally).

The mean proportion of players who are winning after 1, 10, 30, 50, 75 or 100 hours of play depends heavily on the game’s volatility level. Figure 1 shows that the proportion of players in profit (“lucky winners”) significantly depends on volatility. In the high volatility version of the game, 16.37 % are winning after 30 hours of play, and 5.16 % are winning even after 100 hours of continued play. Error bars represent 95 % confidence intervals.

#Values below obtained from externally run simulations!
winners <- tibble(mean = c(23.52, 2.03, 0.02, 0, 0, 0,
                           23.74, 7.11, 1.46, 0.307, 0.06, 0,
                           16.76, 6.33, 16.37, 11.68, 7.38, 5.16),
                  sd = c(4.14, 1.39, 0.14, 0, 0, 0,
                         4.15, 2.56, 1.25, 0.544, 0.24, 0,
                         3.71, 2.45, 4.06, 3.414, 2.31, 2.06),
                  sims = rep(c(500, 500, 100, 75, 50, 50), 3),
                    volatility = rep(c("Low\nvolatility", "Medium\nvolatility", "High\nvolatility"), each = 6),
                  length = rep(c(1, 10, 30, 50, 75, 100), 3)) %>%
  group_by(sims) %>%
  dplyr::mutate(se = sd / sqrt(sims),
                lower = mean - qt(.975, n())*se,
                upper = mean + qt(.975, n())*se,
                volatility = factor(volatility, levels=c("Low\nvolatility", "Medium\nvolatility", "High\nvolatility")))

winners %>% ggplot(aes(length, mean, color = volatility)) +
  geom_line(position = position_dodge(.9), size=.8, alpha=.5) +
  geom_point(position = position_dodge(.9), size=2, alpha =.5) +
  geom_errorbar(aes(ymax = upper, ymin = pmax(lower, 0)),
                position=position_dodge(.9), width=5) +
  theme_bw(base_size=14) +
  theme(legend.position="top") + 
  #geom_text(aes(length, mean, label=round(mean, 2)), show.legend = F, inherit.aes=F) +
  scale_color_manual(values = c("blue", "darkgreen", "Red")) +
  scale_x_continuous(breaks = c(1, 10, 30, 50, 75,100)) +
  labs(x = "Hours of play", y = "% of players in profit", color = NULL)

Video poker simulator

(c) Jussi Palomäki, 2023

Low volatility (10 hours)

Animation

All players

Medium volatility (10 hours)

Animation

All players

Medium volatility (30 hours)

Animation

All players

High volatility (30 hours)

Proportion of winners across volatility levels