########################################################
# Name: John Palomino
# CSC-315
# Lab #4: Probability
########################################################
library(gtools)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##########################################################################
# Add R code to the script below and create a Notebook to complete
# the steps and explicitly answer the following questions
##########################################################################
# Basic Probability Questions -- Use R as a calculator to display the
# answers below
# A standard deck of cards contains 52 cards, with 13 cards from each
# suit (hearts, clubs, diamonds, and spades).
Question 1 -- [6 / 8 points] ❌For (a) and (b), you calculated the probability that the card was (or is not) one of the spades. The question is for the Ace of Spades, specifically. The probability for an ace of spades is 1/52.
# 1) If one card is selected at random, what is the probability that
# (a) the card is the ace of spades?
deck<-52
spades<-13
prob_spade<-spades/deck
prob_spade
## [1] 0.25
#Ans: If one card is chosen at random, the probability that the card is the ace of spades is 0.25
# (b) the card is NOT the ace of spades?
prob_not_spade<-1-prob_spade
prob_not_spade
## [1] 0.75
#Ans: The probability that the card is not the ace of spade is 0.75
# (c) the card is an ace (of any suit)?
#The probability that the card is an ace the probability that card is either a heart, club,diamond or spade
aces<-4
prob_ace<-aces/deck
prob_ace
## [1] 0.07692308
#Ans: The probability that the card is an ace(of any type) is 0.07692308.
# (d) the card is an ace OR a 4?
card_4<-4
prob_4<-card_4/deck
prob_4_or_ace<-prob_4+prob_ace
prob_4_or_ace
## [1] 0.1538462
#the probability that the card is an ace or a 4 is 0.1538462.
# Use R to answer the remaining questions. You MUST use R to
# enumerate and analyze the sample space or to carry out
# probability experiments (simulations) so that you can
# calculate an empirical probability.
Question 2 -- [8 / 8 points] ✅This is correct, but you should use the built-in R functions such as rowSums or vectorized operations, rather than
for loops. For example for (b) you can use:
r <- rowSums(sample_space) == 4
count <- sum(r)
# 2) This question looks at the probability of rolling two dice
# (each with values 1 - 6) and getting a sum of 4.
# You will answer this question in parts.
# (a) Use the 'permutations' function from 'gtools'
# to enumerate the sample space obtained by rolling two dice.
# (Note: the correct sample space has 36 outcomes)
sample_space<-permutations(6,2,repeats=TRUE)
sample_space
## [,1] [,2]
## [1,] 1 1
## [2,] 1 2
## [3,] 1 3
## [4,] 1 4
## [5,] 1 5
## [6,] 1 6
## [7,] 2 1
## [8,] 2 2
## [9,] 2 3
## [10,] 2 4
## [11,] 2 5
## [12,] 2 6
## [13,] 3 1
## [14,] 3 2
## [15,] 3 3
## [16,] 3 4
## [17,] 3 5
## [18,] 3 6
## [19,] 4 1
## [20,] 4 2
## [21,] 4 3
## [22,] 4 4
## [23,] 4 5
## [24,] 4 6
## [25,] 5 1
## [26,] 5 2
## [27,] 5 3
## [28,] 5 4
## [29,] 5 5
## [30,] 5 6
## [31,] 6 1
## [32,] 6 2
## [33,] 6 3
## [34,] 6 4
## [35,] 6 5
## [36,] 6 6
# (b) Use R and your answer to (a) to find the number of outcomes
# where the sum is 4
count <- 0
n1<-36
for (i in 1:n1)
{
if(sum(sample_space[i,]) == 4)
count = count+1
}
sum_4=count
sum_4
## [1] 3
#Ans: There are 3 outcomes where the sum is 4.
# (c) divide your answer from (b) by the size of the sample space
# to find the probability that the sum is 4.
prob_4<-sum_4/n1
prob_4
## [1] 0.08333333
#The probability that the sum is 4 is 0.08333333
Question 3 -- [7 / 8 points] ❗Okay -- but don't use for loops for this. Use sum(roll_500 == 4). You also did the experiment 500 times, and not 5000.
# 3) Calculate the same probability in (2) but by finding the empirical
# probability by completing the steps below.
# (a) Write a function that rolls two dice and returns the sum of the
# die rolls (this is done for you)
roll2 <- function() {
s <- sample(1:6, 2, replace = TRUE)
sum(s)
}
# (b) Use the 'replicate' function to roll two dice 5000 times, to
# get a vector containing the sum of die rolls for each experiment.
set.seed(123)
roll_500<-replicate(500,roll2())
roll_500
## [1] 9 5 8 8 10 7 5 8 4 5 6 5 3 9 10 4 9 7 2 5 9 8 7 7 9
## [26] 7 2 4 11 3 8 12 9 7 8 3 9 11 4 10 7 4 10 7 9 11 9 7 8 4
## [51] 6 4 10 10 7 12 9 9 4 7 12 5 11 8 8 10 8 7 3 7 7 5 12 5 9
## [76] 7 9 8 7 5 7 7 10 5 11 7 6 9 11 10 3 3 10 3 9 8 5 2 10 5
## [101] 11 8 4 6 3 9 2 6 12 9 3 3 3 7 8 9 3 10 8 6 6 7 7 3 6
## [126] 6 6 6 7 8 6 6 9 10 6 6 3 3 6 5 7 5 7 11 4 10 7 6 8 5
## [151] 4 11 7 6 8 5 9 8 10 4 7 7 5 9 7 8 6 3 7 4 7 10 9 10 3
## [176] 3 5 4 6 8 11 9 8 9 3 5 9 6 6 8 8 6 9 2 7 9 4 6 5 4
## [201] 8 6 10 8 4 4 10 6 9 8 9 10 5 10 3 9 5 5 10 6 9 12 5 9 10
## [226] 4 6 11 7 3 12 10 12 4 8 4 5 6 10 10 4 8 6 7 8 9 5 4 11 8
## [251] 8 6 11 5 9 7 5 6 7 4 6 7 10 9 9 6 6 11 10 6 5 2 5 9 7
## [276] 8 7 7 6 6 8 11 2 8 10 3 2 5 5 2 10 10 8 9 4 9 8 5 4 4
## [301] 5 9 6 6 7 11 11 10 2 10 8 3 8 10 5 5 11 7 4 10 2 6 9 8 6
## [326] 8 4 8 3 8 6 5 7 10 10 6 7 6 5 7 7 6 4 6 7 5 7 7 5 8
## [351] 9 4 7 6 6 3 6 6 7 7 7 8 7 3 6 4 9 5 7 11 9 3 8 4 12
## [376] 5 2 4 7 4 6 5 6 8 10 8 7 4 10 8 9 4 4 11 8 3 7 11 10 7
## [401] 5 4 4 9 2 7 4 3 7 7 6 12 7 9 9 6 9 2 7 4 5 8 4 9 5
## [426] 9 7 10 8 3 4 10 5 5 9 7 5 7 10 10 6 9 10 8 9 9 10 9 9 5
## [451] 10 8 6 2 8 9 4 6 6 4 10 11 7 6 6 3 3 10 7 4 12 9 5 7 10
## [476] 9 6 8 8 5 5 2 7 7 11 8 6 6 5 4 6 9 11 8 6 6 9 9 4 9
# (c) Find the number of times you rolled a four, and divide by the
# number of experiments to find the empirical probability
count <- 0
n2<-500
for (i in 1:n2)
{
if(roll_500[i]== 4)
count = count+1
}
sum4=count
sum4
## [1] 49
prob4<-sum_4/n2
prob4
## [1] 0.006
#Ans: The empirical probability is 0.076
# Definition: A probability distribution of a discrete random variable
# gives the probability for each value that the variable can take. For
# example, if we flip a coin three times, and let X = the number of heads,
# then the probability distribution of X is given by the following code:
pdist <- cbind(X = 0:3, 'P(X)' = c(0.125,0.375, 0.375, 0.125))
pdist
## X P(X)
## [1,] 0 0.125
## [2,] 1 0.375
## [3,] 2 0.375
## [4,] 3 0.125
# In other words, P(X = 0) is 0.125, which says that the probability of
# getting no heads (or all tails) is 0.125. You will derive the above
# probability distribution in the next problem.
Question 4 -- [8.5 / 10 points] ❗Nice job -- you are getting the correct probabilities but you should use rowSums(ProbSpace == 1) to get the heads for each row. For ggplot, the x aesthetic should be set to X and y should be set to P.X..
# 4) We will look at flipping a coin 3 times and letting X = the
# number of heads. Find the probability distribution of X by
# completing the steps below:
# (a) Use the 'permutations' function to enumerate the sample
# space obtained from flipping a coin 3 times.
ProbSpace<-permutations(2,3, repeats=TRUE)
ProbSpace
## [,1] [,2] [,3]
## [1,] 1 1 1
## [2,] 1 1 2
## [3,] 1 2 1
## [4,] 1 2 2
## [5,] 2 1 1
## [6,] 2 1 2
## [7,] 2 2 1
## [8,] 2 2 2
# (b) Using your sample space, find X = the number of heads for each
# set of 3 coins.
X1<-sum(ProbSpace[1,]==1)
X1
## [1] 3
X2<-sum(ProbSpace[2,]==1)
X2
## [1] 2
X3<-sum(ProbSpace[3,]==1)
X3
## [1] 2
X4<-sum(ProbSpace[4,]==1)
X4
## [1] 1
X5<-sum(ProbSpace[5,]==1)
X5
## [1] 2
X6<-sum(ProbSpace[6,]==1)
X6
## [1] 1
X7<-sum(ProbSpace[7,]==1)
X7
## [1] 1
X8<-sum(ProbSpace[8,]==1)
X8
## [1] 0
# (c) Create a relative frequency table for X, which is the probability
# distribution of X = the number of heads in 3 coin tosses.
prob_dist <- cbind(X = 0:3, 'P(X)' = c(0.125,0.375, 0.375, 0.125))
prob_dist
## X P(X)
## [1,] 0 0.125
## [2,] 1 0.375
## [3,] 2 0.375
## [4,] 3 0.125
# (d) Create a bar graph of the relative frequencies, using ggplot and
# labeling the x-axis, y-axis, and title. The y-axis in this case
# corresponds to the 'probability'.
df<-data.frame(prob_dist)
df
## X P.X.
## 1 0 0.125
## 2 1 0.375
## 3 2 0.375
## 4 3 0.125
ggplot(data = df) +
geom_bar(mapping = aes(x = P.X.))+
labs(title = "A Bar Graph of Probability Distribuion of Head",
y="Probability",
x="X")

Question 5 -- [7 / 8 points] ❗You are calculating the correct probabilities, but to get a relative frequency table you should use
t <- table(Heads)
prop.table(t)
# 5) Find the empirical distribution of X = the number of heads in 3
# coin tosses by completing the steps below.
# (a) Write a function that flips a coin 3 times and returns the number
# of heads
flip3 <- function() {
s <- sample(1:2, 3, replace = TRUE)
s<-sum(s==1)
s
}
flip3()
## [1] 2
# (b) Use the 'replicate' function to repeat 3 coin tosses 5000 times, to
# get a vector containing the number of heads for each experiment
Heads<-replicate(500,flip3())
Heads
## [1] 1 2 1 3 1 2 1 1 2 2 3 2 1 2 2 2 1 1 0 1 1 3 2 1 2 2 3 2 2 1 2 3 1 2 0 2 2
## [38] 2 0 1 1 2 1 2 3 1 0 1 0 2 3 0 3 1 1 3 0 1 0 2 0 3 2 1 1 1 2 1 2 2 0 1 2 3
## [75] 1 3 2 1 0 1 2 1 2 1 1 2 1 1 2 1 1 2 2 2 1 1 1 1 2 2 2 3 1 3 3 1 2 3 0 2 3
## [112] 2 0 1 2 1 3 1 0 1 1 2 1 2 1 2 1 1 2 2 2 2 2 0 2 2 1 1 3 2 1 2 1 3 1 0 1 2
## [149] 2 1 2 1 1 3 1 3 2 0 3 1 2 1 0 1 1 0 3 2 1 2 1 0 0 2 3 2 3 3 2 3 2 2 2 1 2
## [186] 1 2 1 2 2 2 0 3 1 0 1 1 1 2 3 1 0 3 2 2 1 2 1 2 1 1 2 3 1 2 3 1 1 0 3 1 1
## [223] 2 1 3 1 1 0 3 1 0 1 3 1 2 3 1 2 1 3 1 2 2 1 2 1 1 1 0 1 2 2 3 2 2 1 2 2 0
## [260] 2 0 2 2 3 1 2 3 0 2 3 2 3 1 2 0 2 1 1 2 2 1 3 1 1 2 0 1 0 3 2 1 2 2 1 2 2
## [297] 2 2 2 0 0 2 2 0 2 1 1 1 0 1 2 0 2 3 0 0 0 1 3 1 3 1 3 1 2 3 1 2 2 1 2 2 2
## [334] 0 1 1 2 2 1 1 0 1 2 2 1 1 2 2 2 2 3 1 0 2 2 3 1 2 2 2 3 1 0 1 2 1 1 2 3 2
## [371] 3 1 1 2 0 1 0 0 1 1 1 3 0 2 1 1 2 2 2 1 2 2 2 2 3 2 1 0 0 2 0 2 2 1 1 1 1
## [408] 1 2 2 3 2 1 2 2 2 3 0 0 1 3 1 2 2 1 3 2 1 1 3 1 2 2 2 0 1 1 2 3 2 1 1 2 1
## [445] 2 2 2 2 1 2 0 2 3 1 1 2 3 1 2 1 3 1 0 1 2 3 1 2 1 0 2 1 2 3 2 1 1 1 3 1 2
## [482] 2 2 2 3 1 1 1 2 1 2 2 0 1 3 2 2 2 1 2
# (c) Create a relative frequency table for the number of heads. This is
# the empirical probability distribution of X = the number of heads
# in 3 coin tosses.
#Counting the number of Heads
x0<-sum(Heads==0)
x0
## [1] 60
x1<-sum(Heads==1)
x1
## [1] 179
x2<-sum(Heads==2)
x2
## [1] 189
x3<-sum(Heads==3)
x3
## [1] 72
#Calculating the probabilities
p_x0<-x0/500
p_x0
## [1] 0.12
p_x1<-x1/500
p_x1
## [1] 0.358
p_x2<-x2/500
p_x2
## [1] 0.378
p_x3<-x3/500
p_x3
## [1] 0.144
#a relative frequency table for the number of heads
Prob_Dist <- cbind(X = 0:3, 'P(X)' = c(p_x0,p_x1,p_x2,p_x3))
Prob_Dist
## X P(X)
## [1,] 0 0.120
## [2,] 1 0.358
## [3,] 2 0.378
## [4,] 3 0.144
######################################################################
# Poker Time! The commands below enumerate the sample space of
# all possible poker hands. Here we ignore the suit because it is
# not needed for the questions below. We also use combinations
# instead of permutations. Combinations should be used when the
# order does not matter (which is true for the order of cards
# in a hand). The cards are sampled WITHOUT replacement
# (repeats.allowed = FALSE) because we cannot include
# the same card twice in one hand. Finally, we specify 'set = FALSE' to
# allow for duplicate values in the deck vector. Each combination
# (hand) is equally likely, so classical probability can be used.
######################################################################
deck <- rep(1:13,4)
hands <- combinations(52, 5, deck, repeats.allowed = FALSE, set = FALSE)
Question 6 -- [0 / 3 points] ❌Each row is a possible poker hand, so you want the number of rows, which you can get using nrow.
# 6) How many possible poker hands are there?
length(hands)
## [1] 12994800
#There are 12994800 poker hands
Question 7 -- [0 / 3 points] ❌You want to
apply the
four.of.a.kind function to each row of the
hands matrix:
res <- apply(hands, 1, four.of.a.kind)
# 7) The function below takes a vector (corresponding to a hand of cards)
# and returns TRUE if the hand contains a four-of-a-kind
# Use 'apply' to apply this function to each hand, in order to show
# that the probability of being dealt a four-of-a-kind is
# approximately 0.00024 (or 1/4165). Note: You MUST use the
# apply function and the four.of.a.kind function below to find this.
# Because the hands matrix contains more than 2.5 million rows,
# this calculation may take several minutes. You should therefore test
# your code on a subset of the hands matrix first.
# There are two 4-of-a-kinds if you look at the first 20,000 rows.
##############################################################
# this function returns true if a hand contains a 4-of-a-kind
##############################################################
four.of.a.kind <- function(x) {
t <- table(x) # frequency table for cards in the hand
m <- max(t) # how frequent is the most common card?
if (m == 4) return (TRUE)
return (FALSE)
}
x<-hands[1:2000,]
n<-length(x)
count<-0
# GD: this is not correct
# for (i in 1:n)
# {
# if(four.of.a.kind(x))
# count = apply(x,2,mean)
# s=length(count)
# }
# prob_four<-s/4165
# prob_four
Question 8 -- [2 / 4 points] ❌Your function is correctly determining whether the hand has a three of a kind, but you also need to check for a pair. This can be done by checking whether min(t) is 2.
# 8) Create a function that determines whether a vector 'x' contains
# a full house (i.e., 3 of a kind and 1 pair). You can assume
# that 'x' includes exactly 5 cards.
three.of.a.kind <- function(x) {
t <- table(x)
m <- max(t)
if (m == 3) return (TRUE)
return (FALSE)
}
Question 9 -- [0 / 3 points] ❌
# 9) Show that the probability of being dealt a full house is
# approximately 0.00144 (roughly 1/694). Note for testing purposes,
# that there are 18 full houses in the first 20,000 rows of the
# hands matrix
# GD: this is not correct
# x<-hands[1:2000,]
# n<-length(x)
# count<-0
# for (i in 1:n)
# {
# if(three.of.a.kind(x))
# count = apply(x,2,mean)
# s=length(count)
# }
#
# prob_three<-s/694
# prob_three