Maria Delgado
September 24, 2025
The Iris dataset is a classic in Machine Learning.
Goal: Predict the species of a flower (Setosa, Versicolor, Virginica) using its measurements (sepals and petals).
Central question: Can we assign the correct species based only on 4 numerical variables?
Input variables: Sepal.Length, Sepal.Width, Petal.Length, Petal.Width.
Logistic transformation: converts linear combinations into probabilities between 0 and 1.
Intuitive interpretation:
Probability close to 1 → the flower very likely belongs to the species.
Probability close to 0 → the flower very likely does not belong to the species.
The Shiny app can be viewed by visiting: https://madelgadolo.shinyapps.io/assignment3_madl/
When plotting each variable:
We see distinct logistic curves for each species.
Setosa is clearly separated.
Versicolor and Virginica overlap more, but the model still distinguishes them.
These curves show how the models capture the transitions between species.
# One-vs-rest logistic regressions.
# For every species a 0/1 indicator column is added to `iris`, and a binomial
# GLM is fitted to that indicator using the four flower measurements.
# NOTE(review): setosa appears to be perfectly separable on these predictors,
# so glm() may warn about non-convergence / fitted probabilities of 0 or 1 for
# the first model -- confirm when running.

# 1. Setosa against every other species
iris[["Setosa"]] <- as.numeric(iris[["Species"]] == "setosa")
modelo_setosa <- glm(
  Setosa ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  family = binomial,
  data = iris
)

# 2. Versicolor against every other species
iris[["Versicolor"]] <- as.numeric(iris[["Species"]] == "versicolor")
modelo_versicolor <- glm(
  Versicolor ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  family = binomial,
  data = iris
)

# 3. Virginica against every other species
iris[["Virginica"]] <- as.numeric(iris[["Species"]] == "virginica")
modelo_virginica <- glm(
  Virginica ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  family = binomial,
  data = iris
)
# Draw one panel of the logistic-curve grid.
#
# The gray points are the observed 0/1 Setosa indicator plotted against
# `var_name`. On top of them, one curve per model shows the predicted
# probability as `var_name` sweeps across `x_range` while the other three
# measurements are held at their sample means.
#
# Arguments:
#   var_name  Name of the measurement column to vary (also used as the x label
#             and in the panel title).
#   x_range   Length-2 numeric vector: start and end of the prediction grid.
#   data      Data frame holding the four measurements and a `Setosa` column.
#   models    Named list of fitted glm objects; the names become legend labels.
#   colours   Character vector of line colours, in the same order as `models`.
#   n_points  Number of grid points along `x_range`.
#
# Returns NULL invisibly; called for its plotting side effect.
plot_logistic_panel <- function(var_name, x_range, data, models, colours,
                                n_points = 200) {
  predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
  stopifnot(
    var_name %in% predictors,
    length(x_range) == 2,
    length(models) == length(colours)
  )

  plot(
    data[[var_name]], data$Setosa,
    pch = 16, col = "gray",
    xlab = var_name, ylab = "Probability",
    main = paste("Logistic Curve for", var_name),
    ylim = c(0, 1)
  )

  # Prediction grid: every predictor pinned at its mean, except the one being
  # varied, which takes the evenly spaced grid values.
  x_vals <- seq(x_range[1], x_range[2], length.out = n_points)
  grid <- lapply(data[predictors], function(column) rep(mean(column), n_points))
  grid[[var_name]] <- x_vals
  newdata <- as.data.frame(grid)

  for (i in seq_along(models)) {
    fitted_prob <- predict(models[[i]], newdata = newdata, type = "response")
    lines(x_vals, fitted_prob, col = colours[[i]], lwd = 2)
  }

  legend("topright", legend = names(models), col = unname(colours),
         lwd = 2, bty = "n")
  invisible(NULL)
}

# Models and colours, listed in legend order.
species_models <- list(
  Setosa = modelo_setosa,
  Versicolor = modelo_versicolor,
  Virginica = modelo_virginica
)
curve_colours <- c("red", "blue", "green")

# Grid limits for each measurement, in panel order (PLOT 1 to PLOT 4).
x_ranges <- list(
  Sepal.Length = c(3, 8),
  Sepal.Width = c(2, 5),
  Petal.Length = c(1, 7),
  Petal.Width = c(0.1, 3)
)

# 2 x 2 layout; the previous graphics settings are put back afterwards so the
# grid does not leak into later plots.
old_par <- par(mfrow = c(2, 2))
for (var_name in names(x_ranges)) {
  plot_logistic_panel(var_name, x_ranges[[var_name]], iris,
                      species_models, curve_colours)
}
par(old_par)