Peer-graded Assignment: Course Project: Shiny Application and Reproducible Pitch

Maria Delgado

September 24, 2025

The Challenge

The Iris dataset is a classic in Machine Learning.

Goal: Predict the species of a flower (Setosa, Versicolor, Virginica) using its measurements (sepals and petals).

Central question: Can we assign the correct species based only on 4 numerical variables?

The Solution

We built three logistic regression models (one-vs-rest):
- Model 1: Setosa vs. not-Setosa
- Model 2: Versicolor vs. not-Versicolor
- Model 3: Virginica vs. not-Virginica
Each model returns a probability of belonging to its species.
The final classification comes from the highest probability.

How It Works

Input variables:
- Sepal.Length, Sepal.Width, Petal.Length, Petal.Width
Logistic transformation: converts linear combinations into probabilities between 0 and 1.
Intuitive interpretation:
- Probability close to 1 → the flower very likely belongs to that species.
- Probability close to 0 → the flower very likely does not belong to that species.

Final Result

The Shiny app can be viewed at: https://madelgadolo.shinyapps.io/assignment3_madl/

When plotting each variable (with the other three held at their mean values):
- We see distinct logistic curves for each species.
- Setosa is clearly separated.
- Versicolor and Virginica overlap more, but the model still distinguishes them.
These curves show how the models capture the transitions between species.

# One-vs-rest setup: for each species, add a 0/1 indicator column to `iris`
# and fit a binomial GLM of that indicator on the four flower measurements.
# NOTE(review): the pitch text describes setosa as clearly separated, so glm()
# may warn about fitted probabilities of 0 or 1 for that model -- confirm.

# 1. Setosa vs. not-Setosa
iris$Setosa <- as.numeric(iris$Species == "setosa")
modelo_setosa <- glm(
  Setosa ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris,
  family = binomial
)

# 2. Versicolor vs. not-Versicolor
iris$Versicolor <- as.numeric(iris$Species == "versicolor")
modelo_versicolor <- glm(
  Versicolor ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris,
  family = binomial
)

# 3. Virginica vs. not-Virginica
iris$Virginica <- as.numeric(iris$Species == "virginica")
modelo_virginica <- glm(
  Virginica ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris,
  family = binomial
)
  
# Draw a 2x2 grid of panels, one per flower measurement. Each panel shows the
# observed Setosa indicator (gray points) plus the predicted probability from
# each of the three one-vs-rest models while that panel's measurement sweeps a
# fixed range and the other three measurements are held at their sample means.
# The previous layout is saved so it can be restored once the grid is drawn.
old_par <- par(mfrow = c(2, 2))

# Sweep range (from, to) for each predictor; list order is the panel order.
x_ranges <- list(
  Sepal.Length = c(3, 8),
  Sepal.Width  = c(2, 5),
  Petal.Length = c(1, 7),
  Petal.Width  = c(0.1, 3)
)

# Sample mean of every predictor, used for the variables that are held fixed.
predictor_means <- vapply(iris[names(x_ranges)], mean, numeric(1))

species_labels <- c("Setosa", "Versicolor", "Virginica")
species_colors <- c("red", "blue", "green")

for (var_name in names(x_ranges)) {
  # Gray points: the 0/1 Setosa indicator against the swept measurement.
  plot(iris[[var_name]], iris$Setosa, pch = 16, col = "gray",
       xlab = var_name, ylab = "Probability",
       main = paste("Logistic Curve for", var_name),
       ylim = c(0, 1))

  x_vals <- seq(x_ranges[[var_name]][1], x_ranges[[var_name]][2],
                length.out = 200)

  # Start from the means, then overwrite the swept variable; data.frame()
  # recycles the length-one means to the length of x_vals.
  grid_cols <- as.list(predictor_means)
  grid_cols[[var_name]] <- x_vals
  newdata <- data.frame(grid_cols)

  # type = "response" returns probabilities rather than log-odds.
  p_setosa     <- predict(modelo_setosa,     newdata = newdata, type = "response")
  p_versicolor <- predict(modelo_versicolor, newdata = newdata, type = "response")
  p_virginica  <- predict(modelo_virginica,  newdata = newdata, type = "response")

  lines(x_vals, p_setosa,     col = species_colors[1], lwd = 2)
  lines(x_vals, p_versicolor, col = species_colors[2], lwd = 2)
  lines(x_vals, p_virginica,  col = species_colors[3], lwd = 2)

  legend("topright", legend = species_labels,
         col = species_colors, lwd = 2, bty = "n")
}

# Restore the plot layout that was active before this section ran.
par(old_par)