# main.py
# A demonstration of key classification algorithms using scikit-learn.
#
# Before running, you may need to install scikit-learn and pandas:
# pip install scikit-learn pandas
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
print("--- Starting Classification Algorithms Demonstration ---")

# --- Section 1: Load and Prepare the Dataset ---
# The Iris dataset is bundled with scikit-learn. Each sample holds four flower
# measurements, and the task is to predict the iris species from them.
print("\n--- 1. Loading and Preparing the Iris Dataset ---")
iris = load_iris()

# X: features (sepal length, sepal width, petal length, petal width).
# y: target (species of iris).
X, y = iris.data, iris.target

# Show the human-readable names behind the feature columns and class labels.
print(f"Features: {iris.feature_names}")
print(f"Target Classes: {iris.target_names}")

# Hold out 20% of the samples for testing. The fixed seed makes the split
# reproducible, and stratifying on y keeps the class proportions the same in
# both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
print(f"\nData split into {len(X_train)} training samples and {len(X_test)} testing samples.")

# Standardize the features; this matters for distance-based algorithms such as
# KNN and SVM. The scaler is fitted on the training split only, then the same
# transformation is applied to both splits so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("Feature data has been scaled.")
# --- Section 2: Train and Evaluate Classification Models ---
# Registry of the classifiers to compare, keyed by a human-readable label.
# Keeping them in one dictionary lets a single loop train and score them all.
models = {
    # Nearest-neighbour classifier configured to consult 5 neighbours.
    "K-Nearest Neighbors (KNN)": KNeighborsClassifier(n_neighbors=5),
    # Seeded so that repeated runs build the same tree.
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    # Support vector classifier with a linear kernel, seeded like the tree.
    "Support Vector Machine (SVM)": SVC(kernel="linear", random_state=42),
}
print("\n--- 2. Training and Evaluating Models ---")

# Estimator types that are sensitive to feature scale and therefore train on
# the standardized features. Selecting by type instead of by display name
# keeps the choice correct even if a label in `models` is reworded.
scale_sensitive_types = (KNeighborsClassifier, SVC)

for name, model in models.items():
    print(f"\n----- Evaluating: {name} -----")

    # KNN and SVM get the scaled features; the decision tree does not require
    # feature scaling, so it is trained on the raw measurements.
    if isinstance(model, scale_sensitive_types):
        train_features, test_features = X_train_scaled, X_test_scaled
    else:
        train_features, test_features = X_train, X_test

    # Fit on the training split and predict the held-out test split.
    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)

    # Overall fraction of test samples classified correctly.
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")

    # Per-class precision, recall, and F1-score, labelled with species names.
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=iris.target_names))

print("\n--- End of Demonstration ---")