# main.py
# A demonstration of hierarchical clustering using SciPy and Matplotlib.
#
# Before running, you may need to install scipy, scikit-learn, and matplotlib:
# pip install scipy scikit-learn matplotlib
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.datasets import make_blobs
from scipy.cluster.hierarchy import dendrogram, linkage
print("--- Starting Hierarchical Clustering Demonstration with SciPy ---")

# --- Section 1: Generate Sample Dataset ---
# A small dataset keeps the resulting dendrogram easy to interpret.
print("\n--- 1. Generating Sample Dataset ---")
# Create 15 two-dimensional data points in 3 loose clusters. The fixed
# random_state keeps the generated points the same from run to run.
# NOTE: `y` (the second value returned by make_blobs) is not used below.
X, y = make_blobs(n_samples=15, centers=3, n_features=2, random_state=42)
print("Sample data has been generated.")

# --- Section 2: Perform Hierarchical Clustering ---
# SciPy's 'linkage' function performs the agglomerative clustering.
# 'ward' is a method that minimizes the variance of the clusters being merged.
print("\n--- 2. Performing Agglomerative Hierarchical Clustering ---")
linked = linkage(X, method='ward')
print("Linkage matrix has been created.")

# --- Section 3: Visualize the Dendrogram ---
# A dendrogram is a tree diagram that shows the hierarchical relationship
# between clusters. It illustrates the order in which data points and
# clusters were merged.
print("\n--- 3. Visualizing the Results as a Dendrogram ---")

# Both names are bound before the `try` so that the `finally` clause can rely
# on them unconditionally, even if creating the figure itself fails.
plot_filename = 'hierarchical_dendrogram.png'
fig = None
try:
    fig = plt.figure(figsize=(12, 8))
    dendrogram(
        linked,
        orientation='top',
        labels=range(1, len(X) + 1),  # Label each point from 1 to 15
        distance_sort='descending',
        show_leaf_counts=True,
    )
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('Data Point Index')
    # NOTE(review): with method='ward' the merge heights are Ward linkage
    # distances rather than plain point-to-point Euclidean distances; the
    # axis label is kept as-is -- confirm whether it should be reworded.
    plt.ylabel('Euclidean Distance (Cluster Dissimilarity)')
    plt.grid(axis='y', linestyle='--')

    # Save the plot to a file before showing it.
    plt.savefig(plot_filename)
    print(f"\nDendrogram saved as '{plot_filename}'")
    plt.show()  # Display the plot
except Exception as e:
    # Top-level boundary of a demo script: report the failure and fall
    # through to the cleanup below instead of aborting with a traceback.
    print(f"An error occurred during visualization: {e}")
finally:
    # Release the figure on the error path as well as the success path.
    if fig is not None:
        plt.close(fig)

    # --- Clean up the created image file ---
    print("\n--- Cleaning up created image file ---")
    if os.path.exists(plot_filename):
        os.remove(plot_filename)
        print(f"Removed '{plot_filename}'")

print("\n--- End of Demonstration ---")