"""Cluster the sales sample data with K-Means and Ward hierarchical clustering.

Recovered from the patch "ML6 added" (commit 2439b80e, file ML/ML6.md).

Steps:
    1. Load ``sales_data_sample.csv`` and keep its int64/float64 columns.
    2. Drop missing values and standardise the remaining features.
    3. Plot the elbow curve (inertia for k = 1..MAX_K).
    4. Fit K-Means with N_CLUSTERS clusters and plot SALES vs PRICEEACH.
    5. Build a Ward linkage, plot its dendrogram, and cut it into
       N_CLUSTERS flat clusters.

Cluster labels are written back to ``df`` in the ``KMeans_Cluster`` and
``Hierarchical_Cluster`` columns.
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

N_CLUSTERS = 3      # number of clusters used for both final models
MAX_K = 10          # largest k tried by the elbow method
N_INIT = 10         # explicit restarts: the library default differs by version
RANDOM_STATE = 42   # fixed seed so repeated runs give the same clusters


df = pd.read_csv("sales_data_sample.csv", encoding='latin1', on_bad_lines='skip')
print("Dataset shape:", df.shape)
print(df.head())


# Keep numeric features only. StandardScaler passes NaN through and both
# KMeans and linkage reject NaN, so discard columns that are entirely
# missing and then any row that still has a gap.
X = (
    df.select_dtypes(include=['int64', 'float64'])
    .dropna(axis=1, how='all')
    .dropna()
)
if X.empty:
    raise ValueError(
        "No numeric data left to cluster after dropping missing values"
    )
print("Features used for clustering:\n", X.head())


# Standardise so that no feature dominates the Euclidean distances.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


# Determine optimal number of clusters using Elbow Method
k_values = range(1, MAX_K + 1)
wcss = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, n_init=N_INIT, random_state=RANDOM_STATE)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

# Plot Elbow Method
plt.figure(figsize=(6, 4))
plt.plot(k_values, wcss, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia (WCSS)')
plt.show()

# Fit KMeans with the chosen number of clusters
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=N_INIT, random_state=RANDOM_STATE)
clusters_kmeans = kmeans.fit_predict(X_scaled)
# Align on X's index so labels land on the right rows even when rows with
# missing values were dropped above (those rows get NaN).
df['KMeans_Cluster'] = pd.Series(clusters_kmeans, index=X.index)

# Visualize clusters
sns.scatterplot(x='SALES', y='PRICEEACH', hue='KMeans_Cluster', data=df, palette='viridis')
plt.title("K-Means Clustering")
plt.show()

print("\nK-Means Cluster Centers:\n", kmeans.cluster_centers_)
print("\nCluster counts:\n", df['KMeans_Cluster'].value_counts())


# Create linkage matrix
Z = linkage(X_scaled, method='ward')

# Plot dendrogram
# NOTE(review): this draws one leaf per clustered row; if the plot is too
# dense to read, consider dendrogram's truncate_mode option.
plt.figure(figsize=(10, 5))
dendrogram(Z)
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Samples')
plt.ylabel('Distance')
plt.show()

# Cut the tree into at most N_CLUSTERS flat clusters
clusters_hier = fcluster(Z, t=N_CLUSTERS, criterion='maxclust')
df['Hierarchical_Cluster'] = pd.Series(clusters_hier, index=X.index)

print("\nHierarchical Cluster counts:\n", pd.Series(clusters_hier).value_counts())