"""Small Tkinter tool for loading a CSV, inspecting it, and clustering it.

The window offers five actions: load a CSV file, show summary statistics,
plot a histogram for every numeric column, run KMeans clustering (which adds
a ``Cluster`` column to the data), and save the data back to a CSV file.
"""

import os
import tkinter as tk
from tkinter import filedialog, messagebox
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

# Name of the label column that perform_kmeans() writes into the DataFrame.
CLUSTER_COLUMN = "Cluster"


def load_csv_file() -> Optional[pd.DataFrame]:
    """Ask the user to pick a CSV file and load it.

    Returns:
        The loaded DataFrame, or ``None`` when the dialog is cancelled or the
        file cannot be read (an error dialog is shown in the latter case).
    """
    filename = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
    if not filename:
        return None

    try:
        return pd.read_csv(filename)
    except Exception as e:  # GUI boundary: report any failure to the user.
        messagebox.showerror("Error", f"Failed to load the file: {e}")
        return None


def show_statistics(data: Optional[pd.DataFrame]) -> None:
    """Show ``data.describe()`` (transposed) in an info dialog.

    Args:
        data: The dataset to summarise; ``None`` triggers an error dialog.
    """
    if data is None:
        messagebox.showerror("Error", "No data loaded.")
        return

    stats = data.describe().transpose()
    messagebox.showinfo("Data Statistics", stats.to_string())


def plot_histogram(data: Optional[pd.DataFrame]) -> None:
    """Plot a 20-bin histogram for every numeric column of ``data``.

    One figure is created and shown per numeric column. A warning dialog is
    shown when there are no numeric columns, an error dialog when ``data`` is
    ``None``.

    Args:
        data: The dataset to plot.
    """
    if data is None:
        messagebox.showerror("Error", "No data loaded.")
        return

    numeric_cols = data.select_dtypes(include=np.number).columns
    if len(numeric_cols) == 0:
        messagebox.showwarning(
            "No Numeric Data", "No numeric columns found in the dataset."
        )
        return

    for col in numeric_cols:
        plt.figure(figsize=(10, 6))
        data[col].hist(bins=20, color="skyblue", edgecolor="black")
        plt.title(f"Histogram of {col}")
        plt.xlabel(col)
        plt.ylabel("Frequency")
        plt.show()


def perform_kmeans(data: Optional[pd.DataFrame], n_clusters: int = 3) -> None:
    """Cluster the numeric columns of ``data`` with KMeans and plot the result.

    The cluster labels are written into ``data`` in place as the ``Cluster``
    column (so a later save includes them), and the first two feature columns
    are drawn as a scatter plot coloured by cluster.

    Args:
        data: The dataset to cluster; ``None`` triggers an error dialog.
        n_clusters: Number of clusters to fit. Defaults to 3.
    """
    if data is None:
        messagebox.showerror("Error", "No data loaded.")
        return

    numeric_cols = data.select_dtypes(include=np.number).columns
    # The labels from a previous run are numeric too; leaving them in would
    # feed old cluster ids back in as a feature (and could let a dataset with
    # a single real numeric column slip past the check below).
    feature_cols = [col for col in numeric_cols if col != CLUSTER_COLUMN]
    if len(feature_cols) < 2:
        messagebox.showwarning(
            "Insufficient Data",
            "Need at least two numeric columns for clustering.",
        )
        return

    try:
        features = data[feature_cols].values
        kmeans = KMeans(n_clusters=n_clusters)
        kmeans.fit(features)
        data[CLUSTER_COLUMN] = kmeans.labels_

        x_col, y_col = feature_cols[0], feature_cols[1]
        plt.figure(figsize=(10, 6))
        plt.scatter(
            data[x_col], data[y_col], c=data[CLUSTER_COLUMN], cmap="viridis"
        )
        plt.title("KMeans Clustering")
        plt.xlabel(x_col)
        plt.ylabel(y_col)
        plt.show()
    except Exception as e:  # GUI boundary: report any failure to the user.
        messagebox.showerror(
            "Error", f"Failed to perform KMeans clustering: {e}"
        )


def save_data(data: Optional[pd.DataFrame]) -> None:
    """Ask for a destination and write ``data`` to it as CSV (without index).

    Shows a success dialog after writing, a warning when the dialog is
    cancelled, and an error dialog when ``data`` is ``None`` or the write
    fails.

    Args:
        data: The dataset to save.
    """
    if data is None:
        messagebox.showerror("Error", "No data to save.")
        return

    save_path = filedialog.asksaveasfilename(
        defaultextension=".csv", filetypes=[("CSV files", "*.csv")]
    )
    if not save_path:
        messagebox.showwarning("Save Cancelled", "Save operation was cancelled.")
        return

    try:
        data.to_csv(save_path, index=False)
        messagebox.showinfo("Success", f"Data saved successfully to {save_path}")
    except Exception as e:  # GUI boundary: report any failure to the user.
        messagebox.showerror("Error", f"Failed to save the file: {e}")


class DataAnalysisApp:
    """Main window: one button per action, operating on the loaded dataset."""

    def __init__(self, root):
        """Configure ``root`` and create the five action buttons.

        Args:
            root: The Tk root window that hosts the buttons.
        """
        self.root = root
        self.root.title("Data Analysis Tool")
        self.root.geometry("400x300")
        # Currently loaded dataset; None until a CSV has been loaded.
        self.data = None

        # Buttons
        self.load_button = tk.Button(root, text="Load CSV", command=self.load_data)
        self.load_button.pack(pady=10)

        self.stats_button = tk.Button(
            root, text="Show Stats", command=self.show_statistics
        )
        self.stats_button.pack(pady=10)

        self.hist_button = tk.Button(
            root, text="Plot Histogram", command=self.plot_histogram
        )
        self.hist_button.pack(pady=10)

        self.kmeans_button = tk.Button(
            root, text="Perform KMeans", command=self.perform_kmeans
        )
        self.kmeans_button.pack(pady=10)

        self.save_button = tk.Button(root, text="Save Data", command=self.save_data)
        self.save_button.pack(pady=10)

    def load_data(self):
        """Load a CSV chosen by the user and confirm success in a dialog."""
        self.data = load_csv_file()
        if self.data is not None:
            messagebox.showinfo("File Loaded", "CSV file loaded successfully.")

    def show_statistics(self):
        """Show summary statistics for the loaded dataset."""
        show_statistics(self.data)

    def plot_histogram(self):
        """Plot histograms for the loaded dataset's numeric columns."""
        plot_histogram(self.data)

    def perform_kmeans(self):
        """Run KMeans clustering on the loaded dataset."""
        perform_kmeans(self.data)

    def save_data(self):
        """Save the loaded (possibly cluster-labelled) dataset to a CSV file."""
        save_data(self.data)


# Running the application
if __name__ == "__main__":
    root = tk.Tk()
    app = DataAnalysisApp(root)
    root.mainloop()