# Complete Coding with Decision Tree
# Load and Explore Dataset
import pandas as pd
from google.colab import drive

# Mount Google Drive so the CSV stored there is reachable from the Colab runtime.
drive.mount('/content/drive')
# Output: Mounted at /content/drive

# Dataset Connecting
# Load the dataset
ds_path = '/content/drive/MyDrive/Machine Learning Lab/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv'
df = pd.read_csv(ds_path)

# Show basic info
print("Dataset shape:", df.shape)
print("\nFirst 5 rows:")
print(df.head())

# NOTE(review): X_train, y_train, X_val and y_val are used below but are not
# defined anywhere in the visible code. Presumably a preprocessing / split
# step was omitted from this listing -- confirm and restore it before running.

# Train the Decision Tree Model
from sklearn.tree import DecisionTreeClassifier

# Initialize and train Decision Tree (fixed seed for reproducible results)
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Predict on validation set
y_pred_val = dt_model.predict(X_val)
# Column 1 of predict_proba is the score of the second class; it feeds ROC/AUC.
y_prob_val = dt_model.predict_proba(X_val)[:, 1]
print("Model training complete. Predictions on validation set ready.")
# Output: Model training complete. Predictions on validation set ready.

# Evaluation with Full Metrics
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    balanced_accuracy_score,
    matthews_corrcoef,
)

# Compute confusion matrix.
# The four-way unpack only works for a 2x2 (binary) matrix; it raises
# ValueError if the validation labels contain any other number of classes.
cm = confusion_matrix(y_val, y_pred_val)
TN, FP, FN, TP = cm.ravel()

# Metrics provided directly by scikit-learn
accuracy = accuracy_score(y_val, y_pred_val)
precision = precision_score(y_val, y_pred_val)
recall = recall_score(y_val, y_pred_val)
f1 = f1_score(y_val, y_pred_val)
roc_auc = roc_auc_score(y_val, y_prob_val)

# Rates derived by hand from the confusion-matrix cells
specificity = TN / (TN + FP)
fpr = FP / (FP + TN)
fnr = FN / (FN + TP)

balanced_acc = balanced_accuracy_score(y_val, y_pred_val)
mcc = matthews_corrcoef(y_val, y_pred_val)

# Print results
print(f"\nAccuracy: {accuracy:.5f}")
print(f"Precision (PPV): {precision:.5f}")
print(f"Recall (Sensitivity): {recall:.5f}")
print(f"F1 Score: {f1:.5f}")
print(f"ROC AUC: {roc_auc:.5f}")
print(f"Specificity (TNR): {specificity:.5f}")
print(f"False Positive Rate: {fpr:.5f}")
print(f"False Negative Rate: {fnr:.5f}")
print(f"Balanced Accuracy: {balanced_acc:.5f}")
print(f"Matthews Corr Coef: {mcc:.5f}")
#Confusion Matrix
import matplotlib.pyplot as plt  # needed here: plt is used below, before the ROC cell imports it
import seaborn as sns
import numpy as np

# Draw the validation-set confusion matrix `cm` as an annotated heatmap.
# Relies on `cm` and on `le` (its `classes_` supply the tick labels), both of
# which must already exist when this cell runs.

# Title
title = "Confusion Matrix - Decision Tree (Validation Set)"

# Set style (this updates matplotlib's global rcParams, so later figures
# inherit the same fonts and sizes)
plt.rcParams.update({
    'font.size': 18,
    'font.family': 'serif',
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
})

# Plot
fig, ax = plt.subplots(figsize=(8, 5))
cmap = sns.color_palette("crest", as_cmap=True)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap=cmap,
    cbar=True,
    ax=ax,
    annot_kws={"fontsize": 18},
    linewidths=0.5,
    linecolor='white',
)
ax.set_title(title)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_xticklabels(le.classes_, rotation=45, fontsize=14)
ax.set_yticklabels(le.classes_, rotation=0, fontsize=14)

# Inner gridlines: a thick white cross at position 1, i.e. the boundary
# between the first and second row/column of the heatmap.
ax.hlines([1], *ax.get_xlim(), colors='white', linewidth=4)
ax.vlines([1], *ax.get_ylim(), colors='white', linewidth=4)

plt.tight_layout()
plt.show()
# ROC Curve & Classification Report.
from sklearn.metrics import classification_report, roc_curve
import matplotlib.pyplot as plt

# ROC Curve
# The curve's x-values are stored as `fpr_curve` rather than `fpr` so the
# scalar false-positive rate computed in the evaluation step is not overwritten.
fpr_curve, tpr, thresholds = roc_curve(y_val, y_prob_val)
plt.figure(figsize=(7, 3))
plt.plot(fpr_curve, tpr, label=f'DT ROC (AUC = {roc_auc:.5f})', color='blue')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Random Guess')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate (Recall)")
plt.title("ROC Curve - Decision Tree")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Formatted classification report (5 digits)
report_dict = classification_report(
    y_val, y_pred_val, target_names=le.classes_, output_dict=True
)
print("\nClassification Report (rounded to 5 digits):\n")
print(f"{'Label':<15} {'Precision':>10} {'Recall':>10} {'F1-Score':>10} {'Support':>10}")
print("-" * 60)
for label, scores in report_dict.items():
    # Only dict-valued entries carry per-row precision/recall/f1/support;
    # any non-dict entry (e.g. an overall scalar score) is skipped.
    if not isinstance(scores, dict):
        continue
    # Row-local names keep the numeric precision / recall / f1 metrics from
    # the evaluation step intact instead of replacing them with strings.
    precision_txt = f"{scores['precision']:.5f}"
    recall_txt = f"{scores['recall']:.5f}"
    f1_txt = f"{scores['f1-score']:.5f}"
    support_txt = f"{int(scores['support'])}"
    print(f"{label:<15} {precision_txt:>10} {recall_txt:>10} {f1_txt:>10} {support_txt:>10}")
# 5-Fold Cross-Validation
from sklearn.model_selection import StratifiedKFold, cross_val_predict

# Shuffled, stratified splitter with five folds and a fixed seed.
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Out-of-fold class probabilities for the training data; keep column 1 only.
y_cv_prob = cross_val_predict(
    dt_model,
    X_train,
    y_train,
    cv=cv,
    method='predict_proba',
)[:, 1]

# ROC AUC of the out-of-fold scores against the training labels.
cv_auc = roc_auc_score(y_train, y_cv_prob)
print(f"Cross-Validation ROC AUC (5-fold): {cv_auc:.5f}")
#Cross-Validation ROC AUC (5-fold): 1.00000
#Thank you and Good Luck!!!
# No comments:
# Post a Comment