def visualize_results(df, priority_scores, feature_importance):
    """Render a 2x3 matplotlib dashboard summarizing the ML evaluation.

    Args:
        df: DataFrame with at least 'cvss_score', 'severity' and
            'attack_vector' columns; an optional 'cluster' column enables
            the cluster-size panel.
        priority_scores: array-like of ML priority scores (expected 0-1,
            plotted against CVSS 0-10 on the comparison panel).
        feature_importance: DataFrame with 'feature' and 'importance'
            columns, already sorted by importance descending.
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    # BUG FIX: matplotlib's kwarg is 'color' (not 'coloration') and the
    # valid fontweight value is 'bold' (not 'daring').
    fig.suptitle('Vulnerability Scanner - ML Evaluation Dashboard',
                 fontsize=16, fontweight='bold')

    # Panel (0,0): distribution of ML priority scores with a 75th-percentile marker.
    axes[0, 0].hist(priority_scores, bins=30, color='crimson', alpha=0.7,
                    edgecolor='black')
    axes[0, 0].set_xlabel('Priority Score')
    axes[0, 0].set_ylabel('Frequency')
    axes[0, 0].set_title('Priority Score Distribution')
    axes[0, 0].axvline(np.percentile(priority_scores, 75), color='orange',
                       linestyle='--', label='75th percentile')
    axes[0, 0].legend()

    # Panel (0,1): CVSS vs ML priority; the dashed diagonal maps CVSS 0-10
    # onto priority 0-1, so points above it are up-ranked by the model.
    axes[0, 1].scatter(df['cvss_score'], priority_scores, alpha=0.6,
                       c=priority_scores, cmap='RdYlGn_r', s=50)
    axes[0, 1].set_xlabel('CVSS Score')
    axes[0, 1].set_ylabel('ML Priority Score')
    axes[0, 1].set_title('CVSS vs ML Priority')
    axes[0, 1].plot([0, 10], [0, 1], 'k--', alpha=0.3)

    # Panel (0,2): severity counts, colored per severity level.
    severity_counts = df['severity'].value_counts()
    # BUG FIX: the mapping was defined as 'colours' but referenced as
    # 'colors' (NameError at runtime); one consistent name is used here.
    colors = {'CRITICAL': 'darkred', 'HIGH': 'pink', 'MEDIUM': 'orange',
              'LOW': 'yellow'}
    axes[0, 2].bar(severity_counts.index, severity_counts.values,
                   color=[colors.get(s, 'gray') for s in severity_counts.index])
    axes[0, 2].set_xlabel('Severity')
    axes[0, 2].set_ylabel('Count')
    axes[0, 2].set_title('Severity Distribution')
    axes[0, 2].tick_params(axis='x', rotation=45)

    # Panel (1,0): horizontal bars for the ten most important features;
    # invert the y-axis so the top feature appears first.
    top_features = feature_importance.head(10)
    axes[1, 0].barh(top_features['feature'], top_features['importance'],
                    color='steelblue')
    axes[1, 0].set_xlabel('Importance')
    axes[1, 0].set_title('Top 10 Feature Importance')
    axes[1, 0].invert_yaxis()

    # Panel (1,1): cluster sizes — only drawn when clustering ran upstream.
    if 'cluster' in df.columns:
        cluster_counts = df['cluster'].value_counts().sort_index()
        axes[1, 1].bar(cluster_counts.index, cluster_counts.values,
                       color='teal', alpha=0.7)
        axes[1, 1].set_xlabel('Cluster')
        axes[1, 1].set_ylabel('Count')
        axes[1, 1].set_title('Vulnerability Clusters')

    # Panel (1,2): attack-vector share as a pie chart.
    attack_vector_counts = df['attack_vector'].value_counts()
    axes[1, 2].pie(attack_vector_counts.values,
                   labels=attack_vector_counts.index,
                   autopct='%1.1f%%', startangle=90)
    axes[1, 2].set_title('Attack Vector Distribution')

    plt.tight_layout()
    # BUG FIX: the display call is plt.show(); plt.present() does not exist.
    plt.show()
def primary():
    """Run the full vulnerability-analysis pipeline end to end.

    Fetches recent CVEs, extracts semantic and keyword features, trains
    the prioritization models, clusters the embeddings, prints a report
    of the top-10 priority findings, and renders the dashboard.

    Returns:
        Tuple of (df, prioritizer, analyzer): the enriched DataFrame, the
        trained VulnerabilityPrioritizer, and the VulnerabilityAnalyzer.
    """
    print("=" * 70)
    print("AI-ASSISTED VULNERABILITY SCANNER WITH ML PRIORITIZATION")
    print("=" * 70)
    print()

    # --- Data acquisition -------------------------------------------------
    fetcher = CVEDataFetcher()
    df = fetcher.fetch_recent_cves(days=30, max_results=50)
    # BUG FIX: every "\n" escape below had been mangled to a literal "n".
    print("Dataset Overview:")
    print(f"  Total CVEs: {len(df)}")
    print(f"  Date Range: {df['published'].min()[:10]} to {df['published'].max()[:10]}")
    print(f"  Severity Breakdown: {df['severity'].value_counts().to_dict()}")
    print()

    # --- Feature extraction ----------------------------------------------
    feature_extractor = VulnerabilityFeatureExtractor()
    embeddings = feature_extractor.extract_semantic_features(df['description'].tolist())
    df = feature_extractor.extract_keyword_features(df)
    df = feature_extractor.encode_categorical_features(df)

    # --- Model training and scoring ---------------------------------------
    prioritizer = VulnerabilityPrioritizer()
    X = prioritizer.prepare_features(df, embeddings)
    # UNKNOWN severities are treated as MEDIUM (ordinal value 1).
    severity_map = {'LOW': 0, 'MEDIUM': 1, 'HIGH': 2, 'CRITICAL': 3, 'UNKNOWN': 1}
    y_severity = df['severity'].map(severity_map).values
    y_score = df['cvss_score'].values
    # The scaled-feature return value is not needed here.
    # NOTE(review): prediction below uses the unscaled X — confirm that
    # predict_priority scales internally, otherwise pass the scaled matrix.
    prioritizer.train_models(X, y_severity, y_score)
    priority_scores, severity_probs, score_preds = prioritizer.predict_priority(X)
    df['ml_priority_score'] = priority_scores
    df['predicted_score'] = score_preds

    # --- Clustering --------------------------------------------------------
    analyzer = VulnerabilityAnalyzer(n_clusters=5)
    clusters = analyzer.cluster_vulnerabilities(embeddings)
    df = analyzer.analyze_clusters(df, clusters)

    # --- Reporting ---------------------------------------------------------
    feature_imp, emb_imp = prioritizer.get_feature_importance()
    print("\n--- Feature Importance ---")
    print(feature_imp.head(10))
    print(f"\nAverage embedding importance: {emb_imp:.4f}")

    print("\n" + "=" * 70)
    print("TOP 10 PRIORITY VULNERABILITIES")
    print("=" * 70)
    top_vulns = df.nlargest(10, 'ml_priority_score')[
        ['cve_id', 'cvss_score', 'ml_priority_score', 'severity', 'description']
    ]
    for idx, row in top_vulns.iterrows():
        print(f"\n{row['cve_id']} [Priority: {row['ml_priority_score']:.3f}]")
        print(f"  CVSS: {row['cvss_score']:.1f} | Severity: {row['severity']}")
        print(f"  {row['description'][:100]}...")

    print("\n\nGenerating visualizations...")
    visualize_results(df, priority_scores, feature_imp)

    print("\n" + "=" * 70)
    print("ANALYSIS COMPLETE")
    print("=" * 70)
    # The three bands below partition [0, 1]: >0.7, [0.4, 0.7], <0.4.
    print("\nResults summary:")
    print(f"  High Priority (>0.7): {(priority_scores > 0.7).sum()} vulnerabilities")
    print(f"  Medium Priority (0.4-0.7): {((priority_scores >= 0.4) & (priority_scores <= 0.7)).sum()}")
    print(f"  Low Priority (<0.4): {(priority_scores < 0.4).sum()}")
    return df, prioritizer, analyzer
if __name__ == "__main__":
    # Run the whole pipeline; keep the artifacts bound at module level so
    # an interactive session (e.g. `python -i`) can inspect them afterward.
    results_df, prioritizer, analyzer = primary()
    # BUG FIX: "\n" escapes had been mangled to a literal "n" in these prints.
    print("\n✓ All analyses completed successfully!")
    print("\nYou can now:")
    print("  - Access results via the 'results_df' DataFrame")
    print("  - Use 'prioritizer' to predict new vulnerabilities")
    print("  - Explore 'analyzer' for clustering insights")
How Machine Learning and Semantic Embeddings Reorder CVE Vulnerabilities Beyond Raw CVSS Scores
RELATED ARTICLES
