Saturday, January 24, 2026
Home › Artificial Intelligence › How Machine Learning and Semantic Embeddings Reorder CVE Vulnerabilities Beyond Raw CVSS...

How Machine Learning and Semantic Embeddings Reorder CVE Vulnerabilities Beyond Raw CVSS Scores

def visualize_results(df, priority_scores, feature_importance):
    """Draw a 2x3 Matplotlib dashboard summarising the analysis and show it.

    Panels, in row-major order:
      1. Histogram of ``priority_scores`` with a 75th-percentile marker.
      2. Scatter of ``df['cvss_score']`` against ``priority_scores``.
      3. Bar chart of ``df['severity']`` value counts.
      4. Horizontal bar chart of the first 10 rows of ``feature_importance``.
      5. Bar chart of ``df['cluster']`` value counts (only when that column
         exists; otherwise the panel is hidden).
      6. Pie chart of ``df['attack_vector']`` value counts.

    Args:
        df: DataFrame providing the ``cvss_score``, ``severity`` and
            ``attack_vector`` columns, and optionally ``cluster``.
        priority_scores: Array-like of scores, one per row of ``df`` (it is
            plotted against ``df['cvss_score']``).
            NOTE(review): presumably in [0, 1] -- the reference diagonal
            below maps CVSS 0-10 onto 0-1; confirm against the prioritizer.
        feature_importance: DataFrame with ``feature`` and ``importance``
            columns. NOTE(review): assumed to be pre-sorted by importance,
            since only ``head(10)`` is plotted -- confirm with the producer.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    fig.suptitle('Vulnerability Scanner - ML Evaluation Dashboard', fontsize=16, fontweight="bold")

    # Panel 1: distribution of the ML priority scores.
    ax = axes[0, 0]
    ax.hist(priority_scores, bins=30, color="crimson", alpha=0.7, edgecolor="black")
    ax.set_xlabel('Priority Score')
    ax.set_ylabel('Frequency')
    ax.set_title('Priority Score Distribution')
    ax.axvline(np.percentile(priority_scores, 75), color="orange", linestyle="--", label="75th percentile")
    ax.legend()

    # Panel 2: CVSS score vs ML priority, coloured by priority.
    ax = axes[0, 1]
    ax.scatter(df['cvss_score'], priority_scores, alpha=0.6, c=priority_scores, cmap='RdYlGn_r', s=50)
    ax.set_xlabel('CVSS Score')
    ax.set_ylabel('ML Priority Score')
    ax.set_title('CVSS vs ML Priority')
    # Reference diagonal: where the points would lie if priority were just CVSS / 10.
    ax.plot([0, 10], [0, 1], 'k--', alpha=0.3)

    # Panel 3: how many CVEs fall in each severity label.
    ax = axes[0, 2]
    severity_counts = df['severity'].value_counts()
    severity_colors = {'CRITICAL': 'darkred', 'HIGH': 'pink', 'MEDIUM': 'orange', 'LOW': 'yellow'}
    # Labels missing from the palette (e.g. 'UNKNOWN') fall back to gray.
    bar_colors = [severity_colors.get(s, 'gray') for s in severity_counts.index]
    ax.bar(severity_counts.index, severity_counts.values, color=bar_colors)
    ax.set_xlabel('Severity')
    ax.set_ylabel('Count')
    ax.set_title('Severity Distribution')
    ax.tick_params(axis="x", rotation=45)

    # Panel 4: first ten rows of the feature-importance table.
    ax = axes[1, 0]
    top_features = feature_importance.head(10)
    ax.barh(top_features['feature'], top_features['importance'], color="steelblue")
    ax.set_xlabel('Importance')
    ax.set_title('Top 10 Feature Importance')
    # barh draws the first row at the bottom; invert so row 0 is on top.
    ax.invert_yaxis()

    # Panel 5: cluster sizes, only when clustering results are attached to df.
    ax = axes[1, 1]
    if 'cluster' in df.columns:
        cluster_counts = df['cluster'].value_counts().sort_index()
        ax.bar(cluster_counts.index, cluster_counts.values, color="teal", alpha=0.7)
        ax.set_xlabel('Cluster')
        ax.set_ylabel('Count')
        ax.set_title('Vulnerability Clusters')
    else:
        # Hide the panel rather than leaving an empty, unlabeled frame.
        ax.axis('off')

    # Panel 6: share of each attack vector.
    ax = axes[1, 2]
    attack_vector_counts = df['attack_vector'].value_counts()
    ax.pie(attack_vector_counts.values, labels=attack_vector_counts.index, autopct="%1.1f%%", startangle=90)
    ax.set_title('Attack Vector Distribution')

    plt.tight_layout()
    plt.show()


def primary():
    """Run the full pipeline: fetch, featurize, train, score, cluster, report.

    Steps, in order:
      1. Fetch CVEs with ``CVEDataFetcher.fetch_recent_cves(days=30,
         max_results=50)`` and print a dataset overview.
      2. Build features with ``VulnerabilityFeatureExtractor`` (semantic
         embeddings of the descriptions, keyword features, categorical
         encodings).
      3. Train ``VulnerabilityPrioritizer`` on a severity label and the CVSS
         score, then predict a priority score per CVE.
      4. Cluster the embeddings with ``VulnerabilityAnalyzer(n_clusters=5)``.
      5. Print feature importance and the ten highest-priority CVEs, show the
         dashboard, and print a summary of priority bands.

    Returns:
        tuple: ``(df, prioritizer, analyzer)`` -- the DataFrame with the added
        ``ml_priority_score`` and ``predicted_score`` columns (plus whatever
        ``analyze_clusters`` adds), the trained prioritizer, and the analyzer.
    """
    banner = "=" * 70
    print(banner)
    print("AI-ASSISTED VULNERABILITY SCANNER WITH ML PRIORITIZATION")
    print(banner)
    print()

    # --- 1. Data -----------------------------------------------------------
    fetcher = CVEDataFetcher()
    df = fetcher.fetch_recent_cves(days=30, max_results=50)
    print("Dataset Overview:")
    print(f"  Total CVEs: {len(df)}")
    # [:10] keeps the first ten characters of the published value.
    # NOTE(review): presumably ISO-8601 strings, so this yields YYYY-MM-DD -- confirm.
    print(f"  Date Range: {df['published'].min()[:10]} to {df['published'].max()[:10]}")
    print(f"  Severity Breakdown: {df['severity'].value_counts().to_dict()}")
    print()

    # --- 2. Features -------------------------------------------------------
    feature_extractor = VulnerabilityFeatureExtractor()
    embeddings = feature_extractor.extract_semantic_features(df['description'].tolist())
    df = feature_extractor.extract_keyword_features(df)
    df = feature_extractor.encode_categorical_features(df)

    # --- 3. Train and score --------------------------------------------------
    prioritizer = VulnerabilityPrioritizer()
    X = prioritizer.prepare_features(df, embeddings)
    # Ordinal severity target; 'UNKNOWN' shares the MEDIUM value.
    severity_map = {'LOW': 0, 'MEDIUM': 1, 'HIGH': 2, 'CRITICAL': 3, 'UNKNOWN': 1}
    y_severity = df['severity'].map(severity_map).values
    y_score = df['cvss_score'].values
    # The return value of train_models was bound to an unused local; it is
    # intentionally discarded here.
    # NOTE(review): predict_priority receives the raw X, so it presumably
    # applies its own scaling internally -- confirm in VulnerabilityPrioritizer.
    prioritizer.train_models(X, y_severity, y_score)
    # severity_probs is returned by the model but not used in this report.
    priority_scores, severity_probs, score_preds = prioritizer.predict_priority(X)
    df['ml_priority_score'] = priority_scores
    df['predicted_score'] = score_preds

    # --- 4. Clustering -----------------------------------------------------
    analyzer = VulnerabilityAnalyzer(n_clusters=5)
    clusters = analyzer.cluster_vulnerabilities(embeddings)
    # NOTE(review): presumably attaches the 'cluster' column that
    # visualize_results looks for -- confirm in VulnerabilityAnalyzer.
    df = analyzer.analyze_clusters(df, clusters)

    # --- 5. Reporting ------------------------------------------------------
    feature_imp, emb_imp = prioritizer.get_feature_importance()
    print("\n--- Feature Importance ---")
    print(feature_imp.head(10))
    print(f"\nAverage embedding importance: {emb_imp:.4f}")

    print("\n" + banner)
    print("TOP 10 PRIORITY VULNERABILITIES")
    print(banner)
    report_columns = ['cve_id', 'cvss_score', 'ml_priority_score', 'severity', 'description']
    top_vulns = df.nlargest(10, 'ml_priority_score')[report_columns]
    for _, row in top_vulns.iterrows():
        print(f"\n{row['cve_id']} [Priority: {row['ml_priority_score']:.3f}]")
        print(f"  CVSS: {row['cvss_score']:.1f} | Severity: {row['severity']}")
        # Only the first 100 characters of the description are shown.
        print(f"  {row['description'][:100]}...")

    print("\n\nGenerating visualizations...")
    visualize_results(df, priority_scores, feature_imp)

    print("\n" + banner)
    print("ANALYSIS COMPLETE")
    print(banner)
    print("\nResults summary:")
    # Bands: high > 0.7, medium is the closed interval [0.4, 0.7], low < 0.4.
    print(f"  High Priority (>0.7): {(priority_scores > 0.7).sum()} vulnerabilities")
    print(f"  Medium Priority (0.4-0.7): {((priority_scores >= 0.4) & (priority_scores <= 0.7)).sum()}")
    print(f"  Low Priority (<0.4): {(priority_scores < 0.4).sum()}")
    return df, prioritizer, analyzer


if __name__ == "__main__":
    # Script entry point: run the whole pipeline and keep the results bound at
    # module level so they remain available in an interactive session.
    results_df, prioritizer, analyzer = primary()
    print("\n✓ All analyses completed successfully!")
    print("\nYou can now:")
    print("  - Access results via 'results_df' DataFrame")
    print("  - Use 'prioritizer' to predict new vulnerabilities")
    print("  - Explore 'analyzer' for clustering insights")

RELATED ARTICLES

LEAVE A REPLY

Please enter your comment!
Please enter your name here

Most Popular

Recent Comments