Technical Concepts: Probability and Statistics

Q: What are Probability and Statistics in cybersecurity?

Probability and Statistics are fundamental mathematical tools in cybersecurity for risk analysis, threat assessment, and data-driven decision making.

Technical Concepts - Comfidentia

Probability and Statistics are fundamental mathematical tools in cybersecurity for risk analysis, threat assessment, and data-driven decision making.

What are Probability and Statistics?

Probability studies uncertainty and random events, while statistics analyzes data to extract conclusions and patterns, both being essential for risk management in cybersecurity.

Fundamental Concepts

Probability

Definition: Measure of the likelihood that an event occurs
Range: 0 ≤ P(A) ≤ 1
Events: Mutually exclusive, independent
Application: Risk analysis

Descriptive Statistics

Central Tendency Measures: Mean, median, mode
Dispersion Measures: Variance, standard deviation
Distributions: Normal, binomial, Poisson
Application: Security data analysis

Inferential Statistics

Hypotheses: Hypothesis testing
Confidence Intervals: Parameter estimation
Regression: Relationship analysis
Application: Threat prediction

Probability Distributions

Normal Distribution

Example code (Python):
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def normal_distribution_example():
    """Evaluate the standard normal density on a grid and print two probabilities.

    Prints P(X < 1) and P(-1 < X < 1) for a normal variable with mean 0 and
    standard deviation 1, each formatted to four decimals.

    Returns:
        A tuple ``(x, y)``: ``x`` holds 100 evenly spaced points on [-4, 4]
        and ``y`` holds the normal PDF evaluated at those points.
    """
    mean, std_dev = 0, 1
    # Freeze the distribution once instead of repeating the parameters per call.
    distribution = stats.norm(mean, std_dev)

    grid = np.linspace(-4, 4, 100)
    density = distribution.pdf(grid)

    cdf_at_plus_one = distribution.cdf(1)
    cdf_at_minus_one = distribution.cdf(-1)

    print(f"P(X < 1) = {cdf_at_plus_one:.4f}")
    # The probability of an interval is the difference of the CDF at its ends.
    print(f"P(-1 < X < 1) = {cdf_at_plus_one - cdf_at_minus_one:.4f}")

    return grid, density


# Usage example: keep the grid and density for later plotting or inspection
x, y = normal_distribution_example()

Binomial Distribution

Example code (Python):
def binomial_distribution_example():
    """Print three binomial probabilities for 100 trials with success rate 0.1.

    Reports P(X = 10), P(X ≤ 10) and P(X ≥ 10), each to four decimals.

    Returns:
        The tuple ``(n, p)`` of the parameters used: the number of trials and
        the per-trial success probability.
    """
    trials, success_prob = 100, 0.1
    distribution = stats.binom(trials, success_prob)

    exactly_ten = distribution.pmf(10)
    at_most_ten = distribution.cdf(10)
    # P(X >= 10) is the complement of P(X <= 9) for an integer-valued variable.
    at_least_ten = 1 - distribution.cdf(9)

    print(f"P(X = 10) = {exactly_ten:.4f}")
    print(f"P(X ≤ 10) = {at_most_ten:.4f}")
    print(f"P(X ≥ 10) = {at_least_ten:.4f}")

    return trials, success_prob


# Usage example
n, p = binomial_distribution_example()

Poisson Distribution

Example code (Python):
def poisson_distribution_example():
    """Print three Poisson probabilities for an occurrence rate of 5.

    Reports P(X = 5), P(X ≤ 5) and P(X > 5), each to four decimals.

    Returns:
        The rate parameter (lambda) that was used.
    """
    rate = 5
    distribution = stats.poisson(rate)

    exactly_five = distribution.pmf(5)
    at_most_five = distribution.cdf(5)
    # P(X > 5) is the complement of the cumulative probability up to 5.
    more_than_five = 1 - distribution.cdf(5)

    print(f"P(X = 5) = {exactly_five:.4f}")
    print(f"P(X ≤ 5) = {at_most_five:.4f}")
    print(f"P(X > 5) = {more_than_five:.4f}")

    return rate


# Usage example
lambda_param = poisson_distribution_example()

Risk Analysis

Risk Assessment

Example code (Python):
def risk_assessment(probability, impact):
    """Score a risk as probability times impact and classify it.

    Args:
        probability: Likelihood that the event occurs.
        impact: Severity of the event if it occurs.

    Returns:
        A tuple ``(risk_score, risk_level)`` where ``risk_level`` is "High"
        for scores of at least 0.8, "Medium" for scores of at least 0.5, and
        "Low" otherwise.
    """
    score = probability * impact

    # Ordered from the highest floor down; the first floor reached wins.
    level_floors = ((0.8, "High"), (0.5, "Medium"))
    for floor, level in level_floors:
        if score >= floor:
            return score, level

    return score, "Low"


# Usage example
prob = 0.7  # Occurrence probability
impact = 0.9  # Impact (0-1)
risk_score, risk_level = risk_assessment(prob, impact)
print(f"Risk: {risk_level} (Score: {risk_score:.2f})")

Vulnerability Analysis

Example code (Python):
def vulnerability_analysis(vulnerabilities):
    """Summarize the CVSS scores of a collection of vulnerability records.

    Args:
        vulnerabilities: Iterable of mappings, each with a numeric ``'cvss'``
            entry. The scores are extracted in a single pass, so one-shot
            iterables such as generators are handled correctly.

    Returns:
        A dict with:
            ``mean_cvss``, ``std_cvss``, ``median_cvss``: descriptive
                statistics of the scores (``std_cvss`` uses NumPy's default
                population formula). All three are NaN for empty input.
            ``critical``: number of scores >= 9.0.
            ``high``: number of scores in [7.0, 9.0).
            ``medium``: number of scores in [4.0, 7.0).
            ``low``: number of scores < 4.0.
    """
    # Pull the scores out once; every statistic below reuses this list.
    scores = [record['cvss'] for record in vulnerabilities]

    if scores:
        mean_cvss = np.mean(scores)
        std_cvss = np.std(scores)
        median_cvss = np.median(scores)
    else:
        # NumPy also yields NaN for empty input, but only after emitting
        # RuntimeWarnings; report NaN directly instead.
        mean_cvss = std_cvss = median_cvss = float('nan')

    # Classification by severity
    critical = sum(1 for score in scores if score >= 9.0)
    high = sum(1 for score in scores if 7.0 <= score < 9.0)
    medium = sum(1 for score in scores if 4.0 <= score < 7.0)
    low = sum(1 for score in scores if score < 4.0)

    return {
        'mean_cvss': mean_cvss,
        'std_cvss': std_cvss,
        'median_cvss': median_cvss,
        'critical': critical,
        'high': high,
        'medium': medium,
        'low': low,
    }


# Usage example
vulnerabilities = [
    {'name': 'CVE-2023-001', 'cvss': 9.8},
    {'name': 'CVE-2023-002', 'cvss': 7.5},
    {'name': 'CVE-2023-003', 'cvss': 5.2},
    {'name': 'CVE-2023-004', 'cvss': 3.1}
]
analysis = vulnerability_analysis(vulnerabilities)
print(f"Analysis: {analysis}")

Security Data Analysis

Anomaly Detection

Example code (Python):
def anomaly_detection(data, threshold=2):
    """Flag values whose absolute z-score exceeds a threshold.

    Args:
        data: Array-like of numeric observations.
        threshold: Absolute z-score above which a value counts as anomalous.

    Returns:
        A tuple ``(anomalies, z_scores)``: a boolean array marking the
        anomalous entries and a float array of absolute z-scores, both with
        the same shape as ``data``. Empty input yields two empty arrays.
    """
    values = np.asarray(data, dtype=float)

    if values.size == 0:
        # Nothing to score; avoid NumPy's mean-of-empty warnings.
        return np.zeros(values.shape, dtype=bool), np.zeros(values.shape)

    mean = values.mean()
    std = values.std()

    if std == 0:
        # All values are identical: no point deviates from the mean, and
        # dividing by zero would produce NaN scores.
        z_scores = np.zeros(values.shape)
    else:
        z_scores = np.abs((values - mean) / std)

    anomalies = z_scores > threshold
    return anomalies, z_scores


# Usage example
data = np.random.normal(100, 15, 1000)  # Normal data
data[50] = 200  # Anomaly
anomalies, z_scores = anomaly_detection(data)
print(f"Anomalies detected: {np.sum(anomalies)}")

Trend Analysis

Example code (Python):
def trend_analysis(time_series):
    """Fit a straight line to a series and extrapolate ten steps ahead.

    The series is regressed against its own index positions 0..len-1.

    Args:
        time_series: Sequence of numeric observations in time order.

    Returns:
        A dict with the fitted ``slope``, the coefficient of determination
        ``r_squared``, the regression ``p_value``, and
        ``future_predictions``: the fitted line evaluated at the next ten
        index positions.
    """
    from scipy import stats

    n_points = len(time_series)
    fit = stats.linregress(np.arange(n_points), time_series)

    # Continue the index where the observed series stops.
    upcoming_index = np.arange(n_points, n_points + 10)
    forecast = fit.slope * upcoming_index + fit.intercept

    return {
        'slope': fit.slope,
        'r_squared': fit.rvalue**2,
        'p_value': fit.pvalue,
        'future_predictions': forecast,
    }


# Usage example: noisy series with an upward drift
time_series = np.random.normal(100, 10, 100) + np.linspace(0, 20, 100)
trend = trend_analysis(time_series)
print(f"Trend: {trend['slope']:.4f}")
print(f"R²: {trend['r_squared']:.4f}")

Hypothesis Testing

Student’s t-test

Example code (Python):
def t_test(sample1, sample2, alpha=0.05):
    """Run an independent two-sample t-test and state the decision.

    Args:
        sample1: First sample of numeric observations.
        sample2: Second sample of numeric observations.
        alpha: Significance level; H0 is rejected when the p-value is
            strictly below it.

    Returns:
        A dict with ``t_statistic``, ``p_value`` and ``decision``
        ("Reject H0" or "Do not reject H0").
    """
    from scipy import stats

    outcome = stats.ttest_ind(sample1, sample2)
    verdict = "Reject H0" if outcome.pvalue < alpha else "Do not reject H0"

    return {
        't_statistic': outcome.statistic,
        'p_value': outcome.pvalue,
        'decision': verdict,
    }


# Usage example: two samples drawn with different means
sample1 = np.random.normal(100, 15, 50)
sample2 = np.random.normal(105, 15, 50)
result = t_test(sample1, sample2)
print(f"Result: {result}")

Chi-square Test

Example code (Python):
def chi_square_test(observed, expected):
    """Run a chi-square goodness-of-fit test.

    Args:
        observed: Observed frequencies per category.
        expected: Expected frequencies per category.

    Returns:
        A dict with ``chi2_statistic`` and ``p_value``.
    """
    from scipy import stats

    outcome = stats.chisquare(observed, f_exp=expected)
    return {'chi2_statistic': outcome.statistic, 'p_value': outcome.pvalue}


# Usage example
observed = [10, 15, 20, 25]
expected = [12, 18, 18, 22]
result = chi_square_test(observed, expected)
print(f"Chi-square: {result}")

Machine Learning for Security

Threat Classification

Example code (Python):
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

def threat_classification(features, labels):
    """Train a random forest on labelled samples and report held-out quality.

    The data is split 80/20 into training and test portions with a fixed
    seed (42); a 100-tree forest with the same seed is fitted on the
    training portion and evaluated on the test portion.

    Args:
        features: Feature matrix, one row per sample.
        labels: Class label for each sample.

    Returns:
        A tuple ``(model, report)``: the fitted classifier and the text
        classification report computed on the test portion.
    """
    partitions = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
    train_features, test_features, train_labels, test_labels = partitions

    classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    classifier.fit(train_features, train_labels)

    predicted_labels = classifier.predict(test_features)
    summary = classification_report(test_labels, predicted_labels)

    return classifier, summary

# Usage example
# features = np.random.randn(1000, 10)  # Features
# labels = np.random.randint(0, 3, 1000)  # Labels (0: benign, 1: malware, 2: phishing)
# model, report = threat_classification(features, labels)
# print(report)

Intrusion Detection

Example code (Python):
def intrusion_detection(data, threshold=0.5, labels=None):
    """Flag potential intrusions as z-score outliers and count the hits.

    Args:
        data: Array-like of numeric observations.
        threshold: Absolute z-score above which a value is flagged. The
            default of 0.5 flags every point more than half a standard
            deviation from the mean.
        labels: Optional array-like of ground-truth booleans (truthy means a
            real intrusion) with the same shape as ``data``. When omitted,
            every flagged point is counted as a true positive and
            ``false_positives`` is 0, because correctness cannot be judged
            without ground truth.

    Returns:
        A dict with:
            ``intrusions_detected``: boolean array of flagged entries.
            ``true_positives``: number of flagged entries that are real
                intrusions according to ``labels``.
            ``false_positives``: number of flagged entries that are not.

    Raises:
        ValueError: If ``labels`` is given and its shape differs from
            that of ``data``.
    """
    values = np.asarray(data, dtype=float)
    spread = values.std() if values.size else 0.0

    if spread == 0:
        # Empty or constant input: nothing deviates from the mean, and
        # dividing by zero would produce NaN scores.
        z_scores = np.zeros(values.shape)
    else:
        z_scores = np.abs((values - values.mean()) / spread)
    intrusions = z_scores > threshold

    if labels is None:
        # Without ground truth, treat every flagged point as a real hit.
        truth = np.ones(intrusions.shape, dtype=bool)
    else:
        truth = np.asarray(labels, dtype=bool)
        if truth.shape != intrusions.shape:
            raise ValueError(
                f"labels shape {truth.shape} does not match data shape "
                f"{intrusions.shape}"
            )

    true_positives = np.sum(intrusions & truth)
    false_positives = np.sum(intrusions & ~truth)

    return {
        'intrusions_detected': intrusions,
        'true_positives': true_positives,
        'false_positives': false_positives,
    }


# Usage example
data = np.random.normal(100, 15, 1000)
data[50:60] = np.random.normal(200, 20, 10)  # Intrusions
result = intrusion_detection(data)
print(f"Intrusions detected: {np.sum(result['intrusions_detected'])}")

Performance Analysis

Performance Metrics

Example code (Python):
def performance_metrics(y_true, y_pred):
    """Compute accuracy plus weighted precision, recall and F1.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A dict with ``accuracy``, ``precision``, ``recall`` and
        ``f1_score``; the last three use ``average='weighted'``.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    # Scorers that share the same weighted-average call signature.
    weighted_scorers = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )

    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    for metric_name, scorer in weighted_scorers:
        scores[metric_name] = scorer(y_true, y_pred, average='weighted')

    return scores


# Usage example
y_true = [0, 1, 1, 0, 1, 0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1, 0, 1, 0, 0, 1]
metrics = performance_metrics(y_true, y_pred)
print(f"Metrics: {metrics}")

Number Theory - Complementary mathematical foundations
Abstract Algebra - Complementary mathematical structures
Cryptanalysis - Analysis that uses probability and statistics
Machine Learning - Techniques that use probability and statistics
CISO - Role that relies on probability and statistics for risk decisions
General Cybersecurity - Discipline that includes probability and statistics
Security Breaches - Incidents analyzed with probability and statistics
Attack Vectors - Attacks analyzed with probability and statistics
Incident Response - Process that includes probability and statistics
SIEM - System that uses probability and statistics
SOAR - Automation that uses probability and statistics
EDR - Tool that uses probability and statistics
Firewall - Device whose traffic data can be analyzed with probability and statistics
VPN - Connection whose usage data can be analyzed with probability and statistics
Dashboards - Visualization of probability and statistics metrics
Logs - Records analyzed with probability and statistics