Skip to content

Correlation Analysis

MinexPy provides a dedicated correlation module for geoscience datasets with missing values, skewed distributions, and occasional outliers.

Available Correlation Methods

  • pearson_correlation: Linear association.
  • spearman_correlation: Rank-based monotonic association.
  • kendall_correlation: Rank-concordance association (robust in small samples).
  • distance_correlation: General dependence measure that captures nonlinear as well as linear relationships.
  • biweight_midcorrelation: Outlier-resistant robust correlation.
  • partial_correlation: Correlation between two variables while controlling for confounders.

Pairwise Correlation Example

import numpy as np
from minexpy.correlation import (
    pearson_correlation,
    spearman_correlation,
    kendall_correlation,
)

zn = np.array([45.2, 52.3, 38.7, 61.2, 49.8, 55.1, 42.3, 58.9])
cu = np.array([12.5, 15.3, 11.2, 18.4, 14.1, 16.0, 12.8, 17.2])

print("Pearson:", pearson_correlation(zn, cu))
print("Spearman:", spearman_correlation(zn, cu))
print("Kendall:", kendall_correlation(zn, cu))

Robust and Nonlinear Example

import numpy as np
from minexpy.correlation import distance_correlation, biweight_midcorrelation

x = np.linspace(-3, 3, 100)
y = x ** 2 + 0.1 * np.random.randn(100)  # nonlinear relation

print("Distance correlation:", distance_correlation(x, y))
print("Biweight midcorrelation:", biweight_midcorrelation(x, y))

Partial Correlation Example

import numpy as np
from minexpy.correlation import partial_correlation

rng = np.random.default_rng(42)
depth = np.linspace(10, 200, 80)
zn = 0.12 * depth + rng.normal(0, 3, size=80)
cu = 0.10 * depth + 0.5 * zn + rng.normal(0, 3, size=80)

# Control for depth while checking Zn-Cu relation
result = partial_correlation(zn, cu, controls=depth)
print(result)

Correlation Matrix Example

import pandas as pd
from minexpy.correlation import correlation_matrix

df = pd.DataFrame(
    {
        "Zn": [45.2, 52.3, 38.7, 61.2, 49.8, 55.1],
        "Cu": [12.5, 15.3, 11.2, 18.4, 14.1, 16.0],
        "Pb": [8.9, 9.7, 8.1, 10.5, 9.1, 9.9],
    }
)

print(correlation_matrix(df, method="pearson"))
print(correlation_matrix(df, method="spearman"))
print(correlation_matrix(df, method="distance"))

Complete Geoscience Correlation Workflow

Compare linear, rank-based, robust, nonlinear, and partial correlations for one variable pair in a single pass, then summarize all variables with correlation matrices:

import numpy as np
import pandas as pd
from minexpy.correlation import (
    pearson_correlation,
    spearman_correlation,
    kendall_correlation,
    distance_correlation,
    biweight_midcorrelation,
    partial_correlation,
    correlation_matrix,
)

rng = np.random.default_rng(10)
depth = np.linspace(20, 300, 80)
zn = 0.15 * depth + rng.normal(0, 4, size=80)
cu = 0.10 * depth + 0.45 * zn + rng.normal(0, 3, size=80)
pb = 0.05 * depth + rng.normal(0, 2, size=80)

print("Pearson:", pearson_correlation(zn, cu))
print("Spearman:", spearman_correlation(zn, cu))
print("Kendall:", kendall_correlation(zn, cu))
print("Distance:", distance_correlation(zn, cu))
print("Biweight:", biweight_midcorrelation(zn, cu))
print("Partial (control depth):", partial_correlation(zn, cu, controls=depth))

df = pd.DataFrame({"Zn": zn, "Cu": cu, "Pb": pb})
print("\nPearson matrix:")
print(correlation_matrix(df, method="pearson"))
print("\nDistance-correlation matrix:")
print(correlation_matrix(df, method="distance"))