# Source code for rheojax.io.readers._column_mapping

"""Consolidated canonical column field registry for all I/O readers.

Merges patterns from Anton Paar COLUMN_MAPPINGS, TRIOS TRIOS_COLUMN_MAPPINGS,
and _utils.py regex patterns into a single canonical source.
"""

from __future__ import annotations

import re as _re
from dataclasses import dataclass

from rheojax.io.readers._utils import extract_unit_from_header
from rheojax.logging import get_logger

logger = get_logger(__name__)

__all__ = ["CanonicalField", "CANONICAL_FIELDS", "match_column", "match_columns"]


@dataclass
class CanonicalField:
    """Describe one canonical rheological measurement column.

    Attributes
    ----------
    canonical_name:
        Identifier of the field (e.g. ``"time"``, ``"storage_modulus"``).
    patterns:
        Regular-expression source strings that recognise header spellings
        of this field.
    si_unit:
        Unit string associated with the field (e.g. ``"Pa"``, ``"rad/s"``).
    applicable_modes:
        Names of the test modes the field applies to (e.g. ``"creep"``,
        ``"oscillation"``).
    is_x_candidate:
        Flag marking the field as a candidate x-axis column.
    is_y_candidate:
        Flag marking the field as a candidate y-axis column.
    priority:
        Ordering key used when matching headers; lower values are tried
        first.
    """

    canonical_name: str
    patterns: list[str]
    si_unit: str
    applicable_modes: list[str]
    is_x_candidate: bool = False
    is_y_candidate: bool = False
    priority: int = 100
# Registry of every canonical field, keyed by canonical name.  Insertion order
# matters: it is the tie-breaker between fields that share the same priority
# when ``_priority_order`` is built below.
CANONICAL_FIELDS: dict[str, CanonicalField] = {
    # --- independent-variable (x) candidates -------------------------------
    "time": CanonicalField(
        canonical_name="time",
        patterns=[
            r"^time$",
            r"^t$",
            r"^zeit$",
            r"^step\s*time$",
        ],
        si_unit="s",
        applicable_modes=["creep", "relaxation", "oscillation", "rotation"],
        is_x_candidate=True,
        priority=10,
    ),
    "angular_frequency": CanonicalField(
        canonical_name="angular_frequency",
        patterns=[
            r"^angular[\s_]?frequency$",
            r"^frequency$",
            r"^omega$",
            r"^ω$",
        ],
        si_unit="rad/s",
        applicable_modes=["oscillation"],
        is_x_candidate=True,
        priority=5,
    ),
    "shear_rate": CanonicalField(
        canonical_name="shear_rate",
        patterns=[
            r"^shear[\s_]?rate$",
            r"^γ̇$",
            r"^gamma[\s_]?dot$",
        ],
        si_unit="1/s",
        applicable_modes=["rotation"],
        is_x_candidate=True,
        priority=5,
    ),
    # --- shear moduli ------------------------------------------------------
    "storage_modulus": CanonicalField(
        canonical_name="storage_modulus",
        patterns=[
            r"^storage[\s_]?modulus$",
            r"^g'$",
            r"^g_prime$",
        ],
        si_unit="Pa",
        applicable_modes=["oscillation"],
        is_y_candidate=True,
        priority=5,
    ),
    "loss_modulus": CanonicalField(
        canonical_name="loss_modulus",
        patterns=[
            r"^loss[\s_]?modulus$",
            r"^g''$",
            r'^g"$',
            r"^g_double_prime$",
        ],
        si_unit="Pa",
        applicable_modes=["oscillation"],
        is_y_candidate=True,
        priority=5,
    ),
    "complex_modulus": CanonicalField(
        canonical_name="complex_modulus",
        patterns=[
            r"^complex[\s_]?modulus$",
            r"^g\*$",
            r"^\|g\*\|$",
        ],
        si_unit="Pa",
        applicable_modes=["oscillation"],
        is_y_candidate=True,
        priority=10,
    ),
    # --- tensile moduli ----------------------------------------------------
    "tensile_storage_modulus": CanonicalField(
        canonical_name="tensile_storage_modulus",
        patterns=[
            r"^e'$",
            r"^e_prime$",
            r"^e_stor$",
            r"^tensile[\s_]?storage[\s_]?modulus$",
            r"^young'?s?[\s_]?storage[\s_]?modulus$",
        ],
        si_unit="Pa",
        applicable_modes=["oscillation"],
        is_y_candidate=True,
        priority=5,
    ),
    "tensile_loss_modulus": CanonicalField(
        canonical_name="tensile_loss_modulus",
        patterns=[
            r"^e''$",
            r'^e"$',
            r"^e_double_prime$",
            r"^e_loss$",
            r"^tensile[\s_]?loss[\s_]?modulus$",
            r"^young'?s?[\s_]?loss[\s_]?modulus$",
        ],
        si_unit="Pa",
        applicable_modes=["oscillation"],
        is_y_candidate=True,
        priority=5,
    ),
    # --- transient responses -----------------------------------------------
    "compliance": CanonicalField(
        canonical_name="compliance",
        patterns=[
            r"^compliance$",
            r"^j\(?t\)?$",
        ],
        si_unit="1/Pa",
        applicable_modes=["creep"],
        is_y_candidate=True,
        priority=5,
    ),
    "relaxation_modulus": CanonicalField(
        canonical_name="relaxation_modulus",
        patterns=[
            r"^relaxation[\s_]?modulus$",
            r"^g\(?t\)?$",
        ],
        si_unit="Pa",
        applicable_modes=["relaxation"],
        is_y_candidate=True,
        priority=5,
    ),
    # --- viscosities -------------------------------------------------------
    "viscosity": CanonicalField(
        canonical_name="viscosity",
        patterns=[
            r"^viscosity$",
            r"^η$",
            r"^eta$",
        ],
        si_unit="Pa.s",
        applicable_modes=["rotation"],
        is_y_candidate=True,
        priority=5,
    ),
    "complex_viscosity": CanonicalField(
        canonical_name="complex_viscosity",
        patterns=[
            r"^complex[\s_]?viscosity$",
            r"^η\*$",
            r"^eta\*$",
        ],
        si_unit="Pa.s",
        applicable_modes=["oscillation"],
        is_y_candidate=True,
        priority=10,
    ),
    # --- stress / strain ---------------------------------------------------
    "shear_stress": CanonicalField(
        canonical_name="shear_stress",
        patterns=[
            r"^shear[\s_]?stress$",
            r"^stress$",
            r"^τ$",
            r"^tau$",
        ],
        si_unit="Pa",
        applicable_modes=["creep", "relaxation", "rotation"],
        is_y_candidate=True,
        priority=20,
    ),
    "shear_strain": CanonicalField(
        canonical_name="shear_strain",
        patterns=[
            r"^shear[\s_]?strain$",
            r"^strain$",
            r"^γ$",
            r"^gamma$",
        ],
        si_unit="dimensionless",
        applicable_modes=["creep", "relaxation"],
        is_y_candidate=True,
        priority=20,
    ),
    # --- auxiliary channels (default priority, not x/y candidates) ---------
    "phase_angle": CanonicalField(
        canonical_name="phase_angle",
        patterns=[
            r"^phase[\s_]?angle$",
            r"^δ$",
            r"^delta$",
        ],
        si_unit="deg",
        applicable_modes=["oscillation"],
        priority=100,
    ),
    "temperature": CanonicalField(
        canonical_name="temperature",
        patterns=[
            r"^temperature$",
            r"^temp$",
        ],
        si_unit="°C",
        applicable_modes=["creep", "relaxation", "oscillation", "rotation"],
        priority=100,
    ),
    "normal_force": CanonicalField(
        canonical_name="normal_force",
        patterns=[
            r"^normal[\s_]?force$",
        ],
        si_unit="N",
        applicable_modes=["creep", "relaxation", "oscillation", "rotation"],
        priority=100,
    ),
    "torque": CanonicalField(
        canonical_name="torque",
        patterns=[
            r"^torque$",
        ],
        si_unit="N.m",
        applicable_modes=["rotation"],
        priority=100,
    ),
    # --- oscillation amplitudes --------------------------------------------
    "strain_amplitude": CanonicalField(
        canonical_name="strain_amplitude",
        patterns=[
            r"^strain[\s_]?amplitude$",
        ],
        si_unit="dimensionless",
        applicable_modes=["oscillation"],
        is_y_candidate=True,
        priority=100,
    ),
    "stress_amplitude": CanonicalField(
        canonical_name="stress_amplitude",
        patterns=[
            r"^stress[\s_]?amplitude$",
        ],
        si_unit="Pa",
        applicable_modes=["oscillation"],
        is_y_candidate=True,
        priority=100,
    ),
}

# Case-insensitive compiled form of each field's patterns, keyed by canonical
# name; built once at import so header matching never recompiles a regex.
_compiled_patterns: dict[str, list[_re.Pattern]] = {
    field_name: [_re.compile(regex, _re.IGNORECASE) for regex in descriptor.patterns]
    for field_name, descriptor in CANONICAL_FIELDS.items()
}

# Canonical names in ascending ``priority`` (lower number = tried first).
# ``sorted`` is stable, so equal priorities keep registry insertion order.
_priority_order: list[str] = sorted(
    CANONICAL_FIELDS,
    key=lambda field_name: CANONICAL_FIELDS[field_name].priority,
)
def match_column(header: str, instrument: str | None = None) -> CanonicalField | None:
    """Resolve a raw column header to its :class:`CanonicalField`.

    The header is first passed through
    :func:`~rheojax.io.readers._utils.extract_unit_from_header`, which strips
    parenthesized unit suffixes (e.g. ``"omega (rad/s)"`` → ``"omega"``);
    only the remaining name part is tested against the registry patterns.

    Parameters
    ----------
    header:
        Raw column header string (may include a parenthesized unit suffix).
    instrument:
        Optional instrument name for future instrument-specific filtering.
        Currently unused; reserved for Phase 2 extension.

    Returns
    -------
    CanonicalField or None
        The first field, in priority order, that has a pattern matching the
        header name; ``None`` when no field matches.
    """
    # Delegate unit stripping to the shared helper instead of duplicating its
    # regex here; the unit half of the result is not needed for matching.
    bare_name, _unit = extract_unit_from_header(header)

    for candidate in _priority_order:
        # any() stops at the first matching pattern, like an inner loop with
        # an early return would.
        if any(regex.match(bare_name) for regex in _compiled_patterns[candidate]):
            logger.debug("Column %r matched canonical field %r", header, candidate)
            return CANONICAL_FIELDS[candidate]

    logger.debug("Column %r had no canonical match", header)
    return None
def match_columns(
    headers: list[str], instrument: str | None = None
) -> dict[str, CanonicalField]:
    """Resolve several column headers to canonical fields in one call.

    Parameters
    ----------
    headers:
        List of raw column header strings.
    instrument:
        Optional instrument name passed through to :func:`match_column`.

    Returns
    -------
    dict
        Mapping of header -> CanonicalField containing only the headers that
        matched; unmatched headers are omitted.
    """
    # Lazily pair each header with its lookup result so match_column is
    # called exactly once per header, in input order.
    lookups = (
        (header, match_column(header, instrument=instrument)) for header in headers
    )
    return {header: found for header, found in lookups if found is not None}