Source code for avatars.constants
import io
import re
from enum import StrEnum
from typing import (
IO,
TYPE_CHECKING,
Any,
BinaryIO,
Union,
)
import pandas as pd
from avatar_yaml.models.schema import ColumnType
if TYPE_CHECKING:
from IPython.display import HTML # noqa: F401
from avatars.models import JobKind
# Default timeout value. The unit is not stated in this module
# (presumably seconds — TODO confirm against the callers).
DEFAULT_TIMEOUT = 5
# Union of the file-like object types: a binary stream, any typed IO
# stream, or any object deriving from io.IOBase.
FileLike = Union[BinaryIO, IO[Any], io.IOBase]
# A list of FileLike objects.
FileLikes = list[FileLike]
# NOTE(review): presumably the name of the volume holding input data;
# the meaning is not visible from this module — confirm against callers.
VOLUME_NAME = "input"
# Job kinds listed in a fixed sequence: standard, signal metrics,
# privacy metrics, then report.
# NOTE(review): going by the name, this is the order in which jobs are
# executed; how the list is consumed is not visible here — confirm.
JOB_EXECUTION_ORDER = [
    JobKind.standard,
    JobKind.signal_metrics,
    JobKind.privacy_metrics,
    JobKind.report,
]
# Status strings that denote a failed job.
ERROR_STATUSES = ["parent_error", "error"]
# "finished" followed by every error status, as a separate list object.
READY_STATUSES = ["finished"] + ERROR_STATUSES

# For network retries
DEFAULT_NETWORK_RETRY_COUNT = 20
DEFAULT_NETWORK_RETRY_INTERVAL = 5

# For rate limit retries
DEFAULT_RATE_LIMIT_MAX_RETRIES = 3
DEFAULT_RATE_LIMIT_MIN_WAIT_SECONDS = 1.0

# For creating and polling jobs
DEFAULT_DELAY_BETWEEN_CONSECUTIVE_JOBS = 0.5
DEFAULT_POLL_INTERVAL = 5
[docs]
class Results(StrEnum):
ADVICE = "advice"
SHUFFLED = "shuffled"
UNSHUFFLED = "unshuffled"
PRIVACY_METRICS = "privacy_metrics"
SIGNAL_METRICS = "signal_metrics"
REPORT_IMAGES = "report_images"
PROJECTIONS_ORIGINAL = "original_projections"
PROJECTIONS_AVATARS = "avatar_projections"
METADATA = "run_metadata"
REPORT = "report"
META_PRIVACY_METRIC = "meta_privacy_metric"
META_SIGNAL_METRIC = "meta_signal_metric"
FIGURES = "figures"
FIGURES_METADATA = "figures_metadata"
PRIVACY_METRICS_SUMMARY = "privacy_metrics_summary"
SIGNAL_METRICS_SUMMARY = "signal_metrics_summary"
[docs]
class PlotKind(StrEnum):
"""Available plot types for visualization."""
CORRELATION = "correlation"
"""A correlation heatmap of the original and avatar data."""
CORRELATION_DIFFERENCE = "correlation_difference"
"""A heatmap of the difference between the original and avatar data."""
CONTRIBUTION = "contribution"
"""A bar chart showing the contribution of each feature in the model."""
PROJECTION_2D = "2d_projection"
"""A 2D projection of the original and avatar data."""
PROJECTION_3D = "3d_projection"
"""A 3D projection of the original and avatar data."""
DISTRIBUTION = "distribution"
"""Distributions plot of the original and avatar data, there is a plot for each column."""
AGGREGATE_STATS = "aggregate_stats"
"""A table containing the mean and std of the original and avatar data
(of the 10 first columns)."""
RAW_SERIES = "raw_series"
"""A line plot of the original and avatar time series over time."""
NORMALIZED_SERIES = "normalized_series"
"""A line plot of the normalized original and avatar time series over time."""
CLASS_PROJECTION_2D = "class_projection_2d"
"""A 2D projection colored by the target class
(only available with class balancing augmentation).
"""
METRICS_SUMMARY = "metrics_summary"
"""A summary table of privacy metrics."""
# Subset of Results members. It leaves out ADVICE, REPORT_IMAGES, REPORT,
# META_PRIVACY_METRIC, META_SIGNAL_METRIC and FIGURES_METADATA.
# NOTE(review): going by the name, these are the results that get stored;
# where and how is not visible from this module — confirm against callers.
RESULTS_TO_STORE: list[Results] = [
    Results.SHUFFLED,
    Results.UNSHUFFLED,
    Results.PRIVACY_METRICS,
    Results.SIGNAL_METRICS,
    Results.PROJECTIONS_ORIGINAL,
    Results.PROJECTIONS_AVATARS,
    Results.METADATA,
    Results.FIGURES,
    Results.PRIVACY_METRICS_SUMMARY,
    Results.SIGNAL_METRICS_SUMMARY,
]
# Union of the value types a result can take. HTML is imported only under
# TYPE_CHECKING; that is safe here because a `type` statement evaluates its
# right-hand side lazily, so the name is not looked up at import time.
type TypeResults = dict | pd.DataFrame | str | list[dict[str, Any]] | None | HTML
# Regex -> ColumnType table (presumably matched against pandas dtype names
# such as "float64" or "datetime64[ns, UTC]" — confirm against the caller).
#
# The patterns are kept mutually exclusive for the datetime family: a plain
# r"datetime" would also match the timezone-aware name "datetime64[ns, UTC]",
# which makes the DATETIME_TZ entry unreachable for any consumer that stops
# at the first matching pattern. The negative lookahead removes that overlap,
# so the outcome no longer depends on iteration order or on first-match vs
# last-match scanning.
MATCHERS: dict[re.Pattern[str], ColumnType] = {
    re.compile(r"float"): ColumnType.NUMERIC,
    re.compile(r"int"): ColumnType.INT,
    re.compile(r"bool"): ColumnType.BOOL,
    # Any datetime dtype except the UTC timezone-aware one handled below.
    re.compile(r"datetime(?!64\[ns, UTC\])"): ColumnType.DATETIME,
    re.compile(r"datetime64\[ns, UTC\]"): ColumnType.DATETIME_TZ,
    # NOTE(review): an earlier FIXME here asked for a bool ColumnType; the
    # `bool` entry above appears to cover it — confirm nothing else is pending.
}
# Column type to use when none of the MATCHERS patterns applies
# (presumably — the fallback logic lives in the consumer; confirm).
DEFAULT_TYPE = ColumnType.CATEGORY
# File name (or file name stem, where no extension is given) associated
# with each Results member.
# NOTE(review): REPORT_IMAGES, META_PRIVACY_METRIC, META_SIGNAL_METRIC,
# FIGURES and FIGURES_METADATA have no entry here; a plain `[...]` lookup
# for those members raises KeyError — confirm this is intended.
mapping_result_to_file_name: dict[Results, str] = {
    Results.ADVICE: "advice.json",
    Results.SHUFFLED: "shuffled",
    Results.UNSHUFFLED: "unshuffled",
    Results.PRIVACY_METRICS: "privacy.json",
    Results.SIGNAL_METRICS: "signal.json",
    Results.PROJECTIONS_ORIGINAL: "projections.original",
    Results.PROJECTIONS_AVATARS: "projections.avatars",
    Results.METADATA: "run_metadata.json",
    Results.REPORT: "report.md",
    Results.PRIVACY_METRICS_SUMMARY: "privacy_metrics_summary.json",
    Results.SIGNAL_METRICS_SUMMARY: "signal_metrics_summary.json",
}