Source code for avatar_yaml.models.avatar_metadata

from dataclasses import dataclass, field
from enum import Enum

from avatar_yaml.models.common import Metadata, ModelKind


# --- Enums for PIA metadata (align with avatar) ---
[docs] class SensitivityLevel(str, Enum): """Evaluation of the sensitivity level of the personal data being processed. This assessment is based on factors such as the nature of the data and potential risks to data subjects. It applies to three categories of data: - **Sensitive personal data** (GDPR Art. 9): Special categories including health, racial/ethnic origin, political opinions, religious beliefs, trade union membership, genetic data, biometric data, sex life, or sexual orientation. These typically require VERY_HIGH or HIGH sensitivity levels. - **Personal data** (GDPR Art. 4): Any information relating to an identified or identifiable natural person (e.g., name, identification number, location data, online identifiers). Sensitivity level varies based on context and combination with other data. - **Demographic data**: Non-sensitive characteristics such as age, gender, geographic location, education level. These are typically LOW to MEDIUM sensitivity, but can increase when combined with other identifying information. The sensitivity level should reflect potential harm to data subjects if the data were compromised or re-identified. """ VERY_HIGH = "Very High" HIGH = "High" MEDIUM = "Medium" LOW = "Low" VERY_LOW = "Very Low" NEGLIGIBLE = "Negligible" UNDEFINED = "Undefined"
[docs] class DataRecipient(str, Enum): """Categories of recipients for the anonymised data, based on their relationship to the Data Controller and the context of data sharing.""" UNKNOWN = "unknown" """The recipients of the anonymized data have not been specifically identified at this stage. The data recipient category will need to be determined to properly assess the privacy risks associated with data sharing and ensure appropriate safeguards are in place.""" OPENDATA = "opendata" """The recipients of the anonymised data are the general public, through publication in an open data repository or public research platform. Such dissemination aims to promote scientific collaboration, innovation, or public transparency. To guarantee full compliance with data protection requirements, the datasets released as open data have undergone an anonymisation process. """ CONTRACTUAL_THIRDPARTY = "contractual_thirdparty" """The recipients of the anonymised data are third parties with whom the Data Controller maintains a contractual relationship, such as research partners, insurers, data analytics firms, or other commercial entities. These transfers occur within a controlled legal framework ensuring compliance with the principles of confidentiality, data minimisation, and purpose limitation""" INTERNAL = "internal" """The recipients of the anonymised data are exclusively internal stakeholders of the Data Controller, such as authorised employees, researchers, or analysts operating within the same organisation. The synthetic datasets are used for internal analytical, research, or operational purposes, in strict compliance with the principles of data protection by design and by default""" OUTSIDE_EU = "outside_eu" """The recipients of the anonymised data are entities established outside the European Union, including international research institutions or commercial partners.""" TRUSTED_THIRDPARTY = "trusted_thirdparty" """The recipients of the anonymised data are trusted third parties operating under a contractual or institutional framework that ensures compliance with data protection and ethical standards. These may include subcontractors providing technical services, scientific publishers, or data repositories managing peer-reviewed research outputs. The sharing of anonymised datasets with such entities is governed by confidentiality agreements and data processing clauses that explicitly prohibit any attempt at re-identification. """
[docs] class DataSubject(str, Enum): """Categories of individuals whose personal data are being processed, based on the context and purpose of the data processing activity.""" UNKNOWN = "unknown" PATIENTS = "patients" """The data subjects are patients whose personal data are processed in the context of medical research, healthcare provision, or clinical trials. Such data may include information directly or indirectly identifying individuals, together with health-related or demographic variables.""" EMPLOYEES = "employees" """The data subjects are employees, job applicants, or contractors whose personal data are processed for human resources management, organisational analysis, or workforce studies. Such data may encompass professional identifiers, career trajectories, remuneration details, performance indicators, and training records.""" CLIENTS = "clients" """The data subjects are clients, customers, or insured persons whose personal data are processed for the purposes of service provision, product analysis, or contractual performance. These data may include identifying information, transaction or claim histories, contact details, and, in some contexts, financial or health-related information.""" USERS = "users" """The data subjects are users of digital, public, or mobility services whose personal data are processed for analytical, operational, or optimisation purposes. The data may include identifiers, behavioural indicators, service usage patterns, or geolocation data.""" STUDENTS = "students" """The data subjects are students enrolled in educational institutions whose personal data are processed for pedagogical, administrative, or research purposes. The datasets may include demographic information, academic performance, attendance records, or socio-economic indicators. """
[docs] class DataType(str, Enum): """Categories of personal data being processed, based on the context and sector of the data processing activity.""" UNKNOWN = "unknown" """The processing involves personal data of an unspecified type. The exact nature of the data has not been determined or categorized at this stage. """ HEALTH = "health" """The data processed originate from health-related datasets containing information on patients or study participants. These datasets typically include demographic, clinical, and behavioural variables, such as age, gender, diagnosis codes, treatment details, medical outcomes, and follow-up data.""" HR = "hr" """The personal data processed concern employees, job applicants, contractors, or trainees. The datasets generally include professional information such as identification data, employment history, remuneration details, performance evaluations, and training records. Certain datasets may also include information relating to health or diversity monitoring.""" MOBILITY = "mobility" """The personal data processed typically relate to users of transport systems, vehicle operators, or mobility service subscribers. These datasets may include identifiers, geolocation traces, timestamps, usage frequency, travel routes, and behavioural metrics. Depending on the context, they may also contain information derived from connected vehicles or smart ticketing systems.""" INSURANCE = "insurance" """The personal data processed typically relate to policyholders, beneficiaries, or claimants. The datasets may include demographic characteristics, contract details, claim histories, financial indicators, and, in some cases, health-related information.""" FINANCE = "finance" """The personal data processed concern clients, investors, account holders, or financial service users. Typical datasets may include identification data, transaction histories, account balances, income levels, credit ratings, and investment portfolios.In certain contexts, they may also contain data classified as sensitive, such as information revealing financial hardship or vulnerability.""" EDUCATION = "education" """The personal data processed relate to students, teachers, or administrative staff within educational institutions. The datasets may include demographic information, academic performance records, attendance logs, course enrolments, and, where relevant, special educational needs or socio-economic indicators."""
@dataclass(frozen=True) class PiaMetadata: datarecipient: str = DataRecipient.UNKNOWN.value data_type: str = DataType.UNKNOWN.value datasubject: str = DataSubject.UNKNOWN.value sensitivity_level: str = SensitivityLevel.UNDEFINED.value @dataclass(frozen=True) class AvatarMetadataSpec: display_name: str | None pia_metadata: PiaMetadata | None = None @dataclass(frozen=True) class AvatarMetadata: kind: ModelKind metadata: Metadata spec: AvatarMetadataSpec | None = None annotations: dict[str, str] = field(default_factory=dict) def get_metadata( display_name: str | None = None, annotations: dict[str, str] = {}, pia_metadata: PiaMetadata | None = None, ) -> AvatarMetadata: return AvatarMetadata( kind=ModelKind.METADATA, metadata=Metadata(name=f"avatar-metadata-{display_name}"), spec=AvatarMetadataSpec(display_name=display_name, pia_metadata=pia_metadata), annotations=annotations, )