Source code for msprime.demography

#
# Copyright (C) 2015-2021 University of Oxford
#
# This file is part of msprime.
#
# msprime is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# msprime is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with msprime.  If not, see <http://www.gnu.org/licenses/>.
#
"""
Module responsible for defining and debugging demographic models.
"""
from __future__ import annotations

import collections
import copy
import dataclasses
import enum
import inspect
import itertools
import json
import logging
import math
import numbers
import operator
import re
import sys
import textwrap
import warnings
from typing import Any
from typing import ClassVar
from typing import MutableMapping

import demes
import numpy as np
import tskit

from . import ancestry
from . import core
from . import species_trees


# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class IncompletePopulationMetadataWarning(UserWarning):
    """
    Warning category used when there is not enough information available
    to fill out the metadata for a population.
    """


class LruCache(collections.OrderedDict):
    """
    A size-bounded mapping that evicts the least recently used key once it
    holds more than ``maxsize`` items.

    This follows the LRU example from the ``collections.OrderedDict``
    documentation. Both looking a key up with ``cache[key]`` and assigning
    to a key with ``cache[key] = value`` count as a "use" and mark the key
    as the most recently used one.

    :param int maxsize: The maximum number of items retained in the cache.
    """

    def __init__(self, maxsize=128, *args, **kwds):
        # maxsize is set before delegating to OrderedDict so that it is
        # already available if initial items are inserted via __setitem__.
        self.maxsize = maxsize
        super().__init__(*args, **kwds)

    def __getitem__(self, key):
        value = super().__getitem__(key)
        # A successful lookup makes this key the most recently used.
        self.move_to_end(key)
        return value

    def __setitem__(self, key, value):
        # Overwriting an existing key must refresh its position; otherwise
        # a key that was just updated could be the next one evicted.
        if key in self:
            self.move_to_end(key)
        super().__setitem__(key, value)
        if len(self) > self.maxsize:
            # The first key in iteration order is the least recently used.
            oldest = next(iter(self))
            del self[oldest]


# Cache of population tables built by _build_population_table, keyed by the
# JSON encoding of the population metadata. Holds at most 16 entries.
_population_table_cache = LruCache(16)


def _build_population_table(populations):
    """
    Return the tskit PopulationTable that encodes the metadata of the
    specified populations.

    Encoding metadata is quite expensive, so tables are kept in an LRU
    cache keyed on the JSON form of the metadata, and the cached table
    is returned when the same metadata is seen again.

    :raises ValueError: If a population's ``extra_metadata`` contains one
        of the standard keys (``name`` or ``description``).
    """
    rows = []
    for pop in populations:
        row = {"name": pop.name, "description": pop.description}
        extra = pop.extra_metadata
        if extra is not None:
            # The standard keys may only be set through the corresponding
            # Population properties, never through extra_metadata.
            clashes = sorted(set(extra.keys()) & set(row.keys()))
            if clashes:
                raise ValueError(
                    f"Cannot set standard metadata key(s) {clashes} "
                    "using extra_metadata. Please set using the corresponding "
                    "property of the Population class."
                )
            row.update(extra)
        rows.append(row)

    # The metadata is the only thing stored in the Population table, so
    # its JSON encoding is used as the cache key.
    cache_key = json.dumps(rows, sort_keys=True)
    if cache_key in _population_table_cache:
        return _population_table_cache[cache_key]

    # msprime always fills in the name and description fields, so the
    # schema marks them as "required" to tell downstream tools this.
    schema = {
        "codec": "json",
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "description": {"type": ["string", "null"]},
        },
        "required": ["name", "description"],
        "additionalProperties": True,
    }
    new_table = tskit.PopulationTable()
    new_table.metadata_schema = tskit.MetadataSchema(schema)
    for row in rows:
        new_table.add_row(metadata=row)
    _population_table_cache[cache_key] = new_table
    return _population_table_cache[cache_key]


def check_num_populations(num_populations):
    """
    Validate a number of populations, raising a ValueError if it is
    less than one.
    """
    too_few = num_populations < 1
    if too_few:
        raise ValueError("Must have at least one population")


def check_migration_rate(migration_rate):
    """
    Validate a migration rate, raising a ValueError if it is negative.
    """
    is_negative = migration_rate < 0
    if is_negative:
        raise ValueError("Migration rates must be non-negative")



[docs]
@dataclasses.dataclass
class Population:
    """
    One population within a :class:`.Demography`. The
    :ref:`sec_demography_populations` section describes what populations
    represent and how they can be used.

    .. warning:: This class should not be instantiated directly. Please use
        :meth:`.Demography.add_population` method instead.
    """

    initial_size: float = 0.0
    """
    The absolute size of the population at time zero. More details and
    examples are given in the
    :ref:`sec_demography_populations_initial_size` section.
    """

    growth_rate: float = 0.0
    """
    The per-generation exponential growth rate of the population (forwards
    in time). The value is zero for a constant-size population, positive for
    a population that has been growing, and may also be negative.
    More details and examples are given in the
    :ref:`sec_demography_populations_growth_rate` section.
    """

    name: str | None = None
    """
    The name of the population. When specified, it must uniquely identify
    the population and be a valid Python identifier (i.e., something that
    could be used as a variable name in Python code).
    See :ref:`sec_demography_populations_identifiers` for more details
    and recommendations on best practice.
    """

    description: str = ""
    """
    A concise description of the population. This is the empty string
    unless otherwise specified.
    """

    extra_metadata: dict = dataclasses.field(default_factory=dict)
    """
    A JSON-encodable dictionary of metadata items that will be stored in
    the associated tskit population object. Keys for the pre-defined
    metadata items must not appear in this dictionary.
    More details and examples are given in the
    :ref:`sec_demography_populations_metadata` section.
    """

    default_sampling_time: float | None = None
    """
    The time at which samples are drawn from this population by default.
    More details are given in the
    :ref:`sec_demography_populations_default_sampling_time` section.
    """

    initially_active: bool | None = None
    """
    If True, this population is always initially active, whether or not it
    takes part in a :ref:`sec_demography_events_population_split`.
    If left unset (None), the initial state of the population is determined
    automatically from the events declared in the demography.
    More details are given in the
    :ref:`sec_demography_populations_life_cycle` section.
    """

    id: int | None = None  # noqa: A003
    """
    The integer ID of this population within its parent
    :class:`.Demography`. The Demography class assigns this attribute; user
    code should neither set nor change it.
    """

    def asdict(self):
        """
        Return the fields of this population as a dictionary.
        """
        fields_as_dict = dataclasses.asdict(self)
        return fields_as_dict

    def validate(self):
        """
        Check the size and name of this population.

        :raises ValueError: If the initial size is negative, the name is
            not set, or the name is not a valid Python identifier.
        """
        if self.initial_size < 0:
            raise ValueError("Negative population size")
        name = self.name
        if name is None:
            raise ValueError("A population name must be set.")
        if not name.isidentifier():
            raise ValueError("A population name must be a valid Python identifier")




[docs]
@dataclasses.dataclass
class Demography(collections.abc.Mapping):
    """
    The definition of a demographic model for an msprime simulation,
    consisting of a set of populations, a migration matrix, and a list
    of demographic events. See the :ref:`sec_demography` section for
    detailed documentation on how to define, debug and simulate with
    demography in msprime.

    Please see the :ref:`sec_demography_demography_objects` section
    for details of how to access and update population information
    within a model.

    Demography objects implement the Python
    :class:`python:collections.abc.Mapping` protocol, in which the
    keys are **either** the population ``name`` or
    integer ``id`` values (see the :ref:`sec_demography_populations_identifiers`
    section for more information) and the values are :class:`.Population`
    objects.

    In general, population references in methods such as
    :meth:`.Demography.add_population_split` can either be string names
    or integer IDs, and the two forms can be used interchangeably.
    """

    populations: list[Population] = dataclasses.field(default_factory=list)
    events: list = dataclasses.field(default_factory=list)
    # Until we can use numpy type hints properly, it's not worth adding them
    # here. We still have to add in ignores below for indexed assignment errors.
    migration_matrix: Any | None = None

    def __post_init__(self):
        if self.migration_matrix is None:
            N = self.num_populations
            self.migration_matrix = np.zeros((N, N))

        # People might get cryptic errors from passing in copies of the same
        # population, so check for it.
        if len({id(pop) for pop in self.populations}) != len(self.populations):
            raise ValueError("Population objects must be distinct")

        # Assign the IDs and default names, if needed.
        for j, population in enumerate(self.populations):
            if population.id is not None:
                raise ValueError(
                    "Population ID should not be set before using to create "
                    "a Demography"
                )
            population.id = j
            if population.name is None:
                population.name = f"pop_{j}"
        self._validate_populations()


[docs]
    def add_population(
        self,
        *,
        initial_size: float,
        growth_rate: float | None = None,
        name: str | None = None,
        description: str | None = None,
        extra_metadata: dict | None = None,
        default_sampling_time: float | None = None,
        initially_active: bool | None = None,
    ) -> Population:
        """
        Creates a new :class:`.Population` with the specified parameters and
        appends it to this :class:`.Demography`. The ID of the new population
        is the number of populations present immediately before
        ``add_population`` is called, so that the first population added has
        ID 0, the next ID 1 and so on. If ``name`` is not specified it
        defaults to ``"pop_{id}"``. An
        :ref:`sec_demography_populations_initial_size` value must be
        specified (but may be zero).

        :param float initial_size: The number of individuals of the population
            at time zero.  See the :ref:`sec_demography_populations_initial_size`
            section for more details and examples.
        :param float growth_rate: The exponential growth rate of the
            population. See the :ref:`sec_demography_populations_growth_rate`
            section for more details and examples.
        :param str name: The human-readable identifier for this population.
            If not specified, defaults to the string ``"pop_{id}"`` where
            ``id`` is the population's integer ID. See
            :ref:`sec_demography_populations_identifiers` for more details
            and recommendations on best practice.
        :param str description: A concise but informative description of
            what this population represents within the wider model. Defaults
            to the empty string.
        :param dict extra_metadata: Extra metadata to associate with
            this population that will be stored in tree sequences output
            by :func:`.sim_ancestry`. See the
            :ref:`sec_demography_populations_metadata` section for more
            details and examples.
        :param float default_sampling_time: The time at which samples
            will be taken from this population, if a time is not otherwise
            specified. By default this is determined by the details
            of the model, and whether populations are ancestral in
            :ref:`sec_demography_events_population_split` events. See the
            :ref:`sec_demography_populations_default_sampling_time` section
            for more details.
        :param bool initially_active: Whether this population is initially
            :ref:`active<sec_demography_populations_life_cycle>`.
            By default this is determined by the details
            of the model, and whether populations are ancestral in
            :ref:`sec_demography_events_population_split` events. See the
            :ref:`sec_demography_populations_life_cycle` section
            for more details.
        :returns: The new :class:`.Population` instance.
        :rtype: Population
        """
        new_id = self.num_populations
        # Fill in the defaults for any parameters that were left unset.
        if growth_rate is None:
            growth_rate = 0
        if name is None:
            name = f"pop_{new_id}"
        if description is None:
            description = ""
        if extra_metadata is None:
            extra_metadata = {}
        population = Population(
            initial_size=initial_size,
            growth_rate=growth_rate,
            name=name,
            description=description,
            extra_metadata=extra_metadata,
            default_sampling_time=default_sampling_time,
            initially_active=initially_active,
            id=new_id,
        )
        self.populations.append(population)
        # TODO this is inefficient - we should probably store the migration
        # matrix in a sparse dictionary form internally.
        # Grow the matrix by one row and one column, keeping the old rates.
        previous_matrix = self.migration_matrix
        new_size = new_id + 1
        self.migration_matrix = np.zeros((new_size, new_size))
        self.migration_matrix[:new_id, :new_id] = previous_matrix
        self._validate_populations()
        return population


    def _add_population_from_old_style(
        self, pop_config: PopulationConfiguration, name: str | None = None
    ) -> Population:
        population = self.add_population(
            name=name,
            initial_size=pop_config.initial_size,
            growth_rate=pop_config.growth_rate,
        )
        metadata = pop_config.metadata
        if metadata is not None and isinstance(metadata, collections.abc.Mapping):
            metadata = metadata.copy()
            if "name" in metadata:
                population.name = metadata.pop("name")
                if name is not None and name != population.name:
                    # Maybe this should be a warning, or just ignored entirely?
                    raise ValueError(
                        "Population name already set in old-style metadata "
                        f"({name}) and doesn't match supplied name "
                        f"({population.name})"
                    )
            if "description" in metadata:
                population.description = metadata.pop("description")
        population.extra_metadata = metadata
        return population

    def add_event(self, event: DemographicEvent) -> DemographicEvent:
        """
        Appends the specified event to this demography's list of events
        and records this demography on the event.

        :param DemographicEvent event: The event to add.
        :returns: The event that was passed in.
        :raises TypeError: If ``event`` is not a DemographicEvent instance.
        """
        if isinstance(event, DemographicEvent):
            event.demography = self
            self.events.append(event)
            return event
        raise TypeError("Events must be instances of DemographicEvent")


[docs]
    def set_migration_rate(
        self, source: str | int, dest: str | int, rate: float
    ) -> None:
        """
        Sets the backwards-time rate of migration from the specified ``source``
        population to ``dest`` to the specified value. This has the effect of
        setting ``demography.migration_matrix[source, dest] = rate``. It is
        the rate at which a lineage currently in ``source`` moves to ``dest``
        as one follows the lineage back through time.

        .. important:: Note this is the
            :ref:`backwards in time<sec_demography_direction_of_time>`;
            migration rate and that
            ``source`` and ``dest`` are from the perspective of lineages in the
            coalescent process. See :ref:`sec_demography_migration` for more
            details and clarification on this vital point.

        The ``source`` and ``dest`` populations can be referred to either by
        their integer ``id`` or string ``name`` values.

        :param str,int source: The source population from which lineages originate
            in the backwards-time process.
        :param str,int dest: The destination population where lineages are move
            to in the backwards-time process.
        :param float rate: The per-generation migration rate.
        """
        source = self[source].id
        dest = self[dest].id
        if source == dest:
            raise ValueError("The source and dest populations must be different")
        self.migration_matrix[source, dest] = rate  # type: ignore



[docs]
    def set_symmetric_migration_rate(
        self,
        populations: list[str | int],
        rate: float,
    ) -> None:
        """
        Sets the symmetric migration rate between all pairs of populations in
        the specified list to the specified value. For a given pair of population
        IDs ``j`` and ``k``, this sets ``demography.migration_matrix[j, k] = rate``
        and ``demography.migration_matrix[k, j] = rate``.

        Populations may be specified either by their integer IDs or by
        their string names.

        :param list populations: An iterable of population identifiers (integer
            IDs or string names).
        :param float rate: The value to set the migration matrix entries to.
        """
        # There's an argument for not checking this so that corner cases on
        # single population models can be handled. However, it's nearly always
        # going to be a user error where someone forgets the second population
        # so it seems better to raise an error to prevent hard-to-detect mistakes.
        if len(populations) < 2:
            raise ValueError("Must specify at least two populations")
        pop_ids = [self[identifier].id for identifier in populations]
        for pop_j, pop_k in itertools.combinations(pop_ids, 2):
            self.migration_matrix[pop_j, pop_k] = rate  # type: ignore
            self.migration_matrix[pop_k, pop_j] = rate  # type: ignore


    # Demographic events.

    def _check_population_references(self, populations: list[str | int]):
        for pop_ref in populations:
            # Slightly unsure whether this call can get optimised out, but
            # there doesn't seem to be a better way to do it without duplicating
            # the KeyError message
            self[pop_ref]

    def _add_activate_population_event(
        self, time: float, *, population: str | int
    ) -> ActivatePopulationEvent:
        """
        Adds an event that activates a population at the specified time.
        The population is expected to be initially inactive.

        :param float time: The time at which this event occurs in generations.
        :param str, int population: The population to activate.
        """
        self._check_population_references([population])
        event = ActivatePopulationEvent(time=time, population=population)
        return self.add_event(event)


[docs]
    def add_population_split(
        self, time: float, *, derived: list[str | int], ancestral: str | int
    ) -> PopulationSplit:
        """
        Adds a population split event at the specified time. Backwards in
        time, a population split moves all lineages from the (more recent)
        derived populations into the (more ancient) ancestral population.
        Forwards in time, this corresponds to the ancestral population
        splitting into the derived populations.

        See the :ref:`sec_demography_events_population_split` section
        for more details and examples.

        In addition to moving lineages from the derived population(s) into the
        ancestral population, a population split has the following additional
        effects:

        - All derived populations are set to
          :ref:`inactive<sec_demography_populations_life_cycle>`.
        - All migration rates to and from the derived populations are set to 0.
        - Population sizes and growth rates for the derived populations are set
          to 0.
        - If ``initially_active`` has not been set for the ``ancestral``
          :class:`.Population`, it is set to False and, **if** its
          ``default_sampling_time`` has not already been set either, the
          ``default_sampling_time`` is set to the time of this event.

        :param float time: The time at which this event occurs in generations.
        :param list(str, int) derived: The derived populations.
        :param str, int ancestral: The ancestral population.
        """
        self._check_population_references([*derived, ancestral])
        ancestral_pop = self[ancestral]
        # Only fill in values the user has left unset; the sampling time is
        # only considered when the active state was also left unset.
        if ancestral_pop.initially_active is None:
            ancestral_pop.initially_active = False
            if ancestral_pop.default_sampling_time is None:
                ancestral_pop.default_sampling_time = time
        split = PopulationSplit(time=time, derived=derived, ancestral=ancestral)
        return self.add_event(split)



[docs]
    def add_admixture(
        self,
        time: float,
        *,
        derived: str | int,
        ancestral: list[str | int],
        proportions: list[float],
    ) -> Admixture:
        """
        Adds an admixture event at the specified time. In an admixture
        event all lineages from a (more recent) ``derived`` population
        move to a list of (more ancient) ``ancestral`` populations according
        to a list of ``proportions``, such that a given lineage has a
        probability ``proportions[j]`` of being moved to the population
        ``ancestral[j]``. This movement of lineages backwards in time
        corresponds to the initial state of the admixed derived population
        the specified ``time`` being composed of individuals from the
        specified ``ancestral`` populations in the specified ``proportions``.

        See the :ref:`sec_demography_events_admixture` section
        for more details and examples.

        In addition to moving lineages from the derived population into the
        ancestral population(s), an admixture has the following additional
        effects:

        - The derived population is set to
          :ref:`inactive<sec_demography_populations_life_cycle>`.
        - The ancestral populations are set to
          :ref:`active<sec_demography_populations_life_cycle>`, if they are
          not already active.
        - All migration rates to and from the derived population are set to 0.
        - Population sizes and growth rates for the derived population are set
          to 0, and the population is marked as inactive.

        :param float time: The time at which this event occurs in generations.
        :param str, int derived: The derived population.
        :param list(str, int) ancestral: The ancestral populations.
        :param list(float) proportions: The proportion of the derived population
            from each of the ancestral populations at the time of the event.
        """
        self._check_population_references(list(ancestral) + [derived])
        # Useful feature here might be to support taking n - 1 proportion values
        # and computing 1 - sum for the last value. Could be tedious for users to
        # do this manually.
        if not math.isclose(sum(proportions), 1.0):
            raise ValueError("Sum of the admixture proportions must be approximately 1")
        return self.add_event(
            Admixture(
                time=time, derived=derived, ancestral=ancestral, proportions=proportions
            )
        )



[docs]
    def add_mass_migration(
        self,
        time: float,
        *,
        source: str | int,
        dest: str | int,
        proportion: float,
    ) -> MassMigration:
        """
        Adds a mass migration (or "pulse migration") event at the specified
        time. A mass migration moves each lineage in the ``source``
        population to the ``dest`` population with probability
        ``proportion``. Forwards-in-time, this corresponds to individuals
        migrating **from** population ``dest`` **to** population ``source``.

        Please see the :ref:`sec_demography_events_mass_migration` section
        for more details and examples.

        .. warning:: Mass migrations are an advanced feature and should
            only be used if the required population dynamics cannot be
            modelled by :ref:`sec_demography_events_population_split`
            or :ref:`sec_demography_events_admixture` events.

        .. important::
            Note that ``source`` and ``dest`` are from the perspective of the
            coalescent process, i.e.
            :ref:`backwards in time<sec_demography_direction_of_time>`;
            please see the
            :ref:`sec_demography_migration` section for more details.

        :param float time: The time at which this event occurs in generations.
        :param str, int source: The population **from** which lineages are moved.
        :param str, int dest: The population **to** which lineages are moved.
        :param float proportion: For each lineage in the ``source`` population,
            this is the probability that it moves to the ``dest`` population.
        """
        self._check_population_references([source, dest])
        event = MassMigration(time, source, dest, proportion)
        return self.add_event(event)



[docs]
    def add_migration_rate_change(
        self,
        time: float,
        *,
        rate: float,
        source: int | str | None = None,
        dest: int | str | None = None,
    ) -> MigrationRateChange:
        """
        Changes the rate of migration from one deme to another to a new value at a
        specific time. Migration rates are specified in terms of the rate at which
        lineages move from population ``source`` to ``dest`` during the progress of
        the simulation.

        .. important::
            Note that ``source`` and ``dest`` are from the perspective of the
            coalescent process, i.e.
            :ref:`backwards in time<sec_demography_direction_of_time>`;
            please see the
            :ref:`sec_demography_migration` section for more details.

        By default, ``source=None`` and ``dest=None``, which results in all
        non-diagonal elements of the migration matrix being changed to the new
        rate. If ``source`` and ``dest`` are specified, they must refer to valid
        populations (either integer IDs or string names).

        :param float time: The time at which this event occurs in generations.
        :param float rate: The new per-generation migration rate.
        :param str, int source: The ID of the source population.
        :param str, int dest: The ID of the destination population.
        """
        # An unspecified population is passed to the event as -1; only
        # populations that were actually given are looked up.
        if source is not None:
            self._check_population_references([source])
        else:
            source = -1
        if dest is not None:
            self._check_population_references([dest])
        else:
            dest = -1
        event = MigrationRateChange(time=time, source=source, dest=dest, rate=rate)
        return self.add_event(event)



[docs]
    def add_symmetric_migration_rate_change(
        self, time: float, populations: list[str | int], rate: float
    ) -> SymmetricMigrationRateChange:
        """
        Adds an event that, at the specified time, sets the symmetric
        migration rate between all pairs of populations in the specified
        list to the specified value. For a given pair of population
        IDs ``j`` and ``k``, this sets ``migration_matrix[j, k] = rate``
        and ``migration_matrix[k, j] = rate``.

        Please see the :ref:`sec_demography_migration` section for more details.

        Populations may be specified either by their integer IDs or by
        their string names.

        :param float time: The time at which this event occurs in generations.
        :param list populations: A sequence of population identifiers (integer
            IDs or string names).
        :param float rate: The new migration rate.
        """
        self._check_population_references(populations)
        event = SymmetricMigrationRateChange(
            time=time, populations=populations, rate=rate
        )
        return self.add_event(event)



[docs]
    def add_population_parameters_change(
        self,
        time: float,
        *,
        initial_size: float | None = None,
        growth_rate: float | None = None,
        population: int | None = None,
    ) -> PopulationParametersChange:
        """
        Changes the size parameters of a population (or all populations)
        at a given time.

        Please see the :ref:`sec_demography_populations` section for more details.

        :param float time: The length of time ago at which this event
            occurred.
        :param float initial_size: The number of individuals in the population
            at the beginning of the time slice starting at ``time``. If None,
            the initial_size of the population is computed according to
            the initial population size and growth rate over the preceding
            time slice.
        :param float growth_rate: The new per-generation growth rate. If None,
            the growth rate is not changed. Defaults to None.
        :param str, int population: The ID of the population affected. If
            ``population`` is None, the changes affect all populations
            simultaneously.
        """
        if population is not None:
            self._check_population_references([population])
        event = PopulationParametersChange(
            time,
            initial_size=initial_size,
            growth_rate=growth_rate,
            population=population,
        )
        return self.add_event(event)



[docs]
    def add_simple_bottleneck(
        self,
        time: float,
        population: int | str,
        proportion: float | None = None,
    ) -> SimpleBottleneck:
        """
        Adds a population bottleneck at the specified time, in which each
        lineage coalesces into a single ancestor with probability
        ``proportion``.

        Please see the :ref:`sec_demography_events_simple_bottleneck` section
        for more details.

        :param float time: The length of time ago at which this event
            occurred.
        :param str, int population: The ID of the population affected.
        :param float proportion: The probability of each lineage coalescing
            into a single ancestor. Defaults to 1.0.
        """
        if proportion is None:
            proportion = 1.0
        self._check_population_references([population])
        event = SimpleBottleneck(
            time=time, population=population, proportion=proportion
        )
        return self.add_event(event)



[docs]
    def add_instantaneous_bottleneck(
        self, time: float, *, population: str | int, strength: float
    ) -> InstantaneousBottleneck:
        """
        Adds a bottleneck at the specified time in the specified population.
        The bottleneck is equivalent to the coalescent process running for
        ``strength`` generations.

        Please see the :ref:`sec_demography_events_instantaneous_bottleneck`
        section for more details.

        .. note:: The :ref:`ploidy<sec_ancestry_ploidy_coalescent_time_scales>`
            is also used to scale the time scale of the coalescent process
            during the bottleneck.

        :param float time: The length of time ago at which this event
            occurred.
        :param str, int population: The ID of the population affected.
        :param float strength: The equivalent amount of time in the standard
            coalescent.
        """
        self._check_population_references([population])
        event = InstantaneousBottleneck(
            time=time, population=population, strength=strength
        )
        return self.add_event(event)



[docs]
    def add_census(self, time: float) -> CensusEvent:
        """
        Adds a "census" event at the specified time. In a census we add a node
        to each branch of every tree, thus recording the population that each
        lineage is in at the specified time.

        This may be used to record all ancestral haplotypes present at that
        time, and to extract other information related to these haplotypes: for
        instance to trace the local ancestry of a sample back to a set of
        contemporaneous ancestors, or to assess whether a subset of samples has
        coalesced more recently than the census time. (However, these added
        nodes will only represent the portions of ancestral genomes inherited
        by the samples, rather than complete ancestral genomes.)

        See :ref:`sec_ancestry_census_events` for more details.

        .. warning:: When used in conjunction with the DTWF model,
            non-integer census times should be used to guarantee that
            the census nodes don't coincide with coalescences (and
            therefore zero branch length errors).
            See :ref:`sec_ancestry_census_events_dtwf` for more details.

        :param float time: The time at which the census should occur.
        :return: The value returned by ``add_event`` for the new event.
        """
        census = CensusEvent(time)
        return self.add_event(census)


    def _populations_table(self):
        cols = [
            ("id", ""),
            ("name", ""),
            ("description", ""),
            ("initial_size", ".1f"),
            ("growth_rate", ".2g"),
            ("default_sampling_time", ".2g"),
            ("extra_metadata", ""),
        ]
        data = [
            [f"{getattr(pop, attr):{fmt}}" for attr, fmt in cols]
            for pop in self.populations
        ]
        return [title for title, _ in cols], data

    def _populations_text(self):
        """
        Return the populations table rendered by ``core.text_table``.
        """
        titles, rows = self._populations_table()

        def as_cell(item):
            # Every cell is passed on as a single-element list.
            text = item.as_text() if isinstance(item, core.TableEntry) else item
            return [text]

        cells = [[as_cell(item) for item in row] for row in rows]
        header = [[title] for title in titles]
        # One alignment code per column of the populations table.
        alignments = ["^", "<", "<", "<", "^", ">", "<"]
        return core.text_table("Populations", header, alignments, cells)

    def _populations_html(self):
        """
        Return the populations table rendered by ``core.html_table``, with
        the number of populations shown in the table title.
        """
        titles, rows = self._populations_table()
        heading = f"Populations ({len(self.populations)})"
        return core.html_table(heading, titles, rows)

    def _migration_rate_info(self, source, dest, rate):
        """
        Return a ``core.TableEntry`` for one cell of the migration matrix.

        The entry text is ``rate`` formatted to four significant figures.
        Off-diagonal cells (``source != dest``) also carry a sentence that
        explains the rate backwards and forwards in time; diagonal cells
        carry ``None`` instead.
        """
        if source == dest:
            return core.TableEntry(f"{rate:.4g}", None)
        source_name = self.populations[source].name
        dest_name = self.populations[dest].name
        explanation = (
            "Backwards in time migration rate from population "
            f"{source_name} to {dest_name} = {rate} per generation. "
            "Forwards in time, this is the expected number of migrants "
            f"moving from {dest_name} to {source_name} "
            f"per generation, divided by the size of {source_name}."
        )
        return core.TableEntry(f"{rate:.4g}", explanation)

    def _migration_matrix_table(self):
        """
        Return a ``(column_titles, rows)`` pair for the migration matrix.

        The first column title is empty because the first cell of each row
        is the name of that row's population; the remaining cells are the
        entries built by ``_migration_rate_info``.
        """
        num_pops = self.num_populations
        rows = []
        for j in range(num_pops):
            row = [self.populations[j].name]
            row.extend(
                self._migration_rate_info(j, k, self.migration_matrix[j, k])
                for k in range(num_pops)
            )
            rows.append(row)
        titles = [""]
        titles.extend(pop.name for pop in self.populations)
        return titles, rows

    def _migration_matrix_text(self):
        """
        Return the migration matrix rendered by ``core.text_table``.
        """
        titles, rows = self._migration_matrix_table()
        cells = []
        for row in rows:
            # Table entries are reduced to their text; every cell is passed
            # on as a single-element list.
            cells.append(
                [
                    [item.as_text() if isinstance(item, core.TableEntry) else item]
                    for item in row
                ]
            )
        # One code for the row-label column, then one per population column.
        alignments = ">" + "^" * self.num_populations
        header = [[title] for title in titles]
        return core.text_table("Migration Matrix", header, alignments, cells)

    def _migration_matrix_html(self):
        """
        Return the migration matrix rendered by ``core.html_table``. When
        every rate is zero an empty table with an explanatory title is
        returned instead of the full matrix.
        """
        if np.all(self.migration_matrix == 0):
            return core.html_table("Migration matrix (all zero)", [], [])
        titles, rows = self._migration_matrix_table()
        return core.html_table("Migration matrix", titles, rows)

    def _events_text(self, events, title="Events"):
        """
        Return the specified events rendered by ``core.text_table``, one row
        per event with its time, type, parameters and effect.
        """

        def wrapped_row(event):
            # Each cell is a list of lines, wrapped to a fixed column width.
            kind = textwrap.wrap(event._type_str, 15)
            parameters = textwrap.wrap(event._parameters(), 22)
            effect = textwrap.wrap(event._effect(), 38)
            return [[f"{event.time:.4g}"], kind, parameters, effect]

        rows = [wrapped_row(event) for event in events]
        header = [["time"], ["type"], ["parameters"], ["effect"]]
        return core.text_table(title, header, "><<<", rows, internal_hlines=True)

    def _events_html(self, events, title=None):
        """
        Return the specified events rendered by ``core.html_table``.

        :param events: The events to render, one table row per event.
        :param title: The table title. Defaults to ``"Events (n)"`` where
            ``n`` is the number of events being rendered.
        :return: The value returned by ``core.html_table``.
        """
        if title is None:
            title = f"Events ({len(events)})"
        # Test the events actually being rendered, not ``self.events``: the
        # title and the rows below are derived from the ``events`` argument,
        # so the empty-table shortcut has to be as well.
        if len(events) == 0:
            return core.html_table(title, [], [])

        col_titles = ["time", "type", "parameters", "effect"]
        data = []
        for event in events:
            class_name = event.__class__.__name__
            # Convert the CamelCase class name to snake_case to build the
            # name of an ``add_*`` method, e.g. InstantaneousBottleneck ->
            # add_instantaneous_bottleneck.
            # NOTE(review): CensusEvent maps to ``add_census_event`` whereas
            # the method is ``add_census``, so that link may not resolve.
            snake_case = re.sub(r"(?<!^)(?=[A-Z])", "_", class_name).lower()
            add_method = f"msprime.Demography.add_{snake_case}"
            # TODO change this to stable when 1.0 is released.
            type_html = (
                "<a href='https://tskit.dev/msprime/docs/latest/api.html#"
                f"{add_method}'>{event._type_str}</a>"
            )
            row = [f"{event.time:.4g}", type_html, event._parameters(), event._effect()]
            data.append(row)
        # Column 1 holds the hand-built anchor markup, so it is exempted from
        # escaping via ``no_escape``.
        return core.html_table(title, col_titles, data, no_escape=[1])

    def _repr_html_(self):
        """
        Return an HTML summary made of the populations, migration matrix and
        events tables of the validated (default-resolved) demography.
        """
        resolved = self.validate()
        sections = [
            resolved._populations_html(),
            resolved._migration_matrix_html(),
            resolved._events_html(self.events),
        ]
        return '<div style="margin-left:20px">' + "".join(sections) + "</div>"

    def __str__(self):
        """
        Return a text summary made of the populations, migration matrix and
        events tables of the validated (default-resolved) demography.
        """
        resolved = self.validate()
        tables = [
            resolved._populations_text(),
            resolved._migration_matrix_text(),
            resolved._events_text(self.events),
        ]

        def indent(table):
            # The first line of each table gets a branch marker and the
            # remaining lines a continuation bar.
            lines = table.splitlines()
            rest = "".join("║  " + line + "\n" for line in lines[1:])
            return "╟  " + lines[0] + "\n" + rest

        return "Demography\n" + "".join(indent(table) for table in tables)

    def __len__(self):
        return len(self.populations)

    def __iter__(self):
        for pop in self.populations:
            yield pop.name

    def __getitem__(self, identifier):
        """
        Returns the population with the specified ID or name.
        """
        if isinstance(identifier, str):
            for population in self.populations:
                if population.name == identifier:
                    return population
            else:
                raise KeyError(f"Population with name '{identifier}' not found")
        elif isinstance(identifier, numbers.Integral):
            # We don't support negative indexing here because -1 is used as
            # way to refer to *all* populations in demographic events, and
            # it would be too easy to introduce bugs in old code if we changed
            # the meaning of this.
            if identifier < 0 or identifier >= self.num_populations:
                raise KeyError(f"Population id {identifier} out of bounds")
            return self.populations[identifier]
        raise KeyError(
            "Keys must be either string population names or integer IDs:"
            f"identifier '{identifier}' is of type {type(identifier)}"
        )

    @property
    def num_populations(self):
        return len(self.populations)

    @property
    def num_events(self):
        return len(self.events)

    def _validate_populations(self):
        names = set()
        for j, population in enumerate(self.populations):
            if population.id != j:
                raise ValueError(
                    "Incorrect population ID. ID values should not be updated "
                    "by users. Please use Demography.add_population to add extra "
                    "populations after initialisation."
                )
            population.validate()
            if population.name in names:
                raise ValueError(f"Duplicate population name: '{population.name}'")
            names.add(population.name)


[docs]
    def validate(self):
        """
        Checks the demography looks sensible and raises errors/warnings
        appropriately, and return a copy in which all default values have
        been appropriately resolved.
        """
        self._validate_populations()
        migration_matrix = np.array(self.migration_matrix)
        N = self.num_populations
        if migration_matrix.shape != (N, N):
            raise ValueError(
                "migration matrix must be a N x N square matrix encoded "
                "as a list-of-lists or numpy array, where N is the number "
                "of populations. The diagonal "
                "elements of this matrix must be zero. For example, a "
                "valid matrix for a 3 population system is "
                "[[0, 1, 1], [1, 0, 1], [1, 1, 0]]"
            )
        last_event = None
        for event in self.events:
            if not isinstance(event, DemographicEvent):
                raise TypeError(
                    "Demographic events must be a list of DemographicEvent "
                    "instances sorted in non-decreasing order of time."
                )
            if last_event is not None:
                if last_event.time > event.time:
                    raise ValueError(
                        "Events must be time-sorted. Please use demography.sort_events()"
                        "if you add events out of order."
                    )
            last_event = event
        resolved = copy.deepcopy(self)
        for population in resolved.populations:
            if population.default_sampling_time is None:
                population.default_sampling_time = 0
            if population.initially_active is None:
                population.initially_active = True
        return resolved



[docs]
    def copy(self, populations: list[str] | None = None) -> Demography:
        """
        Returns a copy of this model. If the ``populations`` argument is
        specified, the populations in the copied model will be in this order.

        :param list populations: A list of population identifiers defining the
            order of the populations in the new model. If not specified, the
            current order is used.
        :return: A copy of this Demography.
        :raises ValueError: If ``populations`` does not have one entry per
            population in this demography.
        """
        if populations is None:
            populations = range(self.num_populations)
        if len(populations) != self.num_populations:
            raise ValueError("populations must have Demography.num_populations entries")

        new_demog = Demography()
        population_attrs = (
            "initial_size",
            "growth_rate",
            "name",
            "description",
            "extra_metadata",
            "default_sampling_time",
            "initially_active",
        )
        for identifier in populations:
            original = self[identifier]
            new_demog.add_population(
                **{attr: getattr(original, attr) for attr in population_attrs}
            )

        # Map each new population ID to the ID of the same-named population
        # in this demography, then copy the off-diagonal rates pair by pair.
        old_id = {pop.id: self[pop.name].id for pop in new_demog.populations}
        for first, second in itertools.combinations(new_demog.populations, 2):
            j, k = first.id, second.id
            new_demog.migration_matrix[j, k] = self.migration_matrix[
                old_id[j], old_id[k]
            ]
            new_demog.migration_matrix[k, j] = self.migration_matrix[
                old_id[k], old_id[j]
            ]

        def remap(identifier):
            # Population references are rewritten as names, which do not
            # depend on the population order; -1 is passed through as-is.
            if identifier == -1:
                return -1
            return self[identifier].name

        # For each group of event classes: the attributes that refer to
        # populations, in assignment order, and whether each one is a list of
        # references. The first matching group wins.
        remap_rules = (
            (
                (MassMigration, MigrationRateChange),
                (("source", False), ("dest", False)),
            ),
            ((PopulationSplit,), (("derived", True), ("ancestral", False))),
            ((Admixture,), (("ancestral", True), ("derived", False))),
            (
                (PopulationParametersChange, SimpleBottleneck, InstantaneousBottleneck),
                (("population", False),),
            ),
            ((SymmetricMigrationRateChange,), (("populations", True),)),
            # Census has no population
            ((CensusEvent,), ()),
        )
        for event in self.events:
            event_copy = copy.deepcopy(event)
            for event_types, references in remap_rules:
                if isinstance(event, event_types):
                    break
            else:
                raise AssertionError("Event class not implemented in copy")
            for attr, is_list in references:
                value = getattr(event, attr)
                if is_list:
                    setattr(event_copy, attr, [remap(pop) for pop in value])
                else:
                    setattr(event_copy, attr, remap(value))
            new_demog.add_event(event_copy)

        return new_demog


    def sort_events(self):
        # Sort demographic events by time. Sorting is stable so the relative
        # order of events at the same time will be preserved.
        self.events.sort(key=lambda de: de.time)

    def insert_populations(self, tables):
        """
        Insert population definitions for this demography into the specified
        set of tables. The population table must be empty on entry; its
        metadata schema and metadata columns are taken from the table built
        by ``_build_population_table``.

        :meta private:
        """
        assert len(tables.populations) == 0
        built = _build_population_table(self.populations)
        tables.populations.metadata_schema = built.metadata_schema
        tables.populations.set_columns(
            metadata=built.metadata, metadata_offset=built.metadata_offset
        )

    def insert_extra_populations(self, tables):
        """
        Insert additional population definitions for this demography
        into the specified set of tables. We assume that the populations
        up to len(tables.populations) are identical, and append additional
        populations to the tables for any remaining.

        :meta private:
        """
        # TODO we should be accessing a higher-level API for querying
        # the schema here, but there's none available right now.
        schema = tables.populations.metadata_schema.schema
        if schema is not None:
            properties = schema.get("properties", {})
            additional_properties = schema.get("additionalProperties", True)
            name_in_metadata = "name" in properties or additional_properties
            description_in_metadata = (
                "description" in properties or additional_properties
            )
            if not name_in_metadata:
                warnings.warn(
                    "The metadata schema does not have a 'name' property; "
                    "population names will not be recorded in the output "
                    "tree sequence",
                    IncompletePopulationMetadataWarning,
                    stacklevel=2,
                )
            if not description_in_metadata:
                warnings.warn(
                    "The metadata schema does not have a 'description' property; "
                    "population descriptions will not be recorded in the output "
                    "tree sequence",
                    IncompletePopulationMetadataWarning,
                    stacklevel=2,
                )
            left_out = []
            for population in self.populations[len(tables.populations) :]:
                md = {}
                if name_in_metadata:
                    md["name"] = population.name
                if description_in_metadata:
                    md["description"] = population.description
                for k in population.extra_metadata:
                    if k in properties or additional_properties:
                        md[k] = population.extra_metadata[k]
                    else:
                        left_out.append(k)
                tables.populations.add_row(metadata=md)
            if len(left_out) > 0:
                warnings.warn(
                    "The metadata schema does not allow for all properties specified "
                    "in extra_metadata: these keys have not been recorded in the "
                    f"output tree sequence: {', '.join(left_out)}",
                    IncompletePopulationMetadataWarning,
                    stacklevel=2,
                )
        else:
            warnings.warn(
                "No metadata schema present in population table, not recording "
                "metadata",
                IncompletePopulationMetadataWarning,
                stacklevel=2,
            )
            # No metadata schema, just add bare populations.
            for _ in self.populations[len(tables.populations) :]:
                tables.populations.add_row()

    def asdict(self):
        # NOTE: this slightly contradicts the interpretation of the
        # Demography object as a mapping storing populations. But
        # this is an internal undocumented method, so seems OK.
        return {
            "populations": [pop.asdict() for pop in self.populations],
            "events": [event.asdict() for event in self.events],
            "migration_matrix": self.migration_matrix.tolist(),
        }


[docs]
    def debug(self):
        """
        Returns a :class:`.DemographyDebugger` instance for this demography.

        :return: A DemographyDebugger object for this demography.
        :rtype: msprime.DemographyDebugger
        """
        debugger = DemographyDebugger(demography=self)
        return debugger


    def __eq__(self, other):
        try:
            self.assert_equal(other)
            return True
        except AssertionError:
            return False


[docs]
    def assert_equal(self, other: Demography):
        """
        Compares this Demography with specified ``other`` and raises an
        AssertionError if they are not exactly equal.

        :param Demography other: The other demography to compare against.
        """
        # TODO we could potentially do better here with error messages
        # by showing a diff of the str() values for objects that differ.
        assert isinstance(other, Demography)
        assert self.num_populations == other.num_populations
        for p1, p2 in zip(self.populations, other.populations):
            assert p1 == p2, f"{p1} ≠ {p2}"
        assert np.array_equal(
            self.migration_matrix, other.migration_matrix
        )  # type: ignore
        assert self.num_events == other.num_events
        for e1, e2 in zip(self.events, other.events):
            assert e1 == e2, f"{e1} ≠ {e2}"



[docs]
    def is_equivalent(self, other: Demography, rel_tol=None, abs_tol=None):
        """
        Compares this demography with the other and return True if they are
        equivalent up to the specified numerical tolerances. Two demographies
        are equivalent if, they have the same set of epochs defined by demographic
        events, and for each epoch:

        - The population's ``initial_size``, ``growth_rate`` and ``active``
          values are equal in all populations.
        - The migration matrices are equal
        - The same sequence of lineage movements through population splits, etc.

        All numerical comparisons are performed using :func:`python:math.isclose`.

        :param Demography other: The other demography to compare against.
        :param float rel_tol: The relative tolerance used by math.isclose.
        :param float abs_tol: The relative tolerance used by math.isclose.
        :return: True if this demography and other are equivalent up to numerical
            tolerances.
        :rtype bool: bool
        """
        try:
            self.assert_equivalent(other, rel_tol=rel_tol, abs_tol=abs_tol)
            return True
        except AssertionError:
            return False


    def assert_equivalent(
        self,
        other: Demography,
        rel_tol: None | float = None,
        abs_tol: None | float = None,
    ):
        # Same defaults as math.isclose
        rel_tol = 1e-9 if rel_tol is None else rel_tol
        abs_tol = 0 if abs_tol is None else abs_tol
        assert isinstance(other, Demography)
        self_dbg = self.debug()
        other_dbg = other.debug()
        if self.num_populations != other.num_populations:
            raise AssertionError(
                "Number of populations not equal: "
                f"{self.num_populations} ≠ {other.num_populations}"
            )
        # Compare the population attributes.
        # NB use the *resolved* versions from the debug objects
        for self_pop, other_pop in zip(
            self_dbg.demography.populations, other_dbg.demography.populations
        ):
            if self_pop.name != other_pop.name:
                raise AssertionError(
                    f"Population names differ: {self_pop.name} ≠ {other_pop.name}"
                )
            self_st = (
                0
                if self_pop.default_sampling_time is None
                else self_pop.default_sampling_time
            )
            other_st = (
                0
                if other_pop.default_sampling_time is None
                else other_pop.default_sampling_time
            )
            if not math.isclose(self_st, other_st):
                raise AssertionError(
                    f"Sampling times not equal for {self_pop.name}: "
                    f"{self_pop.default_sampling_time} ≠ "
                    f"{other_pop.default_sampling_time}"
                )

        if self_dbg.num_epochs != other_dbg.num_epochs:
            raise AssertionError(
                "Number of epochs not equal: "
                f"{self_dbg.num_epochs} ≠ {other_dbg.num_epochs}"
            )
        for j, (self_epoch, other_epoch) in enumerate(
            zip(self_dbg.epochs, other_dbg.epochs)
        ):
            if not math.isclose(
                self_epoch.start_time,
                other_epoch.start_time,
                rel_tol=rel_tol,
                abs_tol=abs_tol,
            ):
                raise AssertionError(
                    f"Epoch[{j}] at different times: "
                    f"{self_epoch.start_time} ≠ {other_epoch.start_time}"
                )
            for self_pop, other_pop in zip(
                self_epoch.populations, other_epoch.populations
            ):
                if self_pop.state != other_pop.state:
                    raise AssertionError(
                        f"State mismatch in populations in epoch[{j}], {self_pop.name}: "
                        f"{self_pop.state} ≠ {other_pop.state}"
                    )
                if self_pop.state == PopulationStateMachine.ACTIVE:
                    if not math.isclose(
                        self_pop.start_size,
                        other_pop.start_size,
                        rel_tol=rel_tol,
                        abs_tol=abs_tol,
                    ):
                        raise AssertionError(
                            "Population start_size not equal to required precision "
                            f"in epoch[{j}], {self_pop.name}: "
                            f"{self_pop.start_size} ≠ {other_pop.start_size}"
                        )

                    if not math.isclose(
                        self_pop.growth_rate,
                        other_pop.growth_rate,
                        rel_tol=rel_tol,
                        abs_tol=abs_tol,
                    ):
                        raise AssertionError(
                            "Population growth_rate not equal to required precision "
                            f"in epoch[{j}], {self_pop.name}: "
                            f"{self_pop.growth_rate} ≠ {other_pop.growth_rate}"
                        )
            m_equal = np.isclose(
                self_epoch.migration_matrix,
                other_epoch.migration_matrix,
                rtol=rel_tol,
                atol=abs_tol,
            )
            if not np.all(m_equal):
                differs = "Differences between: "
                for source_id, dest_id in zip(*np.where(~m_equal)):
                    source = self[source_id].name
                    dest = self[dest_id].name
                    self_rate = self_epoch.migration_matrix[source_id, dest_id]
                    other_rate = other_epoch.migration_matrix[source_id, dest_id]
                    differs += f"({source}, {dest}: {self_rate} ≠ {other_rate}), "
                raise AssertionError(
                    f"Migration matrices in epoch[{j}] not equal: \n"
                    "self = \n"
                    f"{self_epoch.migration_matrix}\n"
                    "other = \n"
                    f"{other_epoch.migration_matrix}\n"
                    f"::{differs[:-2]}"
                )
            self._assert_lineage_movements_equivalent(
                self_epoch.events, other_epoch.events, rel_tol=rel_tol, abs_tol=abs_tol
            )

            self_state_change = [
                event
                for event in self_epoch.events
                if isinstance(event, StateChangeEvent)
            ]
            other_state_change = [
                event
                for event in other_epoch.events
                if isinstance(event, StateChangeEvent)
            ]
            if len(self_state_change) > 0 or len(other_state_change) > 0:
                raise ValueError(
                    "State change events not currently supported in equivalent. "
                    "Please open an issue on GitHub"
                )

    def _assert_lineage_movements_equivalent(
        self, self_events, other_events, *, rel_tol, abs_tol
    ):
        self_lineage_movements = self._normalise_lineage_movements(self_events)
        other_lineage_movements = self._normalise_lineage_movements(other_events)
        source_pops = set(self_lineage_movements.keys())
        if source_pops != set(other_lineage_movements.keys()):
            raise AssertionError(
                f"Mismatch in the set of populations affected by lineage movements: "
                f"{source_pops} ≠ {set(other_lineage_movements.keys())}"
            )
        for source in source_pops:
            self_out_movements = self_lineage_movements[source]
            other_out_movements = other_lineage_movements[source]
            if len(self_out_movements) != len(other_out_movements):
                raise AssertionError(
                    "Mismatch in number of normalised lineage movements out of "
                    f"{source}: {len(self_out_movements)} ≠ {len(other_out_movements)}"
                )
            for self_lm, other_lm in zip(self_out_movements, other_out_movements):
                if self_lm.dest != other_lm.dest:
                    raise AssertionError(
                        "Mismatch in lineage movement destination:"
                        f"{self_lm.dest} ≠ {other_lm.dest}"
                    )
                if not math.isclose(
                    self_lm.proportion,
                    other_lm.proportion,
                    rel_tol=rel_tol,
                    abs_tol=abs_tol,
                ):
                    raise AssertionError(
                        "Mismatch in normalised lineage movement proportions "
                        f"from {self_lm.source} to {self_lm.dest}: "
                        f"{self_lm.proportion} ≠ {other_lm.proportion}"
                    )

    def _normalise_lineage_movements(self, events: list[DemographicEvent]):
        """
        Extract the LineageMovementEvent instances from the specified list
        and normalise their effects into LineageMovement instances, and
        return a dictionary mapping source populations to the list of
        sequential lineage movements. For each source population we have a
        list of sequential lineage movements, sorted by destination population ID.
        """
        assert len({event.time for event in events}) <= 1
        ret = collections.defaultdict(list)
        for event in events:
            if isinstance(event, LineageMovementEvent):
                for move in event._as_lineage_movements():
                    ret[move.source].append(move)
        for pop in list(ret.keys()):
            # We have a list of *conditional* lineage movements out of a
            # population. We canonicalise this by sorting by the
            # destination population, so we have to first convert back to
            # absolute proportions.
            assert all(lm.source == pop for lm in ret[pop])
            P = _sequential_to_proportions([pm.proportion for pm in ret[pop]])
            id_value_pairs = sorted((lm.dest, p) for lm, p in zip(ret[pop], P))
            S = _proportions_to_sequential([p for _, p in id_value_pairs])
            ret[pop] = [
                LineageMovement(source=pop, dest=id_value_pairs[j][0], proportion=S[j])
                for j in range(len(S))
            ]
        return ret


[docs]
    @staticmethod
    def from_species_tree(
        tree,
        initial_size,
        *,
        time_units="gen",
        generation_time=None,
        growth_rate=None,
    ) -> Demography:
        """
        Parse a species tree in `Newick
        <https://en.wikipedia.org/wiki/Newick_format>`_ format and return the
        corresponding :class:`Demography` object. The tree is assumed to be
        rooted and ultrametric and branch lengths must be included and
        correspond to time, either in units of millions of years, years, or
        generations.

        The returned :class:`.Demography` object contains a
        :class:`.Population` for each node in the species tree. The
        population's ``name`` attribute will be either the corresponding node
        label from the newick tree, if it exists, or otherwise the name takes
        the form "pop_{j}", where j is the position of the given population in
        the list. Leaf populations are first in the list, and added in
        left-to-right order. Populations corresponding to the internal nodes
        are then added in a postorder traversal of the species tree. For each
        internal node a :ref:`sec_demography_events_population_split`
        event is added so that
        lineages move from its child populations at the appropriate time
        and rates of continuous migration to and from the child populations are
        set to zero. See the :ref:`sec_demography_events_population_split`
        section for more details.

        The initial sizes and growth rates for the populations in the model are
        set via the ``initial_size`` and ``growth_rate`` arguments. These can be
        specified in two ways: if a single number is provided, this is used
        for all populations. The argument may also be a mapping from population
        names to their respective values. For example:

        .. code-block:: python

            tree = "(A:10.0,B:10.0)C"
            initial_size = {"A": 1000, "B": 2000, "C": 100}
            demography = msprime.Demography.from_species_tree(tree, initial_size)

        Note that it is possible to have default population sizes for unnamed
        ancestral populations using a :class:`python:collections.defaultdict`, e.g.,

        .. code-block:: python

            tree = "(A:10.0,B:10.0)"
            initial_size = collections.defaultdict(lambda: 100)
            initial_size.update({"A": 1000, "B": 2000})
            demography = msprime.Demography.from_species_tree(tree, initial_size)

        :param str tree: The tree string in Newick format, with named leaves and branch
            lengths.
        :param initial_size: Each population's initial_size, in numbers of individuals.
            May be a single number or a mapping from population names to their sizes.
        :param growth_rate: Each population's growth_rate. May be a single number
            or a mapping from population names to their exponential growth rates.
            Defaults to zero.
        :param str time_units: The units of time in which the species tree's
            branch lengths are measured. Allowed branch length units are millions of
            years, years, and generations; these should be specified with the strings
            ``"myr"``, ``"yr"``, or ``"gen"``, respectively. This defaults to
            ``"gen"``.
        :param float generation_time: The number of years per generation. If and only
            if the branch lengths are not in units of generations, the generation time
            must be specified. This defaults to `None`.
        :return: A Demography object representing the specified species tree.
        :rtype: msprime.Demography
        """
        # All of the parsing is delegated to the species_trees module.
        parse_options = {
            "initial_size": initial_size,
            "growth_rate": growth_rate,
            "time_units": time_units,
            "generation_time": generation_time,
        }
        return species_trees.parse_species_tree(tree, **parse_options)



[docs]
    @staticmethod
    def from_starbeast(tree, generation_time, time_units="myr") -> Demography:
        """
        Return the :class:`.Demography` described by a species tree that was
        written by `TreeAnnotator <https://www.beast2.org/treeannotator>`_
        from a posterior tree distribution generated with `StarBEAST
        <https://academic.oup.com/mbe/article/34/8/2101/3738283>`_.

        TreeAnnotator writes species trees in `Nexus
        <https://en.wikipedia.org/wiki/Nexus_file>`_ format; they are rooted,
        bifurcating, and ultrametric. Branch lengths are usually in units of
        millions of years, although StarBEAST (and thus TreeAnnotator) permits
        other units. This function accepts branch lengths in millions of years
        or in years. Leaves must be named, and the tree must carry population
        size information for leaf and ancestral species as annotations with
        the "dmv" tag, which is the case for trees written by TreeAnnotator
        based on StarBEAST posterior tree distributions.

        The returned :class:`.Demography` object contains a
        :class:`.Population` for each node in the species tree. The
        population's ``name`` attribute is the corresponding node label from
        the newick tree if it exists; otherwise the name takes the form
        "pop_{j}", where j is the position of the given population in the
        list. Leaf populations come first in the list, added in left-to-right
        order, followed by the populations for the internal nodes in a
        postorder traversal of the species tree. For each internal node a
        :ref:`sec_demography_events_population_split` event is added so that
        lineages move from its child populations at the appropriate time and
        rates of continuous migration to and from the child populations are
        set to zero. See the :ref:`sec_demography_events_population_split`
        section for more details.

        :param str tree: The tree string in Nexus format, with named leaves,
            branch lengths, and branch annotation. Typically, this string is
            the entire content of a file written by TreeAnnotator.
        :param float generation_time: The number of years per generation.
        :param str time_units: The units of time in which the species tree's
            branch lengths are measured. Allowed branch length units are
            millions of years, and years; these should be specified with the
            strings ``"myr"`` or ``"yr"``, respectively. This defaults to
            ``"myr"``.
        :return: A :class:`.Demography` instance describing the information
            in the specified species tree.
        :rtype: msprime.Demography
        """
        # All of the parsing is delegated to the species_trees module.
        demography = species_trees.parse_starbeast(
            tree=tree, generation_time=generation_time, time_units=time_units
        )
        return demography


    @staticmethod
    def _from_old_style_map_populations(
        population_configurations: list[PopulationConfiguration],
        migration_matrix: list[list[float]],
        demographic_events: list[DemographicEvent],
        population_map: list[dict[str, int]],
    ) -> Demography:
        """
        Creates a Demography object from the pre 1.0 style input parameters,
        using ``population_map`` to translate the integer population IDs of
        the old-style model into named populations, and to convert the
        MassMigration events that implement population splits and admixtures
        into the corresponding new-style events.

        The old-style model is first converted directly (see
        :meth:`._from_old_style_simple`) and its epochs are computed with the
        debugger. ``population_map`` must contain one dictionary per epoch,
        each mapping the *new* population names that exist in that epoch to
        the *old* population ID they correspond to.

        :param list population_configurations: The old-style
            :class:`.PopulationConfiguration` objects.
        :param list migration_matrix: The old-style migration matrix, or None.
        :param list demographic_events: The old-style demographic events,
            or None.
        :param list population_map: The per-epoch mapping of new population
            name to old population ID.
        :return: The converted, event-sorted Demography.
        :raises ValueError: If the ID maps are inconsistent with the old-style
            model (duplicate or out-of-range IDs, wrong number of epochs,
            first epoch not covering every population), if the mass
            migrations do not match the split/admixture implied by the maps,
            if an unsupported event type is present, or if there is migration
            from an active population into an inactive one.
        """
        direct_model = Demography._from_old_style_simple(
            population_configurations, migration_matrix, demographic_events
        )

        for id_map in population_map:
            if len(set(id_map.values())) != len(id_map):
                raise ValueError("Population IDs in old model must be unique")
            for old_id in id_map.values():
                if old_id < 0 or old_id >= direct_model.num_populations:
                    raise ValueError(
                        f"Bad population reference {old_id} in old style model"
                    )
        if len(population_map[0]) != len(population_configurations):
            raise ValueError(
                "The ID map for the first epoch must have entries for all "
                "populations in the old-style model"
            )

        # Suppress misspecification warnings; we'll give more specific messages
        # later.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            dbg = direct_model.debug()

        if dbg.num_epochs != len(population_map):
            raise ValueError(
                "Mismatch in the number of epochs in the old style model "
                f"({dbg.num_epochs}) and specified in the population ID map "
                f"({len(population_map)})"
            )

        demography = Demography()
        # Set up the initial state of the model
        id_map = population_map[0]
        for new_name, old_id in id_map.items():
            pc = population_configurations[old_id]
            demography._add_population_from_old_style(pc, new_name)
        # Populations that only appear in older epochs start out with size
        # zero; their parameters are set by the per-epoch changes below.
        for epoch_id_map in population_map[1:]:
            for name in epoch_id_map:
                if name not in demography:
                    demography.add_population(name=name, initial_size=0)

        # Fill out migration matrix
        for pop_1, pop_2 in itertools.combinations(id_map.keys(), 2):
            for pop_j, pop_k in [(pop_1, pop_2), (pop_2, pop_1)]:
                j = id_map[pop_j]
                k = id_map[pop_k]
                demography.set_migration_rate(
                    source=pop_j, dest=pop_k, rate=direct_model.migration_matrix[j, k]
                )

        # Set the state of populations and migration rates epoch by epoch.
        for epoch in dbg.epochs[1:]:
            epoch_id_map = population_map[epoch.index]
            # Set the population sizes and growth_rates for the extant populations.
            for new_id, old_id in epoch_id_map.items():
                pop_state = epoch.populations[old_id]
                demography.add_population_parameters_change(
                    epoch.start_time,
                    population=new_id,
                    initial_size=pop_state.start_size,
                    growth_rate=pop_state.growth_rate,
                )
            # Set the migration rates.
            for pop_1, pop_2 in itertools.combinations(epoch_id_map.keys(), 2):
                for pop_j, pop_k in [(pop_1, pop_2), (pop_2, pop_1)]:
                    j = epoch_id_map[pop_j]
                    k = epoch_id_map[pop_k]
                    demography.add_migration_rate_change(
                        time=epoch.start_time,
                        source=pop_j,
                        dest=pop_k,
                        rate=epoch.migration_matrix[j][k],
                    )

        # Add splits/admixtures/pulses according to the mass migrations in the
        # original model and ID maps.
        last_epoch_id_map = population_map[0]
        for epoch in dbg.epochs:
            logger.debug(
                f"Converting epoch[{epoch.index}] "
                f"{epoch.start_time:.2f}-{epoch.end_time:.2f}"
            )
            # New name -> old ID
            epoch_id_map = population_map[epoch.index]
            # Old ID -> new name
            old_id_map = {value: key for key, value in epoch_id_map.items()}
            last_epoch_pops = set(last_epoch_id_map.keys())
            epoch_pops = set(epoch_id_map.keys())
            events = copy.deepcopy(epoch.events)
            # Old IDs of the populations that cease to exist (backwards in
            # time) at the start of this epoch. MassMigrations out of these
            # are subsumed by the split/admixture event added here.
            old_derived_ids = []
            if last_epoch_pops != epoch_pops:
                ancestral = epoch_pops - last_epoch_pops
                derived = last_epoch_pops - epoch_pops
                if len(ancestral) == 1:
                    logger.debug(
                        f"Adding split ancestral={ancestral} derived={derived}"
                    )
                    ancestral = ancestral.pop()
                    demography.add_population_split(
                        time=epoch.start_time,
                        derived=list(derived),
                        ancestral=ancestral,
                    )
                    old_derived_ids = [last_epoch_id_map[pop] for pop in derived]
                    derived_mass_migrations = 0
                    for event in events:
                        if isinstance(event, MassMigration):
                            if event.source in old_derived_ids:
                                derived_mass_migrations += 1
                                # Only the mass migrations that implement the
                                # split must move every lineage. Unrelated
                                # pulses in the same epoch are converted by
                                # the generic loop below.
                                if event.proportion != 1:
                                    raise ValueError(
                                        "MassMigration associated with population split "
                                        "with proportion != 1"
                                    )
                    # This check is weak, but it will catch some errors at least.
                    # We're assuming the old model is well-formed, so this should
                    # help catch errors where the id map is slightly off.
                    if derived_mass_migrations < len(old_derived_ids) - 1:
                        raise ValueError(
                            "Insufficient MassMigrations found for population split"
                        )
                elif len(derived) == 1:
                    derived = derived.pop()
                    ancestral = []
                    sequential_proportions = []
                    old_derived_id = last_epoch_id_map[derived]
                    for event in events:
                        if isinstance(event, MassMigration):
                            if event.source == old_derived_id:
                                ancestral.append(old_id_map[event.dest])
                                sequential_proportions.append(event.proportion)
                    proportions = _sequential_to_proportions(sequential_proportions)
                    if math.isclose(sum(proportions), 1):
                        if len(ancestral) == 1:
                            # This is a single split from the ancestral population,
                            # which continues to exist. We need to override the
                            # defaults for the behaviour we want here.
                            logger.debug(
                                f"Adding single split ancestral={ancestral[0]} "
                                f"derived={derived}"
                            )
                            pop = demography[ancestral[0]]
                            pop.initially_active = True
                            pop.default_sampling_time = None
                            demography.add_population_split(
                                time=epoch.start_time,
                                derived=[derived],
                                ancestral=ancestral[0],
                            )
                        else:
                            logger.debug(
                                f"Adding admixture ancestral={ancestral} "
                                f"derived={derived} proportions={proportions}"
                            )
                            demography.add_admixture(
                                time=epoch.start_time,
                                derived=derived,
                                ancestral=ancestral,
                                proportions=proportions,
                            )
                        old_derived_ids = [old_derived_id]
                    else:
                        raise ValueError(
                            "Admixture or single population split implied by ID map "
                            "but absolute population proportions don't sum to 1"
                        )
            # Any remaining MassMigration (one that is not part of a split or
            # admixture handled above) is carried over as a plain pulse.
            for event in events:
                if isinstance(event, MassMigration):
                    if event.source not in old_derived_ids:
                        demography.add_mass_migration(
                            time=epoch.start_time,
                            source=old_id_map[event.source],
                            dest=old_id_map[event.dest],
                            proportion=event.proportion,
                        )
                elif not isinstance(
                    event, (MigrationRateChange, PopulationParametersChange)
                ):
                    raise ValueError(
                        "Only MassMigration, MigrationRateChange and "
                        "PopulationParametersChange events are supported"
                    )

            # Go through all pairs of old-style populations and check the
            # migration rates make sense. It's an error to migrate from an
            # active population into an inactive one, and a warning to
            # migrate out of an inactive population.
            pairs = itertools.combinations(range(len(epoch.populations)), 2)
            active = set(epoch_id_map.values())
            for pop_1, pop_2 in pairs:
                for source, dest in [(pop_1, pop_2), (pop_2, pop_1)]:
                    if epoch.migration_matrix[source, dest] != 0:
                        if source in active and dest not in active:
                            raise ValueError(
                                "Non zero migration from an active population "
                                f"({source}) to inactive ({dest})"
                            )
                        if source not in active:
                            warnings.warn(
                                "Migration out of inactive population "
                                f"({source}) to ({dest}). This may be an "
                                "error in your model.",
                                stacklevel=2,
                            )

            last_epoch_id_map = epoch_id_map

        demography.sort_events()
        return demography

    @staticmethod
    def _from_old_style_simple(
        population_configurations=None,
        migration_matrix=None,
        demographic_events=None,
    ) -> Demography:
        """
        Directly translate pre 1.0 style input parameters into a Demography,
        keeping the old semantics with respect to default values.

        One population is added per entry of ``population_configurations``,
        the migration matrix is copied into an array if given, and each
        demographic event is deep-copied before being added.
        """
        model = Demography()
        for config in population_configurations:
            metadata = config.metadata
            # Prefer a name recorded in the old-style metadata, if any.
            name = None if metadata is None else metadata.get("name", None)
            model._add_population_from_old_style(config, name)
        if migration_matrix is not None:
            model.migration_matrix = np.array(migration_matrix)
        events = () if demographic_events is None else demographic_events
        for event in events:
            model.add_event(copy.deepcopy(event))
        return model


[docs]
    @staticmethod
    def from_old_style(
        population_configurations=None,
        *,
        migration_matrix=None,
        demographic_events=None,
        Ne=1,
        ignore_sample_size=False,
        population_map: list[dict[str, int]] | None = None,
    ) -> Demography:
        """
        Creates a Demography object from the pre 1.0 style input parameters,
        reproducing the old semantics with respect to default values.

        No sample information is stored in the new-style :class:`.Demography`
        objects, and therefore if the ``sample_size`` attribute of any
        of the input :class:`.PopulationConfiguration` objects is set a
        ValueError will be raised by default. However, if the
        ``ignore_sample_size`` parameter is set to True, this check will
        not be performed and the sample sizes specified in the old-style
        :class:`.PopulationConfiguration` objects will be ignored.

        Each :class:`.PopulationConfiguration` instance in the list of
        ``population_configurations`` corresponds to the equivalent
        :class:`.Population` object in the returned :class:`.Demography`.
        If a PopulationConfiguration has ``metadata`` defined and this
        dictionary contains a ``name`` field, this will be used as the
        :class:`.Population` name. Otherwise, the default population
        names will be used.

        Please see the :ref:`sec_ancestry_samples` section for details on
        how to specify sample locations in :func:`.sim_ancestry`.

        :param list population_configurations: The old-style
            :class:`.PopulationConfiguration` objects. If None, a single
            population with ``initial_size=Ne`` is used. The input objects
            are deep-copied and never modified.
        :param migration_matrix: The old-style migration matrix, or None
            to leave the migration matrix at its default.
        :param list demographic_events: The old-style demographic events,
            or None for no events. Events are deep-copied.
        :param float Ne: The ``initial_size`` assigned to any population
            configuration whose ``initial_size`` is None.
        :param bool ignore_sample_size: If True, do not raise an error when
            a population configuration specifies a ``sample_size``.
        :param list population_map: If specified, a list with one dictionary
            per epoch of the old-style model, each mapping the new population
            names present in that epoch to the corresponding old-style
            population ID. The mass migrations of the old-style model are
            then converted into population splits and admixtures where the
            maps imply them. If None (the default) the model is converted
            directly.
        :return: The converted model.
        :rtype: msprime.Demography
        :raises ValueError: If a ``sample_size`` is set and
            ``ignore_sample_size`` is False, or if ``population_map`` is
            inconsistent with the old-style model.
        """
        if population_configurations is None:
            pop_configs = [PopulationConfiguration(initial_size=Ne)]
        else:
            # Work on a copy so that filling in defaults does not leak back
            # into the caller's objects.
            pop_configs = copy.deepcopy(population_configurations)
            for pop_config in pop_configs:
                if pop_config.initial_size is None:
                    pop_config.initial_size = Ne

                if pop_config.sample_size is not None and not ignore_sample_size:
                    raise ValueError(
                        "You have specified a `sample_size` in a "
                        "PopulationConfiguration object that is to be converted "
                        "into a new-style Demography object, "
                        "which does not contain any information about samples. "
                        "Please use the ``samples`` argument to sim_ancestry "
                        "instead, which provides flexible options for sampling "
                        "from different populations"
                    )

        if population_map is None:
            return Demography._from_old_style_simple(
                pop_configs, migration_matrix, demographic_events
            )
        return Demography._from_old_style_map_populations(
            pop_configs,
            migration_matrix,
            demographic_events,
            population_map,
        )



[docs]
    @staticmethod
    def from_demes(graph: demes.Graph) -> Demography:
        """
        Creates a :class:`.Demography` object from the specified
        :ref:`demes graph <demes:sec_introduction>`.
        Time values in the demes graph may be specified in any units,
        but the returned object has units converted into generations.
        See the :ref:`sec_demography_importing_demes` section for details.

        .. code::

            import demes

            graph = demes.load("model.yaml")
            demography = msprime.Demography.from_demes(graph)

        :param demes.Graph graph: A demes graph.
        :return: A :class:`.Demography` instance corresponding to the demes model.
        :rtype: msprime.Demography
        :raises ValueError: If the graph uses a non-zero selfing or cloning
            rate, or a size function other than "constant" or "exponential".
        """

        # Check for Demes features that we don't support.
        for deme in graph.demes:
            for epoch in deme.epochs:
                if epoch.selfing_rate != 0:
                    raise ValueError(
                        f"deme {deme.name}: non-zero selfing_rate not supported"
                    )
                if epoch.cloning_rate != 0:
                    raise ValueError(
                        f"deme {deme.name}: non-zero cloning_rate not supported"
                    )

        def get_growth_rate(epoch):
            # Exponential growth rate that takes the population from
            # start_size to end_size over the time span of the epoch.
            if epoch.size_function not in ["constant", "exponential"]:
                raise ValueError(
                    "msprime only supports constant or exponentially changing "
                    "population sizes"
                )
            if epoch.end_size == epoch.start_size:
                return 0
            return -math.log(epoch.start_size / epoch.end_size) / epoch.time_span

        graph = graph.in_generations()
        events = graph.discrete_demographic_events()

        demography = Demography()
        for deme in graph.demes:
            # initial_size and growth_rate track the state of the population
            # in the model as we walk through the deme's epochs backwards in
            # time, so that we only emit the parameter changes that are needed.
            initial_size = 0
            growth_rate = 0
            last_epoch = deme.epochs[-1]
            initially_active = False
            to_activate = False
            if last_epoch.end_time == 0:
                initial_size = last_epoch.end_size
                growth_rate = get_growth_rate(last_epoch)
                initially_active = True
            else:
                # add activation if needed
                # needed if isn't the ancestor of any split or merger
                is_split_parent = any(
                    deme.name == split.parent for split in events["splits"]
                )
                is_merger_parent = any(
                    deme.name in merger.parents for merger in events["mergers"]
                )
                to_activate = not (is_split_parent or is_merger_parent)
            demography.add_population(
                name=deme.name,
                description=deme.description,
                growth_rate=growth_rate,
                initial_size=initial_size,
                default_sampling_time=deme.end_time,
                initially_active=initially_active,
            )
            if to_activate:
                demography._add_activate_population_event(
                    time=deme.end_time, population=deme.name
                )
            for epoch in reversed(deme.epochs):
                new_initial_size = None
                if initial_size != epoch.end_size:
                    new_initial_size = epoch.end_size
                new_growth_rate = None
                alpha = get_growth_rate(epoch)
                if growth_rate != alpha:
                    new_growth_rate = alpha
                if new_growth_rate is not None or new_initial_size is not None:
                    demography.add_population_parameters_change(
                        population=deme.name,
                        time=epoch.end_time,
                        initial_size=new_initial_size,
                        growth_rate=new_growth_rate,
                    )
                # Going backwards in time, at the older boundary of this epoch
                # the population has reached the epoch's start_size and is
                # still changing at rate alpha. This must be recorded for
                # every epoch (whether or not an event was emitted), otherwise
                # a size change needed at the next older epoch can be missed
                # when that epoch's end_size happens to equal this epoch's
                # end_size but not its start_size.
                initial_size = epoch.start_size
                growth_rate = alpha

        for split in events.pop("splits"):
            demography.add_population_split(
                time=split.time, ancestral=split.parent, derived=split.children
            )
        for branch in events.pop("branches"):
            demography.add_population_split(
                time=branch.time,
                ancestral=branch.parent,
                derived=[branch.child],
            )
        # Add pulses in reverse order, so that pulses with the same time
        # correspond to the correct backwards-time mass migration ordering.
        for pulse in reversed(events.pop("pulses")):
            sequential_props = _proportions_to_sequential(pulse.proportions)
            for prop, source in zip(sequential_props, pulse.sources):
                # Backwards in time, lineages move from the pulse's
                # destination into its source.
                demography.add_mass_migration(
                    time=pulse.time,
                    source=pulse.dest,
                    dest=source,
                    proportion=prop,
                )
        for merger in events.pop("mergers"):
            demography.add_admixture(
                time=merger.time,
                derived=merger.child,
                ancestral=merger.parents,
                proportions=merger.proportions,
            )
        for admixture in events.pop("admixtures"):
            demography.add_admixture(
                time=admixture.time,
                derived=admixture.child,
                ancestral=admixture.parents,
                proportions=admixture.proportions,
            )
        # Every category of discrete event must have been consumed above.
        assert len(events) == 0

        # Turn migrations off at the start_time. We schedule all start_time
        # events first, and then all end_time events. This ensures that events
        # for migrations with an end_time that coincides with the start_time of
        # another migration will be scheduled later (backwards in time),
        # and thus override the rate=0 setting.
        for migration in graph.migrations:
            if not math.isinf(migration.start_time):
                demography.add_migration_rate_change(
                    time=migration.start_time,
                    source=migration.dest,
                    dest=migration.source,
                    rate=0,
                )
        for migration in graph.migrations:
            if migration.end_time == 0:
                demography.set_migration_rate(
                    source=migration.dest, dest=migration.source, rate=migration.rate
                )
            else:
                demography.add_migration_rate_change(
                    time=migration.end_time,
                    source=migration.dest,
                    dest=migration.source,
                    rate=migration.rate,
                )
        demography.sort_events()
        return demography



[docs]
    @staticmethod
    def from_tree_sequence(
        ts: tskit.TreeSequence, initial_size: float = 0
    ) -> Demography:
        """
        Creates a :class:`.Demography` object with one population for each
        population in the specified :class:`tskit.TreeSequence`, in the
        same order.

        The metadata of each population in the tree sequence is inspected.
        If it has been decoded into a mapping, the ``name`` and
        ``description`` properties of the new population are taken from the
        corresponding keys, when present. Otherwise the default values for
        ``name`` and ``description`` are used.

        The ``initial_size`` of each of the new populations is set to zero
        by default, and all other :class:`.Population` attributes
        are set to their default values. It is therefore essential to
        update the ``initial_size`` and ``growth_rate`` values to reflect
        the desired demography.

        .. seealso:: See the
            :ref:`initial state<sec_ancestry_initial_state_demography>`
            section for examples of how this method can be used.

        :param tskit.TreeSequence ts: The tree sequence
            to extract population information from.
        :param float initial_size: The default initial size for the newly
            added populations, as a number of individuals (Default=0).
        :return: A Demography object representing the populations in the
            specified tree sequence.
        :rtype: msprime.Demography
        """
        demography = Demography()
        for population in ts.populations():
            metadata = population.metadata
            if isinstance(metadata, collections.abc.Mapping):
                # Decoded metadata: pull out the fields we know about.
                name = metadata.get("name")
                description = metadata.get("description")
            else:
                name = description = None
            demography.add_population(
                initial_size=initial_size, name=name, description=description
            )
        return demography



[docs]
    @staticmethod
    def isolated_model(initial_size, *, growth_rate=None) -> Demography:
        """
        Returns a :class:`.Demography` object consisting of isolated
        populations (no migration is configured) with the specified initial
        sizes and growth rates. Please see :ref:`sec_demography` for more
        details on population sizes and growth rates.

        :param list initial_size: the ``initial_size`` value for each
            of the :class:`.Population` in the returned model. The length
            of the array corresponds to the number of populations.
        :param list growth_rate: The exponential growth rate for each
            population. Must be either None (the default, resulting in a zero
            growth rate) or an array with the same length as ``initial_size``.
        :return: A Demography object representing this model, suitable as
            input to :func:`.sim_ancestry`.
        :rtype: msprime.Demography
        :raises ValueError: If ``initial_size`` is not one-dimensional, if
            ``growth_rate`` has a different shape, or if any value is
            negative (sizes only) or non-finite.
        """
        sizes = np.array(initial_size, dtype=np.float64)
        if sizes.ndim != 1:
            raise ValueError(
                "The initial_size argument must a 1D array of population size values"
            )
        # A missing growth_rate means zero growth everywhere.
        rates = (
            np.zeros_like(sizes)
            if growth_rate is None
            else np.array(growth_rate, dtype=np.float64)
        )
        if sizes.shape != rates.shape:
            raise ValueError(
                "If growth_rate is specified it must be a 1D array of the same "
                "length as the population_size array"
            )
        if (sizes < 0).any():
            raise ValueError("population size values must be nonnegative.")
        if not np.isfinite(sizes).all():
            raise ValueError("population size values must be finite.")
        if not np.isfinite(rates).all():
            raise ValueError("growth_rate values must be finite.")
        return Demography(
            populations=[
                Population(initial_size=size, growth_rate=rate)
                for size, rate in zip(sizes, rates)
            ]
        )



[docs]
    @staticmethod
    def island_model(initial_size, migration_rate, *, growth_rate=None) -> Demography:
        """
        Returns a :class:`.Demography` object in which every pair of
        populations exchanges migrants symmetrically at the specified rate.
        The populations themselves have the specified initial sizes and
        growth rates. Please see :ref:`sec_demography` for more details on
        population sizes and growth rates.

        :param list initial_size: the ``initial_size`` value for each
            of the :class:`.Population` in the returned model. The length
            of the array corresponds to the number of populations.
        :param float migration_rate: The migration rate between each pair of
            populations.
        :param list growth_rate: The exponential growth rate for each
            population. Must be either None (the default, resulting in a zero
            growth rate) or an array with the same length as ``initial_size``.
        :return: A Demography object representing this model, suitable as
            input to :func:`.sim_ancestry`.
        :rtype: msprime.Demography
        """
        # Start from isolated populations, then switch on migration everywhere
        # except on the diagonal (a population does not migrate to itself).
        demography = Demography.isolated_model(initial_size, growth_rate=growth_rate)
        check_migration_rate(migration_rate)
        matrix = demography.migration_matrix
        matrix[:] = migration_rate
        np.fill_diagonal(matrix, 0)
        return demography



[docs]
    @staticmethod
    def stepping_stone_model(
        initial_size, migration_rate, *, growth_rate=None, boundaries=False
    ) -> Demography:
        """
        Returns a :class:`.Demography` object describing a chain of
        populations, with the specified initial sizes and growth rates, in
        which only adjacent demes exchange migrants (at the specified rate).
        Please see :ref:`sec_demography` for more details on population
        sizes and growth rates.

        .. note:: The current implementation only supports a one-dimensional
            stepping stone model, but higher dimensions could also be supported.
            Please open an issue on GitHub if this feature would be useful to you.

        :param list initial_size: the ``initial_size`` value for each
            of the :class:`.Population` in the returned model. The length
            of the array corresponds to the number of populations.
        :param float migration_rate: The migration rate between adjacent pairs
            of populations.
        :param list growth_rate: The exponential growth rate for each
            population. Must be either None (the default, resulting in a zero
            growth rate) or an array with the same length as ``initial_size``.
        :param bool boundaries: If True the stepping stone model has boundary
            conditions imposed so that demes at either end of the chain do
            not exchange migrants. If False (the default), the set of
            populations is "circular" and migration takes place between the
            terminal demes.
        :return: A Demography object representing this model, suitable as
            input to :func:`.sim_ancestry`.
        :rtype: msprime.Demography
        :raises ValueError: If ``initial_size`` has more than one dimension.
        """
        initial_size = np.array(initial_size, dtype=np.float64)
        if initial_size.ndim > 1:
            raise ValueError(
                "Only 1D stepping stone models currently supported. Please open "
                "an issue on GitHub if you would like 2D (or more) models"
            )
        demography = Demography.isolated_model(initial_size, growth_rate=growth_rate)
        check_migration_rate(migration_rate)
        num_demes = demography.num_populations
        if num_demes <= 1:
            # Nothing to connect.
            return demography

        matrix = demography.migration_matrix
        # Pair each deme with its successor, wrapping the last deme back round
        # to the first so that the chain is initially circular.
        here = np.arange(num_demes, dtype=int)
        neighbour = (here + 1) % num_demes
        matrix[here, neighbour] = migration_rate
        matrix[neighbour, here] = migration_rate
        if boundaries:
            # Cut the link between the two terminal demes.
            # NOTE(review): with exactly two populations the terminal demes
            # are also the only adjacent pair, so this zeroes all migration.
            # Confirm that this is the intended behaviour.
            matrix[0, -1] = 0
            matrix[-1, 0] = 0
        return demography


    @staticmethod
    def _ooa_model():
        """
        Builds the Gutenkunst et al three population out-of-Africa model.

        This copy lives here temporarily to give us test coverage of the
        model against stdpopsim. The model is used in the documentation, so
        we want to make sure that it does what we think: the version defined
        here is compared with the one presented in the docs to ensure that
        no errors creep in.

        Once the upstream code in stdpopsim is updated to use msprime 1.0
        APIs we can remove this model and instead compare directly
        to the stdpopsim model with .is_equivalent() or whatever.
        """
        # Times are published in years; divide by the generation time to
        # express them in generations.
        years_per_generation = 25
        t_ooa = 21.2e3 / years_per_generation
        t_amh = 140e3 / years_per_generation
        t_anc = 220e3 / years_per_generation
        # The starting (diploid) sizes of the two growing populations are
        # worked out from their growth rates.
        ceu_growth = 0.004
        chb_growth = 0.0055
        ceu_size = 1000 / math.exp(-ceu_growth * t_ooa)
        chb_size = 510 / math.exp(-chb_growth * t_ooa)

        # Populations are added in list order.
        population_specs = [
            dict(
                name="YRI",
                description="Yoruba in Ibadan, Nigeria",
                initial_size=12300,
            ),
            dict(
                name="CEU",
                description=(
                    "Utah Residents (CEPH) with Northern and Western European Ancestry"
                ),
                initial_size=ceu_size,
                growth_rate=ceu_growth,
            ),
            dict(
                name="CHB",
                description="Han Chinese in Beijing, China",
                initial_size=chb_size,
                growth_rate=chb_growth,
            ),
            dict(
                name="OOA",
                description="Bottleneck out-of-Africa population",
                initial_size=2100,
            ),
            dict(
                name="AMH",
                description="Anatomically modern humans",
                initial_size=12300,
            ),
            dict(
                name="ANC",
                description="Ancestral equilibrium population",
                initial_size=7300,
            ),
        ]
        demography = Demography()
        for spec in population_specs:
            demography.add_population(**spec)

        # Symmetric migration between the extant populations.
        for pair, rate in (
            (["CEU", "CHB"], 9.6e-5),
            (["YRI", "CHB"], 1.9e-5),
            (["YRI", "CEU"], 3e-5),
        ):
            demography.set_symmetric_migration_rate(pair, rate)

        # Going backwards in time: CEU and CHB join into OOA, which then
        # exchanges migrants with YRI until both join into AMH, which in
        # turn descends from ANC.
        demography.add_population_split(
            time=t_ooa, derived=["CEU", "CHB"], ancestral="OOA"
        )
        demography.add_symmetric_migration_rate_change(
            time=t_ooa, populations=["YRI", "OOA"], rate=25e-5
        )
        demography.add_population_split(
            time=t_amh, derived=["YRI", "OOA"], ancestral="AMH"
        )
        demography.add_population_split(time=t_anc, derived=["AMH"], ancestral="ANC")
        return demography

    @staticmethod
    def _ooa_trunk_model():
        """
        Returns the Gutenkunst et al three population out-of-Africa model,
        rendered in a more "old-style" way where we merge the various
        populations back into Africa.

        This version is included here temporarily as a way to get some
        test coverage on the model compared with stdpopsim. Because we
        use this model in the documentation, we want to make sure that it's
        doing what we think. We compare the model defined here then with
        the one presented in the docs, to ensure that no errors creep in.

        Once the upstream code in stdpopsim is updated to use msprime 1.0
        APIs we can remove this model and instead compare directly
        to the stdpopsim model with .is_equivalent() or whatever.

        Like the other built-in model constructors this takes no arguments
        and is a static method, so it can be called either on the class or
        on an instance.
        """
        # Times are provided in years, so we convert into generations.
        generation_time = 25
        T_OOA = 21.2e3 / generation_time
        T_AMH = 140e3 / generation_time
        T_ANC = 220e3 / generation_time
        # We need to work out the starting (diploid) population sizes based on
        # the growth rates provided for these two populations
        r_CEU = 0.004
        r_CHB = 0.0055
        N_CEU = 1000 / math.exp(-r_CEU * T_OOA)
        N_CHB = 510 / math.exp(-r_CHB * T_OOA)

        demography = Demography()
        # This is the "trunk" population that we merge other populations into.
        # It is explicitly marked as initially active because it is also used
        # as the ancestral population of a split below.
        demography.add_population(
            name="YRI",
            description="Africa",
            initial_size=12300,
            initially_active=True,
        )
        demography.add_population(
            name="CEU",
            description="European",
            initial_size=N_CEU,
            growth_rate=r_CEU,
        )
        demography.add_population(
            name="CHB",
            description="East Asian",
            initial_size=N_CHB,
            growth_rate=r_CHB,
        )
        demography.add_population(
            name="OOA",
            description="Bottleneck out-of-Africa population",
            initial_size=2100,
        )

        # Set the migration rates between extant populations
        demography.set_symmetric_migration_rate(["CEU", "CHB"], 9.6e-5)
        demography.set_symmetric_migration_rate(["YRI", "CHB"], 1.9e-5)
        demography.set_symmetric_migration_rate(["YRI", "CEU"], 3e-5)

        demography.add_population_split(
            time=T_OOA, derived=["CEU", "CHB"], ancestral="OOA"
        )
        demography.add_symmetric_migration_rate_change(
            time=T_OOA, populations=["YRI", "OOA"], rate=25e-5
        )
        # OOA merges straight back into the trunk rather than into a separate
        # ancestral population; the older size of the trunk is then set with
        # a parameter change instead of a further split.
        demography.add_population_split(time=T_AMH, derived=["OOA"], ancestral="YRI")
        demography.add_population_parameters_change(
            time=T_ANC, population="YRI", initial_size=7300
        )
        return demography

    @staticmethod
    def _ooa_archaic_model():
        """
        Builds the OutOfAfricaArchaicAdmixture_5R19 model. See the notes on
        the _ooa_model method for why this is defined here.
        """
        # NOTE: this example isn't very well factored and needs more work.

        # Times are published in years; divide by the generation time to
        # express them in generations.
        years_per_generation = 29
        t_ooa = 36_000 / years_per_generation
        t_amh = 60_700 / years_per_generation
        t_anc = 300_000 / years_per_generation
        t_archaic_afr = 499_000 / years_per_generation
        t_neanderthal = 559_000 / years_per_generation
        t_archaic_mig_start = 18_700 / years_per_generation
        t_archaic_mig_end = 125_000 / years_per_generation

        # The starting (diploid) sizes of the two growing populations are
        # worked out from their growth rates.
        ceu_growth = 0.00125
        chb_growth = 0.00372
        ceu_size = 2300 / math.exp(-ceu_growth * t_ooa)
        chb_size = 650 / math.exp(-chb_growth * t_ooa)

        # Populations are added in list order. AFR is the "trunk" population
        # that the other populations are merged into.
        population_specs = [
            dict(
                name="AFR",
                description="African population",
                initial_size=13900,
                initially_active=True,
            ),
            dict(
                name="CEU",
                description=(
                    "Utah Residents (CEPH) with Northern and Western European Ancestry"
                ),
                initial_size=ceu_size,
                growth_rate=ceu_growth,
            ),
            dict(
                name="CHB",
                description="Han Chinese in Beijing, China",
                initial_size=chb_size,
                growth_rate=chb_growth,
            ),
            dict(
                name="Neanderthal",
                description="Putative Neanderthals",
                initial_size=3600,
            ),
            dict(
                name="ArchaicAFR",
                description="Putative Archaic Africans",
                initial_size=3600,
            ),
            dict(
                name="OOA",
                description="Bottleneck out-of-Africa population",
                initial_size=880,
            ),
        ]
        demography = Demography()
        for spec in population_specs:
            demography.add_population(**spec)

        # Symmetric migration between the extant populations.
        demography.set_symmetric_migration_rate(["CEU", "CHB"], 11.3e-5)
        demography.set_symmetric_migration_rate(["AFR", "CEU"], 2.48e-5)

        # Migration with the archaic populations is switched on at the start
        # time, and all migration is set to zero at the end time.
        for pair, rate in (
            (["CEU", "Neanderthal"], 0.825e-5),
            (["CHB", "Neanderthal"], 0.825e-5),
            (["ArchaicAFR", "AFR"], 1.98e-5),
        ):
            demography.add_symmetric_migration_rate_change(
                t_archaic_mig_start, pair, rate
            )
        demography.add_migration_rate_change(t_archaic_mig_end, rate=0)

        demography.add_population_split(
            time=t_ooa, derived=["CEU", "CHB"], ancestral="OOA"
        )
        demography.add_symmetric_migration_rate_change(
            time=t_ooa, populations=["AFR", "OOA"], rate=52.2e-5
        )
        demography.add_symmetric_migration_rate_change(
            time=t_ooa, populations=["OOA", "Neanderthal"], rate=0.825e-5
        )
        demography.add_population_split(time=t_amh, derived=["OOA"], ancestral="AFR")
        demography.add_symmetric_migration_rate_change(
            t_amh, ["ArchaicAFR", "AFR"], 1.98e-5
        )
        demography.add_population_parameters_change(
            time=t_amh, population="AFR", initial_size=13900
        )
        demography.add_population_parameters_change(
            time=t_anc, population="AFR", initial_size=3600
        )
        demography.add_population_split(
            time=t_archaic_afr, derived=["ArchaicAFR"], ancestral="AFR"
        )
        demography.add_population_split(
            time=t_neanderthal, derived=["Neanderthal"], ancestral="AFR"
        )
        # The events above were not added in time order.
        demography.sort_events()
        return demography

    @staticmethod
    def _american_admixture_model():
        """
        Builds the AmericanAdmixture_4B11 model. See the notes on the
        _ooa_model method for why this is defined here.
        """
        t_ooa = 920
        eur_size = 34039
        eur_growth = 0.0038
        eas_size = 45852
        eas_growth = 0.0048
        t_admix = 12
        admix_size = 54664
        admix_growth = 0.05

        sampled_specs = [
            dict(name="AFR", description="African", initial_size=14474),
            dict(
                name="EUR",
                description="European",
                initial_size=eur_size,
                growth_rate=eur_growth,
            ),
            dict(
                name="EAS",
                description="East Asian",
                initial_size=eas_size,
                growth_rate=eas_growth,
            ),
            dict(
                name="ADMIX",
                description="Admixed America",
                initial_size=admix_size,
                growth_rate=admix_growth,
            ),
        ]
        ancestral_specs = [
            dict(
                name="OOA",
                description="Bottleneck out-of-Africa",
                initial_size=1861,
            ),
            dict(
                name="AMH",
                description="Anatomically modern humans",
                initial_size=14474,
            ),
            dict(
                name="ANC",
                description="Ancestral equilibrium",
                initial_size=7310,
            ),
        ]

        demography = Demography()
        for spec in sampled_specs:
            demography.add_population(**spec)
        # The admixture event is registered before the ancestral populations
        # are added, keeping the same call sequence as the reference model.
        demography.add_admixture(
            t_admix,
            derived="ADMIX",
            ancestral=["AFR", "EUR", "EAS"],
            proportions=[1 / 6, 2 / 6, 3 / 6],
        )
        for spec in ancestral_specs:
            demography.add_population(**spec)

        for pair, rate in (
            (["AFR", "EUR"], 2.5e-5),
            (["AFR", "EAS"], 0.78e-5),
            (["EUR", "EAS"], 3.11e-5),
        ):
            demography.set_symmetric_migration_rate(pair, rate)

        demography.add_population_split(t_ooa, derived=["EUR", "EAS"], ancestral="OOA")
        demography.add_symmetric_migration_rate_change(
            time=t_ooa, populations=["AFR", "OOA"], rate=15e-5
        )
        demography.add_population_split(2040, derived=["OOA", "AFR"], ancestral="AMH")
        demography.add_population_split(5920, derived=["AMH"], ancestral="ANC")
        return demography


[docs]
    def to_demes(self) -> demes.Graph:
        """
        Creates a :class:`demes.Graph` object from the demography.
        See the :ref:`sec_demography_importing_demes` section for details.

        .. note::

            Demographic models using bottlenecks added via the
            :meth:`add_simple_bottleneck` or :meth:`add_instantaneous_bottleneck`
            methods are not able to be converted into a demes graph.

        .. note::

            Demes is stricter than msprime with regard to how a demographic
            model is structured, so models that can be simulated with msprime
            are not guaranteed to be convertible to a demes graph. In particular,
            msprime's legacy API permits setting migrations or other attributes
            for a population even after that population has been merged into an
            ancestor. Such models are rarely constructed deliberately, so an
            error during conversion of a legacy model could indicate model
            misspecification.

        The returned graph can be saved as a
        `Demes-format <https://popsim-consortium.github.io/demes-spec-docs/main/>`_
        YAML file using the :ref:`demes <demes:sec_introduction>` API.

        .. code::

            import demes

            demography = msprime.Demography.island_model([1000] * 3, 1e-5)
            graph = demography.to_demes()
            demes.dump(graph, "island_model.yaml")

        Or plotted using the `demesdraw <https://github.com/grahamgower/demesdraw>`_
        visualisation package.

        .. code::

            import demesdraw

            demography = msprime.Demography.island_model([1000] * 3, 1e-5)
            graph = demography.to_demes()
            ax = demesdraw.tubes(graph)
            ax.figure.savefig("island_model.pdf")

        :return: A :class:`demes.Graph` object corresponding to the demography.
        :rtype: demes.Graph
        :raises ValueError: If the model contains a ``StateChangeEvent``, if
            a population's parameters are changed at a time outside the
            interval in which the corresponding deme exists, or if a deme
            would have a nonzero growth rate in an infinitely long oldest
            epoch.
        """
        # The conversion is driven by the debugger's view of the model: its
        # per-epoch population states and migration matrices, together with
        # the demography object that it holds.
        dbg = self.debug()
        resolved = dbg.demography
        b = demes.Builder()
        for pop in resolved.populations:
            # Times are measured backwards, so the deme's start_time is the
            # oldest end of any epoch in which the population is ACTIVE and
            # its end_time is the most recent start of any such epoch.
            # NOTE(review): max()/min() raise ValueError on an empty sequence,
            # i.e. if a population is never ACTIVE — confirm this can't happen.
            start_time = max(
                epoch.end_time
                for epoch in dbg.epochs
                if epoch.populations[pop.id].state == PopulationStateMachine.ACTIVE
            )
            end_time = min(
                epoch.start_time
                for epoch in dbg.epochs
                if epoch.populations[pop.id].state == PopulationStateMachine.ACTIVE
            )
            assert start_time > end_time
            # Each deme starts out with a single epoch holding the population's
            # initial parameters. The growth_rate entry is only used while
            # building; it is popped again before b.resolve() is called.
            initial_epoch = dict(
                end_size=pop.initial_size,
                growth_rate=pop.growth_rate,
                end_time=end_time,
            )
            b.add_deme(
                pop.name,
                description=pop.description if pop.description else None,
                start_time=start_time,
                epochs=[initial_epoch],
            )

        # Index the builder's deme dicts by name. Demes were added above in
        # the order of resolved.populations.
        # NOTE(review): indexing by pop.id assumes that each population's id
        # equals its position in resolved.populations — confirm.
        deme_map = {pop.name: b.data["demes"][pop.id] for pop in resolved.populations}

        # Copied from demes/ms.py
        def epoch_resolve(deme, time):
            """
            Return the oldest epoch if it has end_time == time. If not, create a
            new oldest epoch with end_time=time. Also resolve sizes by dealing
            with the growth_rate attribute (if required).
            """
            # New epochs are always inserted at index 0 (below), so the first
            # entry is the oldest epoch of the deme.
            epoch = deme["epochs"][0]
            start_time = deme["start_time"]
            end_time = epoch["end_time"]
            if not (start_time > time >= end_time):
                raise ValueError(
                    f"time {time} outside {deme['name']}'s existence interval "
                    f"(start_time={start_time}, end_time={end_time}]"
                )

            if time > end_time:
                # The copy is taken before growth_rate is popped, so the new
                # (older) epoch still carries the growth rate, while the
                # existing epoch has it replaced by an explicit start_size.
                new_epoch = copy.deepcopy(epoch)
                # find size at given time
                growth_rate = epoch.pop("growth_rate", 0)
                dt = time - epoch["end_time"]
                size_at_t = epoch["end_size"] * math.exp(-growth_rate * dt)
                epoch["start_size"] = size_at_t
                new_epoch["end_size"] = size_at_t
                new_epoch["end_time"] = time
                deme["epochs"].insert(0, new_epoch)
                epoch = new_epoch

            return epoch

        # Process events one time point at a time.
        # NOTE(review): itertools.groupby only groups consecutive items, so
        # this relies on resolved.events being ordered by time — confirm.
        for time, events_group in itertools.groupby(
            resolved.events, operator.attrgetter("time")
        ):
            events_group_list = list(events_group)
            lineage_movements = []
            for event in events_group_list:
                if isinstance(event, LineageMovementEvent):
                    # Collect these so that we can later group them by source.
                    lineage_movements.extend(event._as_lineage_movements())
                elif isinstance(event, PopulationParametersChange):
                    # A population of -1 means that the change applies to
                    # every population.
                    if event.population == -1:
                        pids = range(len(resolved.populations))
                    else:
                        pids = [event.population]
                    for pid in pids:
                        deme = deme_map[resolved[pid].name]
                        if (
                            len(deme["epochs"]) == 1
                            and deme["epochs"][0]["end_size"] == 0
                            and event.initial_size is not None
                            and event.initial_size > 0
                        ):
                            # The population size was 0 at time 0, but is now
                            # positive, indicating population extinction at the
                            # current time.
                            deme["epochs"][0]["end_time"] = time
                        # Get (or create) the oldest epoch ending at this time
                        # and apply the new parameters to it.
                        epoch = epoch_resolve(deme, time)
                        if event.growth_rate is not None:
                            epoch["growth_rate"] = event.growth_rate
                        if event.initial_size is not None:
                            epoch["end_size"] = event.initial_size
                elif isinstance(event, StateChangeEvent):
                    raise ValueError(f"Cannot convert {event} to demes model.")

            # Lineage movement events correspond to either the source deme's
            # creation, or otherwise just pulses into the source.
            # We distinguish these cases based on the total ancestry
            # proportion for the source deme at this time.
            lm_by_source = collections.defaultdict(list)
            for lm in lineage_movements:
                source = resolved[lm.source].name
                lm_by_source[source].append(lm)
            pulses = set()
            for source, lm_group in lm_by_source.items():
                ancestors = [resolved[lm.dest].name for lm in lm_group]
                proportions = _sequential_to_proportions(
                    [lm.proportion for lm in lm_group]
                )
                if math.isclose(sum(proportions), 1):
                    # Source deme is created from the ancestors.
                    deme = deme_map[source]
                    deme.update(
                        ancestors=ancestors, proportions=proportions, start_time=time
                    )
                    # An ancestor that is not initially active has the
                    # end_time of its last listed epoch set to this time.
                    for ancestor in ancestors:
                        if not resolved[ancestor].initially_active:
                            anc_deme = deme_map[ancestor]
                            anc_deme["epochs"][-1]["end_time"] = time
                else:
                    # Source deme receives pulses from the ancestors.
                    pulses.update(
                        (lm.source, lm.dest, lm.proportion) for lm in lm_group
                    )

            # The order of pulses matters when multiple pulses occur at the
            # same time, so we must be careful to add the pulses in the same
            # order as the lineage movements were specified. The pulses list
            # is later reversed, to correspond with the forwards-time ordering.
            for lm in lineage_movements:
                if (lm.source, lm.dest, lm.proportion) in pulses:
                    # The lineage movement's dest becomes the pulse's source
                    # and its source becomes the pulse's dest.
                    b.add_pulse(
                        sources=[resolved[lm.dest].name],
                        dest=resolved[lm.source].name,
                        proportions=[lm.proportion],
                        time=time,
                    )

        # Resolve/remove growth_rate in oldest epochs.
        for deme in b.data["demes"]:
            start_time = deme.get("start_time", math.inf)
            epoch = deme["epochs"][0]
            growth_rate = epoch.pop("growth_rate", 0)
            if growth_rate != 0:
                if math.isinf(start_time):
                    raise ValueError(
                        f"{deme['name']}: growth rate for infinite-length "
                        "epoch is invalid"
                    )
                dt = start_time - epoch["end_time"]
                epoch["start_size"] = epoch["end_size"] * math.exp(-dt * growth_rate)
            else:
                epoch["start_size"] = epoch["end_size"]

        # Copied from demes/ms.py
        def migrations_from_mm_list(
            mm_list: list[Any], end_times: list[float], deme_names: list[str]
        ) -> list[MutableMapping]:
            """
            Convert a list of migration matrices into a list of migration dicts.
            """
            assert len(mm_list) == len(end_times)
            migrations: list[MutableMapping] = []
            # Maps (j, k) to the most recently created migration dict for that
            # pair, for as long as the rate between them stays nonzero.
            current: dict[tuple[int, int], MutableMapping] = dict()
            # The first matrix in the list starts infinitely far in the past.
            start_time = math.inf
            for migration_matrix, end_time in zip(mm_list, end_times):
                n = len(migration_matrix)
                assert n == len(deme_names)
                for j in range(n):
                    assert n == len(migration_matrix[j])
                    for k in range(n):
                        if j == k:
                            continue
                        rate = migration_matrix[j][k]
                        migration_dict = current.get((j, k))
                        if migration_dict is None:
                            if rate != 0:
                                # Migration between this pair begins.
                                migration_dict = dict(
                                    source=deme_names[j],
                                    dest=deme_names[k],
                                    start_time=start_time,
                                    end_time=end_time,
                                    rate=rate,
                                )
                                current[(j, k)] = migration_dict
                                migrations.append(migration_dict)
                        else:
                            if rate == 0:
                                # Migration stops; the dict already added to
                                # the output keeps its last end_time.
                                del current[(j, k)]
                            elif migration_dict["rate"] == rate:
                                # extend migration_dict
                                migration_dict["end_time"] = end_time
                            else:
                                # The rate changed, so a new interval starts.
                                migration_dict = dict(
                                    source=deme_names[j],
                                    dest=deme_names[k],
                                    start_time=start_time,
                                    end_time=end_time,
                                    rate=rate,
                                )
                                current[(j, k)] = migration_dict
                                migrations.append(migration_dict)
                # This matrix's end is the start of the next one in the list.
                start_time = end_time
            return migrations

        # The epochs are reversed before conversion, since the helper above
        # starts at an infinite start_time and works through the list.
        # NOTE(review): each matrix is transposed, presumably because msprime
        # indexes migration backwards in time while demes' source/dest are
        # forwards in time; and list(resolved) is assumed to yield population
        # names in ID order — confirm both.
        mm_list = [epoch.migration_matrix.T for epoch in reversed(dbg.epochs)]
        mm_end_times = [epoch.start_time for epoch in reversed(dbg.epochs)]
        migrations = migrations_from_mm_list(mm_list, mm_end_times, list(resolved))
        b.data["migrations"] = migrations

        # Reverse the order of pulses, so that older pulses come first.
        # This also fixes the order of pulses that occur simultaneously,
        # so that realised ancestry proportions match the msprime model.
        if "pulses" in b.data:
            b.data["pulses"].reverse()

        # Sort demes by their start time (so that ancestors come before descendants).
        b.data["demes"] = sorted(
            b.data["demes"], key=operator.itemgetter("start_time"), reverse=True
        )

        return b.resolve()




# This was lifted out of older code as-is. No point in updating it
# to use dataclasses, since all we want to do is maintain compatibility
# with older code.

[docs]
class PopulationConfiguration:
    """
    Describes how a single population (or deme) is set up at the start of
    a simulation.

    .. important::
        This class is deprecated (but supported indefinitely);
        please use the msprime 1.0 :ref:`demography API<sec_demography>`
        in new code.

    :param int sample_size: The number of initial samples that are drawn
        from this population. Must not be negative.
    :param float initial_size: The absolute size of the population at time
        zero. Defaults to the reference population size :math:`N_e`.
        Must not be negative.
    :param float growth_rate: The forwards-time exponential growth rate of the
        population per generation. Growth rates can be negative. This is zero for a
        constant population size, and positive for a population that has been
        growing. Defaults to 0.
    :param dict metadata: A JSON-encodable dictionary of metadata to associate
        with the corresponding Population in the output tree sequence.
        If not specified or None, no metadata is stored (i.e., an empty bytes array).
        Note that this metadata is ignored when using the ``from_ts`` argument to
        :func:`simulate`, as the population definitions in the tree sequence that
        is used as the starting point take precedence.
    :raises ValueError: If ``initial_size`` or ``sample_size`` is negative.
    """

    def __init__(
        self, sample_size=None, initial_size=None, growth_rate=0.0, metadata=None
    ):
        # None means "not specified" and is always accepted. The population
        # size is checked before the sample size.
        for value, message in (
            (initial_size, "Population size must be >= 0"),
            (sample_size, "Sample size must be >= 0"),
        ):
            if value is not None and value < 0:
                raise ValueError(message)
        self.sample_size = sample_size
        self.initial_size = initial_size
        self.growth_rate = growth_rate
        self.metadata = metadata

    def asdict(self):
        """
        Returns the four configuration values as a dictionary keyed by
        attribute name.
        """
        keys = ("sample_size", "initial_size", "growth_rate", "metadata")
        return {key: getattr(self, key) for key in keys}



def _list_str(a: list, fmt=None):
    """
    Returns the specified items rendered as a string without quotes.
    """
    if fmt is None:
        joined = ", ".join(str(item) for item in a)
    else:
        joined = ", ".join(f"{item:{fmt}}" for item in a)
    return f"[{joined}]"


@dataclasses.dataclass
class DemographicEvent:
    """
    Base class for the demographic events that can occur during a
    simulation. Subclasses are expected to override ``_parameters`` and
    ``_effect``, which raise ``NotImplementedError`` here.
    """

    time: float
    # The Demography this event is associated with, if any. It is not a
    # constructor argument and takes no part in comparisons or the repr.
    demography: Demography = dataclasses.field(
        default=None, init=False, repr=False, compare=False
    )

    def _parameters(self):
        raise NotImplementedError()

    def _effect(self):
        raise NotImplementedError()

    def asdict(self):
        """
        Returns a dictionary mapping the name of each ``__init__`` parameter
        to the value of the attribute of the same name, skipping any
        parameter for which there is no such attribute.
        """
        init_params = inspect.signature(self.__init__).parameters
        return {
            name: getattr(self, name) for name in init_params if hasattr(self, name)
        }

    def _convert_id(self, population_ref):
        """
        Converts the specified population reference into an integer,
        suitable for input into the low-level code. Both -1 and None are
        treated as a special case, because events use them to mean
        "all populations"; -1 is returned for either.
        """
        if population_ref in (-1, None):
            return -1
        if self.demography is not None:
            return self.demography[population_ref].id
        # Events that are not associated with a Demography must be supported
        # for the sake of old code, but only integer IDs make sense for them
        # and these are passed through unchanged.
        if core.isinteger(population_ref):
            return population_ref
        raise ValueError(
            "Working with demographic events not associated with a "
            "Demography object is a legacy-only operation. Population "
            "references must be integer IDs"
        )


class ParameterChangeEvent(DemographicEvent):
    """
    Superclass of events that change some parameters in the underlying
    simulation model but don't actually affect the state in any other
    way.

    This class defines no fields or methods of its own; it only serves as
    a common base class for such events.
    """



[docs]
@dataclasses.dataclass
class PopulationParametersChange(ParameterChangeEvent):
    """
    Changes the demographic parameters of a population at a given time.

    This event generalises the ``-eg``, ``-eG``, ``-en`` and ``-eN``
    options from ``ms``. Note that unlike ``ms`` we do not automatically
    set growth rates to zero when the population size is changed.

    .. important::
        This class is deprecated (but supported indefinitely);
        please use the :meth:`.Demography.add_population_parameters_change`
        method in new code.

    :param float time: The length of time ago at which this event
        occurred.
    :param float initial_size: The absolute diploid size of the population
        at the beginning of the time slice starting at ``time``. If None,
        this is calculated according to the initial population size and
        growth rate over the preceding time slice.
    :param float growth_rate: The new per-generation growth rate. If None,
        the growth rate is not changed. Defaults to None.
    :param int population: The ID of the population affected. If
        ``population`` is None, the changes affect all populations
        simultaneously.
    """

    initial_size: float | None = None
    growth_rate: float | None = None
    # TODO change the default to -1 to match MigrationRateChange.
    population: int | None = None
    # Deprecated.
    # TODO add a formal deprecation notice
    population_id: int | None = dataclasses.field(default=None, repr=False)

    _type_str: ClassVar[str] = "Population parameter change"

    def __post_init__(self):
        # ``population_id`` is the deprecated alias of ``population``.
        if self.population_id is not None:
            if self.population is not None:
                raise ValueError(
                    "population_id and population are aliases; cannot supply both."
                )
            self.population = self.population_id
        if self.initial_size is None:
            if self.growth_rate is None:
                raise ValueError(
                    "Must specify one or more of growth_rate and initial_size"
                )
        elif self.initial_size < 0:
            raise ValueError("Cannot have a population size < 0")
        # A missing population is stored as -1 ("all populations").
        if self.population is None:
            self.population = -1

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event. Only
        the parameters that are actually being changed are included.
        """
        # We need to keep the num_populations argument until stdpopsim 0.2 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        ll_event = {
            "type": "population_parameters_change",
            "time": self.time,
            "population": self._convert_id(self.population),
        }
        for key in ("growth_rate", "initial_size"):
            value = getattr(self, key)
            if value is not None:
                ll_event[key] = value
        return ll_event

    def _parameters(self):
        # Comma separated summary of the values that have been set.
        items = [f"population={self.population}"]
        if self.initial_size is not None:
            items.append(f"initial_size={self.initial_size}")
        if self.growth_rate is not None:
            items.append(f"growth_rate={self.growth_rate}")
        return ", ".join(items)

    def _effect(self):
        # Human readable description of the change made by this event.
        changes = []
        if self.initial_size is not None:
            changes.append(f"initial_size → {self.initial_size:.2g} ")
        if self.growth_rate is not None:
            changes.append(f"growth_rate → {self.growth_rate:.3g} ")
        if self.population == -1:
            target = "all populations"
        else:
            target = f"population {self.population}"
        return "and ".join(changes) + "for " + target




[docs]
@dataclasses.dataclass
class MigrationRateChange(ParameterChangeEvent):
    """
    Changes the rate of migration from one deme to another to a new value at a
    specific time. Migration rates are specified in terms of the rate at which
    lineages move from population ``source`` to ``dest`` during the progress of
    the simulation. Note that ``source`` and ``dest`` are from the perspective
    of the coalescent process; please see the :ref:`sec_ancestry_models`
    section for more details on the interpretation of this migration model.

    By default, ``source=-1`` and ``dest=-1``, which results in all
    non-diagonal elements of the migration matrix being changed to the new
    rate. If ``source`` and ``dest`` are specified, they must refer to valid
    population IDs.

    .. important::
        This class is deprecated (but supported indefinitely);
        please use the :meth:`.Demography.add_migration_rate_change`
        method in new code.

    :param float time: The time at which this event occurs in generations.
    :param float rate: The new per-generation migration rate.
    :param int source: The ID of the source population.
    :param int dest: The ID of the destination population.
    """

    rate: float
    source: int = -1
    dest: int = -1
    # Deprecated.
    # TODO add a formal deprecation notice
    matrix_index: tuple | None = dataclasses.field(default=None, repr=False)

    _type_str: ClassVar[str] = "Migration rate change"

    def __post_init__(self):
        # The deprecated ``matrix_index`` pair, when supplied, takes
        # precedence over whatever was passed as ``source`` and ``dest``.
        if self.matrix_index is None:
            return
        self.source = self.matrix_index[0]
        self.dest = self.matrix_index[1]

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        ll_event = {
            "type": "migration_rate_change",
            "time": self.time,
            # Note: We'd like to change the name here to "rate" but it's best
            # to leave this alone until stdpopsim has been moved away from
            # using this internal API.
            "migration_rate": self.rate,
            "source": self._convert_id(self.source),
            "dest": self._convert_id(self.dest),
        }
        return ll_event

    def _parameters(self):
        items = (
            f"source={self.source}",
            f"dest={self.dest}",
            f"rate={self.rate}",
        )
        return ", ".join(items)

    def _effect(self):
        if self.source == -1 and self.dest == -1:
            scope = "for all populations"
        else:
            scope = f"from {self.source} to {self.dest}"
        return f"Backwards-time migration rate {scope} → {self.rate}"



@dataclasses.dataclass
class SymmetricMigrationRateChange(ParameterChangeEvent):
    """
    Event that sets the symmetric migration rate between the populations in
    a list. Not part of the external API as it was added after 1.0.
    """

    populations: list[int | str]
    rate: float

    _type_str: ClassVar[str] = dataclasses.field(
        default="Symmetric migration rate change", repr=False
    )

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event, with
        every population reference converted to an integer ID.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        population_ids = [self._convert_id(pop) for pop in self.populations]
        return {
            "type": "symmetric_migration_rate_change",
            "time": self.time,
            "populations": population_ids,
            "rate": self.rate,
        }

    def _parameters(self):
        listing = _list_str(self.populations)
        return f"populations={listing}, rate={self.rate}"

    def _effect(self):
        # A single pair of populations is named directly; any other number
        # is described by listing the populations.
        if len(self.populations) == 2:
            between = f"{self.populations[0]} and {self.populations[1]}"
        else:
            between = f"all pairs of populations in {_list_str(self.populations)}"
        return (
            f"Sets the symmetric migration rate between {between} "
            f"to {self.rate} per generation"
        )


@dataclasses.dataclass
class LineageMovement:
    """
    A single instantaneous movement of lineages from one population to
    another. Note that 'source' and 'dest' are in the backwards-in-time
    sense.

    Instances are produced by the ``_as_lineage_movements`` methods of the
    lineage movement events, which fill in integer population IDs.
    """

    # Population that the lineages move out of.
    source: int
    # Population that the lineages move into.
    dest: int
    # Proportion of lineages that are moved. The events in this module pass
    # either a per-lineage probability (MassMigration), 1 (PopulationSplit)
    # or a sequential conditional fraction (Admixture).
    proportion: float


class LineageMovementEvent(DemographicEvent):
    """
    Superclass of events that move lineages around between populations.

    Subclasses must override :meth:`_as_lineage_movements`.
    """

    def _as_lineage_movements(self) -> list[LineageMovement]:
        """
        Returns the equivalent of this lineage movement event as a
        list of lineage movements.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()



[docs]
@dataclasses.dataclass
class MassMigration(LineageMovementEvent):
    """
    A mass migration event in which some fraction of the population in one deme
    (the ``source``) simultaneously move to another deme (``dest``) during the
    progress of the simulation. Each lineage currently present in the source
    population moves to the destination population with probability equal to
    ``proportion``. Note that ``source`` and ``dest`` are from the perspective
    of the coalescent process; please see the :ref:`sec_ancestry_models`
    section for more details on the interpretation of this migration model.

    This event class generalises the population split (``-ej``) and
    admixture (``-es``) events from ``ms``. Note that MassMigrations
    do *not* have any side effects on the migration matrix.

    .. important::
        This class is deprecated (but supported indefinitely);
        please use the :meth:`.Demography.add_mass_migration`
        method in new code. In addition, please see the new
        higher-level :ref:`sec_demography_events_population_split`
        and :ref:`sec_demography_events_admixture` events.

    :param float time: The time at which this event occurs in generations.
    :param int source: The ID of the source population.
    :param int dest: The ID of the destination population.
    :param float proportion: The probability that any given lineage within
        the source population migrates to the destination population.
    """

    source: int
    # dest only has a default because of the deprecated destination attr.
    dest: None | int = None
    proportion: float = 1.0
    # Deprecated.
    # TODO add a formal deprecation notice
    destination: int | None = dataclasses.field(default=None, repr=False)

    _type_str: ClassVar[str] = dataclasses.field(default="Mass Migration", repr=False)

    def __post_init__(self):
        # ``destination`` is the deprecated alias of ``dest``.
        if self.destination is None:
            return
        if self.dest is not None:
            raise ValueError("dest and destination are aliases; cannot supply both")
        self.dest = self.destination

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        ll_event = {
            "type": "mass_migration",
            "time": self.time,
            "source": self._convert_id(self.source),
            "dest": self._convert_id(self.dest),
            "proportion": self.proportion,
        }
        return ll_event

    def _parameters(self):
        items = (
            f"source={self.source}",
            f"dest={self.dest}",
            f"proportion={self.proportion:.3g}",
        )
        return ", ".join(items)

    def _effect(self):
        # Describe the backwards-in-time movement first, then restate it
        # from the forwards-in-time point of view.
        if self.proportion == 1.0:
            movement = (
                f"All lineages currently in population {self.source} move "
                f"to {self.dest} "
            )
        else:
            movement = (
                f"Lineages currently in population {self.source} move to {self.dest} "
                f"with probability {self.proportion:.3g} "
            )
        forwards = (
            "(equivalent to individuals "
            f"migrating from {self.dest} to {self.source} forwards in time)"
        )
        return movement + forwards

    def _as_lineage_movements(self):
        # A mass migration is exactly one movement of lineages.
        movement = LineageMovement(
            source=self._convert_id(self.source),
            dest=self._convert_id(self.dest),
            proportion=self.proportion,
        )
        return [movement]



@dataclasses.dataclass
class ActivatePopulationEvent(DemographicEvent):
    """
    Event that activates a population: at the given time the state of the
    population changes from inactive to active.
    """

    population: int | str | None = None
    """
    The name or ID of the population to activate.
    """

    _type_str: ClassVar[str] = dataclasses.field(
        default="Activate population event", repr=False
    )

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        ll_event = {
            "type": "activate_population_event",
            "time": self.time,
            "population": self._convert_id(self.population),
        }
        return ll_event

    def _parameters(self):
        return "population=" + f"{self.population}"

    def _effect(self):
        return f"Activates population {self.population}"


@dataclasses.dataclass
class PopulationSplit(LineageMovementEvent):
    """
    Event that moves all lineages from one or more derived populations
    into a single ancestral population.

    :param float time: The time at which this event occurs in generations.
    :param list(int) derived: The ID(s) of the derived population(s).
    :param int ancestral: The ID of the ancestral population.
    """

    derived: list[int | str]
    ancestral: int | str

    _type_str: ClassVar[str] = dataclasses.field(default="Population Split", repr=False)

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event, with
        every population reference converted to an integer ID.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        derived_ids = [self._convert_id(pop) for pop in self.derived]
        return {
            "type": "population_split",
            "time": self.time,
            "derived": derived_ids,
            "ancestral": self._convert_id(self.ancestral),
        }

    def _parameters(self):
        listing = _list_str(self.derived)
        return f"derived={listing}, ancestral={self.ancestral}"

    def _effect(self):
        derived = self.derived
        num_derived = len(derived)
        # The wording depends on whether there are one, two or some other
        # number of derived populations.
        if num_derived == 1:
            moved = f"the '{derived[0]}' derived population"
            deactivated = f"'{derived[0]}'"
        else:
            if num_derived == 2:
                listing = f"'{derived[0]}' and '{derived[1]}'"
            else:
                listing = _list_str(derived)
            moved = f"derived populations {listing}"
            deactivated = "the derived populations"
        plural = "s" if num_derived > 1 else ""
        return (
            f"Moves all lineages from {moved} "
            f"to the ancestral '{self.ancestral}' population. "
            f"Also set {deactivated} "
            "to inactive, and all migration rates to and from "
            f"the derived population{plural} to zero."
        )

    def _as_lineage_movements(self):
        # Every derived population sends all of its lineages to the
        # ancestral population.
        dest = self._convert_id(self.ancestral)
        movements = []
        for pop in self.derived:
            source = self._convert_id(pop)
            movements.append(LineageMovement(source=source, dest=dest, proportion=1))
        return movements


@dataclasses.dataclass
class Admixture(LineageMovementEvent):
    """
    Event that moves all lineages from an admixed (derived) population
    into a list of ancestral populations according to the given
    proportions.

    :param float time: The time at which this event occurs in generations.
    """

    derived: int | str
    ancestral: list[int | str]
    proportions: list[float]

    _type_str: ClassVar[str] = dataclasses.field(default="Admixture", repr=False)

    @property
    def num_ancestral(self):
        # The number of ancestral populations.
        return len(self.ancestral)

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event, with
        every population reference converted to an integer ID.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        derived_id = self._convert_id(self.derived)
        ancestral_ids = [self._convert_id(pop) for pop in self.ancestral]
        return {
            "type": "admixture",
            "time": self.time,
            "derived": derived_id,
            "ancestral": ancestral_ids,
            "proportions": self.proportions,
        }

    def _parameters(self):
        ancestral = _list_str(self.ancestral)
        proportions = _list_str(self.proportions, ".2f")
        return f"derived={self.derived} ancestral={ancestral} proportions={proportions}"

    def _effect(self):
        destinations = "; ".join(
            f"'{pop}' with proba {proba:.3g}"
            for pop, proba in zip(self.ancestral, self.proportions)
        )
        plural = "s" if len(self.ancestral) > 1 else ""
        return (
            f"Moves all lineages from admixed population '{self.derived}' "
            f"to ancestral population{plural}. "
            f"Lineages move to {destinations}. Set '{self.derived}' to inactive, "
            f"and all migration rates to and from '{self.derived}' to zero."
        )

    def _as_lineage_movements(self):
        source = self._convert_id(self.derived)
        destinations = [self._convert_id(pop) for pop in self.ancestral]
        # Conditioned on having already distributed a fraction q of the
        # lineages, we then need a fraction p / (1 - q) of the remaining
        # lineages to get an overall proportion of p.
        sequential = _proportions_to_sequential(self.proportions)
        movements = []
        for j, dest in enumerate(destinations):
            movements.append(
                LineageMovement(source=source, dest=dest, proportion=sequential[j])
            )
        return movements


class StateChangeEvent(DemographicEvent):
    """
    Superclass of events that change the state of the simulation in complex
    ways.

    This class adds no behaviour of its own; it only groups events such as
    the bottleneck events under a common type.
    """


# This is an unsupported/undocumented demographic event.
@dataclasses.dataclass
class SimpleBottleneck(StateChangeEvent):
    """
    Bottleneck in which the lineages in a population coalesce with
    probability ``proportion``.
    """

    population: int
    proportion: float = 1.0

    _type_str: ClassVar[str] = dataclasses.field(
        default="Simple Bottleneck", repr=False
    )

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        ll_event = {
            "type": "simple_bottleneck",
            "time": self.time,
            "population": self._convert_id(self.population),
            "proportion": self.proportion,
        }
        return ll_event

    def _parameters(self):
        items = (f"population={self.population}", f"proportion={self.proportion}")
        return ", ".join(items)

    def _effect(self):
        where = f"Lineages in population {self.population} coalesce with "
        return where + f"probability {self.proportion}"


# TODO document
@dataclasses.dataclass
class InstantaneousBottleneck(StateChangeEvent):
    """
    Bottleneck in a population described as being equivalent to
    ``strength`` generations of the coalescent.
    """

    population: int
    strength: float = 1.0

    _type_str: ClassVar[str] = dataclasses.field(
        default="Instantaneous Bottleneck", repr=False
    )

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        ll_event = {
            "type": "instantaneous_bottleneck",
            "time": self.time,
            "population": self._convert_id(self.population),
            "strength": self.strength,
        }
        return ll_event

    def _parameters(self):
        items = (f"population={self.population}", f"strength={self.strength}")
        return ", ".join(items)

    def _effect(self):
        strength = self.strength
        return f"Equivalent to {strength} generations of the coalescent"



[docs]
@dataclasses.dataclass
class CensusEvent(DemographicEvent):
    """
    An event that adds a node to each branch of every tree at a given time
    during the simulation. This may be used to record all ancestral haplotypes
    present at that time, and to extract other information related to these
    haplotypes: for instance to trace the local ancestry of a sample back to a
    set of contemporaneous ancestors, or to assess whether a subset of samples
    has coalesced more recently than the census time.

    See :ref:`sec_ancestry_census_events` for more details.

    .. important::
        This class is deprecated (but supported indefinitely);
        please use the :meth:`.Demography.add_census`
        method in new code.

    :param float time: The time at which this event occurs in generations.
    """

    _type_str: ClassVar[str] = dataclasses.field(default="Census", repr=False)

    def get_ll_representation(self, num_populations=None):
        """
        Returns the low-level dictionary representation of this event.
        """
        # We need to keep the num_populations argument until stdpopsim 0.1 is out
        # https://github.com/tskit-dev/msprime/issues/1037
        ll_event = {"type": "census_event"}
        ll_event["time"] = self.time
        return ll_event

    def _parameters(self):
        # A census has no parameters other than its time.
        return str()

    def _effect(self):
        description = "Insert census nodes to record the location of all lineages"
        return description



def _sequential_to_proportions(S):
    """
    Given a list of sequential lineage proportions out of a population,
    return the absolute proportions of the original population this
    corresponds to.
    """
    P = []
    for j in range(len(S)):
        P.append(S[j] * (1 - sum(P[:j])))
    return P


def _proportions_to_sequential(P):
    """
    Given a list of absolute proportions of lineages moving out of
    a population, return the sequential conditional movements required
    to give them same proportions.
    """
    # Conditioned on having already distributed a fraction q of the
    # lineages, the we need a fraction p / (1 - q) of the remaining
    # lineages to get an overall proportion of p
    C = [0 for _ in P]
    for j in range(len(P)):
        s = sum(P[:j])
        if s < 1:
            C[j] = P[j] / (1 - s)
    return C


def _matrix_exponential(A, n=5):
    """
    Returns the matrix exponential of A.
    Only works if the offdiagonals of A are nonnegative
    and the row sums of A are less than or equal to zero.
    Melloy & Bennett (1993), https://doi.org/10.1016/0377-0427(93)90036-B
    """
    assert np.max(np.diag(A)) <= 0
    assert np.min(np.tril(A, k=-1)) == 0
    assert np.min(np.triu(A, k=1)) == 0
    assert np.max(np.sum(A, 1)) <= 1e-10  # for floating-point error
    dA = (-1) * np.diag(A)
    dmax = np.max(dA)
    expA = np.eye(A.shape[0])
    if dmax > 0:
        P = A / dmax
        np.fill_diagonal(P, 1 - dA / dmax)
        # nscales is the number of scaling-and-squaring steps:
        # we compute exp(tA) and then square the result nscales times
        nscales = int(max(0, np.ceil(np.log2(dmax) - np.log2(0.2))))
        t = dmax / (2**nscales)
        # Now, exp(tA) = exp(-t) * (I + tP + (tP)^2 / 2 + ...)
        # and the k-th term in this sum is Pk=(tP/k times the previous)
        Pk = np.eye(A.shape[0])
        for k in range(1, n + 1):
            Pk = (t / k) * np.matmul(P, Pk)
            expA = expA + Pk
        expA *= np.exp(-t)
        expA = np.linalg.matrix_power(expA, 2**nscales)
    return expA


class PopulationStateMachine(enum.IntEnum):
    """
    During a simulation each population has three possible states described
    by this state machine. In general, a population follows:

    INACTIVE -> ACTIVE -> PREVIOUSLY_ACTIVE

    All populations are by default ACTIVE at the start of the simulation,
    except if they are "ancestral" in a population split event.
    In this case populations are initially INACTIVE. A population
    then transitions from INACTIVE -> ACTIVE when the corresponding
    population split event occurs.

    Populations transition from ACTIVE -> PREVIOUSLY_ACTIVE when they
    are "derived" in either population split or admixture events.

    No other transitions are possible.
    """

    # The integer values are those stored in the "state" entry of the
    # simulator's population configuration (see how PopulationState objects
    # are built in DemographyDebugger._make_epochs).
    INACTIVE = 0
    ACTIVE = 1
    PREVIOUSLY_ACTIVE = 2


@dataclasses.dataclass
class PopulationState:
    """
    Simple class to represent the state of a population in terms of its
    demographic parameters. Note: start and end here refer to time flowing
    *backwards*!
    """

    # Integer ID of the population.
    id: int  # noqa: A003
    # Name of the population.
    name: str
    # Population size at the start of the period being described.
    start_size: float
    # Population size at the end of the period being described.
    end_size: float
    # Growth rate of the population over the period being described.
    growth_rate: float
    # State of the population; compared against PopulationStateMachine
    # members by the ``active`` property.
    state: int

    @property
    def active(self):
        # True only in the ACTIVE state; both INACTIVE and PREVIOUSLY_ACTIVE
        # populations are reported as not active.
        return self.state == PopulationStateMachine.ACTIVE


@dataclasses.dataclass
class Epoch:
    """
    A single epoch of the simulation: a time interval within which the
    state of the demographic parameters is constant.
    """

    index: int
    start_time: float
    end_time: float
    populations: list[PopulationState]
    migration_matrix: list  # TODO numpy array
    events: list[DemographicEvent]

    def _title_text(self):
        # Heading of the form "Epoch[i]: [start, end) generations".
        interval = f"[{self.start_time:.3g}, {self.end_time:.3g})"
        return f"Epoch[{self.index}]: {interval} generations"

    def _population_state_text(self):
        # Summary of the total and active numbers of populations.
        total = len(self.populations)
        active = self.num_active_populations
        return f"Populations (total={total} active={active})"

    @property
    def demographic_events(self):
        # For compatibility with msprime 0.x
        return self.events

    @property
    def active_populations(self):
        # The populations whose ``active`` attribute is true, in order.
        active = []
        for pop in self.populations:
            if pop.active:
                active.append(pop)
        return active

    @property
    def num_active_populations(self):
        return sum(1 for pop in self.populations if pop.active)



[docs]
class DemographyDebugger:
    """
    Utilities to compute and display information about the state of populations
    during the different simulation epochs defined by demographic events.

    .. warning:: This class is not intended to be instantiated directly using
        the constructor - please use :meth:`.Demography.debug()` to obtain
        a DemographyDebugger for a given :class:`.Demography` instead.
    """

    def __init__(
        self,
        # Deprecated pre-1.0 parameters.
        Ne=1,
        population_configurations=None,
        migration_matrix=None,
        demographic_events=None,
        model=None,
        *,
        demography=None,
    ):
        """
        Builds the debugger for ``demography``. If no demography is given,
        one is first created from the deprecated pre-1.0 arguments. The
        ``model`` argument is accepted but not used here.
        """
        if demography is None:
            # Support the pre-1.0 syntax
            legacy_arguments = {
                "migration_matrix": migration_matrix,
                "demographic_events": demographic_events,
                "Ne": Ne,
                "ignore_sample_size": True,
            }
            demography = Demography.from_old_style(
                population_configurations, **legacy_arguments
            )
        self.demography = demography.validate()
        self.num_populations = demography.num_populations
        # Compute the epochs, then look for signs of a misspecified model.
        self._make_epochs()
        self._check_misspecification()

    def _make_epochs(self):
        """
        Fills ``self.epochs`` with one Epoch per interval reported by the
        simulator, stopping once an epoch with an infinite end time has
        been added.
        """
        self.epochs = []
        simulator = ancestry._parse_sim_ancestry(
            demography=self.demography, init_for_debugger=True
        )
        pending_events = self.demography.events
        num_events = len(pending_events)
        next_event = 0
        start_time = 0
        end_time = 0
        while not math.isinf(end_time):
            # Collect the events that occur at the start of this epoch.
            epoch_events = []
            while next_event < num_events and math.isclose(
                pending_events[next_event].time, start_time, abs_tol=1e-9
            ):
                epoch_events.append(pending_events[next_event])
                next_event += 1
            end_time = simulator.debug_demography()
            migration_matrix = simulator.migration_matrix
            pop_conf = simulator.population_configuration
            populations = []
            for j in range(self.num_populations):
                state = PopulationState(
                    id=j,
                    name=self.demography.populations[j].name,
                    start_size=simulator.compute_population_size(j, start_time),
                    end_size=simulator.compute_population_size(j, end_time),
                    growth_rate=pop_conf[j]["growth_rate"],
                    state=PopulationStateMachine(pop_conf[j]["state"]),
                )
                populations.append(state)
            epoch = Epoch(
                index=len(self.epochs),
                start_time=start_time,
                end_time=end_time,
                populations=populations,
                migration_matrix=migration_matrix,
                events=epoch_events,
            )
            self.epochs.append(epoch)
            # The next epoch starts where this one ended.
            start_time = end_time

    def _check_misspecification(self):
        """
        Check for things that might indicate model misspecification.
        """
        merged_pops = set()
        for epoch in self.epochs:
            for de in epoch.events:
                if isinstance(de, MassMigration) and de.proportion == 1:
                    merged_pops.add(de.source)
            mm = epoch.migration_matrix
            for pop_k in merged_pops:
                k = self.demography[pop_k].id
                if any(mm[k, :] != 0) or any(mm[:, k] != 0):
                    warnings.warn(
                        "Non-zero migration rates exist after merging "
                        f"population {k}. This almost certainly indicates "
                        "demographic misspecification.",
                        stacklevel=2,
                    )

    def _populations_table(self, epoch, as_text=True):
        active_populations = epoch.active_populations
        column_titles = ["", "start", "end", "growth_rate"]
        if len(active_populations) > 1:
            column_titles += [pop.name for pop in active_populations]
        data = []
        for pop in active_populations:
            row = [
                pop.name,
                f"{pop.start_size: .1f}",
                f"{pop.end_size: .1f}",
                f"{pop.growth_rate: .3g}",
            ]
            if len(active_populations) > 1:
                for other_pop in active_populations:
                    item = self.demography._migration_rate_info(
                        pop.id,
                        other_pop.id,
                        epoch.migration_matrix[pop.id, other_pop.id],
                    )
                    if as_text:
                        row.append(item.as_text())
                    else:
                        row.append(item)

            data.append(row)
        return column_titles, data

    def _populations_html(self, epoch):
        # HTML table of the active populations; the cells are passed to the
        # table builder as objects rather than as text.
        table = self._populations_table(epoch, as_text=False)
        column_titles, data = table
        caption = epoch._population_state_text()
        return core.html_table(caption, column_titles, data)

    def _populations_text(self, epoch):
        # Plain text table of the active populations in the epoch.
        titles, rows = self._populations_table(epoch)
        # One alignment character per column: four for the fixed columns,
        # plus one per migration column when those are present.
        num_active = epoch.num_active_populations
        migration_alignments = "^" * num_active if num_active > 1 else ""
        alignments = ">>><" + migration_alignments
        # Repack the table items as lists
        wrapped_titles = [[title] for title in titles]
        wrapped_rows = [[[cell] for cell in row] for row in rows]
        return core.text_table(
            epoch._population_state_text(), wrapped_titles, alignments, wrapped_rows
        )

    def _repr_html_(self):
        out = ""
        for epoch in self.epochs:
            if epoch.index > 0:
                assert len(epoch.events) > 0
                title = f"Events @ generation {epoch.start_time:.3g}"
                out += self.demography._events_html(epoch.events, title)
                out += "</div></details>"
            else:
                assert len(epoch.events) == 0
            title = epoch._title_text()
            out += f'<details open="true"><summary>{title}</summary>'
            # Indent the content div slightly
            out += '<div style="margin-left:20px">'
            out += self._populations_html(epoch)
        out += "</div>"
        out += "</details>"
        return f"<div>{out}</div>"


[docs]
    def print_history(self, output=sys.stdout):
        """
        Prints a summary of the history of the populations.

        Deprecated since 1.0: use ``print(debugger)`` instead.
        """
        print(self, file=output, end="")


    def __str__(self):
        def indent(table, header_char="╟", depth=4):
            lines = table.splitlines()
            s = header_char + (" " * depth) + lines[0] + "\n"
            for line in lines[1:]:
                s += "║" + (" " * depth) + line + "\n"
            return s

        def box(title):
            N = len(title) + 2
            top = "╠" + ("═" * N) + "╗"
            bottom = "╠" + ("═" * N) + "╝"
            return f"{top}\n║ {title} ║\n{bottom}\n"

        out = "DemographyDebugger\n"
        for epoch in self.epochs:
            if epoch.index > 0:
                assert len(epoch.events) > 0
                title = f"Events @ generation {epoch.start_time:.3g}"
                out += indent(self.demography._events_text(epoch.events, title))
            out += box(epoch._title_text())
            out += indent(self._populations_text(epoch))
        return out

    @property
    def population_size_history(self):
        """
        A freshly allocated (num_pops, num_epochs) numpy array whose [k, j]th
        entry is the size of population k at the start of epoch j.
        """
        history = np.zeros((self.num_populations, self.num_epochs))
        for col, epoch in enumerate(self.epochs):
            # ``column`` is a view, so writes land in ``history``.
            column = history[:, col]
            for row, population in enumerate(epoch.populations):
                column[row] = population.start_size
        return history

    @property
    def epoch_start_time(self):
        """
        A numpy array holding the ``start_time`` of each epoch, in the order
        the epochs are stored.
        """
        start_times = list(map(operator.attrgetter("start_time"), self.epochs))
        return np.array(start_times)

    @property
    def num_epochs(self):
        """
        The number of epochs in the debugger, i.e. the length of ``epochs``.
        """
        epoch_count = len(self.epochs)
        return epoch_count


[docs]
    def population_size_trajectory(self, steps):
        """
        Return the size of every population at each of the requested times.

        The sizes are the `initial_size` parameters of the demographic model,
        modified by any population growth rates, evaluated at the time points
        given by `steps`.

        :param list steps: List of times ago at which the population
            size will be computed.
        :return: Returns a numpy array of population sizes, with one column per
            population, whose [i,j]th entry is the size of population
            j at time steps[i] ago.
        """
        trajectory = np.zeros([len(steps), self.num_populations])
        # Iterating a 2D array yields row views, so writes update ``trajectory``.
        for row, when in zip(trajectory, steps):
            sizes, _ = self._pop_size_and_migration_at_t(when)
            row[:] = sizes
        return trajectory



[docs]
    def lineage_probabilities(self, steps, sample_time=0):
        """
        Returns an array such that P[j, a, b] is the probability that a lineage that
        started in population a at time sample_time is in population b at time steps[j]
        ago.

        This function reports sampling probabilities _before_ mass migration events
        (or other events that move lineages) at a step time, if a mass migration
        event occurs at one of those times. Migrations will then affect the next
        time step.

        :param list steps: A strictly increasing list of times to compute
            probabilities.
        :param sample_time: The time of sampling of the lineage. For any times in steps
            that are more recent than sample_time, the probability of finding the
            lineage in any population is zero.
        :return: An array of dimension len(steps) by num pops by num_pops.
        :raises ValueError: If ``steps`` is not strictly increasing.
        """
        # Validate explicitly: the bookkeeping below requires sorted, distinct
        # steps, and assertions are stripped when Python runs with -O.
        if not np.all(np.diff(np.asarray(steps, dtype=float)) > 0):
            raise ValueError("`steps` must be a sequence of increasing times.")

        num_pops = self.num_populations
        # P[i, j] will be the probability that a lineage that started in i is now in j
        P = np.eye(num_pops)

        # epochs are defined by mass migration events or changes to population sizes
        # or migration rates, so we add the epoch interval times to the steps that we
        # need to account for
        epoch_breaks = [t for t in self.epoch_start_time if t not in steps]
        all_steps = np.concatenate([steps, epoch_breaks])

        # The sampling time must also be a step so that we can start from it.
        sampling = []
        if sample_time not in all_steps:
            sampling.append(sample_time)
        all_steps = np.concatenate((all_steps, sampling))

        ix = np.argsort(all_steps)
        all_steps = all_steps[ix]
        # keep track of the steps to report in P_out
        keep_steps = np.concatenate(
            [
                np.repeat(True, len(steps)),
                np.repeat(False, len(epoch_breaks)),
                np.repeat(False, len(sampling)),
            ]
        )[ix]

        # Internal sanity checks; user input was validated above.
        assert len(np.unique(all_steps)) == len(all_steps)
        assert np.all(steps == all_steps[keep_steps])
        P_out = np.zeros((len(all_steps), num_pops, num_pops))

        # Steps more recent than the sampling time keep probability zero.
        first_step = 0
        while all_steps[first_step] < sample_time:
            first_step += 1

        P_out[first_step] = P

        # Group lineage movements by the time at which they occur, preserving
        # the order of the events. Higher-level lineage movement events like
        # Admixtures and PopulationSplits are converted into LineageMovement
        # instances, which are equivalent to MassMigrations.
        movements_at = collections.defaultdict(list)
        for event in self.demography.events:
            if isinstance(event, LineageMovementEvent):
                movements_at[event.time].extend(event._as_lineage_movements())

        for jj in range(first_step, len(all_steps) - 1):
            t_j = all_steps[jj]

            # apply any mass migration events to P
            # so if we sample at this time, we do not account for the instantaneous
            # mass migration events that occur at the same time. that will show up
            # at the next step
            if t_j > sample_time:
                for move in movements_at.get(t_j, ()):
                    S = np.eye(num_pops, num_pops)
                    S[move.source, move.dest] = move.proportion
                    S[move.source, move.source] = 1 - move.proportion
                    P = np.matmul(P, S)

            # get continuous migration matrix over next interval
            _, M = self._pop_size_and_migration_at_t(t_j)
            dt = all_steps[jj + 1] - all_steps[jj]
            dM = np.diag([sum(s) for s in M])
            # advance to next interval time (dt) taking into account continuous mig
            P = P.dot(_matrix_exponential(dt * (M - dM)))
            P_out[jj + 1] = P

        return P_out[keep_steps]



[docs]
    def possible_lineage_locations(self, samples=None):
        """
        Determine, for the given sampling configuration, which populations can
        possibly contain lineages over epochs defined by demographic events and
        sampling times. If no sampling configuration is given, we assume we
        sample lineages from every population whose default sampling time is
        zero.

        The epoch intervals returned are those in which there are *distinct*
        configurations of possible lineage locations, and so the number of
        returned epochs may be less than the total number of epochs defined
        by the demography and will depend on the input sample configuration.

        The samples are specified by either a list of population identifiers (
        integer IDs or string names) or by a list of :class:`.SampleSet` objects,
        allowing sampling times to be specified explicitly. If the ``time`` field
        of the :class:`.SampleSet` is not specified (or population IDs are used)
        samples are taken at the population's `default_sampling_time`. Only
        :class:`.SampleSet` objects with ``num_samples > 0`` are counted as
        contributing samples to a particular population.

        To support legacy code, :class:`.Sample` objects from the 0.x API
        can also be provided, although their use is discouraged in new code.

        :param list samples: The populations that we sample from. Can be either
            a list of population identifiers, :class:`.SampleSet` or
            :class:`.Sample` objects.
        :return: Returns a dictionary with epoch intervals as keys whose values are
            boolean arrays with length equal to the number of populations,
            indicating which populations could possibly contain lineages over that
            epoch. The epoch intervals are given by tuples: (epoch start, epoch end).
            The final epoch has end time of infinity.
        """
        if samples is None:
            samples = [
                population.id
                for population in self.demography.populations
                if population.default_sampling_time == 0
            ]

        # Sampling configuration: {time: {populations sampled at that time}}
        pops_by_time = collections.defaultdict(set)
        for sample in samples:
            if isinstance(sample, (ancestry.Sample, ancestry.SampleSet)):
                pop_id = self.demography[sample.population].id
                if sample.time is None:
                    when = self.demography[pop_id].default_sampling_time
                else:
                    when = sample.time
                if isinstance(sample, ancestry.SampleSet) and sample.num_samples <= 0:
                    # If someone specifies 0 samples it should not be counted
                    continue
            else:
                # Assume this is a population identifier.
                population = self.demography[sample]
                pop_id = population.id
                when = population.default_sampling_time
            pops_by_time[when].add(pop_id)

        # Interval boundaries are the epoch start times plus the sampling times.
        boundaries = sorted({*self.epoch_start_time, *pops_by_time})
        epochs = list(zip(boundaries[:-1], boundaries[1:]))
        epochs.append((boundaries[-1], np.inf))

        # need to go a bit beyond last step and into the final epoch that extends to inf
        probe_steps = boundaries + [boundaries[-1] + 1]

        reachable = {e: np.zeros(self.num_populations, dtype=bool) for e in epochs}
        for when, pop_ids in pops_by_time.items():
            probabilities = self.lineage_probabilities(probe_steps, sample_time=when)
            for epoch, P in zip(epochs, probabilities[1:]):
                if epoch[1] <= when:
                    # samples shouldn't affect the epoch previous to the sampling time
                    continue
                for pop_id in pop_ids:
                    reachable[epoch][P[pop_id] > 0] = True

        # join epochs if adjacent epochs have same set of possible live populations
        merged = {}
        position = 0
        while position < len(epochs):
            start, end = epochs[position]
            flags = reachable[epochs[position]]
            following = position + 1
            while following < len(epochs) and np.all(
                reachable[epochs[following]] == flags
            ):
                end = epochs[following][1]
                following += 1
            merged[(start, end)] = flags
            position = following

        return merged



[docs]
    def mean_coalescence_time(
        self, lineages, min_pop_size=1, steps=None, rtol=0.005, max_iter=12
    ):
        """
        Compute the mean time until coalescence between pairs of the specified
        sample ``lineages``. Sample lineages are specified as a mapping from
        populations to the number of **monoploid** sample genomes present in
        that population at time zero. See the
        :ref:`sec_demography_numerical_coalescence` section for usage examples
        and more details.

        .. important:: This function assumes a diploid model when computing
            coalescence rates (see the
            :ref:`sec_ancestry_ploidy_coalescent_time_scales` section for more
            information).

        The calculation is performed by using
        :meth:`~.DemographyDebugger.coalescence_rate_trajectory`
        to compute the probability that the lineages have not yet coalesced by time `t`,
        and using these to approximate :math:`E[T] = \\int_0^\\infty P(T > t) dt`,
        where :math:`T` is the coalescence time. See
        :meth:`~.DemographyDebugger.coalescence_rate_trajectory`
        for more details.

        To compute this, an adequate time discretisation must be arrived at
        by iteratively extending or refining the current discretisation.
        Debugging information about numerical convergence of this procedure is
        logged using the Python :mod:`logging` infrastructure.
        The `daiquiri <https://pypi.org/project/daiquiri/>`_ module is a
        convenient way to set up logging, and we can use it to make these
        messages appear on stderr like this::

            import daiquiri

            daiquiri.setup(level="DEBUG")
            debugger.mean_coalescence_time({0: 2})

        Briefly, this outputs iteration number, mean coalescence time, maximum
        difference in probability of not having coalesced yet, difference to
        last coalescence time, probability of not having coalesced by the final
        time point, and whether the last iteration was an extension or
        refinement.

        :param dict lineages: A mapping of populations (either integer IDs
            or string names: see the :ref:`sec_demography_populations_identifiers`
            section for more details) to the number of monoploid sample lineages
            in that population.
        :param int min_pop_size: See
            :meth:`~.DemographyDebugger.coalescence_rate_trajectory`.
        :param list steps: The time discretisation to start out with (by default,
            picks something based on epoch times).
        :param float rtol: The relative tolerance to determine mean coalescence time
            to (used to decide when to stop subdividing the steps).
        :param int max_iter: The maximum number of times to extend or subdivide
            the steps.
        :return: The mean coalescence time (a number).
        :rtype: float
        :raises ValueError: If the tolerances are still not met after
            ``max_iter`` iterations.
        """

        def mean_time(steps, P):
            # Mean is int_0^infty P(T > t) dt, which we estimate by discrete integration
            # assuming that f(t) = P(T > t) is piecewise exponential:
            # if f(u) = a exp(bu) then b = log(f(t)/f(s)) / (t-s) for each s < t, so
            # \int_s^t f(u) du = (a/b) \int_s^t exp(bu) b du = (a/b)(exp(bt) - exp(bs))
            #    = (t - s) * (f(t) - f(s)) / log(f(t) / f(s))
            # unless b = 0, of course.
            assert steps[0] == 0
            dt = np.diff(steps)
            dP = np.diff(P)

            with np.errstate(divide="ignore", invalid="ignore"):
                dlogP = np.diff(np.log(P))
            nz = np.logical_and(dP < 0, P[1:] * P[:-1] > 0)
            const = dP == 0
            return np.sum(dt[const] * (P[:-1])[const]) + np.sum(
                dt[nz] * dP[nz] / dlogP[nz]
            )

        def needs_another_iteration():
            # The factors of 20 here are probably not optimal: clearly, we need to
            # compute P accurately, but there's no good reason for this stopping
            # rule. If populations have piecewise constant size then we shouldn't
            # need this: setting steps equal to the epoch boundaries should
            # suffice; while if there is very fast exponential change in some
            # epochs caution is needed.
            return last_P > rtol or p_diff > rtol / 20 or m_diff > rtol / 20

        if steps is None:
            last_N = max(self.population_size_history[:, self.num_epochs - 1])
            last_epoch = self.epoch_start_time[-1]
            steps = sorted(
                set(np.linspace(0, last_epoch + 12 * last_N, 101)).union(
                    self.epoch_start_time
                )
            )
        p_diff = m_diff = np.inf
        last_P = np.inf
        step_type = "none"
        n = 0
        logger.debug(
            "iter    mean    P_diff    mean_diff last_P    adjust_type  "
            "num_steps  last_step"
        )
        while n < max_iter and needs_another_iteration():
            last_steps = steps
            if n == 0:
                # Baseline on the initial discretisation; later iterations
                # reuse the previous iteration's results.
                _, P1 = self.coalescence_rate_trajectory(
                    steps=last_steps,
                    lineages=lineages,
                    min_pop_size=min_pop_size,
                    double_step_validation=False,
                )
                m1 = mean_time(last_steps, P1)
            if last_P > rtol:
                # Too much probability mass beyond the last step: go further back.
                step_type = "extend"
                steps = np.concatenate(
                    [steps, np.linspace(steps[-1], steps[-1] * 1.2, 20)[1:]]
                )
            else:
                # Halve every interval by inserting the midpoints.
                step_type = "refine"
                inter = steps[:-1] + np.diff(steps) / 2
                steps = np.concatenate([steps, inter])
                steps.sort()
            _, P2 = self.coalescence_rate_trajectory(
                steps=steps,
                lineages=lineages,
                min_pop_size=min_pop_size,
                double_step_validation=False,
            )
            m2 = mean_time(steps, P2)
            # Compare the two discretisations at the time points they share.
            keep_steps = np.isin(steps, last_steps)
            p_diff = max(np.abs(P1 - P2[keep_steps]))
            m_diff = np.abs(m1 - m2) / m2
            last_P = P2[-1]
            n += 1
            # Use the old-style string formatting as this is the logging default
            logger.debug(
                "%d %g %g %g %g %s %d %d",
                n,
                m2,
                p_diff,
                m_diff,
                last_P,
                step_type,
                len(steps),
                max(steps),
            )
            P1 = P2
            m1 = m2

        # Only fail if the tolerances are still unmet: a result that converges
        # exactly on the last permitted iteration is valid and is returned.
        if needs_another_iteration():
            raise ValueError(
                "Did not converge on an adequate discretisation: "
                "Increase max_iter or rtol. Consult the log for "
                "debugging information"
            )
        return m2



[docs]
    def coalescence_rate_trajectory(
        self, steps, lineages, min_pop_size=1, double_step_validation=True
    ):
        """
        Calculate the mean coalescence rates and proportions
        of uncoalesced lineages between pairs of the specified sample lineages,
        at each of the times ago listed by steps, in this demographic model.
        Sample lineages are specified as a mapping from
        populations to the number of **monoploid** sample genomes present in
        that population at time zero. See the
        :ref:`sec_demography_numerical_trajectories` section for usage examples
        and more details.

        The coalescence rate at time t in the past is the average rate of coalescence of
        as-yet-uncoalesced lineages, computed as follows: let :math:`p(t)` be
        the probability that the lineages of a randomly chosen pair of samples
        has not yet coalesced by time :math:`t`, let :math:`p(z,t)` be the
        probability that the lineages of a randomly chosen pair of samples has
        not yet coalesced by time :math:`t` *and* are both in population
        :math:`z`, and let :math:`N(z,t)` be the diploid effective population
        size of population :math:`z` at time :math:`t`. Then the mean
        coalescence rate at time :math:`t` is :math:`r(t) = (\\sum_z p(z,t) /
        (2 * N(z,t))) / p(t)`.

        The computation is done by approximating population size trajectories
        with piecewise constant trajectories between each of the steps. For
        this to be accurate, the distance between the steps must be small
        enough so that (a) short epochs (e.g., bottlenecks) are not missed, and
        (b) populations do not change in size too much over that time, if they
        are growing or shrinking. This function optionally provides a simple
        check of this approximation by recomputing the coalescence rates on a
        grid of steps twice as fine and throwing a warning if the resulting
        values do not match to a relative tolerance of 0.001.

        :param list steps: The times ago at which coalescence rates will be computed.
            Must be non-negative and strictly increasing.
        :param dict lineages: A mapping of populations (either integer IDs
            or string names: see the :ref:`sec_demography_populations_identifiers`
            section for more details) to the number of monoploid sample lineages
            in that population.
        :param int min_pop_size: The smallest allowed population size during
            computation of coalescent rates (i.e., coalescence rates are actually
            1 / (2 * max(min_pop_size, N(z,t)))). Spurious very small population
            sizes can occur in models where populations grow exponentially but are
            unused before some time in the past, and lead to floating point error.
            This should be set to a value smaller than the smallest
            desired population size in the model.
        :param bool double_step_validation: Whether to perform the check that
            step sizes are sufficiently small, as described above. This is highly
            recommended, and will take at most four times the computation.
        :return: A tuple of arrays whose jth elements, respectively, are the
            coalescence rate at the jth time point (denoted r(t[j]) above),
            and the probability that a randomly chosen pair of lineages has
            not yet coalesced (denoted p(t[j]) above).
        :rtype: (numpy.ndarray, numpy.ndarray)
        :raises ValueError: If ``steps`` is not strictly increasing or contains
            a negative time.
        """
        # Tally the sample genomes per population ID; several keys may
        # resolve to the same population.
        num_samples = np.zeros(self.num_populations, dtype=int)
        for population, num_genomes in lineages.items():
            pop_id = self.demography[population].id
            num_samples[pop_id] += num_genomes
        steps = np.array(steps)
        if not np.all(np.diff(steps) > 0):
            raise ValueError("`steps` must be a sequence of increasing times.")
        if np.any(steps < 0):
            raise ValueError("`steps` must be non-negative")
        r, p_t = self._calculate_coalescence_rate_trajectory(
            steps=steps, num_samples=num_samples, min_pop_size=min_pop_size
        )
        if double_step_validation:
            # Recompute on a grid with the midpoint of every interval added and
            # compare the values at the original time points.
            inter = steps[:-1] + np.diff(steps) / 2
            double_steps = np.concatenate([steps, inter])
            double_steps.sort()
            rd, p_td = self._calculate_coalescence_rate_trajectory(
                steps=double_steps, num_samples=num_samples, min_pop_size=min_pop_size
            )
            # The original steps are the even-indexed entries of the finer grid.
            assert np.all(steps == double_steps[::2])
            r_prediction_close = np.allclose(r, rd[::2], rtol=1e-3, equal_nan=True)
            p_prediction_close = np.allclose(p_t, p_td[::2], rtol=1e-3, equal_nan=True)
            if not (r_prediction_close and p_prediction_close):
                warnings.warn(
                    "Doubling the number of steps has resulted in different "
                    "predictions, please re-run with smaller step sizes to ensure "
                    "numerical accuracy.",
                    stacklevel=2,
                )
        return r, p_t


    def _calculate_coalescence_rate_trajectory(self, steps, num_samples, min_pop_size):
        """
        Compute the mean pairwise coalescence rate and the probability of not
        having coalesced at each of the times in ``steps``.

        The state of a pair of lineages is tracked in ``P``, a square matrix
        with one row and column per ordered pair of populations (``IA[x, y]``
        is the index of the pair ``(x, y)``). Time zero and any epoch start
        times that are not already in ``steps`` are added as internal steps
        and dropped again from the returned arrays. Every step, including the
        last one, is handled identically: lineage movements occurring at that
        time are applied, and the rate is computed from the population sizes
        at that time.

        :param numpy.ndarray steps: The times ago at which to report values.
        :param num_samples: The number of sample genomes in each population,
            indexed by population ID.
        :param min_pop_size: Lower bound applied to population sizes when
            computing coalescence rates.
        :return: A tuple ``(r, p_t)`` of arrays with one entry per element of
            ``steps``: the coalescence rate (NaN where the probability of not
            having coalesced is not positive), and the probability of not
            having coalesced.
        """
        num_pops = self.num_populations
        num_states = num_pops**2
        IA = np.arange(num_states).reshape([num_pops, num_pops])
        Identity = np.eye(num_pops)

        def pair_movement_matrix(move):
            # Transition matrix over pairs of lineage locations for a movement
            # of proportion p of the lineages from population a to b.
            a = move.source
            b = move.dest
            p = move.proportion
            S = np.eye(num_states, num_states)
            for x in range(num_pops):
                if x == a:
                    S[IA[a, a], IA[a, b]] = S[IA[a, a], IA[b, a]] = p * (1 - p)
                    S[IA[a, a], IA[b, b]] = p**2
                    S[IA[a, a], IA[a, a]] = (1 - p) ** 2
                else:
                    S[IA[x, a], IA[x, b]] = S[IA[a, x], IA[b, x]] = p
                    S[IA[x, a], IA[x, a]] = S[IA[a, x], IA[a, x]] = 1 - p
            return S

        # Initial distribution: weight of each ordered pair of distinct samples.
        P = np.zeros([num_states, num_states])
        for x in range(num_pops):
            for y in range(num_pops):
                P[IA[x, y], IA[x, y]] = num_samples[x] * (num_samples[y] - (x == y))
        P = P / np.sum(P)

        # add time zero and the epoch breaks if not there already, but remember
        # which steps they are. Times already in ``steps`` are not duplicated,
        # so that events at such a time are only applied once.
        epoch_breaks = [t for t in {0.0, *self.epoch_start_time} if t not in steps]
        steps_b = np.concatenate([steps, epoch_breaks])
        ix = np.argsort(steps_b)
        steps_b = steps_b[ix]
        keep_steps = np.concatenate(
            [np.repeat(True, len(steps)), np.repeat(False, len(epoch_breaks))]
        )[ix]
        assert np.all(steps == steps_b[keep_steps])

        # Group lineage movements by time, preserving event order. Higher-level
        # lineage movement events like Admixtures and PopulationSplits are
        # converted into LineageMovement instances, which are equivalent to
        # MassMigrations.
        movements_at = collections.defaultdict(list)
        for event in self.demography.events:
            if isinstance(event, LineageMovementEvent):
                movements_at[event.time].extend(event._as_lineage_movements())

        num_steps = len(steps_b)
        # recall that steps_b[0] = 0.0
        r = np.zeros(num_steps)
        p_t = np.zeros(num_steps)
        for j, time in enumerate(steps_b):
            N, M = self._pop_size_and_migration_at_t(time)
            # Coalescence only happens when both lineages are in the same
            # population, i.e. in the states (idx, idx).
            C = np.zeros([num_states, num_states])
            for idx in range(num_pops):
                C[IA[idx, idx], IA[idx, idx]] = 1 / (2 * max(min_pop_size, N[idx]))
            for move in movements_at.get(time, ()):
                P = np.matmul(P, pair_movement_matrix(move))
            p_notcoal = np.sum(P)
            p_t[j] = p_notcoal
            if p_notcoal > 0:
                r[j] = np.sum(np.matmul(P, C)) / p_notcoal
            else:
                r[j] = np.nan
            if j + 1 < num_steps:
                # Advance to the next step under continuous migration and
                # coalescence at the rates in force at this step.
                dt = steps_b[j + 1] - time
                dM = np.diag([sum(s) for s in M])
                G = (np.kron(M - dM, Identity) + np.kron(Identity, M - dM)) - C
                P = np.matmul(P, _matrix_exponential(dt * G))
        return r[keep_steps], p_t[keep_steps]

    def _pop_size_and_migration_at_t(self, t):
        """
        Returns a tuple (N, M) of population sizes (N) and migration rates (M) at
        time t ago.

        Note: this isn't part of the external API as it is better to provide
        separate methods to access the population size and migration rates, and
        needing both together is specialised for internal calculations.

        :param float t: The time ago.
        :return: A tuple ``(N, M)`` where ``N`` is a newly allocated array with
            one size per population and ``M`` is the ``migration_matrix`` of
            the epoch containing ``t`` (the stored object, not a copy).
        """
        # Find the first epoch that ends strictly after t.
        j = 0
        while self.epochs[j].end_time <= t:
            j += 1
        epoch = self.epochs[j]
        # Time elapsed since the start of the epoch; the same for all populations.
        elapsed = t - epoch.start_time
        # Only this epoch's start sizes are needed, so read them directly
        # rather than building the full population size history.
        N = np.zeros(self.num_populations)
        for i, pop in enumerate(epoch.populations):
            N[i] = pop.start_size * np.exp(-pop.growth_rate * elapsed)
        return N, epoch.migration_matrix