from __future__ import annotations

import abc
from collections.abc import Callable, Hashable, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Protocol, TypeVar, runtime_checkable

if TYPE_CHECKING:
    from IPython.display import DisplayObject

CollType = TypeVar("CollType", bound="DaskCollection")
CollType_co = TypeVar("CollType_co", bound="DaskCollection", covariant=True)
PostComputeCallable = Callable


class SchedulerGetCallable(Protocol):
    """Protocol defining the signature of a ``__dask_scheduler__`` callable."""

    def __call__(
        self,
        dsk: Mapping,
        keys: Sequence[Hashable] | Hashable,
        **kwargs: Any,
    ) -> Any:
        """Method called as the default scheduler for a collection.

        Parameters
        ----------
        dsk : Mapping
            The task graph.
        keys : Sequence[Hashable] | Hashable
            Key(s) corresponding to the desired data.
        **kwargs : Any
            Additional arguments.

        Returns
        -------
        Any
            Result(s) associated with `keys`.

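        Examples
        --------
        A minimal sketch of a conforming scheduler, here simply
        delegating to Dask's built-in synchronous scheduler
        (``naive_get`` is an illustrative name, not part of Dask)::

            from dask.local import get_sync

            def naive_get(dsk, keys, **kwargs):
                # Run the graph in the calling thread and return the
                # result(s) for ``keys``.
                return get_sync(dsk, keys, **kwargs)
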
        """
        raise NotImplementedError("Inheriting class must implement this method.")


class PostPersistCallable(Protocol[CollType_co]):
    """Protocol defining the signature of a ``__dask_postpersist__`` callable."""

    def __call__(
        self,
        dsk: Mapping,
        *args: Any,
        rename: Mapping[str, str] | None = None,
    ) -> CollType_co:
        """Method called to rebuild a persisted collection.

        Parameters
        ----------
        dsk: Mapping
            A mapping which contains at least the output keys returned
            by __dask_keys__().
        *args : Any
            Additional optional arguments. If no extra arguments are
            necessary, it must be an empty tuple.
        rename : Mapping[str, str], optional
            If defined, it indicates that output keys may be changing
            too; e.g. if the previous output of :meth:`__dask_keys__`
            was ``[("a", 0), ("a", 1)]``, after calling
            ``rebuild(dsk, *extra_args, rename={"a": "b"})``
            it must become ``[("b", 0), ("b", 1)]``.
            The ``rename`` mapping may not contain the collection
            name(s); in such a case the associated keys do not change.
            It may contain replacements for unexpected names, which
            must be ignored.

        Returns
        -------
        Collection
            An equivalent Dask collection with the same keys as
            computed through a different graph.

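        Examples
        --------
        A minimal sketch for a hypothetical single-name collection
        ``MyCollection`` (an illustrative class, not part of Dask)::

            def rebuild(dsk, *args, rename=None):
                name = "a"              # this collection's sole name
                if rename:              # honour a requested rename
                    name = rename.get(name, name)
                return MyCollection(dsk, name)
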
        r   Nr   )r   r   r$   r%   r   r   r   r   1   s    "zPostPersistCallable.__call__r   r   r   r   r   r#   .   s   r#   c                   @  s   e Zd ZU dZejddddZejddddZejd	dd
dZejddddZ	ejddddZ
ded< ded< ejdddddZejddddddZejd'd d!d"dd#d$d%d&ZdS )(r   z5Protocol defining the interface of a Dask collection.r   r   c                 C  s   t ddS )a  The Dask task graph.

        The core Dask collections (Array, DataFrame, Bag, and Delayed)
        use a :py:class:`~dask.highlevelgraph.HighLevelGraph` to
        represent the collection task graph. It is also possible to
        represent the task graph as a low level graph using a Python
        dictionary.

        Returns
        -------
        Mapping
            The Dask task graph. If the instance returns a
            :py:class:`dask.highlevelgraph.HighLevelGraph` then the
            :py:func:`__dask_layers__` method must be implemented, as
            defined by the :py:class:`~dask.typing.HLGDaskCollection`
            protocol.

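        Examples
        --------
        The simplest conforming graph is a plain dictionary mapping
        keys to tasks (``add`` is illustrative)::

            {"x": 1, "y": 2, "z": (add, "x", "y")}
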
        r   Nr   r   r   r   r   __dask_graph__Z   s    zDaskCollection.__dask_graph__zlist[Hashable]c                 C  s   t ddS )a:  The output keys of the task graph.

        Note that there are additional constraints on keys for a Dask
        collection than those described in the :doc:`task graph
        specification documentation <spec>`. These additional
        constraints are described below.

        All keys must either be non-empty strings or tuples where the
        first element is a non-empty string, followed by zero or more
        arbitrary hashables. The non-empty string is commonly known as
        the *collection name*. All collections embedded in the dask
        package have exactly one name, but this is not a requirement.

        These are all valid outputs:

        - ``[]``
        - ``["x", "y"]``
        - ``[[("y", "a", 0), ("y", "a", 1)], [("y", "b", 0), ("y", "b", 1)]``

        Returns
        -------
        list
            A possibly nested list of keys that represent the outputs
            of the graph. After computation, the results will be
            returned in the same layout, with the keys replaced with
            their corresponding outputs.

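        Examples
        --------
        A sketch with a Dask array; the exact key names depend on the
        input and the Dask version:

        >>> import dask.array as da  # doctest: +SKIP
        >>> da.ones(10, chunks=5).__dask_keys__()  # doctest: +SKIP
        [('ones-...', 0), ('ones-...', 1)]
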
        r   Nr   r'   r   r   r   __dask_keys__p   s    zDaskCollection.__dask_keys__z!tuple[PostComputeCallable, tuple]c                 C  s   t ddS )a  Finalizer function and optional arguments to construct final result.

        Upon computation, each key in the collection will have an
        in-memory result; the postcompute function combines each key's
        result into a final in-memory representation. For example,
        dask.array.Array concatenates the arrays at each chunk into a
        final in-memory array.

        Returns
        -------
        PostComputeCallable
            Callable that receives the sequence of the results of each
            final key along with optional arguments. An example signature
            would be ``finalize(results: Sequence[Any], *args)``.
        tuple[Any, ...]
            Optional arguments passed to the function following the
            key results (the `*args` part of the
            ``PostComputeCallable``). If no additional arguments are to
            be passed then this must be an empty tuple.

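        Examples
        --------
        A sketch of a collection whose final result is simply the list
        of its per-key results (``finalize`` is illustrative)::

            def finalize(results):
                return list(results)

            def __dask_postcompute__(self):
                return finalize, ()
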
        r   Nr   r'   r   r   r   __dask_postcompute__   s    z#DaskCollection.__dask_postcompute__z!tuple[PostPersistCallable, tuple]c                 C  s   t ddS )a  Rebuilder function and optional arguments to construct a persisted collection.

        See also the documentation for :py:class:`dask.typing.PostPersistCallable`.

        Returns
        -------
        PostPersistCallable
            Callable that rebuilds the collection. The signature
            should be
            ``rebuild(dsk: Mapping, *args: Any, rename: Mapping[str, str] | None)``
            (as defined by the
            :py:class:`~dask.typing.PostPersistCallable` protocol).
            The callable should return an equivalent Dask collection
            with the same keys as `self`, but with results that are
            computed through a different graph. In the case of
            :py:func:`dask.persist`, the new graph will have just the
            output keys and the values already computed.
        tuple[Any, ...]
            Optional arguments passed to the rebuild callable. If no
            additional arguments are to be passed then this must be an
            empty tuple.

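        Examples
        --------
        A sketch for a hypothetical collection ``MyCollection`` whose
        rebuild callable takes its name as an extra argument::

            def __dask_postpersist__(self):
                # ``MyCollection._rebuild`` must satisfy the
                # ``PostPersistCallable`` protocol.
                return MyCollection._rebuild, (self.name,)
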
        r   Nr   r'   r   r   r   __dask_postpersist__   s    z#DaskCollection.__dask_postpersist__r   c                 C  s   t ddS )z+Value that must fully represent the object.r   Nr   r'   r   r   r   __dask_tokenize__   s    z DaskCollection.__dask_tokenize__r   Z__dask_optimize__z"staticmethod[SchedulerGetCallable]Z__dask_scheduler__)r   r   c                 K  s   t ddS )a  Compute this dask collection.

        This turns a lazy Dask collection into its in-memory
        equivalent. For example a Dask array turns into a NumPy array
        and a Dask dataframe turns into a Pandas dataframe. The entire
        dataset must fit into memory before calling this operation.

        Parameters
        ----------
        scheduler : string, optional
            Which scheduler to use, such as "threads", "synchronous", or
            "processes". If not provided, the default is to check the
            global settings first, and then fall back to the
            collection defaults.
        optimize_graph : bool, optional
            If True [default], the graph is optimized before
            computation. Otherwise the graph is run as is. This can be
            useful for debugging.
        **kwargs
            Extra keywords to forward to the scheduler function.

        Returns
        -------
        The collection's computed result.

        See Also
        --------
        dask.compute

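        Examples
        --------
        A small sketch with a Dask array:

        >>> import dask.array as da  # doctest: +SKIP
        >>> da.ones(10, chunks=5).compute()  # doctest: +SKIP
        array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
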
        r   Nr   r   r   r   r   r   compute   s     zDaskCollection.computer   )r   r   r   c                 K  s   t ddS )a  Persist this dask collection into memory

        This turns a lazy Dask collection into a Dask collection with
        the same metadata, but now with the results fully computed or
        actively computing in the background.

        The action of this function differs significantly depending on
        the active task scheduler. If the task scheduler supports
        asynchronous computing, as is the case with the
        dask.distributed scheduler, then persist will return
        *immediately* and the return value's task graph will contain
        Dask Future objects. However if the task scheduler only
        supports blocking computation then the call to persist will
        *block* and the return value's task graph will contain
        concrete Python results.

        This function is particularly useful when using distributed
        systems, because the results will be kept in distributed
        memory, rather than returned to the local process as with
        compute.

        Parameters
        ----------
        scheduler : string, optional
            Which scheduler to use, such as "threads", "synchronous", or
            "processes". If not provided, the default is to check the
            global settings first, and then fall back to the
            collection defaults.
        optimize_graph : bool, optional
            If True [default], the graph is optimized before
            computation. Otherwise the graph is run as is. This can be
            useful for debugging.
        **kwargs
            Extra keywords to forward to the scheduler function.

        Returns
        -------
        New dask collections backed by in-memory data

        See Also
        --------
        dask.persist

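        Examples
        --------
        A sketch, assuming ``df`` is an existing Dask collection:

        >>> df = df.persist()  # doctest: +SKIP
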
        r   Nr   r-   r   r   r   persist  s    .zDaskCollection.persistmydaskNFstrz
str | NoneboolzDisplayObject | None)filenameformatoptimize_graphr   r   c                 K  s   t ddS )a#  Render the computation of this object's task graph using graphviz.

        Requires ``graphviz`` to be installed.

        Parameters
        ----------
        filename : str or None, optional
            The name of the file to write to disk. If the provided
            `filename` doesn't include an extension, '.png' will be
            used by default. If `filename` is None, no file will be
            written, and we communicate with dot using only pipes.
        format : {'png', 'pdf', 'dot', 'svg', 'jpeg', 'jpg'}, optional
            Format in which to write output file. Default is 'png'.
        optimize_graph : bool, optional
            If True, the graph is optimized before rendering.
            Otherwise, the graph is displayed as is. Default is False.
        color : {None, 'order'}, optional
            Options to color nodes. Provide a ``cmap=`` keyword for an
            additional colormap.
        **kwargs
            Additional keyword arguments to forward to ``to_graphviz``.

        Examples
        --------
        >>> x.visualize(filename='dask.pdf')  # doctest: +SKIP
        >>> x.visualize(filename='dask.pdf', color='order')  # doctest: +SKIP

        Returns
        -------
        result : IPython.display.Image, IPython.display.SVG, or None
            See dask.dot.dot_graph for more information.

        See Also
        --------
        dask.visualize
        dask.dot.dot_graph

        Notes
        -----
        For more information on optimization see here:

        https://docs.dask.org/en/latest/optimize.html

        r   Nr   )r   r3   r4   r5   r   r   r   r   	visualizeH  s    4zDaskCollection.visualize)r0   NF)r   r    r!   r"   abcabstractmethodr(   r)   r*   r+   r,   __annotations__r.   r/   r6   r   r   r   r   r   V   s,   
!!/   c                   @  s$   e Zd ZdZejddddZdS )HLGDaskCollectiona  Protocol defining a Dask collection that uses HighLevelGraphs.

    This protocol is nearly identical to
    :py:class:`~dask.typing.DaskCollection`, with the addition of the
    ``__dask_layers__`` method (required for collections backed by
    high level graphs).

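    Because these protocols are runtime-checkable, conformance can be
    probed with ``isinstance``; a sketch (Dask arrays are backed by
    high level graphs):

    >>> import dask.array as da  # doctest: +SKIP
    >>> isinstance(da.ones(10, chunks=5), HLGDaskCollection)  # doctest: +SKIP
    True
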
    zSequence[str]r&   c                 C  s   t ddS )z#Names of the HighLevelGraph layers.r   Nr   r'   r   r   r   __dask_layers__  s    z!HLGDaskCollection.__dask_layers__N)r   r    r!   r"   r7   r8   r;   r   r   r   r   r:     s   	r:   )
__future__r   r7   collections.abcr   r   r   r   typingr   r   r	   r
   r   ZIPython.displayr   r   r   ZPostComputeCallabler   r#   r   r:   r   r   r   r   <module>   s    (  *
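

# A minimal sketch (not part of the public API) of how the
# runtime-checkable protocols above are typically used:
#
#     import dask.array as da
#     from dask.typing import DaskCollection, HLGDaskCollection
#
#     x = da.ones(10, chunks=5)
#     assert isinstance(x, DaskCollection)     # satisfies the protocol
#     assert isinstance(x, HLGDaskCollection)  # backed by a HighLevelGraph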