U
    /e4                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 dddZ
ddd	Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    N)partial)islice)Bagc                 C   s   t | ||d}|t|S )a  Chooses k unique random elements from a bag.

    Returns a new bag containing elements from the population while
    leaving the original population unchanged.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.sample(b, 3).compute())  # doctest: +SKIP
    [1, 3, 5]
    
populationksplit_every)_samplemap_partitions_finalize_sampler   r   r   res r   3/tmp/pip-unpacked-wheel-dbjnr7gq/dask/bag/random.pysample
   s    r      c                 C   s   t | ||d}|t|S )a7  
    Return a k sized list of elements chosen with replacement.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.choices(b, 3).compute())  # doctest: +SKIP
    [1, 1, 5]
    r   )_sample_with_replacementr
   r   r   r   r   r   choices&   s    r   c                 C   s   g }g }d}| D ]4}|\}}| | ||7 }t|}	|||	f q||krZ|sZ||fS g }
|D ]*\}}	|	dkrb||	|  }|
|g|	 7 }
qb|rtjnt}|||
|d|fS )aq  
    Reduce function used on the sample and choice functions.

    Parameters
    ----------
    reduce_iter : iterable
        Each element is a tuple coming generated by the _sample_map_partitions function.
    replace: bool
        If True, sample with replacement. If False, sample without replacement.

    Returns a sequence of uniformly distributed samples;
    r   )r   weightsr   )extendlenappendrndr   &_weighted_sampling_without_replacement)reduce_iterr   replaceZns_kssniZs_iZn_iZk_ipZp_iZsample_funcr   r   r   _sample_reduce@   s$    
r    c                    s4   fddt tD } fddt||D S )zk
    Source:
        Weighted random sampling with a reservoir, Pavlos S. Efraimidis, Paul G. Spirakis
    c                    s&   g | ]}t t  |  |fqS r   )mathlogr   random).0r   )r   r   r   
<listcomp>k   s     z:_weighted_sampling_without_replacement.<locals>.<listcomp>c                    s   g | ]} |d   qS )r   r   )r$   x)r   r   r   r%   l   s     )ranger   heapqnlargest)r   r   r   eltr   )r   r   r   r   f   s    r   c                 C   s4   |dk rt d| jtt|dtt|ddt|dS )Nr   z(Cannot take a negative number of samplesr   Fr   r   Zout_typer   )
ValueError	reductionr   _sample_map_partitionsr    r   r   r   r   r   r	   o   s    
r	   c                 C   s    | d }t ||k rtd|S )Nr   zSample larger than population)r   r.   )r   r   r   r   r   r   r   z   s    r   c           	      C   s   g d }}t | }t||D ]}|| |d7 }qttt | }|d t| }t	||D ]N\}}||kr||t
|< |ttt | 9 }|t|7 }|d7 }qf||fS )z
    Reservoir sampling strategy based on the L algorithm
    See https://en.wikipedia.org/wiki/Reservoir_sampling#An_optimal_algorithm
    r   r   )iterr   r   r!   expr"   r   r#   
_geometric	enumerate	randrange)	r   r   	reservoirstream_lengthstreamewnxtr   r   r   r   r0      s    



r0   c                 C   s$   | j tt|dtt|ddt|dS )Nr+   Tr,   r-   )r/   r   '_sample_with_replacement_map_partitionsr    r   r   r   r   r   r      s    
r   c                    s   t | }t|  fddt|D d }}dd t|D }dd |D }t|}t|dD ]n\} ||krt|D ]D\}	}
|
|krv ||	< ||	  t 9  < ||	  t||	 7  < qvt|}|d7 }q^||fS )z
    Reservoir sampling with replacement, the main idea is to use k reservoirs of size 1
    See Section Applications in http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf
    c                    s   g | ]} qS r   r   r$   _r9   r   r   r%      s     z;_sample_with_replacement_map_partitions.<locals>.<listcomp>r   c                 S   s   g | ]}t  qS r   )r   r#   r=   r   r   r   r%      s     c                 S   s   g | ]}t |qS r   )r3   )r$   Zwir   r   r   r%      s     )r1   nextr'   minr4   r   r#   r3   )r   r   r8   r6   r7   r:   r;   Zmin_nxtr   jr   r   r?   r   r<      s     
r<   c                 C   s(   t ttddtd|   d S )Nr   r   )intr!   r"   r   uniform)r   r   r   r   r3      s    r3   )N)r   N)r(   r!   r#   r   	functoolsr   	itertoolsr   Zdask.bag.corer   r   r   r    r   r	   r   r0   r   r<   r3   r   r   r   r   <module>   s   

&		