U
    /e{	                     @   sD   d Z ddlZddlZddlmZ dd Zdd Zdd	 Z	d
d Z
dS )uc  Implementation of HyperLogLog

This implements the HyperLogLog algorithm for cardinality estimation, found
in

    Philippe Flajolet, Éric Fusy, Olivier Gandouet and Frédéric Meunier.
        "HyperLogLog: the analysis of a near-optimal cardinality estimation
        algorithm". 2007 Conference on Analysis of Algorithms. Nice, France
        (2007)

    N)hash_pandas_objectc                 C   s:   t j| dt d> }|jddt}d|jdd S )zGCompute the position of the first nonzero bit for each int in an array.       Zaxis!   )npZbitwise_andouterarangeZcumsumastypeboolsum)abits r   >/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/hyperloglog.pycompute_first_bit   s    r   c           	      C   s   d|  krdksn t dd| }d|> }t| dd}t|tjrL|j}|tj}||? }t	|}t
||d}|d	 d
 }|jt|ddjtjS )N      zb should be between 8 and 16r   r   F)index)j	first_bitr   r   r   )Z
fill_value)
ValueErrorr   
isinstancepdZSeriesZ_valuesr
   r   Zuint32r   Z	DataFramegroupbymaxZreindexr	   valuesZuint8)	objbZnum_bits_discardedmhashesr   r   ZdfZseriesr   r   r   compute_hll_array   s    r!   c                 C   s(   d|> }|  t| | |} | jddS )Nr   r   r   )Zreshapelenr   )Msr   r   r   r   r   reduce_state4   s    r$   c                 C   s   d|> }t | |}ddd|   }|| d|d    | }|d| k rp|dk }|rp|t||  S |dkrd	t| d
  S |S )Nr   gZӼ?g$C?g       @Zf8g      @r   gAl     l        )r$   r
   r   r   loglog1p)r#   r   r   MalphaEVr   r   r   estimate_count=   s    
 r+   )__doc__Znumpyr   Zpandasr   Zpandas.utilr   r   r!   r$   r+   r   r   r   r   <module>   s   	