U
    f/eZ3                     @  s~  d Z ddlmZ ddlmZmZ ddlmZmZm	Z	m
Z
 ddlZddlZddlmZ ddlmZmZmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZ erddlm Z m!Z! dddddddddZ"G dd deZ#G dd de#Z$G dd de#Z%dddddZ&dd dd!d"d#Z'dd dd$d%d&Z(dd dd$d'd(Z)dd dd)d*d+Z*ddd,d-d.d/Z+dd d0d1d2Z,dS )3z
Module responsible for execution of NDFrame.describe() method.

Method NDFrame.describe() delegates actual execution to function describe_ndframe().
    )annotations)ABCabstractmethod)TYPE_CHECKINGCallableSequencecastN)	Timestamp)FrameOrSeriesFrameOrSeriesUnionHashable)validate_percentile)is_bool_dtypeis_datetime64_any_dtypeis_numeric_dtypeis_timedelta64_dtype)concat)format_percentiles)	DataFrameSeriesr
   str | Sequence[str] | NoneboolzSequence[float] | None)objincludeexcludedatetime_is_numericpercentilesreturnc                 C  sR   t |}| jdkr&ttd| |d}nttd| |||d}|j|d}tt|S )a  Describe series or dataframe.

    Called from pandas.core.generic.NDFrame.describe()

    Parameters
    ----------
    obj: DataFrame or Series
        Either dataframe or series to be described.
    include : 'all', list-like of dtypes or None (default), optional
        A white list of data types to include in the result. Ignored for ``Series``.
    exclude : list-like of dtypes or None (default), optional,
        A black list of data types to omit from the result. Ignored for ``Series``.
    datetime_is_numeric : bool, default False
        Whether to treat datetime dtypes as numeric.
    percentiles : list-like of numbers, optional
        The percentiles to include in the output. All should fall between 0 and 1.
        The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
        75th percentiles.

    Returns
    -------
    Dataframe or series description.
       r   r   r   r   r   r   r   r   )r   )refine_percentilesndimSeriesDescriberr   DataFrameDescriberdescriber
   )r   r   r   r   r   Z	describerresult r'   8/tmp/pip-unpacked-wheel-tiezk1ph/pandas/core/describe.pydescribe_ndframe0   s    
r)   c                   @  s4   e Zd ZdZdddddZedddd	d
ZdS )NDFrameDescriberAbstractzAbstract class for describing dataframe or series.

    Parameters
    ----------
    obj : Series or DataFrame
        Object to be described.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    r   r   r   c                 C  s   || _ || _d S Nr   )selfr   r   r'   r'   r(   __init__n   s    z!NDFrameDescriberAbstract.__init__Sequence[float]r   r   c                 C  s   dS )zDo describe either series or dataframe.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.
        Nr'   )r,   r   r'   r'   r(   r%   r   s    z!NDFrameDescriberAbstract.describeN)__name__
__module____qualname____doc__r-   r   r%   r'   r'   r'   r(   r*   c   s   
r*   c                   @  s*   e Zd ZU dZded< dddddZdS )	r#   z2Class responsible for creating series description.r   r   r.   r/   c                 C  s   t | j| j}|| j|S r+   )select_describe_funcr   r   )r,   r   describe_funcr'   r'   r(   r%      s
    zSeriesDescriber.describeN)r0   r1   r2   r3   __annotations__r%   r'   r'   r'   r(   r#   }   s   
r#   c                      sD   e Zd ZdZddddd fddZddd	d
dZdd Z  ZS )r$   a  Class responsible for creating dataobj description.

    Parameters
    ----------
    obj : DataFrame
        DataFrame to be described.
    include : 'all', list-like of dtypes or None
        A white list of data types to include in the result.
    exclude : list-like of dtypes or None
        A black list of data types to omit from the result.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    r   r   r   r    c                  s>   || _ || _|jdkr*|jjdkr*tdt j||d d S )N   r   z+Cannot describe a DataFrame without columns)r   )r   r   r"   columnssize
ValueErrorsuperr-   )r,   r   r   r   r   	__class__r'   r(   r-      s
    zDataFrameDescriber.__init__r.   r/   c                   sn   |   }g }| D ]$\}}t|| j}|||| qt| t fdd|D ddd}|j |_|S )Nc                   s   g | ]}|j  d dqS )F)copy)Zreindex.0xZ	col_namesr'   r(   
<listcomp>   s     z/DataFrameDescriber.describe.<locals>.<listcomp>r   F)Zaxissort)	_select_dataitemsr4   r   appendreorder_columnsr   r8   r>   )r,   r   dataldesc_seriesr5   dr'   rB   r(   r%      s    zDataFrameDescriber.describec                 C  s   | j dkrP| jdkrPtjg}| jr,|d | jj|d}t|j	dkr| j}n<| j dkrx| jdk	rpd}t
|| j}n| jj| j | jd}|S )zSelect columns to be described.Ndatetime)r   r   allz*exclude must be None when include is 'all')r   r   )r   r   npnumberr   rG   r   Zselect_dtypeslenr8   r:   )r,   Zdefault_includerI   msgr'   r'   r(   rE      s"    


zDataFrameDescriber._select_data)r0   r1   r2   r3   r-   r%   rE   __classcell__r'   r'   r<   r(   r$      s   r$   zSequence[Series]zlist[Hashable])rJ   r   c                 C  sD   g }t dd | D td}|D ] }|D ]}||kr&|| q&q|S )z,Set a convenient order for rows for display.c                 s  s   | ]}|j V  qd S r+   )indexr?   r'   r'   r(   	<genexpr>   s     z"reorder_columns.<locals>.<genexpr>)key)sortedrR   rG   )rJ   namesZldesc_indexesZidxnamesnamer'   r'   r(   rH      s    rH   r   r.   )rL   r   r   c                 C  sn   ddl m} t|}ddddg| dg }|  |  |  |  g| |  | 	 g }|||| j
dS )	zDescribe series containing numerical data.

    Parameters
    ----------
    series : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r   countmeanstdminmaxrU   rZ   )pandasr   r   r\   r]   r^   r_   quantiletolistr`   rZ   )rL   r   r   formatted_percentiles
stat_indexrM   r'   r'   r(   describe_numeric_1d   s    
rg   )rI   percentiles_ignoredr   c           
      C  s   ddddg}|   }t||dk }|dkrH|jd |jd  }}d}ntjtj }}d}|  |||g}ddlm}	 |	||| j	|d	S )
zDescribe series containing categorical data.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    r\   uniquetopfreqr   Nobjectr[   rU   rZ   dtype)
value_countsrR   rU   ilocrP   nanr\   rb   r   rZ   )
rI   rh   rY   	objcountscount_uniquerj   rk   rn   r&   r   r'   r'   r(   describe_categorical_1d   s    rt   c                 C  s  ddg}|   }t||dk }|  |g}d}|dkr|jd |jd  }}| jj}	|  j	d}
t
|}|jdk	r|	dk	r||	}n
||	}|dddd	g7 }|||t
|
 |	d
t
|
 |	d
g7 }n |ddg7 }|tjtjg7 }d}ddlm} |||| j|dS )zDescribe series containing timestamp data treated as categorical.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    r\   ri   r   Ni8rj   rk   firstlast)tzrl   r[   rm   )ro   rR   r\   rU   rp   dtrx   Zdropnavaluesviewr	   tzinfoZ
tz_convertZtz_localizer_   r`   rP   rq   rb   r   rZ   )rI   rh   rY   rr   rs   r&   rn   rj   rk   rx   Zasintr   r'   r'   r(   $describe_timestamp_as_categorical_1d  s2    

r}   )rI   r   r   c                 C  sf   ddl m} t|}dddg| dg }|  |  |  g| |  |  g }|||| j	dS )zDescribe series containing datetime64 dtype.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r[   r\   r]   r_   r`   ra   )
rb   r   r   r\   r]   r_   rc   rd   r`   rZ   )rI   r   r   re   rf   rM   r'   r'   r(   describe_timestamp_1dH  s    r~   r   )rI   r   r   c                 C  sX   t | jrtS t| rtS t| jrB|r,tS tjdt	dd t
S nt| jrPtS tS dS )zSelect proper function for describing series based on data type.

    Parameters
    ----------
    data : Series
        Series to be described.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    zTreating datetime data as categorical rather than numeric in `.describe` is deprecated and will be removed in a future version of pandas. Specify `datetime_is_numeric=True` to silence this warning and adopt the future behavior now.   )
stacklevelN)r   rn   rt   r   rg   r   r~   warningswarnFutureWarningr}   r   )rI   r   r'   r'   r(   r4   c  s     


r4   r/   c                 C  sv   | dkrt dddgS t| } t|  d| kr:| d t | } t | }| dk	sZtt|t| k rrt	d|S )zEnsure that percentiles are unique and sorted.

    Parameters
    ----------
    percentiles : list-like of numbers, optional
        The percentiles to include in the output.
    Ng      ?g      ?g      ?z%percentiles cannot contain duplicates)
rP   arraylistr   rG   Zasarrayri   AssertionErrorrR   r:   )r   Zunique_pctsr'   r'   r(   r!     s    


r!   )-r3   
__future__r   abcr   r   typingr   r   r   r   r   ZnumpyrP   Zpandas._libs.tslibsr	   Zpandas._typingr
   r   r   Zpandas.util._validatorsr   Zpandas.core.dtypes.commonr   r   r   r   Zpandas.core.reshape.concatr   Zpandas.io.formats.formatr   rb   r   r   r)   r*   r#   r$   rH   rg   rt   r}   r~   r4   r!   r'   r'   r'   r(   <module>   s0   3I 0$