U
    f/ef                     @  s  d dl mZ d dlmZmZmZmZmZ d dlZ	d dl
mZmZmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZ d dlmZmZ d dlm  mZ d d	l m!Z! d d
l"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- erd dl.m/Z/ edee!d dddBddddddZ0dddddddddd d!Z1dCd"dd#d$d%Z2dDdd&d'd(Z3dEdd&d)d*Z4dFddd#d+d,Z5d-d. Z6edee!d/ dddGdd0d0d0dd1d2d/Z7dHdddd3d4d5Z8dIdd6d7d8Z9dJdd:d;d<Z:d=d=d>d?d@dAZ;dS )K    )annotations)TYPE_CHECKINGCallableHashableSequencecastN)AggFuncTypeAggFuncTypeBaseAggFuncTypeDictFrameOrSeriesUnion
IndexLabel)AppenderSubstitution)maybe_downcast_to_dtype)is_integer_dtypeis_list_like	is_scalar)ABCDataFrame	ABCSeries)_shared_docs)Grouper)Index
MultiIndexget_objs_combined_axis)concat)cartesian_product)Series	DataFramez
data : DataFramepivot_table   )indentsmeanFTAllr   r   )dataaggfuncreturnc                 C  s   t |}t |}t|trg }g }|D ]>}t| |||||||||	|
d}|| |t|d| q&t||dd}|j| ddS t| |||||||||	|
}|j| ddS )N)
valuesindexcolumns
fill_valuer%   marginsdropnamargins_nameobservedsort__name__r    )keysaxisr   )method)_convert_by
isinstancelist__internal_pivot_tableappendgetattrr   Z__finalize__)r$   r'   r(   r)   r%   r*   r+   r,   r-   r.   r/   piecesr1   func_tabletable r>   =/tmp/pip-unpacked-wheel-tiezk1ph/pandas/core/reshape/pivot.pyr   6   sJ    

z!AggFuncTypeBase | AggFuncTypeDictboolstr)r$   r%   r+   r,   r-   r.   r/   r&   c                 C  s  || }|dk	}|rt |r*d}t|}n
d}|g}|D ]}|| kr8t|q8g }|| D ]@}t|trn|j}z|| kr|| W qZ tk
r   Y qZX qZt|t| j	k r| | } nB| j	}|D ].}z|
|}W q tttfk
r   Y qX qt|}| j||	|
d}||}|rt|trt|j	r|jdd}|D ]l}|| krBt| | rB||krBt|| sBt|| tr|| ||< nt|| | | j||< qB|}|jjdkr:|r:|jjdt| }g }tt|t|D ]<}|jj| }|dks||kr"|| n
|| q||}|st|jtrvtjt|jj|jjd}|j|d	d
}t|j	trtjt|j	j|j	jd}|j|dd
}t|tr|jdd
}|dk	r|j|dd}|dk	st|}|r(|r| |   j!dd
 } t"|| |||||||d	}|rP|sP|j	jdkrP|j#d	dd
}t|d	krrt|d	krr|j$}t|tr|r|jddd}|S )zL
    Helper of :func:`pandas.pivot_table` for any non-list ``aggfunc``.
    NTF)r.   r/   all)howr    namesr   r2   Zinfer)Zdowncast)rowscolsr%   r.   r-   r*   )rC   r2   )%r   r6   KeyErrorr5   r   keyr8   	TypeErrorlenr)   Zdrop
ValueErrorgroupbyaggr   r,   r   r   dtyper(   ZnlevelsrE   rangeunstackr   from_arraysr   levelsreindexZ
sort_indexfillnaAssertionErrorZnotnarB   _add_marginsZ	droplevelT)r$   r'   r(   r)   r%   r*   r+   r,   r-   r.   r/   r1   Zvalues_passedZvalues_multiiZ	to_filterxrJ   ZgroupedZaggedvr=   Zindex_namesZ
to_unstacknamemr<   r>   r>   r?   r7   o   s    








 
 
r7   r   )r=   r-   c	              	   C  s  t |tstdd| d}	| jjD ]}
|| j|
kr&t|	q&t||||}| jdkr| jjdd  D ]}
|| j|
krlt|	qlt	|dkr|fdt	|d   }n|}|st | t
r| t||| iS |rt| |||||||}t |ts|S |\}}}n>t | tstt| ||||||}t |tsB|S |\}}}|j|j|d}|D ]0}t |tr~|| ||< n||d  ||< q`dd	lm} |||gd
j}|jj}t|jD ],}||gj}|| jt|fd||< q||}||j_|S )Nz&margins_name argument must be a stringzConflicting name "z" in margins   r     )r*   r   r   )r)   )args)r5   rA   rM   r(   rE   Zget_level_values_compute_grand_marginndimr)   rL   r   r8   r   _generate_marginal_resultstupler   rW   )_generate_marginal_results_without_valuesrU   pandasr   rY   setZdtypesZselect_dtypesapplyr   )r=   r$   r'   rG   rH   r%   r.   r-   r*   msglevelgrand_marginrJ   Zmarginal_result_setresultmargin_keys
row_marginkr   Zmargin_dummyZ	row_namesrP   r>   r>   r?   rX      sx    



             
 
rX   )r-   c              	   C  s   |ri }| |   D ]\}}zlt|tr:t|| ||< nLt|trzt|| trht|||  ||< q|| |||< n||||< W q tk
r   Y qX q|S ||| jiS d S N)itemsr5   rA   r9   dictrK   r(   )r$   r'   r%   r-   rm   rq   r\   r>   r>   r?   rc   P  s    

rc   c                   s  t  dkrBg }g }	 fdd}
t |dkr|||  j||d|}d}| jd||dD ]8\}}|
|}| }|| ||< || |	| q`nddlm} d}| jd||dD ]d\}}t  dkr|
|}n}|| |||j}t	|g|j
jd|_
|| |	| qt||d	}t |dkrL|S n
| }| j}	t  dkr| |  j |d|}| }t  gttt   }|j
||_
nttj|jd
}||	|fS )Nr   c                   s   | fdt  d   S )Nr`   r    rL   )rJ   rH   r-   r>   r?   _all_keyn  s    z,_generate_marginal_results.<locals>._all_keyr.   r    rl   r2   r.   r   r]   rF   r(   )rL   rN   rO   copyr8   rh   r   rj   rY   r   r(   r]   r   r)   stackr6   rQ   Zreorder_levelsr   npnan)r=   r$   r'   rG   rH   r%   r.   r-   Ztable_piecesro   rw   marginZcat_axisrJ   Zpieceall_keyr   Ztransformed_piecern   rp   Z	new_orderr>   rv   r?   re   f  sH    



re   c                   s   t  dkrg } fdd}t |dkr`|| j||d|}	| }
|	| |
< | }||
 q|jdd|d|}	| }
|	| |
< | }||
 |S n
| }| j}t  r|  j |d|}nttj|jd}|||fS )Nr   c                     s&   t  dkrS fdt  d   S )Nr    r`   ru   r>   rv   r>   r?   rw     s    z;_generate_marginal_results_without_values.<locals>._all_keyrx   ry   r{   )rL   rN   rj   r8   r)   r   r~   r   )r=   r$   rG   rH   r%   r.   r-   ro   rw   r   r   rn   rp   r>   rv   r?   rg     s*    
rg   c                 C  sF   | d krg } n4t | s2t| tjtttfs2t| r:| g} nt| } | S rr   )	r   r5   r~   Zndarrayr   r   r   callabler6   )Zbyr>   r>   r?   r4     s    r4   pivotzIndexLabel | None)r$   r(   r)   r'   r&   c                   s  |d krt dt|}|d krV|d k	r6t|}ng }|d k} j|| |d}n|d krtt j jjdg}n fddt|D } fdd|D }	||	 t	|}
t
|rt|tsttt |} j | j|
|d}n j | j|
d}||S )	Nz.pivot() missing 1 required argument: 'columns')r8   rz   c                   s   g | ]} | qS r>   r>   ).0idxr$   r>   r?   
<listcomp>  s     zpivot.<locals>.<listcomp>c                   s   g | ]} | qS r>   r>   )r   colr   r>   r?   r     s     )r(   r)   r{   )rK   comZconvert_to_list_likeZ	set_indexr   r(   r]   extendr   rS   r   r5   rf   r   r   r   Z_constructorZ_valuesZ_constructor_slicedrR   )r$   r(   r)   r'   Zcolumns_listlikerH   r8   ZindexedZ
index_listZdata_columnsZ
multiindexr>   r   r?   r     s6    
 

  )r-   r,   r&   c
                 C  sP  |dkr|dk	rt d|dk	r0|dkr0t dt| } t|}d}
dd | | D }|rlt|ddd}
t| |d	d
}t||dd
}t||\}}}}ddlm} tt	|| tt	||}|||
d}|dkrd|d< t
dd}n||d< d|i}|jd|||||d|}|	dk	r0t||	||d}|j|dd}|j|dd}|S )ac  
    Compute a simple cross tabulation of two (or more) factors. By default
    computes a frequency table of the factors unless an array of values and an
    aggregation function are passed.

    Parameters
    ----------
    index : array-like, Series, or list of arrays/Series
        Values to group by in the rows.
    columns : array-like, Series, or list of arrays/Series
        Values to group by in the columns.
    values : array-like, optional
        Array of values to aggregate according to the factors.
        Requires `aggfunc` be specified.
    rownames : sequence, default None
        If passed, must match number of row arrays passed.
    colnames : sequence, default None
        If passed, must match number of column arrays passed.
    aggfunc : function, optional
        If specified, requires `values` be specified as well.
    margins : bool, default False
        Add row/column margins (subtotals).
    margins_name : str, default 'All'
        Name of the row/column that will contain the totals
        when margins is True.
    dropna : bool, default True
        Do not include columns whose entries are all NaN.
    normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False
        Normalize by dividing all values by the sum of values.

        - If passed 'all' or `True`, will normalize over all values.
        - If passed 'index' will normalize over each row.
        - If passed 'columns' will normalize over each column.
        - If margins is `True`, will also normalize margin values.

    Returns
    -------
    DataFrame
        Cross tabulation of the data.

    See Also
    --------
    DataFrame.pivot : Reshape data based on column values.
    pivot_table : Create a pivot table as a DataFrame.

    Notes
    -----
    Any Series passed will have their name attributes used unless row or column
    names for the cross-tabulation are specified.

    Any input passed containing Categorical data will have **all** of its
    categories included in the cross-tabulation, even if the actual data does
    not contain any instances of a particular category.

    In the event that there aren't overlapping indexes an empty DataFrame will
    be returned.

    Examples
    --------
    >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar",
    ...               "bar", "bar", "foo", "foo", "foo"], dtype=object)
    >>> b = np.array(["one", "one", "one", "two", "one", "one",
    ...               "one", "two", "two", "two", "one"], dtype=object)
    >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny",
    ...               "shiny", "dull", "shiny", "shiny", "shiny"],
    ...              dtype=object)
    >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
    b   one        two
    c   dull shiny dull shiny
    a
    bar    1     2    1     0
    foo    2     2    1     2

    Here 'c' and 'f' are not represented in the data and will not be
    shown in the output because dropna is True by default. Set
    dropna=False to preserve categories with no data.

    >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
    >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
    >>> pd.crosstab(foo, bar)
    col_0  d  e
    row_0
    a      1  0
    b      0  1
    >>> pd.crosstab(foo, bar, dropna=False)
    col_0  d  e  f
    row_0
    a      1  0  0
    b      0  1  0
    c      0  0  0
    Nz&aggfunc cannot be used without values.z)values cannot be used without an aggfunc.c                 S  s   g | ]}t |ttfr|qS r>   )r5   r   r   )r   r[   r>   r>   r?   r   y  s      zcrosstab.<locals>.<listcomp>TF)Z	intersectr/   rowprefixr   r   r   r{   	__dummy__)r%   r*   r%   )r(   r)   r+   r-   r,   )	normalizer+   r-   )r(   r2   r    )r)   r2   )r   )rM   r   Zmaybe_make_listr   
_get_names_build_names_mapperrh   r   rt   ziprL   r   
_normalizeZrename_axis)r(   r)   r'   rownamescolnamesr%   r+   r-   r,   r   Z
common_idxZ	pass_objsrownames_mapperunique_rownamescolnames_mapperunique_colnamesr   r$   Zdfkwargsr=   r>   r>   r?   crosstab  sb    g

 
   r   )r+   c              
   C  sH  t |ttfsRddd}z|| }W n, tk
rP } ztd|W 5 d }~X Y nX |dkrdd dd d	d d
}|d |d< z|| }W n, tk
r } ztd|W 5 d }~X Y nX || } | d} nv|dkr<| j}| j}	| jdd d f j	}
||
k||
k@ rt| d| jd ddf }| jdd df }| jd dd df } t
| |dd} |dkr||  }t| |gdd} | d} |	| _n|dkr||  }| |} | d} || _np|dks|dkr2||  }||  }d|j|< t| |gdd} | |} | d} || _|	| _ntdntd| S )Nr(   r)   )r   r    zNot a valid normalize argumentFc                 S  s   | | j ddj dd S Nr    rF   r   sumr[   r>   r>   r?   <lambda>      z_normalize.<locals>.<lambda>c                 S  s   | |    S rr   r   r   r>   r>   r?   r     r   c                 S  s   | j | jddddS r   )divr   r   r>   r>   r?   r     r   )rB   r)   r(   rB   Tr   z not in pivoted DataFrame)r   r+   r    rF   zNot a valid margins argument)r5   r@   rA   rI   rM   rV   r(   r)   Zilocr]   r   r   r   r8   loc)r=   r   r+   r-   Z	axis_subserrZnormalizersfZtable_indexZtable_columnsZlast_ind_or_colZcolumn_marginZindex_marginr>   r>   r?   r     sd    










r   r   r   c                 C  s   |d krVg }t | D ]>\}}t|tr>|jd k	r>||j q|| d|  qn*t|t| krntdt|tst|}|S )N_z*arrays and names must have the same length)	enumerater5   r   r]   r8   rL   rW   r6   )ZarrsrE   r   rZ   Zarrr>   r>   r?   r     s    
r   z	list[str]z;tuple[dict[str, str], list[str], dict[str, str], list[str]])r   r   r&   c                   s   dd }t | t |}|| ||B |B   fddt| D } fddt| D } fddt|D } fddt|D }||||fS )	a  
    Given the names of a DataFrame's rows and columns, returns a set of unique row
    and column names and mappers that convert to original names.

    A row or column name is replaced if it is duplicate among the rows of the inputs,
    among the columns of the inputs or between the rows and the columns.

    Parameters
    ----------
    rownames: list[str]
    colnames: list[str]

    Returns
    -------
    Tuple(Dict[str, str], List[str], Dict[str, str], List[str])

    rownames_mapper: dict[str, str]
        a dictionary with new row names as keys and original rownames as values
    unique_rownames: list[str]
        a list of rownames with duplicate names replaced by dummy names
    colnames_mapper: dict[str, str]
        a dictionary with new column names as keys and original column names as values
    unique_colnames: list[str]
        a list of column names with duplicate names replaced by dummy names

    c                   s   t    fdd| D S )Nc                   s   h | ]}| kr|qS r>   r>   )r   r]   seenr>   r?   	<setcomp>.  s      z>_build_names_mapper.<locals>.get_duplicates.<locals>.<setcomp>)ri   rD   r>   r   r?   get_duplicates,  s    z+_build_names_mapper.<locals>.get_duplicatesc                   s$   i | ]\}}| krd | |qS Zrow_r>   r   rZ   r]   Z	dup_namesr>   r?   
<dictcomp>3  s      z'_build_names_mapper.<locals>.<dictcomp>c                   s&   g | ]\}}| krd | n|qS r   r>   r   r   r>   r?   r   6  s    z'_build_names_mapper.<locals>.<listcomp>c                   s$   i | ]\}}| krd | |qS Zcol_r>   r   r   r>   r?   r   :  s      c                   s&   g | ]\}}| krd | n|qS r   r>   r   r   r>   r?   r   =  s    )ri   intersectionr   )r   r   r   Zshared_namesr   r   r   r   r>   r   r?   r     s     



r   )
NNNr"   NFTr#   FT)Nr#   N)r#   )r#   )r#   )NNN)NNNNFr#   TF)r#   )r   )<
__future__r   typingr   r   r   r   r   Znumpyr~   Zpandas._typingr   r	   r
   r   r   Zpandas.util._decoratorsr   r   Zpandas.core.dtypes.castr   Zpandas.core.dtypes.commonr   r   r   Zpandas.core.dtypes.genericr   r   Zpandas.core.commoncorecommonr   Zpandas.core.framer   Zpandas.core.groupbyr   Zpandas.core.indexes.apir   r   r   Zpandas.core.reshape.concatr   Zpandas.core.reshape.utilr   Zpandas.core.seriesr   rh   r   r   r7   rX   rc   re   rg   r4   r   r   r   r   r   r>   r>   r>   r?   <module>   sx             7    P A &   /         &P