U
    f/e>                     @  sp  d dl mZ d dlmZmZmZmZ d dlZd dl	m
Z
mZ d dlmZmZmZmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZm Z m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)mZm*Z* d dl+m,Z,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 er>d dl7m8Z8 d dl3m9Z9 edddZ:G dd deZ;G dd de2e4Z<dS )    )annotations)TYPE_CHECKINGAnySequenceTypeVarN)libmissing)	ArrayLikeDtypeNpDtypePositionalIndexerScalartype_tAbstractMethodError)cache_readonlydoc)validate_fillna_kwargs)ExtensionDtype)is_dtype_equal
is_integeris_object_dtype	is_scalaris_string_dtypepandas_dtype)is_array_like)isnanotna)r   nanops)factorize_arrayisintake)masked_reductions)OpsMixin)ExtensionArray)check_array_indexer)SeriesBooleanArrayBaseMaskedArrayTBaseMaskedArray)boundc                   @  st   e Zd ZU dZded< dZded< ejZe	dddd	Z
e	ddd
dZe	ddddZeddddZdS )BaseMaskedDtypez@
    Base class for dtypes for BasedMaskedArray subclasses.
    strnameNtypeznp.dtypereturnc                 C  s   t | jS )z%Return an instance of our numpy dtype)npdtyper/   self r6   =/tmp/pip-unpacked-wheel-tiezk1ph/pandas/core/arrays/masked.pynumpy_dtypeO   s    zBaseMaskedDtype.numpy_dtypec                 C  s   | j jS N)r8   kindr4   r6   r6   r7   r:   T   s    zBaseMaskedDtype.kindintc                 C  s   | j jS )z(Return the number of bytes in this dtype)r8   itemsizer4   r6   r6   r7   r<   X   s    zBaseMaskedDtype.itemsizeztype_t[BaseMaskedArray]c                 C  s   t dS )zq
        Return the array type associated with this dtype.

        Returns
        -------
        type
        N)NotImplementedError)clsr6   r6   r7   construct_array_type]   s    	z$BaseMaskedDtype.construct_array_type)__name__
__module____qualname____doc____annotations__base
libmissingNAna_valuer   r8   r:   r<   classmethodr?   r6   r6   r6   r7   r,   D   s   
r,   c                   @  s  e Zd ZU dZded< dZdddddd	Zed
dddZdddddZe	e
jd[dddddZddddZddddZdd Zd dd!d"Zdddd#d$Zddejfd%dddd&d'd(Zd\d*dd+d,d-d.Zd/Zd]d%dd0d1d2Zd^d3d4Zeddd5d6Zddd7d8Zed9d: Zed dd;d<Zed=d>dd?d@dAZdddBdddCddDdEdFZdGddHdIZddddJdKZ e	e
j!d_d dMdNdOdPZ!d`ddQdRdSdTZ"d)dUdVddWdXdYZ#dS )ar*   zf
    Base class for masked arrays (which use _data and _mask to store the data).

    numpy based
    r   _internal_fill_valueFz
np.ndarraybool)valuesmaskcopyc                 C  sh   t |tjr|jtjks td|jdkr2td|jdkrDtd|rX| }| }|| _	|| _
d S )NzGmask should be boolean numpy array. Use the 'pd.array' function instead   zvalues must be a 1D arrayzmask must be a 1D array)
isinstancer2   Zndarrayr3   bool_	TypeErrorndim
ValueErrorrN   _data_mask)r5   rL   rM   rN   r6   r6   r7   __init__s   s    

zBaseMaskedArray.__init__r,   r0   c                 C  s   t | d S r9   r   r4   r6   r6   r7   r3      s    zBaseMaskedArray.dtyper   zBaseMaskedArray | Any)itemr1   c                 C  sH   t |r$| j| r| jjS | j| S t| |}t| | j| | j| S r9   )r   rV   r3   rH   rU   r%   r/   )r5   rX   r6   r6   r7   __getitem__   s    


zBaseMaskedArray.__getitem__Nr)   )r5   r1   c                 C  s   t ||\}}| j}t|rPt|t| krHtdt| dt|  || }| r|d k	rt|}|| j	 ||	 d\}}t
| ||tjS | 	 }|||< n| 	 }|S )Nz'Length of 'value' does not match. Got (z)  expected )limitrM   )r   rV   r   lenrT   anyr   Zget_fill_funcrU   rN   r/   viewr2   rQ   )r5   valuemethodrZ   rM   funcZ
new_valuesZnew_maskr6   r6   r7   fillna   s*    


zBaseMaskedArray.fillnaztuple[np.ndarray, np.ndarray]c                 C  s   t | d S r9   r   )r5   rL   r6   r6   r7   _coerce_to_array   s    z BaseMaskedArray._coerce_to_arrayNonec                 C  sV   t |}|r|g}| |\}}|r4|d }|d }t| |}|| j|< || j|< d S )Nr   )r   rb   r%   rU   rV   )r5   keyr^   Z
_is_scalarrM   r6   r6   r7   __setitem__   s    

zBaseMaskedArray.__setitem__c                 c  s8   t t| D ]&}| j| r&| jjV  q| j| V  qd S r9   )ranger[   rV   r3   rH   rU   )r5   ir6   r6   r7   __iter__   s    
zBaseMaskedArray.__iter__r;   c                 C  s
   t | jS r9   )r[   rU   r4   r6   r6   r7   __len__   s    zBaseMaskedArray.__len__c                 C  s   t | | j | j S r9   )r/   rU   rV   rN   r4   r6   r6   r7   
__invert__   s    zBaseMaskedArray.__invert__zNpDtype | None)r3   rN   rH   r1   c                 C  sx   |t jkrtj}|dkrt}| jrdt|sLt|sL|tjkrLtd| d| j	
|}||| j< n| j	j
||d}|S )aF  
        Convert to a NumPy Array.

        By default converts to an object-dtype NumPy array. Specify the `dtype` and
        `na_value` keywords to customize the conversion.

        Parameters
        ----------
        dtype : dtype, default object
            The numpy dtype to convert to.
        copy : bool, default False
            Whether to ensure that the returned value is a not a view on
            the array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
            a copy is made, even if not strictly necessary. This is typically
            only possible when no missing values are present and `dtype`
            is the equivalent numpy dtype.
        na_value : scalar, optional
             Scalar missing value indicator to use in numpy array. Defaults
             to the native missing value indicator of this array (pd.NA).

        Returns
        -------
        numpy.ndarray

        Examples
        --------
        An object-dtype is the default result

        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
        >>> a.to_numpy()
        array([True, False, <NA>], dtype=object)

        When no missing values are present, an equivalent dtype can be used.

        >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
        array([ True, False])
        >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
        array([1, 2])

        However, requesting such dtype will raise a ValueError if
        missing values are present and the default missing value :attr:`NA`
        is used.

        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
        >>> a
        <BooleanArray>
        [True, False, <NA>]
        Length: 3, dtype: boolean

        >>> a.to_numpy(dtype="bool")
        Traceback (most recent call last):
        ...
        ValueError: cannot convert to bool numpy array in presence of missing values

        Specify a valid `na_value` instead

        >>> a.to_numpy(dtype="bool", na_value=False)
        array([ True, False, False])
        Nzcannot convert to 'zZ'-dtype NumPy array with missing values. Specify an appropriate 'na_value' for this dtype.rN   )r   
no_defaultrF   rG   object_hasnar   r   rT   rU   astyperV   )r5   r3   rN   rH   datar6   r6   r7   to_numpy   s$    B

zBaseMaskedArray.to_numpyTr
   r	   )r3   rN   r1   c                 C  s   t |}t|| jr$|r |  S | S t|trp| jj|j|d}|| jkrP| j	n| j	 }|
 }|||ddS t|tr|
 }|j| ||dS tdd S )Nrk   F)r3   rN   z*subclass must implement astype to np.dtype)r   r   r3   rN   rP   r,   rU   ro   r8   rV   r?   r   Z_from_sequencer=   )r5   r3   rN   rp   rM   r>   Zeaclsr6   r6   r7   ro   0  s    

zBaseMaskedArray.astypei  )r3   r1   c                 C  s   | j |dS )z|
        the array interface, return my values
        We return an object array here to preserve our scalar values
        r3   )rq   )r5   r3   r6   r6   r7   	__array__J  s    zBaseMaskedArray.__array__c                 C  s   ddl }|j| j| j|dS )z6
        Convert myself into a pyarrow Array.
        r   N)rM   r/   )ZpyarrowarrayrU   rV   )r5   r/   par6   r6   r7   __arrow_array__Q  s    zBaseMaskedArray.__arrow_array__c                 C  s
   | j  S r9   )rV   r\   r4   r6   r6   r7   rn   Y  s    zBaseMaskedArray._hasnac                 C  s
   | j  S r9   )rV   rN   r4   r6   r6   r7   r   b  s    zBaseMaskedArray.isnac                 C  s   | j jS r9   )r3   rH   r4   r6   r6   r7   	_na_valuee  s    zBaseMaskedArray._na_valuec                 C  s   | j j| jj S r9   )rU   nbytesrV   r4   r6   r6   r7   rx   i  s    zBaseMaskedArray.nbytesztype[BaseMaskedArrayT]zSequence[BaseMaskedArrayT])r>   	to_concatr1   c                 C  s2   t dd |D }t dd |D }| ||S )Nc                 S  s   g | ]
}|j qS r6   )rU   .0xr6   r6   r7   
<listcomp>q  s     z5BaseMaskedArray._concat_same_type.<locals>.<listcomp>c                 S  s   g | ]
}|j qS r6   )rV   rz   r6   r6   r7   r}   r  s     )r2   concatenate)r>   ry   rp   rM   r6   r6   r7   _concat_same_typem  s    z!BaseMaskedArray._concat_same_type)
allow_fill
fill_valuezScalar | None)r5   r   r   r1   c                C  sr   t |r| jn|}t| j|||d}t| j|d|d}|r`t|r`t|dk}|||< ||A }t| ||ddS )N)r   r   TFrk   )	r   rJ   r!   rU   rV   r   r2   Zasarrayr/   )r5   Zindexerr   r   Zdata_fill_valueresultrM   Z	fill_maskr6   r6   r7   r!   u  s    	   zBaseMaskedArray.taker(   c                 C  s`   ddl m} t| j|}| jrDtj|kr4|| j7 }n|t	| j9 }tj
| td}|||ddS )Nr   r'   rr   Frk   )pandas.core.arraysr(   r    rU   rn   rF   rG   rV   r2   invertZ
zeros_likerK   )r5   rL   r(   r   rM   r6   r6   r7   r      s    
zBaseMaskedArray.isinc                 C  s0   | j | j }}| }| }t| ||ddS )NFrk   )rU   rV   rN   r/   )r5   rp   rM   r6   r6   r7   rN     s    zBaseMaskedArray.copyr   z!tuple[np.ndarray, ExtensionArray])na_sentinelr1   c                 C  sT   | j }| j}t|||d\}}|j| jjdd}t| |tjt	|t
d}||fS )N)r   rM   Frk   rr   )rU   rV   r   ro   r3   r8   r/   r2   zerosr[   rK   )r5   r   ZarrrM   codesZuniquesr6   r6   r7   	factorize  s     zBaseMaskedArray.factorizer&   )dropnar1   c           
      C  s   ddl m}m} ddlm} | j| j  }|| }|jj	
t}|rP|j	}nVtjt|d dd}||dd< | j |d< |t|tj| jjgtdgtd}tjt|d	d}	|||	}|||d
S )aA  
        Returns a Series containing counts of each unique value.

        Parameters
        ----------
        dropna : bool, default True
            Don't include counts of missing values.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        r   )Indexr&   )IntegerArrayrO   Zint64rr   Nr   rK   )index)pandasr   r&   Zpandas.arraysr   rU   rV   value_countsr   Z_valuesro   rm   r2   emptyr[   sumr~   rt   r3   rH   r   )
r5   r   r   r&   r   rp   r   r   countsrM   r6   r6   r7   r     s"    
zBaseMaskedArray.value_counts)skipnar-   )r.   r   c                K  s   | j }| j}|dkr4tt|}|||fd|i|S | jrJ| jdtjd}ttd| }||fd||d|}t	|rt
jS |S )N>   maxr   minprodmeanr   Zfloat64)rH   nanr   )Zaxisr   rM   )rU   rV   getattrr"   rn   rq   r2   r   r   isnanrF   rG   )r5   r.   r   kwargsrp   rM   opr   r6   r6   r7   _reduce  s    

zBaseMaskedArray._reduce)F)NNN)T)N)N)r   )T)$r@   rA   rB   rC   rD   rW   propertyr3   rY   r   r$   ra   rb   re   rh   ri   rj   r   rl   rq   ro   Z__array_priority__rs   rv   rn   r   rw   rx   rI   r   r!   r    rN   r   r   r   r6   r6   r6   r7   r*   i   sT   

      Z

2)=
__future__r   typingr   r   r   r   Znumpyr2   Zpandas._libsr   r   rF   Zpandas._typingr	   r
   r   r   r   r   Zpandas.errorsr   Zpandas.util._decoratorsr   r   Zpandas.util._validatorsr   Zpandas.core.dtypes.baser   Zpandas.core.dtypes.commonr   r   r   r   r   r   Zpandas.core.dtypes.inferencer   Zpandas.core.dtypes.missingr   r   Zpandas.corer   Zpandas.core.algorithmsr   r    r!   Zpandas.core.array_algosr"   Zpandas.core.arrayliker#   r   r$   Zpandas.core.indexersr%   r   r&   r(   r)   r,   r*   r6   r6   r6   r7   <module>   s.     %