U
    f/efH                  
   @   s  d Z ddlZddlZddlm  mZ ddlm	Z	 ddl
ZddlmZ ddlmZ ejdd Zejdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejjdddd Zejjdddd  Z d!d" Z!d#d$ Z"d%d& Z#d'd( Z$d)d* Z%ej&d+d,d-gd.d/ Z'd0d1 Z(d2d3 Z)d4d5 Z*ej&d6d,d-gejjd7dd8d9 Z+ej&d:d;d<gej&d6d,d-gd=d> Z,ej&d:d;d<gej&d?ej-ej.gd@dA Z/ej&d6d,d-gejjd7ddBdC Z0dDdE Z1e2dFdGdH Z3e2dFdIdJ Z4e2dFdKdL Z5dMdN Z6dOdP Z7ej&dQdRdSdTge.d-d-d-gfdRdSdge.d-d-d,gfgdUdV Z8dWdX Z9ej&dYej:ej;ej<gdZd[ Z=d\d] Z>d^d_ Z?d`da Z@dS )bz
This module tests the functionality of StringArray and ArrowStringArray.
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
    N)is_dtype_equal)ArrowStringArrayc                 C   s   t j| dS )N)storage)pdStringDtype)string_storage r   K/tmp/pip-unpacked-wheel-tiezk1ph/pandas/tests/arrays/string_/test_string.pydtype   s    r
   c                 C   s   |   S )N)Zconstruct_array_typer
   r   r   r	   cls   s    r   c                 C   s   t dt jdt jdg| di}d}t||ks4td}t|j|ksJt| jdkrXdnd	}d
| d}t|jj|ks|td S )NAabr   z      A
0     a
1  <NA>
2     bz40       a
1    <NA>
2       b
Name: A, dtype: stringpyarrowr   StringArray<z+>
['a', <NA>, 'b']
Length: 3, dtype: string)r   	DataFramearrayNAreprAssertionErrorr   r   )r
   dfexpectedZarr_namer   r   r	   	test_repr   s     r   c                 C   s6   |  dd dg}|d d k	s t|d tjks2td S )Nr   r      )_from_sequencer   r   r   )r   r   r   r   r	   test_none_to_nan(   s    r   c              	   C   s   |  ddg}| tjjkr d}nd}tjt|d d|d< W 5 Q R X | tjjkrXd}nd}tjt|d td	d
g|d d < W 5 Q R X d S )Nr   r   z4Cannot set non-string value '10' into a StringArray.zScalar must be NA or strmatch
   r   zMust provide strings.r      )	r   r   arraysr   pytestraises
ValueErrornpr   )r   arrmsgr   r   r	   test_setitem_validates.   s    r)   c                 C   s<   t jddg| d}d|d< t jddg| d}t|| d S )Nr   cr   dr   )r   r   tmassert_extension_array_equal)r
   r'   r   r   r   r	   test_setitem_with_scalar_string@   s    r.   c                 C   s   | j dkr,d}tjj|td}|j| ntjjdtd}|j| ttj	ddd}d |d< |
| }t|j| s~t|
d	}t|| d S )
Nr   z6ValueError: Could not convert object to NumPy datetime)reasonr$   z/GH#36153 casting from StringArray to dt64 fails2000   )Zperiodsr   zdatetime64[ns])r   r#   markxfailr%   node
add_markerr   SeriesZ
date_rangeastyper   r
   r   r,   assert_series_equal)r
   requestr/   r2   serZcastedresultr   r   r	   test_astype_roundtripI   s    
 

r<   c                 C   s   | j dkr*d}tjjt|d}|j| tjdddd d g| d}tjdd	d d
d g| d}|| }tjddd d d g| d}t	
|| ||}t	
|| ||}tjddd d d g| d}t	
|| |j|dd}tjddddd g| d}t	
|| d S )Nr   zLunsupported operand type(s) for +: 'ArrowStringArray' and 'ArrowStringArray'r$   r/   r   r   r*   r   xyzaxZbyxaZyb-)Z
fill_valuezc-z-z)r   r#   r2   r3   	TypeErrorr4   r5   r   r6   r,   r8   addZradd)r
   r9   r/   r2   r   r   r;   r   r   r   r	   test_add^   s$    


rF   c              	   C   s   | j dkr*d}tjjd |d}|j| tjdddg| d}tjdddggt	d}tj
tdd	 ||  W 5 Q R X t|}tj
tdd	 ||  W 5 Q R X d S )
Nr   z*Failed: DID NOT RAISE <class 'ValueError'>r=   r   r   r*   r   z3 != 1r   )r   r#   r2   r3   r4   r5   r   r   r&   objectr$   r%   r6   )r
   r9   r/   r2   r   r   sr   r   r	   test_add_2dz   s    

rI   c                 C   s   | j dkr*d}tjjt|d}|j| tjddd d g| d}dd dd g}|| }tjd	d d d g| d}t	
|| || }tjd
d d d g| d}t	
|| d S )Nr   z@unsupported operand type(s) for +: 'ArrowStringArray' and 'list'r=   r   r   r   r>   r?   rA   rB   r   r#   r2   r3   rD   r4   r5   r   r   r,   r-   )r
   r9   r/   r2   r   otherr;   r   r   r   r	   test_add_sequence   s    
rL   c                 C   s~   | j dkr*d}tjjt|d}|j| tjddd g| d}|d }tjdd	d g| d}t	
|| d| }t	
|| d S )
Nr   z?unsupported operand type(s) for *: 'ArrowStringArray' and 'int'r=   r   r   r   r!   ZaaZbbrJ   )r
   r9   r/   r2   r   r;   r   r   r   r	   test_mul   s    
rM   zGH-28527)r/   c                 C   s   t jddddg| d}t dddd	gg}||tks<t|| }t d
dddgg| }t|| || }t ddddgg| }t|| d S )Nr   r   r*   r+   r   tuvwatZbuZcvZdwtaZubZvcwd)	r   r   r   __add__NotImplementedr   r7   r,   assert_frame_equalr
   r'   r   r;   r   r   r   r	   test_add_strings   s    rY   c                 C   s   t jddtjtjg| d}t dtjdtjgg}||tksDt|| }t dtjtjtjgg| }t	
|| || }t dtjtjtjgg| }t	
|| d S )Nr   r   r   r>   r?   rA   rB   )r   r   r&   nanr   rU   rV   r   r7   r,   rW   rX   r   r   r	   test_add_frame   s      r[   c                    sf   |  t jdd dg|d}dt| }tj fdd|D td}t j|dd}t|| d S )Nr   r*   r   c                    s   g | ]}t | qS r   )getattr).0itemop_namerK   r   r	   
<listcomp>   s     z2test_comparison_methods_scalar.<locals>.<listcomp>boolean)r   r   r\   r&   rG   r,   r-   )all_compare_operatorsr
   r   r;   r   r   r_   r	   test_comparison_methods_scalar   s    rd   c                 C   sL   | }t jdd dg|d}t||t j}t jd d d gdd}t|| d S )Nr   r*   r   rb   )r   r   r\   r   r,   r-   )rc   r
   r`   r   r;   r   r   r   r	   $test_comparison_methods_scalar_pd_na   s
    re   c                 C   s   | dkr(d}t jjt|d}|j| | }tjdd dg|d}d}t|||}dd dgd	d d	gd| }	tj|	d
d}
t	
||
 d S )N)__eq____ne__z@comparison op not supported between instances of 'str' and 'int'r=   r   r*   r   *   FTrb   )r#   r2   r3   rD   r4   r5   r   r   r\   r,   r-   )rc   r
   r9   r/   r2   r`   r   rK   r;   Zexpected_datar   r   r   r	   )test_comparison_methods_scalar_not_string   s    ri   c           	      C   s   |j dkr&tjjtdd}|j| | }tjdd dg|d}d d dg}t	|||}t
j|dd}t	|d ||d |d< tj|d	d}t|| t	||tj}tjd d d gd	d}t|| d S )
Nr   zleft is not an ExtensionArrayr=   r   r*   r   rG   rb   )r   r#   r2   r3   r   r4   r5   r   r   r\   r&   Z
empty_liker,   r-   r   )	rc   r
   r9   r2   r`   r   rK   r;   r   r   r   r	   test_comparison_methods_array   s"    
 
rk   c              	   C   s  | t jjkrd}nd}tjt|d | tjddgdd W 5 Q R X tjt|d | tg  W 5 Q R X tjt|d | tjdtjgt	d W 5 Q R X tjt|d | tjdd gt	d W 5 Q R X tjt|d | tjdt j
gt	d W 5 Q R X d S )Nz7StringArray requires a sequence of strings or pandas.NAz?Unsupported type '<class 'numpy.ndarray'>' for ArrowStringArrayr   r   r   ZS1r   )r   r"   r   r#   r$   r%   r&   r   rZ   rG   ZNaT)r   r(   r   r   r	   test_constructor_raises  s     " rl   copyTFc           	      C   s   |t kr,| dkr,tjjtdd}|j| tjdtj	gt
d}tjdtjgt
d}|j|| d}|t krdd l}||j|| dd	}n||}t|| | r|n|}t|| d S )
NFznumpy array are differentr=   r   r   )rm   r   TtypeZfrom_pandas)r   r#   r2   r3   r   r4   r5   r&   r   rZ   rG   r   r   r   r   stringr,   r-   assert_numpy_array_equal)	rm   r   r9   r2   Znan_arrZna_arrr;   par   r   r   r	   test_from_sequence_no_mutate  s      rs   c              	   C   s   t jdddg| d}|d}tjdddgdd}t|| t jdt jdg| d}d	}tjt	|d
 |d W 5 Q R X d S )N123r   Zint64r   r!      zJint\(\) argument must be a string, a bytes-like object or a( real)? numberr   )
r   r   r7   r&   r,   rq   r   r#   r$   rD   )r
   r'   r;   r   r(   r   r   r	   test_astype_int3  s    
rx   c                 C   sF   t jdt jdg| d}|d}t jdt jdgdd}t|| d S )Nrt   rv   r   Int64r   rw   )r   r   r   r7   r,   r-   r
   r'   r;   r   r   r   r	   test_astype_nullable_int?  s    
r{   c                 C   sF   t jdt jdg| d}||}t jdtjdg|d}t|| d S )Nz1.1z3.3r   g?gffffff
@)r   r6   r   r7   r&   rZ   r,   r8   )r
   Z any_float_allowed_nullable_dtyper:   r;   r   r   r   r	   test_astype_floatG  s    
r|   skipnazNot implemented StringArray.sumc                 C   s0   t jdddg|d}|j| d}|dks,td S Nr   r   r*   r   r}   abc)r   r6   sumr   r}   r
   r'   r;   r   r   r	   test_reduceO  s    r   methodminmaxc           	      C   s   |j dkr*d}tjjt|d}|j| tjdddd g|d}t	|| |d}|rr| d	kr`dnd}||kst
n|tjkst
d S )
Nr   0'ArrowStringArray' object has no attribute 'max'r=   r   r   r*   r   r   r   )r   r#   r2   r3   AttributeErrorr4   r5   r   r6   r\   r   r   )	r   r}   r
   r9   r/   r2   r'   r;   r   r   r   r	   test_min_maxW  s    
r   boxc           
      C   s   |j dkrB|tjkrt}d}nt}d}tjj||d}|j	| |dddd g|d}t
t| |}| d	krpdnd}	||	kstd S )
Nr   z<'<=' not supported between instances of 'str' and 'NoneType'r   r=   r   r   r*   r   r   )r   r   r   rD   r   r#   r2   r3   r4   r5   r\   r&   r   )
r   r   r
   r9   r$   r/   r2   r'   r;   r   r   r   r	   test_min_max_numpyh  s    

r   c                 C   sJ   t jd dd ddd g|d}|j| d}| r8|dksFtnt |sFtd S r~   )r   r6   r   r   isnar   r   r   r	   test_reduce_missing{  s
    r   c              	   C   s   | j dkr*d}tjjt|d}|j| tjdtj	g| d}|j
dd}tjddg| d}t|| |j
tdd}tjddg| d}t|| d}tjt|d	 |j
d
d W 5 Q R X d S )Nr   zmRegex pattern "Cannot set non-string value '1' into a StringArray." does not match 'Scalar must be NA or str'r=   r   r   r   )valuez3Cannot set non-string value '1' into a StringArray.r   r   )r   r#   r2   r3   r   r4   r5   r   r   r   Zfillnar,   r-   r&   Zstr_r$   r%   )r
   r9   r/   r2   r'   resr   r(   r   r   r	   test_fillna_args  s    
r   r   c                 C   sd   dd l }tjdddg| d}||}|jt|| dd}| jdkrR||}||s`td S )	Nr   r   r   r*   r   Trn   r   )	r   r   r   listrp   r   chunked_arrayequalsr   )r
   rr   datar'   r   r   r   r	   test_arrow_array  s    


r   c              	   C   s   dd l }tjddd g| d}td|i}||}|djdksHttd| |	 }W 5 Q R X t
|d jtjs~t|d| d}t|| |jd	 tjkstd S )
Nr   r   r   r   rp   r   string[])r!   r   )r   r   r   r   tablefieldro   r   option_context	to_pandas
isinstancer
   r   r7   r,   rW   locr   r
   Zstring_storage2rr   r   r   r   r;   r   r   r   r	   test_arrow_roundtrip  s    
r   c              	   C   s   dd l }tjg | d}td|i}||}|djdksBt|j|jg |	 dg|j
d}td| | }W 5 Q R X t|d jtjst|d| d	}t|| d S )
Nr   r   r   rp   )ro   )schemar   r   r   )r   r   r   r   r   r   ro   r   r   rp   r   r   r   r   r
   r   r7   r,   rW   r   r   r   r	    test_arrow_load_from_zero_chunks  s    
 r   c                 C   s   t jdddt jg| d}|jdd}t jdddgddt jgdd	}t|| |jd
d}t jddgddgdd	}t|| d S )Nr   r   r   F)Zdropnar!   r   ry   indexr
   T)r   r   r   value_countsr6   r,   r8   rz   r   r   r	   test_value_counts_na  s    r   c                 C   sP   t jdddt jg| d}|jdd}t jddgddgdd	d
 }t|| d S )Nr   r   r   T)	normalizer!   r   ZFloat64r   rw   )r   r6   r   r   r,   r8   )r
   rH   r;   r   r   r   r	    test_value_counts_with_normalize  s    r   zvalues, expectedr   r   r*   c              	   C   s   t j| |d} t ddb |  }t|| t |  }t |}t|| t |  }t |}t	|| W 5 Q R X d S )Nr   zmode.use_inf_as_naT)
r   r   r   r   r,   rq   r6   r8   r   rW   )valuesr   r
   r;   r   r   r	   test_use_inf_as_na  s    	

r   c                 C   s^   | j dkrtd tjdddg| d}d|j  k rT|   krT|jdd	k sZn td S )
Nr   znot applicabler   r   r*   r   r   T)deep)r   r#   skipr   r6   nbytesZmemory_usager   )r
   Zseriesr   r   r	   test_memory_usage  s    

r   float_dtypec                 C   s:   t jdg| d}||}t jdg|d}t|| d S )Ng?r   z0.1)r   r6   r7   r,   r8   )r   r
   rH   r;   r   r   r   r	   test_astype_from_float_dtype  s    
r   c                 C   sF   t jdt jdg| d}t|}tjdt jdgtd}t|| d S )Nr   r   r   )r   r   r   r&   rG   r,   rq   rz   r   r   r	   "test_to_numpy_returns_pdna_default  s    
r   c                 C   sJ   |}t jdt jdg| d}|j|d}tjd|dgtd}t|| d S )Nr   r   r   )na_value)r   r   r   Zto_numpyr&   rG   r,   rq   )r
   Znulls_fixturer   r'   r;   r   r   r   r	   test_to_numpy_na_value  s
    r   c                 C   s   t jddd g| d}|ddg}t dddg}t|| |dt jg}t dddg}t|| |g }t dddg}t|| |dt j g}t dddg}t|| d S )Nr   r   r   r*   TF)r   r6   isinr,   r8   r   Z	Timestampnow)r
   r9   rH   r;   r   r   r   r	   	test_isin  s    
r   )A__doc__Znumpyr&   r#   Zpandas.util._test_decoratorsutilZ_test_decoratorstdZpandas.core.dtypes.commonr   Zpandasr   Zpandas._testingZ_testingr,   Zpandas.core.arrays.string_arrowr   Zfixturer
   r   r   r   r)   r.   r<   rF   rI   rL   rM   r2   r3   rY   r[   rd   re   ri   rk   rl   Zparametrizers   rx   r{   r|   r   r   r6   r   r   r   r   Z
skip_if_nor   r   r   r   r   r   r   Zfloat16Zfloat32Zfloat64r   r   r   r   r   r   r   r	   <module>   s   

	



	




