U
    kª/eyK  ã                   @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ e jj	Z
zd dlmZ d dlZW n ek
rl   Y nX e jdddd„ ƒZd	d
„ Zd*dd„Zd+dd„Ze jje j ddddg¡dd„ ƒƒZdd„ Zdd„ Zdd„ Zdd„ Zdd„ Ze jjd d!„ ƒZd"d#„ Zd$d%„ Zd&d'„ Z d(d)„ Z!dS ),é    N)Úfs)Úutil)Úassert_frame_equalÚmodule)Zscopec                 C   s   | d S )NÚorc© )Zbase_datadirr   r   ú:/tmp/pip-unpacked-wheel-seu8352k/pyarrow/tests/test_orc.pyÚdatadir'   s    r	   c                 C   s*  |D ]}|| }| | }|dkrjdd„ |D ƒrj||   ¡ }t|ƒD ]\}}dd„ |D ƒ||< qD|||< q|d j}t|tjƒrŒt |¡}nt|tjƒr¢|jj}nz|t	j
krdgt|ƒ }	tt||ƒƒD ]F\}\}
}t |¡sÊ|
 ¡ j}d|  }t	 
t|| ƒ¡ |¡|	|< qÊt |	¡}|||< qdS )z_
    Fix type of expected values (as read from JSON) according to
    actual ORC datatype.
    Úmapc                 S   s&   g | ]}|D ]}|  ¡ d dhk‘qqS ©ÚkeyÚvalue)Úkeys)Ú.0ÚmÚdr   r   r   Ú
<listcomp>5   s       z&fix_example_values.<locals>.<listcomp>c                 S   s   g | ]}|d  |d f‘qS r   r   )r   r   r   r   r   r   9   s     r   Né
   )ÚcopyÚ	enumerateÚ	__class__Ú
issubclassÚdatetimeÚpdZto_datetimeÚdateÚdtÚdecimalÚDecimalÚlenÚzipZisnullÚas_tupleÚexponentÚroundÚscalebZSeries)Zactual_colsZexpected_colsÚnameÚexpectedÚactualÚcolÚir   ÚtypZconverted_decimalsr   ÚvÚexpZfactorr   r   r   Úfix_example_values,   s4    
ÿ




ÿ
r,   c                 C   s6   |d k	s|d k	r$|||… j dd}t| |dd d S )NT)ZdropF)Zcheck_dtype)Zreset_indexr   )Úorc_dfÚexpected_dfÚstartÚstopr   r   r   Úcheck_example_valuesR   s    r1   Fc           
      C   sî   ddl m} | | ¡}| ¡ }t|tjƒs.t‚| ¡  t	 
| ¡ ¡}t|jƒt|jƒks\t‚|j |j¡sx|j|jd}|r†t||ƒ t||ƒ d}t|jƒD ]<}| |¡}	tt	 
|	 ¡ ¡|||t|	ƒ d |t|	ƒ7 }qž||jksêt‚dS )zC
    Check a ORC file against the expected columns dictionary.
    r   ©r   ©Úcolumns)r/   r0   N)Úpyarrowr   ÚORCFileÚreadÚ
isinstanceÚpaZTableÚAssertionErrorÚvalidater   Z	DataFrameZ	to_pydictÚsetr4   ÚequalsZreindexr,   r1   ÚrangeZnstripesZread_striper   Znrows)
Zorc_pathr.   Úneed_fixr   Úorc_fileÚtabler-   Zjson_posr(   Úbatchr   r   r   Úcheck_example_fileX   s,    




ýrC   ÚfilenamezTestOrcFile.test1.orczTestOrcFile.testDate1900.orczdecimal.orcc                 C   s2   ||  }t jt| d¡ƒdd}t||dd dS )zÊ
    Check a ORC file example against the equivalent JSON file, as given
    in the Apache ORC repository (the JSON file has one JSON object per
    line, corresponding to one row in the ORC file).
    z.jsn.gzT)Úlines)r?   N)r   Ú	read_jsonÚstrÚwith_suffixrC   )rD   r	   ÚpathrA   r   r   r   Útest_example_using_json}   s    rJ   c                 C   s(  ddl m} | | d ¡ ¡ }|jdks,t‚t dt ¡ fdt 	¡ fdt 
¡ fdt ¡ fdt ¡ fd	t ¡ fd
t ¡ fdt ¡ fdt ¡ fdt dt t dt ¡ fdt ¡ fg¡¡fg¡fdt t dt ¡ fdt ¡ fg¡¡fdt t ¡ t dt ¡ fdt ¡ fg¡¡fg¡}|j|ks$t‚d S )Nr   r2   zTestOrcFile.emptyFile.orcZboolean1Zbyte1Zshort1Zint1Zlong1Zfloat1Zdouble1Zbytes1Zstring1ÚmiddleÚlistr
   )r5   r   r6   r7   Únum_rowsr:   r9   ÚschemaZbool_Zint8Zint16Úint32Úint64Zfloat32Zfloat64ÚbinaryÚstringÚstructÚlist_Zmap_)r	   r   rA   Zexpected_schemar   r   r   Útest_orcfile_empty   s@    









ÿÿÿ
ÿÿ
ÿÿírU   c                 C   s¬   ddl m} t ddddgi¡}| d }| ¡  |d }| |t|ƒ¡ |j|t 	¡ d	}| 
|¡sht‚|jd
t | ¡d	}| 
|¡sŠt‚| t |¡¡}| 
|¡s¨t‚d S )Nr   r2   Úaé   é   é   Údata_dirzdata.orc)Ú
filesystemzdata_dir/data.orc)r5   r   r9   rA   ÚmkdirÚwrite_tablerG   Ú
read_tabler   ZLocalFileSystemr=   r:   r   Z_filesystem_uri)Útmpdirr   rA   Ú	directoryrI   Úresultr   r   r   Útest_filesystem_uri°   s"     ÿÿrb   c                 C   sÆ   ddl m} t dd dd g¡}t d dd dg¡}t ||dœ¡}|  d¡}| ||¡ | |¡}| |¡snt	‚| |g ¡}d	|j
ksˆt	‚d|jks–t	‚|j|d
gd}d	|j
ks´t	‚d|jksÂt	‚d S )Nr   r2   rW   rY   ÚArrowÚORC©rP   Úutf8útest.orcé   rP   r3   )r5   r   r9   ÚarrayrA   Újoinr]   r^   r=   r:   rM   Znum_columns)r_   r   rV   ÚbrA   ÚfileÚoutput_tabler   r   r   Útest_orcfile_readwriteÈ   s    

rn   c                  C   sŒ   ddl m}  ddlm} |ƒ }t dd dd g¡}t d dd dg¡}t ||dœ¡}|  ||¡ | d¡ |  	|¡}| 
¡ }| |¡sˆt‚d S )	Nr   r2   )ÚBytesIOrW   rY   rc   rd   re   )r5   r   Úioro   r9   ri   rA   r]   Úseekr6   r7   r=   r:   )r   ro   ÚbufrV   rk   rA   r@   rm   r   r   r   Útest_bytesio_readwriteÛ   s    

rs   c               	   C   sX  ddl m}  t ¡ }t dd dd g¡}t d dd dg¡}t ||dœ¡}|  ||¡ t | ¡ ¡}|  	|¡}| 
¡ }| |¡s‚t‚|jdkst‚|jd	ksžt‚|jd
ks¬t‚|jdksºt‚t ¡ }t t¡ |  ||¡ W 5 Q R X t | ¡ ¡}|  	|¡}| 
¡ }| |¡st‚|jdks$t‚|jd	ks4t‚|jd
ksDt‚|jdksTt‚d S )Nr   r2   rW   rY   rc   rd   re   ÚUNCOMPRESSEDz0.12i'  i   ©r5   r   r9   ÚBufferOutputStreamri   rA   r]   ZBufferReaderÚgetvaluer6   r7   r=   r:   ÚcompressionÚfile_versionÚrow_index_strideZcompression_sizeÚpytestZwarnsÚFutureWarning©r   Úbuffer_output_streamrV   rk   rA   Zbuffer_readerr@   rm   r   r   r   Útest_buffer_readwriteê   s2    

r   c               	   C   sl  ddl m}  t ¡ }t dd dd g¡}t d dd dg¡}t ||dœ¡}| j||dd	d
dd t | ¡ ¡}|  	|¡}| 
¡ }| |¡sŒt‚|jdksšt‚|jd	ks¨t‚|jd
ks¶t‚|jdksÄt‚t ¡ }t t¡ | j||dd	ddd W 5 Q R X t | ¡ ¡}|  	|¡}| 
¡ }| |¡s(t‚|jdks8t‚|jd	ksHt‚|jdksXt‚|jdksht‚d S )Nr   r2   rW   rY   rc   rd   re   Úsnappyz0.11iˆ  i €  )rx   ry   rz   Úcompression_block_sizeZSNAPPYZuncompressedi N  i @  rt   ru   r}   r   r   r   Ú'test_buffer_readwrite_with_writeoptions  sN    ú
ú
r‚   c               	   C   s¶  ddl m}  t ¡ }t dd dd g¡}t d|i¡}t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||d	d
 W 5 Q R X t t¡ | j	||dd
 W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t
¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t
¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||dd W 5 Q R X t t¡ | j	||d d W 5 Q R X t t¡ | j	||d!d" W 5 Q R X t t¡ | j	||dd#gd" W 5 Q R X t t¡ | j	||dd$d%hd" W 5 Q R X t t¡ | j	||dd& W 5 Q R X t t¡ | j	||d'd& W 5 Q R X t t¡ | j	||d(d& W 5 Q R X d S ))Nr   r2   rW   rY   rP   )Z
batch_sizeiœÿÿÿgR¸…ë @g¤p=
×£À?)ry   z1.1)Zstripe_sizeipþÿÿg®Gáº °@)rx   ÚnoneZzlid)r   i8ÿÿÿgR¸…ë"‘@)Zcompression_strategyÚnoZlarge)rz   iàüÿÿg®Gáz”0¨@Úcat)Zpadding_toleranceZarrow)Zdictionary_key_size_thresholdg333333ó?gš™™™™™	ÀrR   )Zbloom_filter_columnsgffffffö?rX   éÿÿÿÿ)Zbloom_filter_fppgš™™™™™ñ?gš™™™™™¹¿)r5   r   r9   rv   ri   rA   r{   ÚraisesÚ
ValueErrorr]   Ú	TypeError)r   r~   rV   rA   r   r   r   Ú+test_buffer_readwrite_with_bad_writeoptions;  sp   ýýýýýýýýýýýýýýýýýýýýýýýýýýýýýýrŠ   c                 C   s  ddl m} t dt ¡ ¡}t dt |g¡¡}t dt ¡ ¡t dt t dt ¡ ¡¡¡t dt |t d	t ¡ ¡g¡¡t d
t t dt t dt ¡ ¡t d	t ¡ ¡g¡¡¡¡t dt ¡ ¡g}dgddggddiddœgdddœdddœggdgg}tj|t 	|¡d}t
| d ƒ}| ||¡ | |¡}| ¡ }	|	 |¡sJt‚|jddgd}
|
 | ddg¡¡stt‚|jdddgd}| | dddg¡¡s¢t‚|jdgd}t ddddiigi¡}| |¡sØt‚|jdgd}t dd	digi¡}| |¡s
t‚|jdddgd}| | ddg¡¡s6t‚|jdgd}t d
ddiddiggi¡}| |¡spt‚|jddgd}
|
 | ddg¡¡sšt‚|jdddgd}| | ddd
g¡¡sÈt‚t t¡ |jdgd W 5 Q R X t t¡ |jdgd W 5 Q R X d S )Nr   r2   ÚinnerrK   ÚbasicrL   ÚitemrS   Úinner2zlist-structÚinner1Zbasic2rW   rX   rY   rh   )rK   rŽ   é   é   )r   rŽ   é   é   é	   )rN   rg   r3   zstruct.middle.innerzstruct.inner2zlist-struct.inner1Zwrong)r5   r   r9   ÚfieldrP   rS   rO   rT   rA   rN   rG   r]   r6   r7   r=   r:   Úselectr{   r‡   ÚIOErrorrˆ   )Útempdirr   r‹   rK   ÚfieldsZarrsrA   rI   r@   Zresult1Zresult2Zresult3Zresult4Z	expected4Zresult5Z	expected5Zresult6Zresult7Z	expected7r   r   r   Útest_column_selection  sz     ÿ ÿ  þÿÿð   þ
ÿrš   c              
   C   sP   ddl m} t| d ƒ}| |¡$}t t¡ | ¡  W 5 Q R X W 5 Q R X d S )Nr   r2   rg   )r5   r   rG   Z	ORCWriterr{   r‡   ÚAttributeErrorÚtest)r˜   r   rI   Úwriterr   r   r   Útest_wrong_usage_orc_writerl  s
    rž   c              	   C   st   ddl m} t| d ƒ}t dd dd g¡}t d d d d g¡}t ||dœ¡}t tj¡ | 	||¡ W 5 Q R X d S )Nr   r2   rg   rW   rY   re   )
r5   r   rG   r9   ri   rA   r{   r‡   ZArrowNotImplementedErrorr]   )r˜   r   rI   rV   rk   rA   r   r   r   Ú test_orc_writer_with_null_arraysu  s    rŸ   )NN)F)"r{   r   r   r5   r9   r   Zpyarrow.testsr   Úmarkr   Z
pytestmarkZpandas.testingr   Zpandasr   ÚImportErrorZfixturer	   r,   r1   rC   ZparametrizerJ   rU   rb   rn   rs   r   r€   r‚   rŠ   rš   rž   rŸ   r   r   r   r   Ú<module>   sF   

&

%ý!!
/ cO	