U
    /eN                 !   @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Zd dlZd dlZd dlmZ d dlZd dlmZ d dlZd dlmZ d dlmZmZ d dlmZmZmZm Z m!Z! d dl"m#Z# d d	l$m%Z% d d
l&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ zd dl0Z0W n" e1k
rB   dZ0edZ2Y nX ee0j3Z2zd dl4Z5W n" e1k
r|   dZ5edZ6Y nX ee5j3Z6zd dl7m8Z9 W n e1k
r   dZ9Y nX e0 Z:ej;j<e:ddZ=ej>dkre5re6edkrdZ?dZ@n
e9 Z?dZ@ej;j<e?e@dZAdZBdZCejDdd eEeBD dd eEeBD dejFdd eEeBD dd d!ZGejHeGeCd"ZIejJejKd#e=d$ejKd%eAd$gd&d'd( ZLd)d* ZMe0re2ed+k reresd,ZNd-ZOeMf eOeNeNd.ZPneM ZPeAd/d0 ZQe=d1d2 ZReM ej;Sd3ddgd4d5 ZTej;Sd6ddgeM d7d8 ZUeM d9d: ZVeM d;d< ZWeM d=d> ZXeM d?d@ ZYeM dAdB ZZeM dCdD Z[eM dEdF Z\dGdH Z]eM dIdJ Z^eM dKdL Z_dMdN Z`eM dOdP ZadQdR ZbdSdT ZceM dUdV ZdeM dWdX ZeeAej;SdYdZejKd%ej;j<e  d[dd$gd\d] ZfeAej;jge d^dd_d` ZheM dadb Ziej;Sdcddgddde Zjdfdg Zkdhdi Zldjdk ZmeAej;Sdlddgej;Sdmddgdndo Zndpdq Zodrds Zpej;Sdcddgdtdu Zqej;Sdcddgej;Sdvejrdwdxdydzejsd{d|fetd d}d{d~fgdd Zudd Zvej;Sdcddgdd Zwdd ZxeM dd Zydd Zzej;SdeDdddd{gii i feDddddgii i feDddddgii i feDddddgide5rde5{ inddi feDde|dddgii ddgifeDde|d{dd{gii ddgifeDde}e~ejdddgii i feDddddgidi i feDdddd{gidi i fejKeDdddd{gidi i ej;jge!ddej;jge!ddgd$ejKeDdddd{gidi i ej;jge!ddej;jge!ddgd$eDddddgidi i feDddddgidi i feDddddgidi i feDdddd{gidi i feDdddd{gidi i fejDddd{dgiddd{gd!i i fejDddd{dgiejFd{ddgdd d!i i feDd{ddgddd{gdi i fejDd{ddgddd{gdddgdi i feDdddd{gii i feDddddgii i feDddddgii i feDddddgii i feDddddgii i fgdd Zdd Zdd Zdd Zej;Sdddgdd ZeAe=dd ZeAdd ZeAej;Sd6ddgej;SdddgddÄ ZeAddń ZeAddǄ ZddɄ Zdd˄ ZeAej;Sdddggddτ ZeM ddф ZeM ddӄ ZeM ddՄ Zddׄ Zddل Zddۄ Zdd݄ Zdd߄ Ze=ej;jgdddd Zej;Sdddgdd ZeAej;Sdddgdd Ze=dd Ze=dd Zdd Zdd Zdd Zej;Sddddgdd Zej;Sddddgdd ZejJddddddddddgdgd dddddddddddgdgd ddddiddddgddddddddddddgdgd dgd&d	d
 Zdd Zdd ZeAdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZeAdd  Zd!d" Zd#d$ ZeAej;Sd%eDdd&dd'd{ddgieDddddgieDddddgieKeDde|dddgieKeDde|d{dd{gieDde}e~ejd(d)d*gieDde}e~ejdddgieDddddgidejKeDdddd{gidej;jge!ddd$ejKeDdddd{gidej;jge!ddd$eDdddd{gideDdddd{gidejDddd{dgiddd{gd!ejDdd&dd'd{ddgiejFd{ddd&dd'gdd d!eDd{ddgddd{gdejDd{ddgddd{gdddgdeDdddd{gieDddddgieDddddgieDddddgieDddddgigd+d, Zd-d. Zd/d0 Ze=d1d2 ZeM d3d4 ZeM d5d6 Zej;Sd7ddgej;Sd8ddgeM d9d: ZeM d;d< Zej;Sdddgd=d> Zd?d@ Zej;SdAddgej;Sd6dejdgdBdC ZdDdE ZdFdG ZdHdI ZdJdK ZdLdM ZdNdO ZdPdQ ZeAdRdS ZeAej;SdTd{dUgej;SdVddgdWdX ZeAej;SdTdYdZgd[d\ ZeAd]d^ ZŐd_d` ZƐdadb ZeM dcdd ZeAej;Sdeddgej;Sdddgej;SdfdgdhgeM didj ZeM ej;Sdkddgdldm ZeAej;Sdeddgej;Sdfddndgdhgdodp ZeM dqdr Ze=dsdt Z͐dudv Zej;SdVddgeM dwdx ZeAdydz ZeAd{d| ZeAd}d~ Zej;Sdddgdddggdd ZeAej;Sddgddggdd ZeAdd ZePdd Z֐dd Zאdd ZeM dd Zِdd Zڐdd ZeAdd ZeAej;Sdddgdd ZeAdd ZeAej;SdTddgdd Zߐdd ZeAdd ZeAdd ZeAej;SdddgeM dd Zdd Zdd Zdd Zdd Zdd Zdd ZeAdd Zdd Zdd Zej;SdVdddgdd Zej;Sdddgej;Sddd gdd ZeAej;Sd̐ddd Zdd ZdÐdĄ ZdŐdƄ ZdǐdȄ Zdɐdʄ Zdːd̄ ZeAej;j<e6ed̓k dddϐdЄ Zej;jSdd{hd{gdfdӐdԍdՐdք Zej;jSdѐdgdggdؐdgdؐdggfdِdԍdڐdۄ Zdܐd݄ Zej;jdސd߄ ZeAdd Zej;Sdddgdd ZeAej;Sdddgdd Zdd ZdS (      N)Decimal)	MagicMock)parse)
_numpy_124)	Blockwiseoptimize_blockwise)PANDAS_GT_110PANDAS_GT_121PANDAS_GT_130PANDAS_GT_150PANDAS_GT_200)
get_engine)_parse_pandas_metadata)optimize_dataframe_getitem)	assert_eq)DataFrameIOLayer)natural_sort_key)	hlg_layerF0zfastparquet not foundreasonwin32z2.0.0Tztskipping pyarrow 2.0.0 on windows: https://github.com/dask/dask/issues/6093|https://github.com/dask/dask/issues/6754zpyarrow not found(      c                 C   s   g | ]}|d  d qS )       .0ir   r   H/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/io/tests/test_parquet.py
<listcomp>M   s     r!   c                 C   s   g | ]}|d  qS )g      @r   r   r   r   r    r!   N   s     xyc                 C   s   g | ]}d | qS )
   r   r   r   r   r    r!   P   s     myindexnameindexnpartitionsfastparquetmarkspyarrow)paramsc                 C   s   | j S Nparamrequestr   r   r    engineV   s    r7   c                     s   ddh t td fdd D }|  D ]\}}|dd\}}t|d}|dkstt|d	kstt|  rtd
| tt	j
||d}t|d	kr|| | q.|D ]}||kr|| | qq.t	j
ddd t| D S )zProduct of both engines for write/read:

    To add custom marks, pass keyword of the form: `mark_writer_reader=reason`,
    or `mark_engine=reason` to apply to all parameters with that engine.r0   r-   )r-   r0   c                    s,   i | ]$} D ]}||f| | gqqS r   r   )r   wrbackendsZ
skip_marksr   r    
<dictcomp>l   s
        z&write_read_engines.<locals>.<dictcomp>_   )xfailskip   zunknown keyword %rr   )write_engineread_enginec                 S   s$   g | ]\}}t j|d t|iqS r.   )pytestr4   tuple)r   kvr   r   r    r!   ~   s     z&write_read_engines.<locals>.<listcomp>)FASTPARQUET_MARKPYARROW_MARKitemssplitrE   lenset
ValueErrorgetattrrD   markappendparametrizesorted)kwargsr/   kwvalkindrestkeyrF   r   r:   r    write_read_engines`   s(     rZ   z0.5z6pandas with fastparquet engine does not preserve indexz/pyarrow schema and pandas metadata may disagree)Zxfail_pyarrow_fastparquetZxfail_fastparquet_fastparquetZxfail_fastparquet_pyarrowc                  C   s0   ddl m}  td| ksttd| ks,td S )Nr   ArrowDatasetEnginer0   arrow)dask.dataframe.io.parquet.arrowr\   r   AssertionErrorr[   r   r   r    test_get_engine_pyarrow   s    r`   c                  C   s    ddl m}  td| kstd S )Nr   FastParquetEnginer-   )Z%dask.dataframe.io.parquet.fastparquetrb   r   r_   ra   r   r   r    test_get_engine_fastparquet   s    rc   has_metadatac              	   C   s  t | }ttjdtjdtjdtjdtjdtjdtjj	dddgdd
dd}tj|d	d
}|rrddini }|j|fd|d| t|}d|k|kstd|k|kstd|ksttj|d|d}	t|	jdkst|	jdd }
|jD ]}|| |
| k stqd S )N  dtypehelloyopeoplesizeOi32i64fZbhello  	chunksizewrite_metadata_fileTFwrite_indexr7   _common_metadata	_metadatapart.0.parquetr*   r7   r>   sync	scheduler)strpd	DataFramenparangeint32int64float64randomchoiceastypeddfrom_pandas
to_parquetoslistdirr_   read_parquetrL   	divisionscomputereset_indexcolumnsall)tmpdirrB   rC   rd   tmpdatadfrT   filesdf2outcolumnr   r   r    
test_local   s,    


r   r*   c                 C   sz   t | }tdddgdddgdd d }|r>|jddd	}tj|d
d}|j|||dd tj||d}t|| d S )Nab   r      r   r   r   TZdroprA   r+   rw   r7   ru   r7   	r   r   r   	set_indexr   r   r   r   r   )r   rB   rC   r*   fnr   ddfread_dfr   r   r    
test_empty   s    $r   c                 C   sp   t | }tdddgdddgd}|jddd}tj|d	d
}|j||d tj|dg|dd}t|| d S )Nr   r   r   r   r   r   Tr   rA   r+   r   r*   r7   calculate_divisionsr   )r   rB   rC   r   r   r   r   r   r   r    test_simple   s       r   c                 C   s   t | }tdddgdddgd}|jddd}tj|d	d
}|j||ddd  t	|}d|kslt
tjtj|ddg|dd}t|| d S )Nr   r   r   r   r   r   Tr   rA   r+   F)r7   r   ru   ry   	*.parquetr   )r   r   r   r   r   r   r   r   r   r   r_   r   pathjoinr   )r   rB   rC   r   r   r   r   r   r   r   r    test_delayed_no_metadata   s&       

r   c                 C   s   t | }tj||d tjtj|dr@ttj|d t|}d|ksVt	t
jtj|d|ddd}tt| d S )Nr   ry   r   r&   Tr7   r*   r   r   r   r   r   r   existsr   unlinkr   r_   r   r   r   )r   rB   rC   tmp_pathr   ddf2r   r   r    test_read_glob   s    
r   c                 C   s>   t | }tj|d|d tj||ddd}tt|ddd d S )NFrv   r   check_indexcheck_divisionsr   r   r   r   r   r   r   rB   rC   r   r   r   r   r    test_calculate_divisions_false  s    r   c                    s   ||  krdkr,n nt jdkr,td t  tj |d t fddt  D t	d}t
j||dd	d
}tt| d S )Nr-   ntzfilepath bug.r   c                 3   s&   | ]}| d stj |V  qdS )ry   N)endswithr   r   r   r   rq   r   r   r    	<genexpr>#  s   
z!test_read_list.<locals>.<genexpr>)rY   r&   Tr   )r   r(   rD   r@   r   r   r   rS   r   r   r   r   r   )r   rB   rC   r   r   r   r   r    test_read_list  s"    "

	   r   c                 C   s   t | }tj||d ttj|g |ddtg   ttj|g |ddtg   dd ttj|dg|ddtdg  ttj|dg|ddtdg  dd d S )Nr   Tr   r7   r   Fr   r#   r   r   r   r   r   r   Zclear_divisionsr   rB   rC   r   r   r   r    test_columns_auto_index1  s<    
      r   c                 C   s   t | }tj||d ttj|g |dddtg   ttj|g |dddtg   dd ttj|ddg|ddtdg  ttj|ddg|ddtdg  dd ttj|ddd	g|ddt ttj|ddd	g|ddt dd d S )
Nr   r&   T)r   r7   r*   r   Fr   r#   r*   r   r7   r   r$   r   r   r   r   r    test_columns_indexW  s    
r   c              	   C   s|   t | }tj||d tttf tj|dg|d W 5 Q R X tt	tf" tj|dgt
tj |d W 5 Q R X d S )Nr   Z	nonesenser   r7   )r   r   r   rD   raisesrN   KeyErrorr   r   	Exceptionlistr   r   r7   r   r   r   r    test_nonsense_column  s    r   c                 C   s   t | }tj||d t }ttj|d|dd|ddd ttj|dddg|dd|ddg ddd ttj|dd	dg|dd|d	dg ddd d S )
Nr   FTr   r   r#   r$   r   r&   )r   r   r   r   r   r   r   )r   rB   rC   r   r   r   r   r    test_columns_no_index  sB    

r   c                 C   sF   t | }tj||dd tj||dd}|jjd ks8t|jrBtd S )NFr7   rw   r7   r*   )	r   r   r   r   r   r*   r(   r_   known_divisions)r   rB   rC   r   r   r   r   r    !test_calculate_divisions_no_index  s
    r   c              
   C   s  t jt| d}tjjtdtdd gddgd}tj	tj
dddd	g|d
}|jdd}|dkrtj|| dd nttjj| dd| tj|||jd}t|| tj|d||jd}t||d  tj|dd	gddg|d}t||dd	gddg  tj|d|d}t|| tj|d	gdg|d}t||dd	g  tj|dd	gdg|d}t||ddd	g  tj|ddgdg|d}t||dddg  tj|ddd	g|d}t||dd	g  dD ]4}tj||ddg|d}t|||ddg  qdD ]2}tt tj||ddg|d}W 5 Q R X qdd|dfdd	|fdd|dg fdd|ddg fdd	|dffD ],\}}	}
tj|||	|d}t||
|	  qd S )Ntest.parquetr%   r>   x0x1namesrA   r   r   )r   r*   Fr   r-   )rw   preserve_indexr   )r   r7   r*   )r*   r   r7   r{   )r   r*   r7   )r   r   )r   r   )r   r   r   r   r   Z
MultiIndexfrom_arraysr   r   r   r   Zrandnr   r-   writepqwrite_tablepaTabler   r   r   r   r   r   rD   r   rN   )r   r7   r   r*   r   r   r   dindcolZsol_dfr   r   r    #test_columns_index_with_multi_index  sP     

$r   c                 C   s`   t | }tdddgdddgd}tj|dd}|j||d	 tj||d	}t||d
d d S )Nr>   rA      r   r   r   r   r+   r   Fr   )r   r   r   r   r   r   r   r   )r   rB   rC   r   r   r   r   r   r   r    test_no_index+  s    r   c                 C   s`   t | }tj||d tj|dgd|dd}ttdg | tj|dd|dd}ttj| d S )Nr   r#   r&   T)r   r*   r7   r   )r   r   r   r   r   r   r#   )r   r7   r   r   r   r   r    test_read_series5  s$            r   c                    s   t | }tj| d  fdd}t||jt||jksBtt||jt||dgdjkshtt||ddjt||dgdjkstd S )Nr   c                    s   t j| fd i|S )Nr7   )r   r   )r   rT   r   r   r    readG  s    ztest_names.<locals>.readr#   r   r#   )r   r   r   rM   daskr_   )r   r7   r   r   r   r   r    
test_namesC  s     &r   c                 C   sX   t | d}t }d|j_|j||dr2dndd tj	|d|d}t
|| d S )Nr   r*   r0   r-   r   r{   )r   r   r   copyr*   r(   r   
startswithr   r   r   )r   rB   rC   r   Zdfpr   r   r   r    test_roundtrip_from_pandasQ  s     r   c                 C   s   |dks|dkrt d ttjddtjddgddtjd	tjd
d	d
gddtjdddtjdgddtjddddtjgddd}tj|dd}|j| |d tj	| |d}t
|| dS )zx
    Test round-tripping nullable extension dtypes. Parquet engines will
    typically add dtype metadata for this.
    r-   z.https://github.com/dask/fastparquet/issues/465r>   rA   r   r   Int64rf   TFboolean皙?皙?333333?皙?Float64r   r   cr   stringr   r   r   r   r+   r   N)rD   r?   r   r   SeriesNAr   r   r   r   r   )r   rB   rC   r   r   r   r   r   r    test_roundtrip_nullable_dtypes]  s    
r  dtype_backendpandasz'Requires pyarrow-backed nullable dtypesc                    s|  |dkrd}nd}t t jddt jddgd| d	t jd
t jdd
dgd| d	t jdddt jdgd| d	t jddddt jgd| d	d}tj|dd}tj fdd| }t	fddt
|D  tjd|i |dkr"tjtdd  tj |d
d! W 5 Q R X nLtt tj |d"}t|| W 5 Q R X tj |d
d!}t||dd# W 5 Q R X d$S )%zw
    Test reading a parquet file without pandas metadata,
    but forcing use of nullable dtypes where appropriate
    r   z	[pyarrow]r>   rA   r   r   r   rf   TFr   r   r   r   r   r   r   r   r   r   r   r   r+   c                    s.   t j| i }t| d| d  dS )z0Write a parquet file without the pandas metadatapart..parquetN)r   r   r   replace_schema_metadatar   r   )r   r   table)r   r   r    write_partition  s    z1test_use_nullable_dtypes.<locals>.write_partitionc                    s   g | ]\}} ||qS r   r   )r   r   p)r
  r   r    r!     s     z,test_use_nullable_dtypes.<locals>.<listcomp>zdataframe.dtype_backendr-   z&`use_nullable_dtypes` is not supportedmatch)r7   use_nullable_dtypesr   r   N)r   r   r   r  r   r   r   delayed
to_delayedr   	enumerateconfigrM   rD   r   rN   r   r_   r   )r   r7   r  Zdtype_extrar   r   
partitionsr   r   )r   r
  r    test_use_nullable_dtypest  s<     

 r  zKnown bug in pandas. See https://issues.apache.org/jira/browse/ARROW-13413 and https://github.com/pandas-dev/pandas/pull/41052.c                 C   s   t t jddt jddgddt jdt jdddgd	dt jd
ddt jdgddt jddddt jgddd}tj|dd}|j| |d t t 	 i}tj
| ddd|jid}|dt 	 i}t|| d S )Nr>   rA   r   r   r   rf   TFr   r   r   r   r   r   r   r   r   r   r   r   r+   r   r0   types_mapper)r7   r  arrow_to_pandas)r   r   r   r  r   r   r   r   r   ZFloat32Dtyper   getr   r   )r   r7   r   r   r  resultexpectedr   r   r    *test_use_nullable_dtypes_with_types_mapper  s(     r  c                 C   s:  t | }tjddddgd idd}tj|dd	}tj|||d
 tj|d|d}| jj	j
 dddgksrttj|dg|d}| jj	j
 dddgkst|dkrtj||d
}| jj	j
 dddgkst|jd d   t||sttj|g |d}|jd d   |j|j k s6td S )Nr#   r   r   r   d   categoryrf   r   r+   r   
categoriesr7   r-   re   )r   r   r   r   r   r   r   r   r#   catr  tolistr_   locr   r   r   rB   rC   r   r   r   r   r   r   r    test_categorical  s        r#  metadata_filec              	   C   s@  t | }ttjdtjdtjdtjdtjdtjdtjj	dddgdd
dd}d	|j_t|d
 }tj|jd| dd}tj|j|d dd}|j|||d |rtt | dd}| }	W 5 Q R X |j|d|d |r$tt | dd}| }
W 5 Q R X |
|	ks$ttj||d}t|| dS )5Test that appended parquet equal to the original one.re   rf   rh   ri   rj   rk   rm   rn   r*   rA   Nr  rs   r7   ru   ry   rbTrQ   r7   r   )r   r   r   r   r   r   r   r   r   r   r   r*   r(   rL   r   r   ilocr   openr   r   r_   r   r   )r   r7   r$  r   r   halfddf1r   rq   Z	metadata1Z	metadata2ddf3r   r   r    test_append  s2    
r.  c              	   C   s   t | }ttjdtjdtjdtjdtjdtjdtjj	dddgdd
dd}d	|j_t|d
 }tj|jd| dd}tj|j|d dd}|j|d|d |j|d|d tj||d}t|| dS )r%  re   rf   rh   ri   rj   rk   rm   rn   r*   rA   Nr  rs   Tr(  r   )r   r   r   r   r   r   r   r   r   r   r   r*   r(   rL   r   r   r)  r   r   r   )r   r7   r   r   r+  r,  r   r-  r   r   r    test_append_create  s$    
r/  c              	   C   sN  t | }ttjddddtjddddtjddddd}d	|j_ttjddddtjddddtjd
dddd}d	|j_|d d|d< tj|d j	d< |d d|d< t
j|dd}t
j|dd}t
j||dg|d t
j||dgdd|d t
j||d	dd }|jd|d< t|dt||g|j dd d S )Nr   r%   r   rf      r  n   )latlonvaluer*   x      r2  r   r>   r+   r3  partition_onr7   T)r8  rQ   ignore_divisionsr7   r   r4  Fr   )r   r   r   r   r   r*   r(   r   nanr)  r   r   r   r   r   r3  r   sort_valuesconcatr   )r   r7   r   df0df1Zdd_df0Zdd_df1r   r   r   r    test_append_with_partition1  sR    	   
  r?  c                 C   s   t | }ttjdtjjdddgddtjjdddgddd}t|d}|j	|dg|d	 tj
||d
}t|jjjdddhkstd S )N2   r#   r$   zrk   r   r   r   rA   r   r7  r   r   r   r   r   r   randr   r   r   r   r   rM   r   r  r  r_   r   r7   r   r   r   r   r   r    test_partition_on_catsa  s    
rF  metastatsc                 C   s   t | }ttjdtjjdddgddtjjdddgddd}t|d}|j	|dgd	|d
 tj
|d	|d}t|jjjdddhkstd S )Nr@  r#   r$   rA  rk   rB  rA   r   r0   r8  r7   ru   r7   r   rC  )r   rH  rG  r   r   r   r   r   r    test_partition_on_cats_pyarrowp  s    
rK  c                 C   s   t | }ttjdtjjdddgddtjjdddgddd}t|d}|j	|dg|d	d
 tj
||ddd}t|jjjdddhkstd S )Nr@  r#   r$   rA  rk   rB  rA   r   FrI  Tr>   r7   r   metadata_task_sizerC  rE  r   r   r     test_partition_parallel_metadata  s     
   rN  c                 C   sd  t | }ttjdtjjdddgddtjjdddgddd}t|d}|j	|dd	g|d
 tj
||d}t|jjjdddhkstt|jjjdddhksttj
|dd	g|d}t|jjjdddhkstd|jkstt||  tj
|d	|d}t|jjdddhks$td	|jks4ttj
|d|d}t|jjdddhks`td S )Nr@  r#   r$   rA  rk   rB  rA   r   r   r7  r   r   r   r{   )r   r   r   r   r   rD  r   r   r   r   r   rM   r   r  r  r_   r   r   r   r   r*   rE  r   r   r    test_partition_on_cats_2  s*    
rO  c           
   	   C   s:  t | d}ttjdtjdtjdtjdtjdtjdtj	j
dddgdddd	}t|d
 }tj|jd| dd}tj|j|d dd}|j|||d tt}|j|dd|d W 5 Q R X dt |jkstt | d}|j|d||d |j|dd|d tj|d|d}	t|d|	 dS )z#Test append with write_index=False.ztmp1.parquetre   rf   rh   ri   rj   rk   rm   rn   rA   Nr  rs   r&  FT)rw   rQ   r7   Appended columnsztmp2.parquetr   rq   r{   )r   r   r   r   r   r   r   r   r   r   r   r   rL   r   r   r)  r   rD   r   rN   r4  r_   r   r   r   )
r   r7   r$  r   r   r+  r,  r   excinfor-  r   r   r    test_append_wo_index  s6    
   rR  )r*   offsetz
2022-01-01z
2022-01-31D)freqr>   )daysrr   i  c           	   	   C   s   t | }tjtjt|tjdtjt|tjdtjt|tjdtj	j
dddgt|ddd|d}tj|d	d
}tj||j| d	d
}|j|||d tjtdd |j||dd W 5 Q R X |j||ddd dS )z1Test raising of error when divisions overlapping.rf   rh   ri   rj   rk   rm   rn   r)   r  rs   r&  z)overlap with previously written divisionsr  Tr7   rQ   )r7   rQ   r9  N)r   r   r   r   r   rL   r   r   r   r   r   r   r   r   r   r*   r   rD   r   rN   )	r   r7   r$  r*   rS  r   r   r,  r   r   r   r    !test_append_overlapping_divisions  s(     rX  c           	      C   s   t | }tjtdtdddtdddd}tj|ddd	}ttddtdd
d}tj|dd}|j||dd |j||dd tj||d}t	||g}t
|| d S )Nr     r"   r   r)   r   F)r,   sort,  r+   Tr&  rW  r   )r   r   r   r   r   r   r   r   r   r<  r   )	r   r7   r   r>  r,  r   r   ressolr   r   r    6test_append_known_divisions_to_unknown_divisions_works  s      r_  c              	   C   s  t | }tdtjdtjdi}tdtjdtjdi}tdtjdtjdi}tj|dd}tj|dd}tj|dd}	|j	|||d t
t}
|j	||dd	 W 5 Q R X d
t |
jkstt
t}
|	j	||dd	 W 5 Q R X dt |
jkstdS )z-Test raising of error when non equal columns.ro   r  rf   rp   rA   rs   r&  TrW  rP  zAppended dtypesN)r   r   r   r   r   r   r   r   r   r   rD   r   rN   r4  r_   )r   r7   r$  r   r>  r   df3r,  r   r-  rQ  r   r   r    test_append_different_columns  s    ra  c           
      C   s   |dkrt d nttdk r*t d t| }tdd}tjddd	 t	t
|D i|d
}tj|dd}dtdt fgi}|j|d||d |j|d||dd tj||d}t||g}| }	t||	 d S )Nr-   z1Fastparquet engine is missing dict-column supportz1.0.1z0PyArrow 1.0.1+ required for dict-column support.z
2020-01-01
2021-01-01r4  c                 S   s   g | ]}d |iqS r   r   )r   r#   r   r   r    r!   1  s     z+test_append_dict_column.<locals>.<listcomp>r)   r>   r+   r#   T)rQ   r7   schema)rQ   r7   rc  r9  r   )rD   r?   
pa_versionparse_versionr@   r   r   
date_ranger   rangerL   r   r   r   structr   r   r   r<  r   r   )
r   r7   r   Zdtsr   r,  rc  r   expectr  r   r   r    test_append_dict_column&  s0    
    rj  c                 C   s   t | }tjdddgdddgddd	gd
tjdddgdddddgd}tj|dd}tj|||d tj|d|d}t||dd d S )Nr>   rA   r   r%   r0     r  rY  r\  rB  rZ  r&   r'   r   r   r   )r*   r   r+   r   r{   Fr   )	r   r   r   Indexr   r   r   r   r   r"  r   r   r    test_orderingG  s    ro  c                 C   s   t | }ttjdtjdtjdtjdd}tj|dd}|j	||d tj
|ddg|d	d
}t|ddg |dd ttj|d}tj
|dg|d }|jdd	d t|dg |ddd tj
|ddg|d	d
}t|ddg |dd d S )Nre   rf   ro   rq   r@  rs   r   ro   rq   Tr   Fr   r   r   )inplacer   )r   r   r   r   r   r   r   r   r   r   r   r   globr   r   r   r   r;  )r   r7   r   r   r   r   fnsr`  r   r   r     test_read_parquet_custom_columnsV  s0           rt  zdf,write_kwargs,read_kwargsr#   r   rA   r   r   r   ccbbb   a   b   cbytes)object_encodingrc  r  i  i  re   M8[ns]zM8[us]z,https://github.com/apache/arrow/issues/15079z.https://github.com/dask/fastparquet/issues/837zM8[ms]zdatetime64[ns]datetime64[ns, UTC]zdatetime64[ns, CET]Zuint16float32r   foor$   r   -      @       @. c           	      C   sf  d|kr(|j jdkr(d|kr(tjdd d|kr\|j jdkr\|dkr\ttdkr\tjdd tr|d	d r|dkrttdkrtd
 t| }|j	j
d krd|j	_
tj|dd}|dd }|r|dkrtj||f||d| ntj||fd|i| tj|f|j	j
|dd|}t|jddkrT|dkrTt|d|dd nt||dd d S )Nr#   r|  r]   z7Parquet pyarrow v1 doesn't support nanosecond precisionr   r-   z0.6.3z4fastparquet doesn't support nanosecond precision yetr  z.https://github.com/dask/fastparquet/issues/577r*   rA   r+   r{  )r7   r{  r7   Tr   ZUInt16Fr   )r#   rg   rD   r?   fastparquet_versionre  r
   r  r   r*   r(   r   r   popr   r   Zdtypesr   r   )	r   r   Zwrite_kwargsZread_kwargsr7   r   r   Zoer   r   r   r    test_roundtripn  sP    E




   r  c              	   C   sb  t | }tdddddgtdd}tj|dd}|jd	|d
< |j||d tj	|d
g|dd}tj	||dd}t
|| tt |jjj W 5 Q R X t|j jjdddhkst|dd  }| ddddgkst|dkr2t
|j|jdd tt tj	|dg|d }W 5 Q R X tttf tj	|dg|d}W 5 Q R X d S )Nr>   rA   r   r   r   Zcaaabr"   r+   r  r$   r   T)r  r7   r   rJ  r   r   r   c                 S   s   | j jj S r2   )r$   r  r  r;  r   r   r   r    <lambda>      z!test_categories.<locals>.<lambda>r-   F)check_namesr#   r  r  )r   r   r   r   r   r   r$   r   r   r   r   rD   r   NotImplementedErrorr  r  rM   r   r_   map_partitionsr   	TypeErrorrN   FutureWarning)r   r7   r   r   r   r   r-  Zcats_setr   r   r    test_categories  s0       
 
 r  c                 C   s|   t | } tjdddgdddgddddgd	}tj|dd
}|jdgd}|j| |d tj| |d}t|j	|j	dd d S )Nr>   rA   r   r   r   ABr#   r$   )r   r*   r+   r  r   r   Fr   )
r   r   r   r   r   Z
categorizer   r   r   r*   r   r7   r   r   r   r   r   r    test_categories_unnamed_index  s     r  c                 C   s   t | }ttdtdd}tj|dd}||jdk }|j||d tj||dd}|j	dk sht
| }t||ddd	 d S )
Nr%   r   r   r+   r   TrJ  F)r  r   )r   r   r   rg  r   r   r   r   r   r,   r_   r   r   )r   r7   r   r   r   r   r-  r^  r   r   r    test_empty_partition
  s    r  write_metadatac                 C   sV   t | }tj }d|j_tj|dd}|j|||d tj||dd}t	|| d S )Nr  r   r+   r&  TrJ  )
r   r   _compatZmakeTimeDataFramer*   r(   r   r   r   r   )r   r7   r  r   r   r   r   r   r   r    test_timestamp_index  s    
r  c                 C   sf   t | d}tddtjdtjdgi}tj|dd}|j|dd	 t	
|}|d jdksbtd S )
Nr   Zc1      ?rA   r   r>   r+   r-   r   )r   r   r   r   r   r:  r   r   r   r   
read_tableZ
null_countr_   )r   r   r   r   r	  r   r   r    0test_to_parquet_fastparquet_default_writes_nulls%  s    
r  c              
   C   s  dddgddgt jt jg}dddgddgd d g}ddt jt jg}ddd d g}tjdd	d
}||tjtjg}| | t dt dg}d}tjdd	|d}	|	|	tjtjg}
|	d  |	d  t dt dg}tddddg||||
d}t	j
|dd}tdtt fdt fdtdfdtd|fdt fg}|jt| dd|d t	jt| ddd jdd}|jj}tt|D ],}t || || stt|| q|jj}tt|D ]B}t || rt || stn|| || kstq|jj}tt|D ]B}t || r\t || srtn|| || ks2tq2t |j j|stt |j!|j!std S )Nr   r>   rA   r   r   r   r   iˌ4Zs)unitNaTz
US/Eastern)r  tz)partition_columnarraysstringststamps
tz_tstampsr+   r  r  r  nsr  r  r0   )r7   r8  rc  FrJ  Tr   )"r   r:  r   	Timestampr  Zto_datetime64Z
datetime64Z
tz_convertr   r   r   r   rc  list_r   r   	timestampr   r   r   r   r   r  valuesrg  rL   Zarray_equalr_   typer  Zisnatr  r  r  )r   Z	in_arraysZ
out_arraysZ
in_stringsZout_stringsZtstampZ
in_tstampsZout_tstampstimezoneZ	tz_tstampZin_tz_tstampsZout_tz_tstampsr   r   rc  Zddf_after_writeZarrays_after_writer   Ztstamps_after_writeZtz_tstamps_after_writer   r   r    Stest_to_parquet_pyarrow_w_inconsistent_schema_by_partition_succeeds_w_manual_schema2  sz    



	   *r  rc  Zinfercomplexc                 C   s   |dkrt  t  d}t| } tjdddddddgtdddddd	d
gdddddddgdtddd}|rtj	|dd
d}ntj	|dd}|j| d|d tj| ddd}t|| d S )Nr  )r*   amount1234z
2017-01-01z
2017-01-02z
2017-01-06z
2017-01-09r  rY  r\    rr   X  i  )r*   dater  r      r)   rA   r+   r*   r0   r7   rc  TrJ  )r   r   r   r   r   r   to_datetimerg  r   r   r   r   r   r   )r   r*   rc  r   Zdf_outr   r   r    test_pyarrow_schema_inference  s2    r  c              	   C   s   t dddgdddgd}t dd	dgd
ddgd}tjt|t|g|dd}tt}|j	t
| dd W 5 Q R X t
|j}d|kstdt
|jkstdt
|jkstd S )Nr>   rA   r         @r   r   r"   r   r   r   r   r   FrG  Zverify_metar0   r   z6Failed to convert partition to expected pyarrow schemaz	y: doublez	y: string)r   r   r   from_delayedr   r  rD   r   rN   r   r   r4  r_   )r   r>  r   r   Zrecmsgr   r   r    "test_pyarrow_schema_mismatch_error  s      
r  c                 C   s   t dddgdddgd}t dd	dgd
ddgd}tjt|t|g|dd}|jt| dd d tj| dd}t 	||g}t
||dd d S )Nr>   rA   r   r  r   r   r"   r   r   r   r   r   Fr  r0   r  r   )check_dtype)r   r   r   r  r   r  r   r   r   r<  r   )r   r>  r   r   r]  r^  r   r   r    1test_pyarrow_schema_mismatch_explicit_schema_none  s      r  c              
   C   s.  t | } ttjjdddgddtjjdddgddtjjddtjjd	d
ddtddd}tj	|dd}|j
| ddg|d tj| |ddd }|j D ]0}t|j|j|k t|j|j|k kstqtj| |ddgd }|j D ]0}t|j|j|k t|j|j|k kstqd S )Nr  r  Cr  rk   XYZr>   r   r   )a1a2r   r   r   rA   r+   r  r  r7  Fr   r   )r7   r   )r   r   r   r   r   r   randintr   r   r   r   r   r   r  uniquerM   r   r_   r  )r   r7   r   r   r   rV   r   r   r    test_partition_on  s,    
	   
.r  c                 C   s   t | } ttjjdddgddtjjdddgddtjjddd	}tj|d
d}td
D ]}|j	| ddg|d q^tj
| |d }t|t|kstt| D ] \}}}|D ]}|dkstqqd S )Nr  r  r  r  rk   r  r  r  )r  r  r   rA   r+   r  r  r7  r   )rz   zpart.1.parquetrx   ry   )r   r   r   r   r   r   r   r   rg  r   r   r   rL   r_   r   walk)r   r7   r   r   r=   r   r   filer   r   r    test_partition_on_duplicates  s    r  r8  aac              
   C   s   t | } tjjdd| t | } ttjjdddgddtjjddtjj	dd	ddd
}t
j|dd}|j| |ddd t
j| dddd}W 5 Q R X | }|j D ]0}t|j|j|k t|j|j|k kstqd S )Nzsingle-threadedr}   r  r  r  r  rk   r>   r   )r  bbru  rA   r+   Fr0   )r8  rw   r7   )r*   r   r7   )r   r   r  rM   r   r   r   r   r   r  r   r   r   r   r   r  r  r  r_   )r   r8  r   r   r   rV   r   r   r    test_partition_on_string  s2          r  c                 C   s   t | } ddddg}tddddgtj||ddd}tj|d	d
d}|j| d|d tj| d|dgdd}t	|dkst
d S )Nz
2018-01-01
2018-01-02z
2018-01-03z
2018-01-04r>   T)r  Zordered)dummyDatePartr   r+   r  r  r7  )r  <=r  )r*   r7   filtersr   rA   )r   r   r   Categoricalr   r   r   r   r   rL   r_   )r   rB   rC   ZcatsZdftestZddftestZddftest_readr   r   r    test_filters_categorical  s"    
r  c                 C   s  t | }ttdtdd}tj|dd}|jdks<t|j	||dd tj
||dgd	}|jd
kslt|jd
k  sttj
||dgd	}|jdkst|jdk  sttj
||ddgd	}|jdkstt|rtt|| tj
||ddgddggd	}	|	jd
kst|	jdk|	jdk @   s>ttj
||dgd	}
|
jdks`t|
jdk |
jdkB   sttj
||dgd	}|jdkstt|st|jdk  sttj
||dgd	}|jdkstd S )Nr%   
aabbccddeer"   r   r+   Tr&  )r#   >r   r7   r  r   )r$   ==r   r>   r   )r#   r  r   )r#   r  r>   )r#   <r   r#   r  r   )r#   r     r  )r#   in)r   	   rA   r   )r$   =r   )r#   z!=r>   )r   r   r   rg  r   r   r   r,   r_   r   r   r#   r   r   r$   rL   r   )r   rB   rC   r   r   r   r   r   r   r   erq   gr   r   r    test_filters5  sJ      
	""r  c                 C   s  |dks|dkrt jddd |dk}t| }tddddd	d
gi}tj|dd}|jdddj|d|d tj	|d|dgd
 }tj	|d|dgd
 }|rt|||d dk dd t|||d dk dd nt|| t|| |jdddj||d tj	||d
 }t|| |dkr|jdddj||d t|jdgd}	t|jdgd}
t|	dks|tt|
dkst|jdddj||d tj	||dgd
 }tj	||dgd
 }t|dkstt|dkstt|| d S )Nr-   0.3.1Z
minversionr0   atabr  badar  r>   r+   T)r,   forceFrv   )r  r  r  r*   r7   r  )r  r  r  r   rA   r   r  r   r  )rD   importorskipr   r   r   r   r   repartitionr   r   r   r   r-   ParquetFile	to_pandasrL   r_   )r   rB   rC   Zpyarrow_row_filteringr   r   r   r   r-  r   r`  r   r   r    test_filters_v0f  sf         
   




  
  
r  c                 C   s   t jddd t| }ttdddgd d}tj|d	d
}|j|d|dd d}d}dd|fdd|fgg}tj	|dd|d}d}	||d |k  j
D ]}
|	tt|
 dk7 }	q|j|	kst||d |k  }||d |k }t|| dd d S )Nr0   z1.0.0r  r  r  dogr@  )r  r  r%   r+   FTr   r   r  r  r  r  r  r   r   )rD   r  r   r   r   rg  r   r   r   r   r  intrL   r   r,   r_   r   )r   r7   r   r   r   Zaa_limZbb_valr  r   Znonemptypartr   r   r    test_filtering_pyarrow_dataset  s     r  c                 C   s   t tdtdd}tj|dd}|jt| |d t| d}tj	|d|d	gd
}|j
dkshtt||d dk | dd tj	t| dd|d	gd
}t|dkstd S )Nr%   r  r"   r   r+   r   r   Tr  )r   r7   r  r   r#   Fr   rz   r   )r   r   rg  r   r   r   r   r   r   r   r,   r_   r   r   rL   )r   r7   r   r   ZfilsZddf_outr   r   r   r    test_fiters_file_list  s&       r  c              
   C   s   t d tdddddddd	gtdd
d}|jd d jt| 	dddd |jdd  jt| 	dddd t
jt| ddddgd}|jdkstt
jt| ddddgd}|jdkstd S )Nr0   r   r>   r%      rA   r   r  r  r   r   r   zfile.0.parquetr7   row_group_sizezfile.1.parquetFT)r   r  r   )r7   split_row_groupsr   r  r   rA   r   )rD   r  r   r   rg  r   r)  r   r   r   r   r   r   r_   )r   r   r   r   r   r    test_pyarrow_filter_divisions  s8    
*    r  c              	   C   s   t jddd t| } d}g }dD ]}||gt|d  7 }q"t|tjj|dtjjdd	|dd
}t	j
|dd}|j| ddgdd t	j| ddgdd}d}|j|kstd S )Nr-   r  r  r  r   r   rk   r>   r   rB  r+   Tr   rw   r8  r7   )r   r  r   r7   r  r   )   1   )rD   r  r   r  r   r   r   r   r  r   r   r   r   r   r_   )r   rl   Zcategoricalsr4  r   r   r   expected_divisionsr   r   r     test_divisions_read_with_filters  s,    r  c              
   C   s   t jddd t| } tjddddddddgdd	dd	dd	dd	gd
ddddddddgd}tj|dd}|j| dgdd tj| ddgdd}|j	st
d}|j|kst
d S )Nr-   r  r  r   r>   rA   r   id1Zid2)r  idr)   r+   r  r7  )r  r  r  Tr  r  )rD   r  r   r   r   r   r   r   r   r   r_   r   )r   r   r   r   r  r   r   r    *test_divisions_are_known_read_with_filters  s&    
r  z$No longer accept ParquetFile objectsc              	   C   s  t | }ttjjdddgddtjjddtjjddddd}tj|d	d
}|j	|dgdd t
|}t| }|j D ]0}t|j|j|k t|j|j|k kstqtj|dgd }t|j|jdk t|jksttt tj|dd}W 5 Q R X d S )Nr  r  r  r  rk   r>   r   rB  rA   r+   r   r-   r7  )r   r  r  r  r0   r   )r   r   r   r   r   r   r  r   r   r   r-   r  r   r   r   r  rM   r   r_   rD   r   )r   r   r   r   Zpq_fr   rV   r   r   r    &test_read_from_fastparquet_parquetfile-  s"    
."r  r~   threadsZ	processesc                 C   s   t | } tddddgddddgd	}d
|j_tj|dd}|j| d|d}t|ds\t	|j
|d tj| sxt	tj| |dd}t||dd d S )Nr>   rA   r   r   r  r  r  g      @r   r*   r+   Fr   r7   r   r}   TrJ  r   )r   r   r   r*   r(   r   r   r   hasattrr_   r   r   r   r   r   r   )r   r~   r7   r   r   r4  r   r   r   r    test_to_parquet_lazyL  s     r
  r   c                 C   s`   ddl m} t }||d| tj| |dd |t| }|jsHt	|j
jd |ks\t	d S )Nr   LocalFileSysteminvalidate_cacher0   r  )fsspec.implementations.localr  r   setattrr   r   Z_strip_protocolr   calledr_   Z	call_argsargs)r   Zmonkeypatchr   r  r  r   r   r   r    &test_to_parquet_calls_invalidate_cache]  s    
r  c                 C   s   t | }tdtjdddgi}t|d}|j|dddd	 t|}|j	d j
tjjjksdttj|ddd
 }t|| d S )Nr   nowTutcr>   r-   FZint96)r7   rw   timesr   )r   r   r   r  r   r   r   r-   r  Z_schemar  Zparquet_thriftTypeZINT96r_   r   r   r   )r   r   r   r   pfr   r   r   r    test_timestamp96j  s    
r  c           	         s  t | }d t fddtdddgD }t fddtdddgD }g }dD ]<}tj||}tj|st| |	tj|d	 qXt
|d
 | t
|d | tj|dd}d|jkst| }d|kstt|jddgk std S )Nr   c                    s   i | ]\}}|t j qS r   r   r   r   r   r   Nr   r    r<   z  s      z%test_drill_scheme.<locals>.<dictcomp>r   r   r   c                    s   i | ]\}}|t j qS r   r  r  r  r   r    r<   {  s      )
test_data1
test_data2z
data1.parqr   r>   r-   r   dir0r  r  )r   r   r   r  r   r   r   r   mkdirrQ   r-   r   r   r   r   r_   r   r   r  r   r   )	r   r   r>  r   r   r   dnr   r   r   r  r    test_drill_schemev  s"    ""
r#  c              
   C   s   t | }ttjtjjddddddgddd	d
tjttdddd
tjttdddd
d}t	
|d}|j||d t	j|dg|d}t|jdgkstt	j||d}t|jt|kstd S )Nr   r   r   r   r  rq   r  rk   r  rf   r   r  float)r  intsZfloatsr>   r   r%  r   )r   r   r   r   r   r   r   r   rg  r   r   r   r   r   r_   )r   r7   r   r   r   Zrddfr   r   r    test_parquet_select_cats  s     r&  c                 C   s   |dkrt tdkrtd t| }tjdddgitjddgd	d
d}d|j_	t
|d}|j||d t
j||d	gd}t|| d S )Nr-   r  z5Fastparquet does not write column_indexes up to 0.3.1r  r>   rA   r   r   idxr'   r)   colsr   r   )r  re  rD   r@   r   r   r   rn  r   r(   r   r   r   r   r   )r   r7   r   r   r   r  r   r   r    test_columns_name  s    
$r)  c                 C   s   | dkrRt |}|jjd jd j}|d kr@|j|jksPtq|j|jkstnt	j
tj|d}|jj}t|jD ]z}||}tt|D ]^}	||	}
|d kr|
j|
jkstq|}|dkrd}| |
j kst|
j|
jkstqqzd S )Nr-   r   ry   defaultsnappy)r-   r  fmdZ
row_groupsr   Z	meta_dataZtotal_compressed_sizeZtotal_uncompressed_sizer_   r   parquetZread_metadatar   r   r   rc  r   rg  num_row_groups	row_grouprL   r   lowercompression)r7   filenamer1  r  mdmetadatar   r   r/  jr   Zcompress_expectr   r   r    check_compression  s*    




r6  zcompression,gzipr+  c                 C   s~   t | }tdddgd dddgd d}d	|j_tj|dd
}|j|||dd tj||dd}t	|| t
||| d S )Nr   r   r   r%   r>   rA   r   r"   r*   r+   T)r1  r7   ru   rJ  )r   r   r   r*   r(   r   r   r   r   r   r6  )r   r1  r7   r   r   r   r   r   r   r    %test_writing_parquet_with_compression  s    $
r8  c                 C   sh   t | }tdddgd dddgd d}d	|j_tj|dd
}|j|||dgdd t||| d S )Nr   r   r   r%   r>   rA   r   r"   r*   r+   r#   T)r1  r7   r8  ru   )	r   r   r   r*   r(   r   r   r   r6  )r   r1  r7   r   r   r   r   r   r    6test_writing_parquet_with_partition_on_and_compression  s    $r9  r'  r   r4  r(   
numpy_typepandas_typer  0.21.0r   index_columnspandas_versionencodingUTF-8objectunicode
field_namer4  r(   r;  r<  __index_level_0__Zcolumn_indexesr   r?  r@  c                 C   s   | j S r2   r3   r5   r   r   r    pandas_metadata  s    JrI  c                 C   s   t | \}}}}|dgkst|dgks,t|d gks:t| d dgkr\|dddksntn|dddksntt|ts|td S )Nr'  r  r?  rG  rG  r  )r'  r  )r   r_   
isinstancedict)rI  index_namescolumn_namesmappingcolumn_index_namesr   r   r    test_parse_pandas_metadata9  s    rQ  c            	      C   s  d g} dg}d dd}d g}d ddddd ddddgdgdd}t |\}}}}|| ks^t||ksjt||ksvt||kstd dd	id d
ddgdd dddddd d dddgdgdd}t |\}}}}|| kst||kst||kst||kstd S )Nr#   )rG  r#   r   r:  rG  r=  r>  rA  rB  rC  rD  rE  rH  r   r_   )	Ze_index_namesZe_column_namesZ	e_mappingZe_column_index_namesr3  rM  rN  rO  rP  r   r   r    %test_parse_pandas_metadata_null_indexJ  sf    
rS  c                 C   s   t | d }tjjtdddgtdddggddgd	}t|| tj||d
}t	
dddgdddgd}t|| d S )Nz
table.parqr>   rA   r   r   r   r  r  r   r   r  )r   r   r   r   arrayr   r   r   r   r   r   r   )r   r7   r   r	  r  r  r   r   r    test_read_no_metadata  s     rU  c                  C   s   d ddid dddgdd dddddd ddddgdgd	d
} t | \}}}}|dgks\t|dgksjt|dddks|t|d gkstd S NrA  rB  rC  rD  rE  r  r   rG  r=  rH  rJ  rR  r3  rM  rN  Zstorage_name_mappingrP  r   r   r    2test_parse_pandas_metadata_duplicate_index_columns  sB    "rX  c                  C   s   d ddid dddgdd dddddd ddddgdgd	d
} t | \}}}}|dgks\t|dgksjt|dddks|t|d gkstd S rV  rR  rW  r   r   r    1test_parse_pandas_metadata_column_with_index_name  sB    "rY  c           
   	   C   sZ  t | }tj|d}tj|d}ttjjdddgddtjjddtjj	dd	ddd
}d|j
_tj|dd}dd ddddd dd}|j|fd|i||  tj||dd}t|||dkd tjjdd$ |j|f|dgd||  W 5 Q R X tj||d }|j D ]4}	t|j|j|	k t|j|j|	k ks tq d S )NnormalZpartitionedr  r  r  r  rk   r>   r   rB  r*   r   r+   r+  T)r1  Zcoerce_timestampsZuse_dictionaryr   )r1  r  Z
fixed_text)r0   r-   r7   rJ  r-   r   r|   r}   r   r7   r8  r   )r   r   r   r   r   r   r   r   r   r  r*   r(   r   r   r   r   r   r   r  rM   r   r   r  r   r_   )
r   r7   r   path1path2r   r   Zengine_kwargsr   rV   r   r   r     test_writing_parquet_with_kwargs  s@    
	 r^  c              	   C   s2   t | }tt tj||dd W 5 Q R X d S )NZunknown_value)r7   Zunknown_key)r   rD   r   r  r   r   r   r   r   r    (test_writing_parquet_with_unknown_kwargs	  s    r_  c                    s   ddl m t| } dg  fdd}tdddd	gd
dddgd}tj|dd}|j| |d|id  d svttj	t
j| d|d}t||dd d S )Nr   )r  Fc                     s   d d< | |S )NTr   r   )r  rT   flagZmp_getr   r    my_get%	  s    z(test_to_parquet_with_get.<locals>.my_getr   r   r   r   r>   rA   r   r   r"   r+   r~   )r7   Zcompute_kwargs*r   r   )dask.multiprocessingr  r   r   r   r   r   r   r_   r   r   r   r   r   )r   r7   rb  r   r   r  r   r`  r    test_to_parquet_with_get	  s     re  c                 C   s   t | }d}tjjdd|d d tjjdddg|dtjjd	d
dg|dd}tt	|d}|j
|dd|ddgd tj||d}||jdk   d S )Nr0  r   r   rk   r@  r  r  r  rT  EF)Zsignal1fake_categorical1fake_categorical2rA   r+  Frh  ri  )r1  rw   r7   r8  r   )r   r   r   rZ  Zcumsumr   r   r   r   r   r   r   rh  r   )r   r7   r   rl   r   r   Zdf_partitionedr   r   r    test_select_partitioned_column3	  s     rj  c              	   C   s   |dkrt tdk rtd t p |dkrtdt t| }t	j
dggdgdd}t|d	}|j||d
 tj||d
}t||ddd W 5 Q R X d S )Nr-   z0.3.0z&fastparquet<0.3.0 did not support thisignorer   r   r}  )r   rg   r>   r   Fr   r   )r  re  rD   r@   warningscatch_warningssimplefilterr  r   r   r   r   r   r   r   r   )r   r7   r   r   r   r   r   r    test_with_tzH	  s    

rp  c                 C   s   t | }ttddtjtddtjtddtjtddtjd}t	
|}tj|dd}|j|d	d
dd tj|d
d	d}|dtji  d S )Nr   rA   rZ  r   rl  rm  )r  r   r   r   r+   r0   Fr  )r7   rw   r8  r{   r   )r   r   repeatr   r   Zint8Zint16r~  r   r   r   r   r   r   r   r   )r   r   r   pdfr   r   r   r    test_arrow_partitioningX	  s    
rs  c               	   C   s^   t t} tjddd W 5 Q R X dt| jks6tdt| jksHtdt| jksZtd S )Nr  r   r]   r-   )rD   r   rN   r   r   r   r4  r_   )infor   r   r    test_informative_error_messagesk	  s
    ru  c                 C   s   t | }tddddddgi}|d d|d< tj|dd}tj|||d tj||dd|d	 tj||d }|d 	 dddddgd
 kst
d S )Nr#   r   r   r  r>   r+   r   T)rQ   r9  r7   rA   )r   r   r   r   r   r   r   r   r   r   r_   )r   r7   r   r   r   r   r   r   r    test_append_cat_fpt	  s    rv  r   r   r   i- i @B c                 C   sV   t | }|jjsd|j_tj|dd}tj||ddd tj|ddd}t|| d S )Nr*   rA   r+   r0   Tr   rJ  )r   r*   r(   r   r   r   r   r   )r   r   r   r   r   r   r   r    test_roundtrip_arrow	  s    *rx  c                 C   sJ   t | }tjjdddd }|j||d tj||dd}t|| d S )N
2000-01-01z
2000-01-10Z1dstartendrU  r   TrJ  )	r   r   datasets
timeseriespersistr   r   r   r   )r   r7   r   r   r   r   r   r    test_datasets_timeseries	  s      
r  c              	   C   sr   dd l }tdddddddgi}d	|j_tj|dd
}|t| }|j	||d tj
||dd}t|| d S )Nr   r#   r   r   r   r>   rA   r   r*   r+   r   TrJ  )pathlibr   r   r*   r(   r   r   Pathr   r   r   r   )r   r7   r  r   r   r   r   r   r   r    test_pathlib_path	  s    r  c                 C   s   t | d}tjjdddd}tj|jdgd}|j	d|_|j
|d	d
d tj||ddid}tt|jjjt| jjj d S )Nzparquet_int16.parqr   i 5 rw  rk   r(   r   r  r-   Zuncompressed)r7   r1  i8 )r7   r  )r   r   r   r   r  r   r   Tr(   r   r   r   r   r   rS   r  r  r   )r   r7   r   Znumbersr   r   r   r   r    test_categories_large	  s    r  c                 C   sB   t | }tj||d tjtj|d|dd}tt|dd d S )Nr   r   FrJ  r   )	r   r   r   r   r   r   r   r   r   r   r   r   r    test_read_glob_no_meta	  s    r  c                 C   sb   t | }tj||dd ttj|d}|tj|d tj	||dd}t
t|dd d S )NTr&  r   ry   FrJ  r   )r   r   r   rr  r   r   r   rQ   r   r   r   )r   rB   rC   r   pathsr   r   r   r    test_read_glob_yes_meta	  s    r  r   remove_commonc                 C   s   t | }tj||dd tjtj|drBttj|d t|}d|ksXt	|rtjtj|drttj|d t
j|||d}tt||d d S )NTr&  ry   rx   rJ  r   r   )r   rB   rC   r   r  r   r   r   r   r   r    test_read_dir_nometa	  s    
r  c                 C   s6   t | }tj||dd tj||dd}tt| d S )NFr&  TrJ  r   r   r   r   r    test_statistics_nometa
  s    r  c                 C   s   t | d}tj|dd}tjjdddd 	dd }|
d	  }|j|jdkd |_|j||d
|d tj||d}t||d
d
d d S )Nr   z../ry  z
2000-01-03Z1hrz  c                 S   s   | j d d S )Nr   )r!  r   r   r   r    r  
  r  z1test_timeseries_nulls_in_schema.<locals>.<lambda>r#   F)r7   ru   rc  r   rl  )r   r!  r   r   r   r   r}  r~  r   r  r   r  r(   wherer  r   r   r   r   )r   r7   rc  r   r   ddf_readr   r   r    test_timeseries_nulls_in_schema
  s    r  c                 C   s^   dd l }t| }tjjddddd}|j||d tj||d}t|	|
 dk sZtd S )	Nr   ry  z
2000-01-02Z60SZ1H)r{  r|  rU  Zpartition_freqr   ia  )pickler   r   r}  r~  r   r   r   rL   dumps__dask_graph__r_   )r   r7   r  r   r,  r   r   r   r    test_graph_size_pyarrow!
  s       r  r   c                 C   s(  t | d}t | d}tjddgd ddgd dd	gd d
|d}d|j_tj|ddd}|j|||d tj	||dd }|
 j||dd}t|j|jgd}	t|	d}
t|
tst|
jdgksttt|
j d jdgkstt|	d}t|tstt|jdd|  d S )Nr   r   r>   rA   re   r   r   r   r   )r  r  r  r)   Zmy_indexFr[  r   r   r  )r7   r   keysread-parquetr   
to-parquetZoptimize_graph)r   r!  r   r   r*   r(   r   r   r   r   Zto_framer   r   rY   r   rK  r   r_   r   nextiterdskr  r   r   r   )r   r7   r   r*   Ztmp_path_rdZtmp_path_wtr   r   r   r  subgraph_rdZsubgraph_wtr   r   r    test_getitem_optimization0
  s&    " 
"
r  c                 C   s   t dgd dgd dgd dgd d}tj|ddd}tjt| }|j||d	 tj	||d	g  }t
|j|jgd
}tdd |j D }|jg kstt||g   d S )Nr>   r  rA   r   r   r  r  r  rT  Fr  r   r  c                 s   s   | ]}t |tr|V  qd S r2   )rK  r   )r   lr   r   r    r   W
  s     
 z2test_getitem_optimization_empty.<locals>.<genexpr>)r   r   r   r   r   r   r   r   r   r   r   r   _namer  layersr  r   r_   r   )r   r7   r   r   r   r   r  subgraphr   r   r    test_getitem_optimization_emptyN
  s    ,r  c                 C   s   t dgd dgd dgd dgd d}t|d}tjt| }|j||d tj	||dd }tj	||dd	g }tj	||dd	d
g }t
|||\}}	}
t
j|||dd\}}}t|| t|	| t|
| d S )Nr>   r  rA   r   r   r  r   r  r  r  Fr  )r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r7   r   r   r   r   r   r   r  r  a3Zb1Zb2Zb3r   r   r    test_getitem_optimization_multi]
  s    ,

r  c                 C   s   t dddgd tdtdd}tj|ddj| |d tj| |d}||d	 d
k dg }||d	 d
k dg }t|j|j	gd}t
|d}t|tstt|jdd	hkstt|| d S )Nr>   rA   r   r   r   rB  r+   r   r   r%   r   r  r  )r   r   rg  r   r   r   r   r   r   r  r   rK  r   r_   rM   r   r   r   r7   r   r   r   r   r  r  r   r   r    &test_getitem_optimization_after_filtero
  s    $
r  c                 C   s   t dddgd tdtdd}tj|ddj| |d tj| |d}|d	g }|jdd
}||d dk d	g }|d	g }|jdd
}||d dk d	g }t|j	|j
gd}t|d}t|tstt|jd	hkstt|| d S )Nr>   rA   r   r   r   rB  r+   r   r   )r   r   r  r  )r   r   rg  r   r   r   r   assignr   r   r  r   rK  r   r_   rM   r   r   r  r   r   r    .test_getitem_optimization_after_filter_complex
  s    $


r  c           	   	   C   s   t tdddgd d}tj|ddj| |dgd	 d
g}tj| ||d}d|d  ksbttj| |d}t	
t t|| W 5 Q R X |jj|j j}|di }||d< |d |dg |}t|| d S )Nr%   r  r  r   r   r>   r+   r   r[  r   r  r  r  r   rT   r  funcr  )r   r   rg  r   r   r   r   r   r_   rD   r   r   r   r  r  Zcreation_infor  )	r   r7   r   r  r,  r   rt  rT   r-  r   r   r    test_layer_creation_info
  s"      r  c              	   C   s   t dtjdtjdi}tj|dd}|jt| |d t	j
dd tjt| |d}W 5 Q R X | j}t|d	ks~ttt| }t|tst|jd
dikstd S )Nr   r   rf   rA   r+   r   bar)r  r>   r  )r   r   r   r   r   r   r   r   r   r   annotater   r  r  rL   r_   r  r  r  rK  r   annotations)r   r7   r   ri  r   r  layerr   r   r    "test_blockwise_parquet_annotations
  s    
r  c                    sV  d}d}t | }tdtj|tjdi}tj||d}|j||d tj	||dd  
 j}t|d	kspttt| d
 tstt |  d	7  |d	7 } d7  |d7 } 
 j}t|dksttdd | D st fddt|D }t 
 |}	|	j}t| d
 }
t|d	ks4tt||
 tsHtt | d S )Nr   rA   r   rf   r+   r   TrJ  r>   r   r%   r   c                 s   s   | ]}t |tV  qd S r2   )rK  r   )r   r  r   r   r    r   
  s     z2test_optimize_blockwise_parquet.<locals>.<genexpr>c                    s   g | ]} j |fqS r   )r  r   r   r   r    r!   
  s     z3test_optimize_blockwise_parquet.<locals>.<listcomp>)r   r   r   r   r   r   r   r   r   r   r  r  rL   r_   rK  r   r  r   r   r   rg  r   r  )r   r7   rl   r,   r   r   ri  r  r  graphr(   r   r  r    test_optimize_blockwise_parquet
  s2    


r  c                 C   s  t | }ttjdtjdtjdtjdd}d|j_t	|d }t
j|jd| ddj|dd	d
 t
j||dd}|jdkstt
j||ddd}|jdkstt
j|j|d ddj|dddd t
j||ddd}|jdkstt
j||ddd}|jdkstdS )z(Test split_row_groups read_parquet kwarg   rf   rp  r*   rA   Nr+   r0   r  r  T)r7   r  r   F)r7   r   r  r@  rQ   r7   r  r  )r   r   r   r   r   r   r   r*   r(   rL   r   r   r)  r   r   r,   r_   )r   r7   r   r   r+  r-  r   r   r    test_split_row_groups
  sP                r  r  r  r   c                 C   s   t | }d}d}d}ttjd| tjdtjd| tjdd}t|d }	tj	|j
d |	 |dj|d|d	 tj	|j
|	d  |dj|d
d|d tj||||d}
t|| }|
jdt||  kstd S )Nr%   r   r  rA   rf   rp  r+   r0   r  Tr  )r7   r  r   )r   r   r   r   r   r   r   rL   r   r   r)  r   r   r  r,   mathceilr_   )r   r  r   r7   r   r  r,   Z	half_sizer   r+  r   Zexpected_rg_coutr   r   r    test_split_row_groups_int  s:         r  r  r  c              	   C   s   d}d}t tj|tjdtj|tjdd}tj|ddjt	| d|dd	 t
jtd
d tjt	| ||dd}W 5 Q R X t|| | }|j|kstt||kstt||dd d S )Nr%   r  rf   rp  r   r+   r0   F)r7   r  rw   argument will be deprecatedr  T)r7   r  aggregate_filesr   )r   r   r   r   r   r   r   r   r   r   rD   warnsr  r   r  r  r,   r_   rL   r   )r   r7   r  r  rl   r   r   Znpartitions_expectedr   r   r    )test_split_row_groups_int_aggregate_files2  s0       r  c                 C   s   t | }ttjdtjdtjdtjdd}d|j_d}dd|fg}t	j
|dd	j|d
ddd t	j||d}t	j||d
d
|d}|d |k  stt||d |k  ||d |k   d S )Nr  rf   rp  r*   r  rq   r  r   r+   Tr0   r@  r  r   )r7   r   r  r  ro   )r   r   r   r   r   r   r   r*   r(   r   r   r   r   anyr   r_   r   )r   r7   r   r   Z
search_valr  r   r-  r   r   r    test_split_row_groups_filterT  s4        r  c                 C   s~   t j| d}tjdddgdddgdddgdddgddd	d
gd}|j||d tj||d}|dd	g d	 
  d S )Npath.parquetr   r   rA   r>   r   r   r   r   r   r)   r   )r   r   r   r   r   r   r   r   rollingmaxr   )r   r7   r   r   r   r   r   r    &test_optimize_getitem_and_nonblockwiser  s    $r  c                 C   s@  t j| d}tjdddgdddgdddgdddgddd	d
gd}|j||d tj||d}|d |d
 	 
 }|d	 |d
 	 
 }|dd	g d 
 }|d 
 }t|| | | \}	t|d d t|d d t|d d t|d d g}
t|	|
D ]\}}t|| q&d S )Nr  r   r   rA   r>   r   r   r   r   r   r)   r   r   )r   r   r   r   r   r   r   r   groupbyfirstr  r  r  r   r   zipr   )r   r7   r   r   r   Zdf2aZdf2bZdf2cZdf2dr  r  r   r   r   r   r    test_optimize_and_not~  s&    $r  c              	   C   sz   t t jddt jddd}tj|dd}|j| |dd tjtd	d
 tj	| |dd}W 5 Q R X t
||dd d S )Nr  rf   r$  r   r>   r+   Tr&  r  r  1MiB)r7   rt   Fr   )r   r   r   r   r   r   rD   r  r  r   r   )r   rB   rC   r   r,  r   r   r   r    test_chunksize_empty  s     r  r4  rt      r  c              	   C   sV  |r|dkr|st d d}ttjjdddg|dtjj|dtjjdd	|dd
}tj	|dd}|j
t| |||dd t jtdd$ tjt| |||r|ndd}	W 5 Q R X |dkr|	j|jk stn*|dkr|r|	jdkstn|	jdkst|rB|	 ddg}
|ddg}t|ddg |
ddg dd nt||	ddd d S )Nr-   z4Fastparquet requires _metadata for partitioned data.r  applebananacarrotrk   r>   r   rB  r  r+   F)r7   r8  ru   rw   r  r  Tr7   rt   r  r  r  r   r   r   r   rl  )rD   r@   r   r   r   r   r   r  r   r   r   r   r  r  r   r,   r_   r   r;  r   )r   rt   r8  rB   rC   r4  df_sizer>  r,  r   r   r   r   r    test_chunksize_files  sF    	

 r  r  c              
   C   s"  d}ddg}d}t tjjdddg|dtjjd	d
g|dtjj|dtjjdd|dd}tj|dd}|jt	| ||dd t
jtdd tjt	| |||d}	W 5 Q R X |dkr|	jdkstn|dkr|	jdkst|	 ddg}
|ddg}t|ddg |
ddg dd d S )Nr  r   r   r  r  r  r  rk   smallZlarger>   r   r  r+   Fr7   r8  rw   r  r  r  r   r   r   r   r   )r   r   r   r   r   r  r   r   r   r   rD   r  r  r   r,   r_   r   r;  r   )r   rB   rC   r  rt   r8  r  r>  r,  r   r   r   r   r    test_chunksize_aggregate_files  s>    r     c              
   C   sD  d}d}d}t tjjdddg|dtjj|dtjjdd|dtd	|d
d}tj	||d}|j
t| d||d |rt| }	n,t| }
t|
}d|ksttj|
d}	tjtdd tj|	||ddddd}W 5 Q R X t||dd || }|s|j|ks@tn*|j|k s&t|dkr@|jdks@td S )NrA   r  r   r  r  r  rk   r>   r   )r   r   r   r*   r*   r+   r0   )r7   r  ru   ry   r   r  r  T)r7   rt   r  r   r*   r  Fr   r  )r   r   r   r   r   r  r   r   r   r   r   r   r   r   r_   r   r   rD   r  r  r   r   r,   )r   rt   r7   r4  Znpartsr  r  r   r,  r   dirnamer   r   r.  r   r   r    test_chunksize  sT    
	



r  c              	   C   sx   t | d}t }d|j_|j||dr2dndd tj	t
dd tj||dd	d	dd
}W 5 Q R X t|| d S )Nr   r*   r0   r-   r   r  r  z10 kiBT)r7   rt   r   r  r*   )r   r   r   r   r*   r(   r   r   rD   r  r  r   r   r   )r   rB   rC   r   rr  r  r   r   r    test_roundtrip_pandas_chunksize>  s"     	r  c                 C   sr   t dd tdD }t| }|j|dgdd tj||d}t|d  dksXt	t| j
dksnt	d S )Nc                 S   s$   g | ]}t ||d |d  dqS )ABCr   )r   r  group)r   r   r   r   r    r!   W  s     z<test_read_pandas_fastparquet_partitioned.<locals>.<listcomp>r   r  r-   )Zpartition_colsr7   r   )r   r   rg  r   r   r   r   rL   r   r_   r  )r   r7   rr  r   r  r   r   r    (test_read_pandas_fastparquet_partitionedT  s    r  c              	   C   s   t ddddddgddd	d
ddgd}tjt| d}|drHdnd}|j||d tj	||d}t
|d |\}}|d }t|j
|jdfgd}dd |jD d }	|j|	 }
t|
tst|
jdgkstd S )Nr>   rA   r   r   r   r   r   r   r   r   r  rq   r  data.parquetr0   r-   r   r  r   r  c                 S   s   g | ]}| d r|qS )r  )r   )r   rY   r   r   r    r!   n  s     
 zLtest_read_parquet_getitem_skip_when_getting_read_parquet.<locals>.<listcomp>)r   r   r   r   r   r   r   r   r   r   r   optimizer   r  r  rK  r   r_   r   )r   r7   rr  r   Z	pd_enginer   r   r   r  r   r  r   r   r    8test_read_parquet_getitem_skip_when_getting_read_parqueta  s    (
r  c                 C   s   t | } tddddgd tdtjjddgddd	}tj|dd
}|j	| ddg|d tj
| d||dgd}| }t|t||d dk  kst|d  dk std S )Nr>   rA   r   r      r  r  rk   )r  timer   r+   Fr  r  )r  r  r   )r*   r7   r   r  r  r   )r   r   r   r   r   r   r   r   r   r   r   r   rL   r_   r  )r   rB   rC   r   Zdf_writeZ	ddf_writer  Zdf_readr   r   r     test_filter_nonpartition_columnst  s0        r  c                 C   sr   t | } tjttjdd dgddtjddd gddddd	}|j| d
d tj| d
dd}t||dd d S )Nr>   rA   r   rf   r  r  r   r  r+   r0   r   TrJ  Fr   )	r   r   r   r   r   rT  r   r   r   )r   r,  r   r   r   r    %test_pandas_metadata_nullable_pyarrow  s    	r  c           	   	   C   s
  t t d}tr$tjtdd}nt }|  t j	|j
d |jddd}W 5 Q R X |d}tjjt|gdgd	}tjj||  d
dd ttjj}tjt| dd  W 5 Q R X dt|jkstddlm} G dd d|}tjt| |d  d S )Nr   z!invalid value encountered in castr  rA   r  )r{  stopnumrg   zdatetime64[ms]tsr   z/file.parquetF)Zuse_deprecated_int96_timestampsr0   r   zout of boundsr   r[   c                       sF   e Zd ZeejejdddZedejejd fddZ	  Z
S )zMtest_pandas_timestamp_overflow_pyarrow.<locals>.ArrowEngineWithTimestampClamp)arrow_tablereturnc           	      S   s   g }|j D ]}tj|jr|jjdkrdddd|jj }|j}|t  }t	
t	d}|j|j| d |j| dd t|t }||}|| q
|| q
tjj||jd	S )
zConstrain datetimes to be valid for pandas

            Since pandas works in ns precision and arrow / parquet defaults to ms
            precision we need to clamp our datetimes to something reasonable)r  msusl    d(	 rw  re   r   r>   T)r0  upperrq  r   )r   r   typesZis_timestampr  r  castr   r  r   iinforg   Zclipminr  rT  rQ   r   r   rN  )	clsr  Znew_columnsr   Z
multiplieroriginal_typeZseriesrt  Z	new_arrayr   r   r    clamp_arrow_datetimes  s*    



zctest_pandas_timestamp_overflow_pyarrow.<locals>.ArrowEngineWithTimestampClamp.clamp_arrow_datetimesFc                    s   |  |}t j|||f|S r2   )r  super_arrow_table_to_pandas)r  r  r  r  rT   Zfixed_arrow_table	__class__r   r    r    s    
  zdtest_pandas_timestamp_overflow_pyarrow.<locals>.ArrowEngineWithTimestampClamp._arrow_table_to_pandas)F)__name__
__module____qualname__classmethodr   r   r  r   r   r  __classcell__r   r   r  r    ArrowEngineWithTimestampClamp  s   ! r  )r   r  rg   r   rD   r  RuntimeWarning
contextlibnullcontextZlinspacer  r  r   r   r   r   rT  r-  r   r   libArrowInvalidr   r   r   r   r4  r_   r^   r\   )	r   rt  ctxZarr_numericZ	arr_datesr	  r  ZArrowEnginer  r   r   r    &test_pandas_timestamp_overflow_pyarrow  s0       
   -r  c                 C   s   t dt dgi}t| d}|j||d ddi}t| j	f |}t
j|d|d}t|| |jj| jjkstd S )	Nr  ry  r   r   Ztimestamp_as_objectTr0   )r7   r  )r   r   r  r   r   r   r   r  r   r  r   r   r   r  rg   r   r_   )r   r7   r   r   r  ri  gotr   r   r    test_arrow_to_pandas  s    
r  
write_colsr  r   rW   c              	   C   sp  |  d |  d t|  d}t|  d}tj|d}tj|d}tddtdd	}td
dtdd	}|| }|| }|j|dd |j|dd |dkr||g}	nt| }	tj	||gdd}
|dkrt
tdkrtj|	|d}| jdd |
 ksltnR|dddgkrHtj|	|d}t||
dd n$tt tj|	|d W 5 Q R X d S )Npart=apart=bzpart=a/kind=xzpart=b/kind=xr  r   r#   r   )r  rW   r   r   Fr)   r-   TZignore_indexz0.8.3r   r   r  rW   r   r   )r!  r   r   r   r   r   r   rg  r   r<  r  re  r   r   r   r   Zto_dictr_   r   rD   r   rN   )r   r7   r  Zpath0r\  _df1_df2r>  r   r   ri  r  r   r   r    test_partitioned_column_overlap   s0    


$r  c                 C   s   |  d}|  d}tj|d}tj|d}tdtdd}tdtdd}tjj	|| dd	j
i d
}t|| tjj	|| dd	j
i d
}t|| tj||gdd}	tjt| |d}
|
d d|
d< t|
t|	j |	dd d S )Nr  r  r  r   r   r  r   r   Fr   )r4  Tr  r   r  rC  r   )r!  r   r   r   r   r   rg  r   r   r   r  r   r   r<  r   r   r   r   r   r   r   )r   r7   r  r\  r]  r	  r
  t1t2ri  r  r   r   r    #test_partitioned_no_pandas_metadata(  s0    	

r  c                 C   s   |  d}|  d}tj|d}tj|d}tdtdd}tdtdd}|j|dd |j|dd tj||gdd	}t	j
t| dd
gt| dd}|d
 d|d
< t|t|j |dd d S )Nr   r   r  r   r  r0   r   Tr  r  )partitioningZpartition_base_dir)r7   datasetrC  Fr   )r!  r   r   r   r   r   rg  r   r<  r   r   r   r   r   r   r   )r   r\  r]  r	  r
  ri  r  r   r   r    #test_pyarrow_directory_partitioningM  s     

r  c                 C   s   t | }d}d}t||| }tt|tjj|dt|dd}d |j	_
tj||d}|j|d|d ||d d	k }	tj||d
gd}
t|	|
 d S )Nre   r   rk   )r&   r  r  r&   r+   r  r7  r>   )r  r  r>   r  )r   r   r   rq  r   r   r   r  r   r*   r(   r   r   r   r   r   )r   rB   rC   r   rl   r,   r   r   r>  ri  r  r   r   r    test_partitioned_preserve_indexe  s$    r  c                 C   s   | drtjddd t| d}tddgddgd	d
gdd}d |j_	|j
|| drfdnddd t|}tj||d}t|| d S )Nr0   0.15.0r  r   r>   rA   r   r   r   r   rB  r   r-   Tr   r   )r   rD   r  r   r   r   r   r   r*   r(   r   r   r   r   )r   r7   r   r   ri  r  r   r   r    $test_from_pandas_preserve_none_index{  s    
$
r  c                 C   s   | drtjddd d}n|dks*td}tjdd}t|jD ]0\}}| d| d}|	 j
t||d	 qDtjt| d
|d}t|| d S )Nr0   r  r  r-   Tr   ztest.r  r   Fr{   )r   rD   r  r_   r   r   r  r  r   r   r   r   r   r   r   )r   r7   rB   r,  r   r  r   r   r   r   r    %test_multi_partition_none_index_false  s    
r  c                 C   sl   t | d}tjddddgitjdddd}|j||d	rDd	nd
d tj||d}t	||
  d S )Nr   tr>   rA   r   r   )r{  r  r)   r0   r-   r   )r   r   r   r   
RangeIndexr   r   r   r   r   r   )r   rB   rC   r   r=  r>  r   r   r    )test_from_pandas_preserve_none_rangeindex  s    " r  c              	   C   s   d}t | d}tdddg|ddgid}d |j_tj|dd}t	j
t|d	 |j||d
d W 5 Q R X t	t}|j||d W 5 Q R X |t |jkstd S )NZ__null_dask_index__r   r#   r>   rA   r   r   r+   r  Fr   r   )r   r   r   r   r   r*   r(   r   r   rD   r  UserWarningr   r   rN   r4  r_   )r   r7   Z	null_namer   r   r   r  r   r   r    test_illegal_column_name  s     r  c                 C   sh   t ddd d gddddgd}tj|dd}|jt| |dd tjt| |d	d
}|jdksdtd S )Nr>   rA   r   r   r   r+   Fr   r   r   )NNN)	r   r   r   r   r   r   r   r   r_   )r   r7   r   r   r  r   r   r    "test_divisions_with_null_partition  s
     r  c                 C   st   t | }tdddgdddgd}|jddd}tj|d	d
}|j||d tj|ddd}|  t	|| d S )Nr   r   r   r   r   r   Tr   rA   r+   r   r0   rJ  )
r   r   r   r   r   r   r   r   r   r   )r   r7   r   r   r   r   r   r   r    test_pyarrow_dataset_simple  s    r  test_filterc                 C   s   t | }tdddgdddgd}|d d|d< tj|dd	}|j||dd
d tj|d|rhdgnd d
d}|rt||d dk 	 |	  n
t|| d S )Nr   r   r   r   r   r   r  rA   r+   T)r7   r8  ru   r0   r   r  r   r  
r   r   r   r   r   r   r   r   r   r   )r   r7   r  r   r   r   r   r   r   r     test_pyarrow_dataset_partitioned  s     r!  c              	   C   s   t | }tdddgdddgd}|d d|d< tj|dd	}|j|d
dd tt	 tj
|d
dgdd}W 5 Q R X tj
|d
dgd}t|| t||d dk  |  d S )Nr   r   r   r   r   r   r  rA   r+   r0   r[  r  F)r7   r  Zread_from_pathsr  )r   r   r   r   r   r   r   rD   r  r  r   r   r   )r   r   r   r   Z	read_df_1Z	read_df_2r   r   r    $test_pyarrow_dataset_read_from_paths  s&    
r"  c                 C   s   t | }tdddgdddgdddgd}|d d	|d< tj|d
d}|j|dddgd tj|d|dgd}t|	 dg ||d dk dg dd d S )Nr   r   r   r   r   r  r  rB  r  rA   r+   r0   r   r[  )r   r  r   )r7   r  r  Fr   r   )r   r  r   r   r   r   r   r   r    'test_pyarrow_dataset_filter_partitioned  s*    r#  c                    s   t tdtdd t fddtd}|j| |dgd tj| |dgd	}|d d
|d< t	 j
dd | |dkrtj| |dggd	}|d d
|d< t	 j
dd | d S )Nr   Zabcdefg)rV   r  c                    s    j | | d  S )Nr>   )r)  r   r   r   r    r  '  r  z<test_pyarrow_dataset_filter_on_partitioned.<locals>.<lambda>r  r[  )r  r  r   r  rC  rA   r   r0   )r   r   rg  r   r   Zfrom_mapr   r   r   r   r)  )r   r7   r   Zread_ddfr   r%  r    *test_pyarrow_dataset_filter_on_partitioned#  s*    
r&  c                 C   s   t | } ttjjg g ddd}ttjjddddgddddgddd}ttjjddddgddddgddd}t|||g}|j| dd	gd
dd t	
| }d|kstd|ksttt	j| dj }|j}|st|dd
std S )Nr"   )r  r  rf   r>   rA   r   r   r   r0   r#   FTr7   r8  rQ   ru   ry   rx   r?  )r   r   r  r   r   	from_dictr   r  r   r   r   r_   r   r  r   r   rc  Zto_arrow_schemarI  r  )r   df_adf_bdf_cr   r   Zschema_commonrI  r   r   r    )test_parquet_pyarrow_write_empty_metadataA  s<       	

r-  c                 C   s   t | } ttjjddddgddddgddd}ttjjddddgddddgddd}t||g}|j| ddgd	d
d ttjjg g ddd}ttjjddddgddddgddd}t||g}|j| ddgd
d
d
d d S )Nr>   rA   r   r"   r'  rf   r0   r#   FTr(  r   r   )r7   r8  rQ   r9  ru   )	r   r   r  r   r   r)  r   r  r   )r   r*  r+  r>  r,  Zdf_dr   r   r   r    0test_parquet_pyarrow_write_empty_metadata_appendg  sD        r.  c           
      C   sD  t | } ttdddddgd d}d|j_tj|d	d
}|j| d||d |rnt		t
j| |d }nt		t
j| d}tjjj|ddd tj| dd|dd}|r|d}| d}|jd|_t|| tjjj|dddd}tt
j| dj}	|j|	jkst|j|	jks.t|j|	jks@td S )Nr  r  r  r  rT  r  )r   r   r&   r%   r+   F)ru   r8  r7   z=*/*.parquetr   r0   r   )r7   split_everyT)r   r  r7   r*   r   rC  )r7   r/  Zout_dirry   )r   r   r   rg  r*   r(   r   r   r   rr  r   r   r   ior-  Zcreate_metadata_filer   r;  r   r   r   r   r   r  r4  Znum_rowsr_   Znum_columnsr.  )
r   rB   rC   r8  r>  r,  rs  r   r,  Zfmd_filer   r   r    test_create_metadata_file  sP     

	r1  c                 C   s   t jtjtjjddddddddd	d
ddddg
ddd}|jdd}t j|| |dd |j	dd}t j|| |dd t
| }dd |D }t||jkstd S )Nr   r  )r  r%   )lowhighrl   r  r  r  rT  rf  rg  GHIJr   r   r+   Tr   r7   	overwriter   c                 S   s   g | ]}|d kr|qS ))rx   ry   r   r   r   r   r    r!     s      z5test_read_write_overwrite_is_true.<locals>.<listcomp>)r   r   r   r   r   r   r  r   r   r  r   r   rL   r,   r_   )r   r7   r   r   r   r   r   r    !test_read_write_overwrite_is_true  s    
r:  c           
   
   C   s   ddl m} tttddtddtddf}ddd	g|_tj	|d
d}tj
|| |ddgdd || d}dd |D }|jdd}tj
|| |ddgdd || d}dd |D }	t|	t|k std S )Nr   )r  )r@  r   r>   )r0  r   rA   r  r  r  r   r+   T)r7   r8  r9  rc  c                 S   s   g | ]}|  qS r   as_posixr   r   r   r    r!     s     zBtest_read_write_partition_on_overwrite_is_true.<locals>.<listcomp>r   c                 S   s   g | ]}|  qS r   r;  r   r   r   r    r!     s     )r  r  r   r   r   Zvstackfullr   r   r   r   rglobr  rL   r_   )
r   r7   r  r   r   Zfiles_r   r   Zfiles2_Zfiles2r   r   r    .test_read_write_partition_on_overwrite_is_true  s&    


	r?  c              	   C   sv   t dtdi}tj|dd}tt tj|d|dd W 5 Q R X tt tj|| |ddd W 5 Q R X d S )	Nr   r  r   r+   z./Tr8  )r7   rQ   r9  )	r   r   rg  r   r   rD   r   rN   r   r   r7   r   r   r   r   r     test_to_parquet_overwrite_raises  s    rA  c              
   C   s   |  d}tjtdtdiddj||d t|}|| fD ]f}tj	t
dd |j|d	d
 W 5 Q R X |j|jd d}tj	t
dd |j|d	d
 W 5 Q R X qBd S )Nsubdirr#   r0  rA   r+   r   zsame parquet filer  T)r9  r>   )r$   )r!  r   r   r   r   rg  r   r   rD   r   rN   r  r#   )r   r7   rB  r   targetr   r   r   r    Etest_to_parquet_overwrite_files_from_read_parquet_in_same_call_raises  s    
 
rD  c              	   C   s\   t tdtdd}tj|dd}tjtdd |jt	| 
d|d W 5 Q R X d S )	Nr%   )r#   r>   rA   r+   znon-string column namesr  tempr   )r   r   rg  r   r   rD   r   rN   r   r   r   r@  r   r   r    .test_to_parquet_errors_non_string_column_names+  s    rF  c                 C   s   t jddddddtjdddd	d
ddtjdddddddddd}tjj|dd}|j| d|d t	j
| dg|d}|jd|d< t|||jdk  d S )Ng     u@g     0v@g     `v@g     p@g     p@)r   r>   rA   r   r   r   r   g     @g     @g     T@g     @g     @g     @i    )r  r  yearr>   r+   rH  r7  )rH  r  rG  )r  r7   r   )r   r   r)  r   r:  r   	dataframer   r   r   r   rH  r   r   r  r   r   r    test_dir_filter2  s2    
	rJ  c                 C   s   t | } dd tdD }tjt|dd}|j| ddtdd	id
 tj	| dd}|d j
|d j
ksptt||dd d S )Nc                 S   s$   g | ]}t jd ddtddqS )rb  zEurope/Berlinr  z123.00)r  col1)r   r  r   r   r   r   r    r!   W  s   z0test_roundtrip_decimal_dtype.<locals>.<listcomp>   r>   r+   r0   rK  r   rA   )r   r7   rc  r   Fr   )r   rg  r   r   r   r   r   r   Z
decimal128r   rg   r_   r   )r   r   r,  r   r   r   r    test_roundtrip_decimal_dtypeR  s    rM  c                 C   sr   t jt| d}tjdddgtjjddd}|	| t
j||d}d	d
dg|_d	d
dg|_t||  d S )Nr   r   r   r   )r%   r   rk   )r   r   r   r   r  rq   )r   r   r   r   r   r   r   r   uniformr   r   r   r   r   r   )r   r7   r   r>  r   r   r   r    test_roundtrip_rename_columnsg  s    
rO  c           
   	   C   s  ddi}t | }ttdtdd}tj|ddj|||dd t|tj||d	 t	rt

tj|d
}|tj|dg7 }|D ]4}t	|jj}| D ]}|| || kstqqddi}tt }	tj|ddj|||d W 5 Q R X dt |	jks
td S )Ns   my_keys   my_datar%   r   rA   r+   T)r7   custom_metadataru   r   r   ry   s   pandass   my_new_pandas_md)r7   rP  zUser-defined key/value)r   r   r   rg  r   r   r   r   r   r   rr  r   r   r   r  r4  r  r_   rD   r   rN   r4  )
r   r7   rP  r   r   r   r   Z_mdrF   r  r   r   r    test_custom_metadatav  s2    rQ  c           
   	   C   s   t | } tj| d}tj| d}ttdddgd d}tj|dd	}|j	||d
d |j	||d
d dt
|kstttj|dd}|d W 5 Q R X dt
|kstdt
|ksttj||d|d}tj||d|d}	t||	 d S )NZdata1Zdata2r  r  r  r@  r   rA   r+   Fr   r7   ru   ry   r8   zINVALID METADATAT)r7   Zignore_metadata_filer   )r   r   r   r   r   r   rg  r   r   r   r   r_   r*  r   r   r   )
r   r7   r   Zdataset_with_bad_metadataZdataset_without_metadatar>  r,  rq   ddf2addf2br   r   r    test_ignore_metadata_file  sB        rU  ru   rM  c           	   	   C   s   t | } ttdddgd d}tj|dd}|jt | ||d tjt | |d	d
}tjt | |d	|d}t|| t	j
d|i tjt | |d	d
}W 5 Q R X t|| d S )Nr  r  r  r@  r   r%   r+   rR  TrJ  rL  z*dataframe.parquet.metadata-task-size-local)r   r   r   rg  r   r   r   r   r   r   r  rM   )	r   r7   ru   rM  r>  r,  rS  rT  Zddf2cr   r   r    test_metadata_task_size  s:      
rV  )r   Nc              	   C   s  t | } ttdddgd d}|j|jdd}tj|dd	}|j	| |d
|d t
tj| dd  t
tj| dd  ttj| d tj| |d
d}t||dd t|j|jdd ddd}tj| fd|i|ddd
i}t||dd t|j|jdd tjtdd* tj| fd|i|dd
ddd
i}W 5 Q R X tttjjf$ tj| fd|i|d   W 5 Q R X tt$ tj| fd|i|d  W 5 Q R X d S )Nr  r  r  r@  r   r  )r   rA   r+   T)r7   ru   r8  Z_SUCCESSr8   zpart.0.parquet.crcry   rJ  F)check_categorical)Zcheck_category_orderc                 S   s   |rdd| iiS d| iS )Nr  Zrequire_extensionZparquet_file_extensionr   )rV   legacyr   r   r    _parquet_file_extension  s    z0test_extra_file.<locals>._parquet_file_extensionr7   r  r   zrequire_extension is deprecatedr  )rX  .foo)F)r   r   r   rg  r  r   r   r   r   r   r*  r   r   r   closeremover   r   rD   r  r  r   OSErrorr   r  r   r   rN   )r   r7   r8  r   r   r   rY  r   r   r    test_extra_file  sl    


r^  c                 C   sN   t jt| d}tdtdi}|j||d t|t	j
||ddd d S )Nz	multi.foor   r%   r   FTr   )r   r   r   r   r   r   rg  r   r   r   r   )r   r7   r   r=  r   r   r    test_unsupported_extension_file3  s     r_  c                 C   sR   t | }ttdtdid}|j||dd dd t|tj||dd d S )	Nr   r%   r>   c                 S   s   d|  dS )Nr  rZ  r   r$  r   r   r    r  H  r  z0test_unsupported_extension_dir.<locals>.<lambda>T)r7   name_functionru   rJ  )	r   r   r   r   r   rg  r   r   r   )r   r7   r   Zddf0r   r   r    test_unsupported_extension_dir?  s    ra  c                 C   s   t | }tddddgddddgd	}tj|dd
}|j|ddd |d t|}d|ksbtd|ksntd|ksztd|kstt	|tj
||dd d S )Nr>   rA   r   r   r   r  r  r%   Znum1Znum2r+   Tc                 S   s   d|  dS )Nhi-r  r   r   r   r   r    r  W  r  z&test_custom_filename.<locals>.<lambda>ru   r`  r7   rx   ry   hi-0.parquetzhi-1.parquetrJ  )r   r   r   r   r   r   r   r   r_   r   r   )r   r7   r   rr  r   r   r   r   r    test_custom_filenameN  s"    
rf  c                 C   s$  t | }tddddgddddgd	}tj|dd
}|j|ddd |d tdgdgd	}tj|dd
}|dkrtd |j|dd |ddd t	|}d|kst
d|kst
d|kst
d|kst
d|kst
tdddddgdddddgd	}tj||dd}t||dd d S )Nr>   rA   r   r   r   r  r  r%   rb  r+   Tc                 S   s   d| d  dS Nrc  rA   r  r   r   r   r   r    r  l  r  zMtest_custom_filename_works_with_pyarrow_when_append_is_true.<locals>.<lambda>rd  !   ,   r-   a  fastparquet errors our with IndexError when ``name_function`` is customized and append is set to True.  We didn't do a detailed investigation for expediency. See this comment for the conversation: https://github.com/dask/dask/pull/7682#issuecomment-845243623c                 S   s   d| d  dS rg  r   r   r   r   r    r  |  r  )r`  r7   rQ   r9  rx   ry   re  zhi-2.parquetzhi-4.parquetFr   r   )r   r   r   r   r   r   rD   r?   r   r   r_   r   r   )r   r7   r   rr  r   r   Zexpected_pdfactualr   r   r    ;test_custom_filename_works_with_pyarrow_when_append_is_truec  sJ    
rk  c              	   C   s   t | }tddddgddddgd	}tj|dd
}tjtdd |j|d|d W 5 Q R X tjtdd |j|dd |d W 5 Q R X d S )Nr>   rA   r   r   r   r  r  r%   rb  r+   z7``name_function`` must be a callable with one argument.r  whatever.parquet)r`  r7   z0``name_function`` must produce unique filenames.c                 S   s   dS )Nrl  r   r   r   r   r    r    r  zAtest_throws_error_if_custom_filename_is_invalid.<locals>.<lambda>)	r   r   r   r   r   rD   r   rN   r   )r   r7   r   rr  r   r   r   r    /test_throws_error_if_custom_filename_is_invalid  s      rm  c                 C   s   t | }tddddgddddgd}tj|d	d
}|j||dgdd dd t|D ]J\}}}|D ]}|dkshtqh|D ]$}	|	dd t	|j
D dks~tq~qZtj||dd}
t||
dddd d S )NZfrankliZmarcelaZluisZcanadaZchinaZ	venezuela)Z
first_namecountryr   r+   ro  c                 S   s
   |  dS )N-cool.parquetr   r   r   r   r    r    r  z5test_custom_filename_with_partition.<locals>.<lambda>F)r7   r8  r`  rw   )zcountry=canadazcountry=chinazcountry=venezuelac                 S   s   g | ]}| d qS )rp  r   r   r   r   r    r!     s     z7test_custom_filename_with_partition.<locals>.<listcomp>rx   ry   r   )r   r  rW  )rx   ry   )r   r   r   r   r   r   r   r  r_   rg  r,   r   r   )r   r7   r   rr  r   r=   dirsr   dirr  rj  r   r   r    #test_custom_filename_with_partition  s>    

      rs  z5.0z.pyarrow write_dataset was added in version 5.0c                 C   s  dd l m} ddlm}m} tddgddgd}| d}tj	|dd	}|j
||d
gdd | d}tj	|}	||	|dd|td
t fgd dd }
tj||d}|| }t|
||
|dd tj||d}|| }t|
||
|dd d S )Nr   )HivePartitioningwrite_datasetr>   rA   r   r   )rK  col2zfoo-daskr+   rK  Fr  zfoo-pyarrowzpart.{i}.parquetr-  )r   base_dirZbasename_templateformatr  c                 S   s   |  dddg S )Nrv  rK  )r;  r   r   r   r    _prep  s    z9test_roundtrip_partitioned_pyarrow_dataset.<locals>._prepr   r   )pyarrow.parquetr-  Zpyarrow.datasetrt  ru  r   r   r!  r   r   r   r   r   rc  r   r   r  r  r   )r   r7   r   rt  ru  r   Z	dask_pathr   Zpa_pathr	  ry  Zdf_read_daskZ
df_read_par   r   r    *test_roundtrip_partitioned_pyarrow_dataset  s,    


r{  filter_value)r>   )rM   r   rE   )idsc           	      C   s   | d }t ddddgddddgd}|j||d dd	|fg}tj|||d
}t j|||d
}t|| tj|dd}| d }|j||d tj|||d
}t j|||d
}t||dd dS )<Regression test for https://github.com/dask/dask/issues/8720z$in_predicate_iterable_pandas.parquetr>   rA   r   r   r  r   r  r  r  r+   z"in_predicate_iterable_dask.parquetFr   N)r   r   r   r   r   r   r   )	r   r7   r|  r   r   r  r  r  r   r   r   r    #test_in_predicate_can_use_iterables  s    
r  )r  r  r%   )r  r  r%   )zone-item-single-nestzone-item-double-nestztwo-item-double-nestztwo-item-two-nestc              	   C   s   | d }t ddddgddddgd}|j||d tjtdd	 tj|||d
 W 5 Q R X tj|dd}| d }|j||d tjtdd	 tj|||d
 W 5 Q R X dS )r~  zgh_8720_pandas.parquetr>   rA   r   r   r  r   zValue of 'in' filterr  r  r+   zgh_8720_dask.parquetN)	r   r   r   rD   r   r  r   r   r   )r   r7   r|  r   r   r   r   r   r    &test_in_predicate_requires_an_iterable  s    r  c              	   C   s`   t dtdi}| d }|j||d tjtdd tj||dd}W 5 Q R X t	|| d S )	Nr   r%   z(test_deprecate_gather_statistics.parquetr   
deprecatedr  T)r7   Zgather_statistics)
r   r   rg  r   rD   r  r  r   r   r   )r   r7   r   r   r   r   r   r     test_deprecate_gather_statistics-  s    r  c           	      C   s   t | }td}td}ddlm} ||jfdd }|ddgd	d
gd}||d}|| |	|}t
|| d S )Ncudf	dask_cudfr   )pyarrow_schema_dispatchc                 S   s
   |   jS r2   )Zto_arrowrc  )objr   r   r    get_pyarrow_schema_cudfE  s    z>test_gpu_write_parquet_simple.<locals>.get_pyarrow_schema_cudfabcdefr   rA  r   r   )r   rD   r  Zdask.dataframe.dispatchr  registerr   Z	from_cudfr   r   r   )	r   r   r  r  r  r  r   r   r  r   r   r    test_gpu_write_parquet_simple>  s    




r  c           	   	   C   s  t | }d|  }ddi}tdtdi}tj|dd}|| |j|d|d	}t|jd
}|j	sjt
|j	d dks|t
tj||d}t|jd}|j	st
|j	d dkst
|j|d|d	}t|jd
}|j	rt
tj||d}t|jd}|j	rt
tjdd| |j|d|d	}t|jd
}|j	s2t
|j	d dksFt
tj||d}t|jd}|j	slt
|j	d dkst
W 5 Q R X d S )Nzsimplecache://Ztarget_protocolr  r   r%   rA   r+   F)r   storage_optionsr  retriesr   )r  r  )r  )r   r   r   rg  r   r   r   r   r   r  r_   r   r  )	r   r   Z	remote_fnr  r   r   Zscalarr  r   r   r   r    !test_retries_on_remote_filesystemU  sB    





  r  fsZfsspecc                 C   s   ddl m} tdtdi}tj|ddj| |d |p>| }tj| ||d}|d kr|t	t
|jj jj}||ks|tt|| d S )	Nr   r  r   r%   rA   r+   r   r7   
filesystem)r  r  r   r   rg  r   r   r   r   r  r  r   r  r  io_funcr  r_   r   )r   r7   r  r  r   r  r   layer_fsr   r   r    test_filesystem_option  s    
r  r]   c                 C   s   ddl m} ddlm} tdtdi}tj|dd	|  |pF| }tj
| d|d	}tt|jj jj}t||s~tt|j|stt|| d S )
Nr   )ArrowFSWrapperr  r   r%   rA   r+   r0   r  )Zfsspec.implementations.arrowr  Z
pyarrow.fsr  r   r   rg  r   r   r   r   r  r  r   r  r  r  r  rK  r_   r   )r   r  r  r  r   r   r  r   r   r    test_pyarrow_filesystem_option  s    
r  c              	   C   sj   t tddgd d}| d }|j|dd tjtdd tj||d	gd
}W 5 Q R X t	|| d S )Nr%   r  r   z#test_select_filtered_column.parquetFr)   zSorted columns detectedr  r  r  )
r   r   rg  r   rD   r  r  r   r   r   )r   r7   r   r   r   r   r   r    test_select_filtered_column  s    r  (   r  rr  r  r   sysrm  decimalr   Zunittest.mockr   Znumpyr   r  r   rD   Zpackaging.versionr   re  r   Zdask.dataframerI  r   rd  Zdask.array.numpy_compatr   Zdask.blockwiser   r   Zdask.dataframe._compatr   r	   r
   r   r   Zdask.dataframe.io.parquet.corer   Zdask.dataframe.io.parquet.utilsr   Zdask.dataframe.optimizer   Zdask.dataframe.utilsr   Zdask.layersr   Z
dask.utilsr   Zdask.utils_testr   r-   ImportErrorr  __version__r0   r   rd  rz  r-  r   ZSKIP_FASTPARQUETrP   ZskipifrH   platformZSKIP_PYARROWZSKIP_PYARROW_REASONrI   Znrowsr,   r   rg  rn  r   r   r   Zfixturer4   r7   rZ   Zfp_pandas_msgZpyarrow_fastparquet_msgZfp_pandas_xfailr`   rc   rR   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r?   r  r#  r.  r/  r?  rF  rK  rN  rO  rR  rf  Z	Timedeltar  rX  r_  ra  rj  ro  rt  binaryr  r   mapr  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r#  r&  r)  r6  r8  r9  rI  rQ  rS  rU  rX  rY  r^  r_  re  rj  rp  rs  ru  rv  rx  r  r  r  r  r  r  r  r  r  r   Zpermutationr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r"  r#  r&  r-  r.  r1  r:  r?  rA  rD  rF  rJ  rM  rO  rQ  rU  rV  r^  r_  ra  rf  rk  rm  rs  r{  r  r  r  Zgpur  r  r  r  r  r   r   r   r    <module>   sb  


 
#
	

 





%
U	
,
	>
	

 6

 0
 

!
$$  ", (D
'


W!



0
7%





ID**&	$   "'	+' 	3)7J$ 	%&8# *'#D+$)"
-