U
    k/eG                    @   sZ  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	Z
d dlmZ d dl	mZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ z$d dlmZ d dlmZmZm Z  W n e!k
r   dZY nX zd dl"Z#d dl$m%Z& W n e!k
r   d Z#Z&Y nX ej'jZ(ej'j"d	d
 Z)ej'j"dd Z*ej'+ddd Z,dd Z-edd Z.ej'j"edd Z/ej'+dej'j"dd Z0ej'j"edd Z1ej'j"edd Z2ej'j"edd Z3ej'j"eej'j4e5e6fdd d!d" Z7ej'j"ej'j8d#d$ Z9ej'j"ed%d& Z:ej'j"ed'd( Z;ej'j"ed)d* Z<ej'j"ed+d, Z=ej'j"eej'>d-d.gd.gge?d/d0k e?d1d2d0k e?d1d3@e
A d0k fej'>d4d5d6d7 ZBej'j"ed8d9 ZCej'jDed:d; ZEej'jDed<d= ZFej'j"ed>d? ZGej'j"ej'jDed@dA ZHej'j"ej'jDedBdC ZIddEdFZJdGdH ZKdIdJ ZLej'j"ej'+dKdLdM ZMej'j"ej'+dKdNdO ZNdPdQ ZOeej'j"dRdS ZPej'j"ej'+dTedUdV ZQej'j"edWdX ZRej'+dej'j"edYdZ ZSej'j"ed[d\ ZTej'j"ed]d^ ZUddadbZVdcdd ZWej'j"eej'>dedfdggdhdi ZXej'j"edjdk ZYej'j"edldm ZZej'j"eej'>dedfdggdndo Z[ej'j"edpdq Z\edrds Z]ej'+dKddtduZ^ddvdwZ_ej'j"edxdy Z`ej'j"edzd{ Zaej'j"ed|d} Zbej'j"ed~d Zcej'j"edd Zdej'j"ej'jDedd Zeej'j"ej'jDedd Zfej'j"ej'jDedd Zgej'+ddej'j"edd Zhej'j8ej'j"dd ZidddZjdddZkej'j"edd Zlej'j"edd Zmej'j"edd Znej'j"edd Zoej'j8ej'j"ej'+ddd Zpej'j8dd Zqej'j8ej'+ddd Zrej'j8dd Zsej'+dedd Ztej'j8dd Zuej'j8dd Zvej'j8dd Zwej'j8dd Zxej'j8dd Zyej'j8ej'>dddd Zzej'j"edd Z{dS )    N)fs)LocalFileSystem)util)parametrize_legacy_dataset parametrize_legacy_dataset_fixed(parametrize_legacy_dataset_not_supported)guid)Version)_read_table_test_dataframe_write_tablec              	   C   sd   t d}tj|}| d }t||dd tt t	|}W 5 Q R X |
 }||s`td S )N  parquet_piece_read.parquet2.6version)r   paTablefrom_pandasr   pytestwarnsFutureWarningpqParquetDatasetPiecereadequalsAssertionError)tempdirdftablepathpiece1result r#   F/tmp/pip-unpacked-wheel-seu8352k/pyarrow/tests/parquet/test_dataset.pytest_parquet_piece_read;   s    r%   c              	   C   s   t d}tj|}| d }t||dd tt t	|}W 5 Q R X |
 }t|tjsbt| }t|tjszt||std S )Nd   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   
isinstancer   get_metadataZFileMetaDatar   )r   r   r   r    piecetable1Zmeta1r#   r#   r$   (test_parquet_piece_open_and_get_metadataJ   s    r+   z(ignore:ParquetDatasetPiece:FutureWarningc                  C   s   d} t | }t j| dd}t j| dddgd}t|| ks@tt|dksPtt|dks`t||kslt||ksxt||kst||kstd S )	Nz	/baz.parq   )	row_group)foor   )barr,   )r-   Zpartition_keysz/baz.parq | row_group=1z/partition[foo=0, bar=1] /baz.parq | row_group=1)r   r   strr   )r    r!   Zpiece2Zpiece3r#   r#   r$   test_parquet_piece_basics]   s    
  r1   c               	   C   s   t ddddg} t ddddg}t| jtjs6tt|jtjsHtt dtdd	d	g}t	
t |j W 5 Q R X d S )
Nkey1r.   r/   Zbazkey2i  i  i  r,   )r   ZPartitionSetr'   
dictionaryr   ZStringArrayr   ZIntegerArraydatetimer   raises	TypeError)Zset1Zset2Zset3r#   r#   r$   "test_partition_set_dictionary_typep   s    r8   c                 C   s   t ddddgi}| d }|  |d }t|t| tj|t |d}|	|s^t
tjdt| |d}|	|st
d S )	Nar,         data_dirdata.parquet
filesystemuse_legacy_datasetzdata_dir/data.parquet)r   r   mkdirr   write_tabler0   
read_tabler   r   r   r   r   Z_filesystem_uri)r   r@   r   	directoryr    r"   r#   r#   r$   test_filesystem_uri|   s"      rE   c                 C   s   t  }t|| | d S N)r   _get_instance_partition_test_for_filesystem)r   r@   r   r#   r#   r$   test_read_partitioned_directory   s    rI   z$ignore:'ParquetDataset:FutureWarningc              	   C   s   t  }| }t|| tj||dd}tjtdd tj||ddd}W 5 Q R X t	|j
dksdt|j}t	|jdks|t|j|jjkstt	|jt	|jjkstd S )	Nr,   )r?   metadata_nthreadsz"Specifying the 'metadata_nthreads'match   T)r?   rJ   r@   r   )r   rG   rH   r   ZParquetManifestr   r   r   ParquetDatasetlenpiecesr   
partitionsZpartition_nameslevels)r   r   	base_pathmanifestdatasetrQ   r#   r#   r$   *test_create_parquet_dataset_multi_threaded   s*    
   rV   c                 C   s`   t  }| }t|| tj||d}|jdgd}|rL|jdddgks\tn|jdgks\td S )Nr@   valuescolumnsr.   r/   )r   rG   rH   r   rN   r   column_namesr   )r   r@   r   rS   rU   r"   r#   r#   r$   'test_read_partitioned_columns_selection   s    
 r\   c                 C   s~  t  }| }ddg}dddg}ddg}d|gd	|gd
|gg}tjtj|dddtttj|tdddtttj|ddddddd	d
gd}t	|||| t
j||dddg|d}	|	 }
|
 jdd}d|d jkstd|d	 jkstd|d
 jkstdddgddgg}t
j||||d}	|	 }
|
 jdd}|d dk|d	 dk@ |d
 dk@ }t|d dk|d
 dk@ }| dkst| dkst|jd | |  kst|r<tt  dgg}t
j|||dd W 5 Q R X tt  dgg}t
j|||dd W 5 Q R X n>dggdggfD ],}t
j|||dd}	|	 jdksLtqLd S )Nr   r,   r9   bcTFintegerstringbooleani4Zdtype      r:   boolr;   r_   r`   ra   rY   )r_   =r,   )r`   !=r]   )ra   ==Truer?   filtersr@   drop)r_   rh   r   )ra   rj   Falserk   rp   )r`   rj   s   1 a)r`   rj   z1 a)r   rG   pd	DataFramenparrayrepeattileobject_generate_partition_directoriesr   rN   r   	to_pandasreset_indexrX   r   sumshaper   r6   NotImplementedErrornum_rows)r   r@   r   rS   integer_keysstring_keysboolean_keyspartition_specr   rU   r   	result_dfrm   Zdf_filter_1Zdf_filter_2r#   r#   r$   test_filters_equivalency   s    
   



 

  r   c                 C   s   t  }| }dddddg}d|gg}d}tjt|tj|dd	d
ddgd}t|||| tj	||ddg|d}|
 }	|	 jddjdd}
dd tt|
d jD }|ddgkstd S )Nr   r,   r:   r;      integersre   rb   rc   indexr   r   rY   )r   <r   )r   >r,   rl   ZbyTrn   c                 S   s   g | ]}|qS r#   r#   .0xr#   r#   r$   
<listcomp>>  s     z9test_filters_cutoff_exclusive_integer.<locals>.<listcomp>r   rG   rq   rr   rs   arangert   rx   r   rN   r   ry   sort_valuesrz   mapintrX   r   r   r@   r   rS   r   r   Nr   rU   r   r   Zresult_listr#   r#   r$   %test_filters_cutoff_exclusive_integer  s:     r   z5Loss of type information in creation of categoricals.)r6   reasonc              	   C   s  t  }| }tdddtdddtdddtdddtdddg}d|gg}d	}tjt|tj|d
ddddgd}t	|||| t
j||ddg|d}| }	|	 jddjdd}
tjtjtdddgd
dtj|d
dd}|
d j|kstd S )Ni  r   	   
            datesre   Z
datetime64rc   )r   r   r   rY   )r   r   z
2018-04-12)r   r   z
2018-04-10rl   r   Trn   
categories)r   rG   r5   daterq   rr   rs   r   rt   rx   r   rN   r   ry   r   rz   CategoricalrX   r   )r   r@   r   rS   Z	date_keysr   r   r   rU   r   r   expectedr#   r#   r$   &test_filters_cutoff_exclusive_datetimeB  sJ    	 r   c              	   C   sv   | d }t t jddddtddj|dd tj|d	d
tdddfgd}|d	 dddddgksrt
d S )Nztimestamps.parquetz
2020-01-01r   D)Zperiodsfreq)r   idT)Zuse_deprecated_int96_timestampsr   <=i  r,   re   )rm   r   r   r:   r;   r   )rq   rr   Z
date_rangerangeZ
to_parquetr   rC   r5   column	to_pylistr   )r   r    r   r#   r#   r$   test_filters_inclusive_datetimeu  s     r   c                 C   s   t  }| }dddddg}d|gg}d}tjt|tj|dd	d
ddgd}t|||| tj	||ddg|d}|
 }	|	 jddjdd}
dd tt|
d jD }|ddgkstd S )Nr   r,   r:   r;   r   r   re   rb   rc   r   r   rY   )r   r   r;   )r   z>=r:   rl   r   Trn   c                 S   s   g | ]}t |qS r#   )r   r   r#   r#   r$   r     s     z2test_filters_inclusive_integer.<locals>.<listcomp>r   r   r#   r#   r$   test_filters_inclusive_integer  s:     r   c                 C   s  t  }| }ddg}dddg}ddg}d|gd	|gd
|gg}tjtj|dddtttj|tdddtttj|ddddddd	d
gd}t	|||| t
j||dg|d}	|	 }
|
 jdd}d|d	 jkstd|d	 jkstd|d	 jkstt
j||dddgfdd
ddhfg|d}	|	 }
|
 jdd}d|d jksbtd|d	 jksvtd|d
 jkstd S )Nr   r,   r9   r]   r^   TFr_   r`   ra   rb   rc   rd   re   r:   rf   r;   rg   rY   )r`   inabrl   rn   r   )r`   r   r9   r]   znot inrp   )r   rG   rq   rr   rs   rt   ru   rv   rw   rx   r   rN   r   ry   rz   rX   r   )r   r@   r   rS   r   r   r   r   r   rU   r   r   r#   r#   r$   test_filters_inclusive_set  sV    
  
r   c           	   	   C   s  t  }| }dddddg}d|gg}d}tjt|tj|dd	d
ddgd}t|||| t	t
 tj||dg|d W 5 Q R X t	t tj||dg|d W 5 Q R X |rt	t" tj||ddt fg|d W 5 Q R X n0tj||ddt fg|d}| jdkst|rTt	t" tj||dddhfg|d W 5 Q R X nFtj||dddhfg|d}t	t | jdkstW 5 Q R X d S )Nr   r,   r:   r;   r   r   re   rb   rc   r   r   rY   )r   r   r;   rl   )r   z=<r;   r   ri   )r   rG   rq   rr   rs   r   rt   rx   r   r6   r7   r   rN   
ValueErrorsetr   r~   r   r}   )	r   r@   r   rS   r   r   r   r   rU   r#   r#   r$   test_filters_invalid_pred_op  sh    r   c           	   	   C   s   t  }| }dddddg}d|gg}d}tjt|tj|dd	d
ddgd}t|||| d}tj	t
|d tj||dg|d  W 5 Q R X d S )Nr   r,   r:   r;   r   r   re   rb   rc   r   r   rY   z1No match for FieldRef.Name\(non_existent_column\)rK   )Znon_existent_columnr   r;   rl   )r   rG   rq   rr   rs   r   rt   rx   r   r6   r   r   rN   r   )	r   r@   r   rS   r   r   r   r   msgr#   r#   r$   test_filters_invalid_column  s$    
r   rm   )r   r   r;   r   r;   nestedr9   r]   read_method)rC   read_pandasc              	   C   s   t t|}t }| }dddddg}d|gg}t|}	tt|	tj	|ddt	d	d
 t
|	D d}
t||||
 t|||d}|rt|tjrd}tjt|d ||f| W 5 Q R X n||f|}|jdkstd S )Nr   r,   r:   r;   r   r   rb   rc   c                 S   s   g | ]}|t |d qS )r   )r0   r   ir#   r#   r$   r   K  s     z+test_filters_read_table.<locals>.<listcomp>)r   r   r   rl   z6Expressions as filter not supported for legacy datasetrK   )getattrr   r   rG   rO   rq   rr   rs   r   rt   r   rx   dictr'   pcZ
Expressionr   r6   r7   r~   r   )r   r@   rm   r   r   r   rS   r   r   r   r   kwargsr   r   r#   r#   r$   test_filters_read_table3  s,    

r   c           
      C   s   t  }| }ddg}d|gg}d}tjt|tj|dddddgd	}t|||| tj	||d
}|
 }	|	d |kstd S )NZ2019_2Z2019_3	year_weekr:   rw   rc   )r   r   r   rY   rW   )r   rG   rq   rr   rs   r   rt   rx   r   rN   r   r   r   r   )
r   r@   r   rS   r   r   r   r   rU   r"   r#   r#   r$   $test_partition_keys_with_underscores]  s&     r   c                 C   sR   | \}}|d }t ddddgi}t|||d t|||d}||sNtd S Nz/test.parquetr9   r,   r:   r;   r?   r>   r   r   r   r
   r   r   )s3_example_s3fsr@   r   r    r   r"   r#   r#   r$   test_read_s3fsx  s      r   c                 C   sR   | \}}|d }t ddddgi}t|||d t|||d}||sNtd S r   r   )r   r@   r   rD   r    r   r"   r#   r#   r$   test_read_directory_s3fs  s      r   c                 C   sP   t | d }tddddgi}t|| tj|g|d }||sLtd S )Nr=   r9   r,   r:   r;   rW   )	r0   r   r   r   r   rN   r   r   r   )r   r@   	data_pathr   r"   r#   r#   r$   test_read_single_file_list  s    
 
r   c              	   C   s|   dd l }ddlm} t|jtdkr0td | \}}tt ||}W 5 Q R X t	|| t
j|||d}|  d S )Nr   )S3FSWrapperz0.5z+S3FSWrapper no longer working for s3fs 0.5+r>   )s3fspyarrow.filesystemr   r	   __version__r   skipr   r   rH   r   rN   r   )r   r@   r   r   r   r    wrapperrU   r#   r#   r$   ,test_read_partitioned_directory_s3fs_wrapper  s    

  r   c                 C   s   | \}}t |||d d S )NrW   )rH   r   r@   r   r    r#   r#   r$   $test_read_partitioned_directory_s3fs  s      r   Tc                 C   sp  ddg}dddg}d|gd|gg}d}t jt|tj|d	d
dtttj|td
ddtj	|dddddgd}t
| ||| tj|| |d}| }	|	 jddjdd}
|jddjddj|
jd}|stt jtdk r t j|d |d|d< t j|d |d|d< n$|d d|d< |d d|d< |
jddddgk s`tt|
| d S )Nr   r,   r9   r]   r^   r.   r/      rb   rc   rd   re   r:   )r   r.   r/   rX   r   rX   rY   r>   r   Trn   z2.0.0r   category)rq   rr   rs   r   rt   ru   rv   rw   randomrandnrx   r   rN   r   ry   r   rz   reindexrZ   r	   r   r   astypeallr   tmassert_frame_equal)r   rS   r@   Zfoo_keysZbar_keysr   r   r   rU   r   r   Zexpected_dfr#   r#   r$   rH     sL    


  rH   c                    sB   t  tdtdd fdd|dg  d S )Npathsepsep/c              
      s  | \}}|D ]}|||fg } t| d||g}| | d krЈ |t g}t|}	tj|	}
	|d}t
|
| W 5 Q R X |st |dg}	|d}W 5 Q R X q||d |  |dg}	|d}W 5 Q R X qd S )Nz{}={}r,   wbZ_SUCCESS)joinr0   formatrA   r   _filter_partitionr   r   r   openr   existsr   )base_dirlevel	part_keysnamerX   valueZthis_part_keysZ	level_dir	file_pathZfiltered_dfZ
part_tablefZfile_successZDEPTH_visit_levelr   r   r   r   r#   r$   r     s,    


z5_generate_partition_directories.<locals>._visit_levelr   )rO   r   )r   r   r   r   r#   r   r$   rx     s    rx   c              	   C   sL  dd l }dd lm} d}|jt|tj|dddgd}t|}t	j
|d}tj|}| |d}t|| W 5 Q R X t	j
|d	}	| |	d}||j| W 5 Q R X |j|| d
d}
tt |
jt|	kstW 5 Q R X | |}||j}W 5 Q R X |
j|s"t|j|g| d
d}|j|
jsHtd S )Nr   r&   r   rX   r   rX   rY   r=   r   _common_metadataTr>   )pandaspyarrow.parquetparquetrr   rs   r   r   r   r0   osr    r   r   r   r   r   r   write_metadataschemarN   r   r   r   common_metadata_pathr   read_metadatar   )r   rS   rq   r   r   r   r   r   r   metadata_pathrU   Zcommon_schemadataset2r#   r#   r$    _test_read_common_metadata_files  s:    

r   z+ignore:'ParquetDataset.schema:FutureWarningc                 C   s   t  }t||  d S rF   )r   rG   r   )r   r   r#   r#   r$   test_read_common_metadata_files?  s    r   c           
   	   C   s  t  }d}tjt|tj|dddgd}| d }tj	
|}||d}t|| W 5 Q R X | d }||d}t|j| W 5 Q R X tj| |d	d
}tt |jt|kstW 5 Q R X ||}t|j}	W 5 Q R X |j|	std S )Nr&   r   r   rX   rY   r=   r   	_metadataTr>   )r   rG   rq   rr   rs   r   r   r   r   r   r   r   r   r   r   r   rN   r   r   r   r   r0   r   r   r   )
r   r   r   r   r   r   r   r   rU   Zmetadata_schemar#   r#   r$   test_read_metadata_filesF  s.    
r   c                 C   sl   t jt| td}g }|D ]>\}}|| t|tjtjfrHt	|}|| | |kM }q| | j
|ddS )Nrc   r,   )Zaxis)rs   ZonesrO   rf   appendr'   r5   r   rq   Z	Timestampro   )r   r   	predicateZto_dropr   r   r#   r#   r$   r   f  s    

r   c                 C   s   | d }|   tjtddddgi}t||d  | d }|   tjtddd	d
gi}t||d  tj| dgg|d}|	d
tdddggstd S )NzA=0Br,   r:   r;   r=   zA=1r9   r]   r^   )Arj   r   )rm   r@   )rA   r   r   r   rq   rr   r   rB   rC   r   r   Zchunked_arrayr   )r   r@   Zdir1r*   Zdir2Ztable2r   r#   r#   r$   "test_filter_before_validate_schemav  s    r  z.ignore:Specifying the 'metadata':FutureWarningc              
      s  d}d}| t   }|  g }g }t|D ]\}t||d}|d tj|d< |d| }	tj	
|}
t|
|	 ||
 ||	 q*|d   dfdd		}|| t|} |stt|d
 }r2|||d}||sttjtdd tj||jdd }W 5 Q R X ||s\tn*tjtdd tj||dd W 5 Q R X d
dd jd g} fdd|D }tj||d}tj	j fdd|D | jjd}||sttj|dd t||djd d d df }| dt   }tj	
|}t|| s"d S t|}tt |||g  W 5 Q R X tt |||d W 5 Q R X ||d
 g}tt. tjtdd |||jd W 5 Q R X W 5 Q R X tt || W 5 Q R X d S )Nr   re   seedZuint32
{}.parquetz_SUCCESS.crcTc                    s$   t j| fd i|}|j||dS )Nr@   )rZ   use_threads)r   rN   r   )pathsrZ   r  r   rU   rW   r#   r$   read_multiple_files  s    z5test_read_multiple_files.<locals>.read_multiple_filesr   )metadatazSpecifying the 'schema'rK   r   r@   no longer supportedF)r
  r@   r:      r,   c                    s   g | ]}  |jqS r#   )fieldr   r   r"   r#   r$   r     s     z,test_read_multiple_files.<locals>.<listcomp>)rZ   r@   c                    s   g | ]}  |qS r#   )r   r   r  r#   r$   r     s     )namesr
  )r  r@   r   r   )NT) r   rA   r   r   r   rs   int64r   r   r   r   r   r   touchZconcat_tablesr   r   r   r   r   r   r   rN   r   r   r6   r   rC   num_columnsZfrom_arraysr
  Ziloc)r   r@   nfilessizedirpath	test_datar  r   r   r    r   r	  r   r
  Zresult2Zresult3Zto_readZ	col_namesoutZ	bad_appleZbad_apple_pathtZbad_metaZmixed_pathsr#   )r"   r@   r$   test_read_multiple_files  s    




    

"r  c                    s.  d}d}| t   }|  g }g }g }t|D ]t}t||d}	t|| |d | |	_d|	j_|d| }
t	j
|	}t||
 || ||	 ||
 q.tj||d}ddg |j d	 }t fd
d|D }t|| |jt d	 }|j|jkstt|j|jd	| d S )Nre   r  r,   r   r  rW   Zuint8stringsrY   c                    s   g | ]}|  qS r#   r#   r   rY   r#   r$   r     s     z,test_dataset_read_pandas.<locals>.<listcomp>)r   rA   r   r   rs   r   r   r   r   r   r   r   r   r   r   rN   r   ry   rq   concatr   r   r   r|   r   r   rZ   )r   r@   r  r  r  r  framesr  r   r   r    r   rU   r"   r   r#   rY   r$   test_dataset_read_pandas  s2    



r  c                 C   s   | t   }|  tddd}|dd }tj|}t||dd tj	|d|d}|
 |sht|r|jd 
 |std S )	Nr   r   r  r  r   r   T)
memory_mapr@   )r   rA   r   r   r   r   r   r   r   rN   r   r   r   rP   )r   r@   r  r   r    r   rU   r#   r#   r$   test_dataset_memory_map  s    
  r!  c              	   C   s   | t   }|  tddd}|dd }tj|}t||dd t	t
 tj|d|d W 5 Q R X d	D ]&}tj|||d}| |sptqpd S )
Nr   r   r  r  r   r   i)buffer_sizer@   )   i   )r   rA   r   r   r   r   r   r   r   r6   r   r   rN   r   r   r   )r   r@   r  r   r    r   r"  rU   r#   r#   r$   #test_dataset_enable_buffered_stream.  s&    
  r$  c           	      C   s   | t   }|  tddd}|dd }tj|}t||dd dD ]D}tj	|||d}|
 |spttj|||d}||sJtqJd S )	Nr   r   r  r  r   r   )TF)
pre_bufferr@   )r   rA   r   r   r   r   r   r   r   rN   r   r   r   rC   )	r   r@   r  r   r    r   r%  rU   actualr#   r#   r$   test_dataset_enable_pre_bufferE  s"    
 r'  r   re   c                 C   sN   g }g }t |D ]8}t||d}| d| }|t|| || q|S )Nr  r  )r   r   r   r   r   )rS   r  
file_nrowsr  r  r   r   r    r#   r#   r$   _make_example_multifile_datasetZ  s    r)  c                 C   sR   |r(t tt|dd | jD ksNtn&dd |D }t |t | jjksNtd S )Nc                 S   s   h | ]
}|j qS r#   )r    r   r#   r#   r$   	<setcomp>h  s     z(_assert_dataset_paths.<locals>.<setcomp>c                 S   s   g | ]}t | qS r#   )r0   as_posix)r   r    r#   r#   r$   r   j  s     z)_assert_dataset_paths.<locals>.<listcomp>)r   r   r0   _piecesr   Z_datasetfiles)rU   r  r@   r#   r#   r$   _assert_dataset_pathsf  s    $r.  
dir_prefix_.c                 C   sP   | t   }|  t|ddd}|d|   tj||d}t||| d S )Nr   re   r  r(  z	{}stagingrW   )r   rA   r)  r   r   rN   r.  r   r/  r@   r  r  rU   r#   r#   r$   test_ignore_private_directoriesn  s    
r4  c              	   C   s   | t   }|  t|ddd}|d d}|d W 5 Q R X |d d}|d W 5 Q R X tj||d}t||| d S )	Nr   re   r2  z	.DS_Storer   s	   gibberishz.privaterW   r   rA   r)  r   writer   rN   r.  r   r@   r  r  r   rU   r#   r#   r$   test_ignore_hidden_files_dot  s    
r8  c              	   C   s   | t   }|  t|ddd}|d d}|d W 5 Q R X |d d}|d W 5 Q R X tj||d}t||| d S )	Nr   re   r2  Z_committed_123r   s   abcdZ_started_321rW   r5  r7  r#   r#   r$   #test_ignore_hidden_files_underscore  s    
r9  c                 C   sf   | d | t  }|jdd t|ddd}tj||d}t||| tj||d}t||| d S )Nz{0}dataTparentsr   re   r2  rW   )r   r   rA   r)  r   rN   r.  r3  r#   r#   r$   /test_ignore_no_private_directories_in_base_path  s    r<  c                 C   s   dgd dgd  }t jt tt|t | gddgd}tj|t| dgd | d }|	  tj|t|dgd tj
| |d	gd
}||std S )NZxxxr;   Zyyyr   Z_partr  )partition_colsZ_private_duplicateZ_private)r@   Zignore_prefixes)r   r   rt   r   rO   dictionary_encoder   write_to_datasetr0   rA   rC   r   r   )r   r@   partr   Zprivate_duplicater   r#   r#   r$   test_ignore_custom_prefixes  s&     rB  c                 C   sF   | d }|   tj||d}| }|jdks4t|jdksBtd S )NrU   rW   r   )rA   r   rN   r   r~   r   r  )r   r@   Z	empty_dirrU   r"   r#   r#   r$   test_empty_directory  s     rC  c                 C   s  dd l }dd lm} dd lm} |tdtdttdtj	gd tj
ddddd	d
}|j }	ddg}
tjj||ddd}|j|| |
||d tjt| d}|d k	r||d}||j| W 5 Q R X n||j| |j| |d|d}|r.tjtdd t|j j}W 5 Q R X nt|jj}|t|jjksPt |! }|" }|j }|
|dt#|
 d  kst ||	 }|
D ]}|| d||< q|r|$dj%& }|d ||d< |'|| d S )Nr   
aaabbbbccc
eefeffgeeer   
2017-01-01
2017-01-11datetime64[D]rc   datetime64[ns]group1group2numnanr   rK  rL  F)r   safeZpreserve_indexr>   r   r   T)r?   validate_schemar@   'ParquetDataset.schema'rK   r   r   )(r   pandas.testingtestingr   r   rr   listr   rs   rN  r   r   rZ   tolistr   r   r   r@  r   r    r   r0   r   r   r   rN   r   r   r   r   Zto_arrow_schemar  r   r   ry   rO   Zfield_by_nametypeZto_pandas_dtyper   )rS   r@   r?   r   
index_namerq   r   r   	output_dfcolspartition_byoutput_tabler   r   rU   Zdataset_colsinput_tableinput_dfZinput_df_colscolZexpected_date_typer#   r#   r$   &_test_write_to_dataset_with_partitions  s^    




r`  c              
   C   s   dd l }dd lm} |tdtdttdtjddddd	d
}|j	
 }tj|}|d krpt }d}t|D ]}	|j|| ||d q|dd |t| D }
t|
|kst|j| ||d }| }| }|| }t|| d S )Nr   rD  rE  r   rF  rG  rH  rc   rI  )rK  rL  rM  r   re   )r@   r?   c                 S   s   g | ]}| d r|qS )z.parquet)endswith)r   filer#   r#   r$   r   H  s    
z8_test_write_to_dataset_no_partitions.<locals>.<listcomp>r>   )r   r   r   rr   rU  r   rs   r   r   rZ   rV  r   r   r   r   rG   r@  Zlsr0   rO   r   rN   r   ry   Zdrop_duplicatesr   r   )rS   r@   r?   rq   r   rY  rZ  r\  nr   Zoutput_filesr]  r^  r#   r#   r$   $_test_write_to_dataset_no_partitions-  s>    

 
rd  c                 C   s   t t| | d S rF   r`  r0   r   r@   r#   r#   r$   %test_write_to_dataset_with_partitionsX  s    rg  c                 C   st   t t jdt  dt jdt  dt jdt  dt jdt  dt jdt jdddg}tt| ||d	 d S )
NrK  )rW  rL  rM  rN  r   us)unitr  )	r   r   r  r`   r  int32	timestampr`  r0   )r   r@   r   r#   r#   r$   0test_write_to_dataset_with_partitions_and_schema^  s      rl  c                 C   s   t t| |dd d S )NrX  )rX  re  rf  r#   r#   r$   4test_write_to_dataset_with_partitions_and_index_namel  s
      rm  c                 C   s   t t| | d S rF   )rd  r0   rf  r#   r#   r$   #test_write_to_dataset_no_partitionsu  s    rn  c                 C   s    t | d | t| d | d S )Ntest1test2)r`  rd  rf  r#   r#   r$   test_write_to_dataset_pathlib{  s      rq  c              	   C   sd   |\}}t jtdd t| d ||d W 5 Q R X t jtdd t| d ||d W 5 Q R X d S )Nz"path-like objects are only allowedrK   ro  r   rp  )r   r6   r7   r`  rd  )r   r   r@   r   r0  r#   r#   r$   &test_write_to_dataset_pathlib_nonlocal  s        rr  c                 C   s   | \}}t |||d d S Nr   )r`  r   r#   r#   r$   *test_write_to_dataset_with_partitions_s3fs  s      rt  c                 C   s   | \}}t |||d d S rs  )rd  r   r#   r#   r$   (test_write_to_dataset_no_partitions_s3fs  s      ru  z,ignore:'partition_filename_cb':FutureWarningc           
      C   s   t tdtdttdtjgd tjddddd}d	d
g}tj	|}t
| }dd }tj|||||d tj||d}ddddddg}dd |jD }	t|t|	kstd S )NrD  rE  r   rF  rG  rH  rc   rJ  rK  rL  c                 S   s
   dj |  S )Nz{}-{}.parquet)r   )keysr#   r#   r$   partition_filename_callback  s    z_test_write_to_dataset_with_partitions_and_custom_filenames.<locals>.partition_filename_callbackrW   za-e.parquetza-f.parquetzb-e.parquetzb-f.parquetzb-g.parquetzc-e.parquetc                 S   s   g | ]}t j|jqS r#   )r   r    basename)r   pr#   r#   r$   r     s     zNtest_write_to_dataset_with_partitions_and_custom_filenames.<locals>.<listcomp>)rq   rr   rU  r   rs   rN  r   r   r   r   r0   r   r@  rN   rP   sortedr   )
r   r@   rY  r[  r\  r    rw  rU   Zexpected_basenamesZoutput_basenamesr#   r#   r$   :test_write_to_dataset_with_partitions_and_custom_filenames  s6    


    r{  c                 C   sX   t ddddgi}tj|}t| }tj||t	 d t
|}||sTtd S )Nr  r,   r:   r;   r   )rq   rr   r   r   r   r0   r   r@  r   r   rC   r   r   )r   r   r   r    r"   r#   r#   r$    test_write_to_dataset_filesystem  s    
r|  Fr&   c              	   C   s   | d }t  }tjt|tj|dddgd}tj	
|}d}t||j}t|D ]}	|| q^W 5 Q R X t|}
|
jj|kst| d }||d}t|j| W 5 Q R X tj| ||d	}|rtt |jt|kstW 5 Q R X |S )
Nr=   r   r   rX   rY   r;   r   r   r>   )r   rG   rq   rr   rs   r   r   r   r   r   r   r   ZParquetWriterr   r   rB   ZParquetFiler
  num_row_groupsr   r   r   rN   r   r   r   r   r0   )r   r@   r   r    r   r   r   Z
num_groupswriterr   readerr   r   rU   r#   r#   r$   _make_dataset_for_pickling  s6    

  r  c              	      s    fdd}|| st |rtt | j}W 5 Q R X ||sDt ||jsRt t|js`t |jD ]}||sft qf| jD ]D}||st | }|j	st t
|j	D ]}|||st qq~d S )Nc                    s   |    | kS rF   )loadsdumps)objpicklerr#   r$   is_pickleable  s    z3_assert_dataset_is_picklable.<locals>.is_pickleable)r   r   r   r   r
  r   rO   r,  r(   r}  r   r-   )rU   r  r@   r  r
  r   r)   r   r#   r  r$   _assert_dataset_is_picklable   s     


r  c                 C   s$   dd l }t| |}t|||d d S )Nr   r  r@   )pickler  r  )r   datadirr@   r  rU   r#   r#   r$   test_builtin_pickle_dataset  s    
  r  c                 C   s&   t d}t| |}t|||d d S )NZcloudpickler  )r   importorskipr  r  )r   r  r@   cprU   r#   r#   r$   test_cloudpickle_dataset  s    

  r  c                 C   s   | d }t dddddddgddd	dddd
gdddddddgd}tj|}tj|t|ddg|d tj||d	 }t
||d  d S )Nz
ARROW-3208rR  r   g      @r&   r   r,   g333333=@r:   r   r   )onetwoZthreer  r  )	root_pathr>  r@   rW   zoutput.parquet)rq   rr   r   r   r   r   r@  r0   rN   r   rB   )r   r@   r    r   r   r#   r#   r$   test_partitioned_dataset(  s      
r  c           	      C   s4  | d }t jdd tdD d gdgd}t jdd tdD d gdgd}tj|t||d	 tj|t||d	 tj|dg|d
 }|d d	 |d d	 g}|d j
dkst|d d|d d }}||d r||d s0tn(||d st||d s0td S )NzARROW-3325-datasetc                 S   s   g | ]}t d qS r   r   Zrandsr   r#   r#   r$   r   @  s     z0test_dataset_read_dictionary.<locals>.<listcomp>re   r   Zf0r=  c                 S   s   g | ]}t d qS r  r  r   r#   r#   r$   r   A  s     )r  r@   )read_dictionaryr@   r   r:   r,   )r   r   r   r   r@  r0   rN   r   chunkr?  Z
num_chunksr   r   )	r   r@   r    t1t2r"   Z	ex_chunksZc0Zc1r#   r#   r$   test_dataset_read_dictionary<  s.    $$ 
r  z(ignore:Passing 'use_legacy:FutureWarningc              	   C   s2  t dt dddgt  i}t|| d  t|| d  t dg}tj| d |d}t jddddgi|d}||st	tj| |d}t jdddddddgi|d}||st	t
jtd	d
 tj| d |dd W 5 Q R X tj| |dd}t jdddddddgi|d}| |s.t	d S )Nr9   r,   r:   r;   zdata1.parquetzdata2.parquet)r9   r  r  z'The 'schema' argument is only supportedrK   r=   Tr  F)r   r   rt   rj  r   rB   r   rC   r   r   r   r6   r   rN   r   )r   r   r   r"   r   r#   r#   r$   test_read_table_schemaX  s0        r  c                	   C   s   t jtdd tjddtg d W 5 Q R X t jtdd tjdddd W 5 Q R X t jtdd tjdddd W 5 Q R X t jtdd tjddd	d
 W 5 Q R X t jtdd tjddtg d W 5 Q R X d S )Nznot yet supported with the newrK    F)r@   r
  )r@   rP  T)r@   Zsplit_row_groupsr   )r@   rJ   r  )r   r6   r   r   rN   r   r   rC   r#   r#   r#   r$   !test_dataset_unsupported_keywords|  s     r  c              	   C   s"  dd l m} | d }|d d d jdd tdd	d
dgi}t|t|d d d d  |jdddgd}tj	t||dd}|j
ddddgksttjt||dd }|j
ddddgksttt tj	t||dd W 5 Q R X tt tjt||dd W 5 Q R X d S )Nr   Ztest_partitioningZ201210Z01Tr:  r9   r,   r:   r;   r=   yearmonthday)field_namesF)partitioningr@   )pyarrow.datasetrU   rA   r   r   r   rB   r0   r  rC   r[   r   rN   r   r   r6   r   )r   dsr  r   rA  r"   r#   r#   r$   test_dataset_partitioning  sB         
    r  c                 C   s`   t ddddgi}t|| d  tt| t }tjd|d}|	 }|
|s\td S )Nr9   r,   r:   r;   r=   r1  r   )r   r   r   rB   r   ZSubTreeFileSystemr0   r   rN   r   r   r   )r   r   r?   rU   r"   r#   r#   r$   #test_parquet_dataset_new_filesystem  s    r  c                 C   sx   t d}|d}tddddgi}t|| d  t| dd	}tj	|||d
}|d }|j
d j|ksttd S )Nfsspecrb  r9   r,   r:   r;   r=   \r   r>   z/data.parquetr   )r   r  r?   r   r   r   rB   r0   replacerN   rP   r    r   )r   r@   r  r?   r   r    rU   r   r#   r#   r$   6test_parquet_dataset_partitions_piece_path_with_fsspec  s    

  r  c              	   C   s  t ddddgi}| d }t|| tj|dd}tjtdd	 |j W 5 Q R X tjtd
d	 |j	 W 5 Q R X tjtdd	 |j
 W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tj|dd}tjtdd	 |j W 5 Q R X d S )Nr9   r,   r:   r;   r=   TrW   z'ParquetDataset.piecesrK   z'ParquetDataset.partitionsz'ParquetDataset.memory_mapz'ParquetDataset.read_dictioz'ParquetDataset.buffer_sizez'ParquetDataset.fsrQ  z 'ParquetDataset.common_metadata'z'ParquetDataset.metadataz'ParquetDataset.metadata_pathz$'ParquetDataset.common_metadata_pathF)r   r   r   rB   rN   r   r   r   rP   rQ   r   r  r"  r   r   Zcommon_metadatar
  r   r   )r   r   r    rU   r   r#   r#   r$   *test_parquet_dataset_deprecated_properties  s>    r  c              	   C   sx   t ddddgi}| d }tjtdd tj||dd	 W 5 Q R X tjtdd tj||d
d d W 5 Q R X d S )Nr9   r,   r:   r;   r=   z!Passing 'use_legacy_dataset=True'rK   TrW   c                 S   s   dS Nzfilename.parquetr#   r   r#   r#   r$   <lambda>      zEtest_parquet_write_to_dataset_deprecated_properties.<locals>.<lambda>)partition_filename_cb)r   r   r   r   r   r   r@  r   r   r    r#   r#   r$   3test_parquet_write_to_dataset_deprecated_properties  s    r  c              
   C   s>  t ddddgi}| d }tjtdd( tj||dt dt  fgd	 W 5 Q R X tjtd
d tj||ddgd W 5 Q R X tjtdd tj||ddd W 5 Q R X tjtdd tj||ddd d W 5 Q R X tjtdd tj||ddd W 5 Q R X tjtdd tj||ddd W 5 Q R X d S )Nr9   r,   r:   r;   r=   r   rK   T)r@   r   r  )r@   r  r  F)r@   r  file_visitorc                 S   s   | S rF   r#   r  r#   r#   r$   r  *  r  zNtest_parquet_write_to_dataset_unsupported_keywards_in_legacy.<locals>.<lambda>)r@   r  existing_data_behaviorerror)r@   r  basename_templatepart-{i}.parquet)r@   r  )	r   r   r   r6   r   r   r@  r   rj  r  r#   r#   r$   <test_parquet_write_to_dataset_unsupported_keywards_in_legacy  s8    






r  c                    s   t ddddgi}| d }g   fdd}d}tj||dg||d	d
 |d d |d d |d d h}tttj }||kstd S )Nr9   r,   r:   r;   r  c                    s     | j d S rF   )r   r    )Zwritten_fileZpaths_writtenr#   r$   r  <  s    zDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitorr  F)r  r  r  r@   1zpart-0.parquet23)	r   r   r   r@  r   r   pathlibPathr   )r   r   r    r  r  Zexpected_pathsZpaths_written_setr#   r  r$   .test_parquet_write_to_dataset_exposed_keywords5  s     


r  c              	   C   sL  t ddddgi}| d }tjtdd tj||dd	d
 dd W 5 Q R X tjtdd tj||ddd
 dd W 5 Q R X tjtdd tj||ddgdgd W 5 Q R X tjtdd tj||ddgdgd W 5 Q R X tjtdd tj||dg dd
 d W 5 Q R X tjtdd tj||dg dd
 d W 5 Q R X d S )Nr9   r,   r:   r;   r=   zH'basename_template' argument is not supported by use_legacy_dataset=TruerK   Tc                 S   s   dS r  r#   r  r#   r#   r$   r  X  r  z<test_write_to_dataset_conflicting_keywords.<locals>.<lambda>zfile-{i}.parquet)r@   r  r  zM'partition_filename_cb' argument is not supported by use_legacy_dataset=FalseFc                 S   s   dS r  r#   r  r#   r#   r$   r  ^  r  zC'partitioning' argument is not supported by use_legacy_dataset=True)r@   r>  r  zF'partition_cols' argument is not supported by use_legacy_dataset=FalsezC'file_visitor' argument is not supported by use_legacy_dataset=Truec                 S   s   | S rF   r#   r  r#   r#   r$   r  t  r  )r@   Zmetadata_collectorr  zJ'metadata_collector' argument is not supported by use_legacy_dataset=Falsec                 S   s   | S rF   r#   r  r#   r#   r$   r  z  r  )r   r   r   r6   r   r   r@  r  r#   r#   r$   *test_write_to_dataset_conflicting_keywordsO  sL    r  write_dataset_kwarg))
create_dirT)r  Fc              	   C   s   ddl m} tddddgi}| d }t|j}|\}}|ttjj	ksRt
||j	ks`t
tjj|dd	d
:}tj||f||i |jd \}	}
}|| |kst
W 5 Q R X dS )zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr9   r,   r:   r;   zout.parquetwrite_datasetT)Zautospec)r  rU   r   r   inspect	signaturer  r   r@  
parametersr   mockpatchrw   Z
mock_calls)r   r  r  r   r    r  keyargZmock_write_dataset_name_argsr   r#   r#   r$   #test_write_to_dataset_kwargs_passed}  s    r  c                 C   s   t t jdddgdddgddddgd}t|}| d	 }tj|| d	 d
g|d dd | D }t|dksxt	d|kst	d S )Nr9   r]   r^   r   r,   r:   r;   )catr_  rU   r  )r>  r@   c                 S   s   g | ]}|  r|jqS r#   )is_dirr   )r   r   r#   r#   r$   r     s      z;test_write_to_dataset_category_observed.<locals>.<listcomp>zcat=c)
rq   rr   r   r   r   r   r@  iterdirrO   r   )r   r@   r   r   r    subdirsr#   r#   r$   'test_write_to_dataset_category_observed  s    
  r  )T)r   re   )TNNN)TN)Fr&   )F)|r5   r  r   r  Znumpyrs   r   Zunittest.mockr  Zpyarrowr   Zpyarrow.computeZcomputer   r   r   r   Zpyarrow.testsr   Zpyarrow.tests.parquet.commonr   r   r   Zpyarrow.utilr   Zpyarrow.vendored.versionr	   r   r   r   r
   r   r   ImportErrorr   rq   rS  rT  r   markZ
pytestmarkr%   r+   filterwarningsr1   r8   rE   rI   rV   r\   r   r   Zxfailr7   r   r   rU   r   r   r   r   r   Zparametrizer  castr  r   r   Zs3r   r   r   r   r   rH   rx   r   r   r   r   r  r  r  r!  r$  r'  r)  r.  r4  r8  r9  r<  rB  rC  r`  rd  rg  rl  rm  rn  rq  rr  rt  ru  r{  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#   r#   r#   r$   <module>   s  






V"+"0:!
,(&

a%



    H  
+		"



!

!


.



-