U
    k/e                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlmZ d dlmZmZmZmZmZ zd dl Z!W n e"k
r   dZ!Y nX zd dl#m$Z% W n e"k
r   dZ%Y nX zd dl&m'Z( W n e"k
rD   dZ(Y nX ej)j$Z*dd Z+dd Z,dd	 Z-ej.ej)j'd
d Z/ej.dd Z0ej.dddd Z1ej.ej)j'dd Z$ej.ddgddgddd Z2ej)j'dd Z3dd Z4ej)j'dd  Z5ej)j'd!d" Z6ej)j'd#d$ Z7ej)j'd%d& Z8ej)j'd'd( Z9ej)j'd)d* Z:ej)j'd+d, Z;d-d. Z<d/d0 Z=d1d2 Z>d3d4 Z?d5d6 Z@ej)j'd7d8 ZAej)j'd9d: ZBej)j'd;d< ZCd=d> ZDd?d@ ZEej)FdAejGdBddCdDdEggej)FdFddgej)j'dGdH ZHej)j'dIdJ ZIdKdL ZJdMdN ZKej)j'dOdP ZLej)j'ddQdRZMej)j ej)j'ej)NdSdTdU ZOej)j ej)j'dVdW ZPej)j ej)j'ej)NdSdXdY ZQej)j ej)j'ej)NdSdZd[ ZRej)j'd\d] ZSej)j ej)j'd^d_ ZTej)j ej)j'ej)NdSd`da ZUej)j ej)j'dbdc ZVddddeZWej)j ej)j'ej)NdSdfdg ZXej)j'dhdi ZYej)j ej)j'djdk ZZej)j ej)j'ej)NdSdldm Z[ej)j ej)j'ej)NdSdndo Z\ej)j ej)j'ej)NdSdpdq Z]ej)j ej)j'ej)NdSdrds Z^ej)j ej)j'ej)NdSdtdu Z_ej)j ej)j'dvdw Z`ej)j'ej)Fdxdydz d{dz gd|d} Zaej)j'ej)Fd~ddgej)Fdxddz ddz gdd Zbej)Fdxddz ddz gdd Zcej)Fdxddz ddz gdd Zddd Zedd Zfej)j'ej)j dd Zgdd Zhdd Zidd Zjdd Zkdd Zldd Zmdd Znej)j'dd Zodd ZpdddZqdd Zrdd Zsdd Ztej)j'dd Zuej)j'dd Zvej)j'dd Zwej)j'dd Zxej)j'dd Zyej)j'dd Zzej)j'dd Z{ej)j'dd Z|ej)j'dd Z}ej)j'dd Z~dd Zdd ZddĄ ZddƄ Zej)FdddgddɄ Zdd˄ Zej)j'dd̈́ Zej)j'ddτ Zej)j'ddф Zej)j'ddӄ ZddՄ Zddׄ Zej)j'ej)Fdddgej)Fdddgej)Fd~ddgej)Fddddgdddgfdddgdddgfdddgdddgfdddgdddgfdddgdddgfdddgdddgfdddgdddgfgdd Zej)j dd Zej.ej)j'dd Zej)j'ej)jdd Zej)j'ej)jdd Zej)j'ej)jdd Zej)j'dd Zej)j'dd Zej)j dd Zej)j'dd Zej)j'dd Zej)j'd d Zdd Zdd Zdd Zdd	 Zej)j'd
d Zej)j'dd Zdd Zej)jdd Zej)jdd Zdd Zej)jdd Zej)j dd Zej)j ej)Fdddddgdd  Zd!d" Zd#d$ Zd%d& Zej)j d'd( Zej)j d)d* Zej)j d+d, Zd-d. Zd/d0 Zd1d2 Zej)j ej)Fdddd3gd4d5 Zd6d7 Zej)j'ej)j d8d9 Zej)j'ej)j ej)jejd:kd;d<d=d> Zej)j'ej)j ej)Fd?ddgej)NdSd@dA Zej)j'dBdC Zej)j'ej)j dDdE ZdFdG ZdHdI Zej)j'ej)j dJdK Zej)j'ej)j dLdM Zej)j'ej)j dNdO Zej)j'ej)j dPdQ Zej)j'dRdS Zej)j'ej)j dTdU Zej)j'ej)j dVdW ZdXdY Zej)j ej)j'dZd[ Zej)j'ej)j d\d] Zej)j'ej)j ej)NdSd^d_ ZĐd`da ZŐdƐdbdcZej)j'ddde Zej)j'ej)j dfdg ZȐdhdi Zɐdjdk Zʐdldm Zej)j'dndo Z̐dpdq Zej)j drds ZΐdǐdudvZϐdwdx ZАdydz Zej)j'd{d| Zej)j'd}d~ Zej)j'dd Zej)j'dd Zej)j'ej)j dd Zej)j'ej)j dd Zאdd Zؐdd Zِdd Zڐdd Zېdd Zej)j'dd Zݐdd Zej)j'dd Zej)j'dd Zej)j'ej)j dd Zdd Zej)j'dd Zej)j'ej)jdd ZdZej)j'ej)jdd Zej)j'dd Zej)j$dd Zej)j$dd Zej)j$dd Zej)Fdddgdd Zej)Fdddgdd Zdd Zdd Zej)Fdddd Zdd Zej)j'dd Zdd Zej)Fdddgdd ZdS (      Nquote)
change_cwd_filesystem_uriFSProtocolClassProxyHandler_configure_s3_limited_userc                 C   s   dd l }dd l}| ddd}|jdd}|dddd	d
g}g }t| D ]&}|||t|t|f ||7 }qJtj	|ddddgdS )Nr   i        )daysZgreenZblueZyellowZredZorangedateindexvaluecolorcolumns)
datetime	itertools	timedeltacyclerangeappendfloatnextpd	DataFrame)nr   r   dayintervalcolorsdatai r"   >/tmp/pip-unpacked-wheel-seu8352k/pyarrow/tests/test_dataset.py_generate_dataA   s    
r$   c              
   C   s\   t t dt  t dt  t dt  t dt  g}t jj| |dd}|	 S )Nr   r   r   r   F)schemaZpreserve_index)
par%   fielddate32int64float64stringTableZfrom_pandasreplace_schema_metadata)dfr%   tabler"   r"   r#   _table_from_pandasQ   s    r0   c              
   C   sd   |   D ]V}| D}t|tjs&t|jr0t| s<t| sHt|	 rTtW 5 Q R X qd S N)
get_fragmentsopen
isinstancer&   Z
NativeFileAssertionErrorclosedseekablereadablewritable)datasetfragmentnfr"   r"   r#   +assert_dataset_fragment_convenience_methods\   s    

r=   c            
      C   s  t  } ddg}t|D ]\}}d||}| | | |}ttdttt	tdttt
td|gd dd tdD g}tdt fdt fd	t fd
t fdtt t dfg}tj||d}tj|g}	t|	| W 5 Q R X q| S )Nzsubdir/1/xxxzsubdir/2/yyyz{}/file{}.parquetr
   c                 S   s"   g | ]}|d  t |d  dqS    abstr).0jr"   r"   r#   
<listcomp>z   s     zmockfs.<locals>.<listcomp>i64f64rD   conststructr@   r%   )fs_MockFileSystem	enumerateformat
create_diropen_output_streamlistr   mapr   rD   r&   r%   r)   r*   r+   rK   record_batchr,   from_batchespqwrite_table)
mockfsdirectoriesr!   	directorypathoutr    r%   batchr/   r"   r"   r#   rY   g   s2    





rY   c                    sx   ddl m}m} ddlm} |   fddt  fdd}| |d	| || }tjfd
d}||fS )Nr   )PyFileSystemLocalFileSystemr	   )r   c                    s    fdd| D S )Nc                    s   h | ]}  t|qS r"   )normalize_pathrD   rE   plocalfsr"   r#   	<setcomp>   s     z6open_logging_fs.<locals>.normalized.<locals>.<setcomp>r"   )pathsrd   r"   r#   
normalized   s    z#open_logging_fs.<locals>.normalizedc                    s$     t|}| | j|S r1   )ra   rD   add_fsopen_input_file)selfr\   )re   openedr"   r#   rk      s    
z(open_logging_fs.<locals>.open_input_filerk   c              	   3   s.      z
d V  W 5   | ks(tX d S r1   )clearr5   )Zexpected_opened)rh   rm   r"   r#   assert_opens   s    
z%open_logging_fs.<locals>.assert_opens)	
pyarrow.fsr_   r`   Ztest_fsr   setsetattr
contextlibcontextmanager)Zmonkeypatchr_   r`   r   rk   rM   ro   r"   )re   rh   rm   r#   open_logging_fs   s    ru   module)Zscopec              
   C   s  | j jd | j jd td}t }t|d\}}}}|d t	t|dD ]8\}}d
|}	||	}
tt||
 W 5 Q R X qZ|d ||jjj|jgD ]L\}}d	j
| }d

|}	|| ||	}
tt||
 W 5 Q R X q|d ||jjj|jjjgD ]N\}}dj
| }d

|}	|| ||	}
tt||
 W 5 Q R X q&|d |dD ]N\}}d
|}d

|}	|| ||	}
tt||
 W 5 Q R X q|S )Npandasparquet     plain
   zplain/chunk-{}.parquetr%   zschema/{}/{}z{}/chunk.parquethivezhive/year={}/month={}Z
hive_colorr   zhive_color/color={})configpyarrowrequiresr$   rM   rN   npZarray_splitrQ   rO   rP   rR   rW   rX   r0   groupbyr   dtZ	dayofweekr   yearmonth)requestr.   rY   Zdf_aZdf_bZdf_cZdf_dr!   chunkr\   r]   partfolderr"   r"   r#   multisourcefs   s@    






"






r   c              
   C   sf   t  }tjddd}t d}t ttdt	 tdt
 g|_t | |||}| S )NsubdirT	recursivegroupkey)dsParquetFileFormatrM   FileSelectorFileSystemFactoryOptionsDirectoryPartitioningr&   r%   r'   int32r+   partitioningFileSystemDatasetFactoryfinish)rY   rP   selectoroptionsfactoryr"   r"   r#   r:      s    
r:   TFZthreadedserial)paramsidsc                    s   | j  G  fddd}| S )z]
    Fixture which allows dataset scanning operations to be
    run with/without threads
    c                       sT   e Zd Z fddZ fddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )zdataset_reader.<locals>.readerc                    s
    | _ d S r1   use_threads)rl   r   r"   r#   __init__   s    z'dataset_reader.<locals>.reader.__init__c                    s   d|krt d |d< d S )Nr   z9Invalid use of dataset_reader, do not specify use_threads)	Exception)rl   kwargsr   r"   r#   _patch_kwargs   s
    z,dataset_reader.<locals>.reader._patch_kwargsc                 [   s   |  | |jf |S r1   )r   to_tablerl   r:   r   r"   r"   r#   r     s    
z'dataset_reader.<locals>.reader.to_tablec                 [   s   |  | |jf |S r1   )r   
to_batchesr   r"   r"   r#   r     s    
z)dataset_reader.<locals>.reader.to_batchesc                 [   s   |  | |jf |S r1   )r   scannerr   r"   r"   r#   r     s    
z&dataset_reader.<locals>.reader.scannerc                 [   s   |  | |j|f|S r1   )r   head)rl   r:   num_rowsr   r"   r"   r#   r     s    
z#dataset_reader.<locals>.reader.headc                 [   s   |  | |j|f|S r1   )r   take)rl   r:   indicesr   r"   r"   r#   r     s    
z#dataset_reader.<locals>.reader.takec                 [   s   |  | |jf |S r1   )r   
count_rowsr   r"   r"   r#   r     s    
z)dataset_reader.<locals>.reader.count_rowsN)__name__
__module____qualname__r   r   r   r   r   r   r   r   r"   r   r"   r#   reader   s   r   )param)r   r   r"   r   r#   dataset_reader   s    	$r   c              	      s,  t t dt  g}t  ddg}dd tddD } fddt||D }td	td
k}tj	|| |d}tj	j
|| ||d}||fD ]}t|tj	stt|jtjst|j|stt|jt|kstt| }t|||D ]\}	}
}|	j|
s t|	j|ks0tt|	jtjsDtt|	tjsVt|	jdgksht|	jdksxtt|	 }|	jt|  krdksn tt|d tjst|d j|kst|d jdgkst|d jdkstqt|jtddkd}t|dkstqtj	|| d}|jtdsVttj	j
|| d}|jtdst| D ]}	|	jtdstqtjtdd t	| | W 5 Q R X tjtdd tj	|| dd W 5 Q R X tjtdd tj	j
| d W 5 Q R X d S )NrJ   subdir/1/xxx/file0.parquetsubdir/2/yyy/file1.parquetc                 S   s   g | ]}t d |kqS )r   r   r'   rE   xr"   r"   r#   rG   &  s     z+test_filesystem_dataset.<locals>.<listcomp>r	   r?   c                    s   g | ]\}}  ||qS r"   )make_fragment)rE   r\   r   file_formatrY   r"   r#   rG   '  s   leveli9  )r%   rP   
filesystemroot_partition)r%   rP   r   
partitionsr   r   filter   r%   rP   r   Tzincorrect typematch)r%   rP   r   rP   )r&   r%   r'   r)   r   r   r   zipscalarFileSystemDataset
from_pathsr4   r5   rP   partition_expressionequalsrq   filesrS   r2   r\   ParquetFileFragment
row_groupsnum_row_groupssplit_by_row_grouplenpytestraises	TypeError)rY   r%   rg   r   	fragmentsr   Zdataset_from_fragmentsZdataset_from_pathsr:   r;   	partitionr\   row_group_fragmentsr"   r   r#   test_filesystem_dataset  s           $       r   c              	   C   sj   t t dt  g}t }dg}tjj|||t	 d}|
  tt | | W 5 Q R X d S )Nf1znonexistingfile.arrowr   )r&   r%   r'   r)   r   IpcFileFormatr   r   rM   r`   r2   r   r   FileNotFoundErrorr   )r   r%   r   rg   r:   r"   r"   r#   1test_filesystem_dataset_no_filesystem_interactiong  s      r   c           	      C   s  t | tjstt | jtjs"ttjdddddgt d}tjdddddgt	 d}|
| D ]<}t |tjsxt|d|st|d|sdtqd||  D ]&}t |tjstt |jtjstq|| }t |tjstt|dksttddk}| jd	|d
}|d }|d ddgksDt|d ddgksZtt|d ddgksttt|d ddgksttddk}| jd	|d
}|d }|d ddddgkst|d ddddgkst|d ddddgkst|d ddddgks ttdtdtddkd}| jd	|d}|d }t|dddgksvt|d ddddddddddg
kst|d ddddddddddg
kst|d ddd	d	ddddd	d	g
kstt|  d S )Nr   r	   r   r?   rz   typer|   rH   T)r   r   r   rI         ?r   xxxyyy)rK   rB   1g      @)rH   rI   new)r   r   r                  @      @F)r4   r   Datasetr5   r%   r&   Schemaarrayr)   r*   r   RecordBatchcolumnr   r   scan_batchesZTaggedRecordBatchr;   ZFragmentr   r,   r   r'   sort_by	to_pydictsortedrS   r=   )	r:   r   expected_i64expected_f64r^   r/   	conditionresultZ
projectionr"   r"   r#   test_dataset}  sf    
&        r   c                 C   s(   | j ddd}t|}|jdks$td S )N      )Zfragment_readaheadZbatch_readahead   )r   r   num_columnsr5   )r:   r   r^   r"   r"   r#   test_scanner_options  s    r   c           	   	   C   s@  |j | t d}t|tjs"tttj	 |j | dgd W 5 Q R X |j | dgt d}|j
| jkspt|jtdt fgkstt|tjst| }| D ]"}|j|jkst|jdkstq||  kst|j|jkstt|jD ],}t|g}||||ks tq ttj |t|jg W 5 Q R X |j| ksnt|j | ddd	d
gt d}| }ddd	d
g}|j|kst|d}|d  dgd dgd  kst|d  dgd dgd  kst|d	  dgd ks t|d
  dgd ks<td S )N)memory_poolunknownr   rH   )r   r   r	   Z
__filenameZ__fragment_indexZ__batch_indexZ__last_in_fragmentr   r
   r   r   r|   T)r   r&   default_memory_poolr4   r   Scannerr5   r   r   ArrowInvalidZdataset_schemar%   projected_schemar)   r   r   r   	to_readerread_allr   r   r   r   ZArrowIndexErrorr   column_namesr   	to_pylist)	r:   r   r   r/   r^   r!   r   Zexpected_namesZsorted_tabler"   r"   r#   test_scanner  s^     
 


&r  c              	   C   s\   t  }t  }t | z0| }tj| }| }| |ksHt	W 5 t | X d S r1   )
r&   r   Zsystem_memory_poolZset_memory_poolZbytes_allocatedr   r   Zfrom_datasetr   r5   )r:   old_poolpoolZallocated_beforer   _r"   r"   r#   test_scanner_memory_pool  s    
r	  c                 C   s  | | d}|tjjg | jdks&t|j | ddgd }|ddgiksNt|j | ddgtddkd }|dddgikst|j | d	dgd }|dt	t
d
d ikstt|  }|j ddgd }|ddgikst|j d	dgd }|dt	t
d
ikstd S )Nr   rL   r	   rH   r   r   r   r   r?   i   r
   )r   r&   r,   rV   r%   r5   r   r   r'   rS   r   r   r2   )r:   r   r   r;   r"   r"   r#   	test_head  s    
r  c              	   C   s   t |  }ddgtddgfD ](}|||}||||ks"tq"tt	 ||tdg W 5 Q R X ddgtddgfD ]$}|| ||| |kstqtt	 || tdg W 5 Q R X d S )Nr	   r?   r
   r   r|   )
r   r2   r&   r   r   r   r5   r   r   
IndexError)r:   r   r;   r   expectedr"   r"   r#   	test_take  s     
r  c                 C   s   t |  }||dkst|j|tddkddks>t|| dksPt|j| tddkddkspt|j| tddkddkst|j| tdd	k dd	kstd S )
Nr
   rH   rz   r   r	   r|   r   r?   r   )r   r2   r   r5   r   r'   )r:   r   r;   r"   r"   r#   test_count_rows+  s"       r  c               
   C   s:   t jt jt jg} | D ] }tt |  W 5 Q R X qd S r1   )r   Z
FileFormatr   Partitioningr   r   r   )classesklassr"   r"   r#   test_abstract_classes;  s    r  c                  C   sZ  t t dt  t dt  g} tjtjtjfD ]8}|| }t	|tj
sRt||| ksbt|dks6tq6t t dt  t dt  g} t| }t|jdksttdd |jD st|d	}t	|tjsttdd
ktddk@ }||sttt j |d W 5 Q R X |d}tdd
k}||sZt|tj| ddksrtt t dt  t dt  g} tj| dd}t|jdksttdd |jD st|d}tdtdktdtd
k@ }||st|d}td tdtd
k@ }||sRtdD ](}tt j || W 5 Q R X qV|tj| ddkstt t dt  t dt  g} t| }t|jdksttdd |jD st|d}t	|tjsttdd
ktddk@ }||s>ttt j |d W 5 Q R X |tj| ddksxtt t dt  t dt t  t  g} tj| dt dddgid }|jd d kst|jd!  dddgkst|tj| d d ksttjt t dt  t dt t  t  gdt dddgid }|jd d ksrt|jd!  dddgkstt jt td"t tj d"t t!d#d$gd%gd&d'd(gd)}t d(t  fg}tjtjtjfD ]\}t"# H}||}tj$||d*|d+ tj%|d*|d+}	|	& }
|
|sHtW 5 Q R X qd S ),NrH   rI   zother objectr   r   r   c                 s   s   | ]}|d kV  qd S r1   r"   r   r"   r"   r#   	<genexpr>X  s     z$test_partitioning.<locals>.<genexpr>z/3/3.14/r?   gQ	@z/prefix/3/aaaz/3/nonesegment_encodingalphabetaxyz)null_fallbackc                 s   s   | ]}|d kV  qd S r1   r"   r   r"   r"   r#   r  n  s     z/alpha=0/beta=3/r   z/alpha=xyz/beta=3/)z/alpha=one/beta=2/z/alpha=one/z
/beta=two/otherc                 s   s   | ]}|d kV  qd S r1   r"   r   r"   r"   r#   r    s     z3_3.14_Zprefix_3_aaa_firstsecondthirddictionariesr	      rA   rB   r|   r   f2r   namesipcrP   r   )'r&   r%   r'   r)   r*   r   r   HivePartitioningFilenamePartitioningr4   r  r5   r   r!  allparseZ
Expressionr   r   r   r   r   is_null
dictionaryint8r+   r   r  r/   r   r   randomrandnrepeattempfileTemporaryDirectorywrite_datasetr:   r   )r%   r  r   exprr  Z
shouldfailr/   partitioning_schematempdir	load_backload_back_tabler"   r"   r#   test_partitioningF  s    




 

       
 r:  c               
   C   s   t t dt  t dt  g} t| t| t| tj| ddtj| ddtj| dddg}|D ]}t	
t	||ksltqld S )NrH   rI   r  r  r  )r  r  )r&   r%   r'   r)   r*   r   r   r(  r)  pickleloadsdumpsr5   )r%   partsr   r"   r"   r#   test_partitioning_pickling  s    	r?  c                  C   s   t tdddgdddgd} t d}t d}| j|d || |d |d| dd	}tddd
gdddgdd
dgdddgd}||std S )Nr	   r   r?   r@   rA   rB   r*   )za+1zb-aza*2za/br   rz   r            ?r   g      ?)	r   r:   r&   r/   r'   r   castr   r5   )r:   rA   rB   r   r  r"   r"   r#   $test_expression_arithmetic_operators  s     "


  rD  c                  C   s   dd dD \} }}t | ddiks*tt | t | ksBtt | |@ |@ dd dD ksftt ddk}t |i kstt | |@ ddikstt d }t |dd ikstd S )	Nc                 S   s   g | ]}t ||kqS r"   r   rE   fr"   r"   r#   rG     s     z'test_partition_keys.<locals>.<listcomp>abcrA   c                 S   s   i | ]
}||qS r"   r"   rE  r"   r"   r#   
<dictcomp>  s      z'test_partition_keys.<locals>.<dictcomp>dr?   )r   Zget_partition_keysr5   Z_get_partition_keysr'   r,  )rA   rB   cZnopenullr"   r"   r#   test_partition_keys  s    $rL  c                  C   s   t  } t jddgd}t jdd}| jt ks4t|jddhksFt| jdksTt|jdksbt| | ksnt| |kszt| |kstd S )NrA   rB   dictionary_columnsmscoerce_int96_timestamp_unitns)r   ParquetReadOptionsrN  rq   r5   rQ  )opts1opts2opts3r"   r"   r#   test_parquet_read_options  s    rW  c                  C   sf   t  } t jdhd}t jdd}| jt  ks4t|jt jdgdksLt|jt jddksbtd S )NrA   rM  srP  )r   r   read_optionsrS  r5   )Zpff1Zpff2Zpff3r"   r"   r#   %test_parquet_file_format_read_options  s    rZ  c                  C   sn  t  } t jdd}t jddd}t jddd}t jddd	}| jd
ksLt| jdksZt| jd
ksht| jdksvt| jdkst|jd
kst|jdkst|jd
kst|jdkst|jdkst|jd
kst|jd
kst|jdkst|jdkst|jdkst|jdks$t| | ks2t| |ks@t||ksNt||ks\t|| ksjtd S )N   buffer_sizei    T)r]  use_buffered_stream)r]  
pre_bufferi@ i )thrift_string_size_limitthrift_container_size_limitFi @B )r   ParquetFragmentScanOptionsr^  r5   r]  r_  r`  ra  )rT  rU  rV  Zopts4Zopts5r"   r"   r#   test_parquet_scan_options  s@     rd  c                  C   s  t  t  t tjjdddt jtjjddgddt jtjjddd	dt  t jtjjdd
ddt jtjjddddg} z| 	t 
  W n tk
r   Y nX td k	r| t  t jdhdt jddt jdddddg | D ]}tt||kstqd S )N	T)	delimiterZignore_empty_linesr?   foo)	skip_rowsr  rY  i   )rh  
block_sizeignoreZnewlines_in_valuesZunexpected_field_behavior)Zparse_optionsF   r   rj  rA   rM  )r^  r[  {   i  )r^  r]  r`  ra  )r   r   CsvFileFormatr&   csvParseOptionsReadOptionsJsonFileFormatjsonr   OrcFileFormatImportErrorrW   extendr   r;  r<  r=  r5   )formatsr   r"   r"   r#   test_file_format_pickling5  sT    
 
 
 
rz  c               
   C   s   t  t jtjjdddt jtjjdddt  t tjjddd	t jtjjdd
ddg} t	d k	r| 
t jddt jddg | D ]}tt||kstqd S )NT)strings_can_be_nullconvert_options   rj  ri  Ferrorrl  i   rn  r[  r\  )r_  )r   CsvFragmentScanOptionsr&   rq  ConvertOptionsrs  JsonFragmentScanOptionsru  rr  rW   rx  rc  r;  r<  r=  r5   )r   optionr"   r"   r#   #test_fragment_scan_options_pickling\  s0    

r  paths_or_selectorr   r   r   r   r_  c                 C   s  t jt jdhd|d}t d}t ttdt tdt	 g|_
|jdks^t|jddgkspt|jd	ks~tt | |||}| }| jttd
t tdt tdtt t	 tdt tdtt t	 dtdt tdt	 gd	ds2tt| tsFtt||t js^t|jt dsvt| }t|t jst| }tjdddddgt d}	tjdddddgt d}
tj tjdddddgt dtjd! t	 d}tdd t"dD }|# }t$|ddgddgD ]\\}}}}tj|gd t d}tj|gd t	 d}tj|d gd t d}|j%d k	st|j&dkst|d |	st|d |
st|d |st|d |st|d |st|d |s0t|d |s<tq<|' }t|tj(sbtt)|dkstt|j&dkstd S ) NrD   rM  )rY  r_  r   r   r   .r  FrH   rI   rJ   rK   r@   Zcheck_metadataTr   r	   r   r?   rz   r   z	0 1 2 3 4c                 S   s"   g | ]}|d  t |d  dqS r>   rC   rE   r!   r"   r"   r#   rG     s   z+test_filesystem_factory.<locals>.<listcomp>r
   r   r   r   rA  r|   )*r   r   rS  r   r   r&   r%   r'   r   r+   r   Zpartition_base_dirr5   Zselector_ignore_prefixesZexclude_invalid_filesr   inspectr   r)   r*   r-  rK   r4   inspect_schemasrS   r   r   r   r   r   r   ZDictionaryArrayfrom_arrayssplitr   r   r   r   r   r   r,   r   )rY   r  r_  rP   r   r   inspected_schemar:   r   r   r   Zexpected_strZexpected_structiteratorr^   r;   r   r   Zexpected_groupZexpected_keyZexpected_constr/   r"   r"   r#   test_filesystem_factoryu  s    

   	


$r  c                 C   s   t  }t jd| |d}|jD ]}||| }|jdgks>t|j|| dgd}||fD ]6}t|t jslt|j	|ksztt|j
t| sXtqX|jdgkstqd S )N/plainr   rP   r   r   )r   r   r:   r   r   r   r5   r4   r   r\   r   r   )r   parquet_formatr:   r\   r;   Zrow_group_fragmentrF  r"   r"   r#   test_make_fragment  s    
r  c                 C   s   t d}t|d}t }||}t|	 tj
s@ttjdddgdddgd	d
dggdddgd}| ||sttt|}| || std S )NzT
        alpha,num,animal
        a,12,dog
        b,11,cat
        c,10,rabbit
    utf-8rA   rB   rJ        r|   dogcatrabbitr  numanimalr$  )textwrapdedentr&   	py_bufferencoder   rp  r   r4   r3   BufferReaderr5   r/   r   r   r;  r<  r=  )r   contentbuffer
csv_formatr;   r  pickledr"   r"   r#   "test_make_csv_fragment_from_buffer  s    

r  c                 C   s   d}t |d}t }||}t| t js:t	t j
dddgdddgd	d
dggdddgd}| ||szt	tt|}| || st	d S )Nz{"alpha" : "a", "num": 12, "animal" : "dog"}
{"alpha" : "b", "num": 11, "animal" : "cat"}
{"alpha" : "c", "num": 10, "animal" : "rabbit"}
r  rA   rB   rJ  r  r  r|   r  r  r  r  r  r  r$  )r&   r  r  r   rt  r   r4   r3   r  r5   r/   r   r   r;  r<  r=  )r   r  r  Zjson_formatr;   r  r  r"   r"   r#   #test_make_json_fragment_from_buffer  s    
r  c                 C   s   t dddgt dddgt ddd	gg}|d
  |d |d  g}tjtjddgdddd}|t f||fg}|D ]z\}}t j|dddgd}t  }t	|| |
 }||}	| |	|sttt|	}
| |
|stqd S )NrA   rB   rJ  r  r  r|   r  r  r  r   r	   r   r  r  rM  Tr[  )rY  r^  r]  r  r$  )r&   r   dictionary_encoder   r   rS  r/   ZBufferOutputStreamrW   rX   getvaluer   r   r   r5   r;  r<  r=  )r   arraysZdictionary_arraysZdictionary_formatcasesZformat_r/   r]   r  r;   r  r"   r"   r#   &test_make_parquet_fragment_from_buffer  s6    

	

r  c                 C   sn   t jtddgd dgd dgd  gdddgd	}t| d
 }tj||dg|d tj|dd|d}||fS )Nr   r	   rA   rz   rB   r   r#  r   r$  test_parquet_dataset)partition_cols
chunk_sizerx   r}   )rP   r   r   )r&   r/   r   rD   rW   write_to_datasetr   r:   )r7  r  r   r/   r\   r:   r"   r"   r#   _create_dataset_for_fragments.  s     "    r  z6ignore:Passing 'use_legacy_dataset=True':FutureWarningc                 C   s:  t | \}}t| }t|dks(t|d }ddg}|jj|ksHt|j|j	|j
|jksdt|jtddks~t||}|j|kst||dddst|j||jd}|jdddgkst||ddst|j|jdkst|j||jtddk d	}|jdddgks6td S )
Nr   r   r   r#  r   rA   rz   rL   )r%   r   )r  rS   r2   r   r5   physical_schemar%  rP   r  r\   r   r   r   r   r'   r   r  Zremove_columnslicer%   remove)r7  r   r/   r:   r   rF  Zphysical_namesr   r"   r"   r#   test_fragmentsA  s*    
  r  c                 C   s   t jtddgd dgd  gddgd}t| d }tj||dgd	 tjt d
gdd}tj	|d|d}|j
tddkd}tt|dkstd S )Nr   r	   rz   r   colr   r$  r  r  )r   r.  r}   flavorrx   r'  r   )r&   r/   r   rD   rW   r  r   r   r%   r:   r2   r'   r   rS   r5   )r7  r/   r\   r   r:   r   r"   r"   r#   test_fragments_implicit_caste  s    *r  c           	   	      s  t | \ }d fdd	}t| d }|j}tt|}||||ksXt|j	|j
|j|jd}||||st||d |j	|j
|j|jd}||dtddk d	 |j	|j
|j|jd}||ddgtdd
k d |j	|j
|j|jd}||dtddkd	 d|jddd }tjt|d4 |j	|j
|j|jd}|j|tddkd	 W 5 Q R X d S )Nc                    sP   | j  j||d}|r|n j}|j|ks.t j| |}||sLtd S )Nr%   r   r   )r   r%   r  r5   r  selectr   )r;   Z	row_slicer   r   actualr  r  r/   r"   r#   assert_yields_projectedz  s      z;test_fragments_reconstruct.<locals>.assert_yields_projectedr   )r   )r   rz   )r   r   r   r   r   r   r
  r   rA   z&No match for FieldRef.Name\(part\) in Fr   )NN)r  rS   r2   rP   r;  r<  r=  r   r5   r   r\   r   r   r   r   r'   r  Z	to_stringr   r   
ValueError)	r7  r   r:   r  r;   r  pickled_fragmentnew_fragmentpatternr"   r  r#   test_fragments_reconstructs  sl       
 

     r  c                 C   sb  t | dd\}}t| d }t| }t||j  krFdksLn t|j|d |jd}|j	dddgksttt|dkst|
|ddst|d jd k	st|d jdkst|d jd jddd	ddd	d
kstt|jtddk dd }t|tddk }t|dks0t|j|d tddk d}t|dks^td S )Nr   r  r   rL   r   r#  r   r	   minmaxr   r#  r   )r  rS   r2   r   r   r   r5   r   r%   r  r   r  r   
statisticsr   r'   )r7  r   r/   r:   r;   r   r   r"   r"   r#   !test_fragments_parquet_row_groups  s2       r  c                 C   s   t dtdi}tj|| d dd tj| d dd}t| d }|j	j
|j|jd	d
gd}|jdkspt|  |jdkstt|jdkstd S )NrA   r   test.parquetr   row_group_sizerx   r   r   r	   r?   r  )r&   r/   r   rW   rX   r   r:   rS   r2   rP   r   r\   r   r   r5   ensure_complete_metadatar   r   )r7  r/   r:   Zoriginal_fragmentr;   r"   r"   r#   %test_fragments_parquet_num_row_groups  s     r  c                 C   s   dd l }|tddgddgd}|d d|d< tt|| d	  dd lm	} |	| d	 }|j
||ddkd
}|jd | k  std S )Nr   rA   rB   r	   r   )col1col2r  categoryztest_filter_dictionary.parquetr   )rw   r   dictastyperW   rX   r&   r/   pyarrow.datasetr:   r   r'   Ziloc	to_pandasr*  r5   )r7  r   r   r.   r   r:   r   r"   r"   r#   ,test_fragments_parquet_row_groups_dictionary  s    r  c           
   	   C   sZ  |\}}t | d|d\}}t| d }||jg |  W 5 Q R X |jddgks\t|g  |  W 5 Q R X t|jt	j
st|jj|j|jddgd}|j|jkst|  |jd }|jdkst|jdkst|jd k	sttt|}	||jgD |	jddgks"t|	jd }|jdks<t|jd k	sLtW 5 Q R X d S )Nr   r  r   r   r	   r  )r  rS   r2   r\   r  r   r5   r4   metadatarW   ZFileMetaDatarP   r   r   idr   r  r;  r<  r=  )
r7  ru   rM   ro   r  r:   r;   r  	row_groupr  r"   r"   r#   &test_fragments_parquet_ensure_metadata  s<      

  

r  c           	   	   C   s|   |\}}t | |d\}}t| d }|g  tt|}W 5 Q R X ||jg |j}W 5 Q R X |dgksxtd S )Nr   r	   r   )	r  rS   r2   r;  r<  r=  r\   r   r5   )	r7  ru   rM   ro   r  r:   r;   r  r   r"   r"   r#   )test_fragments_parquet_pickle_no_metadata#  s    
r  c                 C   s  t jt dd dgt  t dddgt  t dddgt  t dddgt  t dddgt  t dddgt  t dddgt 	 t dddgt 
 t dddgt  t dddgt  t dddgt  t d	d d
gt  t d	d d
gt  t dddgt dt dddgt dt dddgt dt dddgt  t dddgt  t dddgt dt dddgt dgdddddddddddddddddd d!d"gd#}t| d$ }tj||d|d% |tj|d&d'd(fS ))NTFr	   r|   *   r   g      $@      E@rA   zrX  rO  usl    jt booleanr.  uint8int16uint16r   uint32r)   uint64r   doubleutf8binaryts[s]ts[ms]ts[us]r(   date64time32time64r$  Ztest_parquet_dataset_all_types)use_legacy_datasetr  rx   r}   r'  )r&   r/   r   Zbool_r.  r  r  r  r   r  r)   r  Zfloat32r*   r  r  	timestampr(   r  r  r  rD   rW   r  r   r:   )r7  r  r/   r\   r"   r"   r#   _create_dataset_all_types6  sb    /
r  c                    s  t | \}}t| d }dd l  fdd} fdd} fdd} j} j}t| }	|	d jd k	srt|	d jd }
|
j	dkst|
j
d	kst|
jd
dddddddddddddddddddddddddddddddddddddd|d|dd|d|dd|d|dd|ddd|dddd|ddd|dddd|ddd|dddd|dddd|ddddddkstd S )Nr   c                    s     ddddd| S N  r	   r   r   r   r  r"   r#   dt_sy      z.test_parquet_fragment_statistics.<locals>.dt_sc              
      s     dddddd| d S )Nr  r	   r   ry   r  r  r  r"   r#   dt_msz  r  z/test_parquet_fragment_statistics.<locals>.dt_msc              	      s     dddddd| S r  r  r  r  r"   r#   dt_us{  r  z/test_parquet_fragment_statistics.<locals>.dt_usr?   ry   FTr  r	   r  r   r  rA   r     a   zr  r   r     )r  r.  r  r  r  r   r  r)   r  r   r  r  r  r  r  r  r(   r  r  r  )r  rS   r2   r   r   timer   r   r5   r   Ztotal_byte_sizer  )r7  r/   r:   r;   r  r  r  r   r  r   r  r"   r  r#    test_parquet_fragment_statisticso  sF    r  c                 C   sv   t ddd d gddd d gd}tj|| d dd tj| d d	d
}t| d  }|d j	d j
i ksrtd S )Nr   r	   rA   rB   r@   r  r   r  rx   r   )r&   r/   rW   rX   r   r:   rS   r2   r   r   r  r5   )r7  r/   r:   r   r"   r"   r#   &test_parquet_fragment_statistics_nulls  s
     r  c                 C   sx   t dddgdddgdd d }|j| d d	d
 tj| d dd}t| d  }|d jd j	i kstt
d S )NrA   rB   rz   r
   rA  r@   r   r  r   Zenginerx   r   )r   r   
to_parquetr   r:   rS   r2   r   r   r  r5   )r7  r.   r:   r   r"   r"   r#   'test_parquet_empty_row_group_statistics  s
    $r  c                 C   s   t | dd\}}t| d }|jtddks:tt|jtddk|j	d}t
|dkshtt|jtddk|j	d}t
|dkstd S )Nr   r  r   r   rA   r   r%   rB   )r  rS   r2   r   r   r   r'   r5   r   r%   r   )r7  r/   r:   r;   r   r"   r"   r#   +test_fragments_parquet_row_groups_predicate  s    r	  c           
   	   C   s6  t | dd\}}t| d }|j}t| }tt|}||||ksZt	|j
|j|j|jdgd}||}	|	||d st	|j
|j|j|jdhd}|j||jddgtddk d	}	|	jddgkst	t|	dkst	|j
|j|j|jdhd}tjtd
d || W 5 Q R X d S )Nr   r  r   )r   r   r	   r   r   r?   r  zreferences row group 2r   )r  rS   r2   rP   r   r;  r<  r=  r   r5   r   r\   r   r   r   r%   r   r'   r  r   r   r   r  )
r7  r   r/   r:   r;   r  r   r  r  r   r"   r"   r#   -test_fragments_parquet_row_groups_reconstruct  sP     
    r
  c           
   	   C   s  |\}}t | d|d\}}t| d }|jddgd}|g : |jdksRt|jddgksdt|jd jd k	sxtW 5 Q R X ||}	|		 ddgddgdkst|jg d}|jdkst|jg kst|j||j
d}	|	jdkst|	|d d std S )	Nr	   r  r   r?   Zrow_group_idsr   r  rL   )r  rS   r2   subsetr   r5   r   r  r   r   r%   r   r   
r7  ru   r   rM   ro   r/   r:   r;   Zsubfragr   r"   r"   r#   !test_fragments_parquet_subset_ids  s$    


r  c           
   	   C   sH  |\}}t | d|d\}}t| d }|tddk}|g : |jdksVtt|j	dksht|j	d j
d k	s|tW 5 Q R X ||}	|	 dddgdddgdkst|tddk}|jdkst|j	g kst|j||jd	}	|	jdkst|	|d d st|jtd
dk|jd	}|jdksDtd S )Nr	   r  r   r   r?   r   r  r
   rL   r   rA   rz   )r  rS   r2   r  r   r'   r   r5   r   r   r  r   r   r%   r   r   r  r"   r"   r#   $test_fragments_parquet_subset_filter  s(    


"r  c              	   C   st   t | dd\}}t| d }tt" |jtddkddgd W 5 Q R X tt |  W 5 Q R X d S )Nr	   r  r   r   r   r  )	r  rS   r2   r   r   r  r  r   r'   )r7  r  r:   r;   r"   r"   r#   %test_fragments_parquet_subset_invalid<  s    &r  c                 C   s   t | d }t|dks tt| \}}tj|dd}t | d }t|d|j	t
|kslt| d }tj|| tj|dd}t | d }t|d|j	t
|kstd S )	Nr   zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[key=xxx, group=1]>rx   r   z-<pyarrow.dataset.ParquetFileFragment path={}>data.featherfeatherz/<pyarrow.dataset.FileFragment type=ipc path={}>)rS   r2   reprr5   _create_single_filer   r:   rP   r   ra   rD   r&   r  write_feather)r7  r:   r;   r/   r\   r"   r"   r#   test_fragments_reprL  s0    r  r  c                 C   s   | S r1   r"   r  r"   r"   r#   <lambda>o  r  r  c                 C   s   t t | S r1   r;  r<  r=  r  r"   r"   r#   r  o  r  c           
      C   s   t jddd}t }td}tjddg}||}t|tjsHt	||_
t| |||}| }tdt fdt fdt fd	t fd
tt t dfdt fdt fg}||st	tj }	t|	tjst	d S )Nr   Tr   r   r   rH   rI   rD   rJ   rK   r@   )rM   r   r   r   r   r   discoverr4   PartitioningFactoryr5   partitioning_factoryr   r  r&   r%   r)   r*   r+   rK   r   r   r(  )
rY   r  r  rP   r   r  r   r  expected_schemaZhive_partitioning_factoryr"   r"   r#   test_partitioning_factorym  s4    
   





	
r  infer_dictionaryc                 C   s   | S r1   r"   r  r"   r"   r#   r    r  c                 C   s   t t | S r1   r  r  r"   r"   r#   r    r  c                 C   s6  t jddd}t }td}tjjddg|d}|||_t| |||}|	 }|rt
t
 t
 }	|dj|	kst|   }
|
dd}t
dgd	 d
gd	   }||st| jtddkd}
|
dd}|dd	}||s2tn|djt
 ks2td S )Nr   Tr   r   r   r  r   r   r
   r   r   )rM   r   r   r   r   r   r  r  r   r  r&   r-  r   r+   r'   r   r5   r   r   combine_chunksr   r   r   r  r   r  )rY   r  r  r  rP   r   r  r   inferred_schemaexpected_typer/   r  r  r"   r"   r#   $test_partitioning_factory_dictionary  s6    
 
   r#  c                 C   s   | S r1   r"   r  r"   r"   r#   r    r  c                 C   s   t t | S r1   r  r  r"   r"   r#   r    r  c                 C   sJ  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }dD ]T}|| ||d 2}	tj|	|}
|
| |
  W 5 Q R X W 5 Q R X qt jd	d
d}td	}tjj|d}| ||_t||||}| }||ksBt| jdtdt id}|d d  dksttjjddgdd}| ||_t||||}t|   }|d j!"tddktddk@ sttj|dd}| ||_#t||||}t|   }|d j!"tddktddk@ sRttjj|dd}| ||_t||||}t$j%tj&dd | }W 5 Q R X t jdd
d}td}tj'j|d}| ||_t||||}| }||kst| jdtdt id}|d d  dks4ttj'jdd}| ||_t||||}t|   }|d j!"tddktddk@ sttj'|dd|_#t||||}t|   }|d j!"tddktddk@ sttj'j|dd}| ||_t||||}t$j%tj&dd | }W 5 Q R X d S )NrH   r|   rL   r   rX  r+   )z%directory/2021-05-04 00%3A00%3A00/%24z,hive/date=2021-05-04 00%3A00%3A00/string=%24
/0.featherr[   Tr   date_intr   r   逎`r  r  2021-05-04 00%3A00%3A00%24r%   r  +Could not cast segments for partition fieldr   r}   )(rM   rN   r   r   r&   r%   r)   r/   r   r   r  r+   rS   rQ   rR   r&  new_filerX   closer   r   r   r  r  r   r  r5   r   r   r'   rC  as_pyr2   r   r   r   r   r   r   r(  )r  rY   rP   r%   r/   partition_schemastring_partition_schemafull_schemar[   sinkwriterr   r   r  r   r!  r  r   r   r"   r"   r#   *test_partitioning_factory_segment_encoding  s    



  


 


 


 


 

 
r3  c                 C   s   | S r1   r"   r  r"   r"   r#   r    r  c                 C   s   t t | S r1   r  r  r"   r"   r#   r    r  c              
   C   s  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }tdt
dfdt fg}tdt fdt fg}	d	}
||
 ||
d
 2}tj||}|| |  W 5 Q R X W 5 Q R X t jddd}td}tjj|d}| ||_t||||}| }||kszt| jdtdt id}|d d  dksttjjdd}| ||_t||||}t|   }|d j!"tddktddk@ sttj|dd}| ||_#t||||}t|   }|d j!"tddktddk@ sttjjdd}| ||_t||||}t|   }|d j!"tddktddk@ sttj|	dd}| ||_#t||||}t|   }|d j!"tddktddk@ sPttjj|dd}| ||_t||||}t$j%tj&dd | }W 5 Q R X d S )NrH   r|   rL   ztest'; daterX  ztest';[ string'ztest%27%3B%20dateztest%27%3B%5B%20string%27zLhive/test%27%3B%20date=2021-05-04 00%3A00%3A00/test%27%3B%5B%20string%27=%24r$  r}   Tr   r%  r   r   r&  urir  z2021-05-04 00:00:00$r  r'  r(  r)  r*  r   )'rM   rN   r   r   r&   r%   r)   r/   r   r   r  r+   rS   rQ   rR   r&  r+  rX   r,  r   r   r(  r  r  r   r  r5   r   r   r'   rC  r-  r2   r   r   r   r   r   r   )r  rY   rP   r%   r/   r.  r/  r0  Zpartition_schema_enZstring_partition_schema_enr[   r1  r2  r   r   r  r   r!  r  r   r   r"   r"   r#   ;test_partitioning_factory_hive_segment_encoding_key_encoded  s    






 


 





 


 
r6  c              
   C   sv   t ddd gdddgd}tt t dt  t dt  g}tt j	 tj
|| d|d W 5 Q R X d S )	Nr   yr  r@   rA   rB   r&  r'  )r&   r/   r   r   r%   r'   r+   r   r   r   r4  r7  r/   r   r"   r"   r#   /test_dictionary_partitioning_outer_nulls_raisesn  s    $r9  c              	   C   sD   t ddd gdddgd}tt t|| d W 5 Q R X d S )Nr   r7  r  r@   zbasename-{i}.arrow)r&   r/   r   r   r   r   r4  )r7  r/   r"   r"   r#   test_positional_keywords_raisesv  s    r:  c                 C   s   d}t t d|d t|d d}tj|d | | d dgd tj|d |d  | d dgd tj| d dgd	}|d jdkst	tj| d dd
gd	}|d jdkst	tj| d dgd	}|d jdkst	d S )Ni   r   r	   )r   r   Zoner   r  Ztwor   r   r   )
r&   r/   r1  r   arangerW   r  
read_tableZ
num_chunksr5   )r7  Z
BATCH_SIZEr/   r"   r"   r#   test_read_partition_keys_only|  s*    
  r=  c                    s    t  }t fdd|D S )Nc                    s"   g | ]}t jt j |qS r"   )osr\   isdirjoin)rE   elbasedirr"   r#   rG     s     z _has_subdirs.<locals>.<listcomp>)r>  listdirany)rC  elementsr"   rB  r#   _has_subdirs  s    
rG  c                 C   sZ   t | D ]J}t j| |}t j|r
t||}t|rJt||| q
|| q
d S r1   )	r>  rD  r\   r@  r?  	posixpathrG  _do_list_all_dirsr   )rC  Zpath_so_farr   rF  Ztrue_nestedZnorm_nestedr"   r"   r#   rI    s    rI  c                 C   s   g }t | d| |S )N )rI  )rC  r   r"   r"   r#   _list_all_dirs  s    rK  c                 C   s    t t| }|t |kstd S r1   )rq   rK  r5   )r7  Zexpected_directoriesZactual_directoriesr"   r"   r#   _check_dataset_directories  s    rL  c              
   C   sn   t dddgddd gd}tt t dt  t dt  g}tj|| d|d t| d	d
dg d S )Nr   r7  r  r@   rA   rB   r&  r'  zx/xzy/y)	r&   r/   r   r   r%   r'   r+   r4  rL  r8  r"   r"   r#   (test_dictionary_partitioning_inner_nulls  s    $rM  c              
   C   sr   t dd dgddd gd}tt t dt  t dt  gd d}tj|| d|d	 t| d
ddg d S )Nr   r  r7  r@   rA   rB   r  r&  r'  za=x/b=xz	a=xyz/b=yz	a=z/b=xyz)	r&   r/   r   r(  r%   r'   r+   r4  rL  r8  r"   r"   r#   test_hive_partitioning_nulls  s     rN  c               	   C   s  t dt  fdt  fg} ddg}t| }t|tjs@ttj| dd}t|tj	s^ttj|d}t|tj	sztt
t t  W 5 Q R X t
jtdd tj| d W 5 Q R X t
jtdd tj| | d W 5 Q R X tj| d	d
}t|tjsttj| dd	d}t|tj	s(ttjd	d
}t|tj	sFtt
t tj|d	d
 W 5 Q R X t
jtdd tj|d	d W 5 Q R X t
t tj| dd
 W 5 Q R X d S )Nr   r   Zinferr   )field_nameszExpected listr   zCannot specify bothr}   r  )r!  r  zCannot specify 'field_names')rO  r  unsupported)r&   r%   r  r.  r   r   r4   r   r5   r  r   r   r  r(  )r%   r%  r   r"   r"   r#   test_partitioning_function  s4    
rQ  c                 C   s   t t dt t  t  t dt t  t  g}tjj	|d}tj
dd| |d}|jj|ksnt| }|dj|jd st|d dgd	 d
gd	  kst|dj|jd st|d dgd	 dgd	  kstd S )Nr   r   rL   r   rx   rP   r   r   r   r	   r
   r   r   r   )r&   r%   r'   r-  r.  r   r+   r   r   r  r:   r   r5   r   r   r   r   typesr  )rY   r%   r   r:   r/   r"   r"   r#   *test_directory_partitioning_dictionary_key  s"       &rT  c           	      C   s2  t t dt t  t  t dt t  t  g}tjj|d}tj	dd| |d}|j
j|ksnt| }ttdd}ttd	d
}|dj|jd st|djD ]"}|j }|  ||kstq|dj|jd	 st|djD ]&}|j }|  ||kstqd S )Nr   r   rL   r}   rx   rR  i  i  r	      r   )r&   r%   r'   r-  r.  r  r   r(  r  r:   r   r5   r   rS   r   r   r   r   rS  chunksr  sort)	r   r%   r   r:   r/   Zyear_dictionaryZmonth_dictionaryr   r  r"   r"   r#   %test_hive_partitioning_dictionary_key  s2       

rX  c                 C   sL   |d kr,t tddgd dgd  d}| d }tj|||d ||fS )	N	   r   rz   r   r
   r@   r  r  r&   r/   r   rW   rX   )base_dirr/   r  r\   r"   r"   r#   r    s
    $r  c                 C   s   t tddgd dgd  d}| d }t|| t tdddgd dgd  d}| d	 }t|| ||f||ffS )
NrY  r   rz   r   r
   r@   ztest1.parquetr  ztest2.parquetrZ  )r[  Ztable1path1Ztable2path2r"   r"   r#   _create_directory_of_files   s    $&r^  c                 C   sD   | t t | fD ]*}| j|js*t|| |stqd S r1   )r;  r<  r=  r%   r   r5   r   )r:   r/   r   rI  r"   r"   r#   _check_dataset*  s    r_  c              	   K   s   t | tjst| t| | gt| gfD ].}tj| f|}t |tjsJtt||| q(t	| j
2 tj| jf|}t |tjstt||| W 5 Q R X d S r1   )r4   pathlibPathr5   rD   r   r:   r   r_  r   parentname)r\   r/   r   r   rc   r:   r"   r"   r#   _check_dataset_from_path1  s    rd  c                 C   s   t | \}}t||| d S r1   r  rd  r7  r   r/   r\   r"   r"   r#   test_open_dataset_single_fileB  s    rg  c                 C   s    t | dd\}}t||| d S )Nr	   r  re  rf  r"   r"   r#   test_deterministic_row_orderH  s    rh  c                 C   s&   t | \}}t|}t| || d S r1   )r^  r&   concat_tablesrd  )r7  r   tablesr  r/   r"   r"   r#   test_open_dataset_directoryQ  s    
rk  c           	      C   s   t | \}\}}t|}t||gtt|t|gg}|dd |D 7 }|D ].}|j|jsjt|	|}||sTtqTd S )Nc                 S   s   g | ]}t t |qS r"   r  )rE   rI  r"   r"   r#   rG   a  s    z3test_open_dataset_list_of_files.<locals>.<listcomp>)
r^  r&   ri  r   r:   rD   r%   r   r5   r   )	r7  r   rj  r\  r]  r/   Zdatasetsr:   r   r"   r"   r#   test_open_dataset_list_of_filesX  s    

rl  c              	   C   s   t | \}}t|}t|}|j|js0ttj|t d}|j|jsTtt	
t tj|t d W 5 Q R X d S )Nr  )r  r   r   r:   r%   r   r5   rM   r`   r   r   r   rN   )r7  r/   r\   fspathdataset1dataset2r"   r"   r#   #test_open_dataset_filesystem_fspathk  s    
rp  c           
      C   s   | d }|   t|\}}||}t|}tj|t d}tjt|t|d}t	
t	|}	||||  kr||  kr||	ksn td S )Nsingle-filer  )mkdirr  relative_tor   r:   rM   r`   rD   r   r;  r<  r=  r   r5   )
r7  r   r[   r/   r\   relative_pathd1d2d3d4r"   r"   r#   test_construct_from_single_file  s    


ry  c                 C   s   | d }|   t|\}}t|}tj|t d}tj|jt| d}||}||}	||}
||	  kr~|
ksn t	|||fD ]&}t
t
|}|||kst	qd S )Nsingle-directoryr  )rr  r^  r   r:   rM   r`   rc  r   r   r5   r;  r<  r=  )r7  r   r[   rj  rg   ru  rv  rw  t1t2t3rI  restoredr"   r"   r#   $test_construct_from_single_directory  s    



r  c              	      s    d }|   t|\}} fdd|D }t 4 t|}||}t|ttt|ksft	W 5 Q R X tj|t
 d}||}	t|}
||
}tj|t d}||}||	  kr|  kr|ksn t	d S )Nzlist-of-filesc                    s   g | ]}|  qS r"   )rs  rb   r7  r"   r#   rG     s     z5test_construct_from_list_of_files.<locals>.<listcomp>r  )rr  r^  r   r   r:   r   r   sumrT   r5   r   rM   r`   )r7  r   r[   rj  rg   Zrelative_pathsru  r{  rv  r|  rw  r}  rx  Zt4r"   r  r#   !test_construct_from_list_of_files  s    


$



r  c              	   C   s4   ddg}t jtdd tj|| d W 5 Q R X d S )Nr   z!subdir/1/xxx/doesnt-exist.parquetzdoesnt-existr   r  )r   r   r   r   r:   )rY   r   r"   r"   r#   -test_construct_from_list_of_mixed_paths_fails  s
    r  c                 C   s   t jddg| d}t jd| d}t ||g}t|t js>ttt| dksVt| }t|dksnt|j	dks|tt|j
dkst|j
D ]}|jddgkstqd S )	Nr   r   r  r   rz   r"  r
   r   )r   r:   r4   UnionDatasetr5   r   rS   r2   r   r   childrenr   )rY   rA   rB   r:   r/   childr"   r"   r#   (test_construct_from_mixed_child_datasets  s"    
r  c                  C   s6   t jg dd} |  }|jdks$t|jdks2td S )Nr&  r   r   )r   r:   r   r   r5   r   )emptyr/   r"   r"   r#   test_construct_empty_dataset  s    r  c               	   C   sP   t jg dtdt fdt fgd} tjtdd | 	  W 5 Q R X d S )Nr&  rA   rP   r%   zMultiple matches for .*a.* in r   )
r   r:   r&   r%   r)   r+   r   r   r  r   )r  r"   r"   r#   *test_construct_dataset_with_invalid_schema  s    


r  c              	      s  t j| tdt  d}t j| tdt  d}tjjtt	dgdgd tjjtt	dgdgd}t
jtdd	 t ||g W 5 Q R X d
}t
jt|d	 t dddg W 5 Q R X d}t
jt|d	 t d  W 5 Q R X d}t
jt|d	" t  fddt	dD  W 5 Q R X d}t
jt|d	 t g  W 5 Q R X d}t
jt|d	 t  |g W 5 Q R X d}t
jt|d	 t  dg W 5 Q R X d}t
jt|d	 t  dg W 5 Q R X d S )Nr  r   /schemar|   rA   r$  rB   z"Expected.*FileSystemDatasetFactoryr   zExpected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types: intr	   r   r?   zbExpected a path-like, list of path-likes or a list of Datasets instead of the given type: NoneTypezcExpected a path-like, list of path-likes or a list of Datasets instead of the given type: generatorc                 3   s   | ]
} V  qd S r1   r"   rE   r  Zbatch1r"   r#   r  	  s     z<test_construct_from_invalid_sources_raise.<locals>.<genexpr>zEMust provide schema to construct in-memory dataset from an empty listzFItem has schema
b: int64
which does not match expected schema
a: int64z}Expected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types:r   zCExpected a list of tables or batches. The given list contains a int)r   r   rM   r   r   r&   r   r  r   r   r   r   r   r:   r  InMemoryDataset)r   child1child2Zbatch2r  r"   r  r#   )test_construct_from_invalid_sources_raise  sT    &r  c                 C   s   t jjt tdgdgd}t j|g}tjg dt 	g d
 }|t g ksXt|||g|gfD ]l}t|}| 
||ksttt| dkstt| 
 |kstt jt| |kshtqhd S )Nr|   rA   r$  r&  r  r	   )r&   r   r  r   r   r,   rV   r   r:   r%   r   r/   r5   r   rS   r2   r   r   )r   r^   r/   Zdataset_tablesourcer:   r"   r"   r#   test_construct_in_memory1	  s    
r  r   c              
      s   t jjt tdgdgd t j g}d} fddd f fdd jffD ]P\}}tj	j| || d}|
 |ks~ttjt j|d	 |
  W 5 Q R X qRd S )
Nr|   rA   r$  z#OneShotFragment was already scannedc                      s   t j j gS r1   )r&   RecordBatchReaderrV   r%   r"   r^   r"   r#   r  H	  s    z$test_scan_iterator.<locals>.<lambda>c                      s    fddt dD S )Nc                 3   s   | ]
} V  qd S r1   r"   r  r  r"   r#   r  J	  s     z7test_scan_iterator.<locals>.<lambda>.<locals>.<genexpr>r	   )r   r"   r  r"   r#   r  J	  r  r%   r   r   )r&   r   r  r   r   r,   rV   r%   r   r   r   r5   r   r   r   )r   r/   r   r   r%   r   r"   r  r#   test_scan_iteratorA	  s     
  r  c              	   C   s   t tddgd dgd  d}| d }|  tdD ]6}|d	| }|  t|d| d|d
  q<|dt j	t
dddgdt  d}||fS )NrY  r   rz   r   r
   r@   zdataset-partitionedr?   zpart={}r  r   r   r	   r   r   )r&   r/   r   rr  rP   rW   rX   r  append_columnr   r   r1  r   )rC  r/   r\   r!   r   
full_tabler"   r"   r#   _create_partitioned_datasetT	  s    $ r  c           	   	   C   sL  t | \}}|ddg}t||| tjt|tjddd}|j|jsRt	t
| . tjdtjddd}|j|jst	W 5 Q R X tjt|dd}|j|jst	tjt|tjtdt fgddd}|jtdt }|j|st	| }|dtjtdd	d
gdt d}||sHt	d S )NrA   rB   r}   r  r   zdataset-partitioned/r   r   r	   r   r?   r   )r  r  rd  r   r:   rD   r   r%   r   r5   r   r&   r.  r   r'   r   r  r   r   r1  )	r7  r   r  r\   r/   r:   r  r   r  r"   r"   r#   'test_open_dataset_partitioned_directorye	  s<     


  r  c              	   C   s   t | \}}tt|}|j|js,ttjt|t d}|j|jsTtt	|  tjdt d}W 5 Q R X |j|jstt
t tjt|t d W 5 Q R X d S )Nr  r  )r  r   r:   rD   r%   r   r5   rM   r`   r   r   r   r   rN   )r7  r/   r\   rn  ro  dataset3r"   r"   r#   test_open_dataset_filesystem	  s    
r  c              	   C   s:   t | \}}tjtdd tj|gdd W 5 Q R X d S )Nz format 'blabla' is not supportedr   Zblablar   )r  r   r   r  r   r:   )r7  r  r\   r"   r"   r#   $test_open_dataset_unsupported_format	  s    r  c                 C   s`   t | \}}t|}t||g}t|tjs4ttt|}|	||	|ks\td S r1   )
r  r   r:   r4   r  r5   r;  r<  r=  r   )r7  r   r  r\   r:   unionr  r"   r"   r#   test_open_union_dataset	  s    
r  c              	   C   s>   t jd| dd}tjtdd t j|gdd W 5 Q R X d S )Nr  rx   r  zcannot pass any additionalr   r   )r   r:   r   r   r  )r   r  r"   r"   r#   .test_open_union_dataset_with_additional_kwargs	  s    r  c                	   C   sR   t t tjddd W 5 Q R X t jtjdd tjddd W 5 Q R X d S )Nzi-am-not-existing.arrowr&  r   zcannot be relativer   zfile:i-am-not-existing.arrow)r   r   r   r   r:   r&   r   r"   r"   r"   r#   #test_open_dataset_non_existing_file	  s    r  r   r[   r}   r  r  partition_keysABCr	   r   r?   DEFrz   r
   rA  c                    sn  t tddgd dgd  d}d |d kp:d |d k}|d	krL|rLd S |d	krrtjjd
dg d}d}d }n6|rtjj |d}ntjj d}d}|r|}nd}| d }	|	  |\}
}|
D ]B}|D ]8}|	||p||p| }|jdd t	
||d  qqtjt|	|d} fdd}|jt d
||
d t d||d }|j|sjtd S )NrY  r   rz   r   r
   r@   r   r	   r[   part1part2r  z{0}/{1})r  r  zpart1={0}/part2={1}Z__HIVE_DEFAULT_PARTITION__r:   T)parentsr  r  c                    sL    r.t | trt nt }tt |S t | tr@t S t S d S r1   )r4   rD   r&   r+   r   r-  )r   Z
value_typer  r"   r#   r"  	  s    z/test_partition_discovery.<locals>.expected_type)r&   r/   r   r   r   r  r(  rr  rP   rW   rX   r:   rD   r%   r   r'   r   r5   )r7  r   r  r  r  r/   Zhas_nullfmtZ
null_valueZbasepathZ
part_keys1Z
part_keys2r  r  r\   r:   r"  r  r"   r  r#   test_partition_discovery	  sR    $  r  c                 C   s\  t tddgdtdd}tj|dgjdd}tj	|| |d	d
 tj
| d	tjjddd}t |d |d  d}| |stt| d }|j|jd|d d st|j}tt|}| |sttt|}|j|jd|d d st|j|jd |d d  sFt|j|sXtd S )Nr  r  r
   r|   r   r  r   r}   r  r  r   rP   Tr  r'  r  )r  r   r   rL   )r&   r/   r   r1  r   r   r   r  r%   r4  r:   r(  r  r  r   r   r5   rS   r2   r   r;  r<  r=  r  )r7  r/   r   r:   r  r;   Z	part_exprr~  r"   r"   r#   4test_dataset_partitioned_dictionary_type_reconstruct

  s.       "
r  c              	   C   s   ddl m} | d \}}}}d||||}||\}}|d tdddd	gi}	|d
}
t	|	|
 W 5 Q R X |	|||||||fS )Nr   
FileSystem
connectionz_s3://{}:{}@mybucket/data.parquet?scheme=http&endpoint_override={}:{}&allow_bucket_creation=TruemybucketrA   r	   r   r?   zmybucket/data.parquet)
rp   r  rP   from_urirQ   r&   r/   rR   rW   rX   )	s3_serverr  hostport
access_key
secret_keyr4  rM   r\   r/   r]   r"   r"   r#   s3_example_simple)
  s       
r  c                 C   s^   | \}}}}}}}}t j|dd}|||s6tt j|d|d}|||sZtd S )Nrx   r   rP   r   )r   r:   r   r   r5   )r  r   r/   r\   rM   r4  r  r:   r"   r"   r#   test_open_dataset_from_uri_s3?
  s
    r  c                 C   s   | \}}}}}}}}t d}ddlm}	m}
 |j||dd||id}tj|d|d}|	 
|slt|	|
|}tj|d|d}|	 
|std S )	Ns3fsr   )r_   FSSpecHandlerZendpoint_urlzhttp://{}:{})r   secretZclient_kwargsrx   r  )r   importorskiprp   r_   r  S3FileSystemrP   r   r:   r   r   r5   )r  r/   r\   r  r  r  r  r  r  r_   r  rM   r:   r"   r"   r#   $test_open_dataset_from_uri_s3_fsspecN
  s    
 
	r  c              	   C   s  ddl m} | d \}}}}d}d}d||||||}||\}	}|dksRt|	| tdd	d
dgi}
|	|}t	
|
| W 5 Q R X tj|dd}| |
std||||}ddddddg}|D ]4\}}||}tj||dd}| |
stqtjtjdd |d}tjd|d W 5 Q R X d}d}||}tt}tjd|d W 5 Q R X t|j|d||kstd}||}tt}tjd|d W 5 Q R X t|j|d||kstd S ) Nr   r  r  theirbucketnested/folder/data.parquetzOs3://{}:{}@{}/{}?scheme=http&endpoint_override={}:{}&allow_bucket_creation=truez&theirbucket/nested/folder/data.parquetrA   r	   r   r?   rx   r   3s3://{}:{}@{{}}?scheme=http&endpoint_override={}:{})ztheirbucket/nested/folder/z/data.parquet)ztheirbucket/nested/folderdata.parquet)ztheirbucket/nested/folder/data.parquet)ztheirbucket/nestedr  )r  z/nested/folder/data.parquet)r  r  r  zMissing bucket namer   /z'/theirbucket/nested/folder/data.parquetr  zThe path component of the filesystem URI must point to a directory but it has a type: `{}`. The path component is `{}` and the given filesystem URI is `{}`ztheirbucket/doesnt/existr  ZNotFoundZFile)rp   r  rP   r  r5   rQ   r&   r/   rR   rW   rX   r   r:   r   r   r   r   r   r  rD   r   )r  r  r  r  r  r  Zbucketr\   r4  rM   r/   r]   r:   templater  prefixr  excr"   r"   r#   -test_open_dataset_from_s3_with_filesystem_urih
  sj         
   



r  c                 C   sD   t | \}}td}|d}tj||d}|j|js@td S )Nfsspecfiler  )	r  r   r  r   r   r:   r%   r   r5   )r7  r/   r\   r  re   r:   r"   r"   r#   test_open_dataset_from_fsspec
  s
    

r  c           	      C   s   t d}tddddgi}| d }t|| |d}|| d dsTt	t
 }tt|}|||}||jst	|||}|j|jst	d S )	Nr  rA   r	   r   r?   r  r  r   )r   r  r&   r/   rW   rX   r   Zlsendswithr5   r   r   rM   r_   r  r  r   r%   r   r  )	r7  r  r/   r\   	fsspec_fsrP   r   r%   r;   r"   r"   r#   test_file_format_inspect_fsspec
  s    

r  c                 C   s  | d }t ddgd tdd}tj|dgjdd	}tj|||d
d tjt dt dfgdd	}tj	|d
|d}t
dtdk}|j||d}|d dddddgkstdd l}t
d|dddk}|j||d}|d dddddgkstd S )NZtest_partition_timestamps
2012-01-01z
2012-01-02r
   r|   )datesr  r  r}   r  r  r  rX  r'  r   r  r	   r?   r   rY  r   i  )r&   r/   r   r   r   r  r%   r4  r  r:   r'   r   	Timestampr   r   r  r5   r   )r7  r   r\   r/   r   r:   r   r   r"   r"   r#   test_filter_timestamp
  s$    
 r  c              
   C   sp   t dt jddddddgt  di}t| |\}}tt|}tddk}t	|j
||d	dksltd S )
NrA   r   r	   r   r?   rz   r
   r   r   )r&   r/   r   r.  r  r   r:   rD   r'   r   r   r5   )r7  r   r/   r  r\   r:   filter_r"   r"   r#   test_filter_implicit_cast
  s
    (r  c                 C   s`   t dddd gi}t| |\}}tt|}|j|tdtd kd}|j	dks\t
d S )Nr  rA   rB   r   r   )r&   r/   r  r   r:   rD   r   r'   r   r   r5   )r7  r   r/   r  r\   r:   r"   r"   r#   test_filter_equal_null
  s     r  c           	      C   s  t ddd ddgdd tdD dd tdd	D d
}t| |\}}tt|}tt	dt 
ddg}|j||djdksttt	ddk}|j||djdksttt	dt	d}|j|d|id}|d  dddddgkstd S )NrA   rB   rJ  c                 S   s   g | ]}t  d dd|qS i  r	   r  r  r"   r"   r#   rG   	  s     z2test_filter_compute_expression.<locals>.<listcomp>r
   c                 S   s   g | ]}t  d d|qS r  r  r  r"   r"   r#   rG   
  s     r	   rA  r  r  r  r  r   r?   r  r   r  r   r   r   rz   )r&   r/   r   r  r   r:   rD   pcZis_inr'   r   r   r   r5   hourZdays_betweenr  )	r7  r   r/   r  r\   r:   r  r   r   r"   r"   r#   test_filter_compute_expression  s    r  c                 C   s   t j| tdt  d}t |g}t| dks:tt	dd | D sTt| d 
| snt| 
| stt| t jstd S )Nr  r   r	   c                 s   s   | ]}t |tjV  qd S r1   )r4   r&   r   )rE   rX  r"   r"   r#   r  #  s     z%test_dataset_union.<locals>.<genexpr>r   )r   r   rM   r   r   ZUnionDatasetFactoryr   r  r5   r*  r   r  r4   r   r   )r   r  r   r"   r"   r#   test_dataset_union  s     r  c                 C   s  t jd|dd}t jd|dddgd}t jd|dd	d}|j|j  krR|jksXn tt |||g}t|t jsxtd
}tjt|d t j||g|d W 5 Q R X t	dt	
 fdt	 fdt	 fdt	 fdt	 fdt	 fdt	 fg}|j|s
t| j|s tt ||g}t	dt	
 fdt	 fdt	 fdt	 fdt	 fdt	 fg}|j|st| j|stt	dt	 fdt	 fdt	
 fg}t j||g|d}| j|stt	dt	 fdt	 fdt	 fg}t j||g|d}| j|s<tt	jtddgd dgd  dgdddgd}t| |d\}	}
t |
}tjt	jdd t ||g W 5 Q R X d S )Nr  rx   r  r  weekr   r   rP   r   /hiver}   z$cannot pass any additional argumentsr   r  r   r   r   r   r   rL   r   rY  r   rz   r   r
   Z	abcdefghjr$  r  zUnable to merge)r   r:   r%   r5   r4   r  r   r   r  r&   r(   r)   r*   r+   r   r   r   r/   r   r  r   )r7  r   r  r  Zchild3Z	assembledmsgr  r/   r  r\   Zchild4r"   r"   r#   &test_union_dataset_from_other_datasets)  sp    

 






	











 
r  c              	   C   s6   d}t jt|d tjdddg| d W 5 Q R X d S )Nz8points to a directory, but only file paths are supportedr   r  r  r  r  )r   r   IsADirectoryErrorr   r:   )r   r  r"   r"   r#   4test_dataset_from_a_list_of_local_directories_raisesk  s    r  c              
   C   s   t t jd| dt jd| dt jd| dg}tdt fdt fdt fdt fg}|j|spt	t t jd| dt jd| dt jd| d	d
g}tdt fdt fdt fdt fdt
 fdt
 fg}|j|st	d S )Nr  r  r  r  r   r   r   r   r}   )r   r   r   r   )r   r:   r&   r%   r(   r)   r*   r+   r   r5   r   )r   r:   r  r"   r"   r#   &test_union_dataset_filesystem_datasetsq  s4    









r  c              	      s  t dddgdddgd}t|d  d fd	d
	}d }|}||||jd |j}|}||| t ddg}t jdddgdddggddgd}||| t dg}t jdddggdgd}||| t ddg}t jdddgt jd d d gddgddgd}||| t ddg}tjtd |d}t j|d 	d|d gddgd}||| t dt 
t  fdg}tjtd |d}|j|sttjtdd  | W 5 Q R X d S )Nr	   r   r?   皙?皙?333333?r@   r  c                    s\   t jtd | d}|d k	r0|j|s@tn|j| s@t |}||sXtd S )Nr  rL   )r   r:   rD   r%   r   r5   r   )r%   r  r  r:   r   r   r7  r"   r#   r_    s    
z-test_specified_schema.<locals>._check_dataset)r  )rB   r*   )rA   r)   rB   rA   r$  )rJ  r   r   r   rJ  )rA   r   rL   z#Unsupported cast from int64 to listr   )N)r&   r/   rW   rX   r%   r   r   r:   rD   rC  list_r   r   r5   r   r   NotImplementedErrorr   )r7  r   r/   r_  r%   r  r:   r"   r  r#   test_specified_schema  sJ    

"



r  c              	   C   s   | d }t ddddgi}t|| t dt  fg}tjt|gd |d}|j	|sdt
||}tjtdd	 | }|  W 5 Q R X d S )
Nr  rA   r	   r   r?   d   rL   z#Unsupported cast from int64 to nullr   )r&   r/   rW   rX   r%   rK  r   r:   rD   r   r5   r   r   r   r  r  r  )r7  r   fnr/   r%   r:   r   r   r"   r"   r#   test_incompatible_schema_hang  s    
r  c           	   	   C   s   t t jdddgddt jdddgd	dd
}t| d }t |.}t ||j}|| d  |	  W 5 Q R X t
j|t
 d}||}||stt| dD ]*}t
j||d}||}||stqd S )Nr	   r   r?   r.  r   r  r  r  r*   r@   z
test.arrowr   r   )r&  arrow)r&   r/   r   rD   Zoutput_streamZRecordBatchFileWriterr%   Zwrite_batchr   r,  r   r:   r   r   r   r5   r=   )	r7  r   r/   r\   r1  r2  r:   r   Z
format_strr"   r"   r#   test_ipc_format  s     

r  c              	   C   s  ddl m} ttjdddgddtjdd	d
gddd}t| d }||| tj|t	 d}t
| }t|d tjst||}|jdd ||stt| tj|dd}||}|jdd ||st|j|dgd}|jdd ||dgst|j|dtdd id}|jdd |tdtjd	ddgddisjt||dks~t|j|tddkddkstd S )Nr   orcr	   r   r?   r.  r   r  r  r  r*   r@   test.orcr   T)fullr  rB   r   Zb2g?g333333?rA   r   )r   r  r&   r/   r   rD   rX   r   r:   rv  rS   r2   r4   ZFileFragmentr5   r   validater   r=   r  r'   r   )r7  r   r  r/   r\   r:   r   r   r"   r"   r#   test_orc_format  s<    

 
r  c                 C   s   ddl m} ttjdddgddtjdd	d
gddd}t| d }||| tj|dd}t	|
|}t|dkst|d jdkst|d |
 d std S )Nr   r  r	   r   r?   r.  r   r  r  r  r*   r@   r  r  r   )r   r  r&   r/   r   rD   rX   r   r:   rS   r   r   r5   r   r   )r7  r   r  r/   r\   r:   r   r"   r"   r#   test_orc_scan_options  s    r  c                  C   sR   zddl m}  W n< tk
rL   tjtdd tjddd W 5 Q R X Y nX d S )Nr   rv  z'not built with support for the ORC filer   r  r  r   )r  rv  rw  r   r   r  r   r:   r  r"   r"   r#   test_orc_format_not_supported1  s     r   c               	   C   sf   t jtdd$ tjtdtdiddd W 5 Q R X t } t jtdd | 	  W 5 Q R X d S )Nz9Writing datasets not yet implemented for this file formatr   rA   r|   r  z/tmp)rP   r[  )
r   r   r  r   r4  r&   r/   r   rv  make_write_options)Zofr"   r"   r#   +test_orc_writer_not_implemented_for_dataset<  s      r  c                 C   s   t t jdddgddt jdddgd	dd
}t| d }| j|dd tj|t d}|	|}|
|sxtt| tj|dd}|	|}|
|std S )Nr	   r   r?   r)   r   r  r  r  r*   r@   test.csvFr   r   rq  )r&   r/   r   rD   r  to_csvr   r:   rp  r   r   r5   r=   )r7  r   r/   r\   r:   r   r"   r"   r#   test_csv_formatN  s    

r  compressionbz2gzipZlz4Zzstdc              	   C   s   t j|std| ttjdddgddtjddd	gd
dd}t	
 }|dkr`|nd}t| d|  }|j||d&}| jdd}||d W 5 Q R X tj|t d}	||	}
|
|std S )Nz{} support is not builtr	   r   r?   r)   r   r  r  r  r*   r@   r	  gzz	test.csv.r  Fr  r  r   )r   Codecis_availabler   skiprP   r&   r/   r   rM   r`   rD   rR   r  r  writer  r   r:   rp  r   r   r5   )r7  r  r   r/   r   suffixr\   r1  Zcsv_strr:   r   r"   r"   r#   test_csv_format_compresseda  s    
r  c                 C   s  t | d }t|d}|d W 5 Q R X tj|dd}||}|tdt	ddd	gisht
tj|tjtjjd
ddd}||}|tdt	dd	gist
tj|tjtjjdgddd}||}|tdt	dddd	gist
d S )Nr  wzskipped
col0
foo
bar
rq  r   skippedcol0rg  barr	   )rh  ri  r  )rD   r3   r  r   r:   r   r   r&   r/   r   r5   rp  rq  rs  )r7  r   r\   r1  r:   r   r"   r"   r#   test_csv_format_options|  s(    



"


r  c              
   C   s   t | d }t|d}|d W 5 Q R X tj|tjtjjdddd}|	|}dd	d
dg}|j
|kspt|ttdgtdgtdgtdgdstd S )Nr  r  z1,a,true,1
T)Zautogenerate_column_namesri  r   f0r   r#  f3r	   rA   )r  r   r#  r  )rD   r3   r  r   r:   rp  r&   rq  rs  r   r  r5   r   r/   r   )r7  r   r\   r1  r:   r   Zexpected_column_namesr"   r"   r#   (test_csv_format_options_generate_columns  s    





r  c           	   
   C   s  t | d }t|d}|d W 5 Q R X tj|dd}tjjdgdd}tj|t	jj
d	d
d}|j||d}|t	dt	ddd gisttj|d}tj||d}||}|t	dt	ddd gistt }|j||d}|t	dt	dddgistd S )Nr  r  zcol0
foo
spam
MYNULL
rq  r   ZMYNULLT)Znull_valuesr{  r~  r  )r}  rY  )Zfragment_scan_optionsr  rg  Zspamr|  )rD   r3   r  r   r:   r   rq  r  r  r&   rs  r   r   r/   r   r5   rp  )	r7  r   r\   r1  r:   r}  r   r   r  r"   r"   r#   test_csv_fragment_options  s,    
$
$r  c              	   C   s   t t jdddgddt jdddgd	dd
}t| d }| jdddd dd}t|d}|| W 5 Q R X t	j
|t	 d}||}||stt| t	j
|dd}||}||std S )Nr	   r   r?   r)   r   r  r  r  r*   r@   	test.jsonrecordsZorientr@  },{}
{r  r   ru  )r&   r/   r   rD   r  to_jsonreplacer3   r  r   r:   rt  r   r   r5   r=   r7  r   r/   r\   r]   rF  r:   r   r"   r"   r#   test_json_format  s     

r$  c              	   C   s   t t jdddgddt jdddgd	dd
}t| d }| jdddd dd}t|d}|| W 5 Q R X t	j
tdd& tj|tjt jjdddd}W 5 Q R X tj|tjt jjdddd}||}||std S Nr	   r   r?   r)   r   r  r  r  r*   r@   r  r  r  r@  r  r   r  ztry to increase block sizer   rz   r  ri  r   @   )r&   r/   r   rD   r  r!  r"  r3   r  r   r   r  r   r:   rt  ru  rs  r   r   r5   r#  r"   r"   r#   test_json_format_options  s$     



r'  c           	   	   C   s  t t jdddgddt jdddgd	dd
}t| d }| jdddd dd}t|d}|| W 5 Q R X t	j
tdd0 tjt jjddd}tj|t|d}W 5 Q R X tjt jjddd}tj|t|d}||}||std S r%  )r&   r/   r   rD   r  r!  r"  r3   r  r   r   r  r   r  ru  rs  r:   rt  r   r   r5   )	r7  r   r/   r\   r]   rF  r   r:   r   r"   r"   r#   test_json_fragment_options  s(     
r(  c              
   C   s   t | d }dD ]\}}t|d}|| W 5 Q R X tdt fdt fg}tjdgdgd|d	}tjj|d
}t	j
|d}	t	j||	d}
|
j|st|
 |stqd S )Nr  ))latin-1s   a,b
un,lphant)utf16s    a , b 
 u n ,  l  p h a n t wbrA   rB   un
   éléphantr@   rL   encodingri  r   )rD   r3   r  r&   r%   r+   r/   rq  rs  r   rp  r:   r   r5   r   )r7  r   r\   r/  Z
input_rowsr1  r  expected_tablerY  r   dataset_transcodedr"   r"   r#   test_encoding  s    r2  c           
   	   C   s   t | d }t|d}|d W 5 Q R X tdt fdt fg}tjdgdgd|d	}tj|d
|d}t	j
tjjdd || W 5 Q R X tjjdd}tj|d}tj||d}	|	j|st|	 |std S )Nr  r+  s   ,b
un,lphant   érB   r,  r-  )r3  rB   rL   rq  r  zinvalid UTF8r   r)  r.  ri  r   )rD   r3   r  r&   r%   r+   r/   r   r:   r   r   r   libr   r   rq  rs  rp  r   r5   )
r7  r   r\   r1  r  r0  r:   rY  r   r1  r"   r"   r#   test_column_names_encoding  s"    r5  c              	   C   sB  ddl m} ttjdddgddtjdd	d
gddd}| d }|  ||t|d  tj|t	 d}|
|}||stt| tj|dd}|
|}||st|j
|ddgd}|jddgkst|j
|ddgd}|jddgkst||t|d dd tt |
tj|dd W 5 Q R X d S )Nr   )r  r	   r   r?   r.  r   r  r  r  r*   r@   Zfeather_datasetr  r   r  rB   rA   r   zdata1.featherversion)pyarrow.featherr  r&   r/   r   rr  rD   r   r:   r   r   r   r5   r=   r  r   r   r  )r7  r   r  r/   rC  r:   r   r"   r"   r#   test_feather_format+  s*    

r9  brotlic              	   C   s  t t jdgd ddt jdddgd d	dd
}t j|sHt  | d }|  t	 }| d }|  tj
|t|d ||jd dd |dkrtjtdd |j|d}W 5 Q R X tjtdd t |}|j|d}W 5 Q R X d S |j|d}tj
|t|d ||d tj|t	 d}	||	}
|
|sFt|d d }| j}|d d }| j}||k std S )Nr   i,  r.  r   r  r  r  r  r*   r@   Zfeather_dataset_compressedZfeather_dataset_uncompressedz
data.arrowr  rP   Zfile_optionsr:  zCompression typer   r   part-0.arrow)r&   r/   r   r  r  r   r  rr  r   r   r4  rD   r  r   r  r:   r   r   r5   statst_size)r7  r  r   r/   rC  r   Zuncompressed_basedirZwrite_optionscodecr:   r   Zcompressed_fileZcompressed_sizeZuncompressed_fileuncompressed_sizer"   r"   r#   test_feather_format_compressedK  sP    






rA  c                 C   sj   g }t dD ]6}t|gd tjdd}tj|t| |d qt| d }tj	|j
||d ||fS )zO
    Creates a simple (flat files, no nested partitioning) Parquet dataset
    rz   r|   r  metadata_collector	_metadata)r   r&   r/   r   r/  r0  rW   r  rD   write_metadatar%   )	root_pathrC  r!   r/   metadata_pathr"   r"   r#   _create_parquet_dataset_simple  s       rH  c                 C   s\   | d }t |\}}t|}|j|js0tt|jdksBt| }|j	dksXtd S )Nr  rz   (   )
rH  r   parquet_datasetr%   r   r5   r   r   r   r   )r7  rF  rG  r/   r:   r   r"   r"   r#   test_parquet_dataset_factory  s    
rK  win32z'Results in FileNotFoundError on Windows)reasonc           	      C   s   t d}| d }t|\}}|d}tt|}tj||d}|j	
|j	sXtt|jdksjt| }|jdkstd S )Nr  r  r  r  rz   rI  )r   r  rH  r   rM   r_   r  r   rJ  r%   r   r5   r   r   r   r   )	r7  r  rF  rG  r/   r  r   r:   r   r"   r"   r#   #test_parquet_dataset_factory_fsspec  s    

rN  r  c                 C   s   | d }t dgd tjdd}g }tj|t|||d t|d }tj|j	||d t
|}|j	|j	szt| }|jdkstd S )Nr  r   r|   r  )rC  r  rD  rB  )r&   r/   r   r/  r0  rW   r  rD   rE  r%   r   rJ  r   r5   r   r   )r7  r  rF  r/   rC  rG  r:   r   r"   r"   r#   &test_parquet_dataset_factory_roundtrip  s&       
rO  c           	   	   C   s   g }t dD ]Z}tdtt |d |d d i}| | d }tj|||d |d | d qt| d }t|j	|| t
|}| }|d }|tt dd	kstd S )
Nr|   r   r	   z.parquetrB  r@  rD  r   r  )r   r&   r/   rS   rW   rX   set_file_pathrD   rE  r%   r   rJ  r   r   r  r5   )	r7  Z	metadatasr!   r/   Z
table_pathrG  r:   Zscanned_tableZscanned_colr"   r"   r#   "test_parquet_dataset_factory_order  s    
rQ  c              	   C   sz   | d }t |\}}t|dd   t|}|j|jsFtt	|j
dksXttt |  W 5 Q R X d S )NZtest_parquet_dataset_invalid	*.parquetr   rz   )rH  rS   globunlinkr   rJ  r%   r   r5   r   r   r   r   r   r   )r7  rF  rG  r/   r:   r"   r"   r#   $test_parquet_dataset_factory_invalid  s    
rU  c                 C   sz   t t| d}t|d j }g }|D ].}t|j}|t	|
|  || q.| d }tj|||d |S )NrR  r   rD  rB  )rS   r   rglobrW   ZParquetFiler%   Zto_arrow_schemar  rP  rD   rs  r   rE  )rF  Zparquet_pathsr%   rC  r\   r  rG  r"   r"   r#   _create_metadata_file  s      rW  c              	   C   st   t jt tdt tjdt tddgdgdddgd}|d	d
i}t	j
|t| dgd t| |fS )Nr"  rA   rB   r|   r   r#  r   r$  r   r   r  )r&   r/   r   r   r   r/  r0  r1  r-   rW   r  rD   rW  )rF  r/   r"   r"   r#   #_create_parquet_dataset_partitioned  s     rX  c                 C   s   | d }t |\}}tjdd}tj||d}|j|js@tt|jdksRt|	 }|j
dksht| djdd	}| }tj|| d S )
N(test_parquet_dataset_factory_partitionedr}   r  r  r   r"  r   TZdrop)rX  r   r   rJ  r%   r   r5   r   r   r   r   r  sort_valuesreset_indexr   ZtestingZassert_frame_equal)r7  rF  rG  r/   r   r:   r   r  r"   r"   r#   rY    s    rY  c                 C   sh   | d }t |\}}tj|dd}|j|js4td|jjksDtt| }d|d j	jksdtd S )N%test_parquet_dataset_factory_metadatar}   r     keyr   )
rX  r   rJ  r%   r   r5   r  rS   r2   r  )r7  rF  rG  r/   r:   r   r"   r"   r#   r]  .  s    r]  c           
   	   C   s   |\}}| d }t |\}}||g tj|tjdd|d}W 5 Q R X |g  t| }W 5 Q R X |g  t|tddk W 5 Q R X |g  |d tddk W 5 Q R X |g  |d  }	|	d   W 5 Q R X d S )N#test_parquet_dataset_lazy_filteringr}   r  )r   r   r      r   )	rH  r   rJ  r   rS   r2   r'   r   r  )
r7  ru   rM   ro   rF  rG  r  r:   r   Zrg_fragmentsr"   r"   r#   r_  =  s$    


"
"
r_  c                 C   sr   t ddddgi}| d }|| t|}||j}|j|dgdj}d|jks\t|j	|dd	sntd S )
NrA   r	   r   r?   r  r   s   pandasTr  )
r   r   r  r   r:   r   r%   r  r5   r   )r7  r   r.   r\   r:   r%   r   r"   r"   r#   test_dataset_schema_metadataf  s    

ra  c                 C   s   t dt jddddgddi}t|t| d  t dt  fg}tj	| d d	|d
}|j
|tddkd}|d |d ddstt| d }|j
|tddk|d}|d |d ddstd S )Nr  r	   r   r?   rz   r   r   r  rx   r  r   r)   r   r  )r&   r/   r   rW   rX   rD   r%   r)   r   r:   r   r'   r   rC  r  r5   rS   r2   )r7  r   r/   r%   r:   filteredr;   r"   r"   r#   test_filter_mismatching_schemax  s"       "  rc  c                 C   s   t d ttdd}t| d }tj||dgd tj	|dd}|
|}|j
|dgd	}|d|ds|td S )
Nza a b brz   r  r   r   r  r}   r  r   )r&   r/   r  rS   r   rD   rW   r  r   r:   r   r   r   r5   )r7  r   r/   r\   r:   Zall_colsZ	part_onlyr"   r"   r#   +test_dataset_project_only_partition_columns  s    
rd  c                 C   s   dd l }|dtjd d d gddi}| d }|j|dd tj|dtdt	 fgd	}t
dtd d d gt	 i}|||std S )
Nr   r  objectZdtypez(test_dataset_project_null_column.parquetr   r  rx   r  )rw   r   r   r   r  r   r:   r&   r%   r)   r/   r   r   r5   )r7  r   r   r.   rF  r:   r  r"   r"   r#    test_dataset_project_null_column  s     rg  c              	   C   s   ddl m} tdddgdddgd	d
dgd}||| d  tj| d dd}|j|tdtdj	dddtdd	kdd}tdddgtj
dddgdddddgd}||sttjtdd |j|ddid W 5 Q R X d S )Nr   r  r	   r   r?   r   r   r   rA   rB   rJ  r  r  r  r   r  r  r   Fsafer  )Z	A_renamedZB_as_intZC_is_ar   r   TzExpected an Expressionr   )r   r  r&   r/   r  r   r:   r   r'   rC  r   r   r5   r   r   r   )r7  r   r  r/   r:   r   r  r"   r"   r#   test_dataset_project_columns  s"    $
rk  c           	      C   s  t | \}}t|}t|jtjs(tt| \}}t|}t|jtjsPttj|dd}|j}|d k	sptt|tjst|j	t
	dt
 fgkstt|jdkst|jd t
dddgt
 ksttjt
	dt
 fgdd}t|tjstt|jdksttdd	 |jD s2ttj||d}|j}t|tjsXt|j	t
	dt
 fgksxtt|jdksttd
d	 |jD sttj|dd}tjt| |j	|j|jd}|jd kst| d }t|\}}tj|dd}|j}|d k	stt|tjs,t|j	t
	dt
 fgksLtt|jdks`tt|jd  ddhkstd S )Nr}   r  r   r	   r   r   r  c                 s   s   | ]}|d kV  qd S r1   r"   r   r"   r"   r#   r    s     z6test_dataset_preserved_partitioning.<locals>.<genexpr>c                 s   s   | ]}|d kV  qd S r1   r"   r   r"   r"   r#   r    s     r   zdata-partitioned-metadatarA   rB   )r  r   r:   r4   r   r   r5   r  r(  r%   r&   r   r   r!  r   r*  r   rS   r2   rP   r   rX  rJ  r+   rq   r  )	r7  r  r\   r:   r  r   ro  rF  rG  r"   r"   r#   #test_dataset_preserved_partitioning  sP    

$ 
   rl  c                 C   s   t t dt  t dt t  t  g}t jd d ddgtt	dd|d}t
| d }tj||dgdd	 t| d }|d |d kst|d|dstd S )
Nr  r   rA   rz   r  rL   r   Fr  r  )r&   r%   r'   r)   r-  r   r+   r/   rS   r   rD   rW   r  r<  r   r  r5   r   )r7  r%   r/   r\   Zactual_tabler"   r"   r#   +test_write_to_dataset_given_null_just_works  s(    
rn  c                 C   s   t t dt  t dt t  t  g}t jddd d gtt	dd|d}t jddgtt	dd|d}t
| d }tj||dgd	d
 t| d }||kstd S )Nr  r   rA   rz   r  rL   r   r   Trm  )r&   r%   r'   r)   r-  r   r+   r/   rS   r   rD   rW   r  r<  r5   )r7  r%   r/   r  r\   r  r"   r"   r#   'test_legacy_write_to_dataset_drops_null  s*    
 ro  c                 C   s2   dd l m} |j| ||dfgd}|| |S )Nr   	ascending)r   )pyarrow.computecomputeZsort_indicesZSortOptionsr   )tabsort_colr  Zsorted_indicesr"   r"   r#   _sort_table0  s     ru  c                 C   st   |p|}t j| |d|dd t|d}t|t|ks>tt j|d|d}t| |	t|  |sptd S )Nr  FrP   r   r   *r'  )
r   r4  rS   rV  rq   r5   r:   ru  r   r   )r:   r[  expected_filesrt  Zbase_dir_pathr   
file_pathsro  r"   r"   r#   _check_dataset_roundtrip7  s    
   rz  c                 C   s   | d }|   t|}t|}| d }|d g}t|t||d| | d }|d g}t|||d| | d }|   t|}t|}| d }|d g}t|t||d| d S )Nrq  zsingle-file-targetr<  rA   zsingle-file-target2rz  zsingle-directory-target)rr  r  r   r:   rz  rD   r^  )r7  r[   r  r:   targetrx  r"   r"   r#   test_write_datasetJ  s"    





r|  c                 C   s   | d }t |}tjdd}tj||d}| d }|d |d d |d |d d g}tjtd	t fgdd}t|t||d
||d | d }|d |d d |d |d d g}ttd	t fg}t|t||d
||d d S )Npartitionedr}   r  r  zpartitioned-hive-targetpart=ar<  part=br   r   partitioned-dir-targetrA   rB   )	rX  r   r   r:   r&   r%   r+   rz  rD   )r7  r[   r  r   r:   r{  expected_pathsr6  r"   r"   r#   test_write_dataset_partitionedn  sN     
 
      
 
    r  c                    s   t ddd gdddgd}tj| ddgd tj ddgd}|j} fdd	|D }|dddhkslt| }||std S )
Nr   r7  r  r@   r&  rB   r'  c                    s"   h | ]}t t| jqS r"   rD   r`  ra  rs  rb  rE  r  r"   r#   rf     s    z6test_write_dataset_with_field_names.<locals>.<setcomp>	r&   r/   r   r4  r:   r   r5   r   r   r7  r/   r8  r   Zpartitioning_dirsr9  r"   r  r#   #test_write_dataset_with_field_names  s    

r  c                    s   t ddd gdddgd}tj| ddgdd tj ddd	}|j} fd
d|D }|dddhkslt| }||std S )Nr   r7  r  r@   r&  rB   r}   )rP   r   partitioning_flavorr'  c                    s"   h | ]}t t| jqS r"   r  rE  r  r"   r#   rf     s    z;test_write_dataset_with_field_names_hive.<locals>.<setcomp>zb=xzb=yzb=zr  r  r"   r  r#   (test_write_dataset_with_field_names_hive  s    
 
r  c              	   C   s   t ddd gdddgdddgd}tj|| dd	gd
 tj| dd	gd
}t ^}tj|jd	dgd|dd	gd
 tj|dd	gd
}| }t	|
 |d
 kstW 5 Q R X d S )Nr   r7  r  r	   r   r?   )rA   rB   rJ  r&  rB   r'  rJ  r   rA   )r&   r/   r   r4  r:   r2  r3  r   r   r  r   Zdrop_columnsr5   r7  r/   r:   Ztempdir2r8  r9  r"   r"   r#   test_write_dataset_with_scanner  s$    

  
r  c           	         s6  t  G fdddt}t|t ttdt	 g}tj
tttdg|d dd}dd	 fd
d}tjj| |d	dt jfddd}|  z`t fdd}d}d}| dk r|kr|krd	}q}td q|stW 5 d  |  X d S )Nc                       s   e Zd Z fddZdS )z6test_write_dataset_with_backpressure.<locals>.GatingFsc                    s       | jj||dS )Nr  )waitrj   rR   )rl   r\   r  consumer_gater"   r#   rR     s    zItest_write_dataset_with_backpressure.<locals>.GatingFs.open_output_streamN)r   r   r   rR   r"   r  r"   r#   GatingFs  s   r  r    rb  rL   r          Tc                   3   s.   k r*sd S t d d7  V  q d S )Ng{Gz?r	   )r  sleepr"   )r^   batches_readend
keep_goingr"   r#   counting_generator  s    
z@test_write_dataset_with_backpressure.<locals>.counting_generatorr  c                      s   t jtd dS )Nrx   r  )r   r4  rD   r"   )	gating_fsr   r7  r"   r#   r    s
      z6test_write_dataset_with_backpressure.<locals>.<lambda>)r{  Fc                      s   t     S r1   )r  r"   )startr"   r#   duration  s    z6test_write_dataset_with_backpressure.<locals>.durationr|   rB  )	threadingEventr   rM   r_   r`   r&   r%   r'   r   rU   r   rS   r   r   r   rV   Threadr  rq   r@  r  r  r5   )	r7  r  r%   Zmin_backpressurer  Zwrite_threadr  
last_valueZbackpressure_probably_hitr"   )	r^   r  r  r  r  r  r   r  r7  r#   $test_write_dataset_with_backpressure  sF    	  
r  c              	   C   s   t dddgdddgd}tj|| dd	gd
 tj| dd	gd
}t L}tj||dd	gd
 tj|dd	gd
}| }t|	 |	 kst
W 5 Q R X d S )Nr   r7  r  r	   r   r?   rB   rJ  r&  rB   r'  )r&   r/   r   r4  r:   r2  r3  r   r  r   r5   r  r"   r"   r#   test_write_dataset_with_dataset  s    

 r  c           	   	   C   s  | d }t dddgdddgd}tjt t d	t  gd
d}dd }tj|||dd t ddd	gdddgd}t	t j
 tj|||dd W 5 Q R X t ddgi}|d d }tj|| tj|||ddd t ddddd	gdddddgd}tj| d|d }||| | s4ttj|||ddd t dddd	gddddgd}tj| d|d }||| | rtd S )Nr   r   r7  r  r	   r   r?   r  rJ  r}   )r%   r  c                 S   s>   |   djdd}|  djdd}||s:td S )NrB   TrZ  )r  r[  r\  r   r5   )r{  r|  Zdf1Zdf2r"   r"   r#   compare_tables_ignoring_order(  s    zGtest_write_dataset_existing_data.<locals>.compare_tables_ignoring_orderr&  r  rA   rB   rz   ezc=2z	foo.arrowoverwrite_or_ignore)r   rP   existing_data_behaviorr'  Zdelete_matching)r&   r/   r   r   r%   r'   r)   r4  r   r   r   r   r  r  r:   r   existsr5   )	r7  r[   r/   r   r  Zextra_tableZ
extra_fileZoverwrittenZreadbackr"   r"   r#    test_write_dataset_existing_data!  sP     



  

r  r|   c                 C   s   t j||| S r1   )r   r/  randintsizer  r  r"   r"   r#   _generate_random_int_arrayR  s    r  c                 C   sN   g }g }t | D ]*}|t|d|d |dt|  qtj||d}|S )Nr	   r  rJ  r    r%  )r   r   r  rD   r&   rU   )num_of_columnsnum_of_recordsr    r  r!   rU   r"   r"   r#   _generate_data_and_columnsV  s    r  c                 C   s   t tt| d| S )Nz**/*.)r   rS   r`  ra  rS  Zbase_directoryr   r"   r"   r#   _get_num_of_files_generatedb  s    r  c                    s   | d }d d}d}d}t ||}tj||d |d t|}|  d }t||ks\tg }t|D ]6\}	}
|t|
 }tj	|dd}|
| jd	  qh|t|kst|t|kstt fd
d|D std S )Nr   r|   r   #   rx   )rP   max_rows_per_filemax_rows_per_groupr	   r   r   c                 3   s   | ]}| kV  qd S r1   r"   )rE   Zfile_rowcountr  r"   r#   r    s   z7test_write_dataset_max_rows_per_file.<locals>.<genexpr>)r  r   r4  r>  rD  r   r5   rO   rD   r:   r   r   shaper  r*  )r7  r[   r  r  r  rU   files_in_dirZexpected_partitionsZresult_row_combinationr  f_filef_pathr:   r"   r  r#   $test_write_dataset_max_rows_per_filef  s2    

r  c              
      s   | d }d}d}d ddddddddddg
} fdd|D }|d	 }t j||||d
d t|}t|D ]|\}}	|t|	 }
t j|
d
d}| }| }t|D ]@\}}|j	}|t
|d k r||kr||kstq||kstqqld S )Nr   rA  r   r   r
   rz   c                    s   g | ]}t  |qS r"   )r  )rE   r  r  r"   r#   rG     s   z9test_write_dataset_min_rows_per_group.<locals>.<listcomp>Zmin_rows_grouprx   )r    r[  min_rows_per_groupr  rP   r   r	   )r   r4  r>  rD  rO   rD   r:   r   r   r   r   r5   )r7  r[   r  r  Zrecord_sizesZrecord_batchesdata_sourcer  r  r  r  r:   r/   batchesr  r^   Zrows_per_batchr"   r  r#   %test_write_dataset_min_rows_per_group  s6    

r  c                 C   s   | d }d}d}d}t ||}|d }tj|||dd t|}g }|D ]D}	|t|	 }
tj|
dd}| }| }|D ]}|	|j
 q|qJ|dd	gkstd S )
Nr   r  r      Zmax_rows_grouprx   )r    r[  r  rP   r   r  )r  r   r4  r>  rD  rD   r:   r   r   r   r   r5   )r7  r[   r  r  r  rU   r  r  Zbatched_datar  r  r:   r/   r  r^   r"   r"   r#   %test_write_dataset_max_rows_per_group  s,    
r  c                 C   s~  | d }d}d}ddg}t jddddd	d
gddddddgg|d}t jddddd	dgddddddgg|d}t jdd
ddd	dgddddddgg|d}t jddddd	dgddddddgg|d}t j||||g}	tjt || t  fgdd}
|d }tj|	||
|d d d! }|||||\}}||ks8t	|d" }d}tj|	||
||d#d$ |||||\}}||kszt	d S )%Nr   rx   r	   Zc1c2r   r?   rz   r   r|   rA   rB   rJ  rI  r  r  r
   rA  r   r   rY  r  r  rU  rm  r`  r   r}   r  default)r    r[  r   rP   c                 S   s(   t | |d}ttj|| }||fS )Nr  )r  r   r&   rr  unique)r  rU   r   Zcol_idnum_of_files_generatednumber_of_partitionsr"   r"   r#   _get_compare_pair  s     z<test_write_dataset_max_open_files.<locals>._get_compare_pairZmax_1F)r    r[  r   rP   max_open_filesr   )
r&   rU   r,   rV   r   r   r%   r+   r4  r5   )r7  r[   r   Zpartition_column_idr  Zrecord_batch_1Zrecord_batch_2Zrecord_batch_3Zrecord_batch_4r/   r   Zdata_source_1r  r  r  Zdata_source_2r  r"   r"   r#   !test_write_dataset_max_open_files  sp    
    r  c                 C   s   | d }t |}tj|tjjddd}| d }|d |d d |d |d d g}tjt|jd	gd	t	ddgid
}t
|t||d||d d S )Nr}  Tr  r  r  rA   r<  rB   r   r   r   )rX  r   r:   r(  r  r   r&   r%   r'   r   rz  rD   )r7  r[   r  r:   r{  r  r   r"   r"   r#   #test_write_dataset_partitioned_dict  s2     
 

    r  c                    s   | d }t |}tj|dd}tjtdt fgdd}| d }g   fdd}tj||d	|d
|d |d d |d d h}tt	t
j }||kst| d }	tj||	d	|dd tj|d	|d}
tj|	d	|d}|
 | std S )Nr}  r}   r  r   r  Zpartitioned1c                    s     | j d S r1   )r   r\   Zwritten_filepaths_writtenr"   r#   file_visitor8  s    z4test_write_dataset_use_threads.<locals>.file_visitorr  TrP   r   r   r  r~  part-0.featherr  Zpartitioned2Frv  r'  )rX  r   r:   r   r&   r%   r+   r4  rq   rT   r`  ra  r5   r   r   )r7  r[   r  r:   r   Ztarget1r  r  paths_written_setZtarget2Zresult1Zresult2r"   r  r#   test_write_dataset_use_threads+  sD         

   r  c           
   	      s  t jt tdt tjdt tddgdgdddgd}| d	 }tj	||d
dd t
|d}|d g}t|t|ksttj|dd }||st| d }|d |d d |d |d d g}g  g  fdd}tjt dt  fgdd}tj	||dd
||d t
|d}t|t|ksDtdd  D }|ks`ttj|d|d}| |stt dkst D ]}	t|	|kstqd S )Nr"  rA   rB   r|   r   r#  r   r$  singledat_{i}.arrowr  basename_templaterP   rw  zdat_0.arrowr&  r   r}  r~  r  c                    s     | j  | j d S r1   )r   r\   r  r  Zvisited_pathsZvisited_sizesr"   r#   r  n  s    z&test_write_table.<locals>.file_visitorr}   r  )rP   r  r   r  c                 S   s   g | ]}t j|qS r"   )r>  r\   getsizerE   r\   r"   r"   r#   rG   y  s     z$test_write_table.<locals>.<listcomp>r'  r   )r&   r/   r   r   r   r/  r0  r1  r   r4  rS   rV  rq   r5   r:   r   r   r   r%   r+   r   r`  ra  )
r7  r/   r[  ry  r  r   r  r   Zactual_sizesZvisited_pathr"   r  r#   test_write_tableS  sX      
 
 
 
 r  c              	   C   s  t jt tdt tjdt tddgdgdddgd}t |gd	 }| d
 }t	j
||dd t|dt|d gkstt	j|dd |st| d }t	j
|g|dd t|dt|d gkstt	j|dd |st| d }t	j
| |dd t|dt|d gks<tt	j|dd |sZt| d }t	j
||g|dd t|dt|d gkstt	j|dd t |gd	 std S )Nr|   rA   rB   r
   r   r#  r   r$  r   r  r  r   rw  r  r&  zsingle-listmultiplezmultiple-table)r&   r/   r   r   r   r/  r0  r1  ri  r   r4  rq   rV  r5   r:   r   r   r   )r7  r/   r[  r"   r"   r#   #test_write_table_multiple_fragments  s<       r  c              	   C   s   t jt tdt tjdt tddgdgdddgd}| d	 }tj	d
d |
 D ||jddd tj|dd }||st| d }t j|j|
 }tj	||ddd tj|dd }||std S )Nr"  rA   rB   r|   r   r#  r   r$  Zinmemory_iterablec                 s   s   | ]
}|V  qd S r1   r"   )rE   r^   r"   r"   r#   r    s     z&test_write_iterable.<locals>.<genexpr>r  r  )r%   r  rP   r&  r   Zinmemory_readerr  )r&   r/   r   r   r   r/  r0  r1  r   r4  r   r%   r:   r   r   r5   r  rV   )r7  r/   r[  r   r   r"   r"   r#   test_write_iterable  s2      
 r  c              	   C   s  t jt tdt tjdt tddgdgdddgd}t	|}| d	 }tj
|||d
d |tj	|dd}||st| d }tj
|j|dgd|d
d |tj	|dd}||dgsttjtdd  tj
||||jd
d W 5 Q R X d S )Nr"  rA   rB   r|   r   r#  r   r$  Zdataset_from_scannerr  r   r&  Zdataset_from_scanner2r   zCannot specify a schemar   )r%   rP   )r&   r/   r   r   r   r/  r0  r1  r   r:   r4  r   r   r   r5   r  r   r   r  r%   )r7  r   r/   r:   r[  r   r"   r"   r#   test_write_scanner  s:     
   r  c                 C   s   t jt tdt tddgd gddgd}t|	dgj
}| d }tj||d	|d
 tjjdgdd}tj|d|d
 }||std S )Nr"  rA   rB   r|   r  r   r$  r:   r  r'  Tr  r&  )r&   r/   r   r   r   r1  r  r   r   r  r%   r4  r   r  r:   r   r   r5   )r7  r/   r   r[  Zpartitioning_readr   r"   r"   r#   !test_write_table_partitioned_dict  s0          
r  c              	   C   s  t jt jtdddt tjdddddt tdd	gd
gdddgd}| d }tj	||dd t
|d}|d g}t|t|ksttj|dd }||stdD ]}t }|j|d}dt|kst| d| }tj	||||d t|d }	|dkrdnd}
|	j|
ks2ttj|dd }|j}|dkrp|d|dt  }|dkr|d|dt d}||}||stqd S )Nr"  r  r   r  zdatetime64[D]rf  zdatetime64[ns]rA   rB   r|   r   r#  r   r$  rJ  rx   r   rw  part-0.parquet)1.02.42.6r6  z(<pyarrow.dataset.ParquetFileWriteOptionszparquet_dataset_version{0}r;  r  r  r   )r  r  r	   r  )r&   r/   r   r   r   r;  r  r1  r   r4  rS   rV  rq   r5   r:   r   r   r   r  r  rP   rW   Zread_metadataformat_versionr%   r'   Z	with_typer)   r  rC  )r7  r/   r[  ry  r  r   r7  rP   optsmetaZexpected_versionr%   r  r"   r"   r#   test_write_dataset_parquet  sB    	



r  c              	   C   s  t jt tdt tjdt tddgdgdddgd}| d	 }tj	||d
d t
|d}|d g}t|t|ksttj|d
d }||sttjtjj|jjdd}|jdd}| d }tj	||||d tj||d }||s
td S )Nr"  rA   rB   r|   r   r#  Zchr1r$  Zcsv_datasetrq  r   rw  z
part-0.csvr  ri  F)Zinclude_headerZcsv_dataset_noheaderr;  )r&   r/   r   r   r   r/  r0  r1  r   r4  rS   rV  rq   r5   r:   r   r   rp  r   rq  rs  r%   r%  r  )r7  r/   r[  ry  r  r   rP   r  r"   r"   r#   test_write_dataset_csv  s,     


r  c              	      sz   t jt tdt tjdt tddgdgdddgd}d	  fd
d}| d }tj	||d|d  svt
d S )Nr"  rA   rB   r|   r   r#  r   r$  Fc                    s   | j d k	r| j jdkrd d S )Nr?   T)r  r   r  Zvisitor_calledr"   r#   r  =  s    

z=test_write_dataset_parquet_file_visitor.<locals>.file_visitorrJ  rx   )rP   r  )r&   r/   r   r   r   r/  r0  r1  r   r4  r5   )r7  r/   r  r[  r"   r  r#   'test_write_dataset_parquet_file_visitor4  s     
r  c           	         s   dd t dD }dd t dD }t||tddgdd}| d	 }tjtd
t fgdd}g  d  fdd}tj	||d|d|d |d d |d d h}t
ttj }||kstd k	stjdkstd S )Nc                 S   s    g | ]}|gd  D ]}|qqS r|   r"   rE   r   itemr"   r"   r#   rG   L  s       z?test_partition_dataset_parquet_file_visitor.<locals>.<listcomp>rz   c                 S   s$   g | ]}|gd  D ]}|d  qqS r  r"   r  r"   r"   r#   rG   M  s       rA   rB   r"  )r   r#  r   r}  r   r}   r  c                    s   | j r| j  | j d S r1   )r  r   r\   r  r  Zsample_metadatar"   r#   r  Y  s    zAtest_partition_dataset_parquet_file_visitor.<locals>.file_visitorrx   Tr  r~  r  r  r   )r   r&   r/   r   r1  r   r   r%   r+   r4  rq   rT   r`  ra  r5   r   )	r7  Zf1_valsZf2_valsr/   rF  r   r  r  r  r"   r  r#   +test_partition_dataset_parquet_file_visitorJ  s8         

r  c                 C   sd   t dtjdddgi}|d jjdks.ttj|| dd t	
| d }|d jjdks`td S )NrA   r  zEurope/Brussels)tzrx   r   r  )r&   r/   r   r  r   r  r5   r   r4  rW   r<  )r7  r/   r   r"   r"   r#   (test_write_dataset_arrow_schema_metadatan  s
    r  c                 C   sd   ddl m} tddddgi}|ddi}tj|| d	d
 || d j}|j	ddiks`t
d S )Nr   rh  rA   r	   r   r?   r^     valuer  r   r  )r   r  r&   r/   r-   r   r4  r<  r%   r  r5   )r7  r  r/   r%   r"   r"   r#   "test_write_dataset_schema_metadata{  s    r  c                 C   sX   t ddddgi}|ddi}tj|| dd t| d	 j}|jddiksTt	d S )
NrA   r	   r   r?   r^  r  rx   r   r  )
r&   r/   r-   r   r4  rW   r<  r%   r  r5   )r7  r/   r%   r"   r"   r#   *test_write_dataset_schema_metadata_parquet  s
    r  c              	   C   sF  | \}}}}}}}}d ||||}tjttdttjdttddgdgdddgd	}t	j
tdt fgd
d}	t	j|d|d|	d t	jd|dd
d }
|
|st| d}t	j||d|	d t	jd|dd
d }
|
|st| d}t	j|d|d|	d t	jd|dd
d }
|
|sBtd S )Nr  r"  rA   rB   r|   r   r#  r   r$  r}   r  zmybucket/datasetr  r  r&  zmybucket/dataset2r'  r  r  zmybucket/dataset3)rP   r&   r/   r   r   r   r/  r0  r1  r   r   r%   r+   r4  r:   r   r   r5   )r  r  rM   r  r  r  r  Zuri_templater/   r   r   r4  r"   r"   r#   test_write_dataset_s3  sl              

   

       
r  aC  {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:ListBucket",
                "s3:GetObjectVersion"
            ],
            "Resource": [
                "arn:aws:s3:::*"
            ]
        }
    ]
}c           	   	   C   s  ddl m} | d \}}}}|ddd||dd}t| t tjttd	tt	j
d	tt	d
dgdgdddgd}tjtdt fgdd}tj|d|dd|dd tjd|ddd }||sttj|d|dd|dd tjd|ddd }||sttjtdd tj|d|dddd W 5 Q R X |ddd||ddd}tjtd d tj|d|dddd W 5 Q R X d S )!Nr   )r  r  ZlimitedZ
limited123z{}:{}http)r  r  endpoint_overrideschemer"  rA   rB   r|   r   r#  r   r$  r}   r  zexisting-bucketr  Fr  )r   rP   rQ   r   r  r&  r  Tz&Bucket 'non-existing-bucket' not foundr   znon-existing-bucket)r   rP   rQ   r  )r  r  r  r  Zallow_bucket_creationzAccess Denied)rp   r  rP   r   _minio_put_only_policyr&   r/   r   r   r   r/  r0  r1  r   r   r%   r+   r4  r:   r   r   r5   r   r   OSError)	r  r  r  r  r  rM   r/   r   r   r"   r"   r#   test_write_dataset_s3_put_only  s    

        
       
   
   r  c              
   C   s   t dd d gi}t|| d  t t dt t  t  g}t	j
j| d g|t	 t d}||}|j|kstd S )NrA   r  )rg   r%   rP   r   )r&   r/   rW   rX   r%   r'   r-  r   r+   r   r   r   r   rM   r`   r   r5   )r7  r   r/   r%   Zfsdsr"   r"   r#   $test_dataset_null_to_dictionary_cast!  s    
r  c              	   C   s  t dddgdddgd}tj|| d d	d
 tj| d d	d
}t dddgdddgd}tj|| d d	d
 tj| d d	d
}||dd}| t dddgdddgddd gdkst|j|dddd}| dt ddddgdddd gddd dgdkstd S )Nr	   r   rA  rA   rB   rF  colAr  r{  r&  r   c   Zr  r  )colBcol3r|  r  r   r  r  r  
full outer)	join_type	r&   r/   r   r4  r:   r@  r   r5   r   r7  r{  ds1r|  ds2r   r"   r"   r#   test_dataset_join4  s0    



r	  c              	   C   s  t dddgdddgd}tj|| d d	d
 tj| d d	d
}t dddgdddgd}tj|| d d	d
 tj| d d	d
}||d}| t dddgdddgddd gdkst|j|dddd}| dt ddddgdddd gddd dgdkstd S )Nr	   r   rA  rA   rB   rF  r  r{  r&  r   r  r  r  r  )r  r  r|  r  r  r  _rr  Zright_suffixr  r  r"   r"   r#   test_dataset_join_unique_keyS  s0    



r  c              
   C   s  t dddgdddgddd	gd
}tj|| d dd tj| d dd}t dddgdddgdddgd
}tj|| d dd tj| d dd}|j|dddd}| dt jddddgdddd gddd	d gddd dgddd dggdddddgdkstd S )Nr	   r   rA  r|   r"  <   rA   rB   rF  )r  r   colValsr{  r&  r   r  r  r  r  r|  r  r  r
  r  r   r  ZcolB_rZ	colVals_rr$  )	r&   r/   r   r4  r:   r@  r   r   r5   r  r"   r"   r#   test_dataset_join_collisionsr  s0    




r  dstyperM   memc           
   	   C   s  t ddddgddddgd	}|d
krPtj|| d dd tj| d dd}n|dkrdt|}nt|tddk tddk}t	||d
krtj
ntjkst| t dgdgd	kst|dt dgdgd	kst|tddk tddkjtddkd}| t dgdgd	ksFttj|| d dd tj| d dd}| t dgdgd	kst|jtt ddgddgdddd}| dt dd gddgddgdksttt |d  W 5 Q R X tt |  W 5 Q R X |jd}|tddk |}	|	 t dddgiksnttt j ||  W 5 Q R X d S )Nr	   r   rA  r   rA   rB   rF  gr  rM   r{  r&  r   r  r  r?   r  r
   r   rb  r|   r"  r   r  zright outerkeysr  r   )r  r   r  )r&   r/   r   r4  r:   r  r   r  r'   r   r   r  r5   r   r   r   r@  r   r   r   r   r  r2   r%   r  Zreplace_schemar   )
r7  r  r{  r  r   Zr2rb  joinedZschema_without_col2Z	newschemar"   r"   r#   test_dataset_filter  s    

$



 
 r  c           
   	   C   s  t ddddgddddgd	}t d
ddgdddgd	}|dkrtj|| d dd tj| d dd}tj|| d dd tj| d dd}n"|dkrt|}t|}ntt||ftddk tdd
kB }|	 t ddd
gdddgd	ks
t
|jtt ddgddgdddd}|	 dt ddd
gdddgddd gdkslt
|tddk }|tddk }	tjtdd t||	f W 5 Q R X d S ) Nr	   r   rA  r   rA   rB   rF  r  r  rY  r|   r  hr!   lrM   r{  r&  r   r|  r  r  r?   r"  r  r  z
left outerr  )r  r  r   zcurrently not supportedr   )r&   r/   r   r4  r:   r  r   r  r'   r   r5   r@  r   r   r   r  )
r7  r  r{  r|  r  r  Zfiltered_union_dsr  Zfiltered_ds1Zfiltered_ds2r"   r"   r#   test_union_dataset_filter  sP    



 r  c              	   C   s|   | d }t |\}}t|}| }|jdks4t|tddk }| jdksZtt	
t |  W 5 Q R X d S )Ntest_parquet_dataset_filterrI  r   r   r"  )rH  r   rJ  r   r   r5   r   r  r'   r   r   r  r2   )r7  rF  rG  r  r:   r   Zfiltered_dsr"   r"   r#   r    s    
r  c              	   C   s   t jt tdgdgd}t|}dtdi}|j|d}tj|| dgdd t	j
tdd	 tj|| dgdd W 5 Q R X d
S )z
    Ensure the projected schema is used to validate partitions for scanner

    https://issues.apache.org/jira/browse/ARROW-17228
    r"  Zoriginal_columnr$  Zrenamed_columnr   r&  r  z0'Column original_column does not exist in schemar   N)r&   r/   r   r   r   r:   r'   r   r4  r   r   KeyError)r7  r/   Ztable_datasetr   r   r"   r"   r#   4test_write_dataset_with_scanner_use_projected_schema  s.    
        r  rP   )r&  rx   c              
   C   s   |dkrt d tddgddgd dddgdd	id gd
ddd ddgddigd
gd}tj|| d |d tj| d |d}|jdddddgd}| dd ddgd d	dd gddddd ddgdd dgddgkst	d S )Nrx   zpyarrow.parquetZabc123Zqrs456r	   r   buttonrg  r  )r   elementvaluesstructsZscrollZwindowr?   rz   fizzZbuzz)user_ida.dotted.fieldZinteractionr/   r   r#  zinteraction.typezinteraction.valueszinteraction.structsr$  r   )r"  rg  )r#  r   r   r!  r$  )
r   r  r&   r/   r   r4  r:   r   r  r5   )r7  rP   r/   r  r"   r"   r#   test_read_table_nested_columns7  s@    

 
  
 
 r%  c                 C   s   ddl m} | d }tjtdddddgt td	d
d	ddgt gddg}|j||ddgdd |j|ddt	t
dt t
dt gd  }||dkst|d }tt|}dd |D }tt|}||kstd S )Nr   )r:   zslash-writer-xr	   r   r?   rz   r
   zexperiment/A/f.csvzexperiment/B/f.csvzexperiment/C/k.csvzexperiment/M/i.csvZexp_idexp_metar&  r}   )r    r[  rP   r   r  )r  rP   r   r%   c                 S   s   g | ]}d t |dd qS )z	exp_meta=rJ  ri  r   r  r"   r"   r#   rG   r  s     z5test_dataset_partition_with_slash.<locals>.<listcomp>)r   r:   r&   r,   r  r   r   r  r4  r%   r'   r   r   r   r5   r   r  r   rq   r>  rD  )Ztmpdirr   r\   Zdt_tabler<  r&  Zencoded_pathsry  r"   r"   r#   !test_dataset_partition_with_slashS  sF     r'  c                 C   s  t t jdt  ddt jdt  ddg}dddgd d	d gg}t jj||d
}t|| d  tj	| d dd}|
 j|sttj|| d dd tj	| d dd}|
 j|sttj||g| d dd tj	| d dd}|
 j|std S )Nr   F)Znullabler7  Tr	   r   r?   r
   rL   Z	nulltest1rx   r   Z	nulltest2Z	nulltest3)r&   r%   r'   r)   r,   r  rW   r  r   r:   r   r   r5   r4  )r7  Zschema_nullabler  r/   r:   r"   r"   r#   'test_write_dataset_preserve_nullabilityx  s    r(  c                 C   sX  t t jdt  ddidt dt  g}t t dt  t dt  g}dddgd d	d gg}t jj||d
}t jj||d
}tj||g| d dd tj| d dd}|	 jj
|ddsttj||g| d dd tj| d dd}|	 jj
|ddsttj||g| d d|d tj| d dd}|	 jj
|ddsTtd S )Nr   s   foos   barr  r7  r	   r   r?   r
   rL   Ztest1rx   r   Tr  Ztest2Ztest3r  )r&   r%   r'   r)   r,   r  r   r4  r:   r   r   r5   )r7  Zschema_metadataZschema_no_metar  r/   Ztable_no_metar:   r"   r"   r#   *test_write_dataset_preserve_field_metadata  s,    r)  c              	   C   s  t jt dddddgt dddddggd	d
gd}|dkrftj|| d dd tj| d dd}n|dkrzt|}nt|d	 	 dddddgdddddgdkst
|dg 	 dddddgdddddgdkst
|td	dk d	 	 dddgdddgdks&t
t jjt jddddgt  dt ddddggddgd}t|}|dg}| 	 }|d ddddgkst
|d ddddgkst
|dg}| 	 }|d ddddgkst
|d ddddgkst
d S )Nr?   r	   rz   r   r
   rB   rA   rJ  r   r  r$  rM   r{  r&  r   r  )r  r   )r   
descendingr   r  r   rg  Zcarr  Zfoobar)rA   r*  )rA   rp  )r&   r/   r   r   r4  r:   r  r   r   r   r5   r   r  r'   r,   r  r)   )r7  r  r/   r   Z
sorted_tabZsorted_tab_dictr"   r"   r#   test_dataset_sort_by  sT    


r+  )NN)N)NN)NN)rz   r	   r|   )rs   r>  rH  r   r`  r;  sysr  r2  r  r  urllib.parser   Znumpyr   r   r   r&   rq  rr  r  Zpyarrow.csvZpyarrow.jsonr8  rp   rM   Zpyarrow.tests.utilr   r   r   r   r   rw   r   rw  r  r:   r   Zpyarrow.parquetrx   rW   markZ
pytestmarkr$   r0   r=   ZfixturerY   ru   r   r   r   r   r   r   r  r	  r  r  r  r  r:  r?  rD  rL  rW  rZ  rd  rz  r  Zparametrizer   r  r  r  r  r  r  filterwarningsr  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r  r#  r3  r6  r9  r:  r=  rG  rI  rK  rL  rM  rN  rQ  rT  rX  r  r^  r_  rd  rg  rh  rk  rl  rp  ry  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zs3r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r$  r'  r(  r2  r5  r9  rA  rH  rK  ZskipifplatformrN  rO  rQ  rU  rW  rX  rY  r]  r_  ra  rc  rd  rg  rk  rl  rn  ro  ru  rz  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r%  r'  r(  r)  r+  r"   r"   r"   r#   <module>   s  


"


1
.
G
;

0



y


&'H

% <
(9*


(   
d 
U%












	B

$



	9
B






B 9&
 
/
'=  #K0$#D&/$+#
/J
 N
 -%
 