U
    f/e                     @   s  d dl mZ d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
Z
d dlZd dlmZ d dlZd dlZd dlmZ d dlmZ d dlm  mZ d d	lmZmZmZmZmZmZm Z  d dl!m"Z# d d
l$m%Z% d dl&Zd dl&m'Z' ej()e*Z+ej,ddddgddd Z-dd Z.e/de/ddd Z0dd Z1e/de/de/ddd Z2ej3j4dej5de/de/dgdej5de/ddgd d!G d"d# d#Z6dS )$    )partial)reload)BytesIOStringION)Path)URLError)is_platform_windows)ParserError)	DataFrame
MultiIndexSeries	Timestamp
date_rangeread_csvto_datetime)file_path_to_url	read_htmlzchinese_utf-16.htmlzchinese_utf-32.htmlzchinese_utf-8.htmlzletz_latin1.html)paramsc                 C   s   |ddd| j S )z6Parametrized fixture for HTML encoding test filenames.iodataZhtml_encoding)param)requestdatapath r   =/tmp/pip-unpacked-wheel-tiezk1ph/pandas/tests/io/test_html.pyhtml_encoding_file&   s    
r   c                 O   s   t | t |ks,tdt |  dt | d}ttdd | |}|sPt|t| |D ]*\}}tj||f|| |jrZtdqZd S )Nz*lists are not of equal size len(list1) == z, len(list2) == z$not all list elements are DataFramesc                 S   s   t | tot |tS N)
isinstancer
   )xyr   r   r   <lambda><       z(assert_framelist_equal.<locals>.<lambda>zframes are both empty)lenAssertionErrorallmapziptmassert_frame_equalempty)Zlist1Zlist2argskwargsmsgZboth_framesZframe_iZframe_jr   r   r   assert_framelist_equal3   s    r.   bs4html5libc              	   C   sJ   dd l }| |dd tjtdd t|dddd	d
d W 5 Q R X d S )Nr   __version__z4.2zPandas requires versionmatchr   r   html	spam.htmlr/   flavor)r/   setattrpytestraisesImportErrorr   )Zmonkeypatchr   r/   r   r   r   test_bs4_version_failsG   s    r<   c               	   C   s@   d} d}d| d }t jt|d t| d|d W 5 Q R X d S )Nz
google.comzinvalid flavorz\{z \} is not a valid set of flavorsr2   Zgoogler3   r7   )r9   r:   
ValueErrorr   )urlr7   r-   r   r   r   test_invalid_flavorQ   s
    r@   lxmlc                 C   s<   | dddd}t |ddgd}t |ddgd}t|| d S )	Nr   r   r4   valid_markup.htmlr   rA   )	index_colr7   r/   r   r.   )r   filenameZdfs_lxmlZdfs_bs4r   r   r   test_same_orderingZ   s    rF   r7   )Zmarksclass)scopec                   @   s  e Zd Zejdddd Zejddddd Zd	d
 Zejj	dde
jdd Zejj	dde
jdd Ze
jdd Zejjdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Z d5d6 Z!d7d8 Z"d9d: Z#d;d< Z$e
jd=d> Z%e
jejjd?d@ Z&ejjdAdB Z'ejjdCdD Z(dEdF Z)ejjdGdH Z*ejjdIdJ Z+ejjdKdL Z,ejjdMdN Z-ejjdOdP Z.ejjdQdR Z/ejjdSdT Z0dUdV Z1e
jdWdX Z2e
jdYdZ Z3d[d\ Z4d]d^ Z5d_d` Z6dadb Z7dcdd Z8dedf Z9ejjdgdh Z:ejjdidj Z;dkdl Z<dmdn Z=dodp Z>dqdr Z?dsdt Z@dudv ZAdwdx ZBdydz ZCd{d| ZDd}d~ ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdd ZLdd ZMdd ZNdd ZOdd ZPejjdd ZQdd ZRejSddeTdgdfdeTdgeTdgfgdd ZUdd ZVdd ZWdd ZXejjdd ZYdd ZZdS )TestReadHtmlT)autousec                 C   s4   |dddd| _ i | _d| jd< |dddd| _d S )Nr   r   r4   r5   zUTF-8encodingbanklist.html)	spam_dataspam_data_kwargsbanklist_data)selfr   r   r   r   	set_filesm   s    
zTestReadHtml.set_filesfunction)rJ   rH   c                 c   s   t t|d| _d V  d S )Nr6   )r   r   )rP   r7   r   r   r   r   set_defaultst   s    zTestReadHtml.set_defaultsc                 C   sV   t jdddd ddddjt}| }| j|dd	id
dd
 }t || d S )N      c                  W   s
   t j S r   )nprandomZrand)r+   r   r   r   r!   ~   r"   z2TestReadHtml.test_to_html_compat.<locals>.<lambda>F)Z
data_gen_fZc_idx_namesZr_idx_namesz{:.3f}rG   Z	dataframer   )attrsrC   )	r(   ZmakeCustomDataframeapplymapformatZastypefloatto_htmlr   r)   )rP   dfoutresr   r   r   test_to_html_compaty   s    z TestReadHtml.test_to_html_compatzHtml file was removed)reasonc              	   C   sf   d}t t | j|dddid}W 5 Q R X t t | j|dddid}W 5 Q R X t|| d S )N9https://www.fdic.gov/bank/individual/failed/banklist.htmlFirst Federal Bank of FloridaidtablerX   Metcalf Bank)r(   Zassert_produces_warningFutureWarningr   r.   rP   r?   df1df2r   r   r   "test_banklist_url_positional_match   s      z/TestReadHtml.test_banklist_url_positional_matchc                 C   s:   d}| j |dddid}| j |dddid}t|| d S )Nrb   rc   rd   re   r3   rX   rg   rD   ri   r   r   r   test_banklist_url   s      zTestReadHtml.test_banklist_urlc                 C   s.   d}| j |dd}| j |dd}t|| d S )Nz^https://raw.githubusercontent.com/pandas-dev/pandas/master/pandas/tests/io/data/html/spam.html	.*Water.*r2   UnitrD   ri   r   r   r   test_spam_url   s
    zTestReadHtml.test_spam_urlc                 C   s:   | j | jdddid}| j | jdddid}t|| d S )Nz.*Florida.*rd   re   rm   rg   )r   rO   r.   rP   rj   rk   r   r   r   test_banklist   s        zTestReadHtml.test_banklistc                 C   sZ   | j | jdd}| j | jdd}t|| |d jd dks@t|d jd dksVtd S )Nro   r2   rp   r   r   r   
ProximatesZNutrient)r   rM   r.   ilocr$   columnsrr   r   r   r   	test_spam   s
    
zTestReadHtml.test_spamc                 C   s(   |  | j}|D ]}t|tstqd S r   )r   rM   r   r
   r$   rP   dfsr]   r   r   r   test_spam_no_match   s    zTestReadHtml.test_spam_no_matchc                 C   s0   | j | jddid}|D ]}t|tstqd S )Nrd   re   rf   )r   rO   r   r
   r$   ry   r   r   r   test_banklist_no_match   s    z#TestReadHtml.test_banklist_no_matchc                 C   s6   | j | jdddd }|jd dks(t|jr2td S )Nro      r3   headerr   ru   )r   rM   rw   r$   r*   rP   r]   r   r   r   test_spam_header   s    zTestReadHtml.test_spam_headerc                 C   s2   | j | jddd}| j | jddd}t|| d S Nro      r3   skiprowsrp   r   rM   r.   rr   r   r   r   test_skiprows_int   s    zTestReadHtml.test_skiprows_intc                 C   s:   | j | jdtdd}| j | jdtdd}t|| d S Nro   r}   r   rp   )r   rM   ranger.   rr   r   r   r   test_skiprows_range   s    z TestReadHtml.test_skiprows_rangec                 C   s:   | j | jdddgd}| j | jdddgd}t|| d S Nro   r   r}   r   rp   r   rr   r   r   r   test_skiprows_list   s    zTestReadHtml.test_skiprows_listc                 C   s:   | j | jdddhd}| j | jdddhd}t|| d S r   r   rr   r   r   r   test_skiprows_set   s    zTestReadHtml.test_skiprows_setc                 C   s2   | j | jddd}| j | jddd}t|| d S r   r   rr   r   r   r   test_skiprows_slice   s    z TestReadHtml.test_skiprows_slicec                 C   s:   | j | jdtdd}| j | jdtdd}t|| d S r   r   rM   slicer.   rr   r   r   r   test_skiprows_slice_short   s    z&TestReadHtml.test_skiprows_slice_shortc                 C   s@   | j | jdtddd}| j | jdtdddd}t|| d S )	Nro   r}      r   rp   rT   r   r   rr   r   r   r   test_skiprows_slice_long   s    z%TestReadHtml.test_skiprows_slice_longc                 C   s>   | j | jdtdd}| j | jdtdd}t|| d S r   )r   rM   rV   Zaranger.   rr   r   r   r   test_skiprows_ndarray   s    z"TestReadHtml.test_skiprows_ndarrayc              	   C   s0   t jtdd | j| jddd W 5 Q R X d S )Nz%is not a valid type for skipping rowsr2   ro   Zasdfr   )r9   r:   	TypeErrorr   rM   rP   r   r   r   test_skiprows_invalid   s    z"TestReadHtml.test_skiprows_invalidc                 C   s2   | j | jddd}| j | jddd}t|| d S Nro   r   r3   rC   rp   r   rr   r   r   r   
test_index  s    zTestReadHtml.test_indexc                 C   s6   | j | jdddd}| j | jdddd}t|| d S Nro   r   r   )r3   r   rC   rp   r   rr   r   r   r   test_header_and_index_no_types  s    z+TestReadHtml.test_header_and_index_no_typesc                 C   s6   | j | jdddd}| j | jdddd}t|| d S r   r   rr   r   r   r    test_header_and_index_with_types  s    z-TestReadHtml.test_header_and_index_with_typesc                 C   s2   | j | jddd}| j | jddd}t|| d S r   r   rr   r   r   r   test_infer_types  s    zTestReadHtml.test_infer_typesc              	   C   sz   t | jf| j}t| }W 5 Q R X t | jf| j}t| }W 5 Q R X | j|dd}| j|dd}t|| d S Nro   r2   rp   )openrM   rN   r   readr   r.   )rP   fdata1data2rj   rk   r   r   r   test_string_io  s    zTestReadHtml.test_string_ioc              	   C   sN   t | jf| j}| }W 5 Q R X | j|dd}| j|dd}t|| d S r   )r   rM   rN   r   r   r.   )rP   r   r   rj   rk   r   r   r   test_string$  s
    zTestReadHtml.test_stringc              	   C   sb   t | jf| j}| j|dd}W 5 Q R X t | jf| j}| j|dd}W 5 Q R X t|| d S r   )r   rM   rN   r   r.   )rP   r   rj   rk   r   r   r   test_file_like-  s
    zTestReadHtml.test_file_likec              	   C   s,   t jtdd | jddd W 5 Q R X d S )Nz#urlopen error unknown url type: gitr2   zgit://github.comro   )r9   r:   r   r   r   r   r   r   test_bad_url_protocol6  s    z"TestReadHtml.test_bad_url_protocolc              	   C   s4   d}t jttf|d | jddd W 5 Q R X d S )NzNName or service not known|Temporary failure in name resolution|No tables foundr2   zhttp://www.a23950sdfa908sd.comro   )r9   r:   r   r>   r   rP   r-   r   r   r   test_invalid_url;  s    zTestReadHtml.test_invalid_urlc                 C   sP   | j }| jttj|dddid}t|ts4t|D ]}t|t	s8tq8d S )NZFirstrd   re   rm   )
rO   r   r   ospathabspathr   listr$   r
   rP   r?   rz   r]   r   r   r   test_file_urlE  s      zTestReadHtml.test_file_urlc              	   C   s8   | j }tjtdd | j|dddid W 5 Q R X d S )NzNo tables foundr2   rc   rd   Z	tasdfablerm   )rO   r9   r:   r>   r   )rP   r?   r   r   r   test_invalid_table_attrsO  s      z%TestReadHtml.test_invalid_table_attrsc                 O   s"   | j | jf|dddid|S )NMetcalfrd   re   rm   )r   rO   )rP   r+   r,   r   r   r   
_bank_dataW  s     zTestReadHtml._bank_datac                 C   s(   | j ddgdd }t|jts$td S )Nr   r   r   r   r   rw   r   r$   r   r   r   r   test_multiindex_header\  s    z#TestReadHtml.test_multiindex_headerc                 C   s(   | j ddgdd }t|jts$td S )Nr   r   rC   )r   r   indexr   r$   r   r   r   r   test_multiindex_indexa  s    z"TestReadHtml.test_multiindex_indexc                 C   s>   | j ddgddgdd }t|jts*tt|jts:td S )Nr   r   )r   rC   )r   r   rw   r   r$   r   r   r   r   r   test_multiindex_header_indexf  s    z)TestReadHtml.test_multiindex_header_indexc                 C   s*   | j ddgddd }t|jts&td S Nr   r   )r   r   r   r   r   r   r   &test_multiindex_header_skiprows_tuplesl  s    z3TestReadHtml.test_multiindex_header_skiprows_tuplesc                 C   s*   | j ddgddd }t|jts&td S r   r   r   r   r   r   test_multiindex_header_skiprowsq  s    z,TestReadHtml.test_multiindex_header_skiprowsc                 C   s@   | j ddgddgddd }t|jts,tt|jts<td S )Nr   r   )r   rC   r   )r   r   r   r   r$   rw   r   r   r   r   %test_multiindex_header_index_skiprowsv  s    z2TestReadHtml.test_multiindex_header_index_skiprowsc                 C   s\   | j }| jttj|ttdddid}t|t	s@t
|D ]}t|tsDt
qDd S )NZFloridard   re   rm   )rO   r   r   r   r   r   recompiler   r   r$   r
   r   r   r   r   test_regex_idempotency|  s    z#TestReadHtml.test_regex_idempotencyc              	   C   s4   d}t jt|d | j| jddd W 5 Q R X d S )Nz\(you passed a negative value\)r2   ZWaterr   r   )r9   r:   r>   r   rM   r   r   r   r   test_negative_skiprows  s    z#TestReadHtml.test_negative_skiprowsc                 C   s&   d}| j |dd}t|dks"td S )Nhttps://docs.python.org/2/Pythonr2   r   r   r#   r$   )rP   r?   rz   r   r   r   test_multiple_matches  s    z"TestReadHtml.test_multiple_matchesc                 C   s<   d}| j |dd}dd |D }t|tddgks8td S )Nr   r   r2   c                 S   s   g | ]}|j d  dd qS )rt   r   rT   )rv   ).0r]   r   r   r   
<listcomp>  s     z7TestReadHtml.test_python_docs_table.<locals>.<listcomp>ZRepoZWhat)r   sortedr$   )rP   r?   rz   zzr   r   r   test_python_docs_table  s    z#TestReadHtml.test_python_docs_tablec                 C   s"   d}|  |}t|dkstdS )z@
        Make sure that read_html ignores empty tables.
        a  
            <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
            </table>
            <table>
                <tbody>
                </tbody>
            </table>
        r   Nr   )rP   r4   resultr   r   r   test_empty_tables  s    
zTestReadHtml.test_empty_tablesc                 C   s:   |  dd }tddgddggddgd	}t|| d S )
Na  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </tbody>
            <tbody>
                <tr>
                    <td>3</td>
                    <td>4</td>
                </tr>
            </tbody>
        </table>r   r   r}   rU   rT   ABr   rw   r   r
   r(   r)   rP   r   expectedr   r   r   test_multiple_tbody  s    z TestReadHtml.test_multiple_tbodyc                 C   s0   |  dd }tddidgd}t|| dS )zt
        Don't fail with bs4 when there is a header and only one column
        as described in issue #9178
        a3  <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>first</td>
                    </tr>
                </tbody>
            </table>r   Headerfirstr   r   Nr   r   r   r   r   test_header_and_one_column  s    z'TestReadHtml.test_header_and_one_columnc                 C   s8   |  dd }tdddggdddgd	}t|| d
S )zK
        Ensure parser adds <tr> within <thead> on malformed HTML.
        a  <table>
            <thead>
                <tr>
                    <th>Country</th>
                    <th>Municipality</th>
                    <th>Year</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Ukraine</td>
                    <th>Odessa</th>
                    <td>1944</td>
                </tr>
            </tbody>
        </table>r   ZUkraineZOdessa  ZCountryZMunicipalityZYearr   Nr   r   r   r   r   test_thead_without_tr  s    
z"TestReadHtml.test_thead_without_trc                 C   s   d}t ddggddgd}t ddgddggddgd}|jd	d
}|jdd
}| |d }| |d }t|| t|| dS )zh
        Make sure that read_html reads tfoot, containing td or th.
        Ignores empty tfoot
        a  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>bodyA</td>
                    <td>bodyB</td>
                </tr>
            </tbody>
            <tfoot>
                {footer}
            </tfoot>
        </table>ZbodyAZbodyBr   r   r   ZfootAZfootB )footerz%<tr><td>footA</td><th>footB</th></tr>r   N)r
   rZ   r   r(   r)   )rP   Zdata_templateZ	expected1Z	expected2r   r   Zresult1Zresult2r   r   r   test_tfoot_read	  s     zTestReadHtml.test_tfoot_readc                 C   s4   | j dddd }tddggdd}t|| d S )Na
  
            <table>
                <tr>
                    <td>S</td>
                    <td>I</td>
                </tr>
                <tr>
                    <td>text</td>
                    <td>1944</td>
                </tr>
            </table>
        r   r   textr   )SIrw   r   r   r   r   r   &test_parse_header_of_non_string_column/  s    z3TestReadHtml.test_parse_header_of_non_string_columnc              
      s   ddl m   fdd}| j| jdddidd }t|d	d
ddttdd}|j|jks^tddddddddddg
}dddddddd d!d"g
}||	||}||}|j
d#d#d$}	d%d&g}
|	|
 t|	|
< t|	| d S )'Nr   _remove_whitespacec                    s(   z
 | W S  t k
r"   |  Y S X d S r   )AttributeErrorr   r   r   r   try_remove_wsJ  s    
z8TestReadHtml.test_banklist_header.<locals>.try_remove_wsr   rd   re   rm   r   r   csvzbanklist.csv)Updated DateClosing Date
convertersz+First Vietnamese American BankIn Vietnamesez!Westernbank Puerto RicoEn Espanolz)R-G Premier Bank of Puerto RicoEn EspanolzEurobankEn EspanolzSanderson State BankEn EspanolzKWashington Mutual Bank(Including its subsidiary Washington Mutual Bank FSB)zSilver State BankEn Espanolz$AmTrade International BankEn EspanolzHamilton Bank, NAEn Espanolz5The Citizens Savings BankPioneer Community Bank, Inc.zFirst Vietnamese American BankzWesternbank Puerto RicozR-G Premier Bank of Puerto RicoZEurobankzSanderson State BankzWashington Mutual BankzSilver State BankzAmTrade International BankzHamilton Bank, NAzThe Citizens Savings BankT)datetimenumericr   r   )pandas.io.htmlr   r   rO   r   r   shaper$   rY   replace_convertapplyr   r(   r)   )rP   r   r   r]   Zground_trutholdnewZdfnewZgtnewZ	convertedZ	date_colsr   r   r   test_banklist_headerF  sL    
z!TestReadHtml.test_banklist_headerc              	   C   s\   d}t | j}| }W 5 Q R X ||ks.t| j| jdddidd }|| ksXtd S )NzGold Canyonrd   re   rm   r   )r   rO   r   r$   r   Z	to_string)rP   gcr   Zraw_textr]   r   r   r   test_gold_canyonx  s      zTestReadHtml.test_gold_canyonc                 C   s4   | j dddd }| j dddd }t|| d S )Na  <table>
                        <thead>
                            <tr style="text-align: right;">
                            <th></th>
                            <th>C_l0_g0</th>
                            <th>C_l0_g1</th>
                            <th>C_l0_g2</th>
                            <th>C_l0_g3</th>
                            <th>C_l0_g4</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <th>R_l0_g0</th>
                            <td> 0.763</td>
                            <td> 0.233</td>
                            <td> nan</td>
                            <td> nan</td>
                            <td> nan</td>
                            </tr>
                            <tr>
                            <th>R_l0_g1</th>
                            <td> 0.244</td>
                            <td> 0.285</td>
                            <td> 0.392</td>
                            <td> 0.137</td>
                            <td> 0.222</td>
                            </tr>
                        </tbody>
                    </table>r   r   a  <table>
                    <thead>
                        <tr style="text-align: right;">
                        <th></th>
                        <th>C_l0_g0</th>
                        <th>C_l0_g1</th>
                        <th>C_l0_g2</th>
                        <th>C_l0_g3</th>
                        <th>C_l0_g4</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                        <th>R_l0_g0</th>
                        <td> 0.763</td>
                        <td> 0.233</td>
                        </tr>
                        <tr>
                        <th>R_l0_g1</th>
                        <td> 0.244</td>
                        <td> 0.285</td>
                        <td> 0.392</td>
                        <td> 0.137</td>
                        <td> 0.222</td>
                        </tr>
                    </tbody>
                 </table>)r   r(   r)   )rP   r   r   r   r   r   test_different_number_of_cols  s     "z*TestReadHtml.test_different_number_of_colsc                 C   s8   |  dd }tdddggdddgd	}t|| d S )
NaZ  
            <table>
                <tr>
                    <th>A</th>
                    <th colspan="1">B</th>
                    <th rowspan="1">C</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                    <td>c</td>
                </tr>
            </table>
        r   abcr   r   Cr   r   r   r   r   r   test_colspan_rowspan_1  s    z#TestReadHtml.test_colspan_rowspan_1c                 C   sD   | j dddd }tdddddggdd	d
ddgd}t|| d S )Na  
            <table>
                <tr>
                    <td colspan="2">X</td>
                    <td>Y</td>
                    <td rowspan="2">Z</td>
                    <td>W</td>
                </tr>
                <tr>
                    <td>A</td>
                    <td colspan="2">B</td>
                    <td>C</td>
                </tr>
            </table>
        r   r   r   r   Zr   XzX.1YWr   r   r   r   r   r    test_colspan_rowspan_copy_values  s     z-TestReadHtml.test_colspan_rowspan_copy_valuesc                 C   sD   | j dddd }tdddddggddddd	gd
}t|| d S )Na(  
            <table>
                <tr>
                    <td rowspan="2">A</td>
                    <td rowspan="2" colspan="3">B</td>
                    <td>C</td>
                </tr>
                <tr>
                    <td>D</td>
                </tr>
            </table>
        r   r   r   r   DzB.1zB.2r   r   r   r   r   r   r   test_colspan_rowspan_both_not_1   s     z,TestReadHtml.test_colspan_rowspan_both_not_1c                 C   s8   | j dddd }tddggddgd}t|| d S )Nz
            <table>
                <tr>
                    <td>A</td>
                    <td rowspan="2">B</td>
                </tr>
                <tr>
                    <td>C</td>
                </tr>
            </table>
        r   r   r   r   r   r   r   r   r   r   r   test_rowspan_at_end_of_row  s    z'TestReadHtml.test_rowspan_at_end_of_rowc                 C   s>   | j dddd }tddgddggddgd}t|| d S )Nz
            <table>
                <tr>
                    <td rowspan="3">A</td>
                    <td rowspan="3">B</td>
                </tr>
            </table>
        r   r   r   r   r   r   r   r   r   r   test_rowspan_only_rows9  s    
z#TestReadHtml.test_rowspan_only_rowsc                 C   sT   |  dd }tddgddggddgddggd}tdd	gg|d
}t|| d S )Nam  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <th>a</th>
                    <th>b</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </table>
        r   r   r   r   r   r   levelscodesr}   r   r   r   r
   r(   r)   rP   r   rw   r   r   r   r   +test_header_inferred_from_rows_with_only_thL  s    $z8TestReadHtml.test_header_inferred_from_rows_with_only_thc                 C   sd   t dtdddi}| }| j|dgdd}t||d  | j|dgdd}t||d  d S )Ndate1/1/2001
   Zperiodsr   r   Zparse_datesrC   )r
   r   r\   r   r(   r)   )rP   r]   r   r_   r   r   r   test_parse_dates_listf  s    z"TestReadHtml.test_parse_dates_listc                 C   sn   t tddd}t|dd |dd d}| j| dd	d
gid	d}td|i}t||d  d S )Nr  r  r  c                 S   s   t |  S r   )strr  r   r   r   r   r!   r  r"   z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>c                 S   s   t |  S r   )r  timer   r   r   r   r!   s  r"   )r  r  r   r   r}   r  r   )r   r   r
   r&   r   r\   r(   r)   )rP   Z	raw_datesr]   r_   Znewdfr   r   r   test_parse_dates_combinen  s     
 z%TestReadHtml.test_parse_dates_combinec                 C   s   |dddd}t j|s,tt| dt j|sJtt| d| j|ddd	d
 }|jdksltd|jd ks~t|d j	t
	dkstt
|jd dstd S )Nr   r   r4   wikipedia_states.htmlz is not a filez is an empty fileArizonar   r~   r   )<      Unnamedr   sq mifloat64)r   r  HzPN$A)r   r   isfiler$   reprgetsizer   r   rw   dtyperV   allcloselocrP   r   r   r   r   r   r   test_wikipedia_states_table|  s    z(TestReadHtml.test_wikipedia_states_tablec                 C   sp   |dddd}| j |dddd }|jdks0td	|jd
 d ksFt|jjdksVtt|jd dsltd S )Nr   r   r4   r  r  r   r   )r     r  r   r   r}   )ZAlaska)zTotal area[2]r  r  )r   r   r$   rw   ZnlevelsrV   r$  r%  r&  r   r   r    test_wikipedia_states_multiindex  s    z-TestReadHtml.test_wikipedia_states_multiindexc              	   C   s4   d}t jt|d | jdddgd W 5 Q R X d S )NzGPassed header=\[0,1\] are too many rows for this multi_index of columnsr2   aK  
                <table>
                    <thead>
                        <tr><th></th><th></tr>
                        <tr><th>A</th><th>B</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>a</td><td>b</td></tr>
                    </tbody>
                </table>
            r   r   r   )r9   r:   r	   r   r   r   r   r   %test_parser_error_on_empty_header_row  s    z2TestReadHtml.test_parser_error_on_empty_header_rowc                 C   sL   | j dddd }tddidgd}|d jtdks<tt|| d S )	Na  <html>
            <body>
             <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1100#101</td>
                    </tr>
                </tbody>
            </table>
            </body>
        </html>#)decimalr   r   gClg0@r   r  )r   r
   r#  rV   r$   r(   r)   r   r   r   r   test_decimal_rows  s    zTestReadHtml.test_decimal_rowsc              
   C   sB   t d}dD ].}tjt|d | j| j|d W 5 Q R X qd S )NzPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column names)TFr2   r   )r   escaper9   r:   r   r   rM   )rP   r-   argr   r   r   test_bool_header_arg  s    z!TestReadHtml.test_bool_header_argc                 C   s6   | j ddtidd }tdddgi}t|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                    </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>r   r   r   z0.763z0.244)r   r  r
   r(   r)   r   r   r   r   test_converters  s    zTestReadHtml.test_convertersc                 C   s6   | j ddgdd }tddtjgi}t|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                   </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>gZd;?)Z	na_valuesr   r   g"~j?r   r
   rV   nanr(   r)   r   r   r   r   test_na_values  s    zTestReadHtml.test_na_valuesc                 C   sh   d}t dddgi}| j|ddd }t|| t dtjtjgi}| j|ddd }t|| d S )	Na  <table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> N/A</td>
                            </tr>
                            <tr>
                            <td> NA</td>
                            </tr>
                        </tbody>
                    </table>r   zN/AZNAF)Zkeep_default_nar   T)r
   r   r(   r)   rV   r3  )rP   Z	html_dataexpected_dfhtml_dfr   r   r   test_keep_default_na  s    z!TestReadHtml.test_keep_default_nac                 C   s>   |  dd }tddgtjtjggddgd}t|| d S )Nak  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                </tr>
                <tr>
                    <td></td>
                    <td></td>
                </tr>
            </table>
        r   r   r   r   r   r   r2  r   r   r   r   test_preserve_empty_rows  s     z%TestReadHtml.test_preserve_empty_rowsc                 C   sT   |  dd }tddgddggddgddggd}tdd	gg|d
}t|| d S )NaU  
            <table>
                <thead>
                    <tr><th></th><th></tr>
                    <tr><th>A</th><th>B</th></tr>
                    <tr><th>a</th><th>b</th></tr>
                </thead>
                <tbody>
                    <tr><td>1</td><td>2</td></tr>
                </tbody>
            </table>
        r   r   r   r   r   r   r	  r}   r   r  r  r   r   r   ,test_ignore_empty_rows_when_inferring_header,  s    $z9TestReadHtml.test_ignore_empty_rows_when_inferring_headerc                 C   sP   t dddgd}dddgdd	d
gg|_|jdd}| |d }t|| d S )N)ZHillaryD   r  )ZBernieJ   r  )ZDonaldE   R)r   zUnnamed: 0_level_0ZAgeZPartyNamezUnnamed: 1_level_1zUnnamed: 2_level_1Fr   r   )r
   rw   r\   r   r(   r)   )rP   r5  r4   r6  r   r   r   test_multiple_header_rowsA  s    z&TestReadHtml.test_multiple_header_rowsc                 C   s@   |dddd}| j |dd}t|ts*tt|d ts<td S )Nr   r   r4   rB   r   r   )r   r   r   r$   r
   )rP   r   rE   rz   r   r   r   test_works_on_valid_markupN  s    z'TestReadHtml.test_works_on_valid_markupc                 C   s&   |dddd}| j |dddgd d S )	Nr   r   r4   rL   ro   rA   r0   r=   r   )rP   r   rO   r   r   r   test_fallback_successT  s    z"TestReadHtml.test_fallback_successc                 C   s:   t ddd}ttjdd|d}| }d|ks6td S )Nz
2000-01-01r  r  rT   r?  )r   r
   rV   rW   Zrandnr\   r$   )rP   rngr]   r   r   r   r   test_to_html_timestampY  s    z#TestReadHtml.test_to_html_timestampzdisplayed_only,exp0,exp1ZfooNFzfoo  bar  baz  quxc                 C   sT   t d}| j||d}t|d | |d k	r@t|d | nt|dksPtd S )Na  <html>
          <body>
            <table>
              <tr>
                <td>
                  foo
                  <span style="display:none;text-align:center">bar</span>
                  <span style="display:none">baz</span>
                  <span style="display: none">qux</span>
                </td>
              </tr>
            </table>
            <table style="display: none">
              <tr>
                <td>foo</td>
              </tr>
            </table>
          </body>
        </html>)displayed_onlyr   r   )r   r   r(   r)   r#   r$   )rP   rE  Zexp0Zexp1r   rz   r   r   r   test_displayed_only`  s    	z TestReadHtml.test_displayed_onlyc           
   	   C   s   t j|}t j|d }|d\}}zt|d}| j| |dd }W 5 Q R X t|d"}| jt	| |dd }W 5 Q R X | j||dd }	t
|| t
||	 W n4 tk
r   t rd|ksd|krt   Y nX d S )Nr   _rb)rK   rC   Z16Z32)r   r   basenamesplitextsplitr   r   r   popr   r(   r)   	Exceptionr   r9   skip)
rP   r   	base_pathrootrG  rK   ZfobjZfrom_stringZfrom_file_likefrom_filenamer   r   r   test_encode  s8      
    
zTestReadHtml.test_encodec              	   C   sj   | j jddkrtd G dd dt}|d}|  |sBttjtdd |  | W 5 Q R X d S )	Nr7   rA   zNot applicable for lxmlc                   @   s   e Zd Zdd ZdS )zFTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIOc                 S   s   dS NFr   r   r   r   r   seekable  s    zOTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIO.seekableN)__name__
__module____qualname__rT  r   r   r   r   UnseekableStringIO  s   rX  z?
            <table><tr><td>spam<foobr />eggs</td></tr></table>z#passed a non-rewindable file objectr2   )	r   keywordsgetr9   rN  r   r$   r:   r>   )rP   rX  badr   r   r   test_parse_failure_unseekable  s    
z*TestReadHtml.test_parse_failure_unseekablec                 C   s>   G dd d}|d}|d}|  |s,t|  |s:td S )Nc                   @   s.   e Zd Zdd Zd
ddZdd Zdd	 ZdS )z9TestReadHtml.test_parse_failure_rewinds.<locals>.MockFilec                 S   s   || _ d| _d S rS  )r   at_end)rP   r   r   r   r   __init__  s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__init__Nc                 S   s   | j r
dn| j}d| _ |S )Nr   T)r]  r   )rP   sizer   r   r   r   r     s    z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.readc                 S   s
   d| _ d S rS  )r]  )rP   offsetr   r   r   seek  s    z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekc                 S   s   dS )NTr   r   r   r   r   rT    s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekable)N)rU  rV  rW  r^  r   ra  rT  r   r   r   r   MockFile  s   
rb  z/<table><tr><td>spam<br />eggs</td></tr></table>z2<table><tr><td>spam<foobr />eggs</td></tr></table>)r   r$   )rP   rb  Zgoodr[  r   r   r   test_parse_failure_rewinds  s
    z'TestReadHtml.test_parse_failure_rewindsc                 C   s   G dd dt j}ttjj |dddd}|| j|fd}|| j|fd}|  |  | s\| rnq\d |j	  kr|j	ksn t
d S )Nc                       s   e Zd Z fddZ  ZS )z@TestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThreadc              
      sB   zt    W n( tk
r6 } z
|| _W 5 d }~X Y nX d | _d S r   )superrunrM  err)rP   rf  	__class__r   r   re    s
    zDTestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThread.run)rU  rV  rW  re  __classcell__r   r   rg  r   ErrorThread  s   rj  r   r   r4   rB   )targetr+   )	threadingThreadr   pandasr   r4   r   startis_aliverf  r$   )rP   r   rj  rE   Zhelper_thread1Zhelper_thread2r   r   r   test_importcheck_thread_safety  s    
z+TestReadHtml.test_importcheck_thread_safetyc                 C   sB   |dddd}t |}| |d }| |d }t|| d S )Nr   r   r4   r5   r   )r   r   r(   r)   )rP   r   Zfile_path_string	file_pathrj   rk   r   r   r   test_parse_path_object  s
    z#TestReadHtml.test_parse_path_object)[rU  rV  rW  r9   fixturerQ   rS   r`   markZxfailr(   networkrl   rn   rq   Zslowrs   rx   r{   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r'  r)  r*  r-  r0  r1  r4  r7  r8  r9  r@  rA  rB  rD  parametrizer
   rF  rR  r\  rc  rq  rs  r   r   r   r   rI   d   s   	


	



		

	









&
1
D!


 
rI   )7	functoolsr   	importlibr   r   r   r   r   pathlibr   r   rl  urllib.errorr   ZnumpyrV   r9   Zpandas.compatr   Zpandas.errorsr	   Zpandas.util._test_decoratorsutilZ_test_decoratorstdrn  r
   r   r   r   r   r   r   Zpandas._testingZ_testingr(   Zpandas.io.commonr   r   r   r   dirname__file__ZHERErt  r   r.   Z
skip_if_nor<   r@   rF   ru  rw  r   rI   r   r   r   r   <module>   sV   $	
	