U
    n/e                     @   s2   d dl Zd dlZd dlmZmZ G dd dZdS )    N)SentenceTransformerutilc                   @   s2   e Zd ZdejedddZdd Zdd	d
ZdS )MDMNtext_distance)master_datatext_columnc                 C   sB   || _ || _|dkr6|dkr d}t|| _|  | _n|dkr>dS )aa  _summary_

        Args:
            token (str, required): token get from discovery. Defaults to None.
            verbose (bool, optional): display logging. Defaults to True.
            dataplatform_api_uri (str, optional): dataplatform uri. Defaults to "https://api.discovery.data.storemesh.com".

        Raises:
            Exception `Please enter your token from dsmOauth`: Invalid token
            Exception `Can not connect to DataPlatform`: Some thing wrong connection with dataplatform
            Exception `Can not get objectstorage user`: Some thing wrong connection with objectstorage
        Zsemantic_similarityNz%paraphrase-multilingual-MiniLM-L12-v2r   )r   r   r   modelencode_textmaster_data_encoded)selfr   r   Z
model_nameZ
model_type r   2/tmp/pip-unpacked-wheel-t2z2wkof/dsmlibrary/mdm.py__init__   s    
zMDM.__init__c                 C   s   | j | j j}| jj|ddS )NTZconvert_to_tensor)r   r   valuesr   encode)r   	text_listr   r   r   r	       s    zMDM.encode_text   c           
      C   s~   | j j|dd}t|| j}| }tj|dd}|jd }t	|d d d |f }|t
|d d d f |f }	||	fS )NTr      )Zaxisr   )r   r   r   cos_simr
   numpynpZargsortshapeZfliplrZarange)
r   r   ZtopnXr   Z
cos_sim_npZindexsZn_masterZdes_topn_indexsZsorted_scorer   r   r   get_topn_similarity$   s    
zMDM.get_topn_similarity)Nr   )r   )	__name__
__module____qualname__pdZ	DataFramestrr   r	   r   r   r   r   r   r      s   r   )Zpandasr   r   r   Zsentence_transformersr   r   r   r   r   r   r   <module>   s   