U
    h9                     @   s"  d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZmZmZmZ ddlmZ dd	lmZmZmZmZmZmZmZmZmZmZmZmZ G d
d deZe e	e e
e f dddZ!edededededededd edd edd edd edd iZ"e ee e dddZ#dS )z(
Simple HTML -> Telegram entity parser.
    )deque)escape)
HTMLParser)IterableTupleList   )add_surrogatedel_surrogatewithin_surrogate
strip_text)TLObject)MessageEntityBoldMessageEntityItalicMessageEntityCodeMessageEntityPreMessageEntityEmailMessageEntityUrlMessageEntityTextUrlMessageEntityMentionNameMessageEntityUnderlineMessageEntityStrikeMessageEntityBlockquoteTypeMessageEntityc                       s4   e Zd Z fddZdd Zdd Zdd Z  ZS )	HTMLToTelegramParserc                    s0   t    d| _g | _i | _t | _t | _d S )N )super__init__textentities_building_entitiesr   
_open_tags_open_tags_meta)self	__class__ </tmp/pip-unpacked-wheel-c81u5j2r/telethon/extensions/html.pyr      s    
zHTMLToTelegramParser.__init__c                 C   s  | j | | jd  t|}d }i }|dks8|dkr@t}nZ|dksP|dkrXt}nB|dkrht}n2|dksx|dkrt}n|dkrt}n
|d	krz>| j	d
 }z|d t
dd  |_W n tk
r   Y nX W n tk
r   t}Y nX n|d
kr
t}d|d< n|dkrz|d }W n tk
r8   Y d S X |dr\|t
dd  }t}n(|  |krpt}nt}t||d< d }| j  | j| |r|| j	kr|f t
| jdd|| j	|< d S )Nstrongbemiudels
blockquotecodepreclassz	language-r   languageahrefzmailto:urlr   )offsetlength)r!   
appendleftr"   dictr   r   r   r   r   r    lenr3   KeyErrorr   r   
startswithr   get_starttag_textr   r   r
   popleftr   )r#   tagattrsZ
EntityTypeargsr1   r6   r&   r&   r'   handle_starttag   sb    





z$HTMLToTelegramParser.handle_starttagc                 C   sn   t | jdkr| jd nd}|dkr6| jd }|r6|}| j D ]\}}| jt |7  _q@|  j|7  _d S )Nr   r   r4   )r;   r!   r"   r    itemsr8   r   )r#   r   Zprevious_tagr6   r@   entityr&   r&   r'   handle_dataY   s    
z HTMLToTelegramParser.handle_datac                 C   sP   z| j   | j  W n tk
r,   Y nX | j|d }|rL| j| d S N)r!   r?   r"   
IndexErrorr    popr   append)r#   r@   rE   r&   r&   r'   handle_endtage   s    
z"HTMLToTelegramParser.handle_endtag)__name__
__module____qualname__r   rC   rF   rK   __classcell__r&   r&   r$   r'   r      s   <r   )htmlreturnc                 C   sX   | s| g fS t  }|t|  t|j|j}|j  |jjdd d t||jfS )a  
    Parses the given HTML message and returns its stripped representation
    plus a list of the MessageEntity's that were found.

    :param html: the message with HTML to be parsed.
    :return: a tuple consisting of (clean message, [message entities]).
    c                 S   s   | j S rG   )r7   )rE   r&   r&   r'   <lambda>       zparse.<locals>.<lambda>key)	r   feedr	   r   r   r   reversesortr
   )rP   parserr   r&   r&   r'   parsep   s    
rZ   )z<strong>z	</strong>)z<em>z</em>)z<code>z</code>)z<u>z</u>)z<del>z</del>)z<blockquote>z</blockquote>c                 C   s   d | jdfS )Nz-<pre>
    <code class='language-{}'>
        z{}
    </code>
</pre>)formatr3   e_r&   r&   r'   rR      s
    rR   c                 C   s   d |dfS )Nz<a href="mailto:{}"></a>r[   r^   tr&   r&   r'   rR      rS   c                 C   s   d |dfS Nz<a href="{}">r_   r`   ra   r&   r&   r'   rR      rS   c                 C   s   d t| jdfS rc   )r[   r   r6   r\   r&   r&   r'   rR      rS   c                 C   s   d | jdfS )Nz<a href="tg://user?id={}">r_   )r[   Zuser_idr\   r&   r&   r'   rR      rS   )r   r   rQ   c                 C   sB  | s| S |st | S t|tr$|f}t| } g }t|D ]r\}}|j}|j|j }tt	|d}|r8t
|r||| || }||||d f ||| |d f q8|jdd d t| }|r| \}	}
}t| |	r|	d7 }	q| d|	 | t | |	|  | |d  } |	}qt | d| | |d  } t| S )a=  
    Performs the reverse operation to .parse(), effectively returning HTML
    given a normal text and its MessageEntity's.

    :param text: the text to be reconverted into HTML.
    :param entities: the MessageEntity's applied to the text.
    :return: a HTML representation of the combination of both inputs.
    Nr      c                 S   s   | d | d fS )Nr   rd   r&   )rb   r&   r&   r'   rR      rS   zunparse.<locals>.<lambda>rT   )r   
isinstancer   r	   	enumerater7   r8   ENTITY_TO_FORMATTERgettypecallablerJ   rX   r;   rI   r   r
   )r   r   Z	insert_atr+   rE   r.   r]   	delimiterZnext_escape_boundatr^   whatr&   r&   r'   unparse   s6    	


,rn   N)$__doc__collectionsr   rP   r   html.parserr   typingr   r   r   Zhelpersr	   r
   r   r   tlr   Ztl.typesr   r   r   r   r   r   r   r   r   r   r   r   r   strrZ   rg   rn   r&   r&   r&   r'   <module>   sB   8	\           