ó
…¾^Yc           @   sa  d  Z  d d l Z d d l Z d d l Z d d l Z d d l Z d d l Z d d l m Z d d l	 m
 Z
 m Z d d l m Z d „  Z e d d ƒ d	 „  ƒ Z d
 e f d „  ƒ  YZ e ƒ  j Z e j d d e j ƒZ e d d ƒ d „  ƒ Z e d d ƒ d „  ƒ Z e j r!e Z n e Z e d d ƒ d „  ƒ Z e d d d d ƒ d „  ƒ Z d S(   s"   Better tokenizing for coverage.py.iÿÿÿÿN(   t   env(   t   iternextt   unicode_class(   t   contractc         c   sJ  d
 } d } d
 } x1|  D])\ } } \ } } \ } }	 }
 | | k r| r| j d ƒ rt } | t j k ry t } nG | t j k rÀ d | k rÀ | j d d ƒ d d d k rÀ t } qÀ n  | rt	 | j d ƒ d ƒ d } d d | | f | | d	 f | f Vqn  |
 } | } n  | | | | f | |	 f |
 f V| } q Wd
 S(   sB  Return all physical tokens, even line continuations.

    tokenize.generate_tokens() doesn't return a token for the backslash that
    continues lines.  This wrapper provides those tokens so that we can
    re-create a faithful representation of the original source.

    Returns the same values as generate_tokens()

    iÿÿÿÿs   \
s   
i   i    s   \iþÿÿÿiŸ† i   N(
   t   Nonet   endswitht   Truet   tokenizet   COMMENTt   Falset   tokent   STRINGt   splitt   len(   t   tokst	   last_linet   last_linenot
   last_ttypet   ttypet   ttextt   slinenot   scolt   elinenot   ecolt   ltextt   inject_backslasht   ccol(    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   phys_tokens   s*    
(	,	 t   sourcet   unicodec         c   s¸  t  t j t j t j t j g ƒ } g  } d } |  j d ƒ j d d ƒ }  t	 |  ƒ } xLt
 | ƒ D]>\ } } \ } } \ } }	 } t }
 xt j d | ƒ D]ñ } | d k rÊ | Vg  } d } t } n¿ | d k rß t } nª | | k rô t } n• |
 r*| | k r*| j d d | | f ƒ t }
 n  t j j | d	 ƒ j ƒ  d
  } | t j k rpt j | ƒ rpd } n  | j | | f ƒ t } d } qž W| rd |	 } qd qd W| r´| Vn  d S(   s  Generate a series of lines, one for each line in `source`.

    Each line is a list of pairs, each pair is a token::

        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

    Each pair has a token class, and the token text.

    If you concatenate all the token texts, and then join them with newlines,
    you should have your original `source` back, with two differences:
    trailing whitespace is not preserved, and a final line with no newline
    is indistinguishable from a final line with a newline.

    i    i   s   
s   
s   (
)t    t   wsu    t   xxi   t   keyN(   t   setR
   t   INDENTt   DEDENTt   NEWLINER   t   NLt
   expandtabst   replacet   generate_tokensR   R   t   reR   R	   t   appendt   tok_namet   gett   lowert   NAMEt   keywordt	   iskeyword(   R   t	   ws_tokenst   linet   colt   tokgenR   R   t   _R   R   t
   mark_startt   partt   mark_endt	   tok_class(    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   source_token_linesJ   s<    $.					
t   CachedTokenizerc           B   s/   e  Z d  Z d „  Z e d d ƒ d „  ƒ Z RS(   sX  A one-element cache around tokenize.generate_tokens.

    When reporting, coverage.py tokenizes files twice, once to find the
    structure of the file, and once to syntax-color it.  Tokenizing is
    expensive, and easily cached.

    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
    actually tokenize twice.

    c         C   s   d  |  _ d  |  _ d  S(   N(   R   t	   last_textt   last_tokens(   t   self(    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   __init__Š   s    	t   textR   c         C   sO   | |  j  k rH | |  _  t | j t ƒ ƒ } t t j | ƒ ƒ |  _ n  |  j S(   s*   A stand-in for `tokenize.generate_tokens`.(   R=   R   t
   splitlinesR   t   listR   R)   R>   (   R?   RA   t   readline(    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyR)   Ž   s
    	(   t   __name__t
   __module__t   __doc__R@   R   R)   (    (    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyR<      s   
	s#   ^[ \t]*#.*coding[:=][ \t]*([-\w.]+)t   flagst   bytesc            sç   t  |  t ƒ s t ‚ t |  j t ƒ ƒ ‰ d „  ‰  d } t ‰ d } ‡ f d †  } ‡  ‡ f d †  } | ƒ  } | j t	 j
 ƒ rš t ‰ | d } d } n  | s¤ | S| | ƒ } | rº | S| ƒ  } | sÍ | S| | ƒ } | rã | S| S(   sª   Determine the encoding for `source`, according to PEP 263.

    `source` is a byte string, the text of the program.

    Returns a string, the name of the encoding.

    c         S   sL   |  d  j  ƒ  j d d ƒ } t j d | ƒ r2 d St j d | ƒ rH d S|  S(   s(   Imitates get_normal_name in tokenizer.c.i   R6   t   -s   ^utf-8($|-)s   utf-8s&   ^(latin-1|iso-8859-1|iso-latin-1)($|-)s
   iso-8859-1(   R.   R(   R*   t   match(   t   orig_enct   enc(    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   _get_normal_name­   s    t   asciic              s$   y ˆ  ƒ  SWn t  k
 r d SXd S(   s    Get the next source line, or ''.R   N(   t   StopIteration(    (   RD   (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   read_or_stopÄ   s    c            sÌ   y |  j  d ƒ } Wn t k
 r' d SXt j | ƒ } | sA d Sˆ  | d ƒ } y t j | ƒ } Wn! t k
 r‡ t d | ƒ ‚ n Xˆ rÈ t	 | d | ƒ } | d k r» t d ƒ ‚ n  | d 7} n  | S(	   s"   Find an encoding cookie in `line`.RO   i    s   unknown encoding: t   names   utf-8s   encoding problem: utf-8s   -sigN(
   t   decodet   UnicodeDecodeErrorR   t	   COOKIE_REt   findallt   codecst   lookupt   LookupErrort   SyntaxErrort   getattr(   R3   t   line_stringt   matchest   encodingt   codect
   codec_name(   RN   t	   bom_found(    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   find_cookieË   s$    i   s	   utf-8-sigN(   t
   isinstanceRI   t   AssertionErrorR   RB   R   R	   R   t
   startswithRW   t   BOM_UTF8(   R   t   defaultR^   RQ   Rb   t   firstt   second(    (   RN   Ra   RD   s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   _source_encoding_py2   s2    			
		c         C   s&   t  |  j t ƒ ƒ } t j | ƒ d S(   sª   Determine the encoding for `source`, according to PEP 263.

    `source` is a byte string: the text of the program.

    Returns a string, the name of the encoding.

    i    (   R   RB   R   R   t   detect_encoding(   R   RD   (    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   _source_encoding_py3ü   s    	c         C   sU   t  |  ƒ }  t j r? t | t ƒ r? | j t j ƒ  d ƒ } n  t |  | | ƒ } | S(   s»  Just like the `compile` builtin, but works on any Unicode string.

    Python 2's compile() builtin has a stupid restriction: if the source string
    is Unicode, then it may not have a encoding declaration in it.  Why not?
    Who knows!  It also decodes to utf8, and then tries to interpret those utf8
    bytes according to the encoding declaration.  Why? Who knows!

    This function neuters the coding declaration, and compiles it.

    R(   (	   t   neuter_encoding_declarationR    t   PY2Rc   R   t   encodet   syst   getfilesystemencodingt   compile(   R   t   filenamet   modet   code(    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   compile_unicode  s
    t   returnsc         C   st   t  j |  ƒ rp |  j t ƒ } x= t t d t | ƒ ƒ ƒ D]  } t  j d | | ƒ | | <q: Wd j | ƒ }  n  |  S(   s8   Return `source`, with any encoding declaration neutered.i   s   # (deleted declaration)R   (	   RU   t   searchRB   R   t   ranget   minR   t   subt   join(   R   t   source_linest   lineno(    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyRm   "  s    "(   RG   RW   R0   R*   Rp   R
   R   t   coverageR    t   coverage.backwardR   R   t   coverage.miscR   R   R;   t   objectR<   R)   Rr   t	   MULTILINERU   Rj   Rl   t   PY3t   source_encodingRv   Rm   (    (    (    s5   /tmp/pip-build-hU8Cw8/coverage/coverage/phystokens.pyt   <module>   s*   	85_		