MediaWiki  REL1_22
csslex.py
Go to the documentation of this file.
00001 #!/usr/bin/python
00002 #
00003 # Copyright 2007 Google Inc. All Rights Reserved.
00004 
00005 """CSS Lexical Grammar rules.
00006 
00007 CSS lexical grammar from http://www.w3.org/TR/CSS21/grammar.html
00008 """
00009 
__author__ = [
    '[email protected] (Lindsey Simon)',
    '[email protected] (Mike Samuel)',
]

# Public symbols: the names exported by a star-import of this module.
__all__ = [
    "NEWLINE",
    "HEX",
    "NON_ASCII",
    "UNICODE",
    "ESCAPE",
    "NMSTART",
    "NMCHAR",
    "STRING1",
    "STRING2",
    "IDENT",
    "NAME",
    "HASH",
    "NUM",
    "STRING",
    "URL",
    "SPACE",
    "WHITESPACE",
    "COMMENT",
    "QUANTITY",
    "PUNC",
]
00015 
00016 # The comments below are mostly copied verbatim from the grammar.
00017 
# At-rule keywords and the token each one yields in the grammar:
#   "@import"   -> IMPORT_SYM
#   "@page"     -> PAGE_SYM
#   "@media"    -> MEDIA_SYM
#   "@charset"  -> CHARSET_SYM
# The pattern is a single non-capturing group: "@" followed by one of the names.
KEYWORD = r'(?:\@(?:%s))' % '|'.join(('import', 'page', 'media', 'charset'))
00023 
# nl                      \n|\r\n|\r|\f ; a newline
# NOTE: this is a bare alternation (no enclosing group); wrap it in (?:...)
# before concatenating it with other fragments.
NEWLINE = '|'.join((r'\n', r'\r\n', r'\r', r'\f'))
00026 
# h                       [0-9a-f]      ; a hexadecimal digit
HEX = r'[0-9a-f]'

# nonascii                [\200-\377]
# The grammar's upper bound of \377 is only the largest value Flex can
# express; the CSS 2.1 grammar notes it stands for the highest Unicode code
# point.  Writing the class as "anything outside ASCII" matches exactly the
# same bytes (0x80-0xFF) on 8-bit strings, and additionally matches code
# points above U+00FF when the pattern is applied to unicode text.
NON_ASCII = r'[^\x00-\x7f]'

# unicode                 \\{h}{1,6}(\r\n|[ \t\r\n\f])?
# A backslash, one to six hex digits, then one optional whitespace terminator.
UNICODE = r'(?:(?:\\%s{1,6})(?:\r\n|[ \t\r\n\f])?)' % HEX

# escape                  {unicode}|\\[^\r\n\f0-9a-f]
ESCAPE = r'(?:%s|\\[^\r\n\f0-9a-f])' % UNICODE

# nmstart                 [_a-z]|{nonascii}|{escape}
NMSTART = r'(?:[_a-z]|%s|%s)' % (NON_ASCII, ESCAPE)

# nmchar                  [_a-z0-9-]|{nonascii}|{escape}
NMCHAR = r'(?:[_a-z0-9-]|%s|%s)' % (NON_ASCII, ESCAPE)

# ident                   -?{nmstart}{nmchar}*
# NOTE: not wrapped in a group; enclose in (?:...) before applying a
# quantifier or alternation to it.
IDENT = r'-?%s%s*' % (NMSTART, NMCHAR)

# name                    {nmchar}+
NAME = r'%s+' % NMCHAR

# hash                    "#"{name}
HASH = r'#%s' % NAME
00053 
# string1                 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"  ; "string"
# Simplified form of the grammar rule: any character other than the quote or
# a backslash, or a backslash followed by any single character.  The escaped
# character is matched with [\s\S] rather than "." so that a backslash-newline
# continuation (the grammar's \\{nl}) is accepted even when the caller does
# not compile with re.DOTALL.
STRING1 = r'"(?:[^\"\\]|\\[\s\S])*"'

# string2                 \'([^\n\r\f\\']|\\{nl}|{escape})*\'  ; 'string'
# Same as STRING1, but delimited by single quotes.
STRING2 = r"'(?:[^\'\\]|\\[\s\S])*'"

# string                  {string1}|{string2}
STRING = r'(?:%s|%s)' % (STRING1, STRING2)
00062 
# num                     [0-9]+|[0-9]*"."[0-9]+
# The decimal alternative is listed first so that "1.5" is consumed whole
# instead of stopping after the integer part "1".
NUM = r'(?:' + '|'.join((r'[0-9]*\.[0-9]+', r'[0-9]+')) + r')'
00065 
# s                       [ \t\r\n\f]
# A single whitespace character.
SPACE = r'[ \t\r\n\f]'

# w                       {s}*
# Zero or more whitespace characters (so this fragment can match empty).
WHITESPACE = r'(?:%s*)' % SPACE
00071 
# url special chars
# Characters allowed unquoted inside url(...): "!", "#", "$", "%", "&" and
# the range "*" through "~".
URL_SPECIAL_CHARS = r'[!#$%&*-~]'

# url chars               ({url_special_chars}|{nonascii}|{escape})*
URL_CHARS = '(?:' + URL_SPECIAL_CHARS + '|' + NON_ASCII + '|' + ESCAPE + ')*'

# url
# "url(" + optional whitespace + (quoted string | bare url chars)
# + optional whitespace + ")".  The url body is a capturing group.
URL = (
    r'url\(' + WHITESPACE
    + '(' + STRING + '|' + URL_CHARS + ')'
    + WHITESPACE + r'\)'
)
00080 
# comments
# see http://www.w3.org/TR/CSS21/grammar.html
COMMENT = (
    r'/\*'                # comment opener
    r'[^*]*\*+'           # text up to and including a run of stars
    r'([^/*][^*]*\*+)*'   # the star run did not close the comment: continue
    r'/'                  # slash directly after a star run closes it
)
00084 
# Unit suffixes, grouped by the token the grammar returns for each.
# The alternatives are tried in the order listed.
UNIT = '(?:' + '|'.join((
    'em',                                # {E}{M}        EMS
    'ex',                                # {E}{X}        EXS
    'px', 'cm', 'mm', 'in', 'pt', 'pc',  # {P}{X} ...    LENGTH
    'deg', 'rad', 'grad',                # {D}{E}{G} ... ANGLE
    'ms', 's',                           # {M}{S}, {S}   TIME
    'hz', 'khz',                         # {H}{Z} ...    FREQ
    '%',                                 # %             PERCENTAGE
)) + ')'
00102 
# {num}{UNIT|IDENT}                   {return NUMBER;}
# A number optionally followed by either (whitespace + a known unit) or an
# identifier used as the unit.
QUANTITY = NUM + '(?:' + WHITESPACE + UNIT + '|' + IDENT + ')?'
00105 
# "<!--"                  {return CDO;}
# "-->"                   {return CDC;}
# "~="                    {return INCLUDES;}
# "|="                    {return DASHMATCH;}
# {w}"{"                  {return LBRACE;}
# {w}"+"                  {return PLUS;}
# {w}">"                  {return GREATER;}
# {w}","                  {return COMMA;}
# NOTE: bare alternation (no enclosing group); the multi-character tokens
# come first, then a class of single-character punctuation.
PUNC = '|'.join((
    r'<!--',
    r'-->',
    r'~=',
    r'\|=',
    r'[\{\+>,:;]',
))