测试gitignore

This commit is contained in:
ladeng07
2022-05-06 15:45:57 +08:00
parent 12f390949b
commit 51552904f9
2347 changed files with 120102 additions and 53549 deletions
+35 -64
View File
@@ -1,4 +1,4 @@
"""JsLex: a lexer for JavaScript"""
"""JsLex: a lexer for Javascript"""
# Originally from https://bitbucket.org/ned/jslex
import re
@@ -7,7 +7,6 @@ class Tok:
"""
A specification for a token class.
"""
num = 0
def __init__(self, name, regex, next=None):
@@ -76,23 +75,23 @@ class Lexer:
class JsLexer(Lexer):
"""
A JavaScript lexer
A Javascript lexer
>>> lexer = JsLexer()
>>> list(lexer.lex("a = 1"))
[('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')]
This doesn't properly handle non-ASCII characters in the JavaScript source.
This doesn't properly handle non-ASCII characters in the Javascript source.
"""
# Because these tokens are matched as alternatives in a regex, longer
# possibilities must appear in the list before shorter ones, for example,
# '>>' before '>'.
#
# Note that we don't have to detect malformed JavaScript, only properly
# lex correct JavaScript, so much of this is simplified.
# Note that we don't have to detect malformed Javascript, only properly
# lex correct Javascript, so much of this is simplified.
# Details of JavaScript lexical structure are taken from
# Details of Javascript lexical structure are taken from
# http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf
# A useful explanation of automatic semicolon insertion is at
@@ -102,34 +101,21 @@ class JsLexer(Lexer):
Tok("comment", r"/\*(.|\n)*?\*/"),
Tok("linecomment", r"//.*?$"),
Tok("ws", r"\s+"),
Tok(
"keyword",
literals(
"""
Tok("keyword", literals("""
break case catch class const continue debugger
default delete do else enum export extends
finally for function if import in instanceof
new return super switch this throw try typeof
var void while with
""",
suffix=r"\b",
),
next="reg",
),
Tok("reserved", literals("null true false", suffix=r"\b"), next="div"),
Tok(
"id",
r"""
""", suffix=r"\b"), next='reg'),
Tok("reserved", literals("null true false", suffix=r"\b"), next='div'),
Tok("id", r"""
([a-zA-Z_$ ]|\\u[0-9a-fA-Z]{4}) # first char
([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})* # rest chars
""",
next="div",
),
Tok("hnum", r"0[xX][0-9a-fA-F]+", next="div"),
""", next='div'),
Tok("hnum", r"0[xX][0-9a-fA-F]+", next='div'),
Tok("onum", r"0[0-7]+"),
Tok(
"dnum",
r"""
Tok("dnum", r"""
( (0|[1-9][0-9]*) # DecimalIntegerLiteral
\. # dot
[0-9]* # DecimalDigits-opt
@@ -142,23 +128,15 @@ class JsLexer(Lexer):
(0|[1-9][0-9]*) # DecimalIntegerLiteral
([eE][-+]?[0-9]+)? # ExponentPart-opt
)
""",
next="div",
),
Tok(
"punct",
literals(
"""
""", next='div'),
Tok("punct", literals("""
>>>= === !== >>> <<= >>= <= >= == != << >> &&
|| += -= *= %= &= |= ^=
"""
),
next="reg",
),
Tok("punct", literals("++ -- ) ]"), next="div"),
Tok("punct", literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next="reg"),
Tok("string", r'"([^"\\]|(\\(.|\n)))*?"', next="div"),
Tok("string", r"'([^'\\]|(\\(.|\n)))*?'", next="div"),
"""), next="reg"),
Tok("punct", literals("++ -- ) ]"), next='div'),
Tok("punct", literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next='reg'),
Tok("string", r'"([^"\\]|(\\(.|\n)))*?"', next='div'),
Tok("string", r"'([^'\\]|(\\(.|\n)))*?'", next='div'),
]
both_after = [
@@ -167,16 +145,13 @@ class JsLexer(Lexer):
states = {
# slash will mean division
"div": both_before
+ [
Tok("punct", literals("/= /"), next="reg"),
]
+ both_after,
'div': both_before + [
Tok("punct", literals("/= /"), next='reg'),
] + both_after,
# slash will mean regex
"reg": both_before
+ [
Tok(
"regex",
'reg': both_before + [
Tok("regex",
r"""
/ # opening slash
# First character is..
@@ -199,51 +174,47 @@ class JsLexer(Lexer):
)* # many times
/ # closing slash
[a-zA-Z0-9]* # trailing flags
""",
next="div",
),
]
+ both_after,
""", next='div'),
] + both_after,
}
def __init__(self):
super().__init__(self.states, "reg")
super().__init__(self.states, 'reg')
def prepare_js_for_gettext(js):
    """
    Convert the JavaScript source `js` into something resembling C for
    xgettext.

    The source is lexed with JsLexer and re-emitted token by token with
    three rewrites:

    * every regex literal is replaced with the string "REGEX";
    * single-quoted strings are rewritten as double-quoted strings, with
      any bare double quote inside them backslash-escaped;
    * backslashes in identifiers are replaced with "U".

    All other tokens are passed through unchanged. Returns the rewritten
    source as a single string.
    """

    def escape_quotes(m):
        """Used in a regex to properly escape double quotes."""
        s = m[0]
        # Only a bare double quote needs escaping; an already-escaped pair
        # (matched by the `\\.` alternative below) is returned untouched.
        return r"\"" if s == '"' else s

    lexer = JsLexer()
    c = []
    for name, tok in lexer.lex(js):
        if name == "regex":
            # C doesn't grok regexes, and they aren't needed for gettext,
            # so just output a string instead.
            tok = '"REGEX"'
        elif name == "string":
            # C doesn't have single-quoted strings, so make all strings
            # double-quoted.
            if tok.startswith("'"):
                # `\\.` consumes backslash escapes as a unit so that the
                # character after a backslash is never examined on its own.
                guts = re.sub(r"\\.|.", escape_quotes, tok[1:-1])
                tok = '"' + guts + '"'
        elif name == "id":
            # C can't deal with Unicode escapes in identifiers. We don't
            # need them for gettext anyway, so replace them with something
            # innocuous.
            tok = tok.replace("\\", "U")
        c.append(tok)
    return "".join(c)