Skip to content

Commit cb1f8df

Browse files
author
christian.heimes
committed
Patch #2477: Added from __future__ import unicode_literals
The new PyParser_*Ex() functions are based on Neal's suggestion and initial patch. The new __future__ feature makes all '' and r'' unicode strings. b'' and br'' stay (byte) strings. git-svn-id: http://svn.python.org/projects/python/trunk@61953 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 9fb1852 commit cb1f8df

12 files changed

Lines changed: 107 additions & 33 deletions

File tree

Include/code.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ typedef struct {
4949
#define CO_FUTURE_ABSOLUTE_IMPORT 0x4000 /* do absolute imports by default */
5050
#define CO_FUTURE_WITH_STATEMENT 0x8000
5151
#define CO_FUTURE_PRINT_FUNCTION 0x10000
52+
#define CO_FUTURE_UNICODE_LITERALS 0x20000
5253

5354
/* This should be defined if a future statement modifies the syntax.
5455
For example, when a keyword is added.

Include/compile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ typedef struct {
2525
#define FUTURE_ABSOLUTE_IMPORT "absolute_import"
2626
#define FUTURE_WITH_STATEMENT "with_statement"
2727
#define FUTURE_PRINT_FUNCTION "print_function"
28+
#define FUTURE_UNICODE_LITERALS "unicode_literals"
2829

2930

3031
struct _mod; /* Declare the existence of this type */

Include/parsetok.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ typedef struct {
2828
#endif
2929

3030
#define PyPARSE_PRINT_IS_FUNCTION 0x0004
31+
#define PyPARSE_UNICODE_LITERALS 0x0008
3132

3233

3334

@@ -41,11 +42,18 @@ PyAPI_FUNC(node *) PyParser_ParseStringFlags(const char *, grammar *, int,
4142
PyAPI_FUNC(node *) PyParser_ParseFileFlags(FILE *, const char *, grammar *,
4243
int, char *, char *,
4344
perrdetail *, int);
45+
PyAPI_FUNC(node *) PyParser_ParseFileFlagsEx(FILE *, const char *, grammar *,
46+
int, char *, char *,
47+
perrdetail *, int *);
4448

4549
PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilename(const char *,
4650
const char *,
4751
grammar *, int,
4852
perrdetail *, int);
53+
PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(const char *,
54+
const char *,
55+
grammar *, int,
56+
perrdetail *, int *);
4957

5058
/* Note that the following function is defined in pythonrun.c not parsetok.c. */
5159
PyAPI_FUNC(void) PyParser_SetError(perrdetail *);

Include/pythonrun.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ extern "C" {
88
#endif
99

1010
#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \
11-
CO_FUTURE_WITH_STATEMENT|CO_FUTURE_PRINT_FUNCTION)
11+
CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \
12+
CO_FUTURE_UNICODE_LITERALS)
1213
#define PyCF_MASK_OBSOLETE (CO_NESTED)
1314
#define PyCF_SOURCE_IS_UTF8 0x0100
1415
#define PyCF_DONT_IMPLY_DEDENT 0x0200

Lib/__future__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
"absolute_import",
5555
"with_statement",
5656
"print_function",
57+
"unicode_literals",
5758
]
5859

5960
__all__ = ["all_feature_names"] + all_feature_names
@@ -68,6 +69,7 @@
6869
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000 # perform absolute imports by default
6970
CO_FUTURE_WITH_STATEMENT = 0x8000 # with statement
7071
CO_FUTURE_PRINT_FUNCTION = 0x10000 # print function
72+
CO_FUTURE_UNICODE_LITERALS = 0x20000 # unicode string literals
7173

7274
class _Feature:
7375
def __init__(self, optionalRelease, mandatoryRelease, compiler_flag):
@@ -120,3 +122,7 @@ def __repr__(self):
120122
print_function = _Feature((2, 6, 0, "alpha", 2),
121123
(3, 0, 0, "alpha", 0),
122124
CO_FUTURE_PRINT_FUNCTION)
125+
126+
unicode_literals = _Feature((2, 6, 0, "alpha", 2),
127+
(3, 0, 0, "alpha", 0),
128+
CO_FUTURE_UNICODE_LITERALS)

Misc/NEWS

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ What's New in Python 2.6 alpha 2?
1212
Core and builtins
1313
-----------------
1414

15+
- Patch #2477: Added from __future__ import unicode_literals
16+
1517
- Added backport of bytearray type.
1618

1719
- Issue #2355: add Py3k warning for buffer().
@@ -186,6 +188,12 @@ Build
186188

187189
- Patch #2284: Add -x64 option to rt.bat.
188190

191+
C API
192+
-----
193+
194+
- Patch #2477: Added PyParser_ParseFileFlagsEx() and
195+
PyParser_ParseStringFlagsFilenameEx()
196+
189197
What's New in Python 2.6 alpha 1?
190198
=================================
191199

Parser/parser.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,18 @@ future_hack(parser_state *ps)
202202

203203
for (i = 0; i < NCH(ch); i += 2) {
204204
cch = CHILD(ch, i);
205-
if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME &&
206-
strcmp(STR(CHILD(cch, 0)), "with_statement") == 0) {
207-
ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
208-
break;
209-
} else if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME &&
210-
strcmp(STR(CHILD(cch, 0)), "print_function") == 0) {
211-
ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
212-
break;
205+
if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
206+
char *str_ch = STR(CHILD(cch, 0));
207+
if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
208+
ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
209+
break;
210+
} else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
211+
ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
212+
break;
213+
} else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
214+
ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
215+
break;
216+
}
213217
}
214218
}
215219
}

Parser/parsetok.c

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ int Py_TabcheckFlag;
1414

1515

1616
/* Forward */
17-
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int);
17+
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
1818
static void initerr(perrdetail *err_ret, const char* filename);
1919

2020
/* Parse input coming from a string. Return error code, print some errors. */
@@ -36,6 +36,16 @@ node *
3636
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
3737
grammar *g, int start,
3838
perrdetail *err_ret, int flags)
39+
{
40+
int iflags = flags;
41+
return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
42+
err_ret, &iflags);
43+
}
44+
45+
node *
46+
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
47+
grammar *g, int start,
48+
perrdetail *err_ret, int *flags)
3949
{
4050
struct tok_state *tok;
4151

@@ -69,6 +79,14 @@ PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
6979
node *
7080
PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start,
7181
char *ps1, char *ps2, perrdetail *err_ret, int flags)
82+
{
83+
int iflags = flags;
84+
return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags);
85+
}
86+
87+
node *
88+
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start,
89+
char *ps1, char *ps2, perrdetail *err_ret, int *flags)
7290
{
7391
struct tok_state *tok;
7492

@@ -85,7 +103,6 @@ PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start,
85103
tok->alterror++;
86104
}
87105

88-
89106
return parsetok(tok, g, start, err_ret, flags);
90107
}
91108

@@ -110,7 +127,7 @@ warn(const char *msg, const char *filename, int lineno)
110127

111128
static node *
112129
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
113-
int flags)
130+
int *flags)
114131
{
115132
parser_state *ps;
116133
node *n;
@@ -123,8 +140,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
123140
return NULL;
124141
}
125142
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
126-
if (flags & PyPARSE_PRINT_IS_FUNCTION)
143+
if (*flags & PyPARSE_PRINT_IS_FUNCTION) {
127144
ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
145+
}
146+
if (*flags & PyPARSE_UNICODE_LITERALS) {
147+
ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
148+
}
149+
128150
#endif
129151

130152
for (;;) {
@@ -147,7 +169,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
147169
except if a certain flag is given --
148170
codeop.py uses this. */
149171
if (tok->indent &&
150-
!(flags & PyPARSE_DONT_IMPLY_DEDENT))
172+
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
151173
{
152174
tok->pendin = -tok->indent;
153175
tok->indent = 0;
@@ -191,6 +213,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
191213
else
192214
n = NULL;
193215

216+
*flags = ps->p_flags;
194217
PyParser_Delete(ps);
195218

196219
if (n == NULL) {

Python/ast.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
/* Data structure used internally */
1919
struct compiling {
2020
char *c_encoding; /* source encoding */
21+
int c_future_unicode; /* __future__ unicode literals flag */
2122
PyArena *c_arena; /* arena for allocating memeory */
2223
const char *c_filename; /* filename */
2324
};
@@ -36,7 +37,7 @@ static expr_ty ast_for_testlist_gexp(struct compiling *, const node *);
3637
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
3738

3839
static PyObject *parsenumber(const char *);
39-
static PyObject *parsestr(const char *s, const char *encoding);
40+
static PyObject *parsestr(struct compiling *, const char *);
4041
static PyObject *parsestrplus(struct compiling *, const node *n);
4142

4243
#ifndef LINENO
@@ -198,6 +199,7 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
198199
} else {
199200
c.c_encoding = NULL;
200201
}
202+
c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
201203
c.c_arena = arena;
202204
c.c_filename = filename;
203205

@@ -3247,13 +3249,13 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
32473249
* parsestr parses it, and returns the decoded Python string object.
32483250
*/
32493251
static PyObject *
3250-
parsestr(const char *s, const char *encoding)
3252+
parsestr(struct compiling *c, const char *s)
32513253
{
32523254
size_t len;
32533255
int quote = Py_CHARMASK(*s);
32543256
int rawmode = 0;
32553257
int need_encoding;
3256-
int unicode = 0;
3258+
int unicode = c->c_future_unicode;
32573259

32583260
if (isalpha(quote) || quote == '_') {
32593261
if (quote == 'u' || quote == 'U') {
@@ -3262,6 +3264,7 @@ parsestr(const char *s, const char *encoding)
32623264
}
32633265
if (quote == 'b' || quote == 'B') {
32643266
quote = *++s;
3267+
unicode = 0;
32653268
}
32663269
if (quote == 'r' || quote == 'R') {
32673270
quote = *++s;
@@ -3293,12 +3296,12 @@ parsestr(const char *s, const char *encoding)
32933296
}
32943297
#ifdef Py_USING_UNICODE
32953298
if (unicode || Py_UnicodeFlag) {
3296-
return decode_unicode(s, len, rawmode, encoding);
3299+
return decode_unicode(s, len, rawmode, c->c_encoding);
32973300
}
32983301
#endif
3299-
need_encoding = (encoding != NULL &&
3300-
strcmp(encoding, "utf-8") != 0 &&
3301-
strcmp(encoding, "iso-8859-1") != 0);
3302+
need_encoding = (c->c_encoding != NULL &&
3303+
strcmp(c->c_encoding, "utf-8") != 0 &&
3304+
strcmp(c->c_encoding, "iso-8859-1") != 0);
33023305
if (rawmode || strchr(s, '\\') == NULL) {
33033306
if (need_encoding) {
33043307
#ifndef Py_USING_UNICODE
@@ -3310,7 +3313,7 @@ parsestr(const char *s, const char *encoding)
33103313
PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
33113314
if (u == NULL)
33123315
return NULL;
3313-
v = PyUnicode_AsEncodedString(u, encoding, NULL);
3316+
v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
33143317
Py_DECREF(u);
33153318
return v;
33163319
#endif
@@ -3320,7 +3323,7 @@ parsestr(const char *s, const char *encoding)
33203323
}
33213324

33223325
return PyString_DecodeEscape(s, len, NULL, unicode,
3323-
need_encoding ? encoding : NULL);
3326+
need_encoding ? c->c_encoding : NULL);
33243327
}
33253328

33263329
/* Build a Python string object out of a STRING atom. This takes care of
@@ -3333,11 +3336,11 @@ parsestrplus(struct compiling *c, const node *n)
33333336
PyObject *v;
33343337
int i;
33353338
REQ(CHILD(n, 0), STRING);
3336-
if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
3339+
if ((v = parsestr(c, STR(CHILD(n, 0)))) != NULL) {
33373340
/* String literal concatenation */
33383341
for (i = 1; i < NCH(n); i++) {
33393342
PyObject *s;
3340-
s = parsestr(STR(CHILD(n, i)), c->c_encoding);
3343+
s = parsestr(c, STR(CHILD(n, i)));
33413344
if (s == NULL)
33423345
goto onError;
33433346
if (PyString_Check(v) && PyString_Check(s)) {

Python/future.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ future_check_features(PyFutureFeatures *ff, stmt_ty s, const char *filename)
3535
ff->ff_features |= CO_FUTURE_WITH_STATEMENT;
3636
} else if (strcmp(feature, FUTURE_PRINT_FUNCTION) == 0) {
3737
ff->ff_features |= CO_FUTURE_PRINT_FUNCTION;
38+
} else if (strcmp(feature, FUTURE_UNICODE_LITERALS) == 0) {
39+
ff->ff_features |= CO_FUTURE_UNICODE_LITERALS;
3840
} else if (strcmp(feature, "braces") == 0) {
3941
PyErr_SetString(PyExc_SyntaxError,
4042
"not a chance");

0 commit comments

Comments
 (0)