Skip to content

Commit 5467d4c

Browse files
committed
Patch #612627: Add encoding attribute to file objects, and determine
the terminal encoding on Windows and Unix.
1 parent b7b4ce2 commit 5467d4c

File tree

6 files changed

+109
-1
lines changed

6 files changed

+109
-1
lines changed

Doc/api/concrete.tex

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2029,6 +2029,12 @@ \subsection{File Objects \label{fileObjects}}
20292029
creation.
20302030
\end{cfuncdesc}
20312031

2032+
\begin{cfuncdesc}{int}{PyFile_SetEncoding}{PyObject *p, const char *enc}
2033+
Set the file's encoding for Unicode output to \var{enc}. Return
2034+
1 on success and 0 on failure.
2035+
\versionadded{2.3}
2036+
\end{cfuncdesc}
2037+
20322038
\begin{cfuncdesc}{int}{PyFile_SoftSpace}{PyObject *p, int newflag}
20332039
This function exists for internal use by the interpreter. Sets the
20342040
\member{softspace} attribute of \var{p} to \var{newflag} and

Doc/lib/libstdtypes.tex

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,6 +1372,20 @@ \subsection{File Objects
13721372
It may not be available on all file-like objects.
13731373
\end{memberdesc}
13741374

1375+
\begin{memberdesc}[file]{encoding}
1376+
The encoding that this file uses. When Unicode strings are written
1377+
to a file, they will be converted to byte strings using this encoding.
1378+
In addition, when the file is connected to a terminal, the attribute
1379+
gives the encoding that the terminal is likely to use (that
1380+
information might be incorrect if the user has misconfigured the
1381+
terminal). The attribute is read-only and may not be present on
1382+
all file-like objects. It may also be \code{None}, in which case
1383+
the file uses the system default encoding for converting Unicode
1384+
strings.
1385+
1386+
\versionadded{2.3}
1387+
\end{memberdesc}
1388+
13751389
\begin{memberdesc}[file]{mode}
13761390
The I/O mode for the file. If the file was created using the
13771391
\function{open()} built-in function, this will be the value of the

Include/fileobject.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ typedef struct {
2424
int f_newlinetypes; /* Types of newlines seen */
2525
int f_skipnextlf; /* Skip next \n */
2626
#endif
27+
PyObject *f_encoding;
2728
} PyFileObject;
2829

2930
PyAPI_DATA(PyTypeObject) PyFile_Type;
@@ -33,6 +34,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type;
3334

3435
PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
3536
PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
37+
PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
3638
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
3739
int (*)(FILE *));
3840
PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 2.3 beta 2?
1212
Core and builtins
1313
-----------------
1414

15+
- The encoding attribute has been added for file objects, and set to
16+
the terminal encoding on Unix and Windows.
17+
1518
- The softspace attribute of file objects became read-only by oversight.
1619
It's writable again.
1720

Objects/fileobject.c

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
116116

117117
Py_DECREF(f->f_name);
118118
Py_DECREF(f->f_mode);
119+
Py_DECREF(f->f_encoding);
119120
#ifdef Py_USING_UNICODE
120121
if (wname)
121122
f->f_name = PyUnicode_FromObject(wname);
@@ -133,7 +134,9 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
133134
f->f_newlinetypes = NEWLINE_UNKNOWN;
134135
f->f_skipnextlf = 0;
135136
#endif
136-
137+
Py_INCREF(Py_None);
138+
f->f_encoding = Py_None;
139+
137140
if (f->f_name == NULL || f->f_mode == NULL)
138141
return NULL;
139142
f->f_fp = fp;
@@ -302,6 +305,21 @@ PyFile_SetBufSize(PyObject *f, int bufsize)
302305
}
303306
}
304307

308+
/* Set the encoding used to output Unicode strings.
309+
Returh 1 on success, 0 on failure. */
310+
311+
int
312+
PyFile_SetEncoding(PyObject *f, const char *enc)
313+
{
314+
PyFileObject *file = (PyFileObject*)f;
315+
PyObject *str = PyString_FromString(enc);
316+
if (!str)
317+
return 0;
318+
Py_DECREF(file->f_encoding);
319+
file->f_encoding = str;
320+
return 1;
321+
}
322+
305323
static PyObject *
306324
err_closed(void)
307325
{
@@ -323,6 +341,7 @@ file_dealloc(PyFileObject *f)
323341
}
324342
Py_XDECREF(f->f_name);
325343
Py_XDECREF(f->f_mode);
344+
Py_XDECREF(f->f_encoding);
326345
drop_readahead(f);
327346
f->ob_type->tp_free((PyObject *)f);
328347
}
@@ -1667,6 +1686,8 @@ static PyMemberDef file_memberlist[] = {
16671686
"file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
16681687
{"name", T_OBJECT, OFF(f_name), RO,
16691688
"file name"},
1689+
{"encoding", T_OBJECT, OFF(f_encoding), RO,
1690+
"file encoding"},
16701691
/* getattr(f, "closed") is implemented without this table */
16711692
{NULL} /* Sentinel */
16721693
};
@@ -1851,6 +1872,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
18511872
((PyFileObject *)self)->f_name = not_yet_string;
18521873
Py_INCREF(not_yet_string);
18531874
((PyFileObject *)self)->f_mode = not_yet_string;
1875+
Py_INCREF(Py_None);
1876+
((PyFileObject *)self)->f_encoding = Py_None;
18541877
}
18551878
return self;
18561879
}
@@ -2034,11 +2057,28 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
20342057
}
20352058
else if (PyFile_Check(f)) {
20362059
FILE *fp = PyFile_AsFile(f);
2060+
PyObject *enc = ((PyFileObject*)f)->f_encoding;
2061+
int result;
20372062
if (fp == NULL) {
20382063
err_closed();
20392064
return -1;
20402065
}
2066+
#ifdef Py_USING_UNICODE
2067+
if (PyUnicode_Check(v) && enc != Py_None) {
2068+
char *cenc = PyString_AS_STRING(enc);
2069+
value = PyUnicode_AsEncodedString(v, cenc, "strict");
2070+
if (value == NULL)
2071+
return -1;
2072+
} else {
2073+
value = v;
2074+
Py_INCREF(value);
2075+
}
2076+
result = PyObject_Print(value, fp, flags);
2077+
Py_DECREF(value);
2078+
return result;
2079+
#else
20412080
return PyObject_Print(v, fp, flags);
2081+
#endif
20422082
}
20432083
writer = PyObject_GetAttrString(f, "write");
20442084
if (writer == NULL)

Python/sysmodule.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ extern const char *PyWin_DLLVersionString;
3636
#include <unixlib.h>
3737
#endif
3838

39+
#ifdef MS_WINDOWS
40+
#include <windows.h>
41+
#endif
42+
43+
#ifdef HAVE_LANGINFO_H
44+
#include <locale.h>
45+
#include <langinfo.h>
46+
#endif
47+
3948
PyObject *
4049
PySys_GetObject(char *name)
4150
{
@@ -881,6 +890,12 @@ _PySys_Init(void)
881890
PyObject *m, *v, *sysdict;
882891
PyObject *sysin, *sysout, *syserr;
883892
char *s;
893+
#ifdef MS_WINDOWS
894+
char buf[10];
895+
#endif
896+
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
897+
char *oldloc, *codeset;
898+
#endif
884899

885900
m = Py_InitModule3("sys", sys_methods, sys_doc);
886901
sysdict = PyModule_GetDict(m);
@@ -890,6 +905,34 @@ _PySys_Init(void)
890905
syserr = PyFile_FromFile(stderr, "<stderr>", "w", NULL);
891906
if (PyErr_Occurred())
892907
return NULL;
908+
#ifdef MS_WINDOWS
909+
if(isatty(_fileno(stdin))){
910+
sprintf(buf, "cp%d", GetConsoleCP());
911+
if (!PyFile_SetEncoding(sysin, buf))
912+
return NULL;
913+
}
914+
if(isatty(_fileno(stdout))) {
915+
sprintf(buf, "cp%d", GetConsoleOutputCP());
916+
if (!PyFile_SetEncoding(sysout, buf))
917+
return NULL;
918+
}
919+
#endif
920+
921+
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
922+
oldloc = setlocale(LC_CTYPE, 0);
923+
setlocale(LC_CTYPE, "");
924+
codeset = nl_langinfo(CODESET);
925+
setlocale(LC_CTYPE, oldloc);
926+
if(codeset && isatty(fileno(stdin))){
927+
if (!PyFile_SetEncoding(sysin, codeset))
928+
return NULL;
929+
}
930+
if(codeset && isatty(fileno(stdout))) {
931+
if (!PyFile_SetEncoding(sysout, codeset))
932+
return NULL;
933+
}
934+
#endif
935+
893936
PyDict_SetItemString(sysdict, "stdin", sysin);
894937
PyDict_SetItemString(sysdict, "stdout", sysout);
895938
PyDict_SetItemString(sysdict, "stderr", syserr);

0 commit comments

Comments
 (0)