@deffn {Scheme Procedure} port-encoding port
@deffnx {C Function} scm_port_encoding
-Returns, as a string, the character encoding that @var{port} uses to
-interpret its input and output.
+Returns, as a string, the character encoding that @var{port} uses to interpret
+its input and output. The value @code{#f} is equivalent to @code{"ISO-8859-1"}.
@end deffn
@deffn {Scheme Procedure} set-port-conversion-strategy! port sym
(For reference, Guile leaves text versus binary up to the C library,
@code{b} here just adds @code{O_BINARY} to the underlying @code{open}
call, when that flag is available.)
+
+Also, open the file using the 8-bit character encoding "ISO-8859-1",
+ignoring any coding declaration or port encoding.
+
+Note that, when reading or writing binary data with ports, the
+bytevector ports in the @code{(rnrs io ports)} module are preferred,
+as they return vectors, and not strings (@pxref{R6RS I/O Ports}).
@end table
If a file cannot be opened with the access
requested, @code{open-file} throws an exception.
+When the file is opened, this procedure will scan for a coding
+declaration (@pxref{Character Encoding of Source Files}). If present
+will use that encoding for interpreting the file. Otherwise, the
+port's encoding will be used. To supress this behavior, open
+the file in binary mode and then set the port encoding explicitly
+using @code{set-port-encoding!}.
+
In theory we could create read/write ports which were buffered
in one direction only. However this isn't included in the
current interfaces.
* Return the new port.
*/
SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
- (SCM filename, SCM mode),
+ (SCM filename, SCM mode),
"Open the file whose name is @var{filename}, and return a port\n"
"representing that file. The attributes of the port are\n"
"determined by the @var{mode} string. The way in which this is\n"
"The following additional characters can be appended:\n"
"@table @samp\n"
"@item b\n"
- "Open the underlying file in binary mode, if supported by the operating system. "
+ "Open the underlying file in binary mode, if supported by the system.\n"
+ "Also, open the file using the binary-compatible character encoding\n"
+ "\"ISO-8859-1\", ignoring the port's encoding and the coding declaration\n"
+ "at the top of the input file, if any.\n"
"@item +\n"
"Open the port for both input and output. E.g., @code{r+}: open\n"
"an existing file for both input and output.\n"
"Add line-buffering to the port. The port output buffer will be\n"
"automatically flushed whenever a newline character is written.\n"
"@end table\n"
+ "When the file is opened, this procedure will scan for a coding\n"
+ "declaration@pxref{Character Encoding of Source Files}. If present\n"
+ "will use that encoding for interpreting the file. Otherwise, the\n"
+ "port's encoding will be used.\n"
+ "\n"
"In theory we could create read/write ports which were buffered\n"
"in one direction only. However this isn't included in the\n"
"current interfaces. If a file cannot be opened with the access\n"
#define FUNC_NAME s_scm_open_file
{
SCM port;
- int fdes, flags = 0;
+ int fdes, flags = 0, use_encoding = 1;
unsigned int retries;
char *file, *md, *ptr;
flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR;
break;
case 'b':
+ use_encoding = 0;
#if defined (O_BINARY)
flags |= O_BINARY;
#endif
}
}
+ /* Create a port from this file descriptor. The port's encoding is initially
+ %default-port-encoding. */
port = scm_i_fdes_to_port (fdes, scm_i_mode_bits (mode),
fport_canonicalize_filename (filename));
+ if (use_encoding)
+ {
+ /* If this file has a coding declaration, use that as the port
+ encoding. */
+ if (SCM_INPUT_PORT_P (port))
+ {
+ char *enc = scm_i_scan_for_encoding (port);
+ if (enc != NULL)
+ scm_i_set_port_encoding_x (port, enc);
+ }
+ }
+ else
+ /* If this is a binary file, use the binary-friendly ISO-8859-1
+ encoding. */
+ scm_i_set_port_encoding_x (port, NULL);
+
scm_dynwind_end ();
return port;
:use-module (test-suite lib)
:use-module (test-suite guile-test)
:use-module (ice-9 popen)
- :use-module (ice-9 rdelim))
+ :use-module (ice-9 rdelim)
+ :use-module (rnrs bytevectors))
(define (display-line . args)
(for-each display args)
;;; read-line should use the port encoding (not the locale encoding).
(let ((str "ĉu bone?"))
- (with-locale "C"
- (let* ((filename (test-file))
- (port (open-file filename "wl")))
- (set-port-encoding! port "UTF-8")
- (write-line str port)
- (let ((in-port (open-input-file filename)))
- (set-port-encoding! in-port "UTF-8")
- (let ((line (read-line in-port)))
- (close-port in-port)
- (close-port port)
- (pass-if "file: read-line honors port encoding"
- (string=? line str))))
- (delete-file filename))))
+ (with-locale "C"
+ (let* ((filename (test-file))
+ (port (open-file filename "wl")))
+ (set-port-encoding! port "UTF-8")
+ (write-line str port)
+ (let ((in-port (open-input-file filename)))
+ (set-port-encoding! in-port "UTF-8")
+ (let ((line (read-line in-port)))
+ (close-port in-port)
+ (close-port port)
+ (pass-if "file: read-line honors port encoding"
+ (string=? line str))))
+ (delete-file filename))))
+
+;;; binary mode ignores port encoding
+(pass-if "file: binary mode ignores port encoding"
+ (with-fluids ((%default-port-encoding "UTF-8"))
+ (let* ((filename (test-file))
+ (port (open-file filename "w"))
+ (test-string "一二三")
+ (binary-test-string
+ (apply string
+ (map integer->char
+ (uniform-vector->list
+ (string->utf8 test-string))))))
+ (write-line test-string port)
+ (close-port port)
+ (let* ((in-port (open-file filename "rb"))
+ (line (read-line in-port)))
+ (close-port in-port)
+ (delete-file filename)
+ (string=? line binary-test-string)))))
+
+;;; binary mode ignores file coding declaration
+(pass-if "file: binary mode ignores file coding declaration"
+ (with-fluids ((%default-port-encoding "UTF-8"))
+ (let* ((filename (test-file))
+ (port (open-file filename "w"))
+ (test-string "一二三")
+ (binary-test-string
+ (apply string
+ (map integer->char
+ (uniform-vector->list
+ (string->utf8 test-string))))))
+ (write-line ";; coding: utf-8" port)
+ (write-line test-string port)
+ (close-port port)
+ (let* ((in-port (open-file filename "rb"))
+ (line1 (read-line in-port))
+ (line2 (read-line in-port)))
+ (close-port in-port)
+ (delete-file filename)
+ (string=? line2 binary-test-string)))))
+
+;; open-file honors file coding declarations
+(pass-if "file: open-file honors coding declarations"
+ (with-fluids ((%default-port-encoding "UTF-8"))
+ (let* ((filename (test-file))
+ (port (open-output-file filename))
+ (test-string "€100"))
+ (set-port-encoding! port "ISO-8859-15")
+ (write-line ";; coding: iso-8859-15" port)
+ (write-line test-string port)
+ (close-port port)
+ (let* ((in-port (open-input-file filename))
+ (line1 (read-line in-port))
+ (line2 (read-line in-port)))
+ (close-port in-port)
+ (delete-file filename)
+ (string=? line2 test-string)))))
;;; ungetting characters and strings.
(with-input-from-string "walk on the moon\nmoon"