+/* A fluid specifying the default encoding for newly created ports. If it is
+ a string, that is the encoding. If it is #f, it is in the "native"
+ (Latin-1) encoding. */
+SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
+
+static int scm_port_encoding_init = 0;
+
+/* Return a C string representation of the current encoding. */
+const char *
+scm_i_get_port_encoding (SCM port)
+{
+ SCM encoding;
+
+ if (scm_is_false (port))
+ {
+ if (!scm_port_encoding_init)
+ return NULL;
+ else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
+ return NULL;
+ else
+ {
+ encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
+ if (!scm_is_string (encoding))
+ return NULL;
+ else
+ return scm_i_string_chars (encoding);
+ }
+ }
+ else
+ {
+ scm_t_port *pt;
+ pt = SCM_PTAB_ENTRY (port);
+ if (pt->encoding)
+ return pt->encoding;
+ else
+ return NULL;
+ }
+}
+
+/* Returns ENC if it is a recognized encoding. If it isn't, it tries
+ to find an alias of ENC that is valid. Otherwise, it returns
+ NULL. */
+static const char *
+find_valid_encoding (const char *enc)
+{
+ int isvalid = 0;
+ const char str[] = " ";
+ scm_t_uint32 *u32;
+ size_t u32len;
+
+ u32 = u32_conv_from_encoding (enc, iconveh_error, str, 1,
+ NULL, NULL, &u32len);
+ isvalid = (u32 != NULL);
+ free (u32);
+
+ if (isvalid)
+ return enc;
+
+ return NULL;
+}
+
+void
+scm_i_set_port_encoding_x (SCM port, const char *enc)
+{
+ const char *valid_enc;
+ scm_t_port *pt;
+
+ /* Null is shorthand for the native, Latin-1 encoding. */
+ if (enc == NULL)
+ valid_enc = NULL;
+ else
+ {
+ valid_enc = find_valid_encoding (enc);
+ if (valid_enc == NULL)
+ {
+ SCM err;
+ err = scm_from_locale_string (enc);
+ scm_misc_error (NULL, "invalid or unknown character encoding ~s",
+ scm_list_1 (err));
+ }
+ }
+
+ if (scm_is_false (port))
+ {
+ /* Set the default encoding for future ports. */
+ if (!scm_port_encoding_init
+ || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
+ scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
+ SCM_EOL);
+
+ if (valid_enc == NULL
+ || !strcmp (valid_enc, "ASCII")
+ || !strcmp (valid_enc, "ANSI_X3.4-1968")
+ || !strcmp (valid_enc, "ISO-8859-1"))
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
+ else
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
+ scm_from_locale_string (valid_enc));
+ }
+ else
+ {
+ /* Set the character encoding for this port. */
+ pt = SCM_PTAB_ENTRY (port);
+ if (valid_enc == NULL)
+ pt->encoding = NULL;
+ else
+ pt->encoding = scm_gc_strdup (valid_enc, "port");
+ }
+}
+
+SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
+ (SCM port),
+ "Returns, as a string, the character encoding that @var{port}\n"
+ "uses to interpret its input and output.\n")
+#define FUNC_NAME s_scm_port_encoding
+{
+ scm_t_port *pt;
+ const char *enc;
+
+ SCM_VALIDATE_PORT (1, port);
+
+ pt = SCM_PTAB_ENTRY (port);
+ enc = scm_i_get_port_encoding (port);
+ if (enc)
+ return scm_from_locale_string (pt->encoding);
+ else
+ return scm_from_locale_string ("NONE");
+}
+#undef FUNC_NAME
+
+SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
+ (SCM port, SCM enc),
+ "Sets the character encoding that will be used to interpret all\n"
+ "port I/O. New ports are created with the encoding\n"
+ "appropriate for the current locale if @code{setlocale} has \n"
+ "been called or ISO-8859-1 otherwise\n"
+ "and this procedure can be used to modify that encoding.\n")
+#define FUNC_NAME s_scm_set_port_encoding_x
+{
+ char *enc_str;
+ const char *valid_enc_str;
+
+ SCM_VALIDATE_PORT (1, port);
+ SCM_VALIDATE_STRING (2, enc);
+
+ enc_str = scm_to_locale_string (enc);
+ valid_enc_str = find_valid_encoding (enc_str);
+ if (valid_enc_str == NULL)
+ {
+ free (enc_str);
+ scm_misc_error (FUNC_NAME, "invalid or unknown character encoding ~s",
+ scm_list_1 (enc));
+ }
+ else
+ {
+ scm_i_set_port_encoding_x (port, valid_enc_str);
+ free (enc_str);
+ }
+ return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
+
+/* This determines how conversions handle unconvertible characters. */
+SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy");
+static int scm_conversion_strategy_init = 0;
+
+scm_t_string_failed_conversion_handler
+scm_i_get_conversion_strategy (SCM port)
+{
+ SCM encoding;
+
+ if (scm_is_false (port))
+ {
+ if (!scm_conversion_strategy_init
+ || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
+ return SCM_FAILED_CONVERSION_QUESTION_MARK;
+ else
+ {
+ encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy));
+ if (scm_is_false (encoding))
+ return SCM_FAILED_CONVERSION_QUESTION_MARK;
+ else
+ return (scm_t_string_failed_conversion_handler) scm_to_int (encoding);
+ }
+ }
+ else
+ {
+ scm_t_port *pt;
+ pt = SCM_PTAB_ENTRY (port);
+ return pt->ilseq_handler;
+ }
+
+}
+
+void
+scm_i_set_conversion_strategy_x (SCM port,
+ scm_t_string_failed_conversion_handler handler)
+{
+ SCM strategy;
+ scm_t_port *pt;
+
+ strategy = scm_from_int ((int) handler);
+
+ if (scm_is_false (port))
+ {
+ /* Set the default encoding for future ports. */
+ if (!scm_conversion_strategy
+ || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
+ scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
+ SCM_EOL);
+ scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy);
+ }
+ else
+ {
+ /* Set the character encoding for this port. */
+ pt = SCM_PTAB_ENTRY (port);
+ pt->ilseq_handler = handler;
+ }
+}
+
+SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
+ 1, 0, 0, (SCM port),
+ "Returns the behavior of the port when handling a character that\n"
+ "is not representable in the port's current encoding.\n"
+ "It returns the symbol @code{error} if unrepresentable characters\n"
+ "should cause exceptions, @code{substitute} if the port should\n"
+ "try to replace unrepresentable characters with question marks or\n"
+ "approximate characters, or @code{escape} if unrepresentable\n"
+ "characters should be converted to string escapes.\n"
+ "\n"
+ "If @var{port} is @code{#f}, then the current default behavior\n"
+ "will be returned. New ports will have this default behavior\n"
+ "when they are created.\n")
+#define FUNC_NAME s_scm_port_conversion_strategy
+{
+ scm_t_string_failed_conversion_handler h;
+
+ SCM_VALIDATE_OPPORT (1, port);
+
+ if (!scm_is_false (port))
+ {
+ SCM_VALIDATE_OPPORT (1, port);
+ }
+
+ h = scm_i_get_conversion_strategy (port);
+ if (h == SCM_FAILED_CONVERSION_ERROR)
+ return scm_from_locale_symbol ("error");
+ else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
+ return scm_from_locale_symbol ("substitute");
+ else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ return scm_from_locale_symbol ("escape");
+ else
+ abort ();
+
+ /* Never gets here. */
+ return SCM_UNDEFINED;
+}
+#undef FUNC_NAME
+
+SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
+ 2, 0, 0,
+ (SCM port, SCM sym),
+ "Sets the behavior of the interpreter when outputting a character\n"
+ "that is not representable in the port's current encoding.\n"
+ "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
+ "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
+ "when an unconvertible character is encountered. If it is\n"
+ "@code{'substitute}, then unconvertible characters will \n"
+ "be replaced with approximate characters, or with question marks\n"
+ "if no approximately correct character is available.\n"
+ "If it is @code{'escape},\n"
+ "it will appear as a hex escape when output.\n"
+ "\n"
+ "If @var{port} is an open port, the conversion error behavior\n"
+ "is set for that port. If it is @code{#f}, it is set as the\n"
+ "default behavior for any future ports that get created in\n"
+ "this thread.\n")
+#define FUNC_NAME s_scm_set_port_conversion_strategy_x
+{
+ SCM err;
+ SCM qm;
+ SCM esc;
+
+ if (!scm_is_false (port))
+ {
+ SCM_VALIDATE_OPPORT (1, port);
+ }
+
+ err = scm_from_locale_symbol ("error");
+ if (scm_is_true (scm_eqv_p (sym, err)))
+ {
+ scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
+ return SCM_UNSPECIFIED;
+ }
+
+ qm = scm_from_locale_symbol ("substitute");
+ if (scm_is_true (scm_eqv_p (sym, qm)))
+ {
+ scm_i_set_conversion_strategy_x (port,
+ SCM_FAILED_CONVERSION_QUESTION_MARK);
+ return SCM_UNSPECIFIED;
+ }
+
+ esc = scm_from_locale_symbol ("escape");
+ if (scm_is_true (scm_eqv_p (sym, esc)))
+ {
+ scm_i_set_conversion_strategy_x (port,
+ SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
+ return SCM_UNSPECIFIED;
+ }
+
+ SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym));
+
+ return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
+
+