[kaffe] CVS kaffe (robilad): Resynced with GNU Classpath: character conversion fixes
Kaffe CVS
cvs-commits at kaffe.org
Sat Feb 5 12:59:05 PST 2005
PatchSet 5980
Date: 2005/02/05 20:50:01
Author: robilad
Branch: HEAD
Tag: (none)
Log:
Resynced with GNU Classpath: character conversion fixes
2005-02-05 Dalibor Topic <robilad at kaffe.org>
Resynced with GNU Classpath.
2005-02-03 Robert Schuster <thebohemian at gmx.net>
* gnu/java/nio/charset/ISO_8859_1.java,
gnu/java/nio/charset/US_ASCII.java,
gnu/java/nio/charset/UTF_16.java,
gnu/java/nio/charset/UTF_16LE.java,
gnu/java/nio/charset/UTF_16BE.java,
gnu/java/nio/charset/UTF_8.java: Fixed canonical names
and aliases according to
http://www.iana.org/assignments/character-sets,
http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
and http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL.
* gnu/java/nio/charset/Provider.java: Made charset lookup
case-insensitive which fixes bug #11740.
Members:
ChangeLog:1.3518->1.3519
libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java:1.2->1.3
libraries/javalib/gnu/java/nio/charset/Provider.java:1.1->1.2
libraries/javalib/gnu/java/nio/charset/US_ASCII.java:1.2->1.3
libraries/javalib/gnu/java/nio/charset/UTF_16.java:1.4->1.5
libraries/javalib/gnu/java/nio/charset/UTF_16BE.java:1.4->1.5
libraries/javalib/gnu/java/nio/charset/UTF_16LE.java:1.4->1.5
libraries/javalib/gnu/java/nio/charset/UTF_8.java:1.2->1.3
Index: kaffe/ChangeLog
diff -u kaffe/ChangeLog:1.3518 kaffe/ChangeLog:1.3519
--- kaffe/ChangeLog:1.3518 Sat Feb 5 20:36:22 2005
+++ kaffe/ChangeLog Sat Feb 5 20:50:01 2005
@@ -1,3 +1,22 @@
+2005-02-05 Dalibor Topic <robilad at kaffe.org>
+
+ Resynced with GNU Classpath.
+
+ 2005-02-03 Robert Schuster <thebohemian at gmx.net>
+
+ * gnu/java/nio/charset/ISO_8859_1.java,
+ gnu/java/nio/charset/US_ASCII.java,
+ gnu/java/nio/charset/UTF_16.java,
+ gnu/java/nio/charset/UTF_16LE.java,
+ gnu/java/nio/charset/UTF_16BE.java,
+ gnu/java/nio/charset/UTF_8.java: Fixed canonical names
+ and aliases according to
+ "http://www.iana.org/assignments/character-sets",
+ "http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"
+ and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL".
+ * gnu/java/nio/charset/Provider.java: Made charset lookup
+ case-insensitive which fixes bug #11740.
+
2005-02-05 Sven de Marothy <sven at physto.se>
* java/text/SimpleDateFormat.java
Index: kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java:1.2 kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java:1.3
--- kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java:1.2 Mon Nov 8 10:47:13 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java Sat Feb 5 20:50:05 2005
@@ -1,5 +1,5 @@
/* ISO_8859_1.java --
- Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -53,7 +53,28 @@
{
ISO_8859_1 ()
{
- super ("ISO-8859-1", new String[]{"ISO-LATIN-1"});
+ /* Canonical charset name chosen according to:
+ * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+ */
+ super ("ISO-8859-1", new String[] {
+ /* These names are provided by
+ * http://www.iana.org/assignments/character-sets
+ */
+ "iso-ir-100",
+ "ISO_8859-1",
+ "latin1",
+ "l1",
+ "IBM819",
+ "CP819",
+ "csISOLatin1",
+ "8859_1",
+ /* These names are provided by
+ * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+ */
+ "ISO8859_1", "ISO_8859_1", "ibm-819", "ISO_8859-1:1987",
+ "819"
+ });
+
}
public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java:1.1 kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java:1.2
--- kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java:1.1 Thu Nov 28 13:39:26 2002
+++ kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java Sat Feb 5 20:50:05 2005
@@ -1,5 +1,5 @@
/* Provider.java --
- Copyright (C) 2002 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -48,6 +48,7 @@
* {@link Charset#charsetForName} and * {@link Charset#availableCharsets}.
*
* @author Jesse Rosenstock
+ * @author Robert Schuster (thebohemian at gmx.net)
* @see Charset
*/
public final class Provider extends CharsetProvider
@@ -63,12 +64,14 @@
}
/**
- * Map from charset name to charset canonical name.
+ * Map from charset name to charset canonical name. The strings
+ * are all lower-case to allow case-insensitive retrieval of
+ * Charset instances.
*/
private final HashMap canonicalNames;
/**
- * Map from canonical name to Charset.
+ * Map from lower-case canonical name to Charset.
* TODO: We may want to use soft references. We would then need to keep
* track of the class name to regenerate the object.
*/
@@ -76,8 +79,6 @@
private Provider ()
{
- // FIXME: We might need to make the name comparison case insensitive.
- // Verify this with the Sun JDK.
canonicalNames = new HashMap ();
charsets = new HashMap ();
@@ -106,24 +107,42 @@
.iterator ();
}
+ /**
+ * Returns a Charset instance by converting the given
+ * name to lower-case, looking up the canonical charset
+ * name and finally looking up the Charset with that name.
+ *
+ * <p>The lookup is therefore case-insensitive.</p>
+ *
+ * @return The Charset having <code>charsetName</code>
+ * as its alias or null if no such Charset exists.
+ */
public Charset charsetForName (String charsetName)
{
- return (Charset) charsets.get (canonicalize (charsetName));
- }
-
- private Object canonicalize (String charsetName)
- {
- Object o = canonicalNames.get (charsetName);
- return o == null ? charsetName : o;
+ return (Charset) charsets.get(canonicalNames.get(charsetName.toLowerCase()));
}
+ /**
+ * Puts a Charset under its canonical name into the 'charsets' map.
+ * Then puts a mapping from all its alias names to the canonical name.
+ *
+ * <p>All names are converted to lower-case.</p>
+ *
+ * @param cs The Charset to register under its canonical name and aliases.
+ */
private void addCharset (Charset cs)
{
- String canonicalName = cs.name ();
+ String canonicalName = cs.name().toLowerCase();
charsets.put (canonicalName, cs);
+
+ /* Adds a mapping from the canonical name to
+ * itself so that a lookup using that name
+ * needs no special case.
+ */
+ canonicalNames.put(canonicalName, canonicalName);
for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
- canonicalNames.put (i.next (), canonicalName);
+ canonicalNames.put (((String) i.next()).toLowerCase(), canonicalName);
}
public static synchronized Provider provider ()
Index: kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java:1.2 kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java:1.3
--- kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java:1.2 Mon Nov 8 10:47:13 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java Sat Feb 5 20:50:05 2005
@@ -1,5 +1,5 @@
/* US_ASCII.java --
- Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -53,7 +53,29 @@
{
US_ASCII ()
{
- super ("US-ASCII", new String[]{"ISO646-US"});
+ /* Canonical charset name chosen according to:
+ * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+ */
+ super ("US-ASCII", new String[] {
+ /* These names are provided by
+ * http://www.iana.org/assignments/character-sets
+ */
+ "iso-ir-6",
+ "ANSI_X3.4-1986",
+ "ISO_646.irv:1991",
+ "ASCII",
+ "ISO646-US",
+ "ASCII",
+ "us",
+ "IBM367",
+ "cp367",
+ "csASCII",
+ /* These names are provided by
+ * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+ */
+ "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646",
+ "windows-20127"
+ });
}
public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java:1.4 kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java:1.5
--- kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java:1.4 Fri Oct 15 10:41:44 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java Sat Feb 5 20:50:05 2005
@@ -1,5 +1,5 @@
/* UTF_16.java --
- Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -51,7 +51,14 @@
{
UTF_16 ()
{
- super ("UTF-16", null);
+ super ("UTF-16", new String[] {
+ // witnessed by the internet
+ "UTF16",
+ /* These names are provided by
+ * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+ */
+ "ISO-10646-UCS-2", "unicode", "csUnicode", "ucs-2"
+ });
}
public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java:1.4 kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java:1.5
--- kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java:1.4 Fri Oct 15 10:41:44 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java Sat Feb 5 20:50:05 2005
@@ -1,5 +1,5 @@
/* UTF_16BE.java --
- Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -51,7 +51,18 @@
{
UTF_16BE ()
{
- super ("UTF-16BE", null);
+ super ("UTF-16BE", new String[] {
+ // witnessed by the internet
+ "UTF16BE",
+ /* These names are provided by
+ * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+ */
+ "x-utf-16be", "ibm-1200", "ibm-1201", "ibm-5297",
+ "ibm-13488", "ibm-17584", "windows-1201", "cp1200", "cp1201",
+ "UTF16_BigEndian",
+ // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+ "UnicodeBigUnmarked"
+ });
}
public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java:1.4 kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java:1.5
--- kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java:1.4 Fri Oct 15 10:41:44 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java Sat Feb 5 20:50:05 2005
@@ -1,5 +1,5 @@
/* UTF_16LE.java --
- Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -51,7 +51,17 @@
{
UTF_16LE ()
{
- super ("UTF-16LE", null);
+ super ("UTF-16LE", new String[] {
+ // witnessed by the internet
+ "UTF16LE",
+ /* These names are provided by
+ * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+ */
+ "x-utf-16le", "ibm-1202", "ibm-13490", "ibm-17586",
+ "UTF16_LittleEndian",
+ // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+ "UnicodeLittleUnmarked"
+ });
}
public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java:1.2 kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java:1.3
--- kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java:1.2 Mon Nov 8 10:47:13 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java Sat Feb 5 20:50:05 2005
@@ -1,5 +1,5 @@
/* UTF_8.java --
- Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -62,7 +62,15 @@
{
UTF_8 ()
{
- super ("UTF-8", null);
+ super ("UTF-8", new String[] {
+ /* These names are provided by
+ * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+ */
+ "ibm-1208", "ibm-1209", "ibm-5304", "ibm-5305",
+ "windows-65001", "cp1208",
+ // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+ "UTF8"
+ });
}
public boolean contains (Charset cs)
More information about the kaffe
mailing list