[kaffe] CVS kaffe (guilhem): Implemented RuleBasedCollator, CollationElementIterator according to the spec.
Kaffe CVS
cvs-commits at kaffe.org
Tue Aug 19 03:33:02 PDT 2003
PatchSet 3962
Date: 2003/08/19 10:30:22
Author: guilhem
Branch: HEAD
Tag: (none)
Log:
Implemented RuleBasedCollator, CollationElementIterator according to the spec.
But there is still one feature missing: getMaxExpansion() is still quite a dummy.
Members:
ChangeLog:1.1560->1.1561
libraries/javalib/java/text/CollationElementIterator.java:1.8->1.9
libraries/javalib/java/text/RuleBasedCollator.java:1.11->1.12
Index: kaffe/ChangeLog
diff -u kaffe/ChangeLog:1.1560 kaffe/ChangeLog:1.1561
--- kaffe/ChangeLog:1.1560 Mon Aug 18 17:40:27 2003
+++ kaffe/ChangeLog Tue Aug 19 10:30:22 2003
@@ -1,3 +1,13 @@
+2003-08-19 Guilhem Lavaux <guilhem at kaffe.org>
+
+ * libraries/javalib/java/text/RuleBasedCollator.java:
+ implemented full rule parsing (near full reimplementation).
+ updated documentation.
+
+ * libraries/javalib/java/text/CollationElementIterator.java:
+ implemented full behaviour. getMaxExpansion is still missing
+ as its functionality is not clear for the moment.
+
2003-08-18 Dalibor Topic <robilad at kaffe.org>
Merged in GNU Classpath's RMI implementation.
Index: kaffe/libraries/javalib/java/text/CollationElementIterator.java
diff -u kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.8 kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.9
--- kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.8 Sat Aug 16 11:03:47 2003
+++ kaffe/libraries/javalib/java/text/CollationElementIterator.java Tue Aug 19 10:30:23 2003
@@ -38,6 +38,11 @@
package java.text;
+import java.util.Vector;
+import java.util.NoSuchElementException;
+import java.util.Map;
+import java.util.SortedMap;
+
/* Written using "Java Class Libraries", 2nd edition, plus online
* API docs for JDK 1.2 from http://www.javasoft.com.
* Status: Believed complete and correct to JDK 1.1.
@@ -73,11 +78,22 @@
private String text;
/**
- * This is the index into the String where we are currently scanning.
+ * This is the index into the collation decomposition where we are currently scanning.
*/
private int index;
/**
+ * This is the index into the String where we are currently scanning.
+ */
+ private int textIndex;
+
+ /**
+ * Array containing the collation decomposition of the
+ * text given to the constructor.
+ */
+ private Object[] text_decomposition;
+
+ /**
* This method initializes a new instance of <code>CollationElementIterator</code>
* to iterate over the specified <code>String</code> using the rules in the
* specified <code>RuleBasedCollator</code>.
@@ -88,24 +104,70 @@
CollationElementIterator (RuleBasedCollator collator, String text)
{
this.collator = collator;
- this.text = text;
+
+ setText (text);
+ }
+
+ RuleBasedCollator.CollationElement nextBlock()
+ {
+ if (index >= text_decomposition.length)
+ return null;
+
+ RuleBasedCollator.CollationElement e =
+ (RuleBasedCollator.CollationElement) text_decomposition[index++];
+
+ textIndex += e.char_seq.length();
+
+ return e;
+ }
+
+ RuleBasedCollator.CollationElement previousBlock()
+ {
+ if (index == 0)
+ return null;
+
+ index--;
+ RuleBasedCollator.CollationElement e =
+ (RuleBasedCollator.CollationElement) text_decomposition[index];
+
+ textIndex -= e.char_seq.length();
+
+ return e;
}
/**
- * This method returns the collation ordering value of the next character
- * in the string. This method will return <code>NULLORDER</code> if the
+ * This method returns the collation ordering value of the next character sequence
+ * in the string (it may be an extended character following collation rules).
+ * This method will return <code>NULLORDER</code> if the
* end of the string was reached.
*
* @return The collation ordering value.
*/
public int next ()
{
- if (index >= text.length ())
+ RuleBasedCollator.CollationElement e = nextBlock ();
+
+ if (e == null)
return NULLORDER;
+
+ return e.getValue();
+ }
+
+ /**
+ * This method returns the collation ordering value of the previous character
+ * in the string. This method will return <code>NULLORDER</code> if the
+ * beginning of the string was reached.
+ *
+ * @return The collation ordering value.
+ */
+ public int previous ()
+ {
+ RuleBasedCollator.CollationElement e = previousBlock ();
- String s = text.charAt (index) + "";
- index++;
- return collator.getCollationElementValue (s);
+ if (e == null)
+ return NULLORDER;
+
+ return e.getValue();
}
/**
@@ -129,6 +191,7 @@
public void reset ()
{
index = 0;
+ textIndex = 0;
}
/**
@@ -167,8 +230,65 @@
*/
public void setText (String text)
{
+ int idx = 0;
+
this.text = text;
index = 0;
+
+ String work_text = text.intern();
+
+ Vector v = new Vector();
+ // Build element collection ordered as they come in "text".
+ while (idx < work_text.length())
+ {
+ String key, key_old;
+
+ try
+ {
+ Object object = null;
+ int p = 1;
+
+ // IMPROVE: use a TreeMap with a prefix-ordering rule.
+ key_old = key = null;
+ do
+ {
+ if (object != null)
+ key_old = key;
+ key = work_text.substring (idx, idx+p);
+ object = collator.prefix_tree.get (key);
+ p++;
+ }
+ while (idx+p <= work_text.length());
+
+ if (object == null)
+ key = key_old;
+
+ RuleBasedCollator.CollationElement prefix =
+ (RuleBasedCollator.CollationElement)collator.prefix_tree.get (key);
+
+ if (prefix.expansion != null)
+ {
+ work_text = prefix.expansion + work_text.substring (idx+prefix.char_seq.length());
+ idx = 0;
+ v.add (prefix);
+ }
+ else
+ {
+ if (!prefix.ignore)
+ v.add (prefix);
+ idx += prefix.char_seq.length ();
+ }
+ }
+ catch (NullPointerException _)
+ {
+ RuleBasedCollator.CollationElement e = collator.getDefaultElement(work_text.charAt (idx));
+
+ v.add (e);
+ idx++;
+ }
+ }
+
+ text_decomposition = v.toArray();
}
/**
@@ -201,7 +321,7 @@
*/
public int getOffset ()
{
- return index;
+ return textIndex;
}
/**
@@ -224,8 +344,18 @@
else if (offset > (text.length () - 1))
throw new IllegalArgumentException ("Offset too large: " + offset);
- index = offset;
- }
+ textIndex = 0;
+ for (int i=0;i<text_decomposition.length;i++)
+ {
+ RuleBasedCollator.CollationElement e =
+ (RuleBasedCollator.CollationElement) text_decomposition[i];
+ int idx = textIndex + e.char_seq.length();
+
+ if (idx > offset)
+ break;
+ textIndex = idx;
+ }
+ }
/**
* This method returns the maximum length of any expansion sequence that
@@ -237,24 +367,6 @@
*/
public int getMaxExpansion (int value)
{
- //************ Implement me!!!!!!!!!
- return 5;
- }
-
- /**
- * This method returns the collation ordering value of the previous character
- * in the string. This method will return <code>NULLORDER</code> if the
- * beginning of the string was reached.
- *
- * @return The collation ordering value.
- */
- public int previous ()
- {
- --index;
- if (index < 0)
- return NULLORDER;
-
- String s = text.charAt (index) + "";
- return collator.getCollationElementValue (s);
+ return 1;
}
}
Index: kaffe/libraries/javalib/java/text/RuleBasedCollator.java
diff -u kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.11 kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.12
--- kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.11 Sat Aug 16 11:03:47 2003
+++ kaffe/libraries/javalib/java/text/RuleBasedCollator.java Tue Aug 19 10:30:23 2003
@@ -39,6 +39,9 @@
package java.text;
import java.util.Vector;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Comparator;
/* Written using "Java Class Libraries", 2nd edition, plus online
* API docs for JDK 1.2 from http://www.javasoft.com.
@@ -60,7 +63,10 @@
* <li> Reset: '&' : <text>
* </ul>
* The modifier character indicates that accents sort backward as is the
- * case with French. The relational operators specify how the text
+ * case with French. The modifier applies to all rules <b>after</b>
+ * the modifier but before the next primary sequence. If placed at the end
+ * of the sequence if applies to all unknown accented character.
+ * The relational operators specify how the text
* argument relates to the previous term. The relation characters have
* the following meanings:
* <ul>
@@ -111,6 +117,9 @@
* anywhere in the previous rule string segment so the rule following the
* reset rule cannot be inserted.
* <p>
+ * "< a & A @ < e & E < f& F" - This sequence is equivalent to the following
+ * "< a & A < E & e < f & F".
+ * <p>
* For a description of the various comparison strength types, see the
* documentation for the <code>Collator</code> class.
* <p>
@@ -132,7 +141,8 @@
*
* @author Aaron M. Renn <arenn at urbanophile.com>
* @author Tom Tromey <tromey at cygnus.com>
- * @date March 25, 1999
+ * @author Guilhem Lavaux <guilhem at kaffe.org>
+ * @date August 17, 2003
*/
public class RuleBasedCollator extends Collator
{
@@ -142,17 +152,328 @@
int primary;
short secondary;
short tertiary;
+ short equality;
+ boolean ignore;
+ String expansion;
- CollationElement(String char_seq, int primary, short secondary, short tertiary)
+ CollationElement(String char_seq, int primary, short secondary, short tertiary,
+ short equality, String expansion)
{
this.char_seq = char_seq;
this.primary = primary;
this.secondary = secondary;
this.tertiary = tertiary;
+ this.equality = equality;
+ this.ignore = false;
+ this.expansion = expansion;
+ }
+
+ CollationElement(String char_seq)
+ {
+ this.char_seq = char_seq;
+ this.ignore = true;
+ }
+
+ final int getValue()
+ {
+ return (primary << 16) + (secondary << 8) + tertiary;
}
-
+
} // inner class CollationElement
+ class CollationBlock
+ {
+ int value;
+ String textBlock;
+ }
+
+ class CollationSorter
+ {
+ static final int GREATERP = 0;
+ static final int GREATERS = 1;
+ static final int GREATERT = 2;
+ static final int EQUAL = 3;
+ static final int RESET = 4;
+ static final int IGNORE = 5;
+ static final int INVERSE_SECONDARY = 6;
+ static final int NEW_SEQUENCE = 7; /* For expansion */
+
+ int comparisonType;
+ String textElement;
+ int hashText;
+ int offset;
+
+ String expansionOrdering;
+ }
+
+ static int findPrefixLength(String prefix, String s)
+ {
+ int i;
+
+ for (i=0;i<prefix.length() && i < s.length();i++)
+ {
+ if (prefix.charAt(i) != s.charAt(i))
+ return i;
+ }
+ return i;
+ }
+
+ void mergeRules(int offset, String starter, Vector main, Vector patch)
+ throws ParseException
+ {
+ Enumeration elements = main.elements();
+ int insertion_point = -1;
+ int max_length = 0;
+
+ /* We must check that no rules conflict with another already present. If it
+ * is the case delete the old rule.
+ */
+
+ /* For the moment good old O(N^2) algorithm.
+ */
+ for (int i=0;i<patch.size();i++)
+ {
+ int j = 0;
+
+ while (j < main.size())
+ {
+ CollationSorter rule1 = (CollationSorter)patch.elementAt(i);
+ CollationSorter rule2 = (CollationSorter)main.elementAt(j);
+
+ if (rule1.textElement.equals(rule2.textElement))
+ main.removeElementAt(j);
+ else
+ j++;
+ }
+ }
+
+ // Find the insertion point... O(N)
+ for (int i=0;i<main.size();i++)
+ {
+ CollationSorter sorter = (CollationSorter)main.elementAt(i);
+ int length = findPrefixLength(starter, sorter.textElement);
+
+ if (length > max_length)
+ {
+ max_length = length;
+ insertion_point = i+1;
+ }
+ }
+
+ if (insertion_point < 0)
+ throw new ParseException("no insertion point found for " + starter, offset);
+
+ if (max_length < starter.length())
+ {
+ /*
+ * We need to expand the first entry. It must be sorted
+ * like if it was the reference key itself (like the spec
+ * said. So the first entry is special: the element is
+ * replaced by the specified text element for the sorting.
+ * This text replace the old one for comparisons. However
+ * to preserve the behaviour we replace the first key (corresponding
+ * to the found prefix) by a new code rightly ordered in the
+ * sequence. The rest of the subsequence must be appended
+ * to the end of the sequence.
+ */
+ CollationSorter sorter = (CollationSorter)patch.elementAt(0);
+ CollationSorter expansionPrefix = (CollationSorter)main.elementAt(insertion_point-1);
+
+ sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
+
+ main.insertElementAt(sorter, insertion_point);
+
+ /*
+ * This is a new set of rules. Append to the list.
+ */
+ patch.removeElementAt(0);
+ insertion_point = main.size();
+ }
+
+ // Now insert all elements of patch at the insertion point.
+ for (int i=0;i<patch.size();i++)
+ main.insertElementAt(patch.elementAt(i), i+insertion_point);
+
+ }
+
+ int subParseString(boolean stop_on_reset, Vector v, int base_offset, String rules) throws ParseException
+ {
+ boolean ignoreChars = (base_offset == 0);
+ int operator = -1;
+ StringBuffer sb = new StringBuffer("");
+ boolean doubleQuote = false;
+ boolean eatingChars = false;
+ boolean nextIsModifier = false;
+ boolean isModifier = false;
+ int i;
+
+main_parse_loop:
+ for (i=0;i<rules.length();i++)
+ {
+ char c = rules.charAt(i);
+ int type = -1;
+
+ if (!eatingChars &&
+ ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
+ continue;
+
+ isModifier = nextIsModifier;
+ nextIsModifier = false;
+
+ if (eatingChars && c != '\'')
+ {
+ doubleQuote = false;
+ sb.append(c);
+ continue;
+ }
+ if (doubleQuote && eatingChars)
+ {
+ sb.append(c);
+ doubleQuote = false;
+ continue;
+ }
+
+ switch (c) {
+ case '<':
+ ignoreChars = false;
+ type = CollationSorter.GREATERP;
+ break;
+ case ';':
+ if (!ignoreChars)
+ type = CollationSorter.GREATERS;
+ else
+ type = CollationSorter.IGNORE;
+ break;
+ case ',':
+ if (!ignoreChars)
+ type = CollationSorter.GREATERT;
+ else
+ type = CollationSorter.IGNORE;
+ break;
+ case '=':
+ if (!ignoreChars)
+ type = CollationSorter.EQUAL;
+ else
+ type = CollationSorter.IGNORE;
+ break;
+ case '\'':
+ eatingChars = !eatingChars;
+ doubleQuote = true;
+ break;
+ case '@':
+ if (ignoreChars)
+ throw new ParseException("comparison list has not yet been started. You may only use"
+ + "(<,;=&)", i+base_offset);
+ // Inverse the order of secondaries from now on.
+ nextIsModifier = true;
+ type = CollationSorter.INVERSE_SECONDARY;
+ break;
+ case '&':
+ type = CollationSorter.RESET;
+ if (stop_on_reset)
+ break main_parse_loop;
+ break;
+ default:
+ if (operator < 0)
+ throw new ParseException("operator missing at " + (i+base_offset), i+base_offset);
+ if (!eatingChars &&
+ ((c >= 0x21 && c <= 0x2F) || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) ||
+ (c >= 0x7B && c <= 0x7E)))
+ throw new ParseException("unquoted punctuation character '"+c+"'", i+base_offset);
+
+ //type = ignoreChars ? CollationSorter.IGNORE : -1;
+ sb.append(c);
+ break;
+ }
+
+ if (type < 0)
+ continue;
+
+ if (operator < 0)
+ {
+ operator = type;
+ continue;
+ }
+
+ if (sb.length() == 0 && !isModifier)
+ throw new ParseException("text element empty at " + (i+base_offset), i+base_offset);
+
+ if (operator == CollationSorter.RESET)
+ {
+ /* Reposition in the sorting list at the position
+ * indicated by the text element.
+ */
+ String subrules = rules.substring(i);
+ Vector sorted_rules = new Vector();
+ int idx;
+
+ // Parse the subrules but do not iterate through all
+ // sublist. This is the priviledge of the first call.
+ idx = subParseString(true, sorted_rules, base_offset+i, subrules);
+
+ // Merge new parsed rules into the list.
+ mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
+ sb.setLength(0);
+
+ // Reset state to none.
+ operator = -1;
+ type = -1;
+ // We have found a new subrule at 'idx' but it has not been parsed.
+ if (idx >= 0)
+ {
+ i += idx-1;
+ continue main_parse_loop;
+ }
+ else
+ // No more rules.
+ break main_parse_loop;
+ }
+
+ CollationSorter sorter = new CollationSorter();
+
+ sorter.comparisonType = operator;
+ sorter.textElement = sb.toString();
+ sorter.hashText = sorter.textElement.hashCode();
+ sorter.offset = base_offset+rules.length();
+ sb.setLength(0);
+
+ v.add(sorter);
+ operator = type;
+ }
+
+ if (operator >= 0)
+ {
+ CollationSorter sorter = new CollationSorter();
+ int pos = rules.length() + base_offset;
+
+ if ((sb.length() != 0 && nextIsModifier) ||
+ (sb.length() == 0 && !nextIsModifier && !eatingChars))
+ throw new ParseException("text element empty at " + pos, pos);
+
+ sorter.comparisonType = operator;
+ sorter.textElement = sb.toString();
+ sorter.hashText = sorter.textElement.hashCode();
+ sorter.offset = base_offset+pos;
+ v.add(sorter);
+ }
+
+ if (i == rules.length())
+ return -1;
+ else
+ return i;
+ }
+
+ private Vector parseString(String rules) throws ParseException
+ {
+ Vector v = new Vector();
+
+ // result of the first subParseString is not certain (may be -1 or a positive integer). But we
+ // do not care.
+ subParseString(false, v, 0, rules);
+
+ return v;
+ }
+
/**
* This the the original rule string.
*/
@@ -164,6 +485,117 @@
private Object[] ce_table;
/**
+ * Quick-prefix finder.
+ */
+ HashMap prefix_tree;
+
+ /**
+ * This is the value of the last sequence entered into
+ * <code>ce_table</code>. It is used to compute the
+ * ordering value of unspecified character.
+ */
+ private int last_primary_value;
+
+ private void buildCollationVector(Vector parsedElements) throws ParseException
+ {
+ int primary_seq = 0;
+ short secondary_seq = 0;
+ short tertiary_seq = 0;
+ short equality_seq = 0;
+ boolean inverseComparisons = false;
+ final boolean DECREASING = false;
+ final boolean INCREASING = true;
+ boolean secondaryType = INCREASING;
+ Vector v = new Vector();
+
+ // elts is completely sorted.
+element_loop:
+ for (int i=0;i<parsedElements.size();i++)
+ {
+ CollationSorter elt = (CollationSorter)parsedElements.elementAt(i);
+ boolean ignoreChar = false;
+
+ switch (elt.comparisonType)
+ {
+ case CollationSorter.GREATERP:
+ primary_seq++;
+ if (inverseComparisons)
+ {
+ secondary_seq = Short.MAX_VALUE;
+ secondaryType = DECREASING;
+ }
+ else
+ {
+ secondary_seq = 0;
+ secondaryType = INCREASING;
+ }
+ tertiary_seq = 0;
+ equality_seq = 0;
+ inverseComparisons = false;
+ break;
+ case CollationSorter.GREATERS:
+ if (secondaryType == DECREASING)
+ secondary_seq--;
+ else
+ secondary_seq++;
+ tertiary_seq = 0;
+ equality_seq = 0;
+ break;
+ case CollationSorter.INVERSE_SECONDARY:
+ inverseComparisons = true;
+ continue element_loop;
+ case CollationSorter.GREATERT:
+ tertiary_seq++;
+ equality_seq = 0;
+ break;
+ case CollationSorter.IGNORE:
+ ignoreChar = true;
+ case CollationSorter.EQUAL:
+ equality_seq++;
+ break;
+ case CollationSorter.RESET:
+ throw new ParseException("Invalid reached state 'RESET'. Internal error", elt.offset);
+ default:
+ throw new ParseException("Invalid unknown state '"+elt.comparisonType+"'", elt.offset);
+ }
+
+ CollationElement e;
+
+ if (!ignoreChar)
+ {
+ e = new CollationElement(elt.textElement, primary_seq,
+ secondary_seq, tertiary_seq,
+ equality_seq, elt.expansionOrdering);
+ }
+ else
+ e = new CollationElement(elt.textElement);
+
+ v.add(e);
+ }
+
+ ce_table = v.toArray();
+
+ last_primary_value = primary_seq+1;
+ }
+
+ /**
+ * Build a tree where all keys are the texts of collation elements and data is
+ * the collation element itself. The tree is used when extracting all prefix
+ * for a given text.
+ */
+ private void buildPrefixAccess()
+ {
+ prefix_tree = new HashMap();
+
+ for (int i=0;i<ce_table.length;i++)
+ {
+ CollationElement e = (CollationElement)ce_table[i];
+
+ prefix_tree.put(e.char_seq, e);
+ }
+ }
+
+ /**
* This method initializes a new instance of <code>RuleBasedCollator</code>
* with the specified collation rules. Note that an application normally
* obtains an instance of <code>RuleBasedCollator</code> by calling the
@@ -179,117 +611,10 @@
this.rules = rules;
if (rules.equals(""))
- throw new IllegalArgumentException("Empty rule set");
+ throw new ParseException("Empty rule set", 0);
- Vector v = new Vector();
- boolean ignore_chars = true;
- int primary_seq = 0;
- short secondary_seq = 0;
- short tertiary_seq = 0;
- StringBuffer sb = new StringBuffer("");
- for (int i = 0; i < rules.length(); i++)
- {
- char c = rules.charAt(i);
-
- // Check if it is a whitespace character
- if (((c >= 0x09) && (c <= 0x0D)) || (c == 0x20))
- continue;
-
- // Primary difference
- if (c == '<')
- {
- ignore_chars = false;
- CollationElement e = new CollationElement(sb.toString(), primary_seq,
- secondary_seq,
- tertiary_seq);
- secondary_seq = 0;
- tertiary_seq = 0;
- ++primary_seq;
-
- v.add(e);
- sb.setLength(0);
- continue;
- }
-
- // Secondary difference
- if (c == ';')
- {
- if (primary_seq == 0)
- throw new ParseException(rules, i);
-
- CollationElement e = new CollationElement(sb.toString(), primary_seq,
- secondary_seq,
- tertiary_seq);
- ++secondary_seq;
- tertiary_seq = 0;
-
- v.add(e);
- sb.setLength(0);
- continue;
- }
-
- // Tertiary difference
- if (c == ',')
- {
- if (primary_seq == 0)
- throw new ParseException(rules, i);
-
- CollationElement e = new CollationElement(sb.toString(), primary_seq,
- secondary_seq,
- tertiary_seq);
- ++tertiary_seq;
-
- v.add(e);
- sb.setLength(0);
- continue;
- }
-
- // Is equal to
- if (c == '=')
- {
- if (primary_seq == 0)
- throw new ParseException(rules, i);
-
- CollationElement e = new CollationElement(sb.toString(), primary_seq,
- secondary_seq,
- tertiary_seq);
- v.add(e);
- sb.setLength(0);
- continue;
- }
-
- // Sort accents backwards
- if (c == '@')
- {
- throw new ParseException("French style accents not implemented yet", 0);
- }
-
- // Reset command
- if (c == '&')
- {
- throw new ParseException("Reset not implemented yet", 0);
- }
-
- // See if we are still reading characters to skip
- if (ignore_chars == true)
- {
- CollationElement e = new CollationElement(c + "", 0, (short)0,
- (short)0);
- v.add(e);
- continue;
- }
-
- sb.append(c);
- }
-
- if (sb.length() > 0)
- {
- CollationElement e = new CollationElement (sb.toString(), primary_seq,
- secondary_seq, tertiary_seq);
- v.add (e);
- }
-
- ce_table = v.toArray();
+ buildCollationVector(parseString(rules));
+ buildPrefixAccess();
}
/**
@@ -303,28 +628,9 @@
return(rules);
}
- /**
- * This method calculates the collation element value for the specified
- * character(s).
- */
- int getCollationElementValue(String str)
+ CollationElement getDefaultElement(char c)
{
- CollationElement e = null;
-
- // The table is sorted. Change to a binary search later.
- for (int i = 0; i < ce_table.length; i++)
- if (((CollationElement)ce_table[i]).char_seq.equals(str))
- {
- e = (CollationElement)ce_table[i];
- break;
- }
-
- if (e == null)
- e = new CollationElement(str, 0xFFFF, (short)0xFF, (short)0xFF);
-
- int retval = (e.primary << 16) + (e.secondary << 8) + e.tertiary;
-
- return(retval);
+ return new CollationElement (""+c, last_primary_value + c, (short)0, (short)0, (short) 0, null);
}
/**
@@ -384,21 +690,40 @@
for(;;)
{
- int ord1 = cei1.next();
- int ord2 = cei2.next();
-
- // Check for end of string
- if (ord1 == CollationElementIterator.NULLORDER)
- if (ord2 == CollationElementIterator.NULLORDER)
- return(0);
- else
- return(-1);
- else if (ord2 == CollationElementIterator.NULLORDER)
- return(1);
-
- // We know chars are totally equal, so skip
+ CollationElement ord1block = cei1.nextBlock();
+ CollationElement ord2block = cei2.nextBlock();
+ int ord1;
+ int ord2;
+
+ // Hehehe. What we would do not to use if.
+ try
+ {
+ ord1 = ord1block.getValue();
+ }
+ catch (NullPointerException _)
+ {
+ if (ord2block == null)
+ return 0;
+ return -1;
+ }
+
+ try
+ {
+ ord2 = ord2block.getValue();
+ }
+ catch (NullPointerException _)
+ {
+ return 1;
+ }
+
+ // We know chars are totally equal, so skip
if (ord1 == ord2)
- continue;
+ {
+ if (getStrength() == IDENTICAL)
+ if (!ord1block.char_seq.equals(ord2block.char_seq))
+ return ord1block.char_seq.compareTo(ord2block.char_seq);
+ continue;
+ }
// Check for primary strength differences
int prim1 = cei1.primaryOrder(ord1);
@@ -424,12 +749,17 @@
// Check for tertiary differences
int tert1 = cei1.tertiaryOrder(ord1);
- int tert2 = cei2.tertiaryOrder(ord1);
+ int tert2 = cei2.tertiaryOrder(ord2);
if (tert1 < tert2)
return(-1);
else if (tert1 > tert2)
return(1);
+ else if (getStrength() == TERTIARY)
+ continue;
+
+ // Apparently JDK does this (at least for my test case).
+ return ord1block.char_seq.compareTo(ord2block.char_seq);
}
*** Patch too long, truncated ***
More information about the kaffe
mailing list