[kaffe] CVS kaffe (guilhem): Fixes for RuleBasedCollator.
Kaffe CVS
Kaffe Mailing List <kaffe@kaffe.org>
Thu May 13 12:54:02 2004
PatchSet 4727
Date: 2004/05/13 19:18:19
Author: guilhem
Branch: HEAD
Tag: (none)
Log:
Fixes for RuleBasedCollator.
* libraries/javalib/java/text/RuleBasedCollator.java
(CollationElement, CollationSorter): Made static.
(last_tertiary_value, SPECIAL_UNKNOWN_SEQ): Introduced special
collation elements for unknown characters; this is for sequences
using resets.
(mergeRules): Fixed insertion point.
(buildCollationVector): Initialize last_tertiary_value.
(compare): Handle special cases of accented characters.
(getDefaultAccentedElement): New method.
(getCollationFixed): Fixed key building.
* libraries/javalib/java/text/CollationElementIterator.java
(text_indexes): New field.
(nextBlock, previousBlock): Updated textIndex according to
text_indexes.
(setText): Build text_indexes. Better handling of expansion ordering.
Members:
ChangeLog:1.2301->1.2302
libraries/javalib/java/text/CollationElementIterator.java:1.15->1.16
libraries/javalib/java/text/RuleBasedCollator.java:1.20->1.21
Index: kaffe/ChangeLog
diff -u kaffe/ChangeLog:1.2301 kaffe/ChangeLog:1.2302
--- kaffe/ChangeLog:1.2301 Thu May 13 18:59:22 2004
+++ kaffe/ChangeLog Thu May 13 19:18:19 2004
@@ -1,3 +1,22 @@
+2004-05-13 Guilhem Lavaux <guilhem@kaffe.org>
+
+ * libraries/javalib/java/text/RuleBasedCollator.java
+ (CollationElement, CollationSorter): Made static.
+ (last_tertiary_value, SPECIAL_UNKNOWN_SEQ): Introduced special
+ collation elements for unknown characters; this is for sequences
+ using resets.
+ (mergeRules): Fixed insertion point.
+ (buildCollationVector): Initialize last_tertiary_value.
+ (compare): Handle special cases of accented characters.
+ (getDefaultAccentedElement): New method.
+ (getCollationFixed): Fixed key building.
+
+ * libraries/javalib/java/text/CollationElementIterator.java
+ (text_indexes): New field.
+ (nextBlock, previousBlock): Updated textIndex according to
+ text_indexes.
+ (setText): Build text_indexes. Better handling of expansion ordering.
+
2004-05-13 Dalibor Topic <robilad@kaffe.org>
* configure.ac: Allow enabling of jvmpi, xdebugging and
Index: kaffe/libraries/javalib/java/text/CollationElementIterator.java
diff -u kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.15 kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.16
--- kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.15 Fri Apr 23 17:35:12 2004
+++ kaffe/libraries/javalib/java/text/CollationElementIterator.java Thu May 13 19:18:21 2004
@@ -92,6 +92,11 @@
private Object[] text_decomposition;
/**
+ * Array containing the text index of each block in text_decomposition.
+ */
+ private int[] text_indexes;
+
+ /**
* This method initializes a new instance of <code>CollationElementIterator</code>
* to iterate over the specified <code>String</code> using the rules in the
* specified <code>RuleBasedCollator</code>.
@@ -112,9 +117,11 @@
return null;
RuleBasedCollator.CollationElement e =
- (RuleBasedCollator.CollationElement) text_decomposition[index++];
+ (RuleBasedCollator.CollationElement) text_decomposition[index];
- textIndex += e.key.length();
+ textIndex = text_indexes[index];
+
+ index++;
return e;
}
@@ -128,7 +135,7 @@
RuleBasedCollator.CollationElement e =
(RuleBasedCollator.CollationElement) text_decomposition[index];
- textIndex -= e.key.length();
+ textIndex = text_indexes[index];
return e;
}
@@ -231,7 +238,9 @@
public void setText(String text)
{
int idx = 0;
+ int idx_idx = 1;
int alreadyExpanded = 0;
+ int idxToMove = 0;
this.text = text;
this.index = 0;
@@ -239,6 +248,8 @@
String work_text = text.intern();
Vector v = new Vector();
+ Vector vi = new Vector();
+
// Build element collection ordered as they come in "text".
while (idx < work_text.length())
{
@@ -277,11 +288,36 @@
if (prefix == null)
{
- RuleBasedCollator.CollationElement e =
- collator.getDefaultElement(work_text.charAt (idx));
-
- v.add (e);
- idx++;
+ if (alreadyExpanded > 0)
+ {
+ RuleBasedCollator.CollationElement e =
+ collator.getDefaultAccentedElement (work_text.charAt (idx));
+
+ v.add (e);
+ vi.add (new Integer(idx_idx));
+ idx++;
+ alreadyExpanded--;
+ if (alreadyExpanded == 0)
+ {
+ idx_idx += idxToMove;
+ idxToMove = 0;
+ }
+ else
+ idx_idx++;
+ }
+ else
+ {
+ RuleBasedCollator.CollationElement e =
+ collator.getDefaultElement (work_text.charAt (idx));
+ Integer i_ref = new Integer(idx_idx);
+
+ v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
+ vi.add (i_ref);
+ v.add (e);
+ vi.add (i_ref);
+ idx_idx++;
+ idx++;
+ }
continue;
}
@@ -290,18 +326,39 @@
work_text = prefix.expansion
+ work_text.substring (idx+prefix.key.length());
idx = 0;
- alreadyExpanded = prefix.expansion.length();
v.add (prefix);
+ vi.add (new Integer(idx_idx));
+ if (alreadyExpanded == 0)
+ idxToMove = prefix.key.length();
+ else
+ idxToMove = 0;
+ alreadyExpanded += prefix.expansion.length();
}
else
{
if (!prefix.ignore)
- v.add (prefix);
+ {
+ v.add (prefix);
+ vi.add (new Integer(idx_idx));
+ }
idx += prefix.key.length();
+ if (alreadyExpanded > 0)
+ {
+ alreadyExpanded -= prefix.key.length();
+ if (alreadyExpanded == 0)
+ {
+ idx_idx += idxToMove;
+ idxToMove = 0;
+ }
+ } else
+ idx_idx += prefix.key.length();
}
}
text_decomposition = v.toArray();
+ text_indexes = new int[vi.size()];
+ for (int i = 0; i < vi.size(); i++)
+ text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
}
/**
Index: kaffe/libraries/javalib/java/text/RuleBasedCollator.java
diff -u kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.20 kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.21
--- kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.20 Fri Apr 23 18:38:28 2004
+++ kaffe/libraries/javalib/java/text/RuleBasedCollator.java Thu May 13 19:18:21 2004
@@ -147,7 +147,7 @@
* This class describes what rank has a character (or a sequence of characters)
* in the lexicographic order. Each element in a rule has a collation element.
*/
- final class CollationElement
+ final static class CollationElement
{
String key;
int primary;
@@ -189,7 +189,7 @@
* {@link #mergeRules(int,java.lang.String,java.util.Vector,java.util.Vector)})
* as a temporary state while merging two sets of instructions.
*/
- final class CollationSorter
+ final static class CollationSorter
{
static final int GREATERP = 0;
static final int GREATERS = 1;
@@ -230,10 +230,27 @@
private int last_primary_value;
/**
+ * This is the value of the last tertiary sequence of the
+ * primary 0, entered into
+ * <code>ce_table</code>. It is used to compute the
+ * ordering value of an unspecified accented character.
+ */
+ private int last_tertiary_value;
+
+ /**
* This variable is true if accents need to be sorted
* in the other direction.
*/
private boolean inverseAccentComparison;
+
+ /**
+ * This special collation element denotes an unknown sequence.
+ * The JDK uses it to mark and sort the characters which have
+ * no collation rules.
+ */
+ static final CollationElement SPECIAL_UNKNOWN_SEQ =
+ new CollationElement("", (short) 32767, (short) 0, (short) 0,
+ (short) 0, null);
/**
* This method initializes a new instance of <code>RuleBasedCollator</code>
@@ -356,14 +373,14 @@
(CollationSorter) main.elementAt(insertion_point-1);
sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
-
+
main.insertElementAt(sorter, insertion_point);
/*
* This is a new set of rules. Append to the list.
*/
patch.removeElementAt(0);
- insertion_point = main.size();
+ insertion_point++;
}
// Now insert all elements of patch at the insertion point.
@@ -392,7 +409,7 @@
{
boolean ignoreChars = (base_offset == 0);
int operator = -1;
- StringBuffer sb = new StringBuffer("");
+ StringBuffer sb = new StringBuffer();
boolean doubleQuote = false;
boolean eatingChars = false;
boolean nextIsModifier = false;
@@ -605,6 +622,7 @@
throws ParseException
{
int primary_seq = 0;
+ int last_tertiary_seq = 0;
short secondary_seq = 0;
short tertiary_seq = 0;
short equality_seq = 0;
@@ -652,6 +670,8 @@
continue element_loop;
case CollationSorter.GREATERT:
tertiary_seq++;
+ if (primary_seq == 0)
+ last_tertiary_seq = tertiary_seq;
equality_seq = 0;
break;
case CollationSorter.IGNORE:
@@ -686,6 +706,7 @@
ce_table = v.toArray();
last_primary_value = primary_seq+1;
+ last_tertiary_value = last_tertiary_seq+1;
}
/**
@@ -757,6 +778,17 @@
// Check for primary strength differences
int prim1 = CollationElementIterator.primaryOrder(ord1);
int prim2 = CollationElementIterator.primaryOrder(ord2);
+
+ if (prim1 == 0 && getStrength() < TERTIARY)
+ {
+ ct.previousBlock();
+ continue;
+ }
+ else if (prim2 == 0 && getStrength() < TERTIARY)
+ {
+ cs.previousBlock();
+ continue;
+ }
if (prim1 < prim2)
return -1;
@@ -769,7 +801,7 @@
int sec1 = CollationElementIterator.secondaryOrder(ord1);
int sec2 = CollationElementIterator.secondaryOrder(ord2);
- if (sec1 < sec2)
+ if (sec1 < sec2)
return -1;
else if (sec1 > sec2)
return 1;
@@ -833,6 +865,28 @@
}
/**
+ * This method builds a default collation element for an accented character
+ * without invoking the database created from the rules passed to the constructor.
+ *
+ * @param c Character which needs a collation element.
+ * @return A valid brand new CollationElement instance.
+ */
+ CollationElement getDefaultAccentedElement(char c)
+ {
+ int v;
+
+ // Preliminary support for generic accent sorting inversion (I don't know if all
+ // characters in the range should be sorted backward). This is the place
+ // to fix this if needed.
+ if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
+ v = 0x0361 - ((int) c - 0x02B9);
+ else
+ v = (short) c;
+ return new CollationElement("" + c, (short) 0,
+ (short) 0, (short) (last_tertiary_value + v), (short) 0, null);
+ }
+
+ /**
* This method returns an instance for <code>CollationElementIterator</code>
* for the specified <code>String</code> under the collation rules for this
* object.
@@ -894,11 +948,12 @@
switch (getStrength())
{
case PRIMARY:
- ord = CollationElementIterator.primaryOrder(ord);
- break;
-
+ ord = CollationElementIterator.primaryOrder(ord);
+ break;
+
case SECONDARY:
- ord = CollationElementIterator.secondaryOrder(ord);
+ ord = CollationElementIterator.primaryOrder(ord) << 8;
+ ord |= CollationElementIterator.secondaryOrder(ord);
default:
break;