[kaffe] CVS kaffe (guilhem): Fixes for RuleBasedCollator.

Thu May 13 12:54:02 PDT 2004

PatchSet 4727 
Date: 2004/05/13 19:18:19
Author: guilhem
Branch: HEAD
Tag: (none) 
Log:
Fixes for RuleBasedCollator.

        * libraries/javalib/java/text/RuleBasedCollator.java
        (CollationElement, CollationSorter): Made static.
        (last_tertiary_value, SPECIAL_UNKNOWN_SEQ): Introduced special
        collation elements for unknown characters; this is for sequences
        using resets.
        (mergeRules): Fixed insertion point.
        (buildCollationVector): Initialize last_tertiary_value.
        (compare): Handle special cases of accented characters.
        (getDefaultAccentedElement): New method.
        (getCollationFixed): Fixed key building.

        * libraries/javalib/java/text/CollationElementIterator.java
        (text_indexes): New field.
        (nextBlock, previousBlock): Updated textIndex according to
        text_indexes.
        (setText): Build text_indexes. Better handling of expansion ordering.

Members: 
	ChangeLog:1.2301->1.2302 
	libraries/javalib/java/text/CollationElementIterator.java:1.15->1.16 
	libraries/javalib/java/text/RuleBasedCollator.java:1.20->1.21 

Index: kaffe/ChangeLog
diff -u kaffe/ChangeLog:1.2301 kaffe/ChangeLog:1.2302

--- kaffe/ChangeLog:1.2301	Thu May 13 18:59:22 2004
+++ kaffe/ChangeLog	Thu May 13 19:18:19 2004
@@ -1,3 +1,22 @@
+2004-05-13 Guilhem Lavaux <guilhem at kaffe.org>
+
+	* libraries/javalib/java/text/RuleBasedCollator.java
+	(CollationElement, CollationSorter): Made static.
+	(last_tertiary_value, SPECIAL_UNKNOWN_SEQ): Introduced special
+	collation elements for unknown characters; this is for sequences
+	using resets.
+	(mergeRules): Fixed insertion point.
+	(buildCollationVector): Initialize last_tertiary_value.
+	(compare): Handle special cases of accented characters.
+	(getDefaultAccentedElement): New method.
+	(getCollationFixed): Fixed key building.
+	
+	* libraries/javalib/java/text/CollationElementIterator.java
+	(text_indexes): New field.
+	(nextBlock, previousBlock): Updated textIndex according to
+	text_indexes.
+	(setText): Build text_indexes. Better handling of expansion ordering.
+
 2004-05-13  Dalibor Topic  <robilad at kaffe.org>
 
 	*  configure.ac: Allow enabling of jvmpi, xdebugging and 
Index: kaffe/libraries/javalib/java/text/CollationElementIterator.java
diff -u kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.15 kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.16
--- kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.15	Fri Apr 23 17:35:12 2004
+++ kaffe/libraries/javalib/java/text/CollationElementIterator.java	Thu May 13 19:18:21 2004
@@ -92,6 +92,11 @@
   private Object[] text_decomposition;
 
   /**
+   * Array holding, for each entry of text_decomposition, the matching textIndex value.
+   */
+  private int[] text_indexes;
+
+  /**
    * This method initializes a new instance of <code>CollationElementIterator</code>
    * to iterate over the specified <code>String</code> using the rules in the
    * specified <code>RuleBasedCollator</code>.
@@ -112,9 +117,11 @@
       return null;
     
     RuleBasedCollator.CollationElement e =
-      (RuleBasedCollator.CollationElement) text_decomposition[index++];
+      (RuleBasedCollator.CollationElement) text_decomposition[index];
     
-    textIndex += e.key.length();
+    textIndex = text_indexes[index];
+    
+    index++;
 
     return e;
   }
@@ -128,7 +135,7 @@
     RuleBasedCollator.CollationElement e =
       (RuleBasedCollator.CollationElement) text_decomposition[index];
 
-    textIndex -= e.key.length();
+    textIndex = text_indexes[index];
     
     return e;
   }
@@ -231,7 +238,9 @@
   public void setText(String text)
   {
     int idx = 0;
+    int idx_idx = 1;
     int alreadyExpanded = 0;
+    int idxToMove = 0;
 
     this.text = text;
     this.index = 0;
@@ -239,6 +248,8 @@
     String work_text = text.intern();
 
     Vector v = new Vector();
+    Vector vi = new Vector();
+
     // Build element collection ordered as they come in "text".
     while (idx < work_text.length())
       {
@@ -277,11 +288,36 @@
 	
 	if (prefix == null)
 	  {
-	    RuleBasedCollator.CollationElement e =
-	      collator.getDefaultElement(work_text.charAt (idx));
-	    
-	    v.add (e);
-	    idx++;
+	    if (alreadyExpanded > 0)
+	      {
+		RuleBasedCollator.CollationElement e =
+		  collator.getDefaultAccentedElement (work_text.charAt (idx));
+		
+		v.add (e);
+		vi.add (new Integer(idx_idx));
+		idx++;
+		alreadyExpanded--;
+		if (alreadyExpanded == 0)
+		  {
+		    idx_idx += idxToMove;
+		    idxToMove = 0; 
+		  }
+		else
+		  idx_idx++;
+	      }
+	    else
+	      {
+		RuleBasedCollator.CollationElement e =
+		  collator.getDefaultElement (work_text.charAt (idx));
+		Integer i_ref = new Integer(idx_idx);
+
+		v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
+		vi.add (i_ref);
+		v.add (e);
+		vi.add (i_ref);
+		idx_idx++;
+		idx++;
+	      }
 	    continue;
 	  }
 
@@ -290,18 +326,39 @@
 	    work_text = prefix.expansion
 	      + work_text.substring (idx+prefix.key.length());
 	    idx = 0;
-	    alreadyExpanded = prefix.expansion.length();
 	    v.add (prefix);
+	    vi.add (new Integer(idx_idx));
+	    if (alreadyExpanded == 0)
+	      idxToMove = prefix.key.length();
+	    else
+	      idxToMove = 0;
+	    alreadyExpanded += prefix.expansion.length();
 	  }
 	else
 	  {
 	    if (!prefix.ignore)
-	      v.add (prefix);
+	      {
+		v.add (prefix);
+		vi.add (new Integer(idx_idx));
+	      }
 	    idx += prefix.key.length();
+	    if (alreadyExpanded > 0)
+	      {
+		alreadyExpanded -= prefix.key.length();
+		if (alreadyExpanded == 0)
+		  {
+		    idx_idx += idxToMove;
+		    idxToMove = 0;
+		  }
+	      } else
+		idx_idx += prefix.key.length();
 	  }
       }
     
     text_decomposition = v.toArray();
+    text_indexes = new int[vi.size()];
+    for (int i = 0; i < vi.size(); i++) 
+      text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
   }
 
   /**
Index: kaffe/libraries/javalib/java/text/RuleBasedCollator.java
diff -u kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.20 kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.21
--- kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.20	Fri Apr 23 18:38:28 2004
+++ kaffe/libraries/javalib/java/text/RuleBasedCollator.java	Thu May 13 19:18:21 2004
@@ -147,7 +147,7 @@
    * This class describes what rank has a character (or a sequence of characters) 
    * in the lexicographic order. Each element in a rule has a collation element.
    */
-  final class CollationElement
+  final static class CollationElement
   {
     String key;
     int primary;
@@ -189,7 +189,7 @@
    * {@link #mergeRules(int,java.lang.String,java.util.Vector,java.util.Vector)})
    * as a temporary state while merging two sets of instructions.
    */
-  final class CollationSorter
+  final static class CollationSorter
   {
     static final int GREATERP = 0;
     static final int GREATERS = 1;
@@ -230,10 +230,27 @@
   private int last_primary_value;
 
   /**
+   * This is one more than the last tertiary sequence value
+   * assigned for the primary 0, entered into
+   * <code>ce_table</code>. It is used to compute the
+   * ordering value of an unspecified accented character.
+   */
+  private int last_tertiary_value;
+
+  /**
    * This variable is true if accents need to be sorted
    * in the other direction.
    */
   private boolean inverseAccentComparison;
+
+  /**
+   * This collation element is special to unknown sequences.
+   * The JDK uses it to mark and sort the characters which have
+   * no collation rules.
+   */
+  static final CollationElement SPECIAL_UNKNOWN_SEQ = 
+    new CollationElement("", (short) 32767, (short) 0, (short) 0,
+			 (short) 0, null);
   
   /**
    * This method initializes a new instance of <code>RuleBasedCollator</code>
@@ -356,14 +373,14 @@
 	  (CollationSorter) main.elementAt(insertion_point-1);
 	
 	sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
-	
+		
 	main.insertElementAt(sorter, insertion_point);
 	
 	/*
 	 * This is a new set of rules. Append to the list.
 	 */
 	patch.removeElementAt(0);
-	insertion_point = main.size();
+	insertion_point++;
       }
 
     // Now insert all elements of patch at the insertion point.
@@ -392,7 +409,7 @@
   {
     boolean ignoreChars = (base_offset == 0);
     int operator = -1;
-    StringBuffer sb = new StringBuffer("");
+    StringBuffer sb = new StringBuffer();
     boolean doubleQuote = false;
     boolean eatingChars = false;
     boolean nextIsModifier = false;
@@ -605,6 +622,7 @@
     throws ParseException
   {
     int primary_seq = 0;
+    int last_tertiary_seq = 0;
     short secondary_seq = 0;
     short tertiary_seq = 0;
     short equality_seq = 0;
@@ -652,6 +670,8 @@
 	    continue element_loop;
 	  case CollationSorter.GREATERT:
 	    tertiary_seq++;
+	    if (primary_seq == 0)
+	      last_tertiary_seq = tertiary_seq;
 	    equality_seq = 0;
 	    break;
 	  case CollationSorter.IGNORE:
@@ -686,6 +706,7 @@
     ce_table = v.toArray();
 
     last_primary_value = primary_seq+1;
+    last_tertiary_value = last_tertiary_seq+1;
   }
 
   /**
@@ -757,6 +778,17 @@
         // Check for primary strength differences
         int prim1 = CollationElementIterator.primaryOrder(ord1); 
         int prim2 = CollationElementIterator.primaryOrder(ord2); 
+	
+	if (prim1 == 0 && getStrength() < TERTIARY)
+	  {
+	    ct.previousBlock();
+	    continue;
+	  }
+	else if (prim2 == 0 && getStrength() < TERTIARY)
+	  {
+	    cs.previousBlock();
+	    continue;
+	  }
 
         if (prim1 < prim2)
           return -1;
@@ -769,7 +801,7 @@
         int sec1 = CollationElementIterator.secondaryOrder(ord1);
         int sec2 = CollationElementIterator.secondaryOrder(ord2);
 
-        if (sec1 < sec2)
+	if (sec1 < sec2)
           return -1;
         else if (sec1 > sec2)
           return 1;
@@ -833,6 +865,28 @@
   }
 
   /**
+   * This method builds a default collation element for an accented character
+   * without invoking the database created from the rules passed to the constructor.
+   *
+   * @param c Character which needs a collation element.
+   * @return A valid brand new CollationElement instance.
+   */
+  CollationElement getDefaultAccentedElement(char c)
+  {
+    int v;
+
+    // Preliminary support for generic accent sorting inversion (I don't know if all
+    // characters in the range should be sorted backward). This is the place
+    // to fix this if needed.
+    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
+      v = 0x0361 - ((int) c - 0x02B9);
+    else
+      v = (short) c;
+    return new CollationElement("" + c, (short) 0,
+				(short) 0, (short) (last_tertiary_value + v), (short) 0, null);
+  }
+
+  /**
    * This method returns an instance for <code>CollationElementIterator</code>
    * for the specified <code>String</code> under the collation rules for this
    * object.
@@ -894,11 +948,12 @@
         switch (getStrength())
           {
             case PRIMARY:
-               ord = CollationElementIterator.primaryOrder(ord);
-               break;
-
+	      ord = CollationElementIterator.primaryOrder(ord);
+	      break;
+	      
             case SECONDARY:
-               ord = CollationElementIterator.secondaryOrder(ord);
+	      ord = CollationElementIterator.primaryOrder(ord) << 8;
+	      ord |= CollationElementIterator.secondaryOrder(ord);
 
             default:
                break;