[kaffe] HUGE speedup in KaffeEncoder/KaffeDecoder

Mon Dec 6 23:41:20 PST 2004

I was writing a parser for dpkg control files.  My first version was very
stupid.  It did reads of one char, in a loop.

Under Sun's JVM, I got acceptable speeds (3s to parse all of
/var/lib/dpkg/status).  Kaffe, however, took 1.5 minutes (or so).

So, after a bit of digging, I found the problem.  KaffeDecoder (and its
brother, KaffeEncoder) is very inefficient.  When doing single-char reads, it
calls the large read(buf, off, len) routine with an array length of one.
This causes a separate conversion call for each char, and this is *very* slow.

I then noticed in the javadoc for InputStreamReader and OutputStreamWriter
that the implementation is allowed to do internal buffering to make
conversion more efficient.  So that's what I did.

Before doing this, however, I wrote a small test framework.  It tests
input/output, buffered/unbuffered, with/without encoding: 8 tests in all.
I've run it under sun14, kaffe, kaffe-fix, and gcj.  I won't include the
numbers here unless someone asks.

However, I will report on the speed increases I saw.

With my fix in place, and a dumb loop reading (or writing) one char at a time,
I saw a 200-fold read speedup (200 times!) and a 90-fold write speedup.
The stupid version of my parser saw a 25-fold speedup.

Anyways, attached you'll find the PerfTest program I wrote, and the patch
itself.

PS: I do have commit access, but this is a very low-level change, and I wanted
others to see it first.  I haven't run any test cases other than my parsing
program.
-------------- next part --------------
? Makefile.in.es
Index: gnu/java/io/decode/KaffeDecoder.java
===================================================================
RCS file: /cvs/kaffe/kaffe/libraries/javalib/gnu/java/io/decode/KaffeDecoder.java,v
retrieving revision 1.4
diff -u -r1.4 KaffeDecoder.java

--- gnu/java/io/decode/KaffeDecoder.java	18 May 2004 16:13:28 -0000	1.4
+++ gnu/java/io/decode/KaffeDecoder.java	7 Dec 2004 07:32:28 -0000
@@ -56,6 +56,14 @@
 
 ByteToCharConverter converter;
 
+/* These three vars are used for the general buffer management */
+private int ptr = 0;
+private int end = 0;
+private char[] buffer = new char[4096];
+
+/* This array is a temporary used during the conversion process. */
+private byte[] inbuf = new byte[4096];
+
 /*************************************************************************/
 
 /*
@@ -103,15 +111,83 @@
   return(cbuf);
 }
 
-/**
-  * Read the requested number of chars from the underlying stream.
-  * Some byte fragments may remain in the converter and they are
-  * used by the following read.  So read and convertToChars must
-  * not be used for the same converter instance.
-  */
-// copied from kaffe's java/io/InputStreamReader.java
+
+public int
+read() throws IOException
+{
+    synchronized (lock) {
+        if (ptr < end) return buffer[ptr++];
+        int r = _read(buffer, 0, buffer.length);
+        if (r == -1) return -1;
+        ptr = 1;
+        end = r;
+        return buffer[0];
+    }
+}
+
 public int
-read ( char cbuf[], int off, int len ) throws IOException
+read(char cbuf[], int off, int len) throws IOException
+{
+    synchronized (lock) {
+        int bytesRead = 0;
+        if (len < end - ptr) {
+            System.arraycopy(buffer, ptr, cbuf, off, len);
+            ptr += len;
+            return len;
+        }
+
+        int preCopy = end - ptr;
+        if (preCopy > 0) {
+            System.arraycopy(buffer, ptr, cbuf, off, preCopy);
+            off += preCopy;
+            len -= preCopy;
+            bytesRead += preCopy;
+        }
+        ptr = 0;
+        end = 0; 
+
+        int remainder = len % buffer.length;
+        int bulkCopy = len - remainder;
+        if (bulkCopy > 0) {
+            int r = _read(cbuf, off, bulkCopy);
+            if (r == -1) {
+                return bytesRead == 0 ? -1 : bytesRead;
+            }
+            off += r;
+            len -= r;
+            bytesRead += r;
+        }
+
+        if (remainder > 0) {
+            int r = _read(buffer, 0, buffer.length);
+            if (r == -1) {
+                return bytesRead == 0 ? -1 : bytesRead;
+            }
+            end = r;
+            int remainderCopy = r < remainder ? r : remainder;
+            System.arraycopy(buffer, 0, cbuf, off, remainderCopy);
+            off += remainderCopy;
+            len -= remainderCopy;
+            ptr = remainderCopy;
+            bytesRead += remainderCopy;
+	}
+
+        return bytesRead;
+    }
+}
+
+/*
+ * Read the requested number of chars from the underlying stream.
+ * Some byte fragments may remain in the converter and they are
+ * used by the following read.  So read and convertToChars must
+ * not be used for the same converter instance.
+ *
+ * This method *must* be called with lock held, as it uses the
+ * instance variable inbuf.
+ */
+// copied from kaffe's java/io/InputStreamReader.java
+private int
+_read ( char cbuf[], int off, int len ) throws IOException
 {
     if (len < 0 || off < 0 || off + len > cbuf.length) {
             throw new IndexOutOfBoundsException();
@@ -119,8 +195,6 @@
 
     int outlen = 0;
     boolean seenEOF = false;
-
-    byte[] inbuf = new byte[2048];
 
     while (len > outlen) {
         // First we retreive anything left in the converter
Index: gnu/java/io/encode/KaffeEncoder.java
===================================================================
RCS file: /cvs/kaffe/kaffe/libraries/javalib/gnu/java/io/encode/KaffeEncoder.java,v
retrieving revision 1.4
diff -u -r1.4 KaffeEncoder.java
--- gnu/java/io/encode/KaffeEncoder.java	6 Dec 2004 21:20:40 -0000	1.4
+++ gnu/java/io/encode/KaffeEncoder.java	7 Dec 2004 07:32:28 -0000
@@ -65,6 +65,15 @@
 
 CharToByteConverter converter;
  
+/* These 2 variables are used in the general buffer management */
+private int ptr = 0;
+private char[] buffer = new char[4096];
+
+/* This buffer is used during the conversion process.  It gets expanded
+ * automatically when it overflows.
+ */
+private byte[] bbuf = new byte[buffer.length * 3];
+
 /*************************************************************************/
 
 /*
@@ -127,9 +136,74 @@
   * Write the requested number of chars to the underlying stream
   */
 public void
+write(int c) throws IOException
+{
+    synchronized (lock) {
+        buffer[ptr++] = (char) c;
+        if (ptr == buffer.length) localFlush();
+    }
+}
+
+/**
+  * Write the requested number of chars to the underlying stream
+  */
+public void
 write(char[] buf, int offset, int len) throws IOException
 {
-  out.write(convertToBytes(buf, offset, len));
+    synchronized (lock) {
+        if (len > buffer.length - ptr) {
+            localFlush();
+            _write(buf, offset, len);
+        } else if (len == 1) {
+            buffer[ptr++] = buf[offset];
+        } else {
+            System.arraycopy(buf, offset, buffer, ptr, len);
+            ptr += len;
+        }
+    }
+}
+
+/* This *must* be called with the lock held. */
+private void
+localFlush() throws IOException
+{
+    if (ptr > 0) {
+        // Reset ptr to 0 before the _write call.  Otherwise, a
+        // very nasty loop could occur.  Please don't ask.
+        int length = ptr;
+        ptr = 0;
+        _write(buffer, 0, length);
+    }
+}
+
+public void
+flush() throws IOException
+{
+    synchronized (lock) {
+        localFlush();
+        out.flush();
+    }
+}
+/*
+ * Write the requested number of chars to the underlying stream
+ *
+ * This method *must* be called with the lock held, as it accesses
+ * the variable bbuf.
+ */
+private void
+_write(char[] buf, int offset, int len) throws IOException
+{
+    int bbuflen = converter.convert(buf, offset, len, bbuf, 0, bbuf.length);
+    int bufferNeeded = 0;
+    while (bbuflen > 0) {
+        out.write(bbuf, 0, bbuflen);
+        bbuflen = converter.flush(bbuf, 0, bbuf.length);
+        bufferNeeded += bbuflen;
+    }
+    if (bufferNeeded > bbuf.length) {
+        // increase size of array
+        bbuf = new byte[bufferNeeded];
+    }
 }
 
 } // class KaffEncoder
-------------- next part --------------
A non-text attachment was scrubbed...
Name: PerfTest.java
Type: text/x-java
Size: 14192 bytes
Desc: 
Url : http://kaffe.org/pipermail/kaffe/attachments/20041207/4b2b3f85/attachment-0002.java