[kaffe] HUGE speedup in KaffeEncoder/KaffeDecoder
Adam Heath
doogie at brainfood.com
Mon Dec 6 23:41:20 PST 2004
I was writing a parser for dpkg control files. My first version was very
stupid. It did reads of one char, in a loop.
Under sun, I got acceptable speeds(3s to parse all of /var/lib/dpkg/status).
Kaffe, however, took 1.5 minutes(or so).
So, after a bit of digging, I found the problem. KaffeDecoder(and it's
brother, KaffeEncoder) are very inefficient. When doing single reads, it
calls the large read(buf, off, len) routine, with an array length of one.
This causes a separate conversion call for each char, and this is *very* slow.
I then noticed in the javadoc for InputStreamReader, and OutputStreamWriter,
that the implementation was allowed to do internal buffering, to make
conversion more efficient. So that's what I did.
Before doing this, however, I wrote a small test framework. It tests
input/output, buffered/unbuffered, with/without encoding. 8 tests. I've ran
it under sun14, kaffe, kaffe-fix, and gcj. I won't include the numbers here,
unless someone asks.
However, I will report on the speed increases I saw.
With my fix in place, and a dump loop reading(or writing) one char at time, I
saw a read increase of 200 fold(200 times!), and a write increase of 90 fold.
The stupid version of my parser saw a 25 fold increase.
Anyways, attached you'll find the PerfTest program I wrote, and the patch
itself.
ps: I do have commit access, but this is a very low-level change, and wanted
others to see it first. I haven't run any test cases, other than my parsing
program.
-------------- next part --------------
? Makefile.in.es
Index: gnu/java/io/decode/KaffeDecoder.java
===================================================================
RCS file: /cvs/kaffe/kaffe/libraries/javalib/gnu/java/io/decode/KaffeDecoder.java,v
retrieving revision 1.4
diff -u -r1.4 KaffeDecoder.java
--- gnu/java/io/decode/KaffeDecoder.java 18 May 2004 16:13:28 -0000 1.4
+++ gnu/java/io/decode/KaffeDecoder.java 7 Dec 2004 07:32:28 -0000
@@ -56,6 +56,14 @@
ByteToCharConverter converter;
+/* These three vars are used for the general buffer management */
+private int ptr = 0;
+private int end = 0;
+private char[] buffer = new char[4096];
+
+/* This array is a temporary used during the conversion process. */
+private byte[] inbuf = new byte[4096];
+
/*************************************************************************/
/*
@@ -103,15 +111,83 @@
return(cbuf);
}
-/**
- * Read the requested number of chars from the underlying stream.
- * Some byte fragments may remain in the converter and they are
- * used by the following read. So read and convertToChars must
- * not be used for the same converter instance.
- */
-// copied from kaffe's java/io/InputStreamReader.java
+
+public int
+read() throws IOException
+{
+ synchronized (lock) {
+ if (ptr < end) return buffer[ptr++];
+ int r = _read(buffer, 0, buffer.length);
+ if (r == -1) return -1;
+ ptr = 1;
+ end = r;
+ return buffer[0];
+ }
+}
+
public int
-read ( char cbuf[], int off, int len ) throws IOException
+read(char cbuf[], int off, int len) throws IOException
+{
+ synchronized (lock) {
+ int bytesRead = 0;
+ if (len < end - ptr) {
+ System.arraycopy(buffer, ptr, cbuf, off, len);
+ ptr += len;
+ return len;
+ }
+
+ int preCopy = end - ptr;
+ if (preCopy > 0) {
+ System.arraycopy(buffer, ptr, cbuf, off, preCopy);
+ off += preCopy;
+ len -= preCopy;
+ bytesRead += preCopy;
+ }
+ ptr = 0;
+ end = 0;
+
+ int remainder = len % buffer.length;
+ int bulkCopy = len - remainder;
+ if (bulkCopy > 0) {
+ int r = _read(cbuf, off, bulkCopy);
+ if (r == -1) {
+ return bytesRead == 0 ? -1 : bytesRead;
+ }
+ off += r;
+ len -= r;
+ bytesRead += r;
+ }
+
+ if (remainder > 0) {
+ int r = _read(buffer, 0, buffer.length);
+ if (r == -1) {
+ return bytesRead == 0 ? -1 : bytesRead;
+ }
+ end = r;
+ int remainderCopy = r < remainder ? r : remainder;
+ System.arraycopy(buffer, 0, cbuf, off, remainderCopy);
+ off += remainderCopy;
+ len -= remainderCopy;
+ ptr = remainderCopy;
+ bytesRead += remainderCopy;
+ }
+
+ return bytesRead;
+ }
+}
+
+/*
+ * Read the requested number of chars from the underlying stream.
+ * Some byte fragments may remain in the converter and they are
+ * used by the following read. So read and convertToChars must
+ * not be used for the same converter instance.
+ *
+ * This method *must* be called with lock held, as it uses the
+ * instance variable inbuf.
+ */
+// copied from kaffe's java/io/InputStreamReader.java
+private int
+_read ( char cbuf[], int off, int len ) throws IOException
{
if (len < 0 || off < 0 || off + len > cbuf.length) {
throw new IndexOutOfBoundsException();
@@ -119,8 +195,6 @@
int outlen = 0;
boolean seenEOF = false;
-
- byte[] inbuf = new byte[2048];
while (len > outlen) {
// First we retreive anything left in the converter
Index: gnu/java/io/encode/KaffeEncoder.java
===================================================================
RCS file: /cvs/kaffe/kaffe/libraries/javalib/gnu/java/io/encode/KaffeEncoder.java,v
retrieving revision 1.4
diff -u -r1.4 KaffeEncoder.java
--- gnu/java/io/encode/KaffeEncoder.java 6 Dec 2004 21:20:40 -0000 1.4
+++ gnu/java/io/encode/KaffeEncoder.java 7 Dec 2004 07:32:28 -0000
@@ -65,6 +65,15 @@
CharToByteConverter converter;
+/* These 2 variables are used in the general buffer management */
+private int ptr = 0;
+private char[] buffer = new char[4096];
+
+/* This buffer is used during the conversion process. It gets expanded
+ * automatically when it overflows.
+ */
+private byte[] bbuf = new byte[buffer.length * 3];
+
/*************************************************************************/
/*
@@ -127,9 +136,74 @@
* Write the requested number of chars to the underlying stream
*/
public void
+write(int c) throws IOException
+{
+ synchronized (lock) {
+ buffer[ptr++] = (char) c;
+ if (ptr == buffer.length) localFlush();
+ }
+}
+
+/**
+ * Write the requested number of chars to the underlying stream
+ */
+public void
write(char[] buf, int offset, int len) throws IOException
{
- out.write(convertToBytes(buf, offset, len));
+ synchronized (lock) {
+ if (len > buffer.length - ptr) {
+ localFlush();
+ _write(buf, offset, len);
+ } else if (len == 1) {
+ buffer[ptr++] = buf[offset];
+ } else {
+ System.arraycopy(buf, offset, buffer, ptr, len);
+ ptr += len;
+ }
+ }
+}
+
+/* This *must* be called with the lock held. */
+private void
+localFlush() throws IOException
+{
+ if (ptr > 0) {
+ // Reset ptr to 0 before the _write call. Otherwise, a
+ // very nasty loop could occur. Please don't ask.
+ int length = ptr;
+ ptr = 0;
+ _write(buffer, 0, length);
+ }
+}
+
+public void
+flush() throws IOException
+{
+ synchronized (lock) {
+ localFlush();
+ out.flush();
+ }
+}
+/*
+ * Write the requested number of chars to the underlying stream
+ *
+ * This method *must* be called with the lock held, as it accesses
+ * the variable bbuf.
+ */
+private void
+_write(char[] buf, int offset, int len) throws IOException
+{
+ int bbuflen = converter.convert(buf, offset, len, bbuf, 0, bbuf.length);
+ int bufferNeeded = 0;
+ while (bbuflen > 0) {
+ out.write(bbuf, 0, bbuflen);
+ bbuflen = converter.flush(bbuf, 0, bbuf.length);
+ bufferNeeded += bbuflen;
+ }
+ if (bufferNeeded > bbuf.length) {
+ // increase size of array
+ bbuf = new byte[bufferNeeded];
+ }
}
} // class KaffEncoder
-------------- next part --------------
A non-text attachment was scrubbed...
Name: PerfTest.java
Type: text/x-java
Size: 14192 bytes
Desc:
Url : http://kaffe.org/pipermail/kaffe/attachments/20041207/4b2b3f85/attachment-0002.java
More information about the kaffe
mailing list