[Commit] cairo_u128 ChangeLog, 1.1, 1.2 Makefile, 1.3, 1.4 cairo_test.c, 1.3, 1.4 cairo_uint128.c, 1.3, 1.4 cairo_uint128.h, 1.3, 1.4 checkdata.5c, 1.3, 1.4 makedata.5c, 1.1.1.1, 1.2

Keith Packard commit at keithp.com
Tue May 25 14:21:55 PDT 2004


Committed by: keithp

Update of /local/src/CVS/cairo_u128
In directory home.keithp.com:/tmp/cvs-serv5781

Modified Files:
	ChangeLog Makefile cairo_test.c cairo_uint128.c 
	cairo_uint128.h checkdata.5c makedata.5c 
Log Message:
2004-05-25  Keith Packard  <keithp at keithp.com>

	* Makefile:
	* cairo_test.c: (read_uint128), (main):
	* cairo_uint128.c: (_cairo_uint64_divmod),
	(_cairo_leading_zeros64), (_cairo_uint32s_to_uint64),
	(_cairo_uint64_to_uint32), (_cairo_uint64_lsl),
	(_cairo_uint64_rsl), (_cairo_leading_zeros32),
	(_cairo_uint64x32_normalized_divmod), (_cairo_uint128_to_uint64),
	(_cairo_uint128_to_uint32), (_cairo_uint128_lsl),
	(_cairo_uint128_rsl), (_cairo_uint128x64_normalized_divmod),
	(_cairo_uint128_divmod):
	* cairo_uint128.h:
	* checkdata.5c:
	* makedata.5c:
	Redo tests so they just run primitives on pairs of numbers.
	Add divide operations (algorithm from GCC).


Index: ChangeLog
===================================================================
RCS file: /local/src/CVS/cairo_u128/ChangeLog,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- a/ChangeLog	25 May 2004 06:43:27 -0000	1.1
+++ b/ChangeLog	25 May 2004 21:21:52 -0000	1.2
@@ -1,3 +1,21 @@
+2004-05-25  Keith Packard  <keithp at keithp.com>
+
+	* Makefile:
+	* cairo_test.c: (read_uint128), (main):
+	* cairo_uint128.c: (_cairo_uint64_divmod),
+	(_cairo_leading_zeros64), (_cairo_uint32s_to_uint64),
+	(_cairo_uint64_to_uint32), (_cairo_uint64_lsl),
+	(_cairo_uint64_rsl), (_cairo_leading_zeros32),
+	(_cairo_uint64x32_normalized_divmod), (_cairo_uint128_to_uint64),
+	(_cairo_uint128_to_uint32), (_cairo_uint128_lsl),
+	(_cairo_uint128_rsl), (_cairo_uint128x64_normalized_divmod),
+	(_cairo_uint128_divmod):
+	* cairo_uint128.h:
+	* checkdata.5c:
+	* makedata.5c:
+	Redo tests so they just run primitives on pairs of numbers.
+	Add divide operations (algorithm from GCC).
+
 2004-05-24  Keith Packard  <keithp at keithp.com>
 
 	* .cvsignore:

Index: Makefile
===================================================================
RCS file: /local/src/CVS/cairo_u128/Makefile,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- a/Makefile	25 May 2004 06:43:12 -0000	1.3
+++ b/Makefile	25 May 2004 21:21:52 -0000	1.4
@@ -1,28 +1,27 @@
-WIDTH=8
-SETS=1000
-REPS=1000
+SETS=10000
+REPS=100
 HAVE_UINT64_T=1
 
-CFLAGS=-O4 -DWIDTH=$(WIDTH) -DHAVE_UINT64_T=$(HAVE_UINT64_T)
+CFLAGS=-O4 -DHAVE_UINT64_T=$(HAVE_UINT64_T)
 OBJS = cairo_uint128.o cairo_test.o
 
 check: c nickle
 	cmp c nickle
 
 c: cairo_test data Makefile
-	./cairo_test $(WIDTH) $(SETS) < data > $@
+	./cairo_test $(SETS) < data > $@
 
 nickle: checkdata.5c data Makefile
-	./checkdata.5c $(WIDTH) < data > $@
+	./checkdata.5c < data > $@
 
 data: makedata.5c Makefile
-	./makedata.5c $(WIDTH) $(SETS) > data
+	./makedata.5c $(SETS) > data
 
 cairo_test: $(OBJS)
 	$(CC) $(CFLAGS) -o $@ $(OBJS)
 
-bench:
-	time ./cairo_test $(WIDTH) $(SETS) $(REPS) < data
+bench: cairo_test data Makefile
+	time ./cairo_test $(SETS) $(REPS) < data
 
 $(OBJS): cairo_uint128.h Makefile
 

Index: cairo_test.c
===================================================================
RCS file: /local/src/CVS/cairo_u128/cairo_test.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- a/cairo_test.c	25 May 2004 06:43:12 -0000	1.3
+++ b/cairo_test.c	25 May 2004 21:21:52 -0000	1.4
@@ -25,6 +25,7 @@
 #include "cairo_uint128.h"
 #include <stdio.h>
 #include <stdlib.h>
+#include <ctype.h>
 
 void
 zero_dump_uint64 (cairo_uint64_t q, int zero)
@@ -60,6 +61,37 @@
     printf ("\n");
 }
 
+cairo_uint128_t
+read_uint128 (FILE *f)
+{
+    cairo_uint128_t x;
+    int		    c;
+
+    x = _cairo_uint32_to_uint128 (0);
+    while ((c = getc (f)) != EOF)
+	if (isxdigit (c))
+	{
+	    do
+	    {
+		int	v;
+
+		if ('0' <= c && c <= '9')
+		    v = c - '0';
+		else if ('a' <= c && c <= 'f')
+		    v = c - 'a' + 10;
+		else if ('A' <= c && c <= 'F')
+		    v = c - 'A' + 10;
+		x = _cairo_uint128_lsl (x, 4);
+		x = _cairo_uint128_add (x, _cairo_uint32_to_uint128 (v));
+		c = getc (f);
+	    } while (isxdigit (c));
+	    if (c != EOF)
+		ungetc (c, f);
+	    return x;
+	}
+    exit (0);
+}
+
 int
 main (int argc, char **argv)
 {
@@ -67,9 +99,9 @@
     cairo_uint128_t *q, qv;
     cairo_uint64_t  *d, dv;
     int		    c;
-    int		    width = atoi (argv[1]);
-    int		    sets = atoi (argv[2]);
-    int		    reps = argv[3] ? atoi(argv[3]) : 1;
+    int		    width = 2;
+    int		    sets = atoi (argv[1]);
+    int		    reps = argv[2] ? atoi(argv[2]) : 1;
     int		    r;
     int		    s;
 
@@ -79,50 +111,119 @@
     {
 	for (c = 0; c < width; c++)
 	{
-	    if (scanf ("%u", &i) != 1)
-		exit (0);
-	    q[c + s*width] = _cairo_uint32_to_uint128 (i);
-	    d[c + s*width] = _cairo_uint32_to_uint64 (i);
+	    q[c + s*width] = read_uint128 (stdin);
+	    d[c + s*width] = _cairo_uint128_to_uint64 (q[c+s*width]);
 	}
     }
     for (r = 0; r < reps; r++)
     {
 	for (s = 0; s < sets; s++)
 	{
-	    qv = _cairo_uint32_to_uint128 (1);
-	    dv = _cairo_uint32_to_uint64 (1);
-	    for (c = 0; c < width; c++)
+	    if (reps == 1)
 	    {
-		qv = _cairo_uint128_mul (qv, q[c + s*width]);
-		dv = _cairo_uint64_mul (dv, d[c + s*width]);
+		dump_uint128 (q[0+s*width]);
+		dump_uint128 (q[1+s*width]);
 	    }
+	    
+	    qv = _cairo_uint128_add (q[0+s*width], q[1+s*width]);
 	    if (reps == 1)
 	    {
+		printf ("+ ");
 		dump_uint128 (qv);
+	    }
+	    
+	    dv = _cairo_uint64_add (d[0+s*width], d[1+s*width]);
+	    if (reps == 1)
+	    {
+		printf ("+ ");
 		dump_uint64 (dv);
 	    }
-
-	    for (c = 0; c < width; c++)
+	    
+	    qv = _cairo_uint128_sub (q[0+s*width], q[1+s*width]);
+	    if (reps == 1)
 	    {
-		qv = _cairo_uint128_sub (qv, q[c + s*width]);
-		dv = _cairo_uint64_sub (dv, d[c + s*width]);
+		printf ("- ");
+		dump_uint128 (qv);
 	    }
+	    
+	    dv = _cairo_uint64_sub (d[0+s*width], d[1+s*width]);
 	    if (reps == 1)
 	    {
+		printf ("- ");
+		dump_uint64 (dv);
+	    }
+	    
+	    qv = _cairo_uint128_mul (q[0+s*width], q[1+s*width]);
+	    if (reps == 1)
+	    {
+		printf ("* ");
 		dump_uint128 (qv);
+	    }
+	    
+	    dv = _cairo_uint64_mul (d[0+s*width], d[1+s*width]);
+	    if (reps == 1)
+	    {
+		printf ("* ");
 		dump_uint64 (dv);
 	    }
-
-	    qv = _cairo_uint32_to_uint128 (0);
-	    dv = _cairo_uint32_to_uint64 (0);
-	    for (c = 0; c < width; c++)
+	    
+	    qv = _cairo_uint128_rsl (q[0+s*width],
+				     _cairo_uint128_to_uint32(q[1+s*width]) % 128);
+	    if (reps == 1)
 	    {
-		qv = _cairo_uint128_add (qv, q[c + s*width]);
-		dv = _cairo_uint64_add (dv, d[c + s*width]);
+		printf (">> ");
+		dump_uint128 (qv);
+	    }
+	    
+	    dv = _cairo_uint64_rsl (d[0+s*width],
+				    _cairo_uint64_to_uint32(d[1+s*width]) % 64);
+	    if (reps == 1)
+	    {
+		printf (">> ");
+		dump_uint64 (dv);
+	    }
+	    
+	    qv = _cairo_uint128_lsl (q[0+s*width],
+				     _cairo_uint128_to_uint32(q[1+s*width]) % 128);
+	    if (reps == 1)
+	    {
+		printf ("<< ");
+		dump_uint128 (qv);
+	    }
+	    
+	    dv = _cairo_uint64_lsl (d[0+s*width],
+				    _cairo_uint64_to_uint32(d[1+s*width]) % 64);
+	    if (reps == 1)
+	    {
+		printf ("<< ");
+		dump_uint64 (dv);
+	    }
+	    
+	    qv = _cairo_uint128_divmod (q[0+s*width], q[1+s*width]).quo;
+	    if (reps == 1)
+	    {
+		printf ("/ ");
+		dump_uint128 (qv);
+	    }
+	    
+	    dv = _cairo_uint64_divmod (d[0+s*width], d[1+s*width]).quo;
+	    if (reps == 1)
+	    {
+		printf ("/ ");
+		dump_uint64 (dv);
 	    }
+	    
+	    qv = _cairo_uint128_divmod (q[0+s*width], q[1+s*width]).rem;
 	    if (reps == 1)
 	    {
+		printf ("%% ");
 		dump_uint128 (qv);
+	    }
+
+	    dv = _cairo_uint64_divmod (d[0+s*width], d[1+s*width]).rem;
+	    if (reps == 1)
+	    {
+		printf ("%% ");
 		dump_uint64 (dv);
 	    }
 	}

Index: cairo_uint128.c
===================================================================
RCS file: /local/src/CVS/cairo_u128/cairo_uint128.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- a/cairo_uint128.c	25 May 2004 06:43:12 -0000	1.3
+++ b/cairo_uint128.c	25 May 2004 21:21:52 -0000	1.4
@@ -24,26 +24,57 @@
 
 #include "cairo_uint128.h"
 
+static const unsigned char top_bit[256] =
+{
+    0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+};
+
 #if HAVE_UINT64_T
 
-typedef uint32_t    cairo_uint_t;
-typedef uint64_t    cairo_2uint_t;
-#define NUM_IN_UINT128	4
-#define BITS_IN_UINT	32
-#define MAX_UINT	0xffffffff
+#define _cairo_uint32s_to_uint64(h,l) ((uint64_t) (h) << 32 | (l))
 
-#else
+const cairo_quorem64_t
+_cairo_uint64_divmod (cairo_uint64_t num, cairo_uint64_t den)
+{
+    cairo_quorem64_t	qr;
 
-typedef uint16_t    cairo_uint_t;
-typedef uint32_t    cairo_2uint_t;
-#define NUM_IN_UINT128	8
-#define NUM_IN_UINT64	CAIRO_NUM_IN_UINT64
-#define BITS_IN_UINT	16
-#define MAX_UINT	0xffff
+    qr.quo = num / den;
+    qr.rem = num % den;
+    return qr;
+}
 
-#endif
+static const int
+_cairo_leading_zeros64 (cairo_uint64_t q)
+{
+    int	top = 0;
+    
+    if (q >= (uint64_t) 0x10000 << 16)
+    {
+	top += 32;
+	q >>= 32;
+    }
+    if (q >= (uint64_t) 0x10000)
+    {
+	top += 16;
+	q >>= 16;
+    }
+    if (q >= (uint64_t) 0x100)
+    {
+	top += 8;
+	q >>= 8;
+    }
+    top += top_bit [q];
+    return 64 - top;
+}
 
-#if !HAVE_UINT64_T
+#else
 
 const cairo_uint64_t
 _cairo_uint32_to_uint64 (uint32_t i)
@@ -55,6 +86,22 @@
     return q;
 }
 
+static const cairo_uint64_t
+_cairo_uint32s_to_uint64 (uint32_t h, uint32_t l)
+{
+    cairo_uint64_t	q;
+
+    q.b[0] = l;
+    q.b[1] = h;
+    return q;
+}
+
+const uint32_t
+_cairo_uint64_to_uint32 (cairo_uint64_t i)
+{
+    return i.b[0];
+}
+
 const cairo_uint64_t
 _cairo_uint64_add (cairo_uint64_t a, cairo_uint64_t b)
 {
@@ -121,6 +168,40 @@
     return s;
 }
 
+const cairo_uint64_t
+_cairo_uint64_lsl (cairo_uint64_t a, int shift)
+{
+    if (shift >= 32)
+    {
+	a.b[1] = a.b[0];
+	a.b[0] = 0;
+	shift -= 32;
+    }
+    if (shift)
+    {
+	a.b[1] = a.b[1] << shift | a.b[0] >> (32 - shift);
+	a.b[0] = a.b[0] << shift;
+    }
+    return a;
+}
+
+const cairo_uint64_t
+_cairo_uint64_rsl (cairo_uint64_t a, int shift)
+{
+    if (shift >= 32)
+    {
+	a.b[0] = a.b[1];
+	a.b[1] = 0;
+	shift -= 32;
+    }
+    if (shift)
+    {
+	a.b[0] = a.b[0] >> shift | a.b[1] << (32 - shift);
+	a.b[1] = a.b[1] >> shift;
+    }
+    return a;
+}
+
 const int
 _cairo_uint64_lt (cairo_uint64_t a, cairo_uint64_t b)
 {
@@ -134,6 +215,213 @@
     return a.b[1] == b.b[1] && a.b[0] == b.b[0];
 }
 
+/*
+ * The design of this algorithm comes from GCC,
+ * but the actual implementation is new
+ */
+
+static const int
+_cairo_leading_zeros32 (uint32_t i)
+{
+    int	top;
+    
+    if (i < 0x100)
+	top = 0;
+    else if (i < 0x10000)
+	top = 8;
+    else if (i < 0x1000000)
+	top = 16;
+    else
+	top = 24;
+    top = top + top_bit [i >> top];
+    return 32 - top;
+}
+
+static const int
+_cairo_leading_zeros64 (cairo_uint64_t d)
+{
+    if (d.b[1])
+	return _cairo_leading_zeros32 (d.b[1]);
+    else
+	return 32 + _cairo_leading_zeros32 (d.b[0]);
+}
+
+typedef struct _cairo_quorem32_t {
+    uint32_t	quo;
+    uint32_t	rem;
+} cairo_quorem32_t;
+
+/*
+ * den >= num.b[1]
+ */
+static const cairo_quorem32_t
+_cairo_uint64x32_normalized_divmod (cairo_uint64_t num, uint32_t den)
+{
+    cairo_quorem32_t	qr;
+    uint32_t		q0, q1, r0, r1;
+    uint16_t		d0, d1;
+    uint32_t		t;
+
+    d0 = den & 0xffff;
+    d1 = den >> 16;
+
+    q1 = num.b[1] / d1;
+    r1 = num.b[1] % d1;
+
+    t = q1 * d0;
+    r1 = (r1 << 16) | (num.b[0] >> 16);
+    if (r1 < t)
+    {
+	q1--;
+	r1 += den;
+	if (r1 >= den && r1 < t)
+	{
+	    q1--;
+	    r1 += den;
+	}
+    }
+
+    r1 -= t;
+
+    q0 = r1 / d1;
+    r0 = r1 % d1;
+    t = q0 * d0;
+    r0 = (r0 << 16) | (num.b[0] & 0xffff);
+    if (r0 < t)
+    {
+	q0--;
+	r0 += den;
+	if (r0 >= den && r0 < t)
+	{
+	    q0--;
+	    r0 += den;
+	}
+    }
+    r0 -= t;
+    qr.quo = (q1 << 16) | q0;
+    qr.rem = r0;
+    return qr;
+}
+
+const cairo_quorem64_t
+_cairo_uint64_divmod (cairo_uint64_t num, cairo_uint64_t den)
+{
+    cairo_quorem32_t	qr32;
+    cairo_quorem64_t	qr;
+    int			norm;
+    uint32_t		q1, q0, r1, r0;
+    uint16_t		d0;
+    cairo_uint64_t	t;
+
+    if (den.b[1] == 0)
+    {
+	if (den.b[0] > num.b[1])
+	{
+	    /* 0q = nn / 0d */
+
+	    norm = _cairo_leading_zeros32 (den.b[0]);
+	    if (norm)
+	    {
+		den.b[0] <<= norm;
+		num = _cairo_uint64_lsl (num, norm);
+	    }
+	    q1 = 0;
+	}
+	else
+	{
+	    /* qq = NN / 0d */
+
+	    if (den.b[0] == 0)
+		den.b[0] = 1 / den.b[0];
+
+	    norm = _cairo_leading_zeros32 (den.b[0]);
+	    if (norm)
+	    {
+		cairo_uint64_t	num1;
+		den.b[0] <<= norm;
+		num1 = _cairo_uint64_rsl (num, 32 - norm);
+		qr32 = _cairo_uint64x32_normalized_divmod (num1, den.b[0]);
+		q1 = qr32.quo;
+		num.b[1] = qr32.rem;
+		num.b[0] <<= norm;
+	    }
+	    else
+	    {
+		num.b[1] -= den.b[0];
+		q1 = 1;
+	    }
+	}
+	qr32 = _cairo_uint64x32_normalized_divmod (num, den.b[0]);
+	q0 = qr32.quo;
+	r1 = 0;
+	r0 = qr32.rem >> norm;
+    }
+    else
+    {
+	if (den.b[1] > num.b[1])
+	{
+	    /* 00 = nn / DD */
+	    q0 = q1 = 0;
+	    r0 = num.b[0];
+	    r1 = num.b[1];
+	}
+	else
+	{
+	    /* 0q = NN / dd */
+
+	    norm = _cairo_leading_zeros32 (den.b[1]);
+	    if (norm == 0)
+	    {
+		if (num.b[1] > den.b[1] || num.b[0] >= den.b[0])
+		{
+		    q0 = 1;
+		    num = _cairo_uint64_sub (num, den);
+		}
+		else
+		    q0 = 0;
+		
+		q1 = 0;
+		r0 = num.b[0];
+		r1 = num.b[1];
+	    }
+	    else
+	    {
+		cairo_uint64_t	num1;
+		cairo_uint64_t	part;
+
+		num1 = _cairo_uint64_rsl (num, 32 - norm);
+		den = _cairo_uint64_lsl (den, norm);
+
+		qr32 = _cairo_uint64x32_normalized_divmod (num1, den.b[1]);
+		part = _cairo_uint32x32_64_mul (qr32.quo, den.b[0]);
+
+		q0 = qr32.quo;
+		
+		num.b[0] <<= norm;
+		num.b[1] = qr32.rem;
+		
+		if (_cairo_uint64_gt (part, num))
+		{
+		    q0--;
+		    part = _cairo_uint64_sub (part, den);
+		}
+
+		q1 = 0;
+
+		num = _cairo_uint64_sub (num, part);
+		num = _cairo_uint64_rsl (num, norm);
+		r0 = num.b[0];
+		r1 = num.b[1];
+	    }
+	}
+    }
+    qr.quo.b[0] = q0;
+    qr.quo.b[1] = q1;
+    qr.rem.b[0] = r0;
+    qr.rem.b[1] = r1;
+    return qr;
+}
+
 #endif /* !HAVE_UINT64_T */
 
 const cairo_uint128_t
@@ -156,6 +444,18 @@
     return q;
 }
 
+const cairo_uint64_t
+_cairo_uint128_to_uint64 (cairo_uint128_t i)
+{
+    return i.b[0];
+}
+
+const uint32_t
+_cairo_uint128_to_uint32 (cairo_uint128_t i)
+{
+    return _cairo_uint64_to_uint32 (i.b[0]);
+}
+
 const cairo_uint128_t
 _cairo_uint128_add (cairo_uint128_t a, cairo_uint128_t b)
 {
@@ -269,6 +569,42 @@
     return s;
 }
 
+const cairo_uint128_t
+_cairo_uint128_lsl (cairo_uint128_t a, int shift)
+{
+    if (shift >= 64)
+    {
+	a.b[1] = a.b[0];
+	a.b[0] = _cairo_uint32_to_uint64 (0);
+	shift -= 64;
+    }
+    if (shift)
+    {
+	a.b[1] = _cairo_uint64_add (_cairo_uint64_lsl (a.b[1], shift),
+				    _cairo_uint64_rsl (a.b[0], (64 - shift)));
+	a.b[0] = _cairo_uint64_lsl (a.b[0], shift);
+    }
+    return a;
+}
+
+const cairo_uint128_t
+_cairo_uint128_rsl (cairo_uint128_t a, int shift)
+{
+    if (shift >= 64)
+    {
+	a.b[0] = a.b[1];
+	a.b[1] = _cairo_uint32_to_uint64 (0);
+	shift -= 64;
+    }
+    if (shift)
+    {
+	a.b[0] = _cairo_uint64_add (_cairo_uint64_rsl (a.b[0], shift),
+				    _cairo_uint64_lsl (a.b[1], (64 - shift)));
+	a.b[1] = _cairo_uint64_rsl (a.b[1], shift);
+    }
+    return a;
+}
+
 const int
 _cairo_uint128_lt (cairo_uint128_t a, cairo_uint128_t b)
 {
@@ -284,3 +620,195 @@
 	    _cairo_uint64_eq (a.b[0], b.b[0]));
 }
 
+/*
+ * The design of this algorithm comes from GCC,
+ * but the actual implementation is new
+ */
+
+/*
+ * den >= num.b[1]
+ */
+static const cairo_quorem64_t
+_cairo_uint128x64_normalized_divmod (cairo_uint128_t num, cairo_uint64_t den)
+{
+    cairo_quorem64_t	qr64;
+    cairo_quorem64_t	qr;
+    uint32_t		q0, q1;
+    cairo_uint64_t	r0, r1;
+    uint32_t		d0, d1;
+    cairo_uint64_t	t;
+
+    d0 = uint64_lo32 (den);
+    d1 = uint64_hi32 (den);
+
+    qr64 = _cairo_uint64_divmod (num.b[1], _cairo_uint32_to_uint64 (d1));
+    q1 = _cairo_uint64_to_uint32 (qr64.quo);
+    r1 = qr64.rem;
+
+    t = _cairo_uint32x32_64_mul (q1, d0);
+    
+    r1 = _cairo_uint64_add (_cairo_uint64_lsl (r1, 32),
+			    _cairo_uint64_rsl (num.b[0], 32));
+    
+    if (_cairo_uint64_lt (r1, t))
+    {
+	q1--;
+	r1 = _cairo_uint64_add (r1, den);
+	if (_cairo_uint64_ge (r1, den) && _cairo_uint64_lt (r1, t))
+	{
+	    q1--;
+	    r1 = _cairo_uint64_add (r1, den);
+	}
+    }
+
+    r1 = _cairo_uint64_sub (r1, t);
+
+    qr64 = _cairo_uint64_divmod (r1, _cairo_uint32_to_uint64 (d1));
+
+    q0 = _cairo_uint64_to_uint32 (qr64.quo);
+    r0 = qr64.rem;
+
+    t = _cairo_uint32x32_64_mul (q0, d0);
+
+    r0 = _cairo_uint64_add (_cairo_uint64_lsl (r0, 32),
+			    _cairo_uint32_to_uint64 (_cairo_uint64_to_uint32 (num.b[0])));
+    if (_cairo_uint64_lt (r0, t))
+    {
+	q0--;
+	r0 = _cairo_uint64_add (r0, den);
+	if (_cairo_uint64_ge (r0, den) && _cairo_uint64_lt (r0, t))
+	{
+	    q0--;
+	    r0 = _cairo_uint64_add (r0, den);
+	}
+    }
+
+    r0 = _cairo_uint64_sub (r0, t);
+
+    qr.quo = _cairo_uint32s_to_uint64 (q1, q0);
+    qr.rem = r0;
+    return qr;
+}
+
+const cairo_quorem128_t
+_cairo_uint128_divmod (cairo_uint128_t num, cairo_uint128_t den)
+{
+    cairo_quorem64_t	qr64;
+    cairo_quorem128_t	qr;
+    int			norm;
+    cairo_uint64_t	q1, q0, r1, r0;
+    uint32_t		d0;
+    cairo_uint128_t	t;
+
+    if (_cairo_uint64_eq (den.b[1], _cairo_uint32_to_uint64 (0)))
+    {
+	if (_cairo_uint64_gt (den.b[0], num.b[1]))
+	{
+	    /* 0q = nn / 0d */
+
+	    norm = _cairo_leading_zeros64 (den.b[0]);
+	    if (norm)
+	    {
+		den.b[0] = _cairo_uint64_lsl (den.b[0], norm);
+		num = _cairo_uint128_lsl (num, norm);
+	    }
+	    q1 = _cairo_uint32_to_uint64 (0);
+	}
+	else
+	{
+	    /* qq = NN / 0d */
+
+	    if (_cairo_uint64_eq (den.b[0], _cairo_uint32_to_uint64 (0)))
+		den.b[0] = _cairo_uint64_divmod (_cairo_uint32_to_uint64 (1),
+						 den.b[0]).quo;
+
+	    norm = _cairo_leading_zeros64 (den.b[0]);
+	    if (norm)
+	    {
+		cairo_uint128_t	num1;
+
+		den.b[0] = _cairo_uint64_lsl (den.b[0], norm);
+		num1 = _cairo_uint128_rsl (num, 64 - norm);
+		qr64 = _cairo_uint128x64_normalized_divmod (num1, den.b[0]);
+		q1 = qr64.quo;
+		num.b[1] = qr64.rem;
+		num.b[0] = _cairo_uint64_lsl (num.b[0], norm);
+	    }
+	    else
+	    {
+		num.b[1] = _cairo_uint64_sub (num.b[1], den.b[0]);
+		q1 = _cairo_uint32_to_uint64 (1);
+	    }
+	}
+	qr64 = _cairo_uint128x64_normalized_divmod (num, den.b[0]);
+	q0 = qr64.quo;
+	r1 = _cairo_uint32_to_uint64 (0);
+	r0 = _cairo_uint64_rsl (qr64.rem, norm);
+    }
+    else
+    {
+	if (_cairo_uint64_gt (den.b[1], num.b[1]))
+	{
+	    /* 00 = nn / DD */
+	    q0 = q1 = _cairo_uint32_to_uint64 (0);
+	    r0 = num.b[0];
+	    r1 = num.b[1];
+	}
+	else
+	{
+	    /* 0q = NN / dd */
+
+	    norm = _cairo_leading_zeros64 (den.b[1]);
+	    if (norm == 0)
+	    {
+		if (_cairo_uint64_gt (num.b[1], den.b[1]) ||
+		    _cairo_uint64_ge (num.b[0], den.b[0]))
+		{
+		    q0 = _cairo_uint32_to_uint64 (1);
+		    num = _cairo_uint128_sub (num, den);
+		}
+		else
+		    q0 = _cairo_uint32_to_uint64 (0);
+		
+		q1 = _cairo_uint32_to_uint64 (0);
+		r0 = num.b[0];
+		r1 = num.b[1];
+	    }
+	    else
+	    {
+		cairo_uint128_t	num1;
+		cairo_uint128_t	part;
+
+		num1 = _cairo_uint128_rsl (num, 64 - norm);
+		den = _cairo_uint128_lsl (den, norm);
+
+		qr64 = _cairo_uint128x64_normalized_divmod (num1, den.b[1]);
+		part = _cairo_uint64x64_128_mul (qr64.quo, den.b[0]);
+
+		q0 = qr64.quo;
+		
+		num.b[0] = _cairo_uint64_lsl (num.b[0], norm);
+		num.b[1] = qr64.rem;
+		
+		if (_cairo_uint128_gt (part, num))
+		{
+		    q0 = _cairo_uint64_sub (q0, _cairo_uint32_to_uint64 (1));
+		    part = _cairo_uint128_sub (part, den);
+		}
+
+		q1 = _cairo_uint32_to_uint64 (0);
+
+		num = _cairo_uint128_sub (num, part);
+		num = _cairo_uint128_rsl (num, norm);
+		r0 = num.b[0];
+		r1 = num.b[1];
+	    }
+	}
+    }
+    qr.quo.b[0] = q0;
+    qr.quo.b[1] = q1;
+    qr.rem.b[0] = r0;
+    qr.rem.b[1] = r1;
+    return qr;
+}
+

Index: cairo_uint128.h
===================================================================
RCS file: /local/src/CVS/cairo_u128/cairo_uint128.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- a/cairo_uint128.h	25 May 2004 06:43:12 -0000	1.3
+++ b/cairo_uint128.h	25 May 2004 21:21:52 -0000	1.4
@@ -38,6 +38,9 @@
 extern const cairo_uint64_t
 _cairo_uint32_to_uint64 (uint32_t i);
 
+extern const uint32_t
+_cairo_uint64_to_uint32 (cairo_uint64_t i);
+
 extern const cairo_uint64_t
 _cairo_uint64_add (cairo_uint64_t a, cairo_uint64_t b);
 
@@ -50,6 +53,12 @@
 extern const cairo_uint64_t
 _cairo_uint32x32_64_mul (uint32_t a, uint32_t b);
 
+extern const cairo_uint64_t
+_cairo_uint64_lsl (cairo_uint64_t a, int shift);
+
+extern const cairo_uint64_t
+_cairo_uint64_rsl (cairo_uint64_t a, int shift);
+
 extern const int
 _cairo_uint64_less (cairo_uint64_t a, cairo_uint64_t b);
 
@@ -61,15 +70,26 @@
 typedef uint64_t    cairo_uint64_t;
 
 #define _cairo_uint32_to_uint64(i)	((uint64_t) (i))
+#define _cairo_uint64_to_uint32(i)	((uint32_t) (i))
 #define _cairo_uint64_add(a,b)		((a) + (b))
 #define _cairo_uint64_sub(a,b)		((a) - (b))
 #define _cairo_uint64_mul(a,b)		((a) * (b))
 #define _cairo_uint32x32_64_mul(a,b)	((uint64_t) (a) * (b))
+#define _cairo_uint64_lsl(a,b)		((a) << (b))
+#define _cairo_uint64_rsl(a,b)		((a) >> (b))
 #define _cairo_uint64_lt(a,b)		((a) < (b))
 #define _cairo_uint64_eq(a,b)		((a) == (b))
 
 #endif
 
+typedef struct _cairo_quorem64 {
+    cairo_uint64_t	quo;
+    cairo_uint64_t	rem;
+} cairo_quorem64_t;
+
+extern const cairo_quorem64_t
+_cairo_uint64_divmod (cairo_uint64_t num, cairo_uint64_t den);
+
 #define _cairo_uint64_gt(a,b) _cairo_uint64_lt(b,a)
 #define _cairo_uint64_le(a,b) (!_cairo_uint64_gt(a,b))
 #define _cairo_uint64_ge(a,b) (!_cairo_uint64_lt(a,b))
@@ -83,6 +103,15 @@
 _cairo_uint32_to_uint128 (uint32_t i);
 
 extern const cairo_uint128_t
+_cairo_uint64_to_uint128 (cairo_uint64_t i);
+
+extern const cairo_uint64_t
+_cairo_uint128_to_uint64 (cairo_uint128_t i);
+
+extern const uint32_t
+_cairo_uint128_to_uint32 (cairo_uint128_t i);
+
+extern const cairo_uint128_t
 _cairo_uint128_add (cairo_uint128_t a, cairo_uint128_t b);
 
 extern const cairo_uint128_t
@@ -91,6 +120,12 @@
 extern const cairo_uint128_t
 _cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b);
 
+extern const cairo_uint128_t
+_cairo_uint128_lsl (cairo_uint128_t a, int shift);
+
+extern const cairo_uint128_t
+_cairo_uint128_rsl (cairo_uint128_t a, int shift);
+
 extern const int
 _cairo_uint128_lt (cairo_uint128_t a, cairo_uint128_t b);
 
@@ -102,4 +137,12 @@
 #define _cairo_uint128_ge(a,b) (!_cairo_uint128_lt(a,b))
 #define _cairo_uint128_ne(a,b) (!_cairo_uint128_eq(a,b))
 
+typedef struct _cairo_quorem128 {
+    cairo_uint128_t	quo;
+    cairo_uint128_t	rem;
+} cairo_quorem128_t;
+
+extern const cairo_quorem128_t
+_cairo_uint128_divmod (cairo_uint128_t num, cairo_uint128_t den);
+
 #endif /* CAIRO_UINT128_H */

Index: checkdata.5c
===================================================================
RCS file: /local/src/CVS/cairo_u128/checkdata.5c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- a/checkdata.5c	25 May 2004 06:43:12 -0000	1.3
+++ b/checkdata.5c	25 May 2004 21:21:52 -0000	1.4
@@ -1,12 +1,12 @@
 #!/usr/bin/env nickle
 
-int width = string_to_integer (argv[1]);
+int width = 2;
 
 int[width] readnums ()
 {
     int[width] x;
     for (int i = 0; i < width; i++)
-	if (File::scanf ("%d", &x[i]) != 1)
+	if (File::scanf ("%x", &x[i]) != 1)
 	    exit (0);
     return x;
 }
@@ -27,17 +27,30 @@
     return p;
 }
 
+void op (string name, int (int, int, int) f, int[2] n)
+{
+    void dump (int bits)
+    {
+	try {
+	    int	mask = (1 << bits) - 1;
+	    printf ("%s %x\n", name, f (n[0] & mask, n[1] & mask, bits) & mask);
+	} catch divide_by_zero (string msg, real a, real b) {
+	    return;
+	}
+    }
+    dump (128);
+    dump (64);
+}
+
 for (;;)
 {
     int[*]  n = readnums();
-    int	    bits128 = (1 << 128) - 1;
-    int	    bits64 = (1 << 64) - 1;
-    int	    p = prod(n);
-    int	    s = sum(n);
-    printf ("%x\n", p & bits128);
-    printf ("%x\n", p & bits64);
-    printf ("%x\n", (p - s) & bits128);
-    printf ("%x\n", (p - s) & bits64);
-    printf ("%x\n", (s) & bits128);
-    printf ("%x\n", (s) & bits64);
+    printf ("%x\n%x\n", n[0], n[1]);
+    op ("+", int func (int a, int b, int bits) { return a + b; }, n);
+    op ("-", int func (int a, int b, int bits) { return a - b; }, n);
+    op ("*", int func (int a, int b, int bits) { return a * b; }, n);
+    op (">>", int func (int a, int b, int bits) { return a >> (b % bits); }, n);
+    op ("<<", int func (int a, int b, int bits) { return a << (b % bits); }, n);
+    op ("/", int func (int a, int b, int bits) { return a // b; }, n);
+    op ("%", int func (int a, int b, int bits) { return a % b; }, n);
 }

Index: makedata.5c
===================================================================
RCS file: /local/src/CVS/cairo_u128/makedata.5c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -d -r1.1.1.1 -r1.2
--- a/makedata.5c	21 May 2004 08:59:32 -0000	1.1.1.1
+++ b/makedata.5c	25 May 2004 21:21:52 -0000	1.2
@@ -1,12 +1,11 @@
 #!/usr/bin/env nickle
 
-int width = string_to_integer (argv[1]);
-int nsets = string_to_integer (argv[2]);
+int nsets = string_to_integer (argv[1]);
 
 for (int n = 0; n < nsets; n++)
 {
-    for (int i = 0; i < width; i++)
-	printf ("%d ", PRNG::randbits(32));
+    for (int i = 0; i < 2; i++)
+	printf ("%x ", PRNG::randbits(128));
     printf ("\n");
 }
 	




More information about the Commit mailing list