--- /dev/null
+diff -Nura klibc-1.1.16/include/arch/sparc/klibc/archsignal.h klibc-1.1.16.sparc/include/arch/sparc/klibc/archsignal.h
+--- klibc-1.1.16/include/arch/sparc/klibc/archsignal.h 2006-01-06 03:11:43.000000000 -0300
++++ klibc-1.1.16.sparc/include/arch/sparc/klibc/archsignal.h 2006-01-25 14:49:01.000000000 -0300
+@@ -10,6 +10,7 @@
+
+ /* Hidden definitions */
+
++#ifndef _KLIBC_SIGACTION /* skip struct __new_sigaction .. stack_t when -D_KLIBC_SIGACTION is set (added to arch/sparc/MCONFIG by this patch) */
+ struct __new_sigaction {
+ __sighandler_t sa_handler;
+ unsigned long sa_flags;
+@@ -34,5 +35,6 @@
+ int ss_flags;
+ size_t ss_size;
+ } stack_t;
++#endif /* !_KLIBC_SIGACTION */
+
+ #endif
+diff -Nura klibc-1.1.16/klibc/arch/sparc/divrem.m4 klibc-1.1.16.sparc/klibc/arch/sparc/divrem.m4
+--- klibc-1.1.16/klibc/arch/sparc/divrem.m4 2006-01-06 03:11:43.000000000 -0300
++++ klibc-1.1.16.sparc/klibc/arch/sparc/divrem.m4 1969-12-31 21:00:00.000000000 -0300
+@@ -1,276 +0,0 @@
+-/*
+- * Copyright (c) 1992, 1993
+- * The Regents of the University of California. All rights reserved.
+- *
+- * This software was developed by the Computer Systems Engineering group
+- * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+- * contributed to Berkeley.
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- * notice, this list of conditions and the following disclaimer.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in the
+- * documentation and/or other materials provided with the distribution.
+- * 3. All advertising materials mentioning features or use of this software
+- * must display the following acknowledgement:
+- * This product includes software developed by the University of
+- * California, Berkeley and its contributors.
+- * 4. Neither the name of the University nor the names of its contributors
+- * may be used to endorse or promote products derived from this software
+- * without specific prior written permission.
+- *
+- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+- * SUCH DAMAGE.
+- *
+- * from: Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp
+- * $NetBSD: divrem.m4,v 1.4 1997/10/09 10:07:54 lukem Exp $
+- */
+-
+-/*
+- * Division and remainder, from Appendix E of the Sparc Version 8
+- * Architecture Manual, with fixes from Gordon Irlam.
+- */
+-
+-#if defined(LIBC_SCCS) && !defined(lint)
+- .asciz "@(#)divrem.m4 8.1 (Berkeley) 6/4/93"
+-#endif /* LIBC_SCCS and not lint */
+-
+-/*
+- * Input: dividend and divisor in %o0 and %o1 respectively.
+- *
+- * m4 parameters:
+- * NAME name of function to generate
+- * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1
+- * S S=true => signed; S=false => unsigned
+- *
+- * Algorithm parameters:
+- * N how many bits per iteration we try to get (4)
+- * WORDSIZE total number of bits (32)
+- *
+- * Derived constants:
+- * TWOSUPN 2^N, for label generation (m4 exponentiation currently broken)
+- * TOPBITS number of bits in the top `decade' of a number
+- *
+- * Important variables:
+- * Q the partial quotient under development (initially 0)
+- * R the remainder so far, initially the dividend
+- * ITER number of main division loop iterations required;
+- * equal to ceil(log2(quotient) / N). Note that this
+- * is the log base (2^N) of the quotient.
+- * V the current comparand, initially divisor*2^(ITER*N-1)
+- *
+- * Cost:
+- * Current estimate for non-large dividend is
+- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+- * different path, as the upper bits of the quotient must be developed
+- * one bit at a time.
+- */
+-
+-define(N, `4')
+-define(TWOSUPN, `16')
+-define(WORDSIZE, `32')
+-define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))
+-
+-define(dividend, `%o0')
+-define(divisor, `%o1')
+-define(Q, `%o2')
+-define(R, `%o3')
+-define(ITER, `%o4')
+-define(V, `%o5')
+-
+-/* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */
+-define(T, `%g1')
+-define(SC, `%g7')
+-ifelse(S, `true', `define(SIGN, `%g6')')
+-
+-/*
+- * This is the recursive definition for developing quotient digits.
+- *
+- * Parameters:
+- * $1 the current depth, 1 <= $1 <= N
+- * $2 the current accumulation of quotient bits
+- * N max depth
+- *
+- * We add a new bit to $2 and either recurse or insert the bits in
+- * the quotient. R, Q, and V are inputs and outputs as defined above;
+- * the condition codes are expected to reflect the input R, and are
+- * modified to reflect the output R.
+- */
+-define(DEVELOP_QUOTIENT_BITS,
+-` ! depth $1, accumulated bits $2
+- bl L.$1.eval(TWOSUPN+$2)
+- srl V,1,V
+- ! remainder is positive
+- subcc R,V,R
+- ifelse($1, N,
+- ` b 9f
+- add Q, ($2*2+1), Q
+- ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
+-L.$1.eval(TWOSUPN+$2):
+- ! remainder is negative
+- addcc R,V,R
+- ifelse($1, N,
+- ` b 9f
+- add Q, ($2*2-1), Q
+- ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
+- ifelse($1, 1, `9:')')
+-
+-#include <machine/asm.h>
+-#include <machine/trap.h>
+-
+-FUNC(NAME)
+-ifelse(S, `true',
+-` ! compute sign of result; if neither is negative, no problem
+- orcc divisor, dividend, %g0 ! either negative?
+- bge 2f ! no, go do the divide
+- ifelse(OP, `div',
+- `xor divisor, dividend, SIGN',
+- `mov dividend, SIGN') ! compute sign in any case
+- tst divisor
+- bge 1f
+- tst dividend
+- ! divisor is definitely negative; dividend might also be negative
+- bge 2f ! if dividend not negative...
+- neg divisor ! in any case, make divisor nonneg
+-1: ! dividend is negative, divisor is nonnegative
+- neg dividend ! make dividend nonnegative
+-2:
+-')
+- ! Ready to divide. Compute size of quotient; scale comparand.
+- orcc divisor, %g0, V
+- bnz 1f
+- mov dividend, R
+-
+- ! Divide by zero trap. If it returns, return 0 (about as
+- ! wrong as possible, but that is what SunOS does...).
+- t ST_DIV0
+- retl
+- clr %o0
+-
+-1:
+- cmp R, V ! if divisor exceeds dividend, done
+- blu Lgot_result ! (and algorithm fails otherwise)
+- clr Q
+- sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
+- cmp R, T
+- blu Lnot_really_big
+- clr ITER
+-
+- ! `Here the dividend is >= 2^(31-N) or so. We must be careful here,
+- ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+- ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+- ! Compute ITER in an unorthodox manner: know we need to shift V into
+- ! the top decade: so do not even bother to compare to R.'
+- 1:
+- cmp V, T
+- bgeu 3f
+- mov 1, SC
+- sll V, N, V
+- b 1b
+- inc ITER
+-
+- ! Now compute SC.
+- 2: addcc V, V, V
+- bcc Lnot_too_big
+- inc SC
+-
+- ! We get here if the divisor overflowed while shifting.
+- ! This means that R has the high-order bit set.
+- ! Restore V and subtract from R.
+- sll T, TOPBITS, T ! high order bit
+- srl V, 1, V ! rest of V
+- add V, T, V
+- b Ldo_single_div
+- dec SC
+-
+- Lnot_too_big:
+- 3: cmp V, R
+- blu 2b
+- nop
+- be Ldo_single_div
+- nop
+- /* NB: these are commented out in the V8-Sparc manual as well */
+- /* (I do not understand this) */
+- ! V > R: went too far: back up 1 step
+- ! srl V, 1, V
+- ! dec SC
+- ! do single-bit divide steps
+- !
+- ! We have to be careful here. We know that R >= V, so we can do the
+- ! first divide step without thinking. BUT, the others are conditional,
+- ! and are only done if R >= 0. Because both R and V may have the high-
+- ! order bit set in the first step, just falling into the regular
+- ! division loop will mess up the first time around.
+- ! So we unroll slightly...
+- Ldo_single_div:
+- deccc SC
+- bl Lend_regular_divide
+- nop
+- sub R, V, R
+- mov 1, Q
+- b Lend_single_divloop
+- nop
+- Lsingle_divloop:
+- sll Q, 1, Q
+- bl 1f
+- srl V, 1, V
+- ! R >= 0
+- sub R, V, R
+- b 2f
+- inc Q
+- 1: ! R < 0
+- add R, V, R
+- dec Q
+- 2:
+- Lend_single_divloop:
+- deccc SC
+- bge Lsingle_divloop
+- tst R
+- b,a Lend_regular_divide
+-
+-Lnot_really_big:
+-1:
+- sll V, N, V
+- cmp V, R
+- bleu 1b
+- inccc ITER
+- be Lgot_result
+- dec ITER
+-
+- tst R ! set up for initial iteration
+-Ldivloop:
+- sll Q, N, Q
+- DEVELOP_QUOTIENT_BITS(1, 0)
+-Lend_regular_divide:
+- deccc ITER
+- bge Ldivloop
+- tst R
+- bl,a Lgot_result
+- ! non-restoring fixup here (one instruction only!)
+-ifelse(OP, `div',
+-` dec Q
+-', ` add R, divisor, R
+-')
+-
+-Lgot_result:
+-ifelse(S, `true',
+-` ! check to see if answer should be < 0
+- tst SIGN
+- bl,a 1f
+- ifelse(OP, `div', `neg Q', `neg R')
+-1:')
+- retl
+- ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
+diff -Nura klibc-1.1.16/klibc/arch/sparc/Makefile.inc klibc-1.1.16.sparc/klibc/arch/sparc/Makefile.inc
+--- klibc-1.1.16/klibc/arch/sparc/Makefile.inc 2006-01-06 03:11:43.000000000 -0300
++++ klibc-1.1.16.sparc/klibc/arch/sparc/Makefile.inc 2006-01-25 14:24:02.000000000 -0300
+@@ -8,12 +8,6 @@
+ #
+
+ ARCHOBJS = \
+- arch/$(ARCH)/sdiv.o \
+- arch/$(ARCH)/udiv.o \
+- arch/$(ARCH)/srem.o \
+- arch/$(ARCH)/urem.o \
+- arch/$(ARCH)/smul.o \
+- arch/$(ARCH)/umul.o \
+ arch/$(ARCH)/setjmp.o \
+ arch/$(ARCH)/syscall.o \
+ arch/$(ARCH)/sysfork.o \
+@@ -23,29 +17,5 @@
+ libgcc/__umoddi3.o \
+ libgcc/__udivmoddi4.o
+
+-arch/$(ARCH)/sdiv.S: arch/$(ARCH)/divrem.m4
+- @echo 'building $@ from $^'
+- @(echo "define(NAME,\`.div')define(OP,\`div')define(S,\`true')"; \
+- cat $^) | m4 > $@
+- @chmod 444 $@
+-
+-arch/$(ARCH)/udiv.S: arch/$(ARCH)/divrem.m4
+- @echo 'building $@ from $^'
+- @(echo "define(NAME,\`.udiv')define(OP,\`div')define(S,\`false')"; \
+- cat $^) | m4 > $@
+- @chmod 444 $@
+-
+-arch/$(ARCH)/srem.S: arch/$(ARCH)/divrem.m4
+- @echo 'building $@ from $^'
+- @(echo "define(NAME,\`.rem')define(OP,\`rem')define(S,\`true')"; \
+- cat $^) | m4 > $@
+- @chmod 444 $@
+-
+-arch/$(ARCH)/urem.S: arch/$(ARCH)/divrem.m4
+- @echo 'building $@ from $^'
+- @(echo "define(NAME,\`.urem')define(OP,\`rem')define(S,\`false')"; \
+- cat $^) | m4 > $@
+- @chmod 444 $@
+-
+ archclean:
+- rm -f arch/$(ARCH)/?div.S arch/$(ARCH)/?rem.S
++# Nothing to clean: the m4-generated ?div.S/?rem.S sources are no longer built.
+diff -Nura klibc-1.1.16/klibc/arch/sparc/MCONFIG klibc-1.1.16.sparc/klibc/arch/sparc/MCONFIG
+--- klibc-1.1.16/klibc/arch/sparc/MCONFIG 2006-01-06 03:11:43.000000000 -0300
++++ klibc-1.1.16.sparc/klibc/arch/sparc/MCONFIG 2006-01-25 14:48:42.000000000 -0300
+@@ -7,7 +7,7 @@
+ # accordingly.
+ #
+
+-OPTFLAGS = -Os -m32 -mptr32
++OPTFLAGS = -Os -m32 -mptr32 -mcpu=v8 -mtune=v8 -D_KLIBC_SIGACTION
+ BITSIZE = 32
+
+ # Extra linkflags when building the shared version of the library
+diff -Nura klibc-1.1.16/klibc/arch/sparc/smul.S klibc-1.1.16.sparc/klibc/arch/sparc/smul.S
+--- klibc-1.1.16/klibc/arch/sparc/smul.S 2006-01-06 03:11:43.000000000 -0300
++++ klibc-1.1.16.sparc/klibc/arch/sparc/smul.S 1969-12-31 21:00:00.000000000 -0300
+@@ -1,160 +0,0 @@
+-/* $NetBSD: mul.S,v 1.3 1997/07/16 14:37:42 christos Exp $ */
+-
+-/*
+- * Copyright (c) 1992, 1993
+- * The Regents of the University of California. All rights reserved.
+- *
+- * This software was developed by the Computer Systems Engineering group
+- * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+- * contributed to Berkeley.
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- * notice, this list of conditions and the following disclaimer.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in the
+- * documentation and/or other materials provided with the distribution.
+- * 3. All advertising materials mentioning features or use of this software
+- * must display the following acknowledgement:
+- * This product includes software developed by the University of
+- * California, Berkeley and its contributors.
+- * 4. Neither the name of the University nor the names of its contributors
+- * may be used to endorse or promote products derived from this software
+- * without specific prior written permission.
+- *
+- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+- * SUCH DAMAGE.
+- *
+- * from: Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp
+- */
+-
+-#include <machine/asm.h>
+-#if defined(LIBC_SCCS) && !defined(lint)
+-#if 0
+- .asciz "@(#)mul.s 8.1 (Berkeley) 6/4/93"
+-#else
+- RCSID("$NetBSD: mul.S,v 1.3 1997/07/16 14:37:42 christos Exp $")
+-#endif
+-#endif /* LIBC_SCCS and not lint */
+-
+-/*
+- * Signed multiply, from Appendix E of the Sparc Version 8
+- * Architecture Manual.
+- *
+- * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
+- * the 64-bit product).
+- *
+- * This code optimizes short (less than 13-bit) multiplies.
+- */
+-
+-FUNC(.mul)
+- mov %o0, %y ! multiplier -> Y
+- andncc %o0, 0xfff, %g0 ! test bits 12..31
+- be Lmul_shortway ! if zero, can do it the short way
+- andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
+-
+- /*
+- * Long multiply. 32 steps, followed by a final shift step.
+- */
+- mulscc %o4, %o1, %o4 ! 1
+- mulscc %o4, %o1, %o4 ! 2
+- mulscc %o4, %o1, %o4 ! 3
+- mulscc %o4, %o1, %o4 ! 4
+- mulscc %o4, %o1, %o4 ! 5
+- mulscc %o4, %o1, %o4 ! 6
+- mulscc %o4, %o1, %o4 ! 7
+- mulscc %o4, %o1, %o4 ! 8
+- mulscc %o4, %o1, %o4 ! 9
+- mulscc %o4, %o1, %o4 ! 10
+- mulscc %o4, %o1, %o4 ! 11
+- mulscc %o4, %o1, %o4 ! 12
+- mulscc %o4, %o1, %o4 ! 13
+- mulscc %o4, %o1, %o4 ! 14
+- mulscc %o4, %o1, %o4 ! 15
+- mulscc %o4, %o1, %o4 ! 16
+- mulscc %o4, %o1, %o4 ! 17
+- mulscc %o4, %o1, %o4 ! 18
+- mulscc %o4, %o1, %o4 ! 19
+- mulscc %o4, %o1, %o4 ! 20
+- mulscc %o4, %o1, %o4 ! 21
+- mulscc %o4, %o1, %o4 ! 22
+- mulscc %o4, %o1, %o4 ! 23
+- mulscc %o4, %o1, %o4 ! 24
+- mulscc %o4, %o1, %o4 ! 25
+- mulscc %o4, %o1, %o4 ! 26
+- mulscc %o4, %o1, %o4 ! 27
+- mulscc %o4, %o1, %o4 ! 28
+- mulscc %o4, %o1, %o4 ! 29
+- mulscc %o4, %o1, %o4 ! 30
+- mulscc %o4, %o1, %o4 ! 31
+- mulscc %o4, %o1, %o4 ! 32
+- mulscc %o4, %g0, %o4 ! final shift
+-
+- ! If %o0 was negative, the result is
+- ! (%o0 * %o1) + (%o1 << 32))
+- ! We fix that here.
+-
+- tst %o0
+- bge 1f
+- rd %y, %o0
+-
+- ! %o0 was indeed negative; fix upper 32 bits of result by subtracting
+- ! %o1 (i.e., return %o4 - %o1 in %o1).
+- retl
+- sub %o4, %o1, %o1
+-
+-1:
+- retl
+- mov %o4, %o1
+-
+-Lmul_shortway:
+- /*
+- * Short multiply. 12 steps, followed by a final shift step.
+- * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+- * but there is no problem with %o0 being negative (unlike above).
+- */
+- mulscc %o4, %o1, %o4 ! 1
+- mulscc %o4, %o1, %o4 ! 2
+- mulscc %o4, %o1, %o4 ! 3
+- mulscc %o4, %o1, %o4 ! 4
+- mulscc %o4, %o1, %o4 ! 5
+- mulscc %o4, %o1, %o4 ! 6
+- mulscc %o4, %o1, %o4 ! 7
+- mulscc %o4, %o1, %o4 ! 8
+- mulscc %o4, %o1, %o4 ! 9
+- mulscc %o4, %o1, %o4 ! 10
+- mulscc %o4, %o1, %o4 ! 11
+- mulscc %o4, %o1, %o4 ! 12
+- mulscc %o4, %g0, %o4 ! final shift
+-
+- /*
+- * %o4 has 20 of the bits that should be in the low part of the
+- * result; %y has the bottom 12 (as %y's top 12). That is:
+- *
+- * %o4 %y
+- * +----------------+----------------+
+- * | -12- | -20- | -12- | -20- |
+- * +------(---------+------)---------+
+- * --hi-- ----low-part----
+- *
+- * The upper 12 bits of %o4 should be sign-extended to form the
+- * high part of the product (i.e., highpart = %o4 >> 20).
+- */
+-
+- rd %y, %o5
+- sll %o4, 12, %o0 ! shift middle bits left 12
+- srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left
+- or %o5, %o0, %o0 ! construct low part of result
+- retl
+- sra %o4, 20, %o1 ! ... and extract high part of result
+diff -Nura klibc-1.1.16/klibc/arch/sparc/umul.S klibc-1.1.16.sparc/klibc/arch/sparc/umul.S
+--- klibc-1.1.16/klibc/arch/sparc/umul.S 2006-01-06 03:11:43.000000000 -0300
++++ klibc-1.1.16.sparc/klibc/arch/sparc/umul.S 1969-12-31 21:00:00.000000000 -0300
+@@ -1,193 +0,0 @@
+-/* $NetBSD: umul.S,v 1.3 1997/07/16 14:37:44 christos Exp $ */
+-
+-/*
+- * Copyright (c) 1992, 1993
+- * The Regents of the University of California. All rights reserved.
+- *
+- * This software was developed by the Computer Systems Engineering group
+- * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+- * contributed to Berkeley.
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- * notice, this list of conditions and the following disclaimer.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in the
+- * documentation and/or other materials provided with the distribution.
+- * 3. All advertising materials mentioning features or use of this software
+- * must display the following acknowledgement:
+- * This product includes software developed by the University of
+- * California, Berkeley and its contributors.
+- * 4. Neither the name of the University nor the names of its contributors
+- * may be used to endorse or promote products derived from this software
+- * without specific prior written permission.
+- *
+- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+- * SUCH DAMAGE.
+- *
+- * from: Header: umul.s,v 1.4 92/06/25 13:24:05 torek Exp
+- */
+-
+-#include <machine/asm.h>
+-#if defined(LIBC_SCCS) && !defined(lint)
+-#if 0
+- .asciz "@(#)umul.s 8.1 (Berkeley) 6/4/93"
+-#else
+- RCSID("$NetBSD: umul.S,v 1.3 1997/07/16 14:37:44 christos Exp $")
+-#endif
+-#endif /* LIBC_SCCS and not lint */
+-
+-/*
+- * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+- * upper 32 bits of the 64-bit product).
+- *
+- * This code optimizes short (less than 13-bit) multiplies. Short
+- * multiplies require 25 instruction cycles, and long ones require
+- * 45 instruction cycles.
+- *
+- * On return, overflow has occurred (%o1 is not zero) if and only if
+- * the Z condition code is clear, allowing, e.g., the following:
+- *
+- * call .umul
+- * nop
+- * bnz overflow (or tnz)
+- */
+-
+-FUNC(.umul)
+- or %o0, %o1, %o4
+- mov %o0, %y ! multiplier -> Y
+- andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
+- be Lmul_shortway ! if zero, can do it the short way
+- andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
+-
+- /*
+- * Long multiply. 32 steps, followed by a final shift step.
+- */
+- mulscc %o4, %o1, %o4 ! 1
+- mulscc %o4, %o1, %o4 ! 2
+- mulscc %o4, %o1, %o4 ! 3
+- mulscc %o4, %o1, %o4 ! 4
+- mulscc %o4, %o1, %o4 ! 5
+- mulscc %o4, %o1, %o4 ! 6
+- mulscc %o4, %o1, %o4 ! 7
+- mulscc %o4, %o1, %o4 ! 8
+- mulscc %o4, %o1, %o4 ! 9
+- mulscc %o4, %o1, %o4 ! 10
+- mulscc %o4, %o1, %o4 ! 11
+- mulscc %o4, %o1, %o4 ! 12
+- mulscc %o4, %o1, %o4 ! 13
+- mulscc %o4, %o1, %o4 ! 14
+- mulscc %o4, %o1, %o4 ! 15
+- mulscc %o4, %o1, %o4 ! 16
+- mulscc %o4, %o1, %o4 ! 17
+- mulscc %o4, %o1, %o4 ! 18
+- mulscc %o4, %o1, %o4 ! 19
+- mulscc %o4, %o1, %o4 ! 20
+- mulscc %o4, %o1, %o4 ! 21
+- mulscc %o4, %o1, %o4 ! 22
+- mulscc %o4, %o1, %o4 ! 23
+- mulscc %o4, %o1, %o4 ! 24
+- mulscc %o4, %o1, %o4 ! 25
+- mulscc %o4, %o1, %o4 ! 26
+- mulscc %o4, %o1, %o4 ! 27
+- mulscc %o4, %o1, %o4 ! 28
+- mulscc %o4, %o1, %o4 ! 29
+- mulscc %o4, %o1, %o4 ! 30
+- mulscc %o4, %o1, %o4 ! 31
+- mulscc %o4, %o1, %o4 ! 32
+- mulscc %o4, %g0, %o4 ! final shift
+-
+-
+- /*
+- * Normally, with the shift-and-add approach, if both numbers are
+- * positive you get the correct result. WIth 32-bit two's-complement
+- * numbers, -x is represented as
+- *
+- * x 32
+- * ( 2 - ------ ) mod 2 * 2
+- * 32
+- * 2
+- *
+- * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
+- * we can treat this as if the radix point were just to the left
+- * of the sign bit (multiply by 2^32), and get
+- *
+- * -x = (2 - x) mod 2
+- *
+- * Then, ignoring the `mod 2's for convenience:
+- *
+- * x * y = xy
+- * -x * y = 2y - xy
+- * x * -y = 2x - xy
+- * -x * -y = 4 - 2x - 2y + xy
+- *
+- * For signed multiplies, we subtract (x << 32) from the partial
+- * product to fix this problem for negative multipliers (see mul.s).
+- * Because of the way the shift into the partial product is calculated
+- * (N xor V), this term is automatically removed for the multiplicand,
+- * so we don't have to adjust.
+- *
+- * But for unsigned multiplies, the high order bit wasn't a sign bit,
+- * and the correction is wrong. So for unsigned multiplies where the
+- * high order bit is one, we end up with xy - (y << 32). To fix it
+- * we add y << 32.
+- */
+- tst %o1
+- bl,a 1f ! if %o1 < 0 (high order bit = 1),
+- add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
+-1: rd %y, %o0 ! get lower half of product
+- retl
+- addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
+-
+-Lmul_shortway:
+- /*
+- * Short multiply. 12 steps, followed by a final shift step.
+- * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+- * but there is no problem with %o0 being negative (unlike above),
+- * and overflow is impossible (the answer is at most 24 bits long).
+- */
+- mulscc %o4, %o1, %o4 ! 1
+- mulscc %o4, %o1, %o4 ! 2
+- mulscc %o4, %o1, %o4 ! 3
+- mulscc %o4, %o1, %o4 ! 4
+- mulscc %o4, %o1, %o4 ! 5
+- mulscc %o4, %o1, %o4 ! 6
+- mulscc %o4, %o1, %o4 ! 7
+- mulscc %o4, %o1, %o4 ! 8
+- mulscc %o4, %o1, %o4 ! 9
+- mulscc %o4, %o1, %o4 ! 10
+- mulscc %o4, %o1, %o4 ! 11
+- mulscc %o4, %o1, %o4 ! 12
+- mulscc %o4, %g0, %o4 ! final shift
+-
+- /*
+- * %o4 has 20 of the bits that should be in the result; %y has
+- * the bottom 12 (as %y's top 12). That is:
+- *
+- * %o4 %y
+- * +----------------+----------------+
+- * | -12- | -20- | -12- | -20- |
+- * +------(---------+------)---------+
+- * -----result-----
+- *
+- * The 12 bits of %o4 left of the `result' area are all zero;
+- * in fact, all top 20 bits of %o4 are zero.
+- */
+-
+- rd %y, %o5
+- sll %o4, 12, %o0 ! shift middle bits left 12
+- srl %o5, 20, %o5 ! shift low bits right 20
+- or %o5, %o0, %o0
+- retl
+- addcc %g0, %g0, %o1 ! %o1 = zero, and set Z