From: Chris Gianelloni Date: Tue, 7 Feb 2006 20:48:43 +0000 (+0000) Subject: Added klibc patch for sparc to CVS (from distfiles). This is 3.3.11 so like... run... X-Git-Tag: v3.4.10.902~388 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=55e036cae4eae5459b6281ec5b43ce37d0bf0ad7;p=genkernel.git Added klibc patch for sparc to CVS (from distfiles). This is 3.3.11 so like... run in fear or something... git-svn-id: svn+ssh://svn.gentoo.org/var/svnroot/genkernel/trunk@364 67a159dc-881f-0410-a524-ba9dfbe2cb84 --- diff --git a/genkernel b/genkernel index 949ba89..f07207b 100755 --- a/genkernel +++ b/genkernel @@ -2,7 +2,7 @@ # Genkernel v3 PATH="/bin:/usr/bin:/sbin:/usr/sbin" -GK_V='3.3.11_pre8' +GK_V='3.3.11' TMPDIR='/var/tmp/genkernel' TODEBUGCACHE=1 # Until an error occurs or DEBUGFILE is fully qualified. diff --git a/pkg/klibc-1.1.16-sparc2.patch b/pkg/klibc-1.1.16-sparc2.patch new file mode 100644 index 0000000..83b2e1b --- /dev/null +++ b/pkg/klibc-1.1.16-sparc2.patch @@ -0,0 +1,718 @@ +diff -Nura klibc-1.1.16/include/arch/sparc/klibc/archsignal.h klibc-1.1.16.sparc/include/arch/sparc/klibc/archsignal.h +--- klibc-1.1.16/include/arch/sparc/klibc/archsignal.h 2006-01-06 03:11:43.000000000 -0300 ++++ klibc-1.1.16.sparc/include/arch/sparc/klibc/archsignal.h 2006-01-25 14:49:01.000000000 -0300 +@@ -10,6 +10,7 @@ + + /* Hidden definitions */ + ++#ifndef _KLIBC_SIGACTION + struct __new_sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; +@@ -34,5 +35,6 @@ + int ss_flags; + size_t ss_size; + } stack_t; ++#endif + + #endif +diff -Nura klibc-1.1.16/klibc/arch/sparc/divrem.m4 klibc-1.1.16.sparc/klibc/arch/sparc/divrem.m4 +--- klibc-1.1.16/klibc/arch/sparc/divrem.m4 2006-01-06 03:11:43.000000000 -0300 ++++ klibc-1.1.16.sparc/klibc/arch/sparc/divrem.m4 1969-12-31 21:00:00.000000000 -0300 +@@ -1,276 +0,0 @@ +-/* +- * Copyright (c) 1992, 1993 +- * The Regents of the University of California. All rights reserved. +- * +- * This software was developed by the Computer Systems Engineering group +- * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and +- * contributed to Berkeley. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the above copyright +- * notice, this list of conditions and the following disclaimer. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. All advertising materials mentioning features or use of this software +- * must display the following acknowledgement: +- * This product includes software developed by the University of +- * California, Berkeley and its contributors. +- * 4. Neither the name of the University nor the names of its contributors +- * may be used to endorse or promote products derived from this software +- * without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +- * SUCH DAMAGE. +- * +- * from: Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp +- * $NetBSD: divrem.m4,v 1.4 1997/10/09 10:07:54 lukem Exp $ +- */ +- +-/* +- * Division and remainder, from Appendix E of the Sparc Version 8 +- * Architecture Manual, with fixes from Gordon Irlam. +- */ +- +-#if defined(LIBC_SCCS) && !defined(lint) +- .asciz "@(#)divrem.m4 8.1 (Berkeley) 6/4/93" +-#endif /* LIBC_SCCS and not lint */ +- +-/* +- * Input: dividend and divisor in %o0 and %o1 respectively. +- * +- * m4 parameters: +- * NAME name of function to generate +- * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 +- * S S=true => signed; S=false => unsigned +- * +- * Algorithm parameters: +- * N how many bits per iteration we try to get (4) +- * WORDSIZE total number of bits (32) +- * +- * Derived constants: +- * TWOSUPN 2^N, for label generation (m4 exponentiation currently broken) +- * TOPBITS number of bits in the top `decade' of a number +- * +- * Important variables: +- * Q the partial quotient under development (initially 0) +- * R the remainder so far, initially the dividend +- * ITER number of main division loop iterations required; +- * equal to ceil(log2(quotient) / N). Note that this +- * is the log base (2^N) of the quotient. +- * V the current comparand, initially divisor*2^(ITER*N-1) +- * +- * Cost: +- * Current estimate for non-large dividend is +- * ceil(log2(quotient) / N) * (10 + 7N/2) + C +- * A large dividend is one greater than 2^(31-TOPBITS) and takes a +- * different path, as the upper bits of the quotient must be developed +- * one bit at a time. +- */ +- +-define(N, `4') +-define(TWOSUPN, `16') +-define(WORDSIZE, `32') +-define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N))) +- +-define(dividend, `%o0') +-define(divisor, `%o1') +-define(Q, `%o2') +-define(R, `%o3') +-define(ITER, `%o4') +-define(V, `%o5') +- +-/* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */ +-define(T, `%g1') +-define(SC, `%g7') +-ifelse(S, `true', `define(SIGN, `%g6')') +- +-/* +- * This is the recursive definition for developing quotient digits. +- * +- * Parameters: +- * $1 the current depth, 1 <= $1 <= N +- * $2 the current accumulation of quotient bits +- * N max depth +- * +- * We add a new bit to $2 and either recurse or insert the bits in +- * the quotient. R, Q, and V are inputs and outputs as defined above; +- * the condition codes are expected to reflect the input R, and are +- * modified to reflect the output R. +- */ +-define(DEVELOP_QUOTIENT_BITS, +-` ! depth $1, accumulated bits $2 +- bl L.$1.eval(TWOSUPN+$2) +- srl V,1,V +- ! remainder is positive +- subcc R,V,R +- ifelse($1, N, +- ` b 9f +- add Q, ($2*2+1), Q +- ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')') +-L.$1.eval(TWOSUPN+$2): +- ! remainder is negative +- addcc R,V,R +- ifelse($1, N, +- ` b 9f +- add Q, ($2*2-1), Q +- ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')') +- ifelse($1, 1, `9:')') +- +-#include +-#include +- +-FUNC(NAME) +-ifelse(S, `true', +-` ! compute sign of result; if neither is negative, no problem +- orcc divisor, dividend, %g0 ! either negative? +- bge 2f ! no, go do the divide +- ifelse(OP, `div', +- `xor divisor, dividend, SIGN', +- `mov dividend, SIGN') ! compute sign in any case +- tst divisor +- bge 1f +- tst dividend +- ! divisor is definitely negative; dividend might also be negative +- bge 2f ! if dividend not negative... +- neg divisor ! in any case, make divisor nonneg +-1: ! dividend is negative, divisor is nonnegative +- neg dividend ! make dividend nonnegative +-2: +-') +- ! Ready to divide. Compute size of quotient; scale comparand. +- orcc divisor, %g0, V +- bnz 1f +- mov dividend, R +- +- ! Divide by zero trap. If it returns, return 0 (about as +- ! wrong as possible, but that is what SunOS does...). +- t ST_DIV0 +- retl +- clr %o0 +- +-1: +- cmp R, V ! if divisor exceeds dividend, done +- blu Lgot_result ! (and algorithm fails otherwise) +- clr Q +- sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T +- cmp R, T +- blu Lnot_really_big +- clr ITER +- +- ! `Here the dividend is >= 2^(31-N) or so. We must be careful here, +- ! as our usual N-at-a-shot divide step will cause overflow and havoc. +- ! The number of bits in the result here is N*ITER+SC, where SC <= N. +- ! Compute ITER in an unorthodox manner: know we need to shift V into +- ! the top decade: so do not even bother to compare to R.' +- 1: +- cmp V, T +- bgeu 3f +- mov 1, SC +- sll V, N, V +- b 1b +- inc ITER +- +- ! Now compute SC. +- 2: addcc V, V, V +- bcc Lnot_too_big +- inc SC +- +- ! We get here if the divisor overflowed while shifting. +- ! This means that R has the high-order bit set. +- ! Restore V and subtract from R. +- sll T, TOPBITS, T ! high order bit +- srl V, 1, V ! rest of V +- add V, T, V +- b Ldo_single_div +- dec SC +- +- Lnot_too_big: +- 3: cmp V, R +- blu 2b +- nop +- be Ldo_single_div +- nop +- /* NB: these are commented out in the V8-Sparc manual as well */ +- /* (I do not understand this) */ +- ! V > R: went too far: back up 1 step +- ! srl V, 1, V +- ! dec SC +- ! do single-bit divide steps +- ! +- ! We have to be careful here. We know that R >= V, so we can do the +- ! first divide step without thinking. BUT, the others are conditional, +- ! and are only done if R >= 0. Because both R and V may have the high- +- ! order bit set in the first step, just falling into the regular +- ! division loop will mess up the first time around. +- ! So we unroll slightly... +- Ldo_single_div: +- deccc SC +- bl Lend_regular_divide +- nop +- sub R, V, R +- mov 1, Q +- b Lend_single_divloop +- nop +- Lsingle_divloop: +- sll Q, 1, Q +- bl 1f +- srl V, 1, V +- ! R >= 0 +- sub R, V, R +- b 2f +- inc Q +- 1: ! R < 0 +- add R, V, R +- dec Q +- 2: +- Lend_single_divloop: +- deccc SC +- bge Lsingle_divloop +- tst R +- b,a Lend_regular_divide +- +-Lnot_really_big: +-1: +- sll V, N, V +- cmp V, R +- bleu 1b +- inccc ITER +- be Lgot_result +- dec ITER +- +- tst R ! set up for initial iteration +-Ldivloop: +- sll Q, N, Q +- DEVELOP_QUOTIENT_BITS(1, 0) +-Lend_regular_divide: +- deccc ITER +- bge Ldivloop +- tst R +- bl,a Lgot_result +- ! non-restoring fixup here (one instruction only!) +-ifelse(OP, `div', +-` dec Q +-', ` add R, divisor, R +-') +- +-Lgot_result: +-ifelse(S, `true', +-` ! check to see if answer should be < 0 +- tst SIGN +- bl,a 1f +- ifelse(OP, `div', `neg Q', `neg R') +-1:') +- retl +- ifelse(OP, `div', `mov Q, %o0', `mov R, %o0') +diff -Nura klibc-1.1.16/klibc/arch/sparc/Makefile.inc klibc-1.1.16.sparc/klibc/arch/sparc/Makefile.inc +--- klibc-1.1.16/klibc/arch/sparc/Makefile.inc 2006-01-06 03:11:43.000000000 -0300 ++++ klibc-1.1.16.sparc/klibc/arch/sparc/Makefile.inc 2006-01-25 14:24:02.000000000 -0300 +@@ -8,12 +8,6 @@ + # + + ARCHOBJS = \ +- arch/$(ARCH)/sdiv.o \ +- arch/$(ARCH)/udiv.o \ +- arch/$(ARCH)/srem.o \ +- arch/$(ARCH)/urem.o \ +- arch/$(ARCH)/smul.o \ +- arch/$(ARCH)/umul.o \ + arch/$(ARCH)/setjmp.o \ + arch/$(ARCH)/syscall.o \ + arch/$(ARCH)/sysfork.o \ +@@ -23,29 +17,5 @@ + libgcc/__umoddi3.o \ + libgcc/__udivmoddi4.o + +-arch/$(ARCH)/sdiv.S: arch/$(ARCH)/divrem.m4 +- @echo 'building $@ from $^' +- @(echo "define(NAME,\`.div')define(OP,\`div')define(S,\`true')"; \ +- cat $^) | m4 > $@ +- @chmod 444 $@ +- +-arch/$(ARCH)/udiv.S: arch/$(ARCH)/divrem.m4 +- @echo 'building $@ from $^' +- @(echo "define(NAME,\`.udiv')define(OP,\`div')define(S,\`false')"; \ +- cat $^) | m4 > $@ +- @chmod 444 $@ +- +-arch/$(ARCH)/srem.S: arch/$(ARCH)/divrem.m4 +- @echo 'building $@ from $^' +- @(echo "define(NAME,\`.rem')define(OP,\`rem')define(S,\`true')"; \ +- cat $^) | m4 > $@ +- @chmod 444 $@ +- +-arch/$(ARCH)/urem.S: arch/$(ARCH)/divrem.m4 +- @echo 'building $@ from $^' +- @(echo "define(NAME,\`.urem')define(OP,\`rem')define(S,\`false')"; \ +- cat $^) | m4 > $@ +- @chmod 444 $@ +- + archclean: +- rm -f arch/$(ARCH)/?div.S arch/$(ARCH)/?rem.S ++ +diff -Nura klibc-1.1.16/klibc/arch/sparc/MCONFIG klibc-1.1.16.sparc/klibc/arch/sparc/MCONFIG +--- klibc-1.1.16/klibc/arch/sparc/MCONFIG 2006-01-06 03:11:43.000000000 -0300 ++++ klibc-1.1.16.sparc/klibc/arch/sparc/MCONFIG 2006-01-25 14:48:42.000000000 -0300 +@@ -7,7 +7,7 @@ + # accordingly. + # + +-OPTFLAGS = -Os -m32 -mptr32 ++OPTFLAGS = -Os -m32 -mptr32 -mcpu=v8 -mtune=v8 -D_KLIBC_SIGACTION + BITSIZE = 32 + + # Extra linkflags when building the shared version of the library +diff -Nura klibc-1.1.16/klibc/arch/sparc/smul.S klibc-1.1.16.sparc/klibc/arch/sparc/smul.S +--- klibc-1.1.16/klibc/arch/sparc/smul.S 2006-01-06 03:11:43.000000000 -0300 ++++ klibc-1.1.16.sparc/klibc/arch/sparc/smul.S 1969-12-31 21:00:00.000000000 -0300 +@@ -1,160 +0,0 @@ +-/* $NetBSD: mul.S,v 1.3 1997/07/16 14:37:42 christos Exp $ */ +- +-/* +- * Copyright (c) 1992, 1993 +- * The Regents of the University of California. All rights reserved. +- * +- * This software was developed by the Computer Systems Engineering group +- * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and +- * contributed to Berkeley. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the above copyright +- * notice, this list of conditions and the following disclaimer. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. All advertising materials mentioning features or use of this software +- * must display the following acknowledgement: +- * This product includes software developed by the University of +- * California, Berkeley and its contributors. +- * 4. Neither the name of the University nor the names of its contributors +- * may be used to endorse or promote products derived from this software +- * without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +- * SUCH DAMAGE. +- * +- * from: Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp +- */ +- +-#include +-#if defined(LIBC_SCCS) && !defined(lint) +-#if 0 +- .asciz "@(#)mul.s 8.1 (Berkeley) 6/4/93" +-#else +- RCSID("$NetBSD: mul.S,v 1.3 1997/07/16 14:37:42 christos Exp $") +-#endif +-#endif /* LIBC_SCCS and not lint */ +- +-/* +- * Signed multiply, from Appendix E of the Sparc Version 8 +- * Architecture Manual. +- * +- * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of +- * the 64-bit product). +- * +- * This code optimizes short (less than 13-bit) multiplies. +- */ +- +-FUNC(.mul) +- mov %o0, %y ! multiplier -> Y +- andncc %o0, 0xfff, %g0 ! test bits 12..31 +- be Lmul_shortway ! if zero, can do it the short way +- andcc %g0, %g0, %o4 ! zero the partial product and clear N and V +- +- /* +- * Long multiply. 32 steps, followed by a final shift step. +- */ +- mulscc %o4, %o1, %o4 ! 1 +- mulscc %o4, %o1, %o4 ! 2 +- mulscc %o4, %o1, %o4 ! 3 +- mulscc %o4, %o1, %o4 ! 4 +- mulscc %o4, %o1, %o4 ! 5 +- mulscc %o4, %o1, %o4 ! 6 +- mulscc %o4, %o1, %o4 ! 7 +- mulscc %o4, %o1, %o4 ! 8 +- mulscc %o4, %o1, %o4 ! 9 +- mulscc %o4, %o1, %o4 ! 10 +- mulscc %o4, %o1, %o4 ! 11 +- mulscc %o4, %o1, %o4 ! 12 +- mulscc %o4, %o1, %o4 ! 13 +- mulscc %o4, %o1, %o4 ! 14 +- mulscc %o4, %o1, %o4 ! 15 +- mulscc %o4, %o1, %o4 ! 16 +- mulscc %o4, %o1, %o4 ! 17 +- mulscc %o4, %o1, %o4 ! 18 +- mulscc %o4, %o1, %o4 ! 19 +- mulscc %o4, %o1, %o4 ! 20 +- mulscc %o4, %o1, %o4 ! 21 +- mulscc %o4, %o1, %o4 ! 22 +- mulscc %o4, %o1, %o4 ! 23 +- mulscc %o4, %o1, %o4 ! 24 +- mulscc %o4, %o1, %o4 ! 25 +- mulscc %o4, %o1, %o4 ! 26 +- mulscc %o4, %o1, %o4 ! 27 +- mulscc %o4, %o1, %o4 ! 28 +- mulscc %o4, %o1, %o4 ! 29 +- mulscc %o4, %o1, %o4 ! 30 +- mulscc %o4, %o1, %o4 ! 31 +- mulscc %o4, %o1, %o4 ! 32 +- mulscc %o4, %g0, %o4 ! final shift +- +- ! If %o0 was negative, the result is +- ! (%o0 * %o1) + (%o1 << 32)) +- ! We fix that here. +- +- tst %o0 +- bge 1f +- rd %y, %o0 +- +- ! %o0 was indeed negative; fix upper 32 bits of result by subtracting +- ! %o1 (i.e., return %o4 - %o1 in %o1). +- retl +- sub %o4, %o1, %o1 +- +-1: +- retl +- mov %o4, %o1 +- +-Lmul_shortway: +- /* +- * Short multiply. 12 steps, followed by a final shift step. +- * The resulting bits are off by 12 and (32-12) = 20 bit positions, +- * but there is no problem with %o0 being negative (unlike above). +- */ +- mulscc %o4, %o1, %o4 ! 1 +- mulscc %o4, %o1, %o4 ! 2 +- mulscc %o4, %o1, %o4 ! 3 +- mulscc %o4, %o1, %o4 ! 4 +- mulscc %o4, %o1, %o4 ! 5 +- mulscc %o4, %o1, %o4 ! 6 +- mulscc %o4, %o1, %o4 ! 7 +- mulscc %o4, %o1, %o4 ! 8 +- mulscc %o4, %o1, %o4 ! 9 +- mulscc %o4, %o1, %o4 ! 10 +- mulscc %o4, %o1, %o4 ! 11 +- mulscc %o4, %o1, %o4 ! 12 +- mulscc %o4, %g0, %o4 ! final shift +- +- /* +- * %o4 has 20 of the bits that should be in the low part of the +- * result; %y has the bottom 12 (as %y's top 12). That is: +- * +- * %o4 %y +- * +----------------+----------------+ +- * | -12- | -20- | -12- | -20- | +- * +------(---------+------)---------+ +- * --hi-- ----low-part---- +- * +- * The upper 12 bits of %o4 should be sign-extended to form the +- * high part of the product (i.e., highpart = %o4 >> 20). +- */ +- +- rd %y, %o5 +- sll %o4, 12, %o0 ! shift middle bits left 12 +- srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left +- or %o5, %o0, %o0 ! construct low part of result +- retl +- sra %o4, 20, %o1 ! ... and extract high part of result +diff -Nura klibc-1.1.16/klibc/arch/sparc/umul.S klibc-1.1.16.sparc/klibc/arch/sparc/umul.S +--- klibc-1.1.16/klibc/arch/sparc/umul.S 2006-01-06 03:11:43.000000000 -0300 ++++ klibc-1.1.16.sparc/klibc/arch/sparc/umul.S 1969-12-31 21:00:00.000000000 -0300 +@@ -1,193 +0,0 @@ +-/* $NetBSD: umul.S,v 1.3 1997/07/16 14:37:44 christos Exp $ */ +- +-/* +- * Copyright (c) 1992, 1993 +- * The Regents of the University of California. All rights reserved. +- * +- * This software was developed by the Computer Systems Engineering group +- * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and +- * contributed to Berkeley. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the above copyright +- * notice, this list of conditions and the following disclaimer. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. All advertising materials mentioning features or use of this software +- * must display the following acknowledgement: +- * This product includes software developed by the University of +- * California, Berkeley and its contributors. +- * 4. Neither the name of the University nor the names of its contributors +- * may be used to endorse or promote products derived from this software +- * without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +- * SUCH DAMAGE. +- * +- * from: Header: umul.s,v 1.4 92/06/25 13:24:05 torek Exp +- */ +- +-#include +-#if defined(LIBC_SCCS) && !defined(lint) +-#if 0 +- .asciz "@(#)umul.s 8.1 (Berkeley) 6/4/93" +-#else +- RCSID("$NetBSD: umul.S,v 1.3 1997/07/16 14:37:44 christos Exp $") +-#endif +-#endif /* LIBC_SCCS and not lint */ +- +-/* +- * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the +- * upper 32 bits of the 64-bit product). +- * +- * This code optimizes short (less than 13-bit) multiplies. Short +- * multiplies require 25 instruction cycles, and long ones require +- * 45 instruction cycles. +- * +- * On return, overflow has occurred (%o1 is not zero) if and only if +- * the Z condition code is clear, allowing, e.g., the following: +- * +- * call .umul +- * nop +- * bnz overflow (or tnz) +- */ +- +-FUNC(.umul) +- or %o0, %o1, %o4 +- mov %o0, %y ! multiplier -> Y +- andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args +- be Lmul_shortway ! if zero, can do it the short way +- andcc %g0, %g0, %o4 ! zero the partial product and clear N and V +- +- /* +- * Long multiply. 32 steps, followed by a final shift step. +- */ +- mulscc %o4, %o1, %o4 ! 1 +- mulscc %o4, %o1, %o4 ! 2 +- mulscc %o4, %o1, %o4 ! 3 +- mulscc %o4, %o1, %o4 ! 4 +- mulscc %o4, %o1, %o4 ! 5 +- mulscc %o4, %o1, %o4 ! 6 +- mulscc %o4, %o1, %o4 ! 7 +- mulscc %o4, %o1, %o4 ! 8 +- mulscc %o4, %o1, %o4 ! 9 +- mulscc %o4, %o1, %o4 ! 10 +- mulscc %o4, %o1, %o4 ! 11 +- mulscc %o4, %o1, %o4 ! 12 +- mulscc %o4, %o1, %o4 ! 13 +- mulscc %o4, %o1, %o4 ! 14 +- mulscc %o4, %o1, %o4 ! 15 +- mulscc %o4, %o1, %o4 ! 16 +- mulscc %o4, %o1, %o4 ! 17 +- mulscc %o4, %o1, %o4 ! 18 +- mulscc %o4, %o1, %o4 ! 19 +- mulscc %o4, %o1, %o4 ! 20 +- mulscc %o4, %o1, %o4 ! 21 +- mulscc %o4, %o1, %o4 ! 22 +- mulscc %o4, %o1, %o4 ! 23 +- mulscc %o4, %o1, %o4 ! 24 +- mulscc %o4, %o1, %o4 ! 25 +- mulscc %o4, %o1, %o4 ! 26 +- mulscc %o4, %o1, %o4 ! 27 +- mulscc %o4, %o1, %o4 ! 28 +- mulscc %o4, %o1, %o4 ! 29 +- mulscc %o4, %o1, %o4 ! 30 +- mulscc %o4, %o1, %o4 ! 31 +- mulscc %o4, %o1, %o4 ! 32 +- mulscc %o4, %g0, %o4 ! final shift +- +- +- /* +- * Normally, with the shift-and-add approach, if both numbers are +- * positive you get the correct result. WIth 32-bit two's-complement +- * numbers, -x is represented as +- * +- * x 32 +- * ( 2 - ------ ) mod 2 * 2 +- * 32 +- * 2 +- * +- * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, +- * we can treat this as if the radix point were just to the left +- * of the sign bit (multiply by 2^32), and get +- * +- * -x = (2 - x) mod 2 +- * +- * Then, ignoring the `mod 2's for convenience: +- * +- * x * y = xy +- * -x * y = 2y - xy +- * x * -y = 2x - xy +- * -x * -y = 4 - 2x - 2y + xy +- * +- * For signed multiplies, we subtract (x << 32) from the partial +- * product to fix this problem for negative multipliers (see mul.s). +- * Because of the way the shift into the partial product is calculated +- * (N xor V), this term is automatically removed for the multiplicand, +- * so we don't have to adjust. +- * +- * But for unsigned multiplies, the high order bit wasn't a sign bit, +- * and the correction is wrong. So for unsigned multiplies where the +- * high order bit is one, we end up with xy - (y << 32). To fix it +- * we add y << 32. +- */ +- tst %o1 +- bl,a 1f ! if %o1 < 0 (high order bit = 1), +- add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) +-1: rd %y, %o0 ! get lower half of product +- retl +- addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 +- +-Lmul_shortway: +- /* +- * Short multiply. 12 steps, followed by a final shift step. +- * The resulting bits are off by 12 and (32-12) = 20 bit positions, +- * but there is no problem with %o0 being negative (unlike above), +- * and overflow is impossible (the answer is at most 24 bits long). +- */ +- mulscc %o4, %o1, %o4 ! 1 +- mulscc %o4, %o1, %o4 ! 2 +- mulscc %o4, %o1, %o4 ! 3 +- mulscc %o4, %o1, %o4 ! 4 +- mulscc %o4, %o1, %o4 ! 5 +- mulscc %o4, %o1, %o4 ! 6 +- mulscc %o4, %o1, %o4 ! 7 +- mulscc %o4, %o1, %o4 ! 8 +- mulscc %o4, %o1, %o4 ! 9 +- mulscc %o4, %o1, %o4 ! 10 +- mulscc %o4, %o1, %o4 ! 11 +- mulscc %o4, %o1, %o4 ! 12 +- mulscc %o4, %g0, %o4 ! final shift +- +- /* +- * %o4 has 20 of the bits that should be in the result; %y has +- * the bottom 12 (as %y's top 12). That is: +- * +- * %o4 %y +- * +----------------+----------------+ +- * | -12- | -20- | -12- | -20- | +- * +------(---------+------)---------+ +- * -----result----- +- * +- * The 12 bits of %o4 left of the `result' area are all zero; +- * in fact, all top 20 bits of %o4 are zero. +- */ +- +- rd %y, %o5 +- sll %o4, 12, %o0 ! shift middle bits left 12 +- srl %o5, 20, %o5 ! shift low bits right 20 +- or %o5, %o0, %o0 +- retl +- addcc %g0, %g0, %o1 ! %o1 = zero, and set Z