Browse Source

Move the rounds into separate functions on sparc64 so gcc's optimizer
doesn't blow up.  This is a hack but is better than compiling sha1.c
with -O0 on sparc64.  From NetBSD (mrg).
deraadt@ OK
OPENBSD_3_3
millert 22 years ago
parent
commit
36f3924c0e
2 changed files with 69 additions and 22 deletions
  1. +1
    -14
      src/lib/libc/hash/Makefile.inc
  2. +68
    -8
      src/lib/libc/hash/sha1.c

+ 1
- 14
src/lib/libc/hash/Makefile.inc View File

@ -1,4 +1,4 @@
# $OpenBSD: Makefile.inc,v 1.12 2002/01/16 19:29:13 marc Exp $
# $OpenBSD: Makefile.inc,v 1.13 2003/01/08 19:53:59 millert Exp $
# hash functions # hash functions
.PATH: ${LIBCSRCDIR}/hash .PATH: ${LIBCSRCDIR}/hash
@ -11,16 +11,3 @@ MLINKS+=sha1.3 SHA1Transform.3
MLINKS+=rmd160.3 RMD160Init.3 rmd160.3 RMD160Update.3 rmd160.3 RMD160Final.3 MLINKS+=rmd160.3 RMD160Init.3 rmd160.3 RMD160Update.3 rmd160.3 RMD160Final.3
MLINKS+=rmd160.3 RMD160End.3 rmd160.3 RMD160File.3 rmd160.3 RMD160Data.3 MLINKS+=rmd160.3 RMD160End.3 rmd160.3 RMD160File.3 rmd160.3 RMD160Data.3
MLINKS+=rmd160.3 RMD160Transform.3 MLINKS+=rmd160.3 RMD160Transform.3
.if (${MACHINE_ARCH} == "sparc64")
sha1.o:
${CC} ${CFLAGS} ${CPPFLAGS} -O0 -c ${.IMPSRC}
sha1.po:
${CC} ${CFLAGS} ${CPPFLAGS} -O0 -c ${.IMPSRC} -o $@
sha1.so:
${CC} ${CFLAGS} ${CPPFLAGS} -O0 ${PICFLAG} -DPIC -c ${.IMPSRC} -o $@
sha1.go:
${CC} ${CFLAGS} ${CPPFLAGS} -O0 -g -c ${.IMPSRC} -o $@
.endif

+ 68
- 8
src/lib/libc/hash/sha1.c View File

@ -1,4 +1,4 @@
/* $OpenBSD: sha1.c,v 1.10 2002/12/23 04:33:31 millert Exp $ */
/* $OpenBSD: sha1.c,v 1.11 2003/01/08 19:53:59 millert Exp $ */
/* /*
* SHA-1 in C * SHA-1 in C
@ -15,7 +15,7 @@
*/ */
#if defined(LIBC_SCCS) && !defined(lint) #if defined(LIBC_SCCS) && !defined(lint)
static char rcsid[] = "$OpenBSD: sha1.c,v 1.10 2002/12/23 04:33:31 millert Exp $";
static char rcsid[] = "$OpenBSD: sha1.c,v 1.11 2003/01/08 19:53:59 millert Exp $";
#endif /* LIBC_SCCS and not lint */ #endif /* LIBC_SCCS and not lint */
#define SHA1HANDSOFF /* Copies data before messing with it. */ #define SHA1HANDSOFF /* Copies data before messing with it. */
@ -48,6 +48,63 @@ static char rcsid[] = "$OpenBSD: sha1.c,v 1.10 2002/12/23 04:33:31 millert Exp $
#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); #define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); #define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
/*
 * One input block, viewable either as 64 bytes (c) or as 16 words (l).
 * Declared at file scope so the static do_R* helpers can take a pointer
 * to it in addition to SHA1Transform.
 * NOTE(review): the two views only cover the same 64 bytes if u_int is
 * 32 bits wide -- confirm for every supported platform.
 */
typedef union {
u_char c[64];
u_int l[16];
} CHAR64LONG16;
#ifdef __sparc_v9__
/*
 * Forward declarations of the helpers that each run one 20-operation
 * group of the transform.  They are only compiled when __sparc_v9__ is
 * defined; the rounds are split out so that gcc's optimizer does not
 * have to digest the whole unrolled transform as a single function.
 * Each helper receives pointers to the five working variables plus the
 * current block.
 */
static void do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
static void do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
static void do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
static void do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);
/*
 * Pointer-taking variants of R0..R4: each one dereferences its five
 * pointer arguments and forwards them, together with the unchanged
 * index i, to the corresponding R macro.  This lets the do_R* helpers
 * update the caller's working variables through pointers.
 */
#define nR0(v,w,x,y,z,i) R0(*v,*w,*x,*y,*z,i)
#define nR1(v,w,x,y,z,i) R1(*v,*w,*x,*y,*z,i)
#define nR2(v,w,x,y,z,i) R2(*v,*w,*x,*y,*z,i)
#define nR3(v,w,x,y,z,i) R3(*v,*w,*x,*y,*z,i)
#define nR4(v,w,x,y,z,i) R4(*v,*w,*x,*y,*z,i)
/*
 * do_R01: run operations 0 through 19 of the transform (__sparc_v9__
 * builds only).
 *
 * Operations 0-15 use nR0 and operations 16-19 use nR1.  The five
 * pointers are passed in a rotated order on every step, so the roles
 * of the working variables shift without copying values around.  The
 * nR* wrappers dereference the pointers, so the caller's variables are
 * modified in place.
 *
 * `block` is never named in the body below.  Presumably the R0/R1
 * macros (defined outside this view) pick it up by name, in which case
 * the parameter must keep exactly this name -- TODO confirm.
 */
static void
do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
{
/* operations 0-15 */
nR0(a,b,c,d,e, 0); nR0(e,a,b,c,d, 1); nR0(d,e,a,b,c, 2); nR0(c,d,e,a,b, 3);
nR0(b,c,d,e,a, 4); nR0(a,b,c,d,e, 5); nR0(e,a,b,c,d, 6); nR0(d,e,a,b,c, 7);
nR0(c,d,e,a,b, 8); nR0(b,c,d,e,a, 9); nR0(a,b,c,d,e,10); nR0(e,a,b,c,d,11);
nR0(d,e,a,b,c,12); nR0(c,d,e,a,b,13); nR0(b,c,d,e,a,14); nR0(a,b,c,d,e,15);
/* operations 16-19 switch to the R1 form */
nR1(e,a,b,c,d,16); nR1(d,e,a,b,c,17); nR1(c,d,e,a,b,18); nR1(b,c,d,e,a,19);
}
/*
 * do_R2: run operations 20 through 39 of the transform (__sparc_v9__
 * builds only).
 *
 * Every step goes through nR2, with the five pointers passed in a
 * rotated order each time; the caller's working variables are updated
 * in place through those pointers.
 *
 * `block` is never named in the body below.  Presumably the R2 macro
 * (defined outside this view) picks it up by name, in which case the
 * parameter must keep exactly this name -- TODO confirm.
 */
static void
do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
{
nR2(a,b,c,d,e,20); nR2(e,a,b,c,d,21); nR2(d,e,a,b,c,22); nR2(c,d,e,a,b,23);
nR2(b,c,d,e,a,24); nR2(a,b,c,d,e,25); nR2(e,a,b,c,d,26); nR2(d,e,a,b,c,27);
nR2(c,d,e,a,b,28); nR2(b,c,d,e,a,29); nR2(a,b,c,d,e,30); nR2(e,a,b,c,d,31);
nR2(d,e,a,b,c,32); nR2(c,d,e,a,b,33); nR2(b,c,d,e,a,34); nR2(a,b,c,d,e,35);
nR2(e,a,b,c,d,36); nR2(d,e,a,b,c,37); nR2(c,d,e,a,b,38); nR2(b,c,d,e,a,39);
}
/*
 * do_R3: run operations 40 through 59 of the transform (__sparc_v9__
 * builds only).
 *
 * Every step goes through nR3, i.e. R3 applied to the dereferenced
 * pointers: it adds (((w|x)&y)|(w&x)) + blk(i) + 0x8F1BBCDC + rol(v,5)
 * to z and then replaces w with rol(w,30).  The pointers are passed in
 * a rotated order on each step, and the caller's working variables are
 * updated in place.
 *
 * `block` is never named in the body below.  blk() only receives the
 * index, so it presumably reaches `block` by name (its definition is
 * outside this view); the parameter name must then stay as is -- TODO
 * confirm.
 */
static void
do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
{
nR3(a,b,c,d,e,40); nR3(e,a,b,c,d,41); nR3(d,e,a,b,c,42); nR3(c,d,e,a,b,43);
nR3(b,c,d,e,a,44); nR3(a,b,c,d,e,45); nR3(e,a,b,c,d,46); nR3(d,e,a,b,c,47);
nR3(c,d,e,a,b,48); nR3(b,c,d,e,a,49); nR3(a,b,c,d,e,50); nR3(e,a,b,c,d,51);
nR3(d,e,a,b,c,52); nR3(c,d,e,a,b,53); nR3(b,c,d,e,a,54); nR3(a,b,c,d,e,55);
nR3(e,a,b,c,d,56); nR3(d,e,a,b,c,57); nR3(c,d,e,a,b,58); nR3(b,c,d,e,a,59);
}
/*
 * do_R4: run operations 60 through 79 of the transform (__sparc_v9__
 * builds only).
 *
 * Every step goes through nR4, i.e. R4 applied to the dereferenced
 * pointers: it adds (w^x^y) + blk(i) + 0xCA62C1D6 + rol(v,5) to z and
 * then replaces w with rol(w,30).  The pointers are passed in a rotated
 * order on each step, and the caller's working variables are updated in
 * place.
 *
 * `block` is never named in the body below.  blk() only receives the
 * index, so it presumably reaches `block` by name (its definition is
 * outside this view); the parameter name must then stay as is -- TODO
 * confirm.
 */
static void
do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)
{
nR4(a,b,c,d,e,60); nR4(e,a,b,c,d,61); nR4(d,e,a,b,c,62); nR4(c,d,e,a,b,63);
nR4(b,c,d,e,a,64); nR4(a,b,c,d,e,65); nR4(e,a,b,c,d,66); nR4(d,e,a,b,c,67);
nR4(c,d,e,a,b,68); nR4(b,c,d,e,a,69); nR4(a,b,c,d,e,70); nR4(e,a,b,c,d,71);
nR4(d,e,a,b,c,72); nR4(c,d,e,a,b,73); nR4(b,c,d,e,a,74); nR4(a,b,c,d,e,75);
nR4(e,a,b,c,d,76); nR4(d,e,a,b,c,77); nR4(c,d,e,a,b,78); nR4(b,c,d,e,a,79);
}
#endif
/* /*
* Hash a single 512-bit block. This is the core of the algorithm. * Hash a single 512-bit block. This is the core of the algorithm.
@ -56,15 +113,11 @@ void
SHA1Transform(u_int32_t state[5], const u_char buffer[64]) SHA1Transform(u_int32_t state[5], const u_char buffer[64])
{ {
u_int32_t a, b, c, d, e; u_int32_t a, b, c, d, e;
typedef union {
u_char c[64];
u_int l[16];
} CHAR64LONG16;
CHAR64LONG16 *block; CHAR64LONG16 *block;
#ifdef SHA1HANDSOFF #ifdef SHA1HANDSOFF
static u_char workspace[64];
block = (CHAR64LONG16 *)workspace;
static CHAR64LONG16 workspace;
block = &workspace;
(void)memcpy(block, buffer, 64); (void)memcpy(block, buffer, 64);
#else #else
block = (CHAR64LONG16 *)buffer; block = (CHAR64LONG16 *)buffer;
@ -77,6 +130,12 @@ SHA1Transform(u_int32_t state[5], const u_char buffer[64])
d = state[3]; d = state[3];
e = state[4]; e = state[4];
#ifdef __sparc_v9__
do_R01(&a, &b, &c, &d, &e, block);
do_R2(&a, &b, &c, &d, &e, block);
do_R3(&a, &b, &c, &d, &e, block);
do_R4(&a, &b, &c, &d, &e, block);
#else
/* 4 rounds of 20 operations each. Loop unrolled. */ /* 4 rounds of 20 operations each. Loop unrolled. */
R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
@ -98,6 +157,7 @@ SHA1Transform(u_int32_t state[5], const u_char buffer[64])
R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
#endif
/* Add the working vars back into context.state[] */ /* Add the working vars back into context.state[] */
state[0] += a; state[0] += a;


Loading…
Cancel
Save