45s login delay

Matt Johnston matt at ucc.asn.au
Thu Mar 17 23:23:44 WST 2011


On Wed, Mar 16, 2011 at 07:16:34PM -0500, Rob Landley wrote:
> On 03/16/2011 02:25 AM, Peter Turczak wrote:
> > Hi Magnus, hi Rob,
> > 
> > a while ago I made the same observations you did. On an m68k-nommu
> > with 166 MHz the RSA exchange took quite forever. After some
> > profiling I found out the comba multiply routine in libtommath was
> > eating most of the time. It seems gcc produces quite inefficient code
> > there. Libtommath resizes its large integers while calculating
> > leading to more work for user memory management.
> 
> User mememory management?  It's got a malloc/free in an inner loop?  BARF!
> 
> (Yeah, that'll blow your L1 cache wide open and slow stuff down by at
> least an order of magnitude.  Allocation functions are some of the most
> cache unfriendly things you can do, pretty much by definition.  Unused
> memory is not cache hot, pretty much by definition.  That's sort of the
> point.  Copying the data sucks too, but it's doing the copying on all
> platforms I'd guess...)

I guess it's possible. Logging in to a server with 1024 bit
RSA and RSA authorized_key I get ~229 reallocs in mp_grow(),
not a massive number if spread over 45 seconds. The patch
below drops it to ~30 reallcs. 

Magnus: It might be worth seeing if it changes your
timing. I haven't looked whether it increases memory usage.

Matt

--- libtommath/bn_mp_exptmod_fast.c	5a692f134deeab0992612206c16f8bf970b5088c
+++ libtommath/bn_mp_exptmod_fast.c	5391873ccf8a11171774425c69f584195b4fdba4
@@ -67,13 +67,13 @@ int mp_exptmod_fast (mp_int * G, mp_int 
 
   /* init M array */
   /* init first cell */
-  if ((err = mp_init(&M[1])) != MP_OKAY) {
+  if ((err = mp_init_size(&M[1], P->used)) != MP_OKAY) {
      return err;
   }
 
   /* now init the second half of the array */
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
-    if ((err = mp_init(&M[x])) != MP_OKAY) {
+    if ((err = mp_init_size(&M[x], P->alloc+1)) != MP_OKAY) {
       for (y = 1<<(winsize-1); y < x; y++) {
         mp_clear (&M[y]);
       }
@@ -96,7 +96,7 @@ int mp_exptmod_fast (mp_int * G, mp_int 
 
      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
-     if (((P->used * 2 + 1) < MP_WARRAY) &&
+     if (((P->alloc * 2 + 1) < MP_WARRAY) &&
           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
         redux = fast_mp_montgomery_reduce;
      } else 
@@ -133,7 +133,7 @@ int mp_exptmod_fast (mp_int * G, mp_int 
   }
 
   /* setup result */
-  if ((err = mp_init (&res)) != MP_OKAY) {
+  if ((err = mp_init_size (&res, P->used)) != MP_OKAY) {
     goto LBL_M;
   }
 
============================================================
--- libtommath/bn_mp_init_copy.c	fd7c20c0ee3473615de23c59074cf5c6757a20ca
+++ libtommath/bn_mp_init_copy.c	841949a75e387e818f2f4d9adedff0ba9c9374c0
@@ -20,7 +20,7 @@ int mp_init_copy (mp_int * a, mp_int * b
 {
   int     res;
 
-  if ((res = mp_init (a)) != MP_OKAY) {
+  if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
     return res;
   }
   return mp_copy (b, a);
============================================================
--- libtommath/bn_mp_mod.c	3bed12926c4d019853f2b4dac814a7505580380e
+++ libtommath/bn_mp_mod.c	9265cd0294d2c86f1c3c73eaa5bf19c30403e13b
@@ -22,7 +22,7 @@ mp_mod (mp_int * a, mp_int * b, mp_int *
   mp_int  t;
   int     res;
 
-  if ((res = mp_init (&t)) != MP_OKAY) {
+  if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
     return res;
   }
 
============================================================
--- libtommath/bn_mp_mulmod.c	935d0f5903589ddf62f42fc691cb2f83aa2832c4
+++ libtommath/bn_mp_mulmod.c	ef9063432e3a0c62b7118dfc3d01d04cd4dc8bb9
@@ -21,7 +21,7 @@ int mp_mulmod (mp_int * a, mp_int * b, m
   int     res;
   mp_int  t;
 
-  if ((res = mp_init (&t)) != MP_OKAY) {
+  if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
     return res;
   }
 


More information about the Dropbear mailing list