From 1030d66a14c29026efc6c8d3ad69ad2c57bf4589 Mon Sep 17 00:00:00 2001
From: Paul McMillan <Paul@McMillan.ws>
Date: Thu, 2 Feb 2012 04:44:17 +0000
Subject: [PATCH] Fixed #17481. pbkdf2 hashes no longer omit leading zeros.

Some existing user passwords may need to be reset or converted
after this change. See the 1.4-beta release notes for more details.

Thanks bhuztez for the report and initial patch, claudep for the test.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@17418 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/utils/crypto.py                | 30 +++++++++++++--------------
 tests/regressiontests/utils/crypto.py | 23 ++++++++++++++++----
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/django/utils/crypto.py b/django/utils/crypto.py
index ff6096c6f9..f3a0675cc6 100644
--- a/django/utils/crypto.py
+++ b/django/utils/crypto.py
@@ -10,8 +10,8 @@ import operator
 from django.conf import settings
 
 
-trans_5c = "".join([chr(x ^ 0x5C) for x in xrange(256)])
-trans_36 = "".join([chr(x ^ 0x36) for x in xrange(256)])
+_trans_5c = "".join([chr(x ^ 0x5C) for x in xrange(256)])
+_trans_36 = "".join([chr(x ^ 0x36) for x in xrange(256)])
 
 
 def salted_hmac(key_salt, value, secret=None):
@@ -66,7 +66,7 @@ def constant_time_compare(val1, val2):
     return result == 0
 
 
-def bin_to_long(x):
+def _bin_to_long(x):
     """
     Convert a binary string into a long integer
 
@@ -75,17 +75,15 @@ def bin_to_long(x):
     return long(x.encode('hex'), 16)
 
 
-def long_to_bin(x):
+def _long_to_bin(x, hex_format_string):
     """
-    Convert a long integer into a binary string
+    Convert a long integer into a binary string.
+    hex_format_string is like "%020x" to zero-pad to 20 hex digits (10 bytes).
     """
-    hex = "%x" % (x)
-    if len(hex) % 2 == 1:
-        hex = '0' + hex
-    return binascii.unhexlify(hex)
+    return binascii.unhexlify(hex_format_string % x)
 
 
-def fast_hmac(key, msg, digest):
+def _fast_hmac(key, msg, digest):
     """
     A trimmed down version of Python's HMAC implementation
     """
@@ -93,9 +91,9 @@ def fast_hmac(key, msg, digest):
     if len(key) > dig1.block_size:
         key = digest(key).digest()
     key += chr(0) * (dig1.block_size - len(key))
-    dig1.update(key.translate(trans_36))
+    dig1.update(key.translate(_trans_36))
     dig1.update(msg)
-    dig2.update(key.translate(trans_5c))
+    dig2.update(key.translate(_trans_5c))
     dig2.update(dig1.digest())
     return dig2
 
@@ -123,13 +121,15 @@ def pbkdf2(password, salt, iterations, dklen=0, digest=None):
     l = -(-dklen // hlen)
     r = dklen - (l - 1) * hlen
 
+    hex_format_string = "%%0%ix" % (hlen * 2)
+
     def F(i):
         def U():
             u = salt + struct.pack('>I', i)
             for j in xrange(int(iterations)):
-                u = fast_hmac(password, u, digest).digest()
-                yield bin_to_long(u)
-        return long_to_bin(reduce(operator.xor, U()))
+                u = _fast_hmac(password, u, digest).digest()
+                yield _bin_to_long(u)
+        return _long_to_bin(reduce(operator.xor, U()), hex_format_string)
 
     T = [F(x) for x in range(1, l + 1)]
     return ''.join(T[:-1]) + T[-1][:r]
diff --git a/tests/regressiontests/utils/crypto.py b/tests/regressiontests/utils/crypto.py
index e791e0aeef..2bdc5ba530 100644
--- a/tests/regressiontests/utils/crypto.py
+++ b/tests/regressiontests/utils/crypto.py
@@ -108,6 +108,17 @@ class TestUtilsCryptoPBKDF2(unittest.TestCase):
                        "c4007d5298f9033c0241d5ab69305e7b64eceeb8d"
                        "834cfec"),
         },
+        # Check leading zeros are not stripped (#17481) 
+        {
+            "args": { 
+                "password": chr(186), 
+                "salt": "salt", 
+                "iterations": 1, 
+                "dklen": 20, 
+                "digest": hashlib.sha1, 
+            }, 
+            "result": '0053d3b91a7f1e54effebd6d68771e8a6e0b2c5b',
+        },
     ]
 
     def test_public_vectors(self):
@@ -125,11 +136,15 @@ class TestUtilsCryptoPBKDF2(unittest.TestCase):
         Theory: If you run with 100 iterations, it should take 100
         times as long as running with 1 iteration.
         """
-        n1, n2 = 1000, 100000
-        elapsed = lambda f: timeit.Timer(f, 'from django.utils.crypto import pbkdf2').timeit(number=1)
+        # These values are chosen as a reasonable tradeoff between time
+        # to run the test suite and false positives caused by imprecise
+        # measurement.
+        n1, n2 = 200000, 800000
+        elapsed = lambda f: timeit.Timer(f, 
+                    'from django.utils.crypto import pbkdf2').timeit(number=1)
         t1 = elapsed('pbkdf2("password", "salt", iterations=%d)' % n1)
         t2 = elapsed('pbkdf2("password", "salt", iterations=%d)' % n2)
         measured_scale_exponent = math.log(t2 / t1, n2 / n1)
-        # This should be less than 1. We allow up to 1.1 so that tests don't 
+        # This should be less than 1. We allow up to 1.2 so that tests don't 
         # fail nondeterministically too often.
-        self.assertLess(measured_scale_exponent, 1.1)
+        self.assertLess(measured_scale_exponent, 1.2)