Do you have a lot of short, single-use, private functions in your Python code?
Do you have a lot of short, single-use, private functions in your Python code? For example, below is some stubbed out authentication code I've been working on. It checks if a user's password is correct and updates the hash algorithm to use bcrypt. The 4 private functions with the leading underscore are from 1 to 10 lines long and are only used by the check_password
function. These functions are part of a larger module with about 20 functions. I don't like that these 4 functions add clutter to the module and are not grouped with the function that uses them, check_password
.
def _get_password_hash_from_db(email_address):
"""Get the user's password hash from the database.
"""
def _determine_password_hash_algorithm(password_hash):
"""Determine the hash algorithm.
"""
def _hash_password_old(password):
"""This is the OLD password hash algorithm.
"""
def _hash_existing_password_bcrypt(password, db_password_hash):
"""This is the NEW algorithm used for hashing existing passwords.
"""
def check_password(email_address, password):
"""Check if a user's supplied password is correct.
"""
db_password_hash = _get_password_hash_from_db(email_address)
hash_alg = _determine_password_hash_algorithm(db_password_hash)
if hash_alg == 'BCRYPT':
input_password_hash = _hash_existing_password_bcrypt(password, db_password_hash)
else:
input_password_hash = _hash_password_old(password)
password_correct = (input_password_hash == db_password_hash)
if password_correct and hash_alg != 'BCRYPT':
call_change_password(email_address, password)
return password_correct
def call_change_password(email_address, new_password):
"""Change the user's password.
"""
Sometimes, in cases like this, I move the 4 private functions to be nested functions inside check_password
. I like how the functions are grouped together and that the module is not littered with extraneous functions. However, the inner functions are not easily testable and I don't see many people doing this.
def check_password(email_address, password):
"""Check if a user's supplied password is correct.
"""
def get_password_hash_from_db(email_address):
"""Get the user's password hash from the database.
"""
def determine_password_hash_algorithm(password_hash):
"""Determine the hash algorithm.
"""
def hash_password_old(password):
"""This is the OLD password hash algorithm.
"""
def hash_existing_password_bcrypt(password, db_password_hash):
"""This is the NEW algorithm used for hashing existing passwords.
"""
db_password_hash = get_password_hash_from_db(email_address)
hash_alg = determine_password_hash_algorithm(db_password_hash)
if hash_alg == 'BCRYPT':
input_password_hash = hash_existing_password_bcrypt(password, db_password_hash)
else:
input_password_hash = hash_password_old(password)
password_correct = (input_password_hash == db_password_hash)
if password_correct and hash_alg != 'BCRYPT':
call_change_password(email_address, password)
return password_correct
def call_change_password(email_address, new_password):
"""Change the user's password.
"""
Another option is to create a PasswordChecker class instead. This seems the most powerful and now the private methods are testable. However, this adds more overhead and I hear Jack Diederich telling me to Stop Writing Classes!
class _PasswordChecker(object):
"""Check if a user's supplied password is correct.
"""
@staticmethod
def _get_password_hash_from_db(email_address):
"""Get the user's password hash from the database.
"""
@staticmethod
def _determine_password_hash_algorithm(password_hash):
"""Determine the hash algorithm.
"""
@staticmethod
def _hash_password_old(password):
"""This is the OLD password hash algorithm.
"""
@staticmethod
def _hash_existing_password_bcrypt(password, db_password_hash):
"""This is the NEW algorithm used for hashing existing passwords.
"""
def __call__(self, email_address, password):
db_password_hash = self._get_password_hash_from_db(email_address)
hash_alg = self._determine_password_hash_algorithm(db_password_hash)
if hash_alg == 'BCRYPT':
input_password_hash = self._hash_existing_password_bcrypt(password, db_password_hash)
else:
input_password_hash = self._hash_password_old(password)
password_correct = (input_password_hash == db_password_hash)
if password_correct and hash_alg != 'BCRYPT':
call_change_password(email_address, password)
return password_correct
check_password = _PasswordChecker()
def call_change_password(email_address, new_password):
"""Change the user's password.
"""
Maybe the solution is to break up the module into smaller modules which act like the class above? However this might leave me with some unevenly sized modules. How do you handle this?
Comments
I must admin I have usually just been using the first approach like you and left the itch unscratched. I like the nested function approach, and by using a decorator you could easily pull the functions out for individual testing.
testables = {}
def testable(func):
testables[func.__name__] = func
return func
def first():
@testable
def second():
return 5
return 5 + second()
print first()
print testables['second']()
Thoughts?
If the only issue with testing the inner functions is accessing them from outside (I.e., if you don't build them to capture variables from the containing function's scope), you can expose them on the function itself:
def outer():
def inner():
pass
outer._inner = inner
def test_inner():
outer._inner() # and testing
Asbjørn, tikitu,
Solving the problem of the testability of the inner functions is interesting. tikitu, you're right, the inner functions would be restricted from accessing variables from the containing function (i.e. they could not be closures). In that case, we would not expect testability of the individual inner functions anyways, right?
Note that the inner functions are re-created each time the enclosing function is called. 'def' statements are just statements in Python; they create function objects and "bind" them in the current enclosing namespace. Inner functions can be used to make closures but they incur the overhead of creating a function object each time the 'def' statement runs, so I would avoid using them just for stylistic reasons.
I consider readability to be one of the main strengths of Python so if having a bunch of little one-off functions makes it easier to understand how the code works then they are well worth it in my opinion.
The same goes for "promoting" a function and its dedicated helper functions to a class. That is what namespaces are for, after all, and although it's somewhat unusual I see nothing wrong with using a class that way and have done so myself in the past.
Simon, thanks for the heads up about the overhead of using inner functions. Thanks for your input on using classes also. I want to write good code and like hearing how others handle these types of things.
You're very welcome. :)
That's the problem of not understanding how OOP works. I recommend reading Uncle Bobs 'Clean Code'. Only do one thing at a time. Same for classes. Only one behaviour. So far I have addressed 4 different behaviours.
- IO (db access)
- algorithm detection
- hashing
- and the password check itself
Throwing together functions into a class is not necessarily OOP. There are principles to follow. But normally a long way of experience/pain. Nowadays I prefer functional languages more. For me it is so much easier using functional languages for most of the tasks
Here is my OOP approach which from my POV not only is much cleaner and easier to read and also it's fully testable, extendable and replaceable, without changing the code from PasswordChecker, or the Hashfactory. You only add a new algorithm or replace it with another (maybe faster or fixed one) and put it into the hash_alogorithms map. This mapping/configuration can even be read from a config file. Thats how IoC and Dependency Injection works in the enterprise (Java, .NET). A little outdate, but I hope you enjoy this solution
class MyRepo(object):
def get_password_hash(email_address):
"""Get the user's password hash from the database.
"""
pass
BCRYPT = 'bcrypt'
SHA1 = 'sha1'
class MyBcrypt(object):
"""This is the NEW algorithm used for hashing existing passwords.
"""
def __init__(self, target_hash):
self._target_hash = target_hash
self.hash_algorithm = BRCYPT
def __call__(self, password):
pass
class MySha1(object):
"""This is the OLD password hash algorithm.
"""
def __init__(self, target_hash):
self._target_hash = target_hash
self.hash_algorythm = SHA1
def __call__(self, password):
pass
class HashFactory(object):
def __init__(self, hasher_map):
self._hashers = hasher_map
def determine(self, hashed_text):
"""Determine if the hash algorithm is bcrypt or sha1
"""
pass
def create(self, hashed_text):
alogrithm = self.determine(hashed_text)
constructor = self._hashers[algorithm]
hasher = constructor(hashed_text)
return hasher
def is_valid(self, hasher):
return hasher.algorithm != BCRYPT
class PasswordChecker(object):
"""Check if a user's supplied password is correct.
"""
def __init__(self, repository, hash_factory):
self._repo = repository
self._hash_factory = hash_factory
def __call__(self, email_address, password):
db_password_hash = self._repo.get_password_hash(email_address)
hasher = self._hash_factory.determine(db_password_hash)
input_password_hash = hasher(password)
password_correct = (input_password_hash == db_password_hash)
if password_correct and self._hash_factory.is_obsolete(hasher):
self._repo.change_password(email_address, password)
return True
return False
def check_password_example():
repo = MyRepo()
hash_algorithms = {BCRYPT: MyBcrypt,
SHA1: MySha1}
hash_factory = HashFactory(hash_algorithms)
pw_check = PasswordChecker()
pw_check("[email protected]","12345safepw")