Some more python recursion examples
I had a number of yaml files that contained passwords that needed encrypting. After parsing the yaml file with pyyaml, the data looked something like this:
EXAMPLE_DATA = {
'jobs': [{'frequency': '* * * * *',
'jobconfig': [{'config': [('*',
{'maxspeed': 1048576,
'password': 'onesecretpassword',
'port': 22,
'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
'username': 'myusername'})],
'hasbro': 'basset'},
{'config': [('*',
{'field_delim': ',',
'field_line': True,
'no_blanks': True,
'quote_char': '"'})],
'hasbro': 'pen'},
{'config': [('*',
{'db_database': 'mydatabase',
'db_host': 'myhost',
'db_password': 'anothersecretpassword',
'db_table': 'mytable',
'db_user': 'myuser'})],
'hasbro': 'dart'}],
'jobdesc': 'Data from tootsiepop',
'jobname': 'tootsiepop',
'max_records_fail': '110%',
'min_failure_time': '1000y'}],
'vendor': 'tootsiepop'}
Print all leaf nodes¶
Here is a recursive function that prints all the leaf nodes of my nested data structure.
def print_all_leaf_nodes(data):
if isinstance(data, dict):
for item in data.values():
print_all_leaf_nodes(item)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
print_all_leaf_nodes(item)
else:
print data
print_all_leaf_nodes(EXAMPLE_DATA)
Results:
tootsiepop 1000y tootsiepop * basset://basset1.domain.com/tootsiepop/123.csv myusername onesecretpassword 1048576 22 basset * True " , True pen * anothersecretpassword mytable myhost mydatabase myuser dart * * * * * 110% Data from tootsiepop
Get all leaf nodes¶
This function returns all leaf nodes as a list instead of printing them. A wrapper function is used to create a Namespace instance to hold the results variable. This could alternatively be stored in a global (module-level) variable. See my notes on variable scope for more info about using a class as a namespace.
def get_all_leaf_nodes(data):
class Namespace(object):
pass
ns = Namespace()
ns.results = []
def inner(data):
if isinstance(data, dict):
for item in data.values():
inner(item)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
inner(item)
else:
ns.results.append(data)
inner(data)
return ns.results
from pprint import pprint
pprint(get_all_leaf_nodes(EXAMPLE_DATA))
Results:
['tootsiepop', '1000y', 'tootsiepop', '*', 'basset://basset1.domain.com/tootsiepop/123.csv', 'myusername', 'onesecretpassword', 1048576, 22, 'basset', '*', True, '"', ',', True, 'pen', '*', 'anothersecretpassword', 'mytable', 'myhost', 'mydatabase', 'myuser', 'dart', '* * * * *', '110%', 'Data from tootsiepop']
Get all leaf key value pairs¶
This function gets all key value pairs where values are not compound data structures (i.e. dicts or lists)
def get_all_key_value_pairs_where_values_are_simple(data):
class Namespace(object):
pass
ns = Namespace()
ns.results = []
def inner(data):
if isinstance(data, dict):
for k, v in data.iteritems():
if (isinstance(v, dict) or
isinstance(v, list) or
isinstance(v, tuple)
):
inner(v)
else:
ns.results.append((k, v))
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
inner(item)
inner(data)
return ns.results
from pprint import pprint
pprint(get_all_key_value_pairs_where_values_are_simple(EXAMPLE_DATA))
Results:
[('vendor', 'tootsiepop'),
('min_failure_time', '1000y'),
('jobname', 'tootsiepop'),
('url', 'basset://basset1.domain.com/tootsiepop/123.csv'),
('username', 'myusername'),
('password', 'onesecretpassword'),
('maxspeed', 1048576),
('port', 22),
('hasbro', 'basset'),
('field_line', True),
('quote_char', '"'),
('field_delim', ','),
('no_blanks', True),
('hasbro', 'pen'),
('db_password', 'anothersecretpassword'),
('db_table', 'mytable'),
('db_host', 'myhost'),
('db_database', 'mydatabase'),
('db_user', 'myuser'),
('hasbro', 'dart'),
('frequency', '* * * * *'),
('max_records_fail', '110%'),
('jobdesc', 'Data from tootsiepop')]Modify values of terminal key value in a nested dict¶
This function modifies all the values of all dicts that are not compound data structures (i.e. dicts or lists). The modfn argument is a function that modifies the key value pair. It should accept two arguments: a key and value and it should return the modified value.
The example function, super_secure_encrypt is a function that checks if the string 'password' is in the key, and "encrypts" the value using the <sarcasm>super secure</sarcasm> ROT13 algorithm. (We are actually using the keyczar toolkit from google to do the encryption.)
def modify_all_simple_dict_values(data, modfn):
if isinstance(data, dict):
for k, v in data.iteritems():
if (isinstance(v, dict) or
isinstance(v, list) or
isinstance(v, tuple)
):
modify_all_simple_dict_values(v, modfn)
else:
data[k] = modfn(k, v)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
modify_all_simple_dict_values(item, modfn)
return data
def super_secure_encrypt(key, value):
if 'password' in key:
value = value.encode('rot13')
return value
from pprint import pprint
pprint(modify_all_simple_dict_values(EXAMPLE_DATA, super_secure_encrypt))
Results:
{'jobs': [{'frequency': '* * * * *',
'jobconfig': [{'config': [('*',
{'maxspeed': 1048576,
'password': 'barfrpergcnffjbeq',
'port': 22,
'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
'username': 'myusername'})],
'hasbro': 'basset'},
{'config': [('*',
{'field_delim': ',',
'field_line': True,
'no_blanks': True,
'quote_char': '"'})],
'hasbro': 'pen'},
{'config': [('*',
{'db_database': 'mydatabase',
'db_host': 'myhost',
'db_password': 'nabgurefrpergcnffjbeq',
'db_table': 'mytable',
'db_user': 'myuser'})],
'hasbro': 'dart'}],
'jobdesc': 'Data from tootsiepop',
'jobname': 'tootsiepop',
'max_records_fail': '110%',
'min_failure_time': '1000y'}],
'vendor': 'tootsiepop'}Related posts
- Find all combinations of a set of lists with itertools.product — posted 2011-11-01
- Free Computer Science courses online — posted 2009-06-30
- Find the N longest lines in a file with Python — posted 2009-06-28
- How to reverse words in a sentence using Python and C — posted 2009-04-22
- Python recursion example to navigate tree data — posted 2008-08-19
Comments
Great writeup!
I thought I should mention that isinstance() can test against multiple types at once, this lets you remove some of your ifs and ors if you wanted:
inner(data) could be rewritten as:
def inner(data):
if isinstance(data, dict):
for k, v in data.iteritems():
if isinstance(v, (dict, list, tuple)):
inner(v)
else:
ns.results.append((k, v))
elif isinstance(data, (list, tuple)):
for item in data:
inner(item)
opposed to what you have:
def inner(data):
if isinstance(data, dict):
for k, v in data.iteritems():
if (isinstance(v, dict) or
isinstance(v, list) or
isinstance(v, tuple)
):
inner(v)
else:
ns.results.append((k, v))
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
inner(item)
This isn't incredibly important to the subject of your post but I figured I'd mention it anyways.
Stuart
Stuart, thanks for the tip-- I will have to remember that!
