Some more python recursion examples
I had a number of yaml files that contained passwords that needed encrypting. After parsing the yaml file with pyyaml, the data looked something like this:
EXAMPLE_DATA = {
'jobs': [{'frequency': '* * * * *',
'jobconfig': [{'config': [('*',
{'maxspeed': 1048576,
'password': 'onesecretpassword',
'port': 22,
'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
'username': 'myusername'})],
'hasbro': 'basset'},
{'config': [('*',
{'field_delim': ',',
'field_line': True,
'no_blanks': True,
'quote_char': '"'})],
'hasbro': 'pen'},
{'config': [('*',
{'db_database': 'mydatabase',
'db_host': 'myhost',
'db_password': 'anothersecretpassword',
'db_table': 'mytable',
'db_user': 'myuser'})],
'hasbro': 'dart'}],
'jobdesc': 'Data from tootsiepop',
'jobname': 'tootsiepop',
'max_records_fail': '110%',
'min_failure_time': '1000y'}],
'vendor': 'tootsiepop'}
Print all leaf nodes¶
Here is a recursive function that prints all the leaf nodes of my nested data structure.
def print_all_leaf_nodes(data):
if isinstance(data, dict):
for item in data.values():
print_all_leaf_nodes(item)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
print_all_leaf_nodes(item)
else:
print data
print_all_leaf_nodes(EXAMPLE_DATA)
Results:
tootsiepop 1000y tootsiepop * basset://basset1.domain.com/tootsiepop/123.csv myusername onesecretpassword 1048576 22 basset * True " , True pen * anothersecretpassword mytable myhost mydatabase myuser dart * * * * * 110% Data from tootsiepop
Get all leaf nodes¶
This function returns all leaf nodes as a list instead of printing them. A wrapper function is used to create a Namespace instance to hold the results variable. This could alternatively be stored in a global (module-level) variable. See my notes on variable scope for more info about using a class as a namespace.
def get_all_leaf_nodes(data):
class Namespace(object):
pass
ns = Namespace()
ns.results = []
def inner(data):
if isinstance(data, dict):
for item in data.values():
inner(item)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
inner(item)
else:
ns.results.append(data)
inner(data)
return ns.results
from pprint import pprint
pprint(get_all_leaf_nodes(EXAMPLE_DATA))
Results:
['tootsiepop', '1000y', 'tootsiepop', '*', 'basset://basset1.domain.com/tootsiepop/123.csv', 'myusername', 'onesecretpassword', 1048576, 22, 'basset', '*', True, '"', ',', True, 'pen', '*', 'anothersecretpassword', 'mytable', 'myhost', 'mydatabase', 'myuser', 'dart', '* * * * *', '110%', 'Data from tootsiepop']
Get all leaf key value pairs¶
This function gets all key value pairs where values are not compound data structures (i.e. dicts or lists)
def get_all_key_value_pairs_where_values_are_simple(data):
class Namespace(object):
pass
ns = Namespace()
ns.results = []
def inner(data):
if isinstance(data, dict):
for k, v in data.iteritems():
if (isinstance(v, dict) or
isinstance(v, list) or
isinstance(v, tuple)
):
inner(v)
else:
ns.results.append((k, v))
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
inner(item)
inner(data)
return ns.results
from pprint import pprint
pprint(get_all_key_value_pairs_where_values_are_simple(EXAMPLE_DATA))
Results:
[('vendor', 'tootsiepop'), ('min_failure_time', '1000y'), ('jobname', 'tootsiepop'), ('url', 'basset://basset1.domain.com/tootsiepop/123.csv'), ('username', 'myusername'), ('password', 'onesecretpassword'), ('maxspeed', 1048576), ('port', 22), ('hasbro', 'basset'), ('field_line', True), ('quote_char', '"'), ('field_delim', ','), ('no_blanks', True), ('hasbro', 'pen'), ('db_password', 'anothersecretpassword'), ('db_table', 'mytable'), ('db_host', 'myhost'), ('db_database', 'mydatabase'), ('db_user', 'myuser'), ('hasbro', 'dart'), ('frequency', '* * * * *'), ('max_records_fail', '110%'), ('jobdesc', 'Data from tootsiepop')]
Modify values of terminal key value in a nested dict¶
This function modifies all the values of all dicts that are not compound data structures (i.e. dicts or lists). The modfn
argument is a function that modifies the key value pair. It should accept two arguments: a key and value and it should return the modified value.
The example function, super_secure_encrypt
is a function that checks if the string 'password' is in the key, and "encrypts" the value using the <sarcasm>super secure</sarcasm> ROT13 algorithm. (We are actually using the keyczar toolkit from google to do the encryption.)
def modify_all_simple_dict_values(data, modfn):
if isinstance(data, dict):
for k, v in data.iteritems():
if (isinstance(v, dict) or
isinstance(v, list) or
isinstance(v, tuple)
):
modify_all_simple_dict_values(v, modfn)
else:
data[k] = modfn(k, v)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
modify_all_simple_dict_values(item, modfn)
return data
def super_secure_encrypt(key, value):
if 'password' in key:
value = value.encode('rot13')
return value
from pprint import pprint
pprint(modify_all_simple_dict_values(EXAMPLE_DATA, super_secure_encrypt))
Results:
{'jobs': [{'frequency': '* * * * *', 'jobconfig': [{'config': [('*', {'maxspeed': 1048576, 'password': 'barfrpergcnffjbeq', 'port': 22, 'url': 'basset://basset1.domain.com/tootsiepop/123.csv', 'username': 'myusername'})], 'hasbro': 'basset'}, {'config': [('*', {'field_delim': ',', 'field_line': True, 'no_blanks': True, 'quote_char': '"'})], 'hasbro': 'pen'}, {'config': [('*', {'db_database': 'mydatabase', 'db_host': 'myhost', 'db_password': 'nabgurefrpergcnffjbeq', 'db_table': 'mytable', 'db_user': 'myuser'})], 'hasbro': 'dart'}], 'jobdesc': 'Data from tootsiepop', 'jobname': 'tootsiepop', 'max_records_fail': '110%', 'min_failure_time': '1000y'}], 'vendor': 'tootsiepop'}
Related posts
- Find all combinations of a set of lists with itertools.product — posted 2011-11-01
- Free Computer Science courses online — posted 2009-06-30
- Find the N longest lines in a file with Python — posted 2009-06-28
- How to reverse words in a sentence using Python and C — posted 2009-04-22
- Python recursion example to navigate tree data — posted 2008-08-19
Comments
Great writeup!
I thought I should mention that isinstance()
can test against multiple types at once, this lets you remove some of your if
s and or
s if you wanted:
inner(data)
could be rewritten as:
def inner(data):
if isinstance(data, dict):
for k, v in data.iteritems():
if isinstance(v, (dict, list, tuple)):
inner(v)
else:
ns.results.append((k, v))
elif isinstance(data, (list, tuple)):
for item in data:
inner(item)
opposed to what you have:
def inner(data):
if isinstance(data, dict):
for k, v in data.iteritems():
if (isinstance(v, dict) or
isinstance(v, list) or
isinstance(v, tuple)
):
inner(v)
else:
ns.results.append((k, v))
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
inner(item)
This isn't incredibly important to the subject of your post but I figured I'd mention it anyways.
Stuart
Stuart, thanks for the tip-- I will have to remember that!