I had a number of yaml files that contained passwords that needed encrypting. After parsing the yaml file with pyyaml, the data looked something like this:
EXAMPLE_DATA = {
'jobs': [{'frequency': '* * * * *',
'jobconfig': [{'config': [('*',
{'maxspeed': 1048576,
'password': 'onesecretpassword',
'port': 22,
'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
'username': 'myusername'})],
'hasbro': 'basset'},
{'config': [('*',
{'field_delim': ',',
'field_line': True,
'no_blanks': True,
'quote_char': '"'})],
'hasbro': 'pen'},
{'config': [('*',
{'db_database': 'mydatabase',
'db_host': 'myhost',
'db_password': 'anothersecretpassword',
'db_table': 'mytable',
'db_user': 'myuser'})],
'hasbro': 'dart'}],
'jobdesc': 'Data from tootsiepop',
'jobname': 'tootsiepop',
'max_records_fail': '110%',
'min_failure_time': '1000y'}],
'vendor': 'tootsiepop'}
Print all leaf nodes¶
Here is a recursive function that prints all the leaf nodes of my nested data structure.
def print_all_leaf_nodes(data):
if isinstance(data, dict):
for item in data.values():
print_all_leaf_nodes(item)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
print_all_leaf_nodes(item)
else:
print data
print_all_leaf_nodes(EXAMPLE_DATA)
Results:
tootsiepop
1000y
tootsiepop
*
basset://basset1.domain.com/tootsiepop/123.csv
myusername
onesecretpassword
1048576
22
basset
*
True
"
,
True
pen
*
anothersecretpassword
mytable
myhost
mydatabase
myuser
dart
* * * * *
110%
Data from tootsiepop
Get all leaf nodes¶
This function returns all leaf nodes as a list instead of printing them. A wrapper function is used to create a Namespace instance to hold the results variable. This could alternatively be stored in a global (module-level) variable. See my notes on variable scope for more info about using a class as a namespace.
def get_all_leaf_nodes(data):
class Namespace(object):
pass
ns = Namespace()
ns.results = []
def inner(data):
if isinstance(data, dict):
for item in data.values():
inner(item)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
inner(item)
else:
ns.results.append(data)
inner(data)
return ns.results
from pprint import pprint
pprint(get_all_leaf_nodes(EXAMPLE_DATA))
Results:
['tootsiepop',
'1000y',
'tootsiepop',
'*',
'basset://basset1.domain.com/tootsiepop/123.csv',
'myusername',
'onesecretpassword',
1048576,
22,
'basset',
'*',
True,
'"',
',',
True,
'pen',
'*',
'anothersecretpassword',
'mytable',
'myhost',
'mydatabase',
'myuser',
'dart',
'* * * * *',
'110%',
'Data from tootsiepop']
Get all leaf key value pairs¶
This function gets all key value pairs where values are not compound data structures (i.e. dicts or lists)
def get_all_key_value_pairs_where_values_are_simple(data):
class Namespace(object):
pass
ns = Namespace()
ns.results = []
def inner(data):
if isinstance(data, dict):
for k, v in data.iteritems():
if (isinstance(v, dict) or
isinstance(v, list) or
isinstance(v, tuple)
):
inner(v)
else:
ns.results.append((k, v))
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
inner(item)
inner(data)
return ns.results
from pprint import pprint
pprint(get_all_key_value_pairs_where_values_are_simple(EXAMPLE_DATA))
Results:
[('vendor', 'tootsiepop'),
('min_failure_time', '1000y'),
('jobname', 'tootsiepop'),
('url', 'basset://basset1.domain.com/tootsiepop/123.csv'),
('username', 'myusername'),
('password', 'onesecretpassword'),
('maxspeed', 1048576),
('port', 22),
('hasbro', 'basset'),
('field_line', True),
('quote_char', '"'),
('field_delim', ','),
('no_blanks', True),
('hasbro', 'pen'),
('db_password', 'anothersecretpassword'),
('db_table', 'mytable'),
('db_host', 'myhost'),
('db_database', 'mydatabase'),
('db_user', 'myuser'),
('hasbro', 'dart'),
('frequency', '* * * * *'),
('max_records_fail', '110%'),
('jobdesc', 'Data from tootsiepop')]
Modify values of terminal key value in a nested dict¶
This function modifies all the values of all dicts that are not compound data structures (i.e. dicts or lists). The modfn
argument is a function that modifies the key value pair. It should accept two arguments: a key and value and it should return the modified value.
The example function, super_secure_encrypt
is a function that checks if the string 'password' is in the key, and "encrypts" the value using the <sarcasm>super secure</sarcasm> ROT13 algorithm. (We are actually using the keyczar toolkit from google to do the encryption.)
def modify_all_simple_dict_values(data, modfn):
if isinstance(data, dict):
for k, v in data.iteritems():
if (isinstance(v, dict) or
isinstance(v, list) or
isinstance(v, tuple)
):
modify_all_simple_dict_values(v, modfn)
else:
data[k] = modfn(k, v)
elif isinstance(data, list) or isinstance(data, tuple):
for item in data:
modify_all_simple_dict_values(item, modfn)
return data
def super_secure_encrypt(key, value):
if 'password' in key:
value = value.encode('rot13')
return value
from pprint import pprint
pprint(modify_all_simple_dict_values(EXAMPLE_DATA, super_secure_encrypt))
Results:
{'jobs': [{'frequency': '* * * * *',
'jobconfig': [{'config': [('*',
{'maxspeed': 1048576,
'password': 'barfrpergcnffjbeq',
'port': 22,
'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
'username': 'myusername'})],
'hasbro': 'basset'},
{'config': [('*',
{'field_delim': ',',
'field_line': True,
'no_blanks': True,
'quote_char': '"'})],
'hasbro': 'pen'},
{'config': [('*',
{'db_database': 'mydatabase',
'db_host': 'myhost',
'db_password': 'nabgurefrpergcnffjbeq',
'db_table': 'mytable',
'db_user': 'myuser'})],
'hasbro': 'dart'}],
'jobdesc': 'Data from tootsiepop',
'jobname': 'tootsiepop',
'max_records_fail': '110%',
'min_failure_time': '1000y'}],
'vendor': 'tootsiepop'}