from treelib import Node, Tree
from pprint import pprint
import json
import os
class PathTree(Tree):
    """
    Tree of '/'-separated paths, built on top of treelib's ``Tree``.

    Basic tree operations are accomplished via identifiers:
        tree[id]        returns the node
        node.fpointer   returns list of children ids
        node.bpointer   returns the parent id

    Each treelib node has
        tag         nice name, a pure file or directory name with no path
                    e.g. 'b' or 'c.py'
        identifier  unique id, full path name e.g. 'a/b/c.py'.
                    If dir then no trailing slash e.g. 'a/b'
        data        a NodeData instance holding extra fields, e.g. data['url']
    """

    ROOT_ID = 'root'

    def create_root(self):
        """Create the root node (identifier ROOT_ID) with an empty url."""
        self.create_node("Root",  # tag
                         self.ROOT_ID,  # identifier
                         parent=None,
                         data=NodeData(url=''))

    def path_to_subtree(self, path):
        """
        Ensure a chain of nodes exists for every component of ``path``.

        Builds the subtree node by node, reusing any nodes that already
        exist. Newly created nodes get an empty url.

        :param path: '/'-separated path, e.g. 'a/b/c.py'. Whitespace around
                     each component is stripped.
        """
        path_as_components = [s.strip() for s in path.split('/')]
        parent = self.ROOT_ID
        for index, name in enumerate(path_as_components):
            # Join with '/', the same separator the path was split on, so the
            # identifier always has the documented 'a/b/c.py' form regardless
            # of the platform's os.path.sep.
            path_so_far = '/'.join(path_as_components[:index + 1])
            if not self.contains(path_so_far):
                self.create_node(name, path_so_far, parent=parent, data=NodeData(url=''))
            parent = path_so_far
class NodeData(dict):
    """Dictionary of extra per-node fields, e.g. ``node.data['url']``."""

    def __init__(self, url):
        # Start out with a single 'url' entry.
        super(NodeData, self).__init__(url=url)
def byteify(input):
    """
    Recursively rebuild a structure of nested dicts and lists.

    Originally written to convert unicode strings to normal strings, see
    http://stackoverflow.com/questions/956867/how-to-get-string-objects-instead-of-unicode-ones-from-json-in-python
    The branch that encoded unicode values to UTF-8 is disabled, so every
    value that is not a dict or a list is returned unchanged.

    :param input: a dict, a list, or any other value
    :return: a new dict / list with the same (recursively processed) content,
             or ``input`` itself when it is neither a dict nor a list
    """
    # NOTE: the parameter name shadows the builtin; it is kept so callers
    # that pass it by keyword keep working.
    if isinstance(input, dict):
        # .items() exists on Python 2 and 3 dicts; .iteritems() is Python 2 only.
        return {byteify(key): byteify(value)
                for key, value in input.items()}
    if isinstance(input, list):
        return [byteify(element) for element in input]
    return input
def pretty(d, indent=0):
    """
    Print a (possibly nested) dictionary as a tab-indented outline.

    Each key is printed on its own line at the current indent level. A dict
    value is printed recursively one level deeper; any other value is printed
    with str() on the following line, one level deeper than its key.

    :param d: dictionary to print
    :param indent: number of leading tabs for the keys of ``d``
    """
    # .items() exists on Python 2 and 3 dicts; .iteritems() is Python 2 only.
    for key, value in d.items():
        print('\t' * indent + str(key))
        if isinstance(value, dict):
            pretty(value, indent + 1)
        else:
            print('\t' * (indent + 1) + str(value))
def _paths_to_dicts(paths, ztree=False):
    """
    Converts a list of 'paths' into a list of dictionaries with enriched information.

    @:param: paths
        - single string with paths representing files/modules on each line. No directories.  P newline sep string
        - list of dictionaries (keys: path)                                                   P list
        - list of dictionaries (keys: path, url)                                              PU list
      Entries with a missing or empty 'path' are skipped; a missing 'url' is treated as None.

    @:param: ztree
        - False
            Enriches dictionaries with (keys: path, url, package).
            Only real .py file paths are expected and returned i.e. no dicts generated for paths that
            are directories (only leaf nodes of the path tree are emitted).

            Intended use is to pre-process incoming paths before later parsing them and generating
            a list of ALSM objects. One ALSM instance represents the parsed structure of one source
            code module/file.

            Benefits of this pre-processing, ensures:
                - that 'package' is intelligently calculated
                - that the dictionaries in the list are in a known format viz (keys: path, url, package)

        - True
            Enriches dictionaries with (keys: path, url, name, ppath), incl. dirs
            Intended purpose to create dir structure list for ztree javascript widget.
            Note that 'ppath' is a parent reference and refers to a 'path'

            In other words:
                - 'package' is still present in each dictionary (it is computed in both modes)
                - creates extra keys 'name' and 'ppath' in each dictionary
                - dictionaries are created for directories too.

    Dictionaries key possibilities:
        - path                      - path                                                  P
        - path, url                 - sometimes known as an un-enriched 'module path'       PU
        - path, url, package        - sometimes known as a 'module path'                    PUK
        - path, url, name, ppath    - sometimes known as a 'tree path'                      PUNA

    Dictionary key definitions:
        - path
            Full path to the source code file
            If it's a directory, will have a trailing slash
        - ppath
            Parent path, refers to a parent path
            If it's a directory, will have a trailing slash. Empty string for top level entries.
        - url
            Url
        - package
            Package prefix of the module, in module path notation e.g. 'src.common'.
            None when there is no sibling '__init__.py'.
        - name
            Base name of the module e.g. 'util.py' or directory e.g. 'src' (no trailing slash)

    On trailing slashes in path and ppath in the resulting module list:
        - in normal mode, all paths are files - thus never get trailing slashes anyway
        - in ztree mode directory paths get trailing slashes as a convention
        - ppath gets a trailing slash because it must accurately refer to a path which has the
          trailing slash by convention
      The trailing slash behaviour could be removed altogether from 'path' and 'ppath', but it is a
      convention that makes it clear we are referring to a directory.
      An entry is treated as a file (no trailing slash) when its name contains '.py'.

    :return: list of dictionaries (keys: path, url, package) or (keys: path, url, package, name, ppath)
    """
    result = []
    tree = PathTree()
    tree.create_root()

    def convert_path_string_to_list_of_dicts(text):
        # One {'path', 'url'} dictionary per non-blank line; url is unknown at this point.
        stripped_lines = (line.strip() for line in text.splitlines())
        return [{'path': line, 'url': None} for line in stripped_lines if line]

    if isinstance(paths, str):
        paths = convert_path_string_to_list_of_dicts(paths)

    # Build the tree
    for d in paths:
        # Look the keys up with .get() *before* using them: path-only dictionaries
        # (no 'url' key) are valid input and entries without a usable 'path' must be
        # skipped rather than raise KeyError.
        path = d.get('path')
        if not path:
            continue
        tree.path_to_subtree(path)
        tree[path].data['url'] = d.get('url')

    def get_package_path(node):
        # A node belongs to a package if there is a sibling __init__.py file
        # (the node itself counts, since it is among its parent's children).
        package = None
        if node.bpointer:
            sibling_nodes = [tree[nid] for nid in tree[node.bpointer].fpointer]
            for sibling in sibling_nodes:
                if os.path.basename(sibling.identifier) == '__init__.py':
                    package = os.path.dirname(sibling.identifier)
                    package = package.replace("/", ".")
                    break
        return package

    def get_parent_path(node):
        # Children of the root have no parent path; everything else refers to its
        # parent directory, with the conventional trailing separator.
        if node.bpointer and node.bpointer != tree.ROOT_ID:
            ppath = node.bpointer + os.path.sep
        else:
            ppath = ''
        return ppath

    # now build list of modules
    def scan(parent):
        # Depth-first walk: a node is emitted before its own children are visited.
        children = [tree[nid] for nid in parent.fpointer]
        for child in children:
            d = {
                'path': child.identifier if '.py' in child.tag else child.identifier + os.path.sep,
                'url': child.data['url'],
                'package': get_package_path(child),
            }
            if ztree:
                # creates modules for both directories and files - used for displaying tree in ztree javascript widget
                d['name'] = child.tag
                d['ppath'] = get_parent_path(child)
                result.append(d)
                if not child.is_leaf():
                    scan(child)
            else:
                # only creates modules for files
                if child.is_leaf():
                    result.append(d)
                else:
                    scan(child)

    scan(tree[tree.ROOT_ID])
    return result
def paths_to_modules(paths):
    """
    Pre-process incoming paths into module dictionaries (files only).

    @:param: paths - forwarded unchanged to _paths_to_dicts
    :returns: the list produced by _paths_to_dicts with ztree disabled,
              i.e. module dictionaries PUK, no dirs
    """
    modules = _paths_to_dicts(paths, ztree=False)
    return modules
def paths_to_tree_paths(paths):
    """
    Pre-process incoming paths into tree path dictionaries (files and dirs).

    @:param: paths - forwarded unchanged to _paths_to_dicts
    :returns: the list produced by _paths_to_dicts with ztree enabled,
              i.e. path dictionaries PUNA, incl. dirs
    """
    tree_paths = _paths_to_dicts(paths, ztree=True)
    return tree_paths