Browse Source

Add operation stats; Reformat file

master
wchen342 11 months ago
parent
commit
58ce91dcc3
Signed by: wchen342 GPG Key ID: 720B70365E800508
2 changed files with 52 additions and 16 deletions
  1. +1
    -1
      README.md
  2. +51
    -15
      source_parser.py

+ 1
- 1
README.md View File

@ -1,2 +1,2 @@
# Python-Source-Parser
A simple .py source file parser using AST
A simple .py source file parser using AST. It can output statistics about the functions, operations, and attributes used, and it reports flow-control branching as a tree. It also performs very simple tracing of changes in variable types.

+ 51
- 15
source_parser.py View File

@ -18,18 +18,22 @@ from enum import Enum
class BranchingType(Enum):
    """Enumeration with the two branch kinds, ``If`` and ``Else``.

    NOTE(review): presumably used as the ``type`` of a BranchingTreeNode;
    confirm against the callers, which are not visible in this hunk.
    """
    If = 1
    Else = 2
class BranchingTreeNode:
"""Node for branch tracing. This is an n-ary tree node."""
def __init__(self, val, type=None):
    """Create a node that starts with no parent and no children.

    @param val: value stored on the node
    @param type: type of the node; defaults to None
    """
    self._val = val  # value of the node
    self._type = type  # type of the node
    self._parent = None  # no parent until one is assigned
    self._var_list = defaultdict(list)  # for variables tracing
    self._subtrees = []  # list of subtree node objects
def add_child(self, node):
    """Attach ``node`` as the last child of this node.

    The child is appended to this node's subtree list and its ``parent``
    attribute is then pointed back at this node.

    @param node: the node object to attach
    """
    children = self._subtrees
    children.append(node)
    node.parent = self
def get_child(self):
    """Return the list of child nodes attached to this node.

    The internal list itself is returned, not a copy.
    """
    return self._subtrees
@ -61,6 +65,7 @@ class BranchingTreeNode:
class BranchingTree:
"""Contains branching information of a function."""
def __init__(self, func_def):
if not isinstance(func_def, ast.FunctionDef):
raise RuntimeError("Error building branching tree.")
@ -79,11 +84,20 @@ class BranchingTree:
if ret is not None:
return ret
def walk(self, node):
    """Collect the ``var_list`` of ``node`` and of every node below it.

    The traversal is depth-first, pre-order (not breadth-first): a node's
    ``var_list`` comes first, followed by everything in its first child's
    subtree, then its second child's subtree, and so on, with children
    taken in ``get_child()`` order. An explicit stack is used instead of
    recursion so that very deep trees cannot hit the interpreter's
    recursion limit.

    @param node: root of the (sub)tree to traverse; must provide
                 ``var_list`` and ``get_child()``
    @return: list with one ``var_list`` entry per visited node
    """
    ret = []
    pending = [node]
    while pending:
        current = pending.pop()
        ret.append(current.var_list)
        # Push children reversed so the left-most child is popped first,
        # which keeps the left-to-right pre-order of the recursive version.
        pending.extend(reversed(current.get_child()))
    return ret
def build(self):
    """Build the branching tree starting from the root node.

    Delegates to ``_build_subroutine``, passing the root node together
    with the value stored on it as the ``ast_node`` argument.
    """
    root = self.root
    self._build_subroutine(root, root.val)
    # self.print_tree()  # debugging aid, left disabled
def _build_subroutine(self, node, ast_node):
cls_name = ast_node.__class__.__name__
if cls_name == "If":
num_else = len(ast_node.orelse)
@ -131,6 +145,7 @@ class BranchingTree:
if node.val.__class__.__name__ == "If":
_str += indent + " " + pprint.pformat(node.var_list, compact=False, indent=len(indent)- 4)
# _str += " " + ast.dump(node.val.test)
output += _str + "\n"
# Sort if..else
children = node.get_child()
@ -158,7 +173,8 @@ def iter_fields(node):
yield field, getattr(node, field)
except AttributeError:
pass
class NodeVisitor(ast.NodeVisitor):
def __init__(self, caller):
@ -181,12 +197,13 @@ class NodeVisitor(ast.NodeVisitor):
elif isinstance(value, AST):
self.visit(value)
def func_body_visit(self, node, body_stat, func_stat, branching_tree):
def func_body_visit(self, node, body_stat, func_stat, op_stat, branching_tree):
"""Visit a node. Function body ver."""
cls_name = node.__class__.__name__
return self.func_body_generic_visit(node, body_stat, func_stat, branching_tree)
def func_body_generic_visit(self, node, body_stat, func_stat, branching_tree):
return self.func_body_generic_visit(node, body_stat, func_stat, op_stat, branching_tree)
def func_body_generic_visit(self, node, body_stat, func_stat, op_stat, branching_tree):
"""Visit function used only for tracing variables and operations in function bodies"""
# TODO: class is not considered currently
cls_name = node.__class__.__name__
@ -194,15 +211,21 @@ class NodeVisitor(ast.NodeVisitor):
if isinstance(value, list):
for item in value:
if isinstance(item, AST):
self.func_body_visit(item, body_stat, func_stat, branching_tree)
self.func_body_visit(item, body_stat, func_stat, op_stat, branching_tree)
elif isinstance(value, AST):
self.func_body_visit(value, body_stat, func_stat, branching_tree)
self.func_body_visit(value, body_stat, func_stat, op_stat, branching_tree)
# print(ast.dump(node))
# Operation stats
body_stat[cls_name] += 1
# Function calling stats
if cls_name == "Call":
func_stat[node.func.id] += 1
if node.func.__class__.__name__ == "Attribute":
func_stat[node.func.attr] += 1
else:
func_stat[node.func.id] += 1
# Variable tracing
if cls_name == "Assign":
for var in node.targets:
@ -238,10 +261,14 @@ class NodeVisitor(ast.NodeVisitor):
if right_type == "str":
out_type = "str"
elif right_type == "int" or right_type == "float":
out_type = None # Cannot be determined
if isinstance(node.op, ast.Add) or isinstance(node.op, ast.Sub):
out_type = "float"
else:
out_type = None # Cannot be determined
else:
warnings.warn("Type of both sides of an expression cannot be determined!")
out_type = None
# Add variable change to list
for i in range(2):
if sub_nodes[i].__class__.__name__ == "Name":
@ -249,8 +276,11 @@ class NodeVisitor(ast.NodeVisitor):
types[i], sub_nodes[i])
# var_list[sub_nodes[i].id].append({"op": node.op.__class__.__name__, "type": types[i]})
node.ret_type = out_type
# Add OP stats
op_stat.append({'type': node.op.__class__.__name__, 'left': left_type, 'right': right_type})
elif cls_name == "BoolOp":
pass # TODO
raise NotImplementedError("Not yet implemented")
elif cls_name == "Tuple" or cls_name == "List" or cls_name == "Set" or cls_name == "Dict":
node.ret_type = cls_name
elif cls_name == "Call":
@ -286,6 +316,7 @@ class NodeVisitor(ast.NodeVisitor):
types[i] = None
else:
types[i] = sub_nodes[i].ret_type # Inherent from previous operation
for i in range(len(sub_nodes)):
if sub_nodes[i].__class__.__name__ == "Name":
NodeVisitor._add_var_trace(branching_tree, sub_nodes[i].id,
@ -316,6 +347,7 @@ class NodeVisitor(ast.NodeVisitor):
n_type = None
else:
n_type = sub_node.ret_type # Inherent from previous operation
if sub_node.__class__.__name__ == "Name":
NodeVisitor._add_var_trace(branching_tree, sub_node.id, "Attribute", n_type, sub_node)
# var_list[sub_node.id].append({"op": "Attribute", "type": n_type})
@ -348,6 +380,7 @@ class NodeVisitor(ast.NodeVisitor):
warnings.warn("Non-standard argument encountered. Visual inspection recommended.")
func_def["args"][idx]["default_val"] = ast.dump(args.defaults[i])
idx -= 1
# Parse body
# Collect used operators, statements and function names. Note that elif will be counted as multiple ifs.
# It also traces a history of all operations on variables. This can be used to detect e.g. str * int.
@ -370,11 +403,14 @@ class NodeVisitor(ast.NodeVisitor):
# Variable tracing
body_stat = defaultdict(int)
func_stat = defaultdict(int)
op_list = []
# var_list = defaultdict(list)
for sub_nodes in node.body:
self.func_body_visit(sub_nodes, body_stat, func_stat, branching_tree)
self.func_body_visit(sub_nodes, body_stat, func_stat, op_list, branching_tree)
func_def["body_stat"] = body_stat
func_def["func_stat"] = func_stat
func_def["op_list"] = op_list
func_def["branching_tree"] = branching_tree
# Parse returns
@ -392,7 +428,7 @@ class NodeVisitor(ast.NodeVisitor):
else:
# the statement returns non-standard value (list, tuple, dict, func, etc.)
warnings.warn("Non-standard return statement encountered. Visual inspection recommended.")
returns.append(ast.dump(n.value))
returns.append({"val": ast.dump(n.value), "type": n.value.__class__.__name__})
func_def["returns"] = returns
self.caller.add_func_def(func_def)
@ -447,8 +483,9 @@ class FileParser:
Read a python source file and parse it.
@param f: a file handler
"""
self.__tree = None
self.__func_list = []
self.__func_list = {}
self.__tree = ast.parse(f.read())
@ -461,7 +498,7 @@ class FileParser:
return self.__func_list
def add_func_def(self, func_def):
self.__func_list.append(func_def)
self.__func_list[func_def['name']] = func_def
def get_used_func(self):
"""
@ -471,7 +508,6 @@ class FileParser:
nv.visit(self.__tree)
# DEBUG
class MyPrettyPrinter(pprint.PrettyPrinter):
_dispatch = pprint.PrettyPrinter._dispatch.copy()


Loading…
Cancel
Save