mirror of
				https://github.com/django/django.git
				synced 2025-10-31 09:41:08 +00:00 
			
		
		
		
	Fixed #24366 -- Optimized traversal of large migration dependency graphs.
Switched from an adjacency list and uncached, iterative depth-first search to a Node-based design with direct parent/child links and a cached, recursive depth-first search. With this change, calculating a migration plan for a large graph takes several seconds instead of several hours. Marked test `migrations.test_graph.GraphTests.test_dfs` as an expected failure due to reaching the maximum recursion depth.
This commit is contained in:
		
				
					committed by
					
						 Markus Holtermann
						Markus Holtermann
					
				
			
			
				
	
			
			
			
						parent
						
							7fa7dd48c4
						
					
				
				
					commit
					78d43a5e10
				
			| @@ -47,7 +47,7 @@ class MigrationExecutor(object): | ||||
|                 # child(ren) in the same app, and no further. | ||||
|                 next_in_app = sorted( | ||||
|                     n for n in | ||||
|                     self.loader.graph.dependents.get(target, set()) | ||||
|                     self.loader.graph.node_map[target].children | ||||
|                     if n[0] == target[0] | ||||
|                 ) | ||||
|                 for node in next_in_app: | ||||
|   | ||||
| @@ -5,6 +5,68 @@ from collections import deque | ||||
| from django.db.migrations.state import ProjectState | ||||
| from django.utils.datastructures import OrderedSet | ||||
| from django.utils.encoding import python_2_unicode_compatible | ||||
| from django.utils.functional import total_ordering | ||||
|  | ||||
|  | ||||
| @python_2_unicode_compatible | ||||
| @total_ordering | ||||
| class Node(object): | ||||
|     """ | ||||
|     A single node in the migration graph. Contains direct links to adjacent | ||||
|     nodes in either direction. | ||||
|     """ | ||||
|     def __init__(self, key): | ||||
|         self.key = key | ||||
|         self.children = set() | ||||
|         self.parents = set() | ||||
|  | ||||
|     def __eq__(self, other): | ||||
|         return self.key == other | ||||
|  | ||||
|     def __lt__(self, other): | ||||
|         return self.key < other | ||||
|  | ||||
|     def __hash__(self): | ||||
|         return hash(self.key) | ||||
|  | ||||
|     def __getitem__(self, item): | ||||
|         return self.key[item] | ||||
|  | ||||
|     def __str__(self): | ||||
|         return str(self.key) | ||||
|  | ||||
|     def __repr__(self): | ||||
|         return '<Node: (%r, %r)>' % self.key | ||||
|  | ||||
|     def add_child(self, child): | ||||
|         self.children.add(child) | ||||
|  | ||||
|     def add_parent(self, parent): | ||||
|         self.parents.add(parent) | ||||
|  | ||||
|     # Use manual caching, @cached_property effectively doubles the | ||||
|     # recursion depth for each recursion. | ||||
|     def ancestors(self): | ||||
|         # Use self.key instead of self to speed up the frequent hashing | ||||
|         # when constructing an OrderedSet. | ||||
|         if '_ancestors' not in self.__dict__: | ||||
|             ancestors = deque([self.key]) | ||||
|             for parent in sorted(self.parents): | ||||
|                 ancestors.extendleft(reversed(parent.ancestors())) | ||||
|             self.__dict__['_ancestors'] = list(OrderedSet(ancestors)) | ||||
|         return self.__dict__['_ancestors'] | ||||
|  | ||||
|     # Use manual caching, @cached_property effectively doubles the | ||||
|     # recursion depth for each recursion. | ||||
|     def descendants(self): | ||||
|         # Use self.key instead of self to speed up the frequent hashing | ||||
|         # when constructing an OrderedSet. | ||||
|         if '_descendants' not in self.__dict__: | ||||
|             descendants = deque([self.key]) | ||||
|             for child in sorted(self.children): | ||||
|                 descendants.extendleft(reversed(child.descendants())) | ||||
|             self.__dict__['_descendants'] = list(OrderedSet(descendants)) | ||||
|         return self.__dict__['_descendants'] | ||||
|  | ||||
|  | ||||
| @python_2_unicode_compatible | ||||
| @@ -32,12 +94,15 @@ class MigrationGraph(object): | ||||
|     """ | ||||
|  | ||||
|     def __init__(self): | ||||
|         self.node_map = {} | ||||
|         self.nodes = {} | ||||
|         self.dependencies = {} | ||||
|         self.dependents = {} | ||||
|         self.cached = False | ||||
|  | ||||
|     def add_node(self, node, implementation): | ||||
|         self.nodes[node] = implementation | ||||
|     def add_node(self, key, implementation): | ||||
|         node = Node(key) | ||||
|         self.node_map[key] = node | ||||
|         self.nodes[key] = implementation | ||||
|         self.clear_cache() | ||||
|  | ||||
|     def add_dependency(self, migration, child, parent): | ||||
|         if child not in self.nodes: | ||||
| @@ -50,8 +115,16 @@ class MigrationGraph(object): | ||||
|                 "Migration %s dependencies reference nonexistent parent node %r" % (migration, parent), | ||||
|                 parent | ||||
|             ) | ||||
|         self.dependencies.setdefault(child, set()).add(parent) | ||||
|         self.dependents.setdefault(parent, set()).add(child) | ||||
|         self.node_map[child].add_parent(self.node_map[parent]) | ||||
|         self.node_map[parent].add_child(self.node_map[child]) | ||||
|         self.clear_cache() | ||||
|  | ||||
|     def clear_cache(self): | ||||
|         if self.cached: | ||||
|             for node in self.nodes: | ||||
|                 self.node_map[node].__dict__.pop('_ancestors', None) | ||||
|                 self.node_map[node].__dict__.pop('_descendants', None) | ||||
|             self.cached = False | ||||
|  | ||||
|     def forwards_plan(self, node): | ||||
|         """ | ||||
| @@ -62,7 +135,10 @@ class MigrationGraph(object): | ||||
|         """ | ||||
|         if node not in self.nodes: | ||||
|             raise NodeNotFoundError("Node %r not a valid node" % (node, ), node) | ||||
|         return self.dfs(node, lambda x: self.dependencies.get(x, set())) | ||||
|         # Use parent.key instead of parent to speed up the frequent hashing in ensure_not_cyclic | ||||
|         self.ensure_not_cyclic(node, lambda x: (parent.key for parent in self.node_map[x].parents)) | ||||
|         self.cached = True | ||||
|         return self.node_map[node].ancestors() | ||||
|  | ||||
|     def backwards_plan(self, node): | ||||
|         """ | ||||
| @@ -73,7 +149,10 @@ class MigrationGraph(object): | ||||
|         """ | ||||
|         if node not in self.nodes: | ||||
|             raise NodeNotFoundError("Node %r not a valid node" % (node, ), node) | ||||
|         return self.dfs(node, lambda x: self.dependents.get(x, set())) | ||||
|         # Use child.key instead of child to speed up the frequent hashing in ensure_not_cyclic | ||||
|         self.ensure_not_cyclic(node, lambda x: (child.key for child in self.node_map[x].children)) | ||||
|         self.cached = True | ||||
|         return self.node_map[node].descendants() | ||||
|  | ||||
|     def root_nodes(self, app=None): | ||||
|         """ | ||||
| @@ -82,7 +161,7 @@ class MigrationGraph(object): | ||||
|         """ | ||||
|         roots = set() | ||||
|         for node in self.nodes: | ||||
|             if (not any(key[0] == node[0] for key in self.dependencies.get(node, set())) | ||||
|             if (not any(key[0] == node[0] for key in self.node_map[node].parents) | ||||
|                     and (not app or app == node[0])): | ||||
|                 roots.add(node) | ||||
|         return sorted(roots) | ||||
| @@ -97,7 +176,7 @@ class MigrationGraph(object): | ||||
|         """ | ||||
|         leaves = set() | ||||
|         for node in self.nodes: | ||||
|             if (not any(key[0] == node[0] for key in self.dependents.get(node, set())) | ||||
|             if (not any(key[0] == node[0] for key in self.node_map[node].children) | ||||
|                     and (not app or app == node[0])): | ||||
|                 leaves.add(node) | ||||
|         return sorted(leaves) | ||||
| @@ -105,7 +184,7 @@ class MigrationGraph(object): | ||||
|     def ensure_not_cyclic(self, start, get_children): | ||||
|         # Algo from GvR: | ||||
|         # http://neopythonic.blogspot.co.uk/2009/01/detecting-cycles-in-directed-graph.html | ||||
|         todo = set(self.nodes.keys()) | ||||
|         todo = set(self.nodes) | ||||
|         while todo: | ||||
|             node = todo.pop() | ||||
|             stack = [node] | ||||
| @@ -122,28 +201,10 @@ class MigrationGraph(object): | ||||
|                 else: | ||||
|                     node = stack.pop() | ||||
|  | ||||
|     def dfs(self, start, get_children): | ||||
|         """ | ||||
|         Iterative depth first search, for finding dependencies. | ||||
|         """ | ||||
|         self.ensure_not_cyclic(start, get_children) | ||||
|         visited = deque() | ||||
|         visited.append(start) | ||||
|         stack = deque(sorted(get_children(start))) | ||||
|         while stack: | ||||
|             node = stack.popleft() | ||||
|             visited.appendleft(node) | ||||
|             children = sorted(get_children(node), reverse=True) | ||||
|             # reverse sorting is needed because prepending using deque.extendleft | ||||
|             # also effectively reverses values | ||||
|             stack.extendleft(children) | ||||
|  | ||||
|         return list(OrderedSet(visited)) | ||||
|  | ||||
|     def __str__(self): | ||||
|         return "Graph: %s nodes, %s edges" % ( | ||||
|             len(self.nodes), | ||||
|             sum(len(x) for x in self.dependencies.values()), | ||||
|             sum(len(node.parents) for node in self.node_map.values()), | ||||
|         ) | ||||
|  | ||||
|     def make_state(self, nodes=None, at_end=True, real_apps=None): | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from unittest import expectedFailure | ||||
|  | ||||
| from django.db.migrations.graph import ( | ||||
|     CircularDependencyError, MigrationGraph, NodeNotFoundError, | ||||
| ) | ||||
| @@ -151,7 +153,23 @@ class GraphTests(TestCase): | ||||
|             graph.forwards_plan, ('C', '0001') | ||||
|         ) | ||||
|  | ||||
|     def test_dfs(self): | ||||
|     def test_deep_graph(self): | ||||
|         graph = MigrationGraph() | ||||
|         root = ("app_a", "1") | ||||
|         graph.add_node(root, None) | ||||
|         expected = [root] | ||||
|         for i in range(2, 750): | ||||
|             parent = ("app_a", str(i - 1)) | ||||
|             child = ("app_a", str(i)) | ||||
|             graph.add_node(child, None) | ||||
|             graph.add_dependency(str(i), child, parent) | ||||
|             expected.append(child) | ||||
|  | ||||
|         actual = graph.node_map[root].descendants() | ||||
|         self.assertEqual(expected[::-1], actual) | ||||
|  | ||||
|     @expectedFailure | ||||
|     def test_recursion_depth(self): | ||||
|         graph = MigrationGraph() | ||||
|         root = ("app_a", "1") | ||||
|         graph.add_node(root, None) | ||||
| @@ -163,7 +181,7 @@ class GraphTests(TestCase): | ||||
|             graph.add_dependency(str(i), child, parent) | ||||
|             expected.append(child) | ||||
|  | ||||
|         actual = graph.dfs(root, lambda x: graph.dependents.get(x, set())) | ||||
|         actual = graph.node_map[root].descendants() | ||||
|         self.assertEqual(expected[::-1], actual) | ||||
|  | ||||
|     def test_plan_invalid_node(self): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user