Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: NixOS/nixpkgs
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 18b0cf78a64b
Choose a base ref
...
head repository: NixOS/nixpkgs
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 5d3fd3674a66
Choose a head ref
  • 3 commits
  • 1 file changed
  • 2 contributors

Commits on Mar 5, 2019

  1. Copy the full SHA
    54826e7 View commit details
  2. references-by-popularity: cache computation to avoid memory bloat

    On very large graphs (14k+ paths), we'd end up with a massive in
    memory tree of mostly duplication.
    
    We can safely cache trees and point back to them later, saving
    memory.
    grahamc committed Mar 5, 2019
    Copy the full SHA
    09362bc View commit details

Commits on Mar 6, 2019

  1. Merge pull request #56918 from grahamc/closure-graph-memory

    references-by-popularity: get a handle on memory usage
    shlevy authored Mar 6, 2019

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature.
    Copy the full SHA
    5d3fd36 View commit details
Showing with 50 additions and 3 deletions.
  1. +50 −3 pkgs/build-support/references-by-popularity/closure-graph.py
53 changes: 50 additions & 3 deletions pkgs/build-support/references-by-popularity/closure-graph.py
Original file line number Diff line number Diff line change
@@ -117,6 +117,17 @@
from pprint import pprint
from collections import defaultdict


def debug(msg, *args, **kwargs):
if False:
print(
"DEBUG: {}".format(
msg.format(*args, **kwargs)
),
file=sys.stderr
)


# Find paths in the original dataset which are never referenced by
# any other paths
def find_roots(closures):
@@ -327,10 +338,23 @@ def test_returns_lookp(self):
# /nix/store/tux: {}
# }
# }
subgraphs_cache = {}
def make_graph_segment_from_root(root, lookup):
global subgraphs_cache
children = {}
for ref in lookup[root]:
children[ref] = make_graph_segment_from_root(ref, lookup)
# make_graph_segment_from_root is a pure function, and will
# always return the same result based on a given input. Thus,
# cache computation.
#
# Python's assignment will use a pointer, preventing memory
# bloat for large graphs.
if ref not in subgraphs_cache:
debug("Subgraph Cache miss on {}".format(ref))
subgraphs_cache[ref] = make_graph_segment_from_root(ref, lookup)
else:
debug("Subgraph Cache hit on {}".format(ref))
children[ref] = subgraphs_cache[ref]
return children

class TestMakeGraphSegmentFromRoot(unittest.TestCase):
@@ -381,12 +405,27 @@ def test_returns_graph_tiny(self):
# /nix/store/baz: 4
# /nix/store/tux: 6
# ]
popularity_cache = {}
def graph_popularity_contest(full_graph):
global popularity_cache
popularity = defaultdict(int)
for path, subgraph in full_graph.items():
popularity[path] += 1
subcontest = graph_popularity_contest(subgraph)
# graph_popularity_contest is a pure function, and will
# always return the same result based on a given input. Thus,
# cache computation.
#
# Python's assignment will use a pointer, preventing memory
# bloat for large graphs.
if path not in popularity_cache:
debug("Popularity Cache miss on {}", path)
popularity_cache[path] = graph_popularity_contest(subgraph)
else:
debug("Popularity Cache hit on {}", path)

subcontest = popularity_cache[path]
for subpath, subpopularity in subcontest.items():
debug("Calculating popularity for {}", subpath)
popularity[subpath] += subpopularity + 1

return popularity
@@ -474,6 +513,7 @@ def main():
filename = sys.argv[1]
key = sys.argv[2]

debug("Loading from {}", filename)
with open(filename) as f:
data = json.load(f)

@@ -497,14 +537,21 @@ def main():
# ]
graph = data[key]

debug("Finding roots from {}", key)
roots = find_roots(graph);
debug("Making lookup for {}", key)
lookup = make_lookup(graph)

full_graph = {}
for root in roots:
debug("Making full graph for {}", root)
full_graph[root] = make_graph_segment_from_root(root, lookup)

ordered = order_by_popularity(graph_popularity_contest(full_graph))
debug("Running contest")
contest = graph_popularity_contest(full_graph)
debug("Ordering by popularity")
ordered = order_by_popularity(contest)
debug("Checking for missing paths")
missing = []
for path in all_paths(graph):
if path not in ordered: