Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: NixOS/nixpkgs
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 343abf7730fb
Choose a base ref
...
head repository: NixOS/nixpkgs
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 796b760e2e7c
Choose a head ref
  • 3 commits
  • 1 file changed
  • 2 contributors

Commits on Mar 5, 2019

  1. references-by-popularity: create debug output

    (cherry picked from commit 54826e7)
    grahamc committed Mar 5, 2019
    Copy the full SHA
    86fafb0 View commit details
  2. references-by-popularity: cache computation to avoid memory bloat

    On very large graphs (14k+ paths), we'd end up with a massive in
    memory tree of mostly duplication.
    
    We can safely cache trees and point back to them later, saving
    memory.
    
    (cherry picked from commit 09362bc)
    grahamc committed Mar 5, 2019
    Copy the full SHA
    e32d952 View commit details

Commits on Mar 6, 2019

  1. Merge pull request #56919 from grahamc/closure-graph-memory-18.09

    Closure graph memory (18.09)
    shlevy authored Mar 6, 2019
    Copy the full SHA
    796b760 View commit details
Showing with 50 additions and 3 deletions.
  1. +50 −3 pkgs/build-support/references-by-popularity/closure-graph.py
53 changes: 50 additions & 3 deletions pkgs/build-support/references-by-popularity/closure-graph.py
Original file line number Diff line number Diff line change
@@ -117,6 +117,17 @@
from pprint import pprint
from collections import defaultdict


def debug(msg, *args, **kwargs):
if False:
print(
"DEBUG: {}".format(
msg.format(*args, **kwargs)
),
file=sys.stderr
)


# Find paths in the original dataset which are never referenced by
# any other paths
def find_roots(closures):
@@ -327,10 +338,23 @@ def test_returns_lookp(self):
# /nix/store/tux: {}
# }
# }
subgraphs_cache = {}
def make_graph_segment_from_root(root, lookup):
global subgraphs_cache
children = {}
for ref in lookup[root]:
children[ref] = make_graph_segment_from_root(ref, lookup)
# make_graph_segment_from_root is a pure function, and will
# always return the same result based on a given input. Thus,
# cache computation.
#
# Python's assignment will use a pointer, preventing memory
# bloat for large graphs.
if ref not in subgraphs_cache:
debug("Subgraph Cache miss on {}".format(ref))
subgraphs_cache[ref] = make_graph_segment_from_root(ref, lookup)
else:
debug("Subgraph Cache hit on {}".format(ref))
children[ref] = subgraphs_cache[ref]
return children

class TestMakeGraphSegmentFromRoot(unittest.TestCase):
@@ -381,12 +405,27 @@ def test_returns_graph_tiny(self):
# /nix/store/baz: 4
# /nix/store/tux: 6
# ]
popularity_cache = {}
def graph_popularity_contest(full_graph):
global popularity_cache
popularity = defaultdict(int)
for path, subgraph in full_graph.items():
popularity[path] += 1
subcontest = graph_popularity_contest(subgraph)
# graph_popularity_contest is a pure function, and will
# always return the same result based on a given input. Thus,
# cache computation.
#
# Python's assignment will use a pointer, preventing memory
# bloat for large graphs.
if path not in popularity_cache:
debug("Popularity Cache miss on {}", path)
popularity_cache[path] = graph_popularity_contest(subgraph)
else:
debug("Popularity Cache hit on {}", path)

subcontest = popularity_cache[path]
for subpath, subpopularity in subcontest.items():
debug("Calculating popularity for {}", subpath)
popularity[subpath] += subpopularity + 1

return popularity
@@ -474,6 +513,7 @@ def main():
filename = sys.argv[1]
key = sys.argv[2]

debug("Loading from {}", filename)
with open(filename) as f:
data = json.load(f)

@@ -497,14 +537,21 @@ def main():
# ]
graph = data[key]

debug("Finding roots from {}", key)
roots = find_roots(graph);
debug("Making lookup for {}", key)
lookup = make_lookup(graph)

full_graph = {}
for root in roots:
debug("Making full graph for {}", root)
full_graph[root] = make_graph_segment_from_root(root, lookup)

ordered = order_by_popularity(graph_popularity_contest(full_graph))
debug("Running contest")
contest = graph_popularity_contest(full_graph)
debug("Ordering by popularity")
ordered = order_by_popularity(contest)
debug("Checking for missing paths")
missing = []
for path in all_paths(graph):
if path not in ordered: