diff --git a/mercurial/configitems.py b/mercurial/configitems.py
--- a/mercurial/configitems.py
+++ b/mercurial/configitems.py
@@ -1266,6 +1266,11 @@
 )
 coreconfigitem(
     b'experimental',
+    b'web.full-garbage-collection-rate',
+    default=1,  # still forcing a full collection on each request
+)
+coreconfigitem(
+    b'experimental',
     b'worker.wdir-get-thread-safe',
     default=False,
 )
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -285,6 +285,7 @@
         self.lastrefresh = 0
         self.motd = None
         self.refresh()
+        self.requests_count = 0
         if not baseui:
             # set up environment for new ui
             extensions.loadall(self.ui)
@@ -341,6 +342,10 @@
 
         self.repos = repos
         self.ui = u
+        self.gc_full_collect_rate = self.ui.configint(
+            b'experimental', b'web.full-garbage-collection-rate'
+        )
+        self.gc_full_collections_done = 0
         encoding.encoding = self.ui.config(b'web', b'encoding')
         self.style = self.ui.config(b'web', b'style')
         self.templatepath = self.ui.config(
@@ -383,12 +388,27 @@
         finally:
             # There are known cycles in localrepository that prevent
             # those objects (and tons of held references) from being
-            # collected through normal refcounting. We mitigate those
-            # leaks by performing an explicit GC on every request.
-            # TODO remove this once leaks are fixed.
-            # TODO only run this on requests that create localrepository
-            # instances instead of every request.
-            gc.collect()
+            # collected through normal refcounting.
+            # In some cases, the resulting memory consumption can
+            # be tamed by performing explicit garbage collections.
+            # In the presence of actual leaks or big long-lived caches,
+            # the impact on performance of such collections can become a
+            # problem, hence the rate shouldn't be set too low.
+            # See "Collecting the oldest generation" in
+            # https://devguide.python.org/garbage_collector
+            # for more about such trade-offs.
+            rate = self.gc_full_collect_rate
+
+            # this is not thread safe, but the consequence (skipping
+            # a garbage collection) is arguably better than risking
+            # having several threads perform a collection in parallel
+            # (long useless wait on all threads).
+            self.requests_count += 1
+            if rate > 0 and self.requests_count % rate == 0:
+                gc.collect()
+                self.gc_full_collections_done += 1
+            else:
+                gc.collect(generation=1)
 
     def _runwsgi(self, req, res):
         try:
diff --git a/tests/test-hgwebdir-gc.py b/tests/test-hgwebdir-gc.py
new file mode 100644
--- /dev/null
+++ b/tests/test-hgwebdir-gc.py
@@ -0,0 +1,49 @@
+from __future__ import absolute_import
+
+import os
+from mercurial.hgweb import hgwebdir_mod
+
+hgwebdir = hgwebdir_mod.hgwebdir
+
+os.mkdir(b'webdir')
+os.chdir(b'webdir')
+
+webdir = os.path.realpath(b'.')
+
+
+def trivial_response(req, res):
+    return []
+
+
+def make_hgwebdir(gc_rate=None):
+    config = os.path.join(webdir, b'hgwebdir.conf')
+    with open(config, 'wb') as configfile:
+        configfile.write(b'[experimental]\n')
+        if gc_rate is not None:
+            configfile.write(b'web.full-garbage-collection-rate=%d\n' % gc_rate)
+    hg_wd = hgwebdir(config)
+    hg_wd._runwsgi = trivial_response
+    return hg_wd
+
+
+def process_requests(webdir_instance, number):
+    # we don't care for now about passing realistic arguments
+    for _ in range(number):
+        for chunk in webdir_instance.run_wsgi(None, None):
+            pass
+
+
+without_gc = make_hgwebdir(gc_rate=0)
+process_requests(without_gc, 5)
+assert without_gc.requests_count == 5
+assert without_gc.gc_full_collections_done == 0
+
+with_gc = make_hgwebdir(gc_rate=2)
+process_requests(with_gc, 5)
+assert with_gc.requests_count == 5
+assert with_gc.gc_full_collections_done == 2
+
+with_systematic_gc = make_hgwebdir()  # default value of the setting
+process_requests(with_systematic_gc, 3)
+assert with_systematic_gc.requests_count == 3
+assert with_systematic_gc.gc_full_collections_done == 3