diff --git a/mercurial/configitems.py b/mercurial/configitems.py
--- a/mercurial/configitems.py
+++ b/mercurial/configitems.py
@@ -2486,6 +2486,11 @@
 )
 coreconfigitem(
     b'web',
+    b'full-garbage-collection-rate',
+    default=0,  # 0 means full collections are never forced
+)
+coreconfigitem(
+    b'web',
     b'hidden',
     default=False,
 )
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -285,6 +285,7 @@
         self.lastrefresh = 0
         self.motd = None
         self.refresh()
+        self.requests_count = 0
         if not baseui:
             # set up environment for new ui
             extensions.loadall(self.ui)
@@ -341,6 +342,10 @@
 
         self.repos = repos
         self.ui = u
+        self.gc_full_collect_rate = self.ui.configint(
+            b'web', b'full-garbage-collection-rate'
+        )
+        self.gc_full_collections_done = 0
         encoding.encoding = self.ui.config(b'web', b'encoding')
         self.style = self.ui.config(b'web', b'style')
         self.templatepath = self.ui.config(
@@ -383,12 +388,25 @@
             finally:
                 # There are known cycles in localrepository that prevent
                 # those objects (and tons of held references) from being
-                # collected through normal refcounting. We mitigate those
-                # leaks by performing an explicit GC on every request.
-                # TODO remove this once leaks are fixed.
-                # TODO only run this on requests that create localrepository
-                # instances instead of every request.
-                gc.collect()
+                # collected through normal refcounting.
+                # In some cases, the resulting memory consumption can
+                # be tamed by performing explicit garbage collections.
+                # In the presence of actual leaks or big long-lived caches,
+                # such collections can hurt performance, hence the rate (one
+                # full collection every `rate` requests) shouldn't be too low.
+                # See "Collecting the oldest generation" in
+                # https://devguide.python.org/garbage_collector
+                # for more about such trade-offs.
+                rate = self.gc_full_collect_rate
+
+                # The following is not thread safe, but the consequence
+                # (skipping a garbage collection) is arguably better than
+                # risking having several threads perform a collection in
+                # parallel (long useless wait on all threads).
+                self.requests_count += 1
+                if rate > 0 and self.requests_count % rate == 0:
+                    gc.collect()
+                    self.gc_full_collections_done += 1
 
     def _runwsgi(self, req, res):
         try:
diff --git a/tests/test-hgwebdir-gc.py b/tests/test-hgwebdir-gc.py
new file mode 100644
--- /dev/null
+++ b/tests/test-hgwebdir-gc.py
@@ -0,0 +1,44 @@
+from __future__ import absolute_import
+
+import os
+from mercurial.hgweb import hgwebdir_mod
+
+hgwebdir = hgwebdir_mod.hgwebdir
+
+os.mkdir(b'webdir')
+os.chdir(b'webdir')
+
+webdir = os.path.realpath(b'.')
+
+
+def trivial_response(req, res):
+    return []
+
+
+def make_hgwebdir(gc_rate):
+    config = os.path.join(webdir, b'hgwebdir.conf')
+    with open(config, 'wb') as configfile:
+        configfile.write(b'[web]\n')
+        configfile.write(b'full-garbage-collection-rate=%d\n' % gc_rate)
+
+    hg_wd = hgwebdir(config)
+    hg_wd._runwsgi = trivial_response
+    return hg_wd
+
+
+def process_requests(webdir_instance, number):
+    # for now, there is no need to pass realistic req/res arguments
+    for _ in range(number):
+        for chunk in webdir_instance.run_wsgi(None, None):
+            pass
+
+
+without_gc = make_hgwebdir(gc_rate=0)
+process_requests(without_gc, 5)
+assert without_gc.requests_count == 5
+assert without_gc.gc_full_collections_done == 0
+
+with_gc = make_hgwebdir(gc_rate=2)
+process_requests(with_gc, 5)
+assert with_gc.requests_count == 5
+assert with_gc.gc_full_collections_done == 2