diff --git a/rust/treedirstate/src/dirstate.rs b/rust/treedirstate/src/dirstate.rs --- a/rust/treedirstate/src/dirstate.rs +++ b/rust/treedirstate/src/dirstate.rs @@ -43,6 +43,13 @@ Backend::File(ref mut file) => file.cache(), } } + + pub fn offset(&self) -> Option { + match *self { + Backend::Empty(ref _null) => None, + Backend::File(ref file) => Some(file.position()), + } + } } /// A dirstate object. This contains the state of all files in the dirstate, stored in tree @@ -149,6 +156,11 @@ self.root_id } + /// Returns the current append offset for the file store. + pub fn store_offset(&self) -> Option { + self.store.offset() + } + /// Add or update a file entry in the dirstate. pub fn add_file(&mut self, name: KeyRef, state: &T) -> Result<()> { let store = self.store.store_view(); diff --git a/rust/treedirstate/src/filestore.rs b/rust/treedirstate/src/filestore.rs --- a/rust/treedirstate/src/filestore.rs +++ b/rust/treedirstate/src/filestore.rs @@ -122,6 +122,10 @@ } Ok(()) } + + pub fn position(&self) -> u64 { + self.position + } } impl Store for FileStore { diff --git a/rust/treedirstate/src/python.rs b/rust/treedirstate/src/python.rs --- a/rust/treedirstate/src/python.rs +++ b/rust/treedirstate/src/python.rs @@ -130,6 +130,12 @@ Ok(None) } + def storeoffset(&self) -> PyResult { + let dirstate = self.dirstate(py).borrow(); + let offset = dirstate.store_offset(); + Ok(offset.unwrap_or(0)) + } + def filecount(&self) -> PyResult { let dirstate = self.dirstate(py).borrow(); Ok((dirstate.tracked_count() + dirstate.removed_count()) as usize) diff --git a/treedirstate/__init__.py b/treedirstate/__init__.py --- a/treedirstate/__init__.py +++ b/treedirstate/__init__.py @@ -3,6 +3,7 @@ from mercurial import ( dirstate, + dispatch, encoding, error, extensions, @@ -18,14 +19,25 @@ import errno import heapq import itertools +import os +import random import struct +import string from .rusttreedirstate import RustDirstateMap dirstateheader = b'########################treedirstate####' treedirstateversion = 1 +treefileprefix = 'dirstate.tree.' useinnewrepos = True +# Minimum size the treedirstate file can be before auto-repacking. +minrepackthreshold = 1024 * 1024 + +# Number of times the treedirstate file can grow by, compared to its initial +# size, before auto-repacking. +repackfactor = 3 + class _reader(object): def __init__(self, data, offset): self.data = data @@ -86,6 +98,7 @@ self._dirtyparents = False self._nonnormalset = set() self._otherparentset = set() + self._packedsize = 0 if importmap is not None: self._rmap.importmap(importmap) @@ -322,7 +335,8 @@ self._treeid = r.readstr() rootid = r.readuint() - self._rmap.read('dirstate.tree.000', rootid) + self._packedsize = r.readuint() + self._rmap.read(treefileprefix + self._treeid, rootid) clen = r.readuint() copymap = {} for _i in range(clen): @@ -357,9 +371,19 @@ else: def nonnormadd(f): pass + repackthreshold = max(self._packedsize * repackfactor, + minrepackthreshold) + if self._rmap.storeoffset() > repackthreshold: + self._ui.note(_("auto-repacking treedirstate\n")) + self._ui.setconfig('treedirstate', 'repacking', True) + self._repacked = True + self._treeid = None + else: + self._extended = True if self._treeid is None: - self._treeid = '000' - self._rmap.write('dirstate.tree.000', now, nonnormadd) + self._treeid = newtree(self._opener) + self._rmap.write(treefileprefix + self._treeid, now, nonnormadd) + self._packedsize = self._rmap.storeoffset() else: self._rmap.writedelta(now, nonnormadd) st.write(self._genrootdata()) @@ -389,6 +413,7 @@ w.writeuint(treedirstateversion) w.writestr(self._treeid) w.writeuint(self._rmap.rootid()) + w.writeuint(self._packedsize) w.writeuint(len(self.copymap)) for k, v in self.copymap.iteritems(): w.writestr(k) @@ -414,6 +439,23 @@ return (util.safehasattr(repo, 'requirements') and 'treedirstate' in repo.requirements) +def newtree(opener): + while True: + treeid = ''.join([random.choice(string.digits) for _c in range(8)]) + if not opener.exists(treefileprefix + treeid): + return treeid + +def gettreeid(opener, dirstatefile): + # The treeid is located within the first 128 bytes. + data = opener(dirstatefile).read(128) + if data[40:80] != dirstateheader: + return None + r = _reader(data, 80) + version = r.readuint() + if version != treedirstateversion: + return None + return r.readstr() + def upgrade(ui, repo): if istreedirstate(repo): raise error.Abort('repo already has treedirstate') @@ -441,6 +483,41 @@ finally: wlock.release() +def repack(ui, repo): + if not istreedirstate(repo): + ui.note(_("not repacking because repo does not have treedirstate")) + return + with repo.wlock(): + repo.dirstate._map._treeid = None + repo.dirstate._dirty = True + +dirstatefiles = [ + 'dirstate', + 'dirstate.pending', + 'undo.dirstate', + 'undo.backup.dirstate', +] + +def cleanup(ui, repo): + with repo.wlock(): + treesinuse = {} + for f in dirstatefiles: + try: + treeid = gettreeid(repo.vfs, f) + if treeid is not None: + treesinuse.setdefault(treeid, set()).add(f) + except Exception: + pass + for f in repo.vfs.listdir(): + if f.startswith(treefileprefix): + treeid = f[len(treefileprefix):] + if treeid not in treesinuse: + ui.debug("dirstate tree %s unused, deleting\n" % treeid) + repo.vfs.unlink(f) + else: + ui.debug("dirstate tree %s in use by %s\n" + % (treeid, ', '.join(treesinuse[treeid]))) + def wrapdirstate(orig, self): ds = orig(self) if istreedirstate(self): @@ -478,6 +555,27 @@ else: return orig(ui, abort, dirstate) +def wrapruncommand(orig, lui, repo, cmd, fullargs, *args): + """ + Wraps runcommand to perform cleanup of old dirstate tree files. This + happens whenever the treefile is repacked, and also on 1% of other + invocations that involve treedirstate. + """ + # For chg, do not wrap the "serve" runcommand call + if 'CHGINTERNALMARK' in os.environ: + return orig(lui, repo, cmd, fullargs, *args) + + try: + return orig(lui, repo, cmd, fullargs, *args) + finally: + if (repo is not None and + "_map" in repo.dirstate.__dict__ and + isinstance(repo.dirstate._map, treedirstatemap) and + (getattr(repo.dirstate._map, "_repacked", False) or + (getattr(repo.dirstate._map, "_extended", False) and + random.random() < 0.01))): + cleanup(lui, repo) + def wrapnewreporequirements(orig, repo): reqs = orig(repo) reqs.add('treedirstate') @@ -500,6 +598,7 @@ extensions.wrapfunction(localrepo.localrepository.dirstate, 'func', wrapdirstate) extensions.wrapfunction(scmutil, 'casecollisionauditor', wrapcca) + extensions.wrapfunction(dispatch, 'runcommand', wrapruncommand) def reposetup(ui, repo): ui.setconfig('treedirstate', 'enabled', istreedirstate(repo)) @@ -515,5 +614,11 @@ upgrade(ui, repo) elif cmd == "off": downgrade(ui, repo) + cleanup(ui, repo) + elif cmd == "repack": + repack(ui, repo) + cleanup(ui, repo) + elif cmd == "cleanup": + cleanup(ui, repo) else: raise error.Abort("unrecognised command: %s" % cmd)