We need to keep that information around:
- the total data length lets a transaction start appending new information without confusing other readers (see the sketch after this list).
- the amount of unused data lets us detect when we should regenerate a new rawdata file.
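A minimal sketch of how a reader can honor those two fields (the helper `read_bounded_rawdata` is hypothetical; the struct formats are the ones this patch introduces below):

```python
import struct

S_VERSION = struct.Struct(">B")    # on-disk version byte
S_HEADER = struct.Struct(">BQQQ")  # uid size, tip rev, data length, data unused

def read_bounded_rawdata(docket_bytes, rawdata_bytes):
    """Hypothetical reader: only trust the first data_length bytes."""
    (version,) = S_VERSION.unpack_from(docket_bytes, 0)
    if version != 0:  # ONDISK_VERSION: experimental, subject to change
        return None
    uid_size, tip_rev, data_length, data_unused = S_HEADER.unpack_from(
        docket_bytes, S_VERSION.size
    )
    # A transaction may already be appending past data_length; slicing
    # keeps this reader consistent with the docket it loaded.
    return rawdata_bytes[:data_length]
```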
Reviewers: hg-reviewers
Lint: automatic diff as part of commit; lint not applicable.
Unit: automatic diff as part of commit; unit tests not applicable.
Status | Path
---|---
M | mercurial/debugcommands.py (2 lines)
M | mercurial/pure/parsers.py (4 lines)
M | mercurial/revlogutils/nodemap.py (41 lines)
M | tests/test-persistent-nodemap.t (17 lines)
Status | Author | Revision
---|---|---
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
Closed | marmoute |
mercurial/debugcommands.py:

     elif opts['metadata']:
         unfi = repo.unfiltered()
         cl = unfi.changelog
         nm_data = nodemap.persisted_data(cl)
         if nm_data is not None:
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %d\n") % docket.data_length)
+            ui.write((b"data-unused: %d\n") % docket.data_unused)

 @command(
     b'debugobsolete',
     [
         (b'', b'flags', 0, _(b'markers flag')),
         (
             b'',
mercurial/pure/parsers.py:

     def nodemap_data_incremental(self):
         """Return bytes containing an incremental update to the persistent nodemap

         This contains the data for an append-only update of the data provided
         in the last call to `update_nodemap_data`.
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data

     def update_nodemap_data(self, docket, nm_data):
         """provide full block of persisted binary data for a nodemap

         The data are expected to come from disk. See `nodemap_data_all` for a
         producer of such data."""
         if nm_data is not None:
             self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
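A hypothetical caller of the new API, to make the changed contract concrete: the method now returns a pair, and the first element feeds the docket's `data_unused` counter (names like `rawdata_path` are illustrative; the real consumer is `_persist_nodemap` in nodemap.py below):

```python
result = index.nodemap_data_incremental()
if result is not None:
    data_changed_count, data = result
    # `data` is appended verbatim to the existing rawdata file;
    # `data_changed_count` counts the bytes whose older on-disk copies
    # the append just superseded.
    with open(rawdata_path, "ab") as fd:  # rawdata_path: assumed name
        fd.write(data)
    docket.data_length += len(data)
    docket.data_unused += data_changed_count
```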
mercurial/revlogutils/nodemap.py:

     if not pdata:
         return None
     offset = 0
     (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
     if version != ONDISK_VERSION:
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)

 def setup_persistent_nodemap(tr, revlog):
     """Install whatever is needed transaction side to persist a nodemap on disk
mercurial/revlogutils/nodemap.py (continued):

         raise error.ProgrammingError(msg)
     can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
     ondisk_docket = revlog._nodemap_docket
     # first attempt an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fall back to a full new export
         target_docket = NodeMapDocket()
         datafile = _rawdata_filepath(revlog, target_docket)
         if util.safehasattr(revlog.index, "nodemap_data_all"):
             data = revlog.index.nodemap_data_all()
         else:
             data = persistent_data(revlog.index)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
     with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
         fp.write(target_docket.serialize())
     revlog._nodemap_docket = target_docket
     # EXP-TODO: if the transaction aborts, we should remove the new data and
     # reinstall the old one.
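Note that the docket now accumulates `data_unused` but nothing acts on it yet; the commit message only says it "will allow" detecting when to regenerate. A plausible follow-up heuristic could look like this (the threshold and helper are assumptions, not part of this revision):

```python
UNUSED_RATIO_LIMIT = 0.25  # hypothetical threshold

def should_regenerate(docket):
    """Regenerate the rawdata file once too much of it is dead weight."""
    if not docket.data_length:
        return True
    return docket.data_unused / docket.data_length > UNUSED_RATIO_LIMIT
```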
mercurial/revlogutils/nodemap.py (continued):

 # specified inside the "docket" file.
 #
 # The docket file contains information to find, qualify and validate the raw
 # data. Its content is currently very light, but it will expand as the on disk
 # nodemap gains the necessary features to be used in production.

 # version 0 is experimental, no BC guarantee, do not use outside of tests.
 ONDISK_VERSION = 0

 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 ID_SIZE = 8

 def _make_uid():
     """return a new unique identifier.

     The identifier is random and composed of ascii characters."""
     return nodemod.hex(os.urandom(ID_SIZE))

 class NodeMapDocket(object):
     """metadata associated with persistent nodemap data

     The persistent data may come from disk or be on their way to disk.
     """

     def __init__(self, uid=None):
         if uid is None:
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0

     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new

     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)

 def _rawdata_filepath(revlog, docket):
     """The (vfs relative) nodemap's rawdata file for a given uid"""
     prefix = revlog.nodemap_file[:-2]
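A stand-alone round trip of the serialization above, showing where the test's 42-byte docket size comes from (1 version byte + 25 header bytes + 16 uid characters):

```python
import os
import struct

S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQ")

uid = os.urandom(8).hex().encode('ascii')  # mirrors _make_uid with ID_SIZE = 8
blob = S_VERSION.pack(0) + S_HEADER.pack(len(uid), 5000, 122880, 0) + uid
assert len(blob) == 1 + 25 + 16 == 42  # matches size=42 in the test below

uid_size, tip_rev, data_length, data_unused = S_HEADER.unpack_from(blob, 1)
assert (uid_size, tip_rev, data_length, data_unused) == (16, 5000, 122880, 0)
```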
""" | """ | ||||
trie = _build_trie(index) | trie = _build_trie(index) | ||||
return _persist_trie(trie) | return _persist_trie(trie) | ||||
def update_persistent_data(index, root, max_idx, last_rev): | def update_persistent_data(index, root, max_idx, last_rev): | ||||
"""return the incremental update for persistent nodemap from a given index | """return the incremental update for persistent nodemap from a given index | ||||
""" | """ | ||||
trie = _update_trie(index, root, last_rev) | changed_block, trie = _update_trie(index, root, last_rev) | ||||
return _persist_trie(trie, existing_idx=max_idx) | return ( | ||||
changed_block * S_BLOCK.size, | |||||
_persist_trie(trie, existing_idx=max_idx), | |||||
) | |||||
S_BLOCK = struct.Struct(">" + ("l" * 16)) | S_BLOCK = struct.Struct(">" + ("l" * 16)) | ||||
NO_ENTRY = -1 | NO_ENTRY = -1 | ||||
# rev 0 need to be -2 because 0 is used by block, -1 is a special value. | # rev 0 need to be -2 because 0 is used by block, -1 is a special value. | ||||
REV_OFFSET = 2 | REV_OFFSET = 2 | ||||
for rev in range(len(index)): | for rev in range(len(index)): | ||||
hex = nodemod.hex(index[rev][7]) | hex = nodemod.hex(index[rev][7]) | ||||
_insert_into_block(index, 0, root, rev, hex) | _insert_into_block(index, 0, root, rev, hex) | ||||
return root | return root | ||||
def _update_trie(index, root, last_rev): | def _update_trie(index, root, last_rev): | ||||
"""consume""" | """consume""" | ||||
changed = 0 | |||||
for rev in range(last_rev + 1, len(index)): | for rev in range(last_rev + 1, len(index)): | ||||
hex = nodemod.hex(index[rev][7]) | hex = nodemod.hex(index[rev][7]) | ||||
_insert_into_block(index, 0, root, rev, hex) | changed += _insert_into_block(index, 0, root, rev, hex) | ||||
return root | return changed, root | ||||
def _insert_into_block(index, level, block, current_rev, current_hex): | def _insert_into_block(index, level, block, current_rev, current_hex): | ||||
"""insert a new revision in a block | """insert a new revision in a block | ||||
index: the index we are adding revision for | index: the index we are adding revision for | ||||
level: the depth of the current block in the trie | level: the depth of the current block in the trie | ||||
block: the block currently being considered | block: the block currently being considered | ||||
current_rev: the revision number we are adding | current_rev: the revision number we are adding | ||||
current_hex: the hexadecimal representation of the of that revision | current_hex: the hexadecimal representation of the of that revision | ||||
""" | """ | ||||
changed = 1 | |||||
if block.ondisk_id is not None: | if block.ondisk_id is not None: | ||||
block.ondisk_id = None | block.ondisk_id = None | ||||
hex_digit = _to_int(current_hex[level : level + 1]) | hex_digit = _to_int(current_hex[level : level + 1]) | ||||
entry = block.get(hex_digit) | entry = block.get(hex_digit) | ||||
if entry is None: | if entry is None: | ||||
# no entry, simply store the revision number | # no entry, simply store the revision number | ||||
block[hex_digit] = current_rev | block[hex_digit] = current_rev | ||||
elif isinstance(entry, dict): | elif isinstance(entry, dict): | ||||
# need to recurse to an underlying block | # need to recurse to an underlying block | ||||
_insert_into_block(index, level + 1, entry, current_rev, current_hex) | changed += _insert_into_block( | ||||
index, level + 1, entry, current_rev, current_hex | |||||
) | |||||
else: | else: | ||||
# collision with a previously unique prefix, inserting new | # collision with a previously unique prefix, inserting new | ||||
# vertices to fit both entry. | # vertices to fit both entry. | ||||
other_hex = nodemod.hex(index[entry][7]) | other_hex = nodemod.hex(index[entry][7]) | ||||
other_rev = entry | other_rev = entry | ||||
new = Block() | new = Block() | ||||
block[hex_digit] = new | block[hex_digit] = new | ||||
_insert_into_block(index, level + 1, new, other_rev, other_hex) | _insert_into_block(index, level + 1, new, other_rev, other_hex) | ||||
_insert_into_block(index, level + 1, new, current_rev, current_hex) | _insert_into_block(index, level + 1, new, current_rev, current_hex) | ||||
return changed | |||||
def _persist_trie(root, existing_idx=None): | def _persist_trie(root, existing_idx=None): | ||||
"""turn a nodemap trie into persistent binary data | """turn a nodemap trie into persistent binary data | ||||
See `_build_trie` for nodemap trie structure""" | See `_build_trie` for nodemap trie structure""" | ||||
block_map = {} | block_map = {} | ||||
if existing_idx is not None: | if existing_idx is not None: |
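The conversion from "blocks touched" to bytes happens in `update_persistent_data` above. A quick standalone check of the factor it multiplies by:

```python
import struct

# Each trie block is 16 signed 32-bit slots, one per hex digit.
S_BLOCK = struct.Struct(">" + "l" * 16)
assert S_BLOCK.size == 64

# Every block an insertion rewrites gets re-appended to the rawdata file,
# so an equal number of previously written bytes becomes dead weight:
def unused_bytes(changed_block_count):
    return changed_block_count * S_BLOCK.size
```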
tests/test-persistent-nodemap.t:

   > exp-persistent-nodemap=yes
   > [devel]
   > persistent-nodemap=yes
   > EOF
   $ hg debugbuilddag .+5000
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 122880
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
   size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7
   $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
   size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7
   0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   0010: ff ff ff ff ff ff ff ff ff ff fa c2 ff ff ff ff |................|

 add a new commit

   $ hg up
   0 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'

+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 123072
+  data-unused: 192
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 122880
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42

 (The pure code uses the debug code that performs an incremental update; the C code re-encodes from scratch.)

 #if pure
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob)
 #else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)
 #endif
   $ hg debugnodemap --check
   revision in index: 5002
   revision in nodemap: 5002
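Tying the transcript numbers together (pure-Python branch; a sketch of the arithmetic, using the 64-byte block size from nodemap.py):

```python
BLOCK = 64                           # S_BLOCK.size = 16 * 4 bytes
assert 122880 == 1920 * BLOCK        # initial rawdata: 1920 trie blocks
assert 123072 == 122880 + 3 * BLOCK  # one commit re-appends 3 blocks
assert 192 == 3 * BLOCK              # ...whose old copies are data-unused
# The docket grows from 26 to 42 bytes because the header gains two
# 8-byte fields (">BQ" -> ">BQQQ"):
assert 26 + 16 == 42
```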