diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -657,8 +657,31 @@
             return self._rustmap
 
         def write(self, tr, st, now):
-            if self._use_dirstate_v2:
-                packed = self._rustmap.write_v2(now)
+            if not self._use_dirstate_v2:
+                p1, p2 = self.parents()
+                packed = self._rustmap.write_v1(p1, p2, now)
+                st.write(packed)
+                st.close()
+                return
+
+            # We can only append to an existing data file if there is one
+            can_append = self.docket.uuid is not None
+            packed, append = self._rustmap.write_v2(now, can_append)
+            if append:
+                docket = self.docket
+                with self._opener(docket.data_filename(), b'ab') as fp:
+                    assert fp.tell() == docket.data_size, (
+                        fp.tell(),
+                        docket.data_size,
+                    )
+                    written = fp.write(packed)
+                    if written is not None:  # py2 return None
+                        assert written == len(packed), (written, len(packed))
+                docket.data_size += len(packed)
+                docket.parents = self.parents()
+                st.write(docket.serialize())
+                st.close()
+            else:
                 old_docket = self.docket
                 new_docket = docketmod.DirstateDocket.with_new_uuid(
                     self.parents(), len(packed)
@@ -674,11 +697,8 @@
                 if old_docket.uuid:
                     self._opener.unlink(old_docket.data_filename())
                 self._docket = new_docket
-            else:
-                p1, p2 = self.parents()
-                packed = self._rustmap.write_v1(p1, p2, now)
-                st.write(packed)
-                st.close()
+            # Reload from the newly-written file
+            util.clearcachedproperty(self, b"_rustmap")
             self._dirtyparents = False
 
         @propertycache
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -468,6 +468,24 @@
         Ok((map, parents))
     }
 
+    /// Assuming dirstate-v2 format, returns whether the next write should
+    /// append to the existing data file that contains `self.on_disk` (true),
+    /// or create a new data file from scratch (false).
+    pub(super) fn write_should_append(&self) -> bool {
+        // Soon this will be a heuristic based on the amount of unreachable
+        // data. For now it’s pseudo-random in order to make tests exercise
+        // both code paths.
+
+        fn bad_rng() -> u32 {
+            std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap()
+                .subsec_millis()
+        }
+
+        bad_rng() % 2 == 0
+    }
+
     fn get_node<'tree>(
         &'tree self,
         path: &HgPath,
@@ -1043,8 +1061,15 @@
         Ok(packed)
     }
 
+    /// Returns new data together with whether that data should be appended to
+    /// the existing data file whose content is at `self.on_disk` (true),
+    /// instead of written to a new data file (false).
     #[timed]
-    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
+    fn pack_v2(
+        &mut self,
+        now: Timestamp,
+        can_append: bool,
+    ) -> Result<(Vec<u8>, bool), DirstateError> {
         // TODO: how do we want to handle this in 2038?
         let now: i32 = now.0.try_into().expect("time overflow");
         let mut paths = Vec::new();
@@ -1063,7 +1088,7 @@
 
         self.clear_known_ambiguous_mtimes(&paths)?;
 
-        on_disk::write(self)
+        on_disk::write(self, can_append)
     }
 
     fn status<'a>(
diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs
@@ -179,11 +179,19 @@
 
     /// Clear mtimes that are ambigous with `now` (similar to
     /// `clear_ambiguous_times` but for all files in the dirstate map), and
-    /// serialize bytes to write the `.hg/dirstate` file to disk in dirstate-v2
+    /// serialize bytes to write a dirstate data file to disk in dirstate-v2
     /// format.
     ///
+    /// Returns new data together with whether that data should be appended to
+    /// the existing data file whose content is at `self.on_disk` (true),
+    /// instead of written to a new data file (false).
+    ///
     /// Note: this is only supported by the tree dirstate map.
-    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError>;
+    fn pack_v2(
+        &mut self,
+        now: Timestamp,
+        can_append: bool,
+    ) -> Result<(Vec<u8>, bool), DirstateError>;
 
     /// Run the status algorithm.
     ///
@@ -383,7 +391,11 @@
         self.pack(parents, now)
     }
 
-    fn pack_v2(&mut self, _now: Timestamp) -> Result<Vec<u8>, DirstateError> {
+    fn pack_v2(
+        &mut self,
+        _now: Timestamp,
+        _can_append: bool,
+    ) -> Result<(Vec<u8>, bool), DirstateError> {
         panic!(
             "should have used dirstate_tree::DirstateMap to use the v2 format"
         )
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -544,20 +544,28 @@
     recur(on_disk, root.root_nodes, &mut f)
 }
 
+/// Returns new data together with whether that data should be appended to the
+/// existing data file whose content is at `dirstate_map.on_disk` (true),
+/// instead of written to a new data file (false).
 pub(super) fn write(
     dirstate_map: &mut DirstateMap,
-) -> Result<Vec<u8>, DirstateError> {
-    let root_len = std::mem::size_of::<Root>();
+    can_append: bool,
+) -> Result<(Vec<u8>, bool), DirstateError> {
+    let append = can_append && dirstate_map.write_should_append();
 
     // This ignores the space for paths, and for nodes without an entry.
     // TODO: better estimate? Skip the `Vec` and write to a file directly?
-    let size_guess = root_len
+    let size_guess = std::mem::size_of::<Root>()
         + std::mem::size_of::<Node>()
             * dirstate_map.nodes_with_entry_count as usize;
-    let mut out = Vec::with_capacity(size_guess);
 
-    let root_nodes =
-        write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
+    let mut writer = Writer {
+        dirstate_map,
+        append,
+        out: Vec::with_capacity(size_guess),
+    };
+
+    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
 
     let root = Root {
         root_nodes,
@@ -567,112 +575,121 @@
             .into(),
         ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
     };
-    out.extend(root.as_bytes());
-    Ok(out)
+    writer.out.extend(root.as_bytes());
+    Ok((writer.out, append))
+}
+
+struct Writer<'dmap, 'on_disk> {
+    dirstate_map: &'dmap DirstateMap<'on_disk>,
+    append: bool,
+    out: Vec<u8>,
 }
 
-fn write_nodes(
-    dirstate_map: &DirstateMap,
-    nodes: dirstate_map::ChildNodesRef,
-    out: &mut Vec<u8>,
-) -> Result<ChildNodes, DirstateError> {
-    // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
-    // order. Sort to enable binary search in the written file.
-    let nodes = nodes.sorted();
-    let nodes_len = nodes.len();
+impl Writer<'_, '_> {
+    fn write_nodes(
+        &mut self,
+        nodes: dirstate_map::ChildNodesRef,
+    ) -> Result<ChildNodes, DirstateError> {
+        // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
+        // order. Sort to enable binary search in the written file.
+        let nodes = nodes.sorted();
+        let nodes_len = nodes.len();
 
-    // First accumulate serialized nodes in a `Vec`
-    let mut on_disk_nodes = Vec::with_capacity(nodes_len);
-    for node in nodes {
-        let children = write_nodes(
-            dirstate_map,
-            node.children(dirstate_map.on_disk)?,
-            out,
-        )?;
-        let full_path = node.full_path(dirstate_map.on_disk)?;
-        let full_path = write_path(full_path.as_bytes(), out);
-        let copy_source =
-            if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
-                write_path(source.as_bytes(), out)
+        // First accumulate serialized nodes in a `Vec`
+        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
+        for node in nodes {
+            let children =
+                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
+            let full_path = node.full_path(self.dirstate_map.on_disk)?;
+            let full_path = self.write_path(full_path.as_bytes());
+            let copy_source = if let Some(source) =
+                node.copy_source(self.dirstate_map.on_disk)?
+            {
+                self.write_path(source.as_bytes())
             } else {
                 PathSlice {
                     start: 0.into(),
                     len: 0.into(),
                 }
             };
-        on_disk_nodes.push(match node {
-            NodeRef::InMemory(path, node) => {
-                let (state, data) = match &node.data {
-                    dirstate_map::NodeData::Entry(entry) => (
-                        entry.state.into(),
-                        Entry {
-                            mode: entry.mode.into(),
-                            mtime: entry.mtime.into(),
-                            size: entry.size.into(),
-                        },
-                    ),
-                    dirstate_map::NodeData::CachedDirectory { mtime } => {
-                        (b'd', Entry::from_timestamp(*mtime))
+            on_disk_nodes.push(match node {
+                NodeRef::InMemory(path, node) => {
+                    let (state, data) = match &node.data {
+                        dirstate_map::NodeData::Entry(entry) => (
+                            entry.state.into(),
+                            Entry {
+                                mode: entry.mode.into(),
+                                mtime: entry.mtime.into(),
+                                size: entry.size.into(),
+                            },
+                        ),
+                        dirstate_map::NodeData::CachedDirectory { mtime } => {
+                            (b'd', Entry::from_timestamp(*mtime))
+                        }
+                        dirstate_map::NodeData::None => (
+                            b'\0',
+                            Entry {
+                                mode: 0.into(),
+                                mtime: 0.into(),
+                                size: 0.into(),
+                            },
+                        ),
+                    };
+                    Node {
+                        children,
+                        copy_source,
+                        full_path,
+                        base_name_start: u16::try_from(path.base_name_start())
+                            // Could only panic for paths over 64 KiB
+                            .expect("dirstate-v2 path length overflow")
+                            .into(),
+                        descendants_with_entry_count: node
+                            .descendants_with_entry_count
+                            .into(),
+                        tracked_descendants_count: node
+                            .tracked_descendants_count
+                            .into(),
+                        state,
+                        data,
                     }
-                    dirstate_map::NodeData::None => (
-                        b'\0',
-                        Entry {
-                            mode: 0.into(),
-                            mtime: 0.into(),
-                            size: 0.into(),
-                        },
-                    ),
-                };
-                Node {
+                }
+                NodeRef::OnDisk(node) => Node {
                     children,
                     copy_source,
                     full_path,
-                    base_name_start: u16::try_from(path.base_name_start())
-                        // Could only panic for paths over 64 KiB
-                        .expect("dirstate-v2 path length overflow")
-                        .into(),
-                    descendants_with_entry_count: node
-                        .descendants_with_entry_count
-                        .into(),
-                    tracked_descendants_count: node
-                        .tracked_descendants_count
-                        .into(),
-                    state,
-                    data,
-                }
-            }
-            NodeRef::OnDisk(node) => Node {
-                children,
-                copy_source,
-                full_path,
-                ..*node
-            },
-        })
+                    ..*node
+                },
+            })
+        }
+        // … so we can write them contiguously, after writing everything else
+        // they refer to.
+        let start = self.curent_offset();
+        let len = u32::try_from(nodes_len)
+            // Could only panic with over 4 billion nodes
+            .expect("dirstate-v2 path length overflow")
+            .into();
+        self.out.extend(on_disk_nodes.as_bytes());
+        Ok(ChildNodes { start, len })
     }
-    // … so we can write them contiguously, after writing everything else they
-    // refer to.
-    let start = curent_offset(out);
-    let len = u32::try_from(nodes_len)
-        // Could only panic with over 4 billion nodes
-        .expect("dirstate-v2 path length overflow")
-        .into();
-    out.extend(on_disk_nodes.as_bytes());
-    Ok(ChildNodes { start, len })
-}
 
-fn curent_offset(out: &Vec<u8>) -> Offset {
-    u32::try_from(out.len())
-        // Could only panic for a dirstate file larger than 4 GiB
-        .expect("dirstate-v2 offset overflow")
-        .into()
-}
+    fn curent_offset(&mut self) -> Offset {
+        let mut offset = self.out.len();
+        if self.append {
+            offset += self.dirstate_map.on_disk.len()
+        }
+        u32::try_from(offset)
+            // Could only panic for a dirstate file larger than 4 GiB
+            .expect("dirstate-v2 offset overflow")
+            .into()
+    }
 
-fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
-    let start = curent_offset(out);
-    let len = u16::try_from(slice.len())
-        // Could only panic for paths over 64 KiB
-        .expect("dirstate-v2 path length overflow")
-        .into();
-    out.extend(slice.as_bytes());
-    PathSlice { start, len }
+    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
+        let start = self.curent_offset();
+        let len = u16::try_from(slice.len())
+            // Could only panic for paths over 64 KiB
+            .expect("dirstate-v2 path length overflow")
+            .into();
+        self.out.extend(slice.as_bytes());
+        PathSlice { start, len }
+    }
 }
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -340,16 +340,23 @@
         }
     }
 
+    /// Returns new data together with whether that data should be appended to
+    /// the existing data file whose content is at `self.on_disk` (True),
+    /// instead of written to a new data file (False).
     def write_v2(
         &self,
-        now: PyObject
-    ) -> PyResult<PyBytes> {
+        now: PyObject,
+        can_append: bool,
+    ) -> PyResult<PyObject> {
         let now = Timestamp(now.extract(py)?);
 
         let mut inner = self.inner(py).borrow_mut();
-        let result = inner.pack_v2(now);
+        let result = inner.pack_v2(now, can_append);
         match result {
-            Ok(packed) => Ok(PyBytes::new(py, &packed)),
+            Ok((packed, append)) => {
+                let packed = PyBytes::new(py, &packed);
+                Ok((packed, append).to_py_object(py).into_object())
+            },
             Err(_) => Err(PyErr::new::<exc::OSError, _>(
                 py,
                 "Dirstate error".to_string(),
diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs
--- a/rust/hg-cpython/src/dirstate/dispatch.rs
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs
@@ -124,8 +124,12 @@
         self.get_mut().pack_v1(parents, now)
     }
 
-    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
-        self.get_mut().pack_v2(now)
+    fn pack_v2(
+        &mut self,
+        now: Timestamp,
+        can_append: bool,
+    ) -> Result<(Vec<u8>, bool), DirstateError> {
+        self.get_mut().pack_v2(now, can_append)
     }
 
     fn status<'a>(