diff --git a/mercurial/chgserver.py b/mercurial/chgserver.py --- a/mercurial/chgserver.py +++ b/mercurial/chgserver.py @@ -129,12 +129,37 @@ ignored = {b'HG'} else: ignored = set() - envitems = [ - (k, v) + envitems = { + k: v for k, v in pycompat.iteritems(encoding.environ) if _envre.match(k) and k not in ignored - ] - envhash = _hashlist(sorted(envitems)) + } + # Py3 may modify environment variables before `hg serve` starts. When chg + # connects to us, it passes in its environment (without the modifications), + # and we'll generate a different config hash. + # + # To handle this situation, chg will pass the variables through using + # CHGORIG_=<0 if not in env, 1 if in env>, and we use + # *that* value only when calculating the hash. Examples: + # CHGORIG_LC_CTYPE=0 <-- not in chg's env, delete for hash + # CHGORIG_LC_CTYPE=1 <-- in chg's env, but empty. Set to empty for hash. + # CHGORIG_LC_CTYPE=1UTF-8 <-- set to UTF-8 for hash. + # + # This does NOT cause us to lose Py3's modifications to these variables (we + # aren't copying this into our environment), so chg and hg should continue + # to behave the same - both will operate with the modifications Py3 made. + for k, v in pycompat.iteritems(encoding.environ): + if k.startswith(b'CHGORIG_'): + origname = k[len(b'CHGORIG_') :] + if not _envre.match(origname) or origname in ignored: + continue + + if v.startswith(b'0') and origname in envitems: + del envitems[origname] + else: + envitems[origname] = v[1:] + + envhash = _hashlist(sorted(pycompat.iteritems(envitems))) return sectionhash[:6] + envhash[:6] @@ -550,39 +575,18 @@ raise ValueError(b'unexpected value in setenv request') self.ui.log(b'chgserver', b'setenv: %r\n', sorted(newenv.keys())) - # Python3 has some logic to "coerce" the C locale to a UTF-8 capable - # one, and it sets LC_CTYPE in the environment to C.UTF-8 if none of - # 'LC_CTYPE', 'LC_ALL' or 'LANG' are set (to any value). This can be - # disabled with PYTHONCOERCECLOCALE=0 in the environment. - # - # When fromui is called via _inithashstate, python has already set - # this, so that's in the environment right when we start up the hg - # process. Then chg will call us and tell us to set the environment to - # the one it has; this might NOT have LC_CTYPE, so we'll need to - # carry-forward the LC_CTYPE that was coerced in these situations. - # - # If this is not handled, we will fail config+env validation and fail - # to start chg. If this is just ignored instead of carried forward, we - # may have different behavior between chg and non-chg. - if pycompat.ispy3: - # Rename for wordwrapping purposes - oldenv = encoding.environ - if not any( - e.get(b'PYTHONCOERCECLOCALE') == b'0' for e in [oldenv, newenv] - ): - keys = [b'LC_CTYPE', b'LC_ALL', b'LANG'] - old_keys = [k for k, v in oldenv.items() if k in keys and v] - new_keys = [k for k, v in newenv.items() if k in keys and v] - # If the user's environment (from chg) doesn't have ANY of the - # keys that python looks for, and the environment (from - # initialization) has ONLY LC_CTYPE and it's set to C.UTF-8, - # carry it forward. - if ( - not new_keys - and old_keys == [b'LC_CTYPE'] - and oldenv[b'LC_CTYPE'] == b'C.UTF-8' - ): - newenv[b'LC_CTYPE'] = oldenv[b'LC_CTYPE'] + # We want to *preserve* these variables in the environment across the + # clear, since Python may have modified them during startup, and we want + # to preserve that modification. + preserve = [k for k in encoding.environ if k.startswith(b'CHGORIG_')] + for chgorig in preserve: + actual = chgorig[8:] + if actual in newenv: + newenv[chgorig] = b'1' + newenv[actual] + else: + newenv[chgorig] = b'0' + # Keep the value as it was coerced by python + newenv[actual] = encoding.environ[actual] encoding.environ.clear() encoding.environ.update(newenv) diff --git a/tests/test-chg.t b/tests/test-chg.t --- a/tests/test-chg.t +++ b/tests/test-chg.t @@ -332,8 +332,7 @@ YYYY/MM/DD HH:MM:SS (PID)> log -R cached YYYY/MM/DD HH:MM:SS (PID)> loaded repo into cache: $TESTTMP/cached (in ...s) -Test that chg works even when python "coerces" the locale (py3.7+, which is done -by default if none of LC_ALL, LC_CTYPE, or LANG are set in the environment) +Test that chg works even when python "coerces" the locale (py3.7+) $ cat > $TESTTMP/debugenv.py < from mercurial import encoding @@ -345,11 +344,31 @@ > for k in [b'LC_ALL', b'LC_CTYPE', b'LANG']: > v = encoding.environ.get(k) > if v is not None: - > ui.write(b'%s=%s\n' % (k, encoding.environ[k])) + > ui.write(b'debugenv: %s=%s\n' % (k, encoding.environ[k])) > EOF - $ LANG= LC_ALL= LC_CTYPE= chg \ - > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv - LC_ALL= - LC_CTYPE=C.UTF-8 (py37 !) - LC_CTYPE= (no-py37 !) - LANG= + $ CHGDEBUG= LANG= LC_ALL= LC_CTYPE= chg \ + > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv 2>&1 \ + > | egrep 'debugenv|start' + chg: debug: * start cmdserver at * (glob) + debugenv: LC_ALL= + debugenv: LC_CTYPE=C.UTF-8 (py37 !) + debugenv: LC_CTYPE= (no-py37 !) + debugenv: LANG= +(Should not trigger a command server restart) + $ CHGDEBUG= LANG= LC_ALL= LC_CTYPE= chg \ + > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv 2>&1 \ + > | egrep 'debugenv|start' + debugenv: LC_ALL= + debugenv: LC_CTYPE=C.UTF-8 (py37 !) + debugenv: LC_CTYPE= (no-py37 !) + debugenv: LANG= +(Should trigger a command server restart since the LC_CTYPE=C value should be +used, not the coerced one) + $ CHGDEBUG= LANG= LC_ALL= LC_CTYPE=C chg \ + > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv 2>&1 \ + > | egrep 'debugenv|start' + chg: debug: * start cmdserver at * (glob) + debugenv: LC_ALL= + debugenv: LC_CTYPE=C.UTF-8 (py37 !) + debugenv: LC_CTYPE=C (no-py37 !) + debugenv: LANG=