Monday, February 3, 2025

Python script to find duplicate files

Here is a Python script that scans directories for duplicate files by comparing file sizes and MD5 hashes:


```python
import argparse
import hashlib
import os
import sys
from collections import defaultdict


def get_file_hash(filepath):
    """Calculate the MD5 hash of a file's content."""
    hasher = hashlib.md5()
    try:
        with open(filepath, 'rb') as f:
            while True:
                chunk = f.read(8192)  # Read in 8KB chunks to handle large files
                if not chunk:
                    break
                hasher.update(chunk)
    except OSError:
        return None  # Skip files that can't be read
    return hasher.hexdigest()


def find_duplicates(start_directory):
    """Find duplicate files in the specified directory and its subdirectories."""
    file_sizes = defaultdict(list)

    # First pass: Group files by size
    for root, dirs, files in os.walk(start_directory):
        for filename in files:
            filepath = os.path.join(root, filename)
            try:
                file_size = os.path.getsize(filepath)
            except OSError:
                continue  # Skip inaccessible files
            file_sizes[file_size].append(filepath)

    # Second pass: Compare hashes of files with the same size
    duplicates = []
    for size, paths in file_sizes.items():
        if len(paths) < 2:
            continue  # Skip unique file sizes

        hashes = defaultdict(list)
        for path in paths:
            file_hash = get_file_hash(path)
            if file_hash is not None:
                hashes[file_hash].append(path)

        # Collect all groups of identical files
        for hash_group in hashes.values():
            if len(hash_group) > 1:
                duplicates.append(hash_group)

    return duplicates


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Find duplicate files in a directory tree.')
    parser.add_argument('directory', help='Directory to scan for duplicates')
    args = parser.parse_args()

    if not os.path.isdir(args.directory):
        print(f"Error: '{args.directory}' is not a valid directory")
        sys.exit(1)

    duplicate_groups = find_duplicates(args.directory)

    if duplicate_groups:
        print("\nDuplicate files found:")
        for i, group in enumerate(duplicate_groups, 1):
            print(f"\nGroup {i}:")
            for path in group:
                print(f"  {path}")
        print(f"\nFound {len(duplicate_groups)} groups of duplicates")
    else:
        print("\nNo duplicate files found")
```


Key features:

1. Uses a two-pass comparison for efficiency:
   - First groups files by size
   - Then compares MD5 hashes only for files that share a size
2. Handles large files by reading them in chunks (a shorter alternative is sketched after this list)
3. Skips inaccessible files gracefully
4. Provides clear command-line output
5. Uses MD5 hashing for content comparison
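
On Python 3.11 or newer (an assumption; the script above doesn't require it), the manual chunk loop in `get_file_hash` can be replaced by `hashlib.file_digest`, which does the buffered reading for you:

```python
import hashlib

def get_file_hash(filepath):
    """Hash a file with hashlib.file_digest (Python 3.11+), which reads in chunks internally."""
    try:
        with open(filepath, 'rb') as f:
            return hashlib.file_digest(f, 'md5').hexdigest()
    except OSError:
        return None  # Skip files that can't be read
```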


To use:

1. Save as `find_duplicates.py`
2. Run with: `python find_duplicates.py /path/to/directory`


The script will:

1. Scan all subdirectories recursively
2. Identify files with identical content
3. Group duplicates together in the output
4. Show full paths of duplicate files
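
Because the command-line handling lives under the `if __name__ == "__main__":` guard, `find_duplicates` can also be imported and called from another Python program. A minimal sketch, assuming the script is saved as `find_duplicates.py` next to your code and using a placeholder path:

```python
# Assumes find_duplicates.py (the script above) is importable from this directory.
from find_duplicates import find_duplicates

groups = find_duplicates('/path/to/directory')  # placeholder path
for group in groups:
    keep, *extras = group  # keep the first copy; the rest are redundant
    print(f"{keep} has {len(extras)} duplicate(s)")
```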


Note: MD5 is used for speed, but you can switch to SHA-256 for stronger, collision-resistant hashing by replacing `hashlib.md5()` with `hashlib.sha256()`.
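
A hash-agnostic variant of `get_file_hash` makes that swap a one-word change. This is only a sketch; the `algorithm` parameter is my addition, not part of the script above:

```python
import hashlib

def get_file_hash(filepath, algorithm='md5'):
    """Hash a file with any algorithm hashlib supports, e.g. 'md5' or 'sha256'."""
    hasher = hashlib.new(algorithm)
    try:
        with open(filepath, 'rb') as f:
            while chunk := f.read(8192):  # read in 8KB chunks
                hasher.update(chunk)
    except OSError:
        return None  # Skip files that can't be read
    return hasher.hexdigest()
```

Callers in `find_duplicates` would then pass `algorithm='sha256'` when they want the stronger hash.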