# Copyright 2025 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#!/usr/bin/env python3
5
import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from urllib.parse import urlparse
15
16# Data source embedded in the script
17# Only includes documents EXTERNAL to the chromium/src repository.
DOCUMENT_MANIFEST = {
    "description": "Manifest of externally sourced documents cached for AI assistant context.",
    # Each entry describes one external document to fetch and cache locally.
    "remote_documents": [
        {
            # Git-on-Borg URL in the form <repo>/+/<branch>/<path-in-repo>.
            "source_url": "https://gn.googlesource.com/gn/+/main/docs/style_guide.md",
            # Filename used for the cached copy.
            "cached_path": "style_guide.md",
            # Subdirectory (under the base output dir) for the cached copy.
            "subdir": "gn",
            "description": "Style guide for writing clean and maintainable BUILD.gn files."
        },
    ]
}
29
def parse_gob_url(url):
    """Parses a Git-on-Borg URL into (repo_url, branch, file_path).

    Expected format: https://host/repo/+/<branch>/<path/to/file>.

    Args:
        url: The Git-on-Borg URL to parse.

    Returns:
        A (repo_url, branch, file_path) tuple of strings.

    Raises:
        ValueError: If the URL lacks the '/+/' separator, or has no file
            path after the branch component.
    """
    # str.partition splits on the FIRST '/+/', matching the original
    # split()[0]/[1] behavior, and lets us detect a missing separator
    # without catching our own exceptions.
    repo_url, sep, remainder = url.partition('/+/')
    if not sep:
        raise ValueError(f"Invalid GoB URL format: {url} - missing '/+/'")
    branch, sep, file_path = remainder.partition('/')
    if not sep or not file_path:
        raise ValueError(
            f"Error parsing URL {url}: missing file path after branch")
    return repo_url, branch, file_path
43
def replace_non_inclusive_language(file_path):
    """Replaces non-inclusive terms to align with Chromium's guidelines.

    This function is called after fetching external documentation to ensure
    that the cached files pass the Chromium presubmit check for inclusive
    language. Only whole words are replaced, so words that merely contain a
    flagged term (e.g. "remastered") are left untouched.

    Args:
        file_path: pathlib.Path of the file to rewrite in place.
    """
    # Each cased variant is listed explicitly so the replacement keeps the
    # original capitalization.
    replacements = {
        "whitelist": "allowlist",  # nocheck
        "blacklist": "denylist",  # nocheck
        "Whitelist": "Allowlist",  # nocheck
        "Blacklist": "Denylist",  # nocheck
        "master": "main",  # nocheck
        "Master": "Main",  # nocheck
    }
    # \b anchors restrict matches to whole words; the alternation is built
    # from the mapping keys so the two stay in sync.
    pattern = re.compile(r'\b(' + '|'.join(replacements) + r')\b')
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        content = pattern.sub(lambda m: replacements[m.group(0)], content)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"  Applied non-inclusive language replacements to {file_path.name}")
    except Exception as e:
        # Best-effort: a failure here should not abort the whole refresh.
        print(f"  Could not process file {file_path}: {e}")
67
def fetch_doc_with_git(repo_url, branch, file_path, output_file):
    """Fetches a single file from a git repo using a shallow clone into a temporary directory.

    Args:
        repo_url: Base URL of the git repository to clone.
        branch: Branch to fetch.
        file_path: Path of the wanted file, relative to the repo root.
        output_file: Destination path for the fetched (and post-processed)
            copy.

    Returns:
        True if the file was fetched, rewritten for inclusive language, and
        copied to output_file; False on any git, lookup, or copy failure.
        Errors are printed rather than raised.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        print(f"  Cloning {repo_url} (branch: {branch}) into temporary directory {tmp_path}")
        try:
            # Blobless (--filter=blob:none), shallow (--depth 1), no-checkout
            # clone: downloads almost no file content up front; blobs are
            # fetched on demand at checkout time.
            subprocess.run(
                ['git', 'clone', '--depth', '1', '--branch', branch, '--no-checkout', '--filter=blob:none', repo_url, "."],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=tmp_path, timeout=300
            )
            subprocess.run(
                ['git', 'sparse-checkout', 'init', '--cone'],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=tmp_path, timeout=60
            )
            # NOTE(review): cone mode expects directory patterns; passing a
            # file path appears to work because git includes the immediate
            # files of the pattern's parent directories — confirm this stays
            # supported across git versions.
            subprocess.run(
                ['git', 'sparse-checkout', 'set', file_path],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=tmp_path, timeout=60
            )
            print(f"  Checking out {file_path}...")
            # Materializes only the sparse-checkout selection.
            subprocess.run(
                ['git', 'checkout'],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=tmp_path, timeout=120
            )
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError) as e:
            # FileNotFoundError covers git itself being absent from PATH.
            print(f"  Git operation failed for {repo_url}: {e}")
            return False

        source_file = tmp_path / file_path
        if source_file.exists():
            print(f"  Copying {file_path} to {output_file}")
            try:
                shutil.copyfile(source_file, output_file)
                # Rewrite terms so the cached copy passes Chromium presubmit.
                replace_non_inclusive_language(output_file)
                return True
            except Exception as e:
                print(f"  Error copying file: {e}")
                return False
        else:
            print(f"  Error: File {file_path} not found in repository after sparse checkout.")
            return False
    # Defensive fallback; every path above already returns.
    return False
109
def fetch_and_cache_docs(manifest, base_output_dir, force=False):
    """Fetches every remote document listed in the manifest.

    Args:
        manifest: Dict with a "remote_documents" list of entry dicts, each
            carrying source_url, cached_path, subdir, and description keys.
        base_output_dir: Path under which cached copies are written
            (per-entry subdirectories are created beneath it).
        force: When True, do not exit(1) even if some documents fail.
    """
    print(f"Starting doc refresh. Base output dir: {base_output_dir}")
    base_output_dir.mkdir(parents=True, exist_ok=True)

    entries = manifest.get("remote_documents", [])
    if not entries:
        print("No remote documents to fetch.")
        return

    fetched = []
    failed = []

    for entry in entries:
        url = entry.get("source_url")
        filename = entry.get("cached_path")
        subdir = entry.get("subdir")
        description = entry.get("description", "No description")

        # Guard clause: every entry must name a source, filename, and subdir.
        if not (url and filename and subdir):
            print(f"  Skipping invalid entry (missing source_url, cached_path, or subdir): {entry}")
            failed.append(f"{description} (Invalid Manifest Entry)")
            continue

        target_dir = base_output_dir / subdir
        target_dir.mkdir(parents=True, exist_ok=True)
        target_file = target_dir / filename
        label = f"{subdir}/{filename}"

        print(f"  Processing: {description} -> {subdir}/{filename}")
        print(f"    Source URL: {url}")

        try:
            repo_url, branch, repo_path = parse_gob_url(url)
            if fetch_doc_with_git(repo_url, branch, repo_path, target_file):
                fetched.append(label)
            else:
                failed.append(f"{label} (Fetch Failed)")
        except ValueError as err:
            print(f"    Skipping {url}: {err}")
            failed.append(f"{label} (URL Parse Error)")
        except Exception as err:
            print(f"    An unexpected error occurred for {url}: {err}")
            failed.append(f"{label} (Unexpected Error)")
        print("")

    print("--- Refresh Summary ---")
    print(f"Successfully updated: {len(fetched)}")
    print(f"Failed: {len(failed)}")
    if failed:
        print("\nFailed documents:")
        for name in failed:
            print(f"  - {name}")
        # Non-zero exit signals the caller unless --force was given.
        if not force:
            sys.exit(1)
164
def main():
    """Entry point: refresh the cached docs, then stage changes with git."""
    parser = argparse.ArgumentParser(
        description="Refresh the cached external documentation.")
    parser.add_argument(
        '--force',
        action='store_true',
        help='Continue and exit successfully even if some documents fail to update.'
    )
    args = parser.parse_args()

    # Cached copies live next to this script; the git root is assumed to be
    # two directories up from it.
    script_dir = Path(__file__).resolve().parent
    base_output_dir = script_dir
    git_root = script_dir.parent.parent

    print(f"Base output directory: {base_output_dir}")
    print(f"Assumed git root: {git_root}")

    if not (git_root / ".git").exists():
        print(f"Error: Git root not found at {git_root}. Please run this script from within the Chromium source tree.")
        sys.exit(1)

    fetch_and_cache_docs(DOCUMENT_MANIFEST, base_output_dir, args.force)

    print("Document refresh complete.")
    try:
        relative_dir = script_dir.relative_to(git_root)
        print(f"Adding changes in {relative_dir} to git index...")
        subprocess.run(['git', 'add', str(relative_dir)], check=True, cwd=git_root)
        print("Changes added to git index.")
        print("Please review and commit the changes.")
    except Exception as err:
        # Best-effort staging; a failure here is reported but not fatal.
        print(f"An error occurred while running git add: {err}")

if __name__ == "__main__":
    main()