# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#!/usr/bin/env python3
5
import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from urllib.parse import urlparse
15
# The manifest lives in this script rather than in a separate data file.
# It lists only documents hosted OUTSIDE the chromium/src repository.
#
# Each entry in "remote_documents" carries:
#   source_url   URL of the document, in <repo>/+/<branch>/<path> form.
#   cached_path  File name the cached copy is written to.
#   subdir       Directory (relative to the output dir) holding the copy.
#   description  Human-readable summary, used in progress output.
DOCUMENT_MANIFEST = {
    "description": "Manifest of externally sourced documents cached for AI assistant context.",
    "remote_documents": [
        {
            "source_url": "https://gn.googlesource.com/gn/+/main/docs/style_guide.md",
            "cached_path": "style_guide.md",
            "subdir": "gn",
            "description": "Style guide for writing clean and maintainable BUILD.gn files.",
        },
        # Disabled entries, kept for reference (likely not needed):
        # {
        #     "source_url": "https://gn.googlesource.com/gn/+/main/README.md",
        #     "cached_path": "README.md",
        #     "subdir": "gn",
        #     "description": "Introduction to GN, the meta-build system.",
        # },
        # {
        #     "source_url": "https://gn.googlesource.com/gn/+/main/docs/quick_start.md",
        #     "cached_path": "quick_start.md",
        #     "subdir": "gn",
        #     "description": "Quick start guide for using GN.",
        # },
        # {
        #     "source_url": "https://gn.googlesource.com/gn/+/main/docs/reference.md",
        #     "cached_path": "reference.md",
        #     "subdir": "gn",
        #     "description": "Comprehensive reference for GN language, functions, and variables.",
        # },
        # {
        #     "source_url": "https://gn.googlesource.com/gn/+/main/docs/language.md",
        #     "cached_path": "language.md",
        #     "subdir": "gn",
        #     "description": "Formal definition of the GN language syntax.",
        # },
        # {
        #     "source_url": "https://gn.googlesource.com/gn/+/main/docs/faq.md",
        #     "cached_path": "faq.md",
        #     "subdir": "gn",
        #     "description": "Frequently asked questions about GN.",
        # },
        # {
        #     "source_url": "https://gn.googlesource.com/gn/+/main/docs/standalone.md",
        #     "cached_path": "standalone.md",
        #     "subdir": "gn",
        #     "description": "Guide on using GN in standalone projects outside of Chromium.",
        # },
        # {
        #     "source_url": "https://chromium.googlesource.com/website/+/main/site/developers/gn-build-configuration/index.md",
        #     "cached_path": "chromium_gn_build_config.md",
        #     "subdir": "gn",
        #     "description": "Chromium-specific guide on GN build configurations and flags.",
        # },
        # {
        #     "source_url": "https://gn.googlesource.com/gn/+/main/docs/cross_compiles.md",
        #     "cached_path": "cross_compiles.md",
        #     "subdir": "gn",
        #     "description": "Guide for cross-compiling with GN.",
        # },
        # {
        #     "source_url": "https://gn.googlesource.com/gn/+/main/docs/mingw.md",
        #     "cached_path": "mingw.md",
        #     "subdir": "gn",
        #     "description": "Notes on using GN with MinGW.",
        # },
    ],
}
84
def parse_gob_url(url):
    """Split a Git-on-Borg (Gitiles) file URL into repo URL, branch and path.

    The expected shape is ``<repo_url>/+/<ref>/<file_path>``, for example
    ``https://gn.googlesource.com/gn/+/main/docs/style_guide.md``. A fully
    qualified ref (``refs/heads/<name>`` or ``refs/tags/<name>``) is reduced
    to its short name so the result can be passed to ``git clone --branch``.
    Short branch names that themselves contain ``/`` cannot be told apart
    from the file path and are not supported.

    Args:
        url: URL of a single file in a Gitiles-hosted repository.

    Returns:
        A ``(repo_url, branch, file_path)`` tuple of non-empty strings.

    Raises:
        ValueError: If ``url`` is not a string, has no ``/+/`` separator, or
            lacks the repository, branch or file path component.
    """
    if not isinstance(url, str):
        raise ValueError(f"Error parsing URL {url}: expected a string")

    repo_url, separator, remainder = url.partition('/+/')
    if not separator:
        raise ValueError(
            f"Error parsing URL {url}: Invalid GoB URL format: {url} - missing '/+/'")

    # Gitiles also serves fully qualified refs; strip the namespace so that
    # only the short branch/tag name remains in front of the file path.
    for ref_prefix in ('refs/heads/', 'refs/tags/'):
        if remainder.startswith(ref_prefix):
            remainder = remainder[len(ref_prefix):]
            break

    branch, _, file_path = remainder.partition('/')
    if not repo_url:
        raise ValueError(f"Error parsing URL {url}: missing repository before '/+/'")
    if not branch:
        raise ValueError(f"Error parsing URL {url}: missing branch after '/+/'")
    if not file_path:
        raise ValueError(f"Error parsing URL {url}: missing file path after branch '{branch}'")
    return repo_url, branch, file_path
98
def replace_non_inclusive_language(file_path):
    """Rewrite a text file in place using inclusive terminology.

    This function is called after fetching external documentation to ensure
    that the cached files pass the Chromium presubmit check for inclusive
    language. Matching is case-insensitive and the replacement mirrors the
    casing of the matched text: all-upper-case stays upper-case, a leading
    capital is preserved, and anything else becomes lower-case.

    Args:
        file_path: Path (``str`` or ``pathlib.Path``) of a UTF-8 text file.

    Returns:
        None. I/O and decoding failures are reported on stdout rather than
        raised, so one unreadable file does not abort a refresh run.
    """
    # One term per line so each line can carry its own `# nocheck` marker.
    replacements = {
        "whitelist": "allowlist",  # nocheck
        "blacklist": "denylist",  # nocheck
        "master": "main",  # nocheck
    }
    pattern = re.compile("|".join(map(re.escape, replacements)), re.IGNORECASE)

    def _match_case(match):
        """Return the replacement for ``match`` in the casing of the match."""
        found = match.group(0)
        replacement = replacements[found.lower()]
        if found.isupper():
            return replacement.upper()
        if found[0].isupper():
            return replacement.capitalize()
        return replacement

    # Accept plain strings as well; `.name` below needs a Path object.
    file_path = Path(file_path)
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        content = pattern.sub(_match_case, content)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f" Applied non-inclusive language replacements to {file_path.name}")
    except (OSError, UnicodeError) as e:
        print(f" Could not process file {file_path}: {e}")
122
def _run_git(args, cwd, timeout):
    """Run ``git <args>`` in ``cwd`` with captured output; raise on failure."""
    subprocess.run(
        ['git', *args],
        check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
        cwd=cwd, timeout=timeout
    )


def fetch_doc_with_git(repo_url, branch, file_path, output_file):
    """Fetch a single file from a git repository into ``output_file``.

    Makes a shallow, blob-less, no-checkout clone of ``branch`` in a temporary
    directory, narrows the working tree to ``file_path`` with sparse-checkout,
    checks it out, copies the file to ``output_file`` and finally runs
    ``replace_non_inclusive_language`` on the copy. The temporary clone is
    removed when the function returns.

    Args:
        repo_url: URL (or local path) of the repository to clone.
        branch: Branch or tag name passed to ``git clone --branch``.
        file_path: Path of the wanted file, relative to the repository root.
        output_file: Destination path for the fetched file.

    Returns:
        True if the file was copied to ``output_file``; False if git is not
        installed, a git command failed or timed out, the file is absent from
        the checkout, or the copy failed. Failures are reported on stdout.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        print(f" Cloning {repo_url} (branch: {branch}) into temporary directory {tmp_path}")
        try:
            _run_git(
                ['clone', '--depth', '1', '--branch', branch, '--no-checkout',
                 '--filter=blob:none', repo_url, '.'],
                tmp_path, 300)
            # NOTE(review): cone mode is directory-oriented; passing a file
            # path here appears to rely on git including the files directly
            # inside the parent directory. Confirm before changing.
            _run_git(['sparse-checkout', 'init', '--cone'], tmp_path, 60)
            _run_git(['sparse-checkout', 'set', file_path], tmp_path, 60)
            print(f" Checking out {file_path}...")
            _run_git(['checkout'], tmp_path, 120)
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError) as e:
            print(f" Git operation failed for {repo_url}: {e}")
            # str(e) only carries the exit status; git's own explanation is
            # in the captured stderr, so surface it when there is any.
            stderr = getattr(e, 'stderr', None)
            if isinstance(stderr, bytes):
                stderr = stderr.decode('utf-8', errors='replace')
            if stderr and stderr.strip():
                print(f" git stderr: {stderr.strip()}")
            return False

        source_file = tmp_path / file_path
        if not source_file.exists():
            print(f" Error: File {file_path} not found in repository after sparse checkout.")
            return False

        print(f" Copying {file_path} to {output_file}")
        try:
            shutil.copyfile(source_file, output_file)
        except OSError as e:
            print(f" Error copying file: {e}")
            return False
        replace_non_inclusive_language(output_file)
        return True
164
def fetch_and_cache_docs(manifest, base_output_dir, force=False):
    """Fetch every document listed in ``manifest`` into ``base_output_dir``.

    Each entry of ``manifest["remote_documents"]`` must provide
    ``source_url``, ``cached_path`` and ``subdir``; ``description`` is
    optional. A document is written to ``base_output_dir/subdir/cached_path``.
    A problem with one document (invalid entry, unusable output directory,
    unparsable URL, failed fetch) is recorded and the remaining documents are
    still processed. A summary is printed at the end.

    Args:
        manifest: Mapping with an optional ``"remote_documents"`` list.
        base_output_dir: Directory (``str`` or ``pathlib.Path``) under which
            the per-document sub-directories are created.
        force: When true, do not exit with an error if documents failed.

    Returns:
        None.

    Raises:
        SystemExit: With status 1 when at least one document failed and
            ``force`` is false.
    """
    base_output_dir = Path(base_output_dir)
    print(f"Starting doc refresh. Base output dir: {base_output_dir}")
    base_output_dir.mkdir(parents=True, exist_ok=True)

    successes = []
    failures = []

    remote_docs = manifest.get("remote_documents", [])
    if not remote_docs:
        print("No remote documents to fetch.")
        return

    for doc in remote_docs:
        source_url = doc.get("source_url")
        cached_path = doc.get("cached_path")
        subdir = doc.get("subdir")
        description = doc.get("description", "No description")

        if not source_url or not cached_path or not subdir:
            print(f" Skipping invalid entry (missing source_url, cached_path, or subdir): {doc}")
            failures.append(f"{description} (Invalid Manifest Entry)")
            continue

        label = f"{subdir}/{cached_path}"
        output_dir = base_output_dir / subdir
        output_file = output_dir / cached_path

        print(f" Processing: {description} -> {label}")
        print(f" Source URL: {source_url}")

        # An unusable output directory is a failure of this document only;
        # record it so the summary and the `force` handling still apply.
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            print(f" Could not create output directory {output_dir}: {e}")
            failures.append(f"{label} (Output Directory Error)")
            print("")
            continue

        try:
            repo_url, branch, file_path = parse_gob_url(source_url)
            if fetch_doc_with_git(repo_url, branch, file_path, output_file):
                successes.append(label)
            else:
                failures.append(f"{label} (Fetch Failed)")
        except ValueError as e:
            print(f" Skipping {source_url}: {e}")
            failures.append(f"{label} (URL Parse Error)")
        except Exception as e:
            # Last-resort boundary: keep going with the remaining documents.
            print(f" An unexpected error occurred for {source_url}: {e}")
            failures.append(f"{label} (Unexpected Error)")
        print("")

    print("--- Refresh Summary ---")
    print(f"Successfully updated: {len(successes)}")
    print(f"Failed: {len(failures)}")
    if failures:
        print("\nFailed documents:")
        for f in failures:
            print(f" - {f}")
        if not force:
            sys.exit(1)
219
def main():
    """Command-line entry point: refresh the cached documents and stage them.

    Parses ``--force``, treats the directory containing this script as the
    output directory and its grandparent as the git root, exits with status 1
    if that root has no ``.git`` entry, runs ``fetch_and_cache_docs`` on
    ``DOCUMENT_MANIFEST`` and then runs ``git add`` on the script directory.
    A failing ``git add`` is reported on stdout but does not change the exit
    status.
    """
    arg_parser = argparse.ArgumentParser(
        description="Refresh the cached external documentation.")
    arg_parser.add_argument(
        '--force',
        action='store_true',
        help='Continue and exit successfully even if some documents fail to update.',
    )
    options = arg_parser.parse_args()

    # Documents are cached next to this script; the repository root is
    # assumed to sit two directory levels above it.
    here = Path(__file__).resolve().parent
    repo_root = here.parent.parent

    print(f"Base output directory: {here}")
    print(f"Assumed git root: {repo_root}")

    git_marker = repo_root / ".git"
    if not git_marker.exists():
        print(f"Error: Git root not found at {repo_root}. Please run this script from within the Chromium source tree.")
        sys.exit(1)

    fetch_and_cache_docs(DOCUMENT_MANIFEST, here, options.force)

    print("Document refresh complete.")
    try:
        staged_path = here.relative_to(repo_root)
        print(f"Adding changes in {staged_path} to git index...")
        subprocess.run(['git', 'add', str(staged_path)], check=True, cwd=repo_root)
        print("Changes added to git index.")
        print("Please review and commit the changes.")
    except Exception as e:
        print(f"An error occurred while running git add: {e}")


if __name__ == "__main__":
    main()