Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

build_reference.py 8.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
  1. # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
  2. """
  3. Helper file to build Ultralytics Docs reference section.
  4. This script recursively walks through the ultralytics directory and builds an MkDocs reference section of *.md files
  5. composed of classes and functions, and also creates a navigation menu for use in mkdocs.yml.
  6. Note: Must be run from repository root directory. Do not run from docs directory.
  7. """
  8. import re
  9. import subprocess
  10. from collections import defaultdict
  11. from pathlib import Path
  12. # Constants
  13. hub_sdk = False
  14. if hub_sdk:
  15. PACKAGE_DIR = Path("/Users/glennjocher/PycharmProjects/hub-sdk/hub_sdk")
  16. REFERENCE_DIR = PACKAGE_DIR.parent / "docs/reference"
  17. GITHUB_REPO = "ultralytics/hub-sdk"
  18. else:
  19. FILE = Path(__file__).resolve()
  20. PACKAGE_DIR = FILE.parents[1] / "ultralytics"
  21. REFERENCE_DIR = PACKAGE_DIR.parent / "docs/en/reference"
  22. GITHUB_REPO = "ultralytics/ultralytics"
  23. MKDOCS_YAML = PACKAGE_DIR.parent / "mkdocs.yml"
  24. def extract_classes_and_functions(filepath: Path) -> tuple[list[str], list[str]]:
  25. """Extract class and function names from a given Python file."""
  26. content = filepath.read_text()
  27. return (re.findall(r"(?:^|\n)class\s(\w+)(?:\(|:)", content), re.findall(r"(?:^|\n)def\s(\w+)\(", content))
  28. def create_markdown(py_filepath: Path, module_path: str, classes: list[str], functions: list[str]) -> Path:
  29. """Create a Markdown file containing the API reference for the given Python module."""
  30. md_filepath = py_filepath.with_suffix(".md")
  31. exists = md_filepath.exists()
  32. # Read existing content and retain header metadata if available
  33. header_content = ""
  34. if exists:
  35. existing_content = md_filepath.read_text()
  36. header_parts = existing_content.split("---")
  37. for part in header_parts:
  38. if "description:" in part or "comments:" in part:
  39. header_content += f"---{part}---\n\n"
  40. if not any(header_content):
  41. header_content = "---\ndescription: TODO ADD DESCRIPTION\nkeywords: TODO ADD KEYWORDS\n---\n\n"
  42. module_name = module_path.replace(".__init__", "")
  43. module_path = module_path.replace(".", "/")
  44. url = f"https://github.com/{GITHUB_REPO}/blob/main/{module_path}.py"
  45. edit = f"https://github.com/{GITHUB_REPO}/edit/main/{module_path}.py"
  46. pretty = url.replace("__init__.py", "\\_\\_init\\_\\_.py") # Properly display __init__.py filenames
  47. # Build markdown content
  48. title_content = (
  49. f"# Reference for `{module_path}.py`\n\n"
  50. f"!!! note\n\n"
  51. f" This file is available at [{pretty}]({url}). If you spot a problem please help fix it by [contributing]"
  52. f"(https://docs.ultralytics.com/help/contributing/) a [Pull Request]({edit}) 🛠️. Thank you 🙏!\n\n"
  53. )
  54. md_content = ["<br>\n\n"]
  55. md_content.extend(f"## ::: {module_name}.{cls}\n\n<br><br><hr><br>\n\n" for cls in classes)
  56. md_content.extend(f"## ::: {module_name}.{func}\n\n<br><br><hr><br>\n\n" for func in functions)
  57. if md_content[-1:]: # Remove last horizontal rule if content exists
  58. md_content[-1] = md_content[-1].replace("<hr><br>\n\n", "")
  59. # Write to file
  60. md_filepath.parent.mkdir(parents=True, exist_ok=True)
  61. md_filepath.write_text(header_content + title_content + "".join(md_content) + "\n")
  62. if not exists:
  63. print(f"Created new file '{md_filepath}'")
  64. subprocess.run(["git", "add", "-f", str(md_filepath)], check=True, cwd=PACKAGE_DIR)
  65. return md_filepath.relative_to(PACKAGE_DIR.parent)
  66. def nested_dict():
  67. """Create and return a nested defaultdict."""
  68. return defaultdict(nested_dict)
  69. def sort_nested_dict(d: dict) -> dict:
  70. """Sort a nested dictionary recursively."""
  71. return {k: sort_nested_dict(v) if isinstance(v, dict) else v for k, v in sorted(d.items())}
  72. def create_nav_menu_yaml(nav_items: list[str]) -> str:
  73. """Create and return a YAML string for the navigation menu."""
  74. nav_tree = nested_dict()
  75. for item_str in nav_items:
  76. item = Path(item_str)
  77. parts = item.parts
  78. current_level = nav_tree["reference"]
  79. for part in parts[2:-1]: # Skip docs/reference and filename
  80. current_level = current_level[part]
  81. current_level[parts[-1].replace(".md", "")] = item
  82. def _dict_to_yaml(d, level=0):
  83. """Convert a nested dictionary to a YAML-formatted string with indentation."""
  84. yaml_str = ""
  85. indent = " " * level
  86. for k, v in sorted(d.items()):
  87. if isinstance(v, dict):
  88. yaml_str += f"{indent}- {k}:\n{_dict_to_yaml(v, level + 1)}"
  89. else:
  90. yaml_str += f"{indent}- {k}: {str(v).replace('docs/en/', '')}\n"
  91. return yaml_str
  92. reference_yaml = _dict_to_yaml(sort_nested_dict(nav_tree))
  93. print(f"Scan complete, generated reference section with {len(reference_yaml.splitlines())} lines")
  94. return reference_yaml
  95. def extract_document_paths(yaml_section: str) -> list[str]:
  96. """Extract document paths from a YAML section, ignoring formatting and structure."""
  97. paths = []
  98. # Match all paths that appear after a colon in the YAML
  99. path_matches = re.findall(r":\s*([^\s][^:\n]*?)(?:\n|$)", yaml_section)
  100. for path in path_matches:
  101. # Clean up the path
  102. path = path.strip()
  103. if path and not path.startswith("-") and not path.endswith(":"):
  104. paths.append(path)
  105. return sorted(paths)
  106. def update_mkdocs_file(reference_yaml: str) -> None:
  107. """Update the mkdocs.yaml file with the new reference section only if changes in document paths are detected."""
  108. mkdocs_content = MKDOCS_YAML.read_text()
  109. # Find the top-level Reference section
  110. ref_pattern = r"(\n - Reference:[\s\S]*?)(?=\n - \w|$)"
  111. ref_match = re.search(ref_pattern, mkdocs_content)
  112. # Build new section with proper indentation
  113. new_section_lines = ["\n - Reference:"]
  114. for line in reference_yaml.splitlines():
  115. if line.strip() == "- reference:": # Skip redundant header
  116. continue
  117. new_section_lines.append(f" {line}")
  118. new_ref_section = "\n".join(new_section_lines) + "\n"
  119. if ref_match:
  120. # We found an existing Reference section
  121. ref_section = ref_match.group(1)
  122. print(f"Found existing top-level Reference section ({len(ref_section)} chars)")
  123. # Compare only document paths
  124. existing_paths = extract_document_paths(ref_section)
  125. new_paths = extract_document_paths(new_ref_section)
  126. # Check if the document paths are the same (ignoring structure or formatting differences)
  127. if len(existing_paths) == len(new_paths) and set(existing_paths) == set(new_paths):
  128. print(f"No changes detected in document paths ({len(existing_paths)} items). Skipping update.")
  129. return
  130. print(f"Changes detected: {len(new_paths)} document paths vs {len(existing_paths)} existing")
  131. # Update content
  132. new_content = mkdocs_content.replace(ref_section, new_ref_section)
  133. MKDOCS_YAML.write_text(new_content)
  134. subprocess.run(["npx", "prettier", "--write", str(MKDOCS_YAML)], check=False, cwd=PACKAGE_DIR.parent)
  135. print(f"Updated Reference section in {MKDOCS_YAML}")
  136. else:
  137. # No existing Reference section, we need to add it
  138. help_match = re.search(r"(\n - Help:)", mkdocs_content)
  139. if help_match:
  140. help_section = help_match.group(1)
  141. # Insert before Help section
  142. new_content = mkdocs_content.replace(help_section, f"{new_ref_section}{help_section}")
  143. MKDOCS_YAML.write_text(new_content)
  144. print(f"Added new Reference section before Help in {MKDOCS_YAML}")
  145. else:
  146. print("Could not find a suitable location to add Reference section")
  147. def main():
  148. """Extract class/function names, create Markdown files, and update mkdocs.yaml."""
  149. nav_items = []
  150. for py_filepath in PACKAGE_DIR.rglob("*.py"):
  151. classes, functions = extract_classes_and_functions(py_filepath)
  152. if classes or functions:
  153. py_filepath_rel = py_filepath.relative_to(PACKAGE_DIR)
  154. md_filepath = REFERENCE_DIR / py_filepath_rel
  155. module_path = f"{PACKAGE_DIR.name}.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}"
  156. md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions)
  157. nav_items.append(str(md_rel_filepath))
  158. # Update mkdocs.yaml with generated YAML
  159. update_mkdocs_file(create_nav_menu_yaml(nav_items))
  160. if __name__ == "__main__":
  161. main()
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...