Ultralytics
/
ultralytics
connected to https://github.com/ultralytics/ultralytics.git


  
1

	
2

	
3

	
4

	
5

	
6

	
7

	
8

	
9

	
10

	
11

	
12

	
13

	
14

	
15

	
16

	
17

	
18

	
19

	
20

	
21

	
22

	
23

	
24

	
25

	
26

	
27

	
28

	
29

	
30

	
31

	
32

	
33

	
34

	
35

	
36

	
37

	
38

	
39

	
40

	
41

	
42

	
43

	
44

	
45

	
46

	
47

	
48

	
49

	
50

	
51

	
52

	
53

	
54

	
55

	
56

	
57

	
58

	
59

	
60

	
61

	
62

	
63

	
64

	
65

	
66

	
67

	
68

	
69

	
70

	
71

	
72

	
73

	
74

	
75

	
76

	
77

	
78

	
79

	
80

	
81

	
82

	
83

	
84

	
85

	
86

	
87

	
88

	
89

	
90

	
91

	
92

	
93

	
94

	
95

	
96

	
97

	
98

	
99

	
100

	
101

	
102

	
103

	
104

	
105

	
106

	
107

	
108

	
109

	
110

	
111

	
112

	
113

	
114

	
115

	
116

	
117

	
118

	
119

	
120

	
121

	
122

	
123

	
124

	
125

	
126

	
127

	
128

	
129

	
130

	
131

	
132

	
133

	
134

	
135

	
136

	
137

	
138

	
139

	
140

	
141

	
142

	
143

	
144

	
145

	
146

	
147

	
148

	
149

	
150

	
151

	
152

	
153

	
154

	
155

	
156

	
157

	
158

	
159

	
160

	
161

	
162

	
163

	
164

	
165

	
166

	
167

	
168

	
169

	
170

	
171

	
172

	
173

	
174

	
175

	
176

	
177

	
178

	
179

	
180

	
            # Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Script to fix broken Markdown links and front matter in language-specific directories zh, ko, ja, ru, de, fr, es, pt.

This script processes markdown files in language-specific directories (like /zh/). It finds Markdown links and checks
their existence. If a link is broken and does not exist in the language-specific directory but exists in the /en/
directory, the script updates the link to point to the corresponding file in the /en/ directory.

It also ensures that front matter keywords like 'comments:', 'description:', and 'keywords:' are not translated and
remain in English.
"""

import re
from pathlib import Path


class MarkdownLinkFixer:
    """Class to fix Markdown links and front matter in language-specific directories."""

    def __init__(self, base_dir, update_links=True, update_text=True):
        """Initialize the MarkdownLinkFixer with the base directory."""
        self.base_dir = Path(base_dir)
        self.update_links = update_links
        self.update_text = update_text
        self.md_link_regex = re.compile(r'\[([^]]+)]\(([^:)]+)\.md\)')

    @staticmethod
    def replace_front_matter(content, lang_dir):
        """Ensure front matter keywords remain in English."""
        english = ['comments', 'description', 'keywords']
        translations = {
            'zh': ['评论', '描述', '关键词'],  # Mandarin Chinese (Simplified) warning, sometimes translates as 关键字
            'es': ['comentarios', 'descripción', 'palabras clave'],  # Spanish
            'ru': ['комментарии', 'описание', 'ключевые слова'],  # Russian
            'pt': ['comentários', 'descrição', 'palavras-chave'],  # Portuguese
            'fr': ['commentaires', 'description', 'mots-clés'],  # French
            'de': ['kommentare', 'beschreibung', 'schlüsselwörter'],  # German
            'ja': ['コメント', '説明', 'キーワード'],  # Japanese
            'ko': ['댓글', '설명', '키워드'],  # Korean
            'hi': ['टिप्पणियाँ', 'विवरण', 'कीवर्ड'],  # Hindi
            'ar': ['التعليقات', 'الوصف', 'الكلمات الرئيسية']  # Arabic
        }  # front matter translations for comments, description, keyword

        for term, eng_key in zip(translations.get(lang_dir.stem, []), english):
            content = re.sub(rf'{term} *[：:].*', f'{eng_key}: true', content, flags=re.IGNORECASE) if \
                eng_key == 'comments' else re.sub(rf'{term} *[：:] *', f'{eng_key}: ', content, flags=re.IGNORECASE)
        return content

    @staticmethod
    def replace_admonitions(content, lang_dir):
        """Ensure front matter keywords remain in English."""
        english = [
            'Note', 'Summary', 'Tip', 'Info', 'Success', 'Question', 'Warning', 'Failure', 'Danger', 'Bug', 'Example',
            'Quote', 'Abstract', 'Seealso', 'Admonition']
        translations = {
            'en':
            english,
            'zh': ['笔记', '摘要', '提示', '信息', '成功', '问题', '警告', '失败', '危险', '故障', '示例', '引用', '摘要', '另见', '警告'],
            'es': [
                'Nota', 'Resumen', 'Consejo', 'Información', 'Éxito', 'Pregunta', 'Advertencia', 'Fracaso', 'Peligro',
                'Error', 'Ejemplo', 'Cita', 'Abstracto', 'Véase También', 'Amonestación'],
            'ru': [
                'Заметка', 'Сводка', 'Совет', 'Информация', 'Успех', 'Вопрос', 'Предупреждение', 'Неудача', 'Опасность',
                'Ошибка', 'Пример', 'Цитата', 'Абстракт', 'См. Также', 'Предостережение'],
            'pt': [
                'Nota', 'Resumo', 'Dica', 'Informação', 'Sucesso', 'Questão', 'Aviso', 'Falha', 'Perigo', 'Bug',
                'Exemplo', 'Citação', 'Abstrato', 'Veja Também', 'Advertência'],
            'fr': [
                'Note', 'Résumé', 'Conseil', 'Info', 'Succès', 'Question', 'Avertissement', 'Échec', 'Danger', 'Bug',
                'Exemple', 'Citation', 'Abstrait', 'Voir Aussi', 'Admonestation'],
            'de': [
                'Hinweis', 'Zusammenfassung', 'Tipp', 'Info', 'Erfolg', 'Frage', 'Warnung', 'Ausfall', 'Gefahr',
                'Fehler', 'Beispiel', 'Zitat', 'Abstrakt', 'Siehe Auch', 'Ermahnung'],
            'ja': ['ノート', '要約', 'ヒント', '情報', '成功', '質問', '警告', '失敗', '危険', 'バグ', '例', '引用', '抄録', '参照', '訓告'],
            'ko': ['노트', '요약', '팁', '정보', '성공', '질문', '경고', '실패', '위험', '버그', '예제', '인용', '추상', '참조', '경고'],
            'hi': [
                'नोट', 'सारांश', 'सुझाव', 'जानकारी', 'सफलता', 'प्रश्न', 'चेतावनी', 'विफलता', 'खतरा', 'बग', 'उदाहरण',
                'उद्धरण', 'सार', 'देखें भी', 'आगाही'],
            'ar': [
                'ملاحظة', 'ملخص', 'نصيحة', 'معلومات', 'نجاح', 'سؤال', 'تحذير', 'فشل', 'خطر', 'عطل', 'مثال', 'اقتباس',
                'ملخص', 'انظر أيضاً', 'تحذير']}

        for term, eng_key in zip(translations.get(lang_dir.stem, []), english):
            if lang_dir.stem != 'en':
                content = re.sub(rf'!!! *{eng_key} *\n', f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
                content = re.sub(rf'!!! *{term} *\n', f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
            content = re.sub(rf'!!! *{term}', f'!!! {eng_key}', content, flags=re.IGNORECASE)
            content = re.sub(r'!!! *"', '!!! Example "', content, flags=re.IGNORECASE)

        return content

    @staticmethod
    def update_iframe(content):
        """Update the 'allow' attribute of iframe if it does not contain the specific English permissions."""
        english = 'accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share'
        pattern = re.compile(f'allow="(?!{re.escape(english)}).+?"')
        return pattern.sub(f'allow="{english}"', content)

    def link_replacer(self, match, parent_dir, lang_dir, use_abs_link=False):
        """Replace broken links with corresponding links in the /en/ directory."""
        text, path = match.groups()
        linked_path = (parent_dir / path).resolve().with_suffix('.md')

        if not linked_path.exists():
            en_linked_path = Path(str(linked_path).replace(str(lang_dir), str(lang_dir.parent / 'en')))
            if en_linked_path.exists():
                if use_abs_link:
                    # Use absolute links WARNING: BUGS, DO NOT USE
                    docs_root_relative_path = en_linked_path.relative_to(lang_dir.parent)
                    updated_path = str(docs_root_relative_path).replace('en/', '/../')
                else:
                    # Use relative links
                    steps_up = len(parent_dir.relative_to(self.base_dir).parts)
                    updated_path = Path('../' * steps_up) / en_linked_path.relative_to(self.base_dir)
                    updated_path = str(updated_path).replace('/en/', '/')

                print(f"Redirecting link '[{text}]({path})' from {parent_dir} to {updated_path}")
                return f'[{text}]({updated_path})'
            else:
                print(f"Warning: Broken link '[{text}]({path})' found in {parent_dir} does not exist in /docs/en/.")

        return match.group(0)

    @staticmethod
    def update_html_tags(content):
        """Updates HTML tags in docs."""
        alt_tag = 'MISSING'

        # Remove closing slashes from self-closing HTML tags
        pattern = re.compile(r'<([^>]+?)\s*/>')
        content = re.sub(pattern, r'<\1>', content)

        # Find all images without alt tags and add placeholder alt text
        pattern = re.compile(r'!\[(.*?)\]\((.*?)\)')
        content, num_replacements = re.subn(pattern, lambda match: f'![{match.group(1) or alt_tag}]({match.group(2)})',
                                            content)

        # Add missing alt tags to HTML images
        pattern = re.compile(r'<img\s+(?!.*?\balt\b)[^>]*src=["\'](.*?)["\'][^>]*>')
        content, num_replacements = re.subn(pattern, lambda match: match.group(0).replace('>', f' alt="{alt_tag}">', 1),
                                            content)

        return content

    def process_markdown_file(self, md_file_path, lang_dir):
        """Process each markdown file in the language directory."""
        print(f'Processing file: {md_file_path}')
        with open(md_file_path, encoding='utf-8') as file:
            content = file.read()

        if self.update_links:
            content = self.md_link_regex.sub(lambda m: self.link_replacer(m, md_file_path.parent, lang_dir), content)

        if self.update_text:
            content = self.replace_front_matter(content, lang_dir)
            content = self.replace_admonitions(content, lang_dir)
            content = self.update_iframe(content)
            content = self.update_html_tags(content)

        with open(md_file_path, 'w', encoding='utf-8') as file:
            file.write(content)

    def process_language_directory(self, lang_dir):
        """Process each language-specific directory."""
        print(f'Processing language directory: {lang_dir}')
        for md_file in lang_dir.rglob('*.md'):
            self.process_markdown_file(md_file, lang_dir)

    def run(self):
        """Run the link fixing and front matter updating process for each language-specific directory."""
        for subdir in self.base_dir.iterdir():
            if subdir.is_dir() and re.match(r'^\w\w$', subdir.name):
                self.process_language_directory(subdir)


if __name__ == '__main__':
    # Set the path to your MkDocs 'docs' directory here
    docs_dir = str(Path(__file__).parent.resolve())
    fixer = MarkdownLinkFixer(docs_dir, update_links=True, update_text=True)
    fixer.run()