Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

create_single_doc.py 2.8 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
  1. import os
  2. import re
  3. import sys
  def adjust_image_paths(markdown_content, original_md_file_path, root_folder):
      """
      Adjusts image paths in the markdown content to be relative to the root folder.
      This includes markdown image syntax and HTML <img> tags.

      Targets that are empty, or that are not local file paths (URLs with a
      scheme such as ``https:`` or ``data:``, protocol-relative ``//host/...``
      references and ``#fragment`` anchors) are left exactly as written.

      :param markdown_content: The content of the markdown file.
      :param original_md_file_path: The file path of the original markdown file.
      :param root_folder: The root folder that contains all the markdown files.
      :return: Updated markdown content with adjusted image paths.
      """
      md_dir = os.path.dirname(original_md_file_path)

      def is_external(target):
          # A scheme needs at least two characters here so that a Windows drive
          # letter ("C:/img.png") is still handled as a local path.
          if re.match(r'[A-Za-z][A-Za-z0-9+.\-]+:', target):
              return True
          return target.startswith(('//', '#'))

      def adjust_path(match):
          original_path = match.group(1)
          stripped = original_path.strip()
          # Nothing to resolve for empty targets; external targets must not be
          # joined onto a filesystem directory.
          if not stripped or is_external(stripped):
              return match.group(0)

          # Construct the absolute path to the image
          absolute_image_path = os.path.abspath(os.path.join(md_dir, original_path))
          # Make the path relative to the root folder
          try:
              relative_image_path = os.path.relpath(absolute_image_path, root_folder)
          except ValueError:
              # os.path.relpath raises on Windows when the two paths are on
              # different drives; no relative form exists, so keep the original.
              return match.group(0)

          # Markdown/HTML references use forward slashes on every platform.
          relative_image_path = relative_image_path.replace(os.sep, '/')
          # Ensure the path starts with './' (or already climbs with '../')
          if relative_image_path not in ('.', '..') and not relative_image_path.startswith(('./', '../')):
              relative_image_path = './' + relative_image_path

          # Splice by position so that only the captured target changes; a
          # textual replace would also rewrite alt text or attributes that
          # happen to contain the same string.
          whole = match.group(0)
          start = match.start(1) - match.start(0)
          end = match.end(1) - match.start(0)
          return whole[:start] + relative_image_path + whole[end:]

      # Adjust paths in markdown image syntax
      adjusted_content = re.sub(r'!\[.*?\]\((.*?)\)', adjust_path, markdown_content)
      # Adjust paths in HTML <img> tags
      adjusted_content = re.sub(r'<img\s+[^>]*?src=["\'](.*?)["\'][^>]*>', adjust_path, adjusted_content)
      return adjusted_content
  def concatenate_markdown_files(root_folder, output_file):
      """
      Searches for markdown (.md) files starting from the root folder, adjusts their image paths,
      concatenates their contents, and writes them to a specified output markdown file.

      Directories and files are visited in sorted order so the result is the
      same on every run. The output file itself is skipped if it lives inside
      the root folder, so a previous run's result is never folded back in.
      Files are read and written as UTF-8.

      Image paths are rewritten relative to ``root_folder``, so they resolve
      as written only when the output file is placed in that folder.

      :param root_folder: The root directory to search for markdown files.
      :param output_file: The file path for the output markdown file.
      """
      output_abs = os.path.abspath(output_file)
      parts = []

      for subdir, dirs, files in os.walk(root_folder):
          # Sorting in place controls the order in which os.walk descends.
          dirs.sort()
          for file in sorted(files):
              if not file.endswith('.md'):
                  continue
              file_path = os.path.join(subdir, file)
              # Do not ingest our own (possibly stale) output.
              if os.path.abspath(file_path) == output_abs:
                  continue
              with open(file_path, 'r', encoding='utf-8') as md_file:
                  file_content = md_file.read()
              # Adjust image paths
              parts.append(adjust_image_paths(file_content, file_path, root_folder))

      # Every document is followed by a blank line, matching the separator
      # used between documents.
      with open(output_file, 'w', encoding='utf-8') as output_md_file:
          output_md_file.write(''.join(part + '\n\n' for part in parts))
  if __name__ == "__main__":
      # Command-line entry point: expects exactly a root folder and an output file.
      if len(sys.argv) != 3:
          print("Usage: python script.py <root_folder> <output_file>")
          sys.exit(1)

      root_folder = sys.argv[1]
      output_file = sys.argv[2]

      # os.walk yields nothing for a missing directory, so without this check a
      # mistyped root would produce an empty output file and a success message.
      if not os.path.isdir(root_folder):
          print(f"Error: {root_folder} is not a directory.", file=sys.stderr)
          sys.exit(1)

      concatenate_markdown_files(root_folder, output_file)
      print(f"All markdown files from {root_folder} have been concatenated into {output_file}.")
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...