Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

rationalize_fastq 2.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
  1. #!/bin/bash
  # Help text printed for -h and on every usage error.
  usage="
$(basename -- "$0") [-h] [-i -o]
Consolidate fastq(.gz) files.
-h show this help text.
-i input file or directory.
-o output file. If input file is a single file a symlink is made
otherwise the output is the concatenation of all files found.
If the output file has the extension '.gz' the final output
will be compressed, else it will be plain text."

  # Track whether each mandatory option was supplied.
  iflag=false
  oflag=false

  # The leading ':' in the optstring selects silent error reporting so the
  # ':' (missing argument) and '?' (unknown option) arms can print our own
  # diagnostics.
  while getopts ':hi:o:' option; do
    case "$option" in
      h)
        echo "$usage"
        exit 0
        ;;
      i)
        INPUT=$OPTARG
        iflag=true
        ;;
      o)
        OUTPUT=$OPTARG
        oflag=true
        ;;
      :)
        printf "missing argument for -%s\n" "$OPTARG" >&2
        echo "$usage" >&2
        exit 1
        ;;
      \?)
        printf "illegal option: -%s\n" "$OPTARG" >&2
        echo "$usage" >&2
        exit 1
        ;;
    esac
  done
  # Drop the parsed options; any remaining positional arguments are unused.
  shift $((OPTIND - 1))

  # Both options are mandatory. Compare the flags as strings instead of
  # executing their values as commands, and keep the whole diagnostic on
  # stderr.
  if [[ "$iflag" != true || "$oflag" != true ]]; then
    echo "$usage" >&2
    echo "" >&2
    echo "Both -i and -o must be specified." >&2
    exit 1
  fi
  #######################################
  # Print an absolute form of a path: the physical (symlink-resolved)
  # directory containing it, followed by its final component. The final
  # component itself is not resolved and does not need to exist.
  # Arguments: $1 - path to convert (relative or absolute)
  # Outputs:   the absolute path on stdout; if the containing directory
  #            cannot be entered, the path is printed unchanged instead
  # Returns:   0 on success, 1 if the containing directory cannot be entered
  #######################################
  function abspath {
    local path=$1
    local dir base

    # CDPATH is cleared so cd cannot echo a directory name into the captured
    # output; '--' protects directory names that start with '-'.
    if ! dir=$(CDPATH= cd -- "$(dirname -- "$path")" && pwd -P); then
      # Do not fall back to the current directory: that would silently
      # point the caller at the wrong location.
      printf '%s\n' "$path"
      return 1
    fi

    base=$(basename -- "$path")
    if [[ "$base" == "/" ]]; then
      # The path was the root directory itself.
      printf '%s\n' "$dir"
      return 0
    fi

    # Strip a trailing slash (only present when dir is "/") to avoid "//name".
    printf '%s/%s\n' "${dir%/}" "$base"
  }
  #######################################
  # Print the text after the last '.' in the final component of a path.
  # Arguments: $1 - file name or path
  # Outputs:   the extension without the dot on stdout; an empty line when
  #            the final component contains no '.'
  #######################################
  function extension {
    # Keep the scratch variable local so direct calls do not clobber a
    # caller's 'filename'.
    local filename
    filename=$(basename -- "$1")
    case "$filename" in
      *.*)
        printf '%s\n' "${filename##*.}"
        ;;
      *)
        # Without a dot, "${filename##*.}" would expand to the whole name;
        # report "no extension" instead.
        printf '\n'
        ;;
    esac
  }
  # Make the function available to child bash processes.
  export -f extension
  # ---------------------------------------------------------------------------
  # Main body: resolve the input/output paths, then either
  #   * single input file: symlink it, compress it or decompress it, or
  #   * input directory:   concatenate every fastq file found below it,
  #                        (de)compressing to match the output extension.
  # Reads INPUT and OUTPUT as set by the option parser and uses the abspath
  # and extension functions defined earlier in this script.
  # ---------------------------------------------------------------------------

  # A failure in any stage of the pipelines below must fail the pipeline.
  set -o pipefail

  # Print a message on stderr and abort.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }

  # Abort after a failed write. OUTPUT was verified not to exist before any
  # writing started, so whatever is there now is our own partial result;
  # removing it keeps a rerun from stopping at "Output exists".
  abort_output() {
    rm -f -- "$OUTPUT"
    die "$1"
  }

  # Succeed when an extension denotes gzip data. Case-insensitive because the
  # find below matches names with -iname and may return e.g. "x.FASTQ.GZ".
  is_gz() {
    case "$1" in
      [gG][zZ]) return 0 ;;
      *) return 1 ;;
    esac
  }

  # Check the output directory before resolving paths, so a missing directory
  # is reported instead of being resolved to some other location.
  [[ -d "$(dirname -- "$OUTPUT")" ]] \
    || die "Error: directory of ${OUTPUT} does not exist."

  INPUT=$(abspath "$INPUT")
  OUTPUT=$(abspath "$OUTPUT")
  OUTEXT=$(extension "$OUTPUT")
  echo "Input: ${INPUT}"
  echo "Output: ${OUTPUT}"

  if [[ -e "$OUTPUT" ]]; then
    die "Output exists, exiting"
  fi
  if [[ ! -e "$INPUT" ]]; then
    die "Error: ${INPUT} does not exist."
  fi

  if is_gz "$OUTEXT"; then
    out_gz=true
  else
    out_gz=false
  fi

  if [[ -f "$INPUT" ]]; then
    echo "Single file detected."
    INEXT=$(extension "$INPUT")
    if is_gz "$INEXT"; then
      in_gz=true
    else
      in_gz=false
    fi

    if [[ "$in_gz" == "$out_gz" ]]; then
      # Same compression state on both sides (this includes plain-to-plain
      # with different extensions such as .fastq -> .fq): nothing needs
      # converting.
      echo "- Creating symlink."
      if ln -s -- "$INPUT" "$OUTPUT"; then
        echo "Created symlink ${OUTPUT}"
      else
        die "Failed to create symlink ${OUTPUT}"
      fi
    elif [[ "$out_gz" == true ]]; then
      printf '%s' " - Compressing input..."
      gzip -c -- "$INPUT" > "$OUTPUT" \
        || abort_output "Failed to compress ${INPUT}"
      echo "done."
    else
      printf '%s' " - Decompressing input..."
      gzip -d -c -- "$INPUT" > "$OUTPUT" \
        || abort_output "Failed to decompress ${INPUT}"
      echo "done."
    fi

  elif [[ -d "$INPUT" ]]; then
    echo "Directory detected, concatenating found files."

    # Collect the matches NUL-delimited so names containing whitespace
    # survive intact. The name tests are grouped on their own so that the
    # output-file exclusion applies to every pattern, not only the last one.
    files=()
    while IFS= read -r -d '' fname; do
      files+=("$fname")
    done < <(find "$INPUT" -type f \( \
      -iname '*.fastq' -o -iname '*.fastq.gz' \
      -o -iname '*.fq' -o -iname '*.fq.gz' \) \
      ! -path "$OUTPUT" -print0)

    nfiles=${#files[@]}
    echo " - Found ${nfiles} files."
    if ((nfiles == 0)); then
      die " - No fastq files found, exiting"
    fi

    # All inputs must share one extension so they can be handled uniformly.
    ext=""
    for fname in "${files[@]}"; do
      newext=$(extension "$fname")
      if [[ -z "$ext" ]]; then
        ext=$newext
      elif [[ "$ext" != "$newext" ]]; then
        die " - Multiple file extensions found, exiting"
      fi
    done

    printf '%s' " - Concatenating files..."
    # The names are fed through xargs -0 rather than placed on one command
    # line, so a very large number of files is split into several cat calls.
    if is_gz "$ext" && [[ "$out_gz" == false ]]; then
      printf '%s\0' "${files[@]}" | xargs -0 cat -- | gzip -d > "$OUTPUT" \
        || abort_output "Failed to write ${OUTPUT}"
    elif ! is_gz "$ext" && [[ "$out_gz" == true ]]; then
      printf '%s\0' "${files[@]}" | xargs -0 cat -- | gzip > "$OUTPUT" \
        || abort_output "Failed to write ${OUTPUT}"
    else
      printf '%s\0' "${files[@]}" | xargs -0 cat -- > "$OUTPUT" \
        || abort_output "Failed to write ${OUTPUT}"
    fi
    echo "done."

  else
    die "Error: ${INPUT} is neither a regular file nor a directory."
  fi
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...