Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

init.bash 2.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
  #!/usr/bin/env bash
  #
  # init.bash - one-time setup of a working copy of this project.
  #
  # Steps, in order:
  #   1. create the 'venv' virtualenv if it is missing, and stop (non-zero
  #      exit) when no virtualenv is active so the user can activate it
  #   2. install the Python packages listed in requirements.txt
  #   3. make sure a DVC remote pointing at $DVCSTORE_DIR is configured
  #   4. symlink an already-downloaded DevGPT dataset into data/external/
  #   5. when not inside a Git work tree, configure DVC to run without SCM
  #   6. run 'dvc pull'
  #
  # Relative paths (venv, requirements.txt, .dvc, data/external) are resolved
  # against the current directory, so run this from the project top directory.
  #
  # Environment overrides (defaults are the original hard-coded locations):
  #   DVCSTORE_DIR  directory used as the local DVC remote
  #   DEVGPT_DIR    directory holding the externally downloaded DevGPT dataset

  set -euo pipefail

  # configuration
  readonly DVCSTORE_DIR="${DVCSTORE_DIR:-/mnt/data/dvcstore}"
  readonly DEVGPT_DIR="${DEVGPT_DIR:-/mnt/data/MSR_Challenge_2024/DevGPT-data.v9}"
  readonly DEVGPT_LINK='data/external/DevGPT'

  # initialize virtualenv, if needed
  echo "Checking for virtualenv (venv)"
  if [[ ! -e venv/bin/activate ]]; then
    python3 -m venv venv
  fi
  # A script cannot activate a virtualenv in the caller's shell, so ask the
  # user to do it. Exit non-zero: setup has NOT completed at this point.
  if [[ -z "${VIRTUAL_ENV:-}" ]]; then
    echo "Run 'source venv/bin/activate'"
    echo "then re-run this script '$0'"
    exit 1
  fi

  # virtualenv is initialized
  echo "Installing Python packages"
  pip install -q -r requirements.txt
  #pip install -q --upgrade --editable . # Not needed because of first line in requirements.txt

  # configuring DVC remote
  dvc_remotes="$(dvc remote list)"
  if grep -q -F -e "$DVCSTORE_DIR" <<<"$dvc_remotes"; then
    echo "DVC storage looks to be configured correctly:"
    printf ' %s\n' "$dvc_remotes"
  else
    echo "Adding local storage to .dvc/config.local"
    # printf instead of a '<<-' here-document: '<<-' strips only leading
    # tabs, so it silently breaks when the script is re-indented with spaces.
    printf '%s\n' \
      '[core]' \
      'remote = local' \
      "['remote \"local\"']" \
      "url = $DVCSTORE_DIR" \
      >>.dvc/config.local
  fi

  # try to create symlink to externally downloaded DevGPT dataset,
  # if possible, instead of having each user re-download it
  # (-L also catches a dangling symlink, which -e alone reports as missing)
  if [[ -e "$DEVGPT_LINK" || -L "$DEVGPT_LINK" ]]; then
    echo "'$DEVGPT_LINK' already exists"
  elif [[ -d "$DEVGPT_DIR" ]]; then
    echo "Linking '$DEVGPT_DIR'"
    # the parent directory may be absent in a fresh checkout
    mkdir -p "${DEVGPT_LINK%/*}"
    ln -s "$DEVGPT_DIR" "$DEVGPT_LINK"
  else
    echo "Could not find '$DEVGPT_DIR' directory with DevGPT dataset"
  fi

  # check if we are inside Git repository, and if it is not the case,
  # configure DVC to not require to be run from inside git repo
  #
  # dirname (rather than "${0%/*}") also works when the script is invoked
  # without a slash in its name, e.g. 'bash init.bash'.
  script_dir="$(realpath "$(dirname -- "${BASH_SOURCE[0]}")")"
  # stderr is dropped and failure tolerated on purpose: "not a git repository"
  # is an expected outcome here and is reported by the else-branch below.
  inside_work_tree="$(git rev-parse --is-inside-work-tree 2>/dev/null || true)"
  if [[ "$inside_work_tree" == "true" ]]; then
    git_topdir="$(realpath "$(git rev-parse --show-toplevel)")"
    # Warn only when the repository found is NOT rooted at this script's
    # directory, i.e. we are probably inside some unrelated enclosing repo.
    if [[ "$script_dir" != "$git_topdir" ]]; then
      {
        echo "WARNING: possibly incorrect git repository found:"
        echo "- top directory of git repo: $git_topdir"
        echo "- directory with this script: $script_dir"
      } >&2
    fi
  else
    echo "Not inside Git repository; configuring DVC to handle this case"
    dvc config --local core.no_scm true
  fi

  # getting data from DVC
  echo "Retrieving data from DVC"
  dvc pull
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...