Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

generate_raw_dictionary_file.R 46 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
  1. library(readr)
  2. library(stringr)
  3. library(purrr)
  4. library(dplyr)
  5. library(tidyr)
  6. library(WriteXLS)
  # Load the raw UN tables ----
  # Every CSV found under data/raw/UN Data/ is parsed as comma-delimited text
  # with its first line skipped, and the parsed tables are row-bound into one
  # data frame. The `filepath` column identifies the input each row came from.
  df <- map_dfr(
    fs::dir_ls('data/raw/UN Data/', glob = '*.csv'),
    read_delim,
    delim = ',',
    skip = 1,
    .id = "filepath"
  )
  # Reduce to one dictionary row per series ----
  # Only the four columns the dictionary needs are kept. Within each Series,
  # only the rows from its most recent Year survive; since the same series is
  # reported once per country, just the first of those rows is retained.
  df <- df %>%
    select(Series, Year, Source, filepath) %>%
    group_by(Series) %>%
    filter(Year == max(Year)) %>%
    ungroup() %>%
    distinct(Series, .keep_all = TRUE)
  # Align series names with the preprocessed dataset ----
  # The steps run in order inside a single mutate(): lowercase, turn spaces
  # and hyphens into underscores, collapse runs of underscores, then drop
  # commas. unite() then joins the name and its year (default "_" separator),
  # keeping the Year column because remove = FALSE.
  df <- df %>%
    mutate(
      Series = tolower(Series),
      Series = gsub(' ', '_', Series),
      Series = gsub('-', '_', Series),
      Series = gsub('_+', '_', Series),
      Series = gsub(',', '', Series)
    ) %>%
    unite("Series", Series:Year, remove = FALSE)
  # Append variables that do not come from the UN CSV files ----
  # Each appended row is a character vector of (variable name, year, source),
  # added with rbind() in the same order as before. Every source citation is
  # written once and reused, so rows sharing a source cannot drift apart. This
  # also restores the day that was missing from the lethality_rate_percent
  # citation ("accessed April, 2020." -> "accessed 28 April, 2020."), matching
  # the other ECDC-derived rows.
  # local() keeps the helper strings out of the global environment.
  df <- local({
    un_comtrade <- paste0('United Nations Statistics Division, New York, ',
                          'Commodity Trade Statistics Database (UN COMTRADE), ',
                          'last accessed May 2019.')
    ecdc <- paste0('European Centre for Disease ',
                   'Prevention and Control. Last ',
                   'accessed 28 April, 2020.')
    ecdc_engineered <- paste0('Engineered based on data from ', ecdc)
    google_mobility <- paste0('Google Community Mobility ',
                              'Report. Last accessed 28 ',
                              'April, 2020')
    wikipedia <- paste0('https://en.wikipedia.org/w/index.php?title=2019%E2%80%93',
                        '20_coronavirus_pandemic_by_country_and_territory&oldid=9',
                        '53662872 Last accessed April 28, 2020')
    wikipedia_engineered <- paste0('Engineering from ECDC and ', wikipedia)
    first_death_engineered <- paste0('Engineering based on ',
                                     'data from ECDC ',
                                     'counting the first death ',
                                     'after February, 15th.')

    # Engineered UN variable
    with_un <- rbind(df, c('whos_major_trade_partner_exp_1', 2018, un_comtrade))

    # COVID-19 and engineered columns
    rbind(with_un,
          c('locality_code', '2020', ecdc),
          c('locality_name', '2020', ecdc),
          c('date', '2020', ecdc),
          c('new_cases', '2020', ecdc),
          c('new_deaths', '2020', ecdc),
          c('acc_cases', '2020', ecdc_engineered),
          c('acc_deaths', '2020', ecdc_engineered),
          c('lethality_rate_percent', '2020', ecdc_engineered),
          c('retail_recreation', '2020', google_mobility),
          c('grocery_pharmacy', '2020', google_mobility),
          c('parks', '2020', google_mobility),
          c('transit_stations', '2020', google_mobility),
          c('workplaces', '2020', google_mobility),
          c('residential', '2020', google_mobility),
          c('first_case_date', '2020', wikipedia),
          c('n_days_since_1st_case', '2020', wikipedia_engineered),
          c('first_death_date', '2020', first_death_engineered),
          c('n_days_since_1st_death', '2020', first_death_engineered)
    )
  })
  # Rename the columns and describe the COVID-19 / mobility variables ----
  colnames(df) <- c('Variable name', 'Year', 'Source', 'Filepath')

  # Description is filled for the epidemiological and mobility variables and
  # left as '' for everything else.
  # Text fixes relative to the previous version:
  #   * paste0() glued sentences together with no space ("theaters.This
  #     variable", "fora given country", "NAit means"); separators are restored.
  #   * "andmovie" -> "and movie"; missing comma after "shopping centers".
  #   * first_case_date said NA means no *death* confirmed; it now says case.
  # The sentence shared by all six mobility variables is written once.
  # local() keeps that helper string out of the global environment.
  df <- local({
    baseline_note <- paste0(
      ' This variable indicates how visits and length of stay to this ',
      'category of location has varied (in percent, positively or ',
      'negatively) compared to the baseline. The baseline is the ',
      'median value, for the corresponding day of the week, during the ',
      '5-week period Jan 3–Feb 6, 2020')

    df %>%
      mutate(Description = case_when(
        `Variable name` == 'retail_recreation' ~
          paste0('Mobility trends for places like restaurants, cafes, ',
                 'shopping centers, theme parks, museums, libraries, and ',
                 'movie theaters.',
                 baseline_note),
        `Variable name` == 'grocery_pharmacy' ~
          paste0('Mobility trends for places like grocery markets, ',
                 'food warehouses, farmers markets, specialty food shops, ',
                 'drug stores, and pharmacies.',
                 baseline_note),
        `Variable name` == 'parks' ~
          paste0('Mobility trends for places like national parks, public ',
                 'beaches, marinas, dog parks, plazas, and public gardens.',
                 baseline_note),
        `Variable name` == 'transit_stations' ~
          paste0('Mobility trends for places like public transport hubs ',
                 'such as subway, bus, and train stations.',
                 baseline_note),
        `Variable name` == 'workplaces' ~
          paste0('Mobility trends for places of work.', baseline_note),
        `Variable name` == 'residential' ~
          paste0('Mobility trends for places of residence.', baseline_note),
        `Variable name` == 'date' ~
          'Date for epidemiological variables. Format: YY-MM-DD',
        `Variable name` == 'new_cases' ~
          'Number of new cases for a specific date for a given country.',
        `Variable name` == 'new_deaths' ~
          'Number of new deaths for a specific date for a given country.',
        `Variable name` == 'acc_cases' ~
          'Accumulated number of cases up to the date for a given country.',
        `Variable name` == 'acc_deaths' ~
          paste0('Accumulated number of deaths up to the date for a given ',
                 'country.'),
        `Variable name` == 'lethality_rate_percent' ~
          paste0('Lethality rate in percent up to the last date in the ',
                 'dataset for a given country'),
        `Variable name` == 'first_case_date' ~
          paste0('The date of the first confirmed case of COVID-19 for a ',
                 'given country. If it is NA it means that this country ',
                 'does not have any case confirmed.'),
        `Variable name` == 'first_death_date' ~
          paste0('The date of the first confirmed death due to COVID-19 for ',
                 'a given country, starting from February 15th, 2020. If it ',
                 'is NA it means that this country does not have any death ',
                 'confirmed.'),
        TRUE ~ ''))
  })
  # Fill in Filepath and Docpath for the appended variables ----
  # Rows are selected by variable name instead of the hard-coded positions
  # 174-192, which only point at the intended rows when exactly 173 UN series
  # precede them. The name groups below are the rows those positions refer to,
  # given the order in which the non-UN rows were appended.
  # local() keeps the helper vectors out of the global environment.
  df <- local({
    engineered_vars <- c('whos_major_trade_partner_exp_1',
                         'acc_cases', 'acc_deaths', 'lethality_rate_percent',
                         'first_case_date', 'n_days_since_1st_case',
                         'first_death_date', 'n_days_since_1st_death')
    ecdc_vars <- c('locality_code', 'locality_name', 'date',
                   'new_cases', 'new_deaths')
    mobility_vars <- c('retail_recreation', 'grocery_pharmacy', 'parks',
                       'transit_stations', 'workplaces', 'residential')
    var_name <- df$`Variable name`

    df$Filepath[var_name %in% engineered_vars] <-
      'Does not apply. Engineered variable'
    df$Filepath[var_name %in% ecdc_vars] <-
      'data/raw/COVID19_worldwide_raw.csv'
    df$Filepath[var_name %in% mobility_vars] <-
      'data/raw/Global_Mobility_Report.csv'

    # Add also docpath (PDF) to dictionary.
    # The pattern is a regular expression, so the dot is escaped and the match
    # is anchored to the end: only a trailing ".csv" extension is swapped.
    df <- df %>%
      mutate(Docpath = str_replace(Filepath, '\\.csv$', '.pdf')) %>%
      mutate(Docpath = str_replace(Docpath, 'data/raw', 'documentation'))

    # Every appended variable except whos_major_trade_partner_exp_1 (formerly
    # rows 175-192) has no companion PDF.
    no_extra_docs <- setdiff(c(engineered_vars, ecdc_vars, mobility_vars),
                             'whos_major_trade_partner_exp_1')
    df$Docpath[var_name %in% no_extra_docs] <-
      'There is no extra documentation.'

    df
  })
  # Correct variable names against the UN documentation ----
  # * Maternal mortality ratio is per 100k livebirths, not per 100k people
  #   (SYB62_246_201907_Population growth and indicators of fertility and
  #   mortality.pdf).
  # * The labour force figures are participation *rates*
  #   (SYB62_329_201904_Labour Force and Unemployment.pdf).
  # Names that match none of the cases are left untouched.
  df <- df %>%
    mutate(`Variable name` = case_when(
      `Variable name` ==
        'maternal_mortality_ratio_(deaths_per_100000_population)_2015' ~
        'maternal_mortality_ratio_(deaths_per_100000_livebirths)_2015',
      `Variable name` == 'labour_force_participation_total_2019' ~
        'labour_force_participation_rate_total_2019',
      `Variable name` == 'labour_force_participation_male_2019' ~
        'labour_force_participation_rate_male_2019',
      `Variable name` == 'labour_force_participation_female_2019' ~
        'labour_force_participation_rate_female_2019',
      TRUE ~ `Variable name`
    ))
  219. # Description
  220. df %>%
  221. mutate(Description = case_when(
  222. `Variable name` == 'index_of_industrial_production:_total_industry_mining;_manufacturing;_electricity_gas_and_water_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  223. `Variable name` == 'index_of_industrial_production:_mining_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  224. `Variable name` == 'index_of_industrial_production:_manufacturing_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  225. `Variable name` == 'index_of_industrial_production:_food_beverages_and_tobacco_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  226. `Variable name` == 'index_of_industrial_production:_textiles_wearing_apparel_leather_footwear_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  227. `Variable name` == 'index_of_industrial_production:_chemicals_petroleum_rubber_and_plastic_products_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  228. `Variable name` == 'index_of_industrial_production:_metal_products_and_machinery_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  229. `Variable name` == 'index_of_industrial_production:_electricity_gas_steam_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  230. `Variable name` == 'index_of_industrial_production:_water_and_waste_management_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  231. `Variable name` == 'index_of_industrial_production:_basic_metals_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  232. `Variable name` == 'index_of_industrial_production:_electricity_gas_and_water_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  233. `Variable name` == 'index_of_industrial_production:_metal_products_(index_base:_2005=100)_2013' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  234. `Variable name` == 'index_of_industrial_production:_machinery_(index_base:_2005=100)_2013' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  235. `Variable name` == 'index_of_industrial_production:_miscellaneous_manufacturing_industries_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  236. `Variable name` == 'index_of_industrial_production:_mining_and_manufacturing_(index_base:_2005=100)_2014' ~ 'The Index of Industrial Production measures volume changes in the production of an economy by International Standard Industrial Classification of All Economic Activities (ISIC)',
  237. `Variable name` == 'emissions_(thousand_metric_tons_of_carbon_dioxide)_2014' ~ 'Estimates are expressed in million metric tons of carbon. Per capita emission estimates are expressed in metric tons of carbon. Source: <https://cdiac.ess-dive.lbl.gov/ftp/ndp030/global.1751_2014.ems>.',
  238. `Variable name` == 'emissions_per_capita_(metric_tons_of_carbon_dioxide)_2014' ~ 'Per capita emission estimates are expressed in metric tons of carbon. Source: <https://cdiac.ess-dive.lbl.gov/ftp/ndp030/global.1751_2014.ems>.',
  239. `Variable name` == 'agricultural_production_(index_base:_2004_2006_=_100)_2016' ~ 'Relative level of the aggregate volume of agricultural production for each year in comparison with the base period 2004-2006',
  240. `Variable name` == 'food_production_(index_base:_2004_2006_=_100)_2016' ~ 'Food production index covers food crops that are considered edible and that contain nutrients. Coffee and tea are excluded because, although edible, they have no nutritive value. Source <https://data.worldbank.org/indicator/AG.PRD.FOOD.XD>.',
  241. `Variable name` == 'urban_population_(percent_growth_rate_per_annum)_2015' ~ 'Rate at which the percentage urban population grows or declines. Source: <https://population.un.org/wup/Publications/Files/WUP2018-Report.pdf>.',
  242. `Variable name` == 'rural_population_(percent_growth_rate_per_annum)_2015' ~ 'Rate at which the percentage rural population grows or declines. Source: <https://population.un.org/wup/Publications/Files/WUP2018-Report.pdf>.',
  243. `Variable name` == 'urban_population_(percent)_2018' ~ 'Urban Population percent at Mid-Year. Source: <https://population.un.org/wup/Publications/Files/WUP2018-Report.pdf>.',
  244. `Variable name` == 'capital_city_population_(as_a_percentage_of_total_population)_2015' ~ 'Capital city as a percentage of total population.',
  245. `Variable name` == 'capital_city_population_(as_a_percentage_of_total_urban_population)_2015' ~ 'Capital city as a percentage of total urban population.',
  246. `Variable name` == 'capital_city_population_(thousands)_2018' ~ 'Population living in capital city (thousand).',
  247. `Variable name` == 'surface_area_(thousand_km2)_2017' ~ 'Total surface area (square kilometres). Source: <https://unstats.un.org/unsd/demographic-social/products/dyb/dybsets/2015.pdf>',
  248. `Variable name` == 'population_mid_year_estimates_(millions)_2019' ~ 'De facto population in a country, area or region as of 1 July. Source: <https://population.un.org/wpp/Download/Standard/Population/>',
  249. `Variable name` == 'population_mid_year_estimates_for_males_(millions)_2019' ~ 'Male De facto population in a country, area or region as of 1 July. Source: <https://population.un.org/wpp/Download/Standard/Population/>',
  250. `Variable name` == 'population_mid_year_estimates_for_females_(millions)_2019' ~ 'Female De facto population in a country, area or region as of 1 July. Source: <https://population.un.org/wpp/Download/Standard/Population/>',
  251. `Variable name` == 'sex_ratio_(males_per_100_females)_2019' ~ 'Number of males per 100 females in the population. Source: <https://population.un.org/wpp/Download/Standard/Population/>',
  252. `Variable name` == 'population_aged_0_to_14_years_old_(percentage)_2019' ~ 'Percentage of population 0-14 aged group.',
  253. `Variable name` == 'population_aged_60+_years_old_(percentage)_2019' ~ 'Percentage of population 60+ aged group.',
  254. `Variable name` == 'population_density_2019' ~ 'Population per square Kilometre',
  255. `Variable name` == 'imports_cif_(millions_of_us_dollars)_2018' ~ 'The total value of imports (CIF) in millions of US dollars',
  256. `Variable name` == 'exports_fob_(millions_of_us_dollars)_2018' ~ 'The total value of exports (FOB) in millions of US dollars',
  257. `Variable name` == 'balance_imports/exports_(millions_of_us_dollars)_2018' ~ 'Balance of trade in millions of US dollars',
  258. `Variable name` == 'balance_of_payments:_current_account_(millions_of_us_dollars)_2018' ~ 'Current account of the balance of payments in millions of US dollars (Description on: https://data.imf.org/api/document/download?key=60567161)',
  259. `Variable name` == 'balance_of_payments:_capital_account_(millions_of_us_dollars)_2018' ~ 'Capital account of the balance of payments in millions of US dollars (Description on: https://data.imf.org/api/document/download?key=60567161)',
  260. `Variable name` == 'balance_of_payments:_financial_account_(millions_of_us_dollars)_2018' ~ 'Financial account of the balance of payments in millions of US dollars (Description on: https://data.imf.org/api/document/download?key=60567161)',
  261. `Variable name` == 'consumer_price_index:_general_2018' ~ 'General consumer price index',
  262. `Variable name` == 'consumer_price_index:_food_2018' ~ 'Consumer price index for food',
  263. `Variable name` == 'exchange_rates:_end_of_period_(national_currency_per_us_dollar)_2018' ~ 'National currency per US dollar, end of period',
  264. `Variable name` == 'exchange_rates:_period_average_(national_currency_per_us_dollar)_2018' ~ 'National currency per US dollar, period average ',
  265. `Variable name` == 'land_area_(thousand_hectares)_2016' ~ 'Country area (in thousand of hectares) excluding area under inland waters and coastal waters (as defined by FAO: http://www.fao.org/faostat/)',
  266. `Variable name` == 'arable_land_(thousand_hectares)_2016' ~ 'The total of areas (in thousand of hectares) under temporary crops, temporary meadows and pastures, and land with temporary fallow. Arable land does not include land that is potentially cultivable but is not normally cultivated.',
  267. `Variable name` == 'permanent_crops_(thousand_hectares)_2016' ~ 'Land cultivated (in thousand of hectares) with long-term crops which do not have to be replanted for several years, land under trees and shrubs producing flowers, and nurseries (except those for forest trees, which should be classified under ""Forestry""). Permanent meadows and pastures are excluded from land under permanent crops."',
  268. `Variable name` == 'forest_cover_(thousand_hectares)_2016' ~ 'Forest cover (in thousand of hectares) as defined by FAO (http://www.fao.org/faostat/)',
  269. `Variable name` == 'arable_land_(%_of_total_land_area)_2016' ~ 'The total of areas (in percentage) under temporary crops, temporary meadows and pastures, and land with temporary fallow. Arable land does not include land that is potentially cultivable but is not normally cultivated.',
  270. `Variable name` == 'permanent_crops_(%_of_total_land_area)_2016' ~ 'Land cultivated (in percentage) with long-term crops which do not have to be replanted for several years, land under trees and shrubs producing flowers, and nurseries (except those for forest trees, which should be classified under ""Forestry""). Permanent meadows and pastures are excluded from land under permanent crops."',
  271. `Variable name` == 'forest_cover_(%_of_total_land_area)_2016' ~ 'Forest cover (in percentage) as defined by FAO (http://www.fao.org/faostat/)',
  272. `Variable name` == 'important_sites_for_terrestrial_biodiversity_protected_(%_of_total_sites_protected)_2018' ~ 'Proportion of important sites for terrestrial biodiversity that are covered by protected areas (https://unstats.un.org/sdgs/metadata/files/Metadata-15-01-02.pdf)',
  273. `Variable name` == 'agriculture_hunting_forestry_and_fishing_(%_of_gross_value_added)_2016' ~ 'Gross value added of Agricultura, Hunting, Forestry and Fishing (in percentage)',
  274. `Variable name` == 'industry_(%_of_gross_value_added)_2016' ~ 'Gross value added of Industry (in percentage).',
  275. `Variable name` == 'services_(%_of_gross_value_added)_2016' ~ 'Gross value added of Services (in percentage).',
  276. `Variable name` == 'health_personnel:_physicians_(number)_2018' ~ 'Absolute number of physicians in a location.',
  277. `Variable name` == 'health_personnel:_physicians_(per_1000_population)_2018' ~ 'Rate of physicians for 1000 inhabitants (workforce density) in a location.',
  278. `Variable name` == 'health_personnel:_nurses_and_midwives_(number)_2018' ~ 'Absolute number of nurses and midwives in a location.',
  279. `Variable name` == 'health_personnel:_nurses_and_midwives_personnel_(per_1000_population)_2018' ~ 'Rate of nurses and midwives for 1000 inhabitants (workforce density) in a location.',
  280. `Variable name` == 'health_personnel:_dentists_(number)_2018' ~ 'Absolute number of dentists in a location.',
  281. `Variable name` == 'health_personnel:_dentists_(per_1000_population)_2018' ~ 'Rate of dentists for 1000 inhabitants (workforce density) in a location.',
  282. `Variable name` == 'health_personnel:_pharmacists_(number)_2018' ~ 'Absolute number of pharmacists in a location.',
  283. `Variable name` == 'health_personnel:_pharmacists_(per_1000_population)_2018' ~ 'Rate of pharmacists for 1000 inhabitants (workforce density) in a location.',
  284. `Variable name` == 'employment_by_industry:_agriculture_(%)_male_and_female_2019' ~ 'Percentage of total employment in agriculture, both sexes.',
  285. `Variable name` == 'employment_by_industry:_industry_(%)_male_and_female_2019' ~ 'Percentage of total employment in industry, both sexes.',
  286. `Variable name` == 'employment_by_industry:_services_(%)_male_and_female_2019' ~ 'Percentage of total employment in services, both sexes.',
  287. `Variable name` == 'employment_by_industry:_agriculture_(%)_male_2019' ~ 'Percentage of total employment in agriculture, males.',
  288. `Variable name` == 'employment_by_industry:_industry_(%)_male_2019' ~ 'Percentage of total employment in industry, males.',
  289. `Variable name` == 'employment_by_industry:_services_(%)_male_2019' ~ 'Percentage of total employment in service, males.',
  290. `Variable name` == 'employment_by_industry:_agriculture_(%)_female_2019' ~ 'Percentage of total employment in agriculture, females.',
  291. `Variable name` == 'employment_by_industry:_industry_(%)_female_2019' ~ 'Percentage of total employment in industry, females.',
  292. `Variable name` == 'employment_by_industry:_services_(%)_female_2019' ~ 'Percentage of total employment in service, females.',
  293. `Variable name` == 'net_official_development_assistance_received:_bilateral_(millions_of_us_dollars)_2017' ~ 'Bilateral aid (millions of US dollars).',
  294. `Variable name` == 'net_official_development_assistance_received:_multilateral_(millions_of_us_dollars)_2017' ~ 'Multilateral aid (millions of US dollars).',
  295. `Variable name` == 'net_official_development_assistance_received:_total_(millions_of_us_dollars)_2017' ~ 'Total aid (millions of US dollars).',
  296. `Variable name` == 'net_official_development_assistance_received:_total_(as_%_gni)_2017' ~ 'Total aid as a percentage of Gross National Income (GNI).',
  297. `Variable name` == 'gdp_in_current_prices_(millions_of_us_dollars)_2017' ~ 'In millions of US dollars at current prices.',
  298. `Variable name` == 'gdp_per_capita_(us_dollars)_2017' ~ 'per capita US dollars',
  299. `Variable name` == 'gdp_in_constant_2010_prices_(millions_of_us_dollars)_2017' ~ 'In millions of US dollars at constant 2010 prices.',
  300. `Variable name` == 'gdp_real_rates_of_growth_(percent)_2017' ~ 'Percentage of real rates of growth.',
  301. `Variable name` == 'current_expenditure_other_than_staff_compensation_as_%_of_total_expenditure_in_public_institutions_(%)_2018' ~ 'Current expenditure other than staff compensation as percentage of total expenditure in public education institutions,',
  302. `Variable name` == 'all_staff_compensation_as_%_of_total_expenditure_in_public_institutions_(%)_2018' ~ 'All staff compensation as percentage of total expenditure in public education institutions.',
  303. `Variable name` == 'capital_expenditure_as_%_of_total_expenditure_in_public_institutions_(%)_2018' ~ 'Capital expenditure as percentage of total expenditure in public education institutions.',
  304. `Variable name` == 'expenditure_by_level_of_education:_pre_primary_(as_%_of_government_expenditure)_2018' ~ 'Percentage of government expenditure on education for pre-primary education.',
  305. `Variable name` == 'expenditure_by_level_of_education:_primary_(as_%_of_government_expenditure)_2018' ~ 'Percentage of government expenditure on education for primary education.',
  306. `Variable name` == 'expenditure_by_level_of_education:_secondary_(as_%_of_government_expenditure)_2018' ~ 'Percentage of government expenditure on education for secondary education.',
  307. `Variable name` == 'expenditure_by_level_of_education:_tertiary_(as_%_of_government_expenditure)_2018' ~ 'Percentage of government expenditure on education for tertiary education.',
  308. `Variable name` == 'public_expenditure_on_education_(%_of_government_expenditure)_2018' ~ 'Government expenditure on education as percentage of government total expenditure.',
  309. `Variable name` == 'public_expenditure_on_education_(%_of_gdp)_2018' ~ 'Government expenditure on education as percentage of GDP. ',
  310. `Variable name` == 'population_annual_rate_of_increase_(percent)_2015' ~ 'Annual rate of growth of the population.',
  311. `Variable name` == 'infant_mortality_for_both_sexes_(per_1000_live_births)_2015' ~ 'Number of infant deaths (box sexes) per 1,000 births.',
  312. `Variable name` == 'maternal_mortality_ratio_(deaths_per_100000_livebirths)_2015' ~ 'Maternal deaths per 100000 livebirths',
  313. `Variable name` == 'life_expectancy_at_birth_for_both_sexes_(years)_2015' ~ 'Average number of years of life expected at age 0 for both sexes.',
  314. `Variable name` == 'total_fertility_rate_(children_per_women)_2016' ~ 'Average number of live births per woman.',
  315. `Variable name` == 'life_expectancy_at_birth_for_males_(years)_2018' ~ 'Average number of years of life expected at age 0, males.',
  316. `Variable name` == 'life_expectancy_at_birth_for_females_(years)_2018' ~ 'Average number of years of life expected at age 0, females.',
  317. `Variable name` == 'primary_energy_production_(petajoules)_2016' ~ 'Primary energy production in petajoules per capita.',
  318. `Variable name` == 'net_imports_[imports_exports_bunkers]_(petajoules)_2016' ~ 'Net imports [Imports - Exports - Bunkers] in petajoules (Production, trade and supply of energy).',
  319. `Variable name` == 'changes_in_stocks_(petajoules)_2016' ~ 'Changes in stocks in petajoules (Production, trade and supply of energy).',
  320. `Variable name` == 'total_supply_(petajoules)_2016' ~ 'Total supply of energy in petajoules per capita',
  321. `Variable name` == 'supply_per_capita_(gigajoules)_2016' ~ 'Supply of energy in gigajoules per capita.',
  322. `Variable name` == 'grants_of_patents_(number)_2017' ~ 'Number of grants of patents.',
  323. `Variable name` == 'patents_in_force_(number)_2017' ~ 'Number of patents in force.',
  324. `Variable name` == 'resident_patent_filings_(per_million_population)_2017' ~ 'Residents patent fillings per million people.',
  325. `Variable name` == 'r_&_d_personnel:_total_(number_in_full_time_equivalent)_2016' ~ 'Total number of people employed in R&D (Full-time equivalent - FTE).',
  326. `Variable name` == 'r_&_d_personnel:_researchers_total_(number_in_full_time_equivalent)_2016' ~ 'Total number of researchers (Full-time equivalent - FTE)',
  327. `Variable name` == 'r_&_d_personnel:_researchers_women_(number_in_full_time_equivalent)_2016' ~ 'Total number of female researchers (Full-time equivalent - FTE)',
  328. `Variable name` == 'r_&_d_personnel:_technicians_total_(number_in_full_time_equivalent)_2016' ~ 'Total number of technicians and equivalent staff (Full-time equivalent - FTE)',
  329. `Variable name` == 'r_&_d_personnel:_technicians_women_(number_in_full_time_equivalent)_2016' ~ 'Total number of female technicians and equivalent staff (Full-time equivalent - FTE)',
  330. `Variable name` == 'r_&_d_personnel:_other_supporting_staff_total_(number_in_full_time_equivalent)_2016' ~ 'Total number of other supporting staff in R&D (Full-time equivalent - FTE)',
  331. `Variable name` == 'r_&_d_personnel:_other_supporting_staff_women_(number_in_full_time_equivalent)_2016' ~ 'Total number of other female supporting staff in R&D (Full-time equivalent - FTE)',
  332. `Variable name` == 'gross_domestic_expenditure_on_r_&_d:_as_a_percentage_of_gdp_(%)_2016' ~ 'Gross domestic expenditure on R&D as percentage of GDP.',
  333. `Variable name` == 'gross_domestic_expenditure_on_r_&_d:_government_(%)_2016' ~ 'Percentage of expenditure on R&D coming from the government.',
  334. `Variable name` == 'gross_domestic_expenditure_on_r_&_d:_funds_from_abroad_(%)_2016' ~ 'Percentage of expenditure on R&D coming from funds from abroad.',
  335. `Variable name` == 'gross_domestic_expenditure_on_r_&_d:_not_distributed_(%)_2016' ~ 'Percentage of expenditure on R&D coming from non-specified source.',
  336. `Variable name` == 'gross_domestic_expenditure_on_r_&_d:_business_enterprises_(%)_2016' ~ 'Percentage of expenditure on R&D coming from business enterprises.',
  337. `Variable name` == 'gross_domestic_expenditure_on_r_&_d:_private_non_profit_(%)_2016' ~ 'Percentage of expenditure on R&D coming from private or non-profit institutiohns.',
  338. `Variable name` == 'gross_domestic_expenditure_on_r_&_d:_higher_education_(%)_2016' ~ 'Percentage of expenditure on R&D coming from Higher Education.',
  339. `Variable name` == 'gross_enrollment_ratio_tertiary_(female)_2017' ~ 'Gross enrollment ratio of female students at the tertiary level.',
  340. `Variable name` == 'students_enrolled_in_tertiary_education_(thousands)_2018' ~ 'Number of students enrolled (thousands) at the tertiary level.',
  341. `Variable name` == 'gross_enrollment_ratio_tertiary_(male)_2018' ~ 'Gross enrollment ratio of male students at the tertiary level.',
  342. `Variable name` == 'students_enrolled_in_primary_education_(thousands)_2018' ~ 'Number of students enrolled (thousands) at the primary level.',
  343. `Variable name` == 'students_enrolled_in_secondary_education_(thousands)_2018' ~ 'Number of students enrolled (thousands) at the secondary level.',
  344. `Variable name` == 'gross_enrollement_ratio_primary_(male)_2018' ~ 'Gross enrollment ratio of male students at the primary level.',
  345. `Variable name` == 'gross_enrollment_ratio_primary_(female)_2018' ~ 'Gross enrollment ratio of female students at the primary level.',
  346. `Variable name` == 'gross_enrollment_ratio_secondary_(male)_2018' ~ 'Gross enrollment ratio of male students at the secnodary level.',
  347. `Variable name` == 'gross_enrollment_ratio_secondary_(female)_2018' ~ 'Gross enrollment ratio of female students at the secondary level.',
  348. `Variable name` == 'threatened_species:_vertebrates_(number)_2019' ~ 'Number of threatened vertebrate species.',
  349. `Variable name` == 'threatened_species:_invertebrates_(number)_2019' ~ 'Number of threatened invertebrate species.',
  350. `Variable name` == 'threatened_species:_plants_(number)_2019' ~ 'Number of threatened plant species.',
  351. `Variable name` == 'threatened_species:_total_(number)_2019' ~ 'Number of total threatened species.',
  352. `Variable name` == 'percentage_of_individuals_using_the_internet_2017' ~ 'Percentage of the population using the internet.',
  353. `Variable name` == 'seats_held_by_women_in_national_parliament_as_of_february_(%)_2019' ~ 'Proportion (percentage) of seats held by women in national parliament as of February of 2019.',
  354. `Variable name` == 'ratio_of_girls_to_boys_in_primary_education_2017' ~ 'Ratio of girls to boys in primary education.',
  355. `Variable name` == 'ratio_of_girls_to_boys_in_secondary_education_2017' ~ 'Ratio of girls to boys in secondary education.',
  356. `Variable name` == 'ratio_of_girls_to_boys_in_tertiary_education_2017' ~ 'Ratio of girls to boys in tertiary education.',
  357. `Variable name` == 'teachers_at_tertiary_level_(thousands)_2018' ~ 'Number of teachers (thousands) at the tertiary level.',
  358. `Variable name` == 'pupil_teacher_ratio_in_tertiary_education_2018' ~ 'Pupil teacher ratio at the tertiary level.',
  359. `Variable name` == 'teachers_at_primary_level_(thousands)_2018' ~ 'Number of teachers (thousands) at the primary level.',
  360. `Variable name` == 'pupil_teacher_ratio_in_primary_education_2018' ~ 'Pupil teacher ratio at the primary level.',
  361. `Variable name` == 'teachers_at_secondary_level_(thousands)_2018' ~ 'Number of teachers (thousands) at the secondary level.',
  362. `Variable name` == 'pupil_teacher_ratio_in_secondary_education_2018' ~ 'Pupil teacher ratio at the secondary level.',
  363. `Variable name` == 'international_migrant_stock:_both_sexes_(number)_2017' ~ 'Estimates of international migrant (absolute number) based on official statistics on the foreign-born or the foreign population, for both sexes',
  364. `Variable name` == 'international_migrant_stock:_both_sexes_(%_total_population)_2017' ~ 'Estimates of international migrant (percent) based on official statistics on the foreign-born or the foreign population, for both sexes',
  365. `Variable name` == 'international_migrant_stock:_male_(%_total_population)_2017' ~ 'Estimates of international migrant (percent) based on official statistics on the foreign-born or the foreign population, males',
  366. `Variable name` == 'international_migrant_stock:_female_(%_total_population)_2017' ~ 'Estimates of international migrant (percent) based on official statistics on the foreign-born or the foreign population, females',
  367. `Variable name` == 'total_refugees_and_people_in_refugee_like_situations_(number)_2018' ~ 'Total number of refugees and people in refugee like situations (mid-year).',
  368. `Variable name` == 'asylum_seekers_including_pending_cases_(number)_2018' ~ 'Total number of asylum seekers including pending cases (mid-year).',
  369. `Variable name` == 'other_of_concern_to_unhcr_(number)_2018' ~ 'Total number of individuals in a refugee-like situation other than refugees/asylum seekers (mid-year).',
  370. `Variable name` == 'total_population_of_concern_to_unhcr_(number)_2018' ~ 'Total population of concern to UNHCR (mid-year).',
  371. `Variable name` == 'total_sexual_violence_at_the_national_level_rate_per_100000_2015' ~ 'Rate of sexual violence by 100000 inhabitants.',
  372. `Variable name` == 'intentional_homicide_rates_per_100000_2016' ~ 'Rate of intentional homicides by 100000 inhabitants.',
  373. `Variable name` == 'percentage_of_male_and_female_intentional_homicide_victims_male_2016' ~ 'Percentage of male victims to intentional homicides.',
  374. `Variable name` == 'percentage_of_male_and_female_intentional_homicide_victims_female_2016' ~ 'Percentage of female victims to intentional homicides.',
  375. `Variable name` == 'assault_rate_per_100000_population_2016' ~ 'Rate of assault by 100000 inhabitants.',
  376. `Variable name` == 'kidnapping_at_the_national_level_rate_per_100000_2016' ~ 'Rate of kidnappings by 100000 inhabitants.',
  377. `Variable name` == 'theft_at_the_national_level_rate_per_100000_population_2016' ~ 'Rate of theft by 100000 inhabitants.',
  378. `Variable name` == 'robbery_at_the_national_level_rate_per_100000_population_2016' ~ 'Rate of robbery by 100000 inhabitants.',
  379. `Variable name` == 'labour_force_participation_rate_total_2019' ~ 'Proportion of a country’s working-age population that engages actively in the labour market.',
  380. `Variable name` == 'unemployment_rate_total_2019' ~ 'Proportion of the labour force that does not have a job.',
  381. `Variable name` == 'labour_force_participation_rate_male_2019' ~ 'Proportion of a country’s working-age population (males) that engages actively in the labour market.',
  382. `Variable name` == 'unemployment_rate_male_2019' ~ 'Proportion of the labour force that does not have a job (males).',
  383. `Variable name` == 'labour_force_participation_rate_female_2019' ~ 'Proportion of a country’s working-age population (females) that engages actively in the labour market.',
  384. `Variable name` == 'unemployment_rate_female_2019' ~ 'Proportion of the labour force that does not have a job (females).',
  385. `Variable name` == 'major_trading_partner_1_(%_of_exports)_2018' ~ 'Percentage of total exports trade in US dollars with the major trading partner.',
  386. `Variable name` == 'major_trading_partner_1_(%_of_imports)_2018' ~ 'Percentage of total imports trade in US dollars with the major trading partner.',
  387. `Variable name` == 'major_trading_partner_2_(%_of_exports)_2018' ~ 'Percentage of total exports trade in US dollars with the second major trading partner.',
  388. `Variable name` == 'major_trading_partner_2_(%_of_imports)_2018' ~ 'Percentage of total imports trade in US dollars with the second major trading partner.',
  389. `Variable name` == 'major_trading_partner_3_(%_of_exports)_2018' ~ 'Percentage of total exports trade in US dollars with the third major trading partner.',
  390. `Variable name` == 'major_trading_partner_3_(%_of_imports)_2018' ~ 'Percentage of total imports trade in US dollars with the third major trading partner.',
  391. `Variable name` == 'tourism_expenditure_(millions_of_us_dollars)_2018' ~ 'Tourist/visitor expenditure (millions of US dollars).',
  392. `Variable name` == 'tourist/visitor_arrivals_(thousands)_2018' ~ 'Tourist/visitor arrivals (thousands).',
  393. `Variable name` == 'current_health_expenditure_(%_of_gdp)_2017' ~ 'Expenditure in health as percentage of GDP.',
  394. `Variable name` == 'domestic_general_government_health_expenditure_(%_of_total_government_expenditure)_2017' ~ 'Expenditure in health as percentage of government spending.',
  395. `Variable name` == 'whos_major_trade_partner_exp_1' ~ 'Engineering from variable Major trading partner 1 (% of exports), in the file SYB62_330_201907_Major Trading Partners.csv. Who is the country that is the major trading partner for exportation considering the percentage of total exports trade in US dollars. ',
  396. `Variable name` == 'locality_code' ~ 'ISO 3166-1 Alpha-2 is used for a 2 digit code for countries and regions. Hong Kong and Réunion are examples of regions.',
  397. `Variable name` == 'locality_name' ~ 'The name of the country or region. Hong Kong and Réunion are examples of regions.',
  398. `Variable name` == 'date' ~ 'Date for epidemiological variables. Format: YY-MM-DD',
  399. `Variable name` == 'new_cases' ~ 'Number of new cases for a specific date for a given country.',
  400. `Variable name` == 'new_deaths' ~ 'Number of new deaths for a specific date for a given country.',
  401. `Variable name` == 'acc_cases' ~ 'Accumulated number of cases up to the date for a given country.',
  402. `Variable name` == 'acc_deaths' ~ 'Accumulated number of deaths up to the date for a given country.',
  403. `Variable name` == 'lethality_rate_percent' ~ 'Lethality rate in percent up to the last date in the dataset fora given country',
  404. `Variable name` == 'retail_recreation' ~ 'Mobility trends for places like restaurants, cafes, shopping centers theme parks, museums, libraries, andmovie theaters.This variable indicates how visits and length of stay to this category of location has varied (in percent, positively or negatively) compared to the baseline. The baseline is the median value, for the corresponding day of the week, during the 5-week period Jan 3–Feb 6, 2020',
  405. `Variable name` == 'grocery_pharmacy' ~ 'Mobility trends for places like grocery markets, food warehouses, farmers markets, specialty food shops, drug stores, and pharmacies.This variable indicates how visits and length of stay to this category of location has varied (in percent, positively or negatively) compared to the baseline. The baseline is the median value, for the corresponding day of the week, during the 5-week period Jan 3–Feb 6, 2020',
  406. `Variable name` == 'parks' ~ 'Mobility trends for places like national parks, public beaches, marinas, dog parks, plazas, and public gardens.This variable indicates how visits and length of stay to this category of location has varied (in percent, positively or negatively) compared to the baseline. The baseline is the median value, for the corresponding day of the week, during the 5-week period Jan 3–Feb 6, 2020',
  407. `Variable name` == 'transit_stations' ~ 'Mobility trends for places like public transport hubs such as subway, bus, and train stations.This variable indicates how visits and length of stay to this category of location has varied (in percent, positively or negatively) compared to the baseline. The baseline is the median value, for the corresponding day of the week, during the 5-week period Jan 3–Feb 6, 2020',
  408. `Variable name` == 'workplaces' ~ 'Mobility trends for places of work.This variable indicates how visits and length of stay to this category of location has varied (in percent, positively or negatively) compared to the baseline. The baseline is the median value, for the corresponding day of the week, during the 5-week period Jan 3–Feb 6, 2020',
  409. `Variable name` == 'residential' ~ 'Mobility trends for places of residence.This variable indicates how visits and length of stay to this category of location has varied (in percent, positively or negatively) compared to the baseline. The baseline is the median value, for the corresponding day of the week, during the 5-week period Jan 3–Feb 6, 2020',
  410. `Variable name` == 'first_case_date' ~ 'The date of the first confirmed case of COVID-19 for a given country.',
  411. `Variable name` == 'n_days_since_1st_case' ~ 'Number of days since the first case',
  412. `Variable name` == 'first_death_date' ~ 'The date of the first confirmed death due to COVID-19 for a given country, starting from February 15th, 2020.',
  413. `Variable name` == 'n_days_since_1st_death' ~ 'Number of days since the first death (counting from February 15th)',
  414. )
  415. ) -> df
  # Put the dictionary columns in their final presentation order.
  df <- select(
    df,
    `Variable name`, Year, Description, Source, Docpath, Filepath
  )

  # Export the dictionary to an Excel workbook: a single sheet named
  # 'Data Dictionary', with the header row in bold.
  WriteXLS(
    x = df,
    ExcelFileName = 'data_dictionary.xls',
    SheetNames = 'Data Dictionary',
    BoldHeaderRow = TRUE
  )
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...