Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

TaskADataParser.java 4.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
  1. /*
  2. * Copyright 2013,2014 BioASQ project: FP7/2007-2013, ICT-2011.4.4(d),
  3. * Intelligent Information Management,
  4. * Targeted Competition Framework grant agreement n° 318652.
  5. * www: http://www.bioasq.org
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. */
  19. /**
  20. *
  21. * @author Ioannis Partalas
  22. */
  23. package data;
  24. import com.google.gson.stream.JsonReader;
  25. import java.io.FileInputStream;
  26. import java.io.IOException;
  27. import java.io.InputStreamReader;
  28. import java.util.ArrayList;
  29. import java.util.HashSet;
  30. import java.util.logging.Level;
  31. import java.util.logging.Logger;
  32. public class TaskADataParser {
  33. HashSet journalList;
  34. int numeOfArticles=0;
  35. double labelsPerArticle=0.0;
  36. HashSet labelsList;
  37. double labelDensity=0;
  38. HashSet pmids;
  39. /**
  40. *
  41. * Return a json reader and opens the array
  42. *
  43. */
  44. public static JsonReader streamParser(String jsonFile) throws IOException {
  45. int count = 0;
  46. int abstract_count=0;
  47. int duplicates = 0;
  48. JsonReader reader =null;
  49. try {
  50. reader = new JsonReader(new InputStreamReader(new FileInputStream(jsonFile)));
  51. reader.setLenient(true);
  52. reader.beginObject();
  53. String nam = reader.nextName();
  54. System.out.println(nam);
  55. reader.beginArray();
  56. } catch (Exception ex) {
  57. System.out.println("File not found");
  58. System.out.println(ex.toString());
  59. }
  60. return reader;
  61. }
  62. public static void closeReader(JsonReader reader)
  63. {
  64. try {
  65. reader.endArray();
  66. reader.endObject();
  67. } catch (IOException ex) {
  68. Logger.getLogger(TaskADataParser.class.getName()).log(Level.SEVERE, null, ex);
  69. }
  70. }
  71. public static PubMedDocument getNextDocument(JsonReader reader)
  72. {
  73. String text=null;
  74. String title=null;
  75. String pmid=null;
  76. String journal=null;
  77. String[] meshMajor=null;
  78. try {
  79. if (reader.hasNext()) {
  80. reader.beginObject();
  81. while (reader.hasNext()) {
  82. String name = reader.nextName();
  83. if (name.equals("abstractText")) {
  84. text = reader.nextString();
  85. } else if (name.equals("journal")) {
  86. journal = reader.nextString();
  87. } else if (name.equals("meshMajor")) {
  88. meshMajor = readLabelsArray(reader);
  89. } else if (name.equals("pmid")) {
  90. pmid = reader.nextString();
  91. } else if (name.equals("title")){
  92. title = reader.nextString();
  93. }
  94. else if (name.equals("year")){
  95. reader.skipValue();
  96. }
  97. else{
  98. System.out.println(name);
  99. reader.skipValue();
  100. }
  101. }
  102. reader.endObject();
  103. }
  104. } catch (Exception ex) { }
  105. return new PubMedDocument(text, title, pmid, journal, meshMajor);
  106. }
  107. public static String[] readLabelsArray(JsonReader reader){
  108. String labels[];
  109. ArrayList<String> lab = new ArrayList<String>();
  110. try{
  111. reader.beginArray();
  112. while (reader.hasNext()) {
  113. String nextString = reader.nextString();
  114. lab.add(nextString);
  115. }
  116. reader.endArray();
  117. }catch(IOException ex){}
  118. labels = new String[lab.size()];
  119. labels = lab.toArray(labels);
  120. return labels;
  121. }
  122. }
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...