Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

TrainViveknSentiment.scala 2.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
  import com.johnsnowlabs.nlp.annotator._
  import com.johnsnowlabs.nlp.base._
  import com.johnsnowlabs.util.Benchmark
  import org.apache.spark.ml.Pipeline
  import org.apache.spark.sql.SparkSession

  /**
   * Example program: trains a Vivekn sentiment pipeline on six labelled
   * sentences and then annotates two unseen sentences, first through a
   * `LightPipeline` and then through the regular Spark pipeline, timing
   * both runs with `Benchmark.time`.
   *
   * The entry point is an explicit `main` method rather than `extends App`:
   * the Spark documentation advises against `scala.App` for applications,
   * and an explicit `main` avoids its delayed-initialisation pitfalls.
   */
  object TrainViveknSentiment {

    /**
     * Builds a local Spark session, runs the example and always stops the
     * session afterwards, even when training or annotation fails.
     *
     * @param args command-line arguments; not used
     */
    def main(args: Array[String]): Unit = {
      val spark: SparkSession = SparkSession
        .builder()
        .appName("test")
        .master("local[*]")
        .config("spark.driver.memory", "4G")
        .config("spark.kryoserializer.buffer.max", "200M")
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .getOrCreate()

      try run(spark)
      finally spark.stop()
    }

    /**
     * Trains the sentiment pipeline and benchmarks both annotation paths.
     *
     * @param spark the session used to build the training and testing data
     */
    private def run(spark: SparkSession): Unit = {
      spark.sparkContext.setLogLevel("WARN")

      import spark.implicits._

      // Labelled sentences used to fit the sentiment model.
      val training = Seq(
        ("I really liked this movie!", "positive"),
        ("The cast was horrible", "negative"),
        ("Never going to watch this again or recommend it to anyone", "negative"),
        ("It's a waste of time", "negative"),
        ("I loved the protagonist", "positive"),
        ("The music was really really good", "positive")
      ).toDS.toDF("train_text", "train_sentiment")

      // Unlabelled sentences annotated after training.
      val testing = Array(
        "I don't recommend this movie, it's horrible",
        "Dont waste your time!!!"
      )

      val document = new DocumentAssembler()
        .setInputCol("train_text")
        .setOutputCol("document")

      val token = new Tokenizer()
        .setInputCols("document")
        .setOutputCol("token")

      val normalizer = new Normalizer()
        .setInputCols("token")
        .setOutputCol("normal")

      val vivekn = new ViveknSentimentApproach()
        .setInputCols("document", "normal")
        .setOutputCol("result_sentiment")
        .setSentimentCol("train_sentiment")

      val finisher = new Finisher()
        .setInputCols("result_sentiment")
        .setOutputCols("final_sentiment")

      val pipeline = new Pipeline()
        .setStages(Array(document, token, normalizer, vivekn, finisher))

      val sparkPipeline = pipeline.fit(training)
      val lightPipeline = new LightPipeline(sparkPipeline)

      Benchmark.time("Light pipeline quick annotation") {
        lightPipeline.annotate(testing)
      }

      Benchmark.time("Spark pipeline, this may be too much for just two rows!") {
        val testingDS = testing.toSeq.toDS.toDF("testing_text")
        println("Updating DocumentAssembler input column")
        // The testing frame has no "train_text" column, so the assembler is
        // re-pointed at "testing_text" before transforming.
        // NOTE(review): this mutates the `document` stage that was passed to
        // `pipeline.fit`; presumably the fitted model shares that instance,
        // which is why the change takes effect here - confirm.
        document.setInputCol("testing_text")
        sparkPipeline.transform(testingDS).show()
      }
    }
  }
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...