Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dataviz.py 1.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
  1. from wordcloud import WordCloud
  2. from collections import Counter
  3. import pandas as pd
  4. import matplotlib.pyplot as plt
  5. import seaborn as sns
  6. from IPython.display import Markdown, display
  def md(input):
      """Render a Markdown string as rich notebook output.

      ``input`` is wrapped in ``Markdown`` and handed to ``display``; the
      value returned by ``display`` is passed back to the caller.
      """
      # NOTE: the parameter name shadows the ``input`` builtin; it is kept
      # so that callers passing it by keyword keep working.
      rendered = Markdown(input)
      return display(rendered)
  def get_wc(data):
      """Build a word-cloud image from an iterable of strings.

      The items of ``data`` are joined with single spaces and fed to a
      400x400 ``WordCloud`` with a black background and a minimum font size
      of 10. Returns the result of the cloud's ``to_image()`` call.
      """
      # mask style: https://github.com/amueller/word_cloud/blob/master/examples/masked.py
      cloud = WordCloud(
          width=400,
          height=400,
          background_color='black',
          min_font_size=10,
      )
      cloud.generate(" ".join(data))
      return cloud.to_image()
  def plot_keywords(df, title='Title'):
      """Draw a horizontal bar chart of token counts and return its figure.

      ``df`` must provide a ``'token'`` column (drawn on the y axis) and a
      ``'count'`` column (drawn on the x axis). ``title`` is set as the
      plot title. A new 20x10 figure is created on every call.
      """
      figure = plt.figure(figsize=(20, 10))
      plt.title(title)
      # The font scale is applied after the figure and title exist, exactly
      # as before; moving this call could change how the title is rendered.
      sns.set(font_scale=1.1)
      sns.barplot(data=df, x='count', y='token', orient="h")
      return figure
  def extract_keywords(text, num=50, begin=None, exclude=None):
      """Return the most frequent whitespace-separated tokens of ``text``.

      Args:
          text: String to tokenize with ``str.split()``.
          num: Maximum number of rows to return; ``None`` returns every
              distinct token.
          begin: Optional prefix. When truthy, only tokens starting with it
              are counted (for example ``"#"``).
          exclude: Optional collection of tokens to drop before counting.
              Defaults to no exclusions.

      Returns:
          A ``pandas.DataFrame`` with columns ``token`` and ``count``,
          ordered from most to least frequent.
      """
      # A set gives O(1) membership tests; ``None`` as the default avoids a
      # mutable list shared across calls.
      banned = frozenset(exclude) if exclude else frozenset()

      # Both filters are applied. Previously the token list was rebuilt from
      # ``text.split()`` after filtering, so ``exclude`` never took effect.
      tokens = [
          token
          for token in text.split()
          if token not in banned and (not begin or token.startswith(begin))
      ]

      ranked = Counter(tokens).most_common(num)
      return pd.DataFrame(ranked, columns=["token", "count"])
  def get_target(group):
      """Map a political group code to its side of the spectrum.

      Returns ``'droite'`` for right-wing groups and ``'gauche'`` for
      left-wing groups. Any other value, including unknown codes, falls
      back to ``'centre'``.
      """
      sides = (
          ("droite", ("AGIR-E", "DLF", "LDS", "LR", "RN", "UDI_I")),
          ("gauche", ("EDS", "FI", "GDR", "GE", "LND", "SOC")),
      )
      for label, members in sides:
          if group in members:
              return label
      # Not listed on either side: treated as a centre group.
      return "centre"
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...