使用以下公式繪制stm模型預期主題概率ggplot 2

我正在研究一個在線評論數據集,并使用R的stm包來分析數據。

我讀過一篇文章,其中說明了如何使用stm包來研究text-based數據,我發現它非常有用。特別是,我認為下面從帖子中截取的圖形可能對我的研究有用。

這篇文章只展示了這張圖的部分代碼。

topic_labels <- tribble(
  ~topic, ~category, ~color, 
  list(4, 8, 24, 50), "Anthropology/History", "#d4ae0b", 
  list(44, 48), "Journalism/Satire", "#3E7D49", 
  list(9, 17, 27, 30, 32), "Philosophy", "#c73200", 
  list(2, 3, 5, 11, 13, 15, 18, 20, 26, 28, 33, 34, 35, 39, 43), "Politics", "#de860b", 
  list(14, 23, 29, 31, 36, 37, 38, 42, 45, 47), "Political Economy", "#6F8FCF",
  list(21, 25, 41), "Military", "#b7a4d6", 
  list(1, 6, 7, 10, 12, 16, 19, 22, 40), "Sociology", "#8f1f3f", 
  list(46, 49), "Science/Math", "#767676") %>% 
  unnest(topic) %>% 
  unnest(topic) %>% 
  mutate(topic = factor(topic))

我嘗試使用自己的數據集生成代碼,但失敗了。這是我的代碼:

library(stm)
library(tidyverse)
library(ggplot2)

# Step 1: Create a mapping of topics to categories
topic_categories <- c(
  "Price" = "11,21,25,7",
  "Services" = "1,9,5",
  "Environment" = "18,13,24,3",
  "Hygiene" = "26,19,4,23",
  "Personnel" = "20,6,15",
  "Values" = "14,17,22",
  "Perception" = "2,16,8,12",
  "Others" = "10"
)

# Step 2: Create a data frame with topic probabilities
topic_probabilities <- colMeans(stm26$theta)
topic_data <- data.frame(
  topic = 1:length(topic_probabilities),
  probability = topic_probabilities
)

# Step 3: Assign categories to topics
topic_data$category <- NA
for (cat in names(topic_categories)) {
  topics <- as.numeric(strsplit(topic_categories[cat], ",")[[1]])
  topic_data$category[topic_data$topic %in% topics] <- cat
}

# Step 4: Get top words for each topic
top_words <- labelTopics(stm26, n = 2)
topic_data$top_words <- apply(top_words$prob, 1, function(x) paste(x, collapse = ", "))

# Step 5: Assign colors to categories
category_colors <- c(
  "Price" = "#8f1f3f",
  "Services" = "#d4ae0b",
  "Environment" = "#de860b",
  "Hygiene" = "#6F8FCF",
  "Personnel" = "#c73200",
  "Values" = "#b7a4d6",
  "Perception" = "#3E7D49",
  "Others" = "#767676"
)

# Step 6: Create the plot
ggplot(topic_data, aes(y = reorder(topic, probability), x = probability, color = category)) +
  geom_segment(aes(x = 0, xend = probability, yend = reorder(topic, probability)), size = 0.5) +
  geom_point(size = 1) +
  geom_text(aes(label = top_words), hjust = 0, nudge_x = 0.002, size = 3) +
  scale_color_manual(values = category_colors) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1), 
                     limits = c(0, 0.18)) +
  geom_vline(xintercept = seq(0.05, 0.15, by = 0.05), color = "lightgrey") +  
  facet_grid(category ~ ., scales = "free_y", space = "free_y", switch = "y") +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_text(face = "bold", size = 8, color = "black", margin = margin(r = -25)), 
    axis.ticks.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),  
    panel.grid.minor.x = element_blank(),  
    legend.position = "none",
    strip.placement = "outside",
    strip.text.y.left = element_text(angle = 0, hjust = 1, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, face = "italic"),
    plot.margin = margin(5.5, 40, 5.5, 5.5)  # Adjusted right margin to prevent text cutoff
  ) +
  labs(
    title = "Title",
    subtitle = "Subtitle",
    x = "Expected topic probability"
  )

結果與帖子的圖形有點不同。

首先,我無法將類別名稱移動到主題行的頂部。其次,網格線(灰色)是間歇性的,不連續的。

有人知道如何修改代碼以生成類似于帖子中顯示的圖形嗎?

? 最佳回答:

這里有一個選項,通過切換到ggforce::facet_col將條帶文本移動到頂部來實現所需的結果,使用ggtext為條帶文本著色,最后使用annotation_customclip="off"獲得連續的網格線,沒有中斷。

使用一些虛假的隨機示例數據:

library(tidyverse)
library(ggtext)
library(ggforce)

set.seed(123)

topic_data <- data.frame(
  topic = sample(LETTERS, 26),
  probability = runif(26, 0, .18),
  category = sample(names(category_colors), 26, replace = TRUE),
  top_words = sample(letters, 26)
) |>
  left_join(
    enframe(category_colors, name = "category", value = "color")
  ) |>
  mutate(
    category = factor(category, names(category_colors)),
    category_facet = glue::glue(
      "<span style='color: {color}'>{category}</span>"
    ),
    category_facet = reorder(category_facet, as.numeric(category))
  )

ggplot(topic_data, aes(
  y = reorder(topic, probability),
  x = probability, color = category
)) +
  lapply(
    seq(0.05, 0.15, 0.05),
    \(x) {
      annotation_custom(
        grid::segmentsGrob(
          y0 = unit(0, "npc"), y1 = unit(1, "npc") + unit(16, "pt"),
          gp = grid::gpar(
            col = "lightgrey"
          )
        ),
        xmin = x,
        xmax = x
      )
    }
  ) +
  geom_segment(aes(
    x = 0, xend = probability,
    yend = reorder(topic, probability)
  ), size = 0.5) +
  geom_point(size = 1) +
  geom_text(aes(label = top_words), hjust = 0, nudge_x = 0.002, size = 3) +
  scale_color_manual(values = category_colors) +
  scale_x_continuous(
    labels = scales::percent_format(accuracy = 1),
    limits = c(0, 0.18),
    expand = c(0, 0, .05, 0)
  ) +
  ggforce::facet_col(~category_facet,
    scales = "free_y",
    space = "free"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(
      face = "bold", size = 8
    ),
    axis.ticks.y = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none",
    strip.text.x = ggtext::element_markdown(
      hjust = 0, face = "bold", 
      size = 12,
      margin = margin(l = -13)
    ),
    strip.background = element_blank(),
    strip.clip = "off",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, face = "italic"),
    plot.margin = margin(5.5, 40, 5.5, 5.5)
  ) +
  labs(
    title = "Title",
    subtitle = "Subtitle",
    x = "Expected topic probability",
    y = NULL
  ) +
  coord_cartesian(clip = "off")

主站蜘蛛池模板: 奇米精品一区二区三区在| 免费无码毛片一区二区APP| 国精产品999一区二区三区有限| 日韩一区二区久久久久久| 少妇无码AV无码一区| 亚洲一区精品伊人久久伊人| 丰满人妻一区二区三区免费视频| 乱精品一区字幕二区| 一区二区手机视频| 亚洲色大成网站www永久一区| 乱子伦一区二区三区| 无码丰满熟妇一区二区| 搜日本一区二区三区免费高清视频 | 精品人妻一区二区三区四区| 国产午夜三级一区二区三| 日韩精品人妻一区二区中文八零| 人妻体体内射精一区二区| 精品日本一区二区三区在线观看| 国偷自产一区二区免费视频| 久久无码人妻一区二区三区| 在线观看一区二区三区视频| 亚洲一区中文字幕久久| 日本精品高清一区二区2021| 狠狠色婷婷久久一区二区 | 久久久久一区二区三区| 精品一区二区三区在线视频观看| 国产韩国精品一区二区三区久久| 国产精品高清一区二区三区| 国产精品一区二区三区99 | 久久一区二区免费播放| 日韩一区在线视频| 无码一区二区三区在线| 蜜桃AV抽搐高潮一区二区| 无码国产精品一区二区免费虚拟VR| 国产精品特级毛片一区二区三区| 一本AV高清一区二区三区| 中文字幕亚洲一区| 日本一区二区免费看| 久久一区二区三区精品| 日韩在线不卡免费视频一区| 久久精品国产一区|