[Kaggle] Coffee Sales Visualization

Author

SEOYEON CHOI

Published

September 4, 2025

Reference

import rpy2
%load_ext rpy2.ipython
%%R
# Request 8 x 8 default dimensions for rendered plots.
# NOTE(review): repr.plot.* options are normally consumed by the IRkernel/repr
# display machinery; confirm they affect plots drawn through rpy2's %%R magic.
options(repr.plot.width = 8, repr.plot.height = 8)
%%R
library(ggplot2)
library(dplyr)

Data

%%R
# Load the coffee sales CSV into `df`; the path is resolved relative to the
# current working directory.
df <- read.csv("../../../delete/Coffe_sales.csv") 
%%R
# Preview the first rows to check the column names and value formats.
head(df)
  hour_of_day cash_type money         coffee_name Time_of_Day Weekday
1          10      card  38.7               Latte     Morning     Fri
2          12      card  38.7       Hot Chocolate   Afternoon     Fri
3          12      card  38.7       Hot Chocolate   Afternoon     Fri
4          13      card  28.9           Americano   Afternoon     Fri
5          13      card  38.7               Latte   Afternoon     Fri
6          15      card  33.8 Americano with Milk   Afternoon     Fri
  Month_name Weekdaysort Monthsort       Date            Time
1        Mar           5         3 2024-03-01 10:15:50.520000
2        Mar           5         3 2024-03-01 12:19:22.539000
3        Mar           5         3 2024-03-01 12:20:18.089000
4        Mar           5         3 2024-03-01 13:46:33.006000
5        Mar           5         3 2024-03-01 13:48:14.626000
6        Mar           5         3 2024-03-01 15:39:47.726000
  • cash_type은 card 한 종류뿐이라 비교 의미 없음
%%R
# Count rows per cash_type value.
table(df["cash_type"])
cash_type
card 
3547 
  • 가격 순위 확인
%%R
# Rank menu items by their mean transaction amount, highest first.
df %>%
  group_by(coffee_name) %>%
  # na.rm = TRUE keeps a missing money value from turning a group's mean into NA.
  summarise(avg_price = mean(money, na.rm = TRUE)) %>%
  arrange(desc(avg_price))
# A tibble: 8 × 2
  coffee_name         avg_price
  <chr>                   <dbl>
1 Hot Chocolate            36.0
2 Cappuccino               35.9
3 Cocoa                    35.7
4 Latte                    35.5
5 Americano with Milk      30.6
6 Americano                26.0
7 Cortado                  25.7
8 Espresso                 20.9

Bar chart(money)

  • 월 순서 정렬
%%R
# Order months chronologically instead of alphabetically. month.abb is base
# R's built-in vector of English three-letter month abbreviations (Jan..Dec).
df$Month_name <- factor(df$Month_name, levels = month.abb)
%%R
# Count rows per month, listed in factor-level order.
table(df["Month_name"])
Month_name
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec 
201 423 494 168 241 223 237 272 344 426 259 259 
%%R
# One bar per month: geom_col() stacks every row's money value, so each bar's
# height is the month's total.
df %>%
  ggplot(aes(x = Month_name, y = money, fill = Month_name)) +
  geom_col() +
  labs(
    title = "Money by Month_name",
    x = "Month_name",
    y = "Money"
  ) +
  theme_minimal()

  • 요일별 구분해보기
%%R
# Monthly totals again, with each bar split into stacked segments by weekday.
df %>%
  ggplot(aes(x = Month_name, y = money, fill = Weekday)) +
  geom_col() +
  labs(
    title = "Money by Month_name",
    x = "Month_name",
    y = "Money"
  ) +
  theme_minimal()

  • 오전오후밤 구분해보기
%%R
# Monthly totals, with each bar split into stacked segments by time of day.
df %>%
  ggplot(aes(x = Month_name, y = money, fill = Time_of_Day)) +
  geom_col() +
  labs(
    title = "Money by Month_name",
    x = "Month_name",
    y = "Money"
  ) +
  theme_minimal()

  • 요일 순 정렬 factor
%%R
# Fix the weekday order (Monday first) so tables and plot axes are not sorted
# alphabetically.
df[["Weekday"]] <- factor(
  df[["Weekday"]],
  levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
)
%%R
# Count rows per weekday, listed in factor-level order.
table(df["Weekday"])
Weekday
Mon Tue Wed Thu Fri Sat Sun 
544 572 500 510 532 470 419 
%%R
# One bar per weekday; stacking every row's money value makes the bar height
# the weekday's total.
df %>%
  ggplot(aes(x = Weekday, y = money, fill = Weekday)) +
  geom_col() +
  labs(
    title = "Money by Weekday",
    x = "Weekday",
    y = "Money"
  ) +
  theme_minimal()

  • 오전오후밤 구분해보기
%%R
# Weekday totals, with each bar split into stacked segments by time of day.
df %>%
  ggplot(aes(x = Weekday, y = money, fill = Time_of_Day)) +
  geom_col() +
  labs(
    title = "Money by Weekday",
    x = "Weekday",
    y = "Money"
  ) +
  theme_minimal()

  • 메뉴별 판매액
%%R
# Count rows per menu item.
table(df["coffee_name"])
coffee_name
          Americano Americano with Milk          Cappuccino               Cocoa 
                564                 809                 486                 239 
            Cortado            Espresso       Hot Chocolate               Latte 
                287                 129                 276                 757 
%%R
# One bar per menu item; stacking every row's money value makes the bar height
# the item's total.
df %>%
  ggplot(aes(x = coffee_name, y = money, fill = coffee_name)) +
  geom_col() +
  labs(
    title = "Money by coffee_name",
    x = "coffee_name",
    y = "Money"
  ) +
  theme_minimal()

  • 시간대별 메뉴 선호도 확인
%%R
# Order the periods as they occur during the day rather than alphabetically.
df[["Time_of_Day"]] <- factor(
  df[["Time_of_Day"]],
  levels = c("Morning", "Afternoon", "Night")
)
%%R
# Count rows per time-of-day period, listed in factor-level order.
table(df["Time_of_Day"])
Time_of_Day
  Morning Afternoon     Night 
     1181      1205      1161 
  • 팔리는 수도 비슷
%%R
# Count sales for every (time of day, menu item) pair and draw them as
# side-by-side bars so menu popularity can be compared within each period.
df %>%
  group_by(Time_of_Day, coffee_name) %>%
  # .groups = "drop" returns an ungrouped tibble and avoids the
  # "summarise() has grouped output by ..." message.
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(x = Time_of_Day, y = count, fill = coffee_name)) +
  geom_col(position = "dodge") +
  labs(title = "Coffee Choice by Time of Day", x = "Time of Day", y = "Count") +
  theme_minimal()

  • 아침엔 americano with milk 압도적

    • 라떼랑 아메리카노는 비슷
  • 오후엔 라떼

    • 아메리카노랑 아메리카노윗밀크는 비슷
  • 밤에도 라떼

  • 중앙값 기준으로 low, high 나눠서 값에 따라 판매액 차이가 있는지

%%R
# Median transaction amount; the High/Low price split that follows uses the
# same statistic as its cutoff.
median(df$money)
[1] 32.82
%%R
# Label each sale "High" or "Low" relative to the median transaction amount,
# then compare how many sales of each label occur per time of day.
df %>%
  mutate(
    # na.rm = TRUE keeps a missing money value from turning the cutoff into NA,
    # which would leave every row unlabeled.
    price_group = ifelse(money >= median(money, na.rm = TRUE), "High", "Low")
  ) %>%
  group_by(Time_of_Day, price_group) %>%
  # .groups = "drop" returns an ungrouped tibble and avoids the
  # "summarise() has grouped output by ..." message.
  summarise(n = n(), .groups = "drop") %>%
  ggplot(aes(x = Time_of_Day, y = n, fill = price_group)) +
  geom_col(position = "dodge")

  • 아침에는 저렴
  • 저녁에는 비싼 음료 선호

Line chart

  • 시간대별 판매액
%%R
# Count rows per hour of the day.
table(df["hour_of_day"])
hour_of_day
  6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22 
  5  88 235 242 328 283 241 225 225 236 278 237 218 229 169 195 113 
%%R
# Sum money per hour across all menu items and plot the totals as a line with
# a marker at each hour.
df %>%
  group_by(hour_of_day) %>%
  summarise(
    total_money = sum(money)
  ) %>%
  ggplot(aes(x = hour_of_day, y = total_money)) +
  geom_line(color = "steelblue") +
  geom_point(color = "darkred") +
  labs(
    title = "Total Sales by Hour of Day",
    x = "Hour",
    y = "Sales"
  ) +
  theme_minimal()

  • 오전 시간대에 가장 많이 팔리고, 그다음은 오후 15시 이후
%%R
# Hourly money totals restricted to Americano sales.
df %>%
  filter(coffee_name == "Americano") %>%
  group_by(hour_of_day) %>%
  summarise(total_money = sum(money)) %>%
  ggplot(aes(x = hour_of_day, y = total_money)) +
  geom_line(color = "steelblue") +
  geom_point(color = "darkred") +
  # The title names the filtered item so this plot is not mistaken for the
  # all-items hourly chart.
  labs(title = "Americano Sales by Hour of Day", x = "Hour", y = "Sales") +
  theme_minimal()

  • 11시에 아메리카노가 가장 많이 팔렸다!
%%R
# Hourly money totals restricted to Latte sales.
df %>%
  filter(coffee_name == "Latte") %>%
  group_by(hour_of_day) %>%
  summarise(total_money = sum(money)) %>%
  ggplot(aes(x = hour_of_day, y = total_money)) +
  geom_line(color = "steelblue") +
  geom_point(color = "darkred") +
  # The title names the filtered item so this plot is not mistaken for the
  # all-items hourly chart.
  labs(title = "Latte Sales by Hour of Day", x = "Hour", y = "Sales") +
  theme_minimal()

  • 10시, 16시에 라떼가 많이 팔리는 경향

  • 주중 주말 판매액 비교

%%R
# Compare hourly money totals between weekend days (Sat, Sun) and all other
# days, one line per group.
# NOTE(review): the totals pool five weekdays against two weekend days, so the
# "Weekday" line is expected to sit higher; consider per-day averages.
df %>%
  mutate(
    Weekend = ifelse(Weekday %in% c("Sat", "Sun"), "Weekend", "Weekday")
  ) %>%
  group_by(Weekend, hour_of_day) %>%
  # .groups = "drop" returns an ungrouped tibble and avoids the
  # "summarise() has grouped output by ..." message.
  summarise(total = sum(money), .groups = "drop") %>%
  ggplot(aes(x = hour_of_day, y = total, color = Weekend)) +
  # linewidth replaces size for line geoms (size is deprecated for lines since
  # ggplot2 3.4.0).
  geom_line(linewidth = 1)

Heat map

  • 시간대 및 요일별로 판매액 큰 구간 확인 가능
%%R
# Heatmap of money totals for every (weekday, hour) cell; darker red marks a
# larger total.
df %>%
  group_by(Weekday, hour_of_day) %>%
  # .groups = "drop" returns an ungrouped tibble and avoids the
  # "summarise() has grouped output by ..." message.
  summarise(total = sum(money), .groups = "drop") %>%
  ggplot(aes(x = hour_of_day, y = Weekday, fill = total)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red") +
  labs(title = "Sales Heatmap by Hour & Weekday")