Reference

Import

import pandas as pd
import plotly.express as px
from scipy.stats import spearmanr, pearsonr

Data

# Load the survey CSV and keep every column except the last one (.iloc[:, :-1]).
# NOTE(review): why the trailing column is discarded is not visible here — confirm.
df = pd.read_csv('../../../delete/ScreenTime vs MentalWellness.csv').iloc[:, :-1]

# Bare expression: rendered as the cell output when run interactively.
df

	user_id	age	gender	occupation	work_mode	screen_time_hours	work_screen_hours	leisure_screen_hours	sleep_hours	sleep_quality_1_5	stress_level_0_10	productivity_0_100	exercise_minutes_per_week	social_hours_per_week	mental_wellness_index_0_100
0	U0001	33	Female	Employed	Remote	10.79	5.44	5.35	6.63	1	9.3	44.7	127	0.7	9.3
1	U0002	28	Female	Employed	In-person	7.40	0.37	7.03	8.05	3	5.7	78.0	74	2.1	56.2
2	U0003	35	Female	Employed	Hybrid	9.78	1.09	8.69	6.48	1	9.1	51.8	67	8.0	3.6
3	U0004	42	Male	Employed	Hybrid	11.13	0.56	10.57	6.89	1	10.0	37.0	0	5.7	0.0
4	U0005	28	Male	Student	Remote	13.22	4.09	9.13	5.79	1	10.0	38.5	143	10.1	0.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
395	U0396	26	Female	Student	Remote	6.43	2.99	3.44	7.75	1	5.9	64.6	252	7.6	39.3
396	U0397	16	Male	Self-employed	Remote	9.59	5.44	4.15	5.57	1	10.0	47.4	99	7.0	3.5
397	U0398	40	Male	Student	Remote	8.72	2.36	6.36	7.56	1	9.4	57.3	193	10.1	6.6
398	U0399	29	Female	Retired	Hybrid	5.04	0.94	4.10	7.32	1	7.1	63.6	97	12.1	21.0
399	U0400	39	Female	Employed	Remote	8.86	3.82	5.04	6.55	1	10.0	41.4	88	9.8	8.4

400 rows × 15 columns

screen_time_hours은 work_screen_hours와 leisure_screen_hours의 합임을 확인

# Exact (bit-for-bit) float comparison of the reported total against
# work + leisure hours; True only if every row matches exactly.
df["screen_time_hours"].eq(
    df["work_screen_hours"].add(df["leisure_screen_hours"])
).all()

False

# Gap between the reported total and work + leisure hours, rounded to 2 decimals.
# Computed here so this cell does not depend on `diff` from a cell that appears
# later in the file (running top to bottom would otherwise raise NameError).
diff = round(df["screen_time_hours"] - (df["work_screen_hours"] + df["leisure_screen_hours"]), 2)
# Rows where the reported total exceeds the sum of its parts.
diff[diff > 0]

Series([], dtype: float64)

# Rounded (2 d.p.) difference between the reported total and work + leisure hours.
diff = df["screen_time_hours"].sub(
    df["work_screen_hours"] + df["leisure_screen_hours"]
).round(2)
# Keep only the rows whose rounded difference is non-zero.
df_diff = df[diff.ne(0)]
df_diff

	user_id	age	gender	occupation	work_mode	screen_time_hours	work_screen_hours	leisure_screen_hours	sleep_hours	sleep_quality_1_5	stress_level_0_10	productivity_0_100	exercise_minutes_per_week	social_hours_per_week	mental_wellness_index_0_100

# Print the column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 15 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   user_id                      400 non-null    object 
 1   age                          400 non-null    int64  
 2   gender                       400 non-null    object 
 3   occupation                   400 non-null    object 
 4   work_mode                    400 non-null    object 
 5   screen_time_hours            400 non-null    float64
 6   work_screen_hours            400 non-null    float64
 7   leisure_screen_hours         400 non-null    float64
 8   sleep_hours                  400 non-null    float64
 9   sleep_quality_1_5            400 non-null    int64  
 10  stress_level_0_10            400 non-null    float64
 11  productivity_0_100           400 non-null    float64
 12  exercise_minutes_per_week    400 non-null    int64  
 13  social_hours_per_week        400 non-null    float64
 14  mental_wellness_index_0_100  400 non-null    float64
dtypes: float64(8), int64(3), object(4)
memory usage: 47.0+ KB

문자형 컬럼

# Columns stored as object/string dtype, each followed by its value frequencies.
char_cols = df.select_dtypes(include=['object', 'string']).columns
for col in char_cols:
    # One print call with a newline separator emits the header line and then the counts.
    print(f"\n--- {col} (categorical) ---", df[col].value_counts(), sep="\n")


--- user_id (categorical) ---
U0001    1
U0264    1
U0274    1
U0273    1
U0272    1
        ..
U0131    1
U0130    1
U0129    1
U0128    1
U0400    1
Name: user_id, Length: 400, dtype: int64

--- gender (categorical) ---
Female              222
Male                170
Non-binary/Other      8
Name: gender, dtype: int64

--- occupation (categorical) ---
Employed         207
Student          107
Self-employed     45
Unemployed        27
Retired           14
Name: occupation, dtype: int64

--- work_mode (categorical) ---
Remote       150
Hybrid       146
In-person    104
Name: work_mode, dtype: int64

숫자형 컬럼

# Numeric columns and their descriptive statistics, one row per column.
num_cols = df.select_dtypes(include=['number']).columns
print("\n--- Numeric summary ---", df[num_cols].describe().transpose(), sep="\n")


--- Numeric summary ---
                             count        mean        std    min      25%  \
age                          400.0   29.777500   7.466080  16.00  24.0000   
screen_time_hours            400.0    9.024900   2.491058   1.00   7.3725   
work_screen_hours            400.0    2.183075   1.931321   0.11   0.6950   
leisure_screen_hours         400.0    6.841825   2.220896   0.89   5.4600   
sleep_hours                  400.0    7.013175   0.852421   4.64   6.3975   
sleep_quality_1_5            400.0    1.397500   0.652348   1.00   1.0000   
stress_level_0_10            400.0    8.150500   2.094844   0.00   6.9000   
productivity_0_100           400.0   54.306500  15.020054  20.60  43.6000   
exercise_minutes_per_week    400.0  109.810000  70.007045   0.00  58.0000   
social_hours_per_week        400.0    7.905000   4.909632   0.00   4.5750   
mental_wellness_index_0_100  400.0   20.326750  20.376793   0.00   3.6750   

                                 50%       75%     max  
age                           30.000   35.0000   60.00  
screen_time_hours              9.090   10.4950   19.17  
work_screen_hours              1.455    3.0125   12.04  
leisure_screen_hours           6.700    8.4175   13.35  
sleep_hours                    7.030    7.6400    9.74  
sleep_quality_1_5              1.000    2.0000    4.00  
stress_level_0_10              8.800   10.0000   10.00  
productivity_0_100            51.750   63.0000  100.00  
exercise_minutes_per_week    103.000  157.0000  372.00  
social_hours_per_week          7.750   11.0250   23.90  
mental_wellness_index_0_100   14.800   30.6500   97.00

Visualization

Character col check

# Bar chart of category counts for gender, occupation and work_mode.
# value_counts().reset_index() names its columns differently across pandas
# versions ('index'/<column> before 2.0, <column>/'count' from 2.0 on), so the
# two columns are named explicitly instead of relying on either default.
for column, axis_label, chart_title in (
    ('gender', 'Gender', 'Gender Distribution'),
    ('occupation', 'occupation', 'Occupation Distribution'),
    ('work_mode', 'work_mode', 'work_mode Distribution'),
):
    counts = (
        df[column]
        .value_counts()
        .rename_axis(axis_label)      # category values -> column named like the x-axis label
        .reset_index(name='Count')    # frequencies -> 'Count'
    )
    fig = px.bar(
        counts,
        x=axis_label, y='Count',
        title=chart_title,
        height=500, width=700)
    fig.show()

Gender and Work mode

Average Screen Time, work screen hours and leisure screen hours by Gender

성별 차이는 없어 보임

# Mean total / work / leisure screen hours per gender, drawn as grouped bars.
df_grouped = (
    df.groupby("gender")[["screen_time_hours", "work_screen_hours", "leisure_screen_hours"]]
    .agg("mean")
    .reset_index()
)

fig = px.bar(
    df_grouped,
    x="gender",
    y=["screen_time_hours", "work_screen_hours", "leisure_screen_hours"],
    title="Average Screen Time by Gender",
    barmode="group",
    labels={"value": "Hours", "gender": "Gender", "variable": "Type"},
    height=500, width=700,
)
fig.show()

Average Screen Time, work screen hours and leisure screen hours by work mode

remote로 일하는 사람들이 당연히 work_screen_hours가 가장 높았고, 이에 따라 screen_time_hours도 높았음

# Mean total / work / leisure screen hours per work mode, drawn as grouped bars.
df_grouped = (
    df.groupby("work_mode")[["screen_time_hours", "work_screen_hours", "leisure_screen_hours"]]
    .agg("mean")
    .reset_index()
)

fig = px.bar(
    df_grouped,
    x="work_mode",
    y=["screen_time_hours", "work_screen_hours", "leisure_screen_hours"],
    title="Average Screen Time by work_mode",
    barmode="group",
    labels={"value": "Hours", "work_mode": "work_mode", "variable": "Type"},
    height=500, width=700,
)
fig.show()

sleep quality

다 왜 이렇게 낮아..ㅎㅎㅎ

# Box plot of the 1-5 sleep-quality rating per work mode, split by gender.
# The title previously read "Stress Level ..." (copied from the stress plot)
# although the y-axis is sleep_quality_1_5.
fig = px.box(
    df,
    x="work_mode",
    y="sleep_quality_1_5",
    color="gender",
    title="Sleep Quality by Work Mode and Gender",
    height=500,
    width=700
)
fig.show()

# Histogram of the sleep-quality rating: one facet per work mode,
# bars grouped side by side by gender.
fig = px.histogram(
    df, x="sleep_quality_1_5",
    facet_col="work_mode", color="gender", barmode="group",
    title="Distribution of Sleep Quality by Gender and Work Mode",
    width=900, height=600,
)
fig.show()

stress level

remote에 비해 in-person,hybrid 의 stress level이 낮네?

# Box plot of stress level (0-10) per work mode, split by gender.
fig = px.box(
    df, x="work_mode", y="stress_level_0_10", color="gender",
    title="Stress Level by Work Mode and Gender",
    width=700, height=500,
)
fig.show()

productivity

in-person, hybrid가 remote에 비해 높은 경향

# Box plot of productivity (0-100) per work mode, split by gender.
fig = px.box(
    df, x="work_mode", y="productivity_0_100", color="gender",
    title="Productivity by Work Mode and Gender",
    width=700, height=500,
)
fig.show()

exercise minutes per week

remote라고 많거나 하지 않네

# Box plot of weekly exercise minutes per work mode, split by gender.
fig = px.box(
    df, x="work_mode", y="exercise_minutes_per_week", color="gender",
    title="Excercise minutes per week by Work Mode and Gender",
    width=700, height=500,
)
fig.show()

Social hours per week

remote가 오히려 낮은 경향, hybrid가 높은 경향

# Box plot of weekly social hours per work mode, split by gender.
fig = px.box(
    df, x="work_mode", y="social_hours_per_week", color="gender",
    title="Social hours per week by Work Mode and Gender",
    width=700, height=500,
)
fig.show()

Mental Wellness index

remote가 낮은경향, 그렇다고 나머지 높진 않음.

# Box plot of the mental wellness index (0-100) per work mode, split by gender.
fig = px.box(
    df, x="work_mode", y="mental_wellness_index_0_100", color="gender",
    title="Mental wellness index by Work Mode and Gender",
    width=700, height=500,
)
fig.show()

Occupation

student들이 screen 시간이 길지만 leisure hours는 직장인들과 비슷하다

# Reshape to long form: one row per (occupation, screen_type) pair with its hours.
df_long = pd.melt(
    df,
    id_vars="occupation",
    value_vars=["screen_time_hours", "work_screen_hours", "leisure_screen_hours"],
    var_name="screen_type",
    value_name="hours",
)

# Colour separates the total, work and leisure series within each occupation.
fig = px.box(
    df_long, x="occupation", y="hours", color="screen_type",
    title="Screen Hours by Occupation",
    width=1000, height=500,
)

fig.show()

Sleep Hours

sleep hours는 비슷

# Box plot of sleep hours for each occupation.
fig = px.box(df, x="occupation", y="sleep_hours",
             title="Sleep Hours by occupation",
             width=700, height=500)
fig.show()

sleep quality

다 낮다..

# Histogram of the sleep-quality rating, bars grouped side by side by occupation.
fig = px.histogram(
    df, x="sleep_quality_1_5",
    color="occupation", barmode="group",
    title="Distribution of Sleep Quality",
    width=800, height=500,
)
fig.show()

Stress Level

Stress Level은 student가 평균적으로 높다

# Box plot of stress level (0-10) for each occupation.
fig = px.box(df, x="occupation", y="stress_level_0_10",
             title="Stress Level by occupation",
             width=700, height=500)
fig.show()

Productivity

Productivity는 학생이 제일 낮고 retired가 가장 높다

# Box plot of productivity (0-100) for each occupation.
fig = px.box(df, x="occupation", y="productivity_0_100",
             title="Productivity by occupation",
             width=700, height=500)
fig.show()

Exercise Minutes

Exercise Minutes는 unemployed가 평균적으로 가장 높고 그다음 retired, 나머지는 비슷

# Box plot of weekly exercise minutes for each occupation.
fig = px.box(df, x="occupation", y="exercise_minutes_per_week",
             title="Excercise Minutes by occupation",
             width=700, height=500)
fig.show()

Social Hours per week

다 비슷하고 student가 낮은 경향

# Box plot of weekly social hours for each occupation.
fig = px.box(df, x="occupation", y="social_hours_per_week",
             title="Social Hours per week by occupation",
             width=700, height=500)
fig.show()

Mental Wellness

Mental Wellness는 student가 평균적으로 가장 낮고, employed와 self-employed가 그다음으로 낮으며, retired와 unemployed가 높다. 그나저나 student의 분산이 꽤 클 듯하다.

# Box plot of the mental wellness index (0-100) for each occupation.
fig = px.box(df, x="occupation", y="mental_wellness_index_0_100",
             title="Mental Wellness by occupation",
             width=700, height=500)
fig.show()

Trend Check

전반적 경향: screen time이 늘어날수록 sleep 이 줄어든다.

# Scatter of total screen time against sleep hours with an OLS trend line.
fig = px.scatter(
    df, x="screen_time_hours", y="sleep_hours",
    trendline="ols",
    title="Screen Time vs Sleep Hours",
    width=700, height=500,
)
fig.show()

stress level 이 높을수록 screen time시간이 길고, sleep hours는 줄어드는 trend

# Same screen-time vs sleep scatter (OLS trend line), points coloured by stress level.
fig = px.scatter(
    df, x="screen_time_hours", y="sleep_hours",
    color='stress_level_0_10',
    labels={"stress_level_0_10": "Stress Level (0-10)"},
    trendline="ols",
    title="Screen Time vs Sleep Hours",
    width=700, height=500,
)
fig.show()

productivity가 낮을수록 screen time hour도 길고 sleep hour는 낮아지는 경향

# Same screen-time vs sleep scatter (OLS trend line), points coloured by productivity.
fig = px.scatter(
    df, x="screen_time_hours", y="sleep_hours",
    color='productivity_0_100',
    labels={"productivity_0_100": "Productivity (0-100)"},
    trendline="ols",
    title="Screen Time vs Sleep Hours",
    width=700, height=500,
)
fig.show()

Mental Wellness

Mental Wellness가 낮을수록 screen time도 길고 sleep hour도 낮아지는 경향

# Same screen-time vs sleep scatter (OLS trend line), points coloured by mental wellness index.
fig = px.scatter(
    df, x="screen_time_hours", y="sleep_hours",
    color='mental_wellness_index_0_100',
    labels={"mental_wellness_index_0_100": "Mental Wellness (0-100)"},
    trendline="ols",
    title="Screen Time vs Sleep Hours",
    width=900, height=500,
)
fig.show()

screen_time_hours, sleep_hours에 따라 graph 확인해본 결과 stress_level_0_10, productivity_0_100, mental_wellness_index_0_100가 영향이 있어보였다.

Correlation

# Pairwise Pearson correlation (the DataFrame.corr default, spelled out here)
# between the screen-time, sleep, stress, productivity, exercise, social and
# wellness columns, shown as an annotated heatmap.
corr = df.loc[:, [
    'screen_time_hours',
    'work_screen_hours',
    'leisure_screen_hours',
    'sleep_hours',
    'sleep_quality_1_5',
    'stress_level_0_10',
    'productivity_0_100',
    'exercise_minutes_per_week',
    'social_hours_per_week',
    'mental_wellness_index_0_100',
]].corr(method='pearson')
px.imshow(
    corr,
    title="Correlation Heatmap",
    text_auto=True,
    width=1000, height=1000,
)

sleep_quality_1_5는 1~5점의 순서형 척도이므로 Pearson 대신 Spearman correlation을 사용

# Spearman rank correlation between sleep hours and the 1-5 sleep-quality rating.
spearman_corr, spearman_p = spearmanr(
    a=df['sleep_hours'],
    b=df['sleep_quality_1_5'],
)
# Both numbers are printed to 3 decimals, so a very small p-value shows as 0.000.
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3f}")

Spearman correlation: 0.626, p-value: 0.000

잠자는 시간과 수면의 질의 상관계수는 0.626, p-value < 0.001로 연관이 있다는 결과,
아래 그림으로 봐도 양의 상관관계가 있는 것으로 보인다.

# Box plot of sleep hours per sleep-quality rating, with every data point drawn.
fig = px.box(
    df, x='sleep_quality_1_5', y='sleep_hours',
    points="all",
    title="Sleep Hours by Sleep Quality (1-5)",
    width=900, height=500,
)
fig.show()

stress level vs mental wellness

# Spearman rank correlation between the mental wellness index and stress level.
spearman_corr, spearman_p = spearmanr(
    a=df['mental_wellness_index_0_100'],
    b=df['stress_level_0_10'],
)
# Both numbers are printed to 3 decimals, so a very small p-value shows as 0.000.
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3f}")

Spearman correlation: -0.871, p-value: 0.000

강한 음의 상관관계, 유의미함

# Scatter of mental wellness index against stress level with an OLS trend line.
px.scatter(
    df,
    x="mental_wellness_index_0_100",
    y="stress_level_0_10",
    trendline="ols",
    width=900,
    height=500,
)

mental wellness vs productivity

# Spearman rank correlation between the mental wellness index and productivity.
spearman_corr, spearman_p = spearmanr(
    a=df['mental_wellness_index_0_100'],
    b=df['productivity_0_100'],
)
# Both numbers are printed to 3 decimals, so a very small p-value shows as 0.000.
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3f}")

Spearman correlation: 0.869, p-value: 0.000

강한 양의 상관, 유의미함

# Scatter of mental wellness index against productivity with an OLS trend line.
px.scatter(
    df,
    x="mental_wellness_index_0_100",
    y="productivity_0_100",
    trendline="ols",
    width=900,
    height=500,
)

productivity vs stress level

# Spearman rank correlation between productivity and stress level.
spearman_corr, spearman_p = spearmanr(
    a=df['productivity_0_100'],
    b=df['stress_level_0_10'],
)
# Both numbers are printed to 3 decimals, so a very small p-value shows as 0.000.
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3f}")

Spearman correlation: -0.856, p-value: 0.000

강한 음의 상관, 생산성이 커질수록 스트레스 수준이 감소

# Scatter of productivity against stress level (no trend line).
px.scatter(
    df,
    x="productivity_0_100",
    y="stress_level_0_10",
    width=900,
    height=500,
)

screen time vs stress level

# Spearman rank correlation between total screen time and stress level.
spearman_corr, spearman_p = spearmanr(
    a=df['screen_time_hours'],
    b=df['stress_level_0_10'],
)
# Both numbers are printed to 3 decimals, so a very small p-value shows as 0.000.
print(f"Spearman correlation: {spearman_corr:.3f}, p-value: {spearman_p:.3f}")

Spearman correlation: 0.719, p-value: 0.000

강한 양의 상관, 유의미함, 스크린 타임이 길수록 스트레스가 많아짐

# Scatter of total screen time against stress level (no trend line).
px.scatter(
    df,
    x="screen_time_hours",
    y="stress_level_0_10",
    width=900,
    height=500,
)