// NOTE: This is a Stata "do" file. To run the analysis, open Stata, replace the
// file paths below, and run the script.
version 17
cls
clear all

import delimited "[insert path to the file]\autodeath_minimized_dataset.csv", case(preserve)

// Screen out speeders: a total response time under one minute is too fast to
// realistically read anything (these respondents answered "Completely agree"
// to everything).
drop if total_response_time_min < 1

// Straight-lining diagnostics: count respondents who gave the identical answer
// to all five choice cards (CC_R_1 ... CC_R_5).
count if CC_R_1 == CC_R_2 & CC_R_2 == CC_R_3 & CC_R_3 == CC_R_4 & CC_R_4 == CC_R_5 // all five responses identical
count if CC_R_1 == CC_R_2 & CC_R_2 == CC_R_3 & CC_R_3 == CC_R_4 & CC_R_4 == CC_R_5 & CC_R_5 == 1 // all at the most negative response (1)
count if CC_R_1 == CC_R_2 & CC_R_2 == CC_R_3 & CC_R_3 == CC_R_4 & CC_R_4 == CC_R_5 & CC_R_5 == 5 // all at the most positive response (5)

// Drop straight-liners: no variation across the CC responses likely means the
// respondent ignored the task or did not understand it. Keeping them would
// bias the within-person variance in the mixed models downward, since they
// contribute no variation.
drop if CC_R_1 == CC_R_2 & CC_R_2 == CC_R_3 & CC_R_3 == CC_R_4 & CC_R_4 == CC_R_5

// Recode the choice-card factor codes (1-41) to percentage changes
// (-100 to +100 in steps of 5). Negative = improvement, positive = worse
// (see the response-cleaning drops further down, which rely on this sign
// convention).
recode CC_C* (1 = -100) (2 = -95) (3 = -90) (4 = -85) (5 = -80) (6 = -75) (7 = -70) (8 = -65) (9 = -60) (10 = -55) (11 = -50) (12 = -45) (13 = -40) (14 = -35) (15 = -30) (16 = -25) (17 = -20) (18 = -15) (19 = -10) (20 = -5) (21 = 0) (22 = 5) (23 = 10) (24 = 15) (25 = 20) (26 = 25) (27 = 30) (28 = 35) (29 = 40) (30 = 45) (31 = 50) (32 = 55) (33 = 60) (34 = 65) (35 = 70) (36 = 75) (37 = 80) (38 = 85) (39 = 90) (40 = 95) (41 = 100)
label values CC_C* . // detach the old value labels; they no longer match the recoded values

// Descriptive statistics
tab male
sum age
tab education

// Pivot long so that each choice card (CC = 1..5) is a separate row per respondent.
reshape long CC_R_, i(RespondentID) j(CC)

// Collapse the per-card factor columns (CC_C1_F* ... CC_C5_F*) into three
// generic factor variables (CC_CX_F1-CC_CX_F3) holding the values of the card
// shown on each row.
foreach v in 1 2 3 {
    generate CC_CX_F`v' = .
    replace CC_CX_F`v' = CC_C1_F`v' if CC == 1
    replace CC_CX_F`v' = CC_C2_F`v' if CC == 2
    replace CC_CX_F`v' = CC_C3_F`v' if CC == 3
    replace CC_CX_F`v' = CC_C4_F`v' if CC == 4
    replace CC_CX_F`v' = CC_C5_F`v' if CC == 5
}

// Clean up the per-card columns that were just folded into CC_CX_F1-CC_CX_F3.
drop CC_C1_F1 CC_C2_F1 CC_C3_F1 CC_C4_F1 CC_C5_F1 CC_C1_F2 CC_C2_F2 CC_C3_F2 CC_C4_F2 CC_C5_F2 CC_C1_F3 CC_C2_F3 CC_C3_F3 CC_C4_F3 CC_C5_F3

label variable CC_CX_F1 "Traffic jam"
label variable CC_CX_F2 "Minor personal injuries"
label variable CC_CX_F3 "Fatalities and serious injuries"
label variable CC_R_ "Acceptance"

// Additional response cleaning: delete single responses that make little sense
// and are likely protest answers ("I am for/against automated trucks"
// regardless of the scenario shown).
// NOTE(review): CC_R relies on Stata's variable-name abbreviation and resolves
// to CC_R_ here (unique after the reshape); spelling out CC_R_ would be safer.
drop if CC_R < 4 & CC_CX_F1 < 0 & CC_CX_F2 < 0 & CC_CX_F3 < 0 // all three factors improve, yet the response is negative
drop if CC_R > 4 & CC_CX_F1 > 0 & CC_CX_F2 > 0 & CC_CX_F3 > 0 // all three factors are worse, yet the response is positive

// ---------------- Prepare data for prospect theory ----------------

// Loss aversion: scale the outcomes so they line up with the theoretically
// existing loss-aversion data. The 1.31 factor is applied to positive values,
// which under the sign convention above are worsenings (losses).
foreach v of varlist CC_CX_F* {
    generate `v'_loss_aver = `v'
    replace `v'_loss_aver = `v' * 1.31 if `v' > 0
}

// Power-function transform with exponent 0.88: |x|^0.88, sign-flipped for
// positive (worsening) values so that improvements score positive and
// worsenings negative. Although the plotted diagram clearly indicates a log
// "S" curve, this is kept in the dataset to respect the original theory.
foreach v of varlist CC_CX_F1 CC_CX_F2 CC_CX_F3 {
    generate `v'_pwr88 = `v'
    replace `v'_pwr88 = abs(`v')^0.88
    replace `v'_pwr88 = (0 - `v'_pwr88) if `v' > 0
}

// A more aggressive power transform, same sign handling as above.
// NOTE(review): the variable suffix says _pwr50 but the exponent actually used
// is 0.30 — confirm which of the two was intended.
foreach v of varlist CC_CX_F1 CC_CX_F2 CC_CX_F3 {
    generate `v'_pwr50 = `v'
    replace `v'_pwr50 = abs(`v')^0.30
    replace `v'_pwr50 = (0 - `v'_pwr50) if `v' > 0
}

// Log measure, which actually seems to fit the plotted values (see next block).
// Log transform of each factor: log(|x| + 1), sign-flipped for positive
// (worsening) values — the same sign handling as the power transforms, so
// improvements score positive and worsenings negative.
foreach v of varlist CC_CX_F1 CC_CX_F2 CC_CX_F3 {
    generate `v'_log = `v'
    replace `v'_log = log(abs(`v') + 1)
    replace `v'_log = 0 if `v' == 0 // redundant safeguard: log(0 + 1) is already 0
    replace `v'_log = (0 - `v'_log) if `v' > 0
}

// Prospect-theory value: the log transform combined with loss aversion.
// NOTE(review): here `v' is the already sign-flipped log variable, so the 1.31
// multiplier hits positive log values — which correspond to RAW improvements
// (gains), not losses. That is the opposite side of the one scaled in the
// _loss_aver loop above; confirm this asymmetry is intended.
foreach v of varlist CC_CX_F1_log CC_CX_F2_log CC_CX_F3_log {
    generate `v'_la = `v'
    replace `v'_la = `v' * 1.31 if `v' > 0
}

// Mean acceptance per factor level — useful as a descriptive figure
// (the actual figure is made in Excel). The +1000 shift makes all over()
// group values positive, which is easier to manipulate.
generate CC_F1R1000 = CC_CX_F1 + 1000
generate CC_F2R1000 = CC_CX_F2 + 1000
generate CC_F3R1000 = CC_CX_F3 + 1000
mean CC_R_, over(CC_F1R1000)
mean CC_R_, over(CC_F2R1000)
mean CC_R_, over(CC_F3R1000)

// Finally, recenter the 1-7 acceptance scale to -3..+3 to be consistent with
// the figure.
replace CC_R_ = CC_R_ - 4

// ---------------- Prospect models ----------------
mixed CC_R_ || RespondentID: // empty (intercept-only) model, for R2 calculations
mixed CC_R_ CC_CX_F1 CC_CX_F2 CC_CX_F3 || RespondentID: // raw (linear) factors
mixed CC_R_ CC_CX_F1_loss_aver CC_CX_F2_loss_aver CC_CX_F3_loss_aver || RespondentID: // loss-aversion scaled
mixed CC_R_ CC_CX_F1_log CC_CX_F2_log CC_CX_F3_log || RespondentID: // log transformed
mixed CC_R_ CC_CX_F1_log_la CC_CX_F2_log_la CC_CX_F3_log_la || RespondentID: // log + loss aversion

// Manual calculation of level-1 (within-person) R2, because Stata has no
// built-in function for this: (res_empty - res_model) / res_empty.
// NOTE(review): the residual variances below are hard-coded from the model
// output above — re-run the models and update them if the data or any
// cleaning step changes.
local empty_model_lvl1_res = 3.845098
di (`empty_model_lvl1_res' - 2.369636) / `empty_model_lvl1_res'
di (`empty_model_lvl1_res' - 2.37722) / `empty_model_lvl1_res'
di (`empty_model_lvl1_res' - 2.088877) / `empty_model_lvl1_res'
di (`empty_model_lvl1_res' - 2.092655) / `empty_model_lvl1_res'

// Manual level-1 R2 calculation IF we do NOT leave out the non-logical
// responses (those indicating that making all things worse is acceptable, or
// improving all things is unacceptable). Generally this just removes about 16%
// of the explained variance from all models; the conclusions are the same.
// Sensitivity check: level-1 R2 recomputed from models fitted WITHOUT dropping
// the non-logical responses. Residual variances are hard-coded from that model
// output (same (res_empty - res_model) / res_empty formula as above).
local empty_model_lvl1_res = 2.803403
di (`empty_model_lvl1_res' - 2.160072) / `empty_model_lvl1_res'
di (`empty_model_lvl1_res' - 2.162814) / `empty_model_lvl1_res'
di (`empty_model_lvl1_res' - 2.034946) / `empty_model_lvl1_res'
di (`empty_model_lvl1_res' - 2.036818) / `empty_model_lvl1_res'

******* This part is for the calculation of the average acceptance rates *******
// Fresh start: re-import the raw data. Unlike the first pipeline, only the
// speeders are dropped here — straight-liners and non-logical responses are
// kept, so the averages reflect the full retained sample.
version 17
cls
clear all

import delimited "[insert path to the file]\autodeath_minimized_dataset.csv", case(preserve)

// Screen out speeders (see note in the first pipeline).
drop if total_response_time_min < 1

// Recode the choice-card factor codes (1-41) to percentage changes
// (-100 to +100 in steps of 5).
recode CC_C* (1 = -100) (2 = -95) (3 = -90) (4 = -85) (5 = -80) (6 = -75) (7 = -70) (8 = -65) (9 = -60) (10 = -55) (11 = -50) (12 = -45) (13 = -40) (14 = -35) (15 = -30) (16 = -25) (17 = -20) (18 = -15) (19 = -10) (20 = -5) (21 = 0) (22 = 5) (23 = 10) (24 = 15) (25 = 20) (26 = 25) (27 = 30) (28 = 35) (29 = 40) (30 = 45) (31 = 50) (32 = 55) (33 = 60) (34 = 65) (35 = 70) (36 = 75) (37 = 80) (38 = 85) (39 = 90) (40 = 95) (41 = 100)
label values CC_C* . // detach the old value labels; they no longer match the recoded values

// Pivot long so that each choice card (CC = 1..5) is a separate row per respondent.
reshape long CC_R_, i(RespondentID) j(CC)

// Collapse the per-card factor columns into three generic factor variables
// (same consolidation as in the first pipeline).
foreach v in 1 2 3 {
    generate CC_CX_F`v' = .
    replace CC_CX_F`v' = CC_C1_F`v' if CC == 1
    replace CC_CX_F`v' = CC_C2_F`v' if CC == 2
    replace CC_CX_F`v' = CC_C3_F`v' if CC == 3
    replace CC_CX_F`v' = CC_C4_F`v' if CC == 4
    replace CC_CX_F`v' = CC_C5_F`v' if CC == 5
}

// Clean up the per-card columns no longer in use.
drop CC_C1_F1 CC_C2_F1 CC_C3_F1 CC_C4_F1 CC_C5_F1 CC_C1_F2 CC_C2_F2 CC_C3_F2 CC_C4_F2 CC_C5_F2 CC_C1_F3 CC_C2_F3 CC_C3_F3 CC_C4_F3 CC_C5_F3

label variable CC_CX_F1 "Traffic jam"
label variable CC_CX_F2 "Minor personal injuries"
label variable CC_CX_F3 "Fatalities and serious injuries"
label variable CC_R_ "Acceptance"

// NOTE(review): male == 3 is set to missing — presumably an "other / prefer
// not to say" category; verify against the questionnaire codebook.
recode male (3 = .)

// Recenter the 1-7 acceptance scale to -3..+3 and summarize.
replace CC_R_ = CC_R_ - 4
sum CC_R_

// Regression model.
// Mixed models on the full retained sample, with demographic covariates.
mixed CC_R_ || RespondentID: // empty (intercept-only) model, for R2 calculations
mixed CC_R_ male age education CC_CX_F1 CC_CX_F2 CC_CX_F3 || RespondentID:

// Manual calculation of level-1 (residual) and level-2 (respondent-intercept)
// R2, because Stata has no built-in function for mixed models:
// (var_empty - var_model) / var_empty at each level.
// NOTE(review): the variance components below are hard-coded from the model
// output above — re-run the models and update them if the data changes.
local empty_model_lvl1_res = 2.810213
local empty_model_lvl2_res = 1.06233
di (`empty_model_lvl1_res' - 2.152775) / `empty_model_lvl1_res'
di (`empty_model_lvl2_res' - 1.039529) / `empty_model_lvl2_res'