官术网_书友最值得收藏!

Insurance, R - non-life insurance pricing

We have an example of using R to come up with the pricing for non-life products, specifically mopeds, at http://www.cybaea.net/journal/2012/03/13/R-code-for-Chapter-2-of-Non_Life-Insurance-Pricing-with-GLM/. The code first creates a table of the statistics available for the product line, then compares the pricing to actual statistics in use.

The first part of the code that accumulates the data is as follows:

con <- url("http://www2.math.su.se/~esbj/GLMbook/moppe.sas")
data <- readLines(con, n = 200L, warn = FALSE, encoding = "unknown")
close(con)
## Find the data range
data.start <- grep("^cards;", data) + 1L
data.end <- grep("^;", data[data.start:999L]) + data.start - 2L
table.1.2 <- read.table(text = data[data.start:data.end],
header = FALSE,
sep = "",
quote = "",
col.names = c("premiekl", "moptva", "zon", "dur",
"medskad", "antskad", "riskpre", "helpre", "cell"),
na.strings = NULL,
colClasses = c(rep("factor", 3), "numeric",
rep("integer", 4), "NULL"),
comment.char = "")
rm(con, data, data.start, data.end)
# Remainder of Script adds comments/descriptions
comment(table.1.2) <-
c("Title: Partial casco moped insurance from Wasa insurance, 1994--1999",
"Source: http://www2.math.su.se/~esbj/GLMbook/moppe.sas",
"Copyright: http://www2.math.su.se/~esbj/GLMbook/")
## See the SAS code for this derived field
table.1.2$skadfre = with(table.1.2, antskad / dur)
## English language column names as comments:
comment(table.1.2$premiekl) <-
c("Name: Class",
"Code: 1=Weight over 60kg and more than 2 gears",
"Code: 2=Other")
comment(table.1.2$moptva) <-
c("Name: Age",
"Code: 1=At most 1 year",
"Code: 2=2 years or more")
comment(table.1.2$zon) <-
c("Name: Zone",
"Code: 1=Central and semi-central parts of Sweden's three largest cities",
"Code: 2=suburbs and middle-sized towns",
"Code: 3=Lesser towns, except those in 5 or 7",
"Code: 4=Small towns and countryside, except 5--7",
"Code: 5=Northern towns",
"Code: 6=Northern countryside",
"Code: 7=Gotland (Sweden's largest island)")
comment(table.1.2$dur) <-
c("Name: Duration",
"Unit: year")
comment(table.1.2$medskad) <-
c("Name: Claim severity",
"Unit: SEK")
comment(table.1.2$antskad) <- "Name: No. claims"
comment(table.1.2$riskpre) <-
c("Name: Pure premium",
"Unit: SEK")
comment(table.1.2$helpre) <-
c("Name: Actual premium",
"Note: The premium for one year according to the tariff in force 1999",
"Unit: SEK")
comment(table.1.2$skadfre) <-
c("Name: Claim frequency",
"Unit: /year")
## Save results for later
save(table.1.2, file = "table.1.2.RData")
## Print the table (not as pretty as the book)
print(table.1.2)

The resultant first 10 rows of the table are as follows:

       premiekl moptva zon    dur medskad antskad riskpre helpre    skadfre
    1         1      1   1   62.9   18256      17    4936   2049 0.27027027
    2         1      1   2  112.9   13632       7     845   1230 0.06200177
    3         1      1   3  133.1   20877       9    1411    762 0.06761833
    4         1      1   4  376.6   13045       7     242    396 0.01858736
    5         1      1   5    9.4       0       0       0    990 0.00000000
    6         1      1   6   70.8   15000       1     212    594 0.01412429
    7         1      1   7    4.4    8018       1    1829    396 0.22727273
    8         1      2   1  352.1    8232      52    1216   1229 0.14768532
    9         1      2   2  840.1    7418      69     609    738 0.08213308
    10        1      2   3 1378.3    7318      75     398    457 0.05441486

Then, we go through each product/statistics to determine whether the pricing for a product is in line with others. Note, the repos = clause on the install.packages statement is a fairly new addition to R:

# make sure the packages we want to use are installed
install.packages(c("data.table", "foreach", "ggplot2"), dependencies = TRUE, repos = "http://cran.us.r-project.org")
# load the data table we need
if (!exists("table.1.2"))
load("table.1.2.RData")
library("foreach")
## We are looking to reproduce table 2.7 which we start building here,
## add columns for our results.
table27 <-
data.frame(rating.factor =
c(rep("Vehicle class", nlevels(table.1.2$premiekl)),
rep("Vehicle age", nlevels(table.1.2$moptva)),
rep("Zone", nlevels(table.1.2$zon))),
class =
c(levels(table.1.2$premiekl),
levels(table.1.2$moptva),
levels(table.1.2$zon)),
stringsAsFactors = FALSE)
## Calculate duration per rating factor level and also set the
## contrasts (using the same idiom as in the code for the previous
## chapter). We use foreach here to execute the loop both for its
## side-effect (setting the contrasts) and to accumulate the sums.
# new.cols are set to claims, sums, levels
new.cols <-
foreach (rating.factor = c("premiekl", "moptva", "zon"),
.combine = rbind) %do%
{
nclaims <- tapply(table.1.2$antskad, table.1.2[[rating.factor]], sum)
sums <- tapply(table.1.2$dur, table.1.2[[rating.factor]], sum)
n.levels <- nlevels(table.1.2[[rating.factor]])
contrasts(table.1.2[[rating.factor]]) <-
contr.treatment(n.levels)[rank(-sums, ties.method = "first"), ]
data.frame(duration = sums, n.claims = nclaims)
}
table27 <- cbind(table27, new.cols)
rm(new.cols)
#build frequency distribution
model.frequency <-
glm(antskad ~ premiekl + moptva + zon + offset(log(dur)),
data = table.1.2, family = poisson)
rels <- coef( model.frequency )
rels <- exp( rels[1] + rels[-1] ) / exp( rels[1] )
table27$rels.frequency <-
c(c(1, rels[1])[rank(-table27$duration[1:2], ties.method = "first")],
c(1, rels[2])[rank(-table27$duration[3:4], ties.method = "first")],
c(1, rels[3:8])[rank(-table27$duration[5:11], ties.method = "first")])
# note the severities involved
model.severity <-
glm(medskad ~ premiekl + moptva + zon,
data = table.1.2[table.1.2$medskad > 0, ],
family = Gamma("log"), weights = antskad)
rels <- coef( model.severity )
rels <- exp( rels[1] + rels[-1] ) / exp( rels[1] )
## Aside: For the canonical link function use
## rels <- rels[1] / (rels[1] + rels[-1])
table27$rels.severity <-
c(c(1, rels[1])[rank(-table27$duration[1:2], ties.method = "first")],
c(1, rels[2])[rank(-table27$duration[3:4], ties.method = "first")],
c(1, rels[3:8])[rank(-table27$duration[5:11], ties.method = "first")])
table27$rels.pure.premium <- with(table27, rels.frequency * rels.severity)
print(table27, digits = 2)

The resultant display is as follows:

       rating.factor class duration n.claims rels.frequency rels.severity
    1  Vehicle class     1     9833      391           1.00          1.00
    2  Vehicle class     2     8825      395           0.78          0.55
    11   Vehicle age     1     1918      141           1.55          1.79
    21   Vehicle age     2    16740      645           1.00          1.00
    12          Zone     1     1451      206           7.10          1.21
    22          Zone     2     2486      209           4.17          1.07
    3           Zone     3     2889      132           2.23          1.07
    4           Zone     4    10069      207           1.00          1.00
    5           Zone     5      246        6           1.20          1.21
    6           Zone     6     1369       23           0.79          0.98
    7           Zone     7      148        3           1.00          1.20
       rels.pure.premium
    1               1.00
    2               0.42
    11              2.78
    21              1.00
    12              8.62
    22              4.48
    3               2.38
    4               1.00
    5               1.46
    6               0.78
    7               1.20

Here, we can see that some vehicle classes (2,6) are priced very low in comparison to statistics for that vehicle where as other are overpriced (1222).

主站蜘蛛池模板: 晋江市| 定边县| 乌拉特后旗| 渑池县| 潞城市| 荃湾区| 汝城县| 扬中市| 河津市| 文昌市| 青州市| 牟定县| 永定县| 朝阳县| 侯马市| 油尖旺区| 茂名市| 广宗县| 台江县| 临沂市| 凤庆县| 定安县| 南涧| 芷江| 扶沟县| 安泽县| 天等县| 苍溪县| 久治县| 衡水市| 田东县| 余江县| 新津县| 甘德县| 双峰县| 株洲市| 韩城市| 龙门县| 漳平市| 东港市| 玉屏|