-
Notifications
You must be signed in to change notification settings - Fork 0
/
cluster_total_OPTICS.R
52 lines (30 loc) · 1.1 KB
/
cluster_total_OPTICS.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Load File, Load Packages
# stringsAsFactors = TRUE is needed here: since R 4.0.0 read.csv() keeps text
# columns as character, and cluster::daisy() (used further down) rejects
# character columns. As factors they are handled as nominal variables.
bank <- read.csv(
  "bank-additional-full.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = TRUE
)
library(rminer)
library(CrossClustering)
library(dplyr)
library(cluster)
library(dbscan)
library(diffusionMap)
set.seed(1)
# Create an artificial time axis beforehand. The website noted that the values
# were chronologically sorted, so the row position identifies the chronology.
# Kept as double, matching the type the row-name conversion used to produce.
time_axis <- as.numeric(seq_len(nrow(bank)))
bank_time <- cbind(bank, time_axis = time_axis)
# Feature frame for clustering: everything except the target column. The
# target is dropped by name instead of by position so that a changed column
# order cannot silently remove a feature and keep the target.
stopifnot("y" %in% names(bank_time))
feature_cols <- setdiff(names(bank_time), "y")
bank_time_without_y <- bank_time[, feature_cols, drop = FALSE]
#----------------------Clustering----------------------------#
# Pairwise Gower dissimilarities over the feature frame (target excluded).
d <- daisy(bank_time_without_y, metric = "gower")
# Radius estimated from the dissimilarities; reused for both OPTICS and the
# DBSCAN-style extraction below.
epsilon <- epsilonCompute(d, p = 0.01)
preclusters <- optics(d, eps = epsilon)
clusters <- extractDBSCAN(preclusters, eps_cl = epsilon)
# Attach the cluster label of every row to the full data (target included).
data <- bank_time
data$cluster <- clusters$cluster
# dbscan labels noise points with cluster id 0. Noise is not a cluster, so it
# is excluded from the cluster count and reported separately.
nrclust <- length(setdiff(clusters$cluster, 0L))
nrnoise <- sum(clusters$cluster == 0L)
# printing clustering information
cat("amount of clusters training set:", nrclust, "\n")
cat("amount of noise points:", nrnoise, "\n")
# memory clean
gc()
# Write file
write.table(data, "~/thesis/data/total_with_clust_OPTICS.txt", sep = ";")
gc()