|
之前用Java写过一个K-means算法,这次想用Scala重写这个例子练练手。总结下来就是对Scala还不是很熟悉,还需要慢慢加强;对于Scala中的List、数组、Map等集合还需深入了解。
Scala中的foreach用起来还是比较方便的,定义数据类型的方式相比Java也更简单。
for循环中遇到一个开闭区间的问题:下面代码中的 until 不包含 D_LEN 这个值(左闭右开区间),如果换成 to 则包含 D_LEN(闭区间),这一点需要注意。
- for (i <- 0 until D_LEN) {
- t(i) = add(t(i), list.point(i))
- }
复制代码 下面是Scala K-means程序
清单1.
- package com.test.zhuoer
- /**
-  * Entity used by the K-means clustering: one data point plus its metadata.
-  *
-  * Each constructor argument is stored in a mutable public field; the
-  * coordinate array is stored by reference, not copied.
-  *
-  * Open question kept from the original author: how should this constructor
-  * be shaped if the number of parameters grows?
-  *
-  * @param pointArg   coordinates of the data point
-  * @param attrStrArg attribute string describing the point
-  * @param idArg      identifier of the entity
-  */
- class KEntity(pointArg: Array[Double], attrStrArg: String, idArg: Int) {
-   var point: Array[Double] = pointArg
-   var attrStr: String = attrStrArg // attribute string
-   var id: Int = idArg // ID
- }
复制代码 清单2.
- package com.test.zhuoer
- import scala.util.control.Breaks._
- object Keams {
- // Mutable configuration and working state shared by the methods of this object.
- // Number of clusters (the original note called it "cluster size"); init,
- // initKCluster, order and exec all loop or compare against K.
- var K = 3 //簇大小
- // Current cluster centers, keyed by cluster index.
- var initCluster = Map[Int, Array[Double]]() // 聚类中心
- // Data source: the entities to be clustered.
- var datasource = List[KEntity]() //数据源
- // Dimensionality of every data point.
- var D_LEN = 4; //数据维度
- // Cluster index -> entities currently assigned to that cluster.
- var k_Cluster = Map[Int, List[KEntity]]() //用来分类的数据Map
- // Per cluster: distance between its previous and its recomputed center
- // (written by init and newCenter). Sized from K when this line is evaluated.
- var ctDistance = new Array[Double](K) //每次迭代聚类中心点
- // Precision control: exec compares every ctDistance entry against this value.
- var DISTANCE = 0d //精度控制
- /**
-  * Picks K initial cluster centers at random from `datasource`.
-  *
-  * For every chosen center k this also sets `k_Cluster(k)` to an empty list
-  * and `ctDistance(k)` to -1, a sentinel meaning "no center movement has been
-  * measured yet".
-  *
-  * A candidate whose coordinates equal an already chosen center is skipped:
-  * `order` resolves distance ties in favour of the lower cluster index, so a
-  * duplicated center would leave one cluster without any points.
-  *
-  * @throws IllegalArgumentException if `datasource` holds fewer than K
-  *                                  distinct points
-  */
- def init: Unit = {
-   // Without K distinct points the selection loop below could never finish.
-   val distinctPoints = datasource.map(_.point.toSeq).distinct.size
-   require(distinctPoints >= K, s"need at least $K distinct points but found $distinctPoints")
-   var index_k = 0
-   while (initCluster.size != K) {
-     val t = scala.util.Random.nextInt(datasource.size)
-     // Use the randomly drawn point (the old code drew t but then read datasource(index_k)).
-     val candidate = datasource(t).point
-     val alreadyChosen = initCluster.values.exists(c => c.sameElements(candidate))
-     if (!alreadyChosen) {
-       initCluster += (index_k -> candidate) // initial center
-       k_Cluster += (index_k -> List()) // cluster starts without members
-       ctDistance(index_k) = -1 // sentinel: movement not measured yet
-       index_k = index_k + 1
-     }
-   }
- }
- /**
-  * Resets `k_Cluster` so that every cluster index in `0 until K` maps to an
-  * empty list.
-  */
- def initKCluster: Unit = {
-   // Explicit `: Unit =` replaces the deprecated procedure syntax `def f { ... }`.
-   k_Cluster = (0 until K).foldLeft(Map[Int, List[KEntity]]()) { (acc, i) =>
-     acc + (i -> List[KEntity]())
-   }
- }
- /**
-  * Addition.
-  *
-  * @return `d1 + d2`
-  */
- def add(d1: Double, d2: Double): Double = {
-   val total = d1 + d2
-   total
- }
- /**
-  * Subtraction.
-  *
-  * @return `d1 - d2`
-  */
- def sub(d1: Double, d2: Double): Double = {
-   val difference = d1 - d2
-   difference
- }
- /**
-  * Multiplication.
-  *
-  * @return `d1 * d2`
-  */
- def mul(d1: Double, d2: Double): Double = {
-   val product = d1 * d2
-   product
- }
- /**
-  * Division. Follows plain `Double` arithmetic, so dividing by zero yields an
-  * infinity or NaN instead of throwing.
-  *
-  * @return `d1 / d2`
-  */
- def div(d1: Double, d2: Double): Double = {
-   val quotient = d1 / d2
-   quotient
- }
- /**
-  * Euclidean distance between two points.
-  *
-  * Both arrays must have exactly `D_LEN` elements. The previous version
-  * returned 0.0 on a length mismatch, which made a malformed point look like a
-  * perfect match for every center.
-  *
-  * @param d1 coordinates of the first point
-  * @param d2 coordinates of the second point
-  * @return the square root of the summed squared coordinate differences
-  * @throws IllegalArgumentException if either array does not have `D_LEN` elements
-  */
- def distance(d1: Array[Double], d2: Array[Double]): Double = {
-   require(
-     d1.length == D_LEN && d2.length == D_LEN,
-     s"expected two points of dimension $D_LEN but got ${d1.length} and ${d2.length}")
-   var sum = 0d
-   for (i <- 0 until D_LEN) {
-     sum += Math.pow(sub(d1(i), d2(i)), 2)
-   }
-   Math.sqrt(sum)
- }
- /**
-  * Recomputes every cluster center as the coordinate-wise mean of the points
-  * currently assigned to that cluster.
-  *
-  * For each cluster, `ctDistance` receives the distance between the previous
-  * center and the new one, and `initCluster` is updated with the new center.
-  *
-  * A cluster without members keeps its previous center and records a movement
-  * of 0: dividing the coordinate sums by a size of 0 would otherwise produce
-  * NaN coordinates and a NaN entry in `ctDistance`.
-  */
- def newCenter(): Unit = {
-   k_Cluster.foreach { case (clusterId, members) =>
-     if (members.isEmpty) {
-       ctDistance(clusterId) = 0d
-     } else {
-       val count = members.size
-       // Coordinate-wise sum over all members of this cluster.
-       val sums = new Array[Double](D_LEN)
-       members.foreach { entity =>
-         for (i <- 0 until D_LEN) {
-           sums(i) = add(sums(i), entity.point(i))
-         }
-       }
-       val ncc = sums.map(s => div(s, count))
-       // Measure the movement before the old center is replaced.
-       ctDistance(clusterId) = distance(initCluster(clusterId), ncc)
-       initCluster += (clusterId -> ncc)
-     }
-   }
- }
- /**
-  * Assignment step: rebuilds `k_Cluster` by putting every entity of
-  * `datasource` into the cluster whose center in `initCluster` is nearest.
-  *
-  * Ties go to the lowest cluster index, and entities keep their `datasource`
-  * order inside each cluster. Clusters that receive no entity map to an empty
-  * list.
-  */
- def order(): Unit = {
-   // Index of the nearest center; the strict `<` keeps the first minimum.
-   def nearest(point: Array[Double]): Int = {
-     var best = 0
-     // Infinity instead of a large magic number, so no finite distance is ever ignored.
-     var bestDistance = Double.PositiveInfinity
-     for (j <- 0 until K) {
-       val candidate = distance(point, initCluster(j))
-       if (candidate < bestDistance) {
-         bestDistance = candidate
-         best = j
-       }
-     }
-     best
-   }
-   // One pass over the list; groupBy preserves the encounter order per group.
-   val grouped = datasource.groupBy(entity => nearest(entity.point))
-   initKCluster
-   for (j <- 0 until K) {
-     k_Cluster += (j -> grouped.getOrElse(j, List[KEntity]()))
-   }
- }
- /**
-  * Runs the clustering: alternates the assignment step (`order`) and the
-  * center update (`newCenter`) until every center has stopped moving.
-  *
-  * A center counts as settled when its `ctDistance` entry is measured
-  * (non-negative; `init` writes -1 as "not measured yet") and not larger than
-  * `DISTANCE`. With the default `DISTANCE` of 0 this matches the former exact
-  * equality test, but a non-zero tolerance now works as well.
-  *
-  * Prints the iteration counter after every center update.
-  */
- def exec: Unit = {
-   var c = 0
-   var converged = false
-   while (!converged) {
-     order() // reassign every point to its nearest center
-     val settled = ctDistance.count(d => d >= 0 && d <= DISTANCE)
-     if (settled == K) {
-       converged = true
-     } else {
-       newCenter() // recompute the centers and their movement
-       println("--------------迭代次数:" + c)
-       c = c + 1
-     }
-   }
- }
- /**
-  * Entry point: loads the built-in sample data into `datasource`, runs the
-  * clustering and prints the attribute string of every member, cluster by
-  * cluster.
-  */
- def main(args: Array[String]): Unit = {
-   println("-------------Start")
-   // Sample data: one row per point, four coordinates each.
-   val d: Array[Array[Double]] = Array(
-     Array(6, 12, 8929, 1474),
-     Array(7, 12, 9149, 9952),
-     Array(4, 12, 3992, 5822),
-     Array(3, 12, 1626, 360),
-     Array(32, 12, 3563, 39630),
-     Array(38, 12, 303451, 34083239),
-     Array(66, 12, 133102, 6468),
-     Array(14, 12, 38860, 15140),
-     Array(128, 72, 271390, 39019349),
-     Array(111, 12, 0, 0),
-     Array(61, 12, 18626, 664),
-     Array(40, 12, 3626, 660),
-     Array(63, 42, 2290136, 3419991),
-     Array(1, 12, 0, 14000),
-     Array(5, 12, 5723, 998),
-     Array(6, 12, 9032, 1512),
-     Array(102, 72, 20134467, 25894663),
-     Array(5, 12, 5723, 5998),
-     Array(101, 72, 621319, 15322448),
-     Array(6, 12, 9095, 1542),
-     Array(6, 12, 9095, 1542),
-     Array(6, 12, 9095, 1542),
-     Array(6, 12, 9095, 1542),
-     Array(6, 12, 9095, 1542),
-     Array(4, 12, 3626, 660),
-     Array(6, 12, 9095, 1542),
-     Array(3, 12, 1626, 360),
-     Array(2, 12, 500, 19100),
-     Array(100, 12, 15420, 8208707),
-     Array(100, 12, 8927659, 38163823),
-     Array(11, 12, 7708, 5546),
-     Array(6, 12, 8849, 33459849),
-     Array(4, 12, 3626, 89160),
-     Array(14, 12, 38860, 14140),
-     Array(1, 12, 0, 79000),
-     Array(30, 12, 225482, 90391),
-     Array(31, 12, 230754, 119948),
-     Array(3, 12, 1626, 360),
-     Array(27, 12, 287635, 148541),
-     Array(11, 12, 7890, 13594),
-     Array(7, 12, 9095, 26942),
-     Array(30, 36, 311375, 73711),
-     Array(2, 12, 500, 84100),
-     Array(1, 12, 0, 228000),
-     Array(6, 12, 8992, 1504),
-     Array(3, 12, 1626, 360),
-     Array(4, 12, 3626, 8660),
-     Array(18, 12, 69041, 17594),
-     Array(18, 12, 69358, 16593),
-     Array(4, 12, 3706, 694),
-     Array(2, 12, 500, 3100),
-     Array(35, 12, 2907, 206039),
-     Array(8, 12, 7446, 10562),
-     Array(2, 12, 500, 3100),
-     Array(21, 12, 189051, 20076),
-     Array(21, 12, 188065, 19604),
-     Array(100, 12, 805762, 19934040),
-     Array(15, 12, 40589, 13905),
-     Array(8, 12, 7549, 2600),
-     Array(6, 12, 9095, 1542),
-     Array(15, 12, 39589, 7305),
-     Array(18, 12, 66326, 10959),
-     Array(7, 12, 9095, 9942),
-     Array(22, 12, 184273, 27756),
-     Array(8, 12, 7708, 8646),
-     Array(23, 12, 223512, 33280),
-     Array(4, 12, 3626, 660),
-     Array(4, 12, 3626, 660),
-     Array(6, 12, 8786, 80428),
-     Array(3, 12, 1626, 360),
-     Array(21, 12, 5515, 21260),
-     Array(1, 12, 0, 3000),
-     Array(1, 12, 0, 41000),
-     Array(30, 12, 330716, 91039),
-     Array(4, 12, 3626, 660),
-     Array(7, 12, 9329, 2022),
-     Array(1, 12, 0, 3000),
-     Array(2, 12, 500, 178700),
-     Array(2, 12, 500, 132900),
-     Array(2, 12, 500, 18500),
-     Array(1, 12, 0, 8200),
-     Array(2, 12, 500, 8300),
-     Array(2, 12, 500, 45900),
-     Array(2, 12, 500, 18900),
-     Array(2, 12, 500, 9500),
-     Array(2, 12, 500, 96500),
-     Array(2, 12, 500, 30700),
-     Array(2, 12, 500, 20100),
-     Array(8, 12, 7577, 67104),
-     Array(58, 36, 1397618, 17879602),
-     Array(11, 12, 8909, 11400),
-     Array(6, 12, 9181, 1594),
-     Array(2, 12, 563, 130),
-     Array(32, 12, 19756, 186422),
-     Array(6, 12, 8786, 6428),
-     Array(5, 12, 5786, 1028),
-     Array(6, 12, 8786, 1428),
-     Array(6, 12, 8786, 40006428),
-     Array(12, 12, 68456, 95240),
-     Array(2, 12, 500, 20100),
-     Array(11, 12, 7708, 7546),
-     Array(4, 12, 3786, 728),
-     Array(8, 12, 7577, 2604),
-     Array(5, 12, 5849, 1058),
-     Array(5, 12, 5786, 1028),
-     Array(6, 12, 8889, 1466),
-     Array(9, 12, 7708, 4246),
-     Array(9, 12, 7708, 4246),
-     Array(7, 12, 9095, 1942),
-     Array(4, 12, 3626, 660),
-     Array(7, 12, 8500, 1700),
-     Array(4, 12, 3500, 600),
-     Array(2, 12, 563, 130),
-     Array(9, 12, 7708, 4246),
-     Array(4, 12, 13000, 400),
-     Array(1, 12, 0, 3000),
-     Array(3, 12, 1500, 300),
-     Array(7, 12, 9095, 1942),
-     Array(10, 12, 6879, 4774),
-     Array(4, 12, 3626, 27660),
-     Array(11, 12, 7141, 7308),
-     Array(9, 12, 7403, 5092),
-     Array(19, 12, 107219, 22580),
-     Array(15, 12, 39589, 14305),
-     Array(32, 12, 8945, 229632),
-     Array(7, 12, 9212, 1982),
-     Array(1, 12, 0, 2000),
-     Array(2, 12, 500, 5100),
-     Array(1, 12, 0, 5000),
-     Array(2, 12, 500, 5100),
-     Array(23, 12, 134301, 37817),
-     Array(3, 12, 1626, 360),
-     Array(2, 12, 626, 160),
-     Array(32, 12, 174216, 160294),
-     Array(1, 12, 0, 342000),
-     Array(2, 12, 500, 120100),
-     Array(21, 12, 8515, 19560),
-     Array(15, 12, 40854, 12462),
-     Array(4, 12, 3706, 694),
-     Array(60, 12, 0, 0),
-     Array(60, 12, 0, 0),
-     Array(14, 12, 38860, 9140),
-     Array(21, 12, 7689, 32237),
-     Array(4, 12, 3626, 660),
-     Array(4, 12, 3626, 660),
-     Array(6, 12, 8889, 362466),
-     Array(3, 12, 1626, 360),
-     Array(1, 12, 0, 3000),
-     Array(2, 12, 563, 5130),
-     Array(2, 12, 500, 62100),
-     Array(4, 12, 3626, 660),
-     Array(45, 12, 1508096, 8341432),
-     Array(1, 12, 0, 114000),
-     Array(4, 12, 3626, 12660),
-     Array(1, 12, 0, 9000),
-     Array(4, 12, 3626, 234660),
-     Array(1, 12, 0, 132000),
-     Array(60, 12, 3293822, 40645),
-     Array(12, 12, 68658, 7290),
-     Array(6, 12, 8786, 8428),
-     Array(4, 12, 3786, 2728),
-     Array(1, 12, 0, 5000),
-     Array(1, 12, 0, 400000000),
-     Array(1, 12, 0, 2000),
-     Array(2, 12, 626, 5160),
-     Array(32, 12, 273220, 152515),
-     Array(2, 12, 500, 5100),
-     Array(1, 12, 0, 8000),
-     Array(1, 12, 0, 5000))
-   // Each entity is prepended, so datasource ends up in reverse row order.
-   // NOTE(review): every entity is created with id 0 - confirm whether unique ids were intended.
-   for (row <- d) {
-     val label = s"${row(0)} ${row(1)} ${row(2)} ${row(3)}"
-     datasource = new KEntity(row, label, 0) :: datasource
-   }
-   init // choose the initial centers
-   exec // run the clustering
-   k_Cluster.foreach { case (clusterId, members) =>
-     println("---------------K" + clusterId)
-     members.foreach(member => println("[" + member.attrStr + "]"))
-   }
- }
- }
复制代码 清单3.
执行结果 |
|