1.项目背景
在做交通路线分析的时候,客户需要找出车辆的行车规律,我们将车辆每天的行车路线当做一个数据样本,总共有365天或是更多,从这些数据中通过聚类来获得行车路线规律统计分析。
我首先想到是K-means算法,不过它的算法思想是任选K个中心点,然后不停的迭代,在迭代的过程中需要不停的更新中心点。在我们着这个项目中,此方案不能解决,因为我们是通过编辑距离来计算两条路线的相似度。可以参考(1.交通聚类:编辑距离 (Levenshtein距离)Java实现) 这篇文章了解一下编辑距离。当我们第一步选出k个中心点后,并且两两计算编辑距离,然后再重新选择中心点,这时问题出来了,我们得到了编辑距离的均值,是数字化的。如果在根据这个均值两两比较编辑距离就无法实现了。故此方案废弃。
2.层次聚类概述(Hierarchical Clustering)
基于层次的聚类方法(系统聚类方法):对给定的数据集进行层次分解,直到某种条件满足为止。
1.凝聚的层次聚类:自底向上,首先将每个对象作为一个族开始,每一步合并两个最近的簇,直到满足簇的数目。如AGNES算法。每一项自成一类;迭代,将最近的两类合并为一类。
2.分裂的层次聚类: 自顶向下,从包含所有对象的一个簇开始,每一步分裂一个簇,直到簇的数目。如DLANA算法。将所有项看做一类;找出最不相似的项分裂出去成为两类。
相信大家读完上边的陈述已经明白是怎么回事了。下边是代码,供以后用到的朋友学习研究。
3.代码:
package agenes; /** * Created by zzy on 15/11/15. */import java.util.ArrayList;import java.util.List;public class Cluster { private List<DataPoint> dataPoints = new ArrayList<DataPoint>(); // 类簇中的样本点 private String clusterName; public List<DataPoint> getDataPoints() { return dataPoints; } public void setDataPoints(List<DataPoint> dataPoints) { this.dataPoints = dataPoints; } public String getClusterName() { return clusterName; } public void setClusterName(String clusterName) { this.clusterName = clusterName; }}
package agenes; /** * Created by zzy on 15/11/15. */import java.util.ArrayList;import java.util.List;public class ClusterAnalysis { public List<Cluster> startAnalysis(List<DataPoint> dataPoints,int ClusterNum){ List<Cluster> finalClusters=new ArrayList<Cluster>(); List<Cluster> originalClusters=initialCluster(dataPoints); finalClusters=originalClusters; while(finalClusters.size()>ClusterNum){ double min=Double.MAX_VALUE; int mergeIndexA=0; int mergeIndexB=0; for(int i=0;i<finalClusters.size();i++){ for(int j=0;j<finalClusters.size();j++){ if(i!=j){ Cluster clusterA=finalClusters.get(i); Cluster clusterB=finalClusters.get(j); List<DataPoint> dataPointsA=clusterA.getDataPoints(); List<DataPoint> dataPointsB=clusterB.getDataPoints(); for(int m=0;m<dataPointsA.size();m++){ for(int n=0;n<dataPointsB.size();n++){ double tempDis=getDistance(dataPointsA.get(m),dataPointsB.get(n)); if(tempDis<min){ min=tempDis; mergeIndexA=i; mergeIndexB=j; } } } } } //end for j }// end for i //合并cluster[mergeIndexA]和cluster[mergeIndexB] finalClusters=mergeCluster(finalClusters,mergeIndexA,mergeIndexB); }//end while return finalClusters; } private List<Cluster> mergeCluster(List<Cluster> clusters,int mergeIndexA,int mergeIndexB){ if (mergeIndexA != mergeIndexB) { // 将cluster[mergeIndexB]中的DataPoint加入到 cluster[mergeIndexA] Cluster clusterA = clusters.get(mergeIndexA); Cluster clusterB = clusters.get(mergeIndexB); List<DataPoint> dpA = clusterA.getDataPoints(); List<DataPoint> dpB = clusterB.getDataPoints(); for (DataPoint dp : dpB) { DataPoint tempDp = new DataPoint();// tempDp.setDataPointName(dp.getDataPointName());// tempDp.setDimensioin(dp.getDimensioin());// tempDp.setCluster(clusterA); tempDp = dp; tempDp.setCluster(clusterA); dpA.add(tempDp); } clusterA.setDataPoints(dpA); // List<Cluster> clusters中移除cluster[mergeIndexB] clusters.remove(mergeIndexB); } return clusters; } // 初始化类簇 private List<Cluster> initialCluster(List<DataPoint> dataPoints){ List<Cluster> originalClusters=new ArrayList<Cluster>(); for(int i=0;i<dataPoints.size();i++){ DataPoint tempDataPoint=dataPoints.get(i); List<DataPoint> tempDataPoints=new ArrayList<DataPoint>(); tempDataPoints.add(tempDataPoint); Cluster tempCluster=new Cluster(); tempCluster.setClusterName("Cluster "+String.valueOf(i)); tempCluster.setDataPoints(tempDataPoints); tempDataPoint.setCluster(tempCluster); originalClusters.add(tempCluster); } return originalClusters; } //计算两个样本点之间的欧几里得距离 private double getDistance(DataPoint dpA, DataPoint dpB){ double distance=0; double[] dimA = dpA.getDimensioin(); double[] dimB = dpB.getDimensioin(); if (dimA.length == dimB.length) { for (int i = 0; i < dimA.length; i++) { double temp=Math.pow((dimA[i]-dimB[i]),2); distance=distance+temp; } distance=Math.pow(distance, 0.5); } return distance; } public static void main(String[] args){ ArrayList<DataPoint> dpoints = new ArrayList<DataPoint>(); double[] a={2,3}; double[] b={2,4}; double[] c={1,4}; double[] d={1,3}; double[] e={2,2}; double[] f={3,2}; double[] g={8,7}; double[] h={8,6}; double[] i={7,7}; double[] j={7,6}; double[] k={8,5};// double[] l={100,2};//孤立点 double[] m={8,20}; double[] n={8,19}; double[] o={7,18}; double[] p={7,17}; double[] q={8,20}; dpoints.add(new DataPoint(a,"a")); dpoints.add(new DataPoint(b,"b")); dpoints.add(new DataPoint(c,"c")); dpoints.add(new DataPoint(d,"d")); dpoints.add(new DataPoint(e,"e")); dpoints.add(new DataPoint(f,"f")); dpoints.add(new DataPoint(g,"g")); dpoints.add(new DataPoint(h,"h")); dpoints.add(new DataPoint(i,"i")); dpoints.add(new DataPoint(j,"j")); dpoints.add(new DataPoint(k,"k"));// dataPoints.add(new DataPoint(l,"l"));// dpoints.add(new DataPoint(l,"l")); dpoints.add(new DataPoint(m,"m")); dpoints.add(new DataPoint(n,"n")); dpoints.add(new DataPoint(o,"o")); dpoints.add(new DataPoint(p,"p")); dpoints.add(new DataPoint(q,"q")); int clusterNum=3; //类簇数 ClusterAnalysis ca=new ClusterAnalysis(); List<Cluster> clusters=ca.startAnalysis(dpoints, clusterNum); for(Cluster cl:clusters){ System.out.println("------"+cl.getClusterName()+"------"); List<DataPoint> tempDps=cl.getDataPoints(); for(DataPoint tempdp:tempDps){ System.out.println(tempdp.getDataPointName()); } } }}
package agenes;/** * Created by zzy on 15/11/15. */public class DataPoint { String dataPointName; // 样本点名 Cluster cluster; // 样本点所属类簇 private double dimensioin[]; // 样本点的维度 public DataPoint() { } public DataPoint(double[] dimensioin, String dataPointName) { this.dataPointName = dataPointName; this.dimensioin = dimensioin; } public double[] getDimensioin() { return dimensioin; } public void setDimensioin(double[] dimensioin) { this.dimensioin = dimensioin; } public Cluster getCluster() { return cluster; } public void setCluster(Cluster cluster) { this.cluster = cluster; } public String getDataPointName() { return dataPointName; } public void setDataPointName(String dataPointName) { this.dataPointName = dataPointName; }}
源码github:https://github.com/chaoren399/dkdemo/tree/master/agenes/src/agenes
还没有人抢沙发呢~