时间: 2020-09-3|tag: 26次围观|0 条评论

1.项目背景

在做交通路线分析的时候,客户需要找出车辆的行车规律,我们将车辆每天的行车路线当做一个数据样本,总共有365天或是更多,从这些数据中通过聚类来获得行车路线规律统计分析。

我首先想到是K-means算法,不过它的算法思想是任选K个中心点,然后不停的迭代,在迭代的过程中需要不停的更新中心点。在我们着这个项目中,此方案不能解决,因为我们是通过编辑距离来计算两条路线的相似度。可以参考(1.交通聚类:编辑距离 (Levenshtein距离)Java实现) 这篇文章了解一下编辑距离。当我们第一步选出k个中心点后,并且两两计算编辑距离,然后再重新选择中心点,这时问题出来了,我们得到了编辑距离的均值,是数字化的。如果在根据这个均值两两比较编辑距离就无法实现了。故此方案废弃。

 

2.层次聚类概述(Hierarchical Clustering)

基于层次的聚类方法(系统聚类方法):对给定的数据集进行层次分解,直到某种条件满足为止。

1.凝聚的层次聚类:自底向上,首先将每个对象作为一个族开始,每一步合并两个最近的簇,直到满足簇的数目。如AGNES算法。每一项自成一类;迭代,将最近的两类合并为一类。

2.分裂的层次聚类: 自顶向下,从包含所有对象的一个簇开始,每一步分裂一个簇,直到簇的数目。如DLANA算法。将所有项看做一类;找出最不相似的项分裂出去成为两类。

 

相信大家读完上边的陈述已经明白是怎么回事了。下边是代码,供以后用到的朋友学习研究。

3.代码:

package agenes; /** * Created by zzy on 15/11/15. */import java.util.ArrayList;import java.util.List;public class Cluster {    private List<DataPoint> dataPoints = new ArrayList<DataPoint>(); // 类簇中的样本点    private String clusterName;    public List<DataPoint> getDataPoints() {        return dataPoints;    }    public void setDataPoints(List<DataPoint> dataPoints) {        this.dataPoints = dataPoints;    }    public String getClusterName() {        return clusterName;    }    public void setClusterName(String clusterName) {        this.clusterName = clusterName;    }}
 
package agenes; /** * Created by zzy on 15/11/15. */import java.util.ArrayList;import java.util.List;public class ClusterAnalysis {    public List<Cluster> startAnalysis(List<DataPoint> dataPoints,int ClusterNum){        List<Cluster> finalClusters=new ArrayList<Cluster>();        List<Cluster> originalClusters=initialCluster(dataPoints);        finalClusters=originalClusters;        while(finalClusters.size()>ClusterNum){            double min=Double.MAX_VALUE;            int mergeIndexA=0;            int mergeIndexB=0;            for(int i=0;i<finalClusters.size();i++){                for(int j=0;j<finalClusters.size();j++){                    if(i!=j){                        Cluster clusterA=finalClusters.get(i);                        Cluster clusterB=finalClusters.get(j);                        List<DataPoint> dataPointsA=clusterA.getDataPoints();                        List<DataPoint> dataPointsB=clusterB.getDataPoints();                        for(int m=0;m<dataPointsA.size();m++){                            for(int n=0;n<dataPointsB.size();n++){                                double tempDis=getDistance(dataPointsA.get(m),dataPointsB.get(n));                                if(tempDis<min){                                    min=tempDis;                                    mergeIndexA=i;                                    mergeIndexB=j;                                }                            }                        }                    }                } //end for j            }// end for i            //合并cluster[mergeIndexA]和cluster[mergeIndexB]            finalClusters=mergeCluster(finalClusters,mergeIndexA,mergeIndexB);        }//end while        return finalClusters;    }    private List<Cluster> mergeCluster(List<Cluster> clusters,int mergeIndexA,int mergeIndexB){        if (mergeIndexA != mergeIndexB) {            // 将cluster[mergeIndexB]中的DataPoint加入到 cluster[mergeIndexA]            Cluster clusterA = clusters.get(mergeIndexA);            Cluster clusterB = clusters.get(mergeIndexB);            List<DataPoint> dpA = clusterA.getDataPoints();            List<DataPoint> dpB = clusterB.getDataPoints();            for (DataPoint dp : dpB) {                DataPoint tempDp = new DataPoint();//                tempDp.setDataPointName(dp.getDataPointName());//                tempDp.setDimensioin(dp.getDimensioin());//                tempDp.setCluster(clusterA);                tempDp = dp;                tempDp.setCluster(clusterA);                dpA.add(tempDp);            }            clusterA.setDataPoints(dpA);            // List<Cluster> clusters中移除cluster[mergeIndexB]            clusters.remove(mergeIndexB);        }        return clusters;    }    // 初始化类簇    private List<Cluster> initialCluster(List<DataPoint> dataPoints){        List<Cluster> originalClusters=new ArrayList<Cluster>();        for(int i=0;i<dataPoints.size();i++){            DataPoint tempDataPoint=dataPoints.get(i);            List<DataPoint> tempDataPoints=new ArrayList<DataPoint>();            tempDataPoints.add(tempDataPoint);            Cluster tempCluster=new Cluster();            tempCluster.setClusterName("Cluster "+String.valueOf(i));            tempCluster.setDataPoints(tempDataPoints);            tempDataPoint.setCluster(tempCluster);            originalClusters.add(tempCluster);        }        return originalClusters;    }    //计算两个样本点之间的欧几里得距离    private double getDistance(DataPoint dpA, DataPoint dpB){        double distance=0;        double[] dimA = dpA.getDimensioin();        double[] dimB = dpB.getDimensioin();        if (dimA.length == dimB.length) {            for (int i = 0; i < dimA.length; i++) {                double temp=Math.pow((dimA[i]-dimB[i]),2);                distance=distance+temp;            }            distance=Math.pow(distance, 0.5);        }        return distance;    }    public static void main(String[] args){        ArrayList<DataPoint> dpoints = new ArrayList<DataPoint>();        double[] a={2,3};        double[] b={2,4};        double[] c={1,4};        double[] d={1,3};        double[] e={2,2};        double[] f={3,2};        double[] g={8,7};        double[] h={8,6};        double[] i={7,7};        double[] j={7,6};        double[] k={8,5};//       double[] l={100,2};//孤立点        double[] m={8,20};        double[] n={8,19};        double[] o={7,18};        double[] p={7,17};        double[] q={8,20};        dpoints.add(new DataPoint(a,"a"));        dpoints.add(new DataPoint(b,"b"));        dpoints.add(new DataPoint(c,"c"));        dpoints.add(new DataPoint(d,"d"));        dpoints.add(new DataPoint(e,"e"));        dpoints.add(new DataPoint(f,"f"));        dpoints.add(new DataPoint(g,"g"));        dpoints.add(new DataPoint(h,"h"));        dpoints.add(new DataPoint(i,"i"));        dpoints.add(new DataPoint(j,"j"));        dpoints.add(new DataPoint(k,"k"));//        dataPoints.add(new DataPoint(l,"l"));//        dpoints.add(new DataPoint(l,"l"));        dpoints.add(new DataPoint(m,"m"));        dpoints.add(new DataPoint(n,"n"));        dpoints.add(new DataPoint(o,"o"));        dpoints.add(new DataPoint(p,"p"));        dpoints.add(new DataPoint(q,"q"));        int clusterNum=3; //类簇数        ClusterAnalysis ca=new ClusterAnalysis();        List<Cluster> clusters=ca.startAnalysis(dpoints, clusterNum);        for(Cluster cl:clusters){            System.out.println("------"+cl.getClusterName()+"------");            List<DataPoint> tempDps=cl.getDataPoints();            for(DataPoint tempdp:tempDps){                System.out.println(tempdp.getDataPointName());            }        }    }}

 

package agenes;/** * Created by zzy on 15/11/15. */public class DataPoint {    String dataPointName; // 样本点名    Cluster cluster; // 样本点所属类簇    private double dimensioin[]; // 样本点的维度    public DataPoint() {    }    public DataPoint(double[] dimensioin, String dataPointName) {        this.dataPointName = dataPointName;        this.dimensioin = dimensioin;    }    public double[] getDimensioin() {        return dimensioin;    }    public void setDimensioin(double[] dimensioin) {        this.dimensioin = dimensioin;    }    public Cluster getCluster() {        return cluster;    }    public void setCluster(Cluster cluster) {        this.cluster = cluster;    }    public String getDataPointName() {        return dataPointName;    }    public void setDataPointName(String dataPointName) {        this.dataPointName = dataPointName;    }}
源码github:https://github.com/chaoren399/dkdemo/tree/master/agenes/src/agenes

文章转载于:https://www.cnblogs.com/chaoren399/p/5005731.html

原著是一个有趣的人,若有侵权,请通知删除

本博客所有文章如无特别注明均为原创。
复制或转载请以超链接形式注明转自起风了,原文地址《2.交通聚类 -层次聚类(agnes)Java实现
   

还没有人抢沙发呢~