Consistent Hashing

yesjavame

浏览: 659519 次
性别:
来自: 杭州

最近访客更多访客>>

zjut_wyj

zjzouxu

jcyanfan

chenqi210

博主相关

博客

微博

相册

留言

关于我

文章分类

全部博客 (1234)

社区版块

存档分类

Security 算法 F#

Implementation

For completeness here is a simple implementation in Java. In order for consistent hashing to be effective it is important to have a hash function that mixes well. Most implementations of Object's hashCode do not mix well - for example, they typically produce a restricted number of small integer values - so we have a HashFunction interface to allow a custom hash function to be used. MD5 hashes are recommended here.

import java.util.Collection;
import java.util.SortedMap;
import java.util.TreeMap;

/**
 * A consistent-hash ring mapping arbitrary keys onto nodes of type {@code T}.
 *
 * <p>Every node is placed on the ring {@code numberOfReplicas} times; the
 * position of each replica is the hash of {@code node.toString()} followed by
 * the replica index. A key is served by the first replica found at or after
 * the key's own hash, wrapping around to the start of the ring.
 *
 * @param <T> node type; its {@code toString()} is used to derive ring positions
 */
public class ConsistentHash<T> {

 /** Hash function used both for node replicas and for looked-up keys. */
 private final HashFunction hashFunction;
 /** Number of ring positions created per node. */
 private final int numberOfReplicas;
 /** The ring itself: position on the ring -> node owning that position. */
 private final SortedMap<Integer, T> circle = new TreeMap<Integer, T>();

 /**
  * Builds a ring and registers every node of {@code nodes} on it.
  *
  * @param hashFunction     hash function for replicas and keys
  * @param numberOfReplicas number of ring positions per node
  * @param nodes            initial nodes
  */
 public ConsistentHash(HashFunction hashFunction, int numberOfReplicas,
     Collection<T> nodes) {
   this.hashFunction = hashFunction;
   this.numberOfReplicas = numberOfReplicas;

   for (T initialNode : nodes) {
     add(initialNode);
   }
 }

 /**
  * Places all replicas of {@code node} on the ring.
  *
  * @param node node to register
  */
 public void add(T node) {
   int replica = 0;
   while (replica < numberOfReplicas) {
     circle.put(ringPosition(node, replica), node);
     replica++;
   }
 }

 /**
  * Takes all replicas of {@code node} off the ring.
  *
  * @param node node to unregister
  */
 public void remove(T node) {
   int replica = 0;
   while (replica < numberOfReplicas) {
     circle.remove(ringPosition(node, replica));
     replica++;
   }
 }

 /**
  * Finds the node responsible for {@code key}.
  *
  * @param key key to look up
  * @return the owning node, or {@code null} when the ring has no nodes
  */
 public T get(Object key) {
   if (circle.isEmpty()) {
     return null;
   }
   final int keyHash = hashFunction.hash(key);
   // tailMap includes its lower bound, so an exact hit on a ring position
   // comes back as the first key of the tail view.
   final SortedMap<Integer, T> clockwise = circle.tailMap(keyHash);
   final Integer position;
   if (clockwise.isEmpty()) {
     // Nothing at or after the key's hash: wrap around to the ring start.
     position = circle.firstKey();
   } else {
     position = clockwise.firstKey();
   }

   // This lookup leaves a lot of room for optimization: finding the closest
   // integer greater than or equal to the hash among fewer than ten thousand
   // integers is a very simple algorithm and does not need a TreeMap.
   return circle.get(position);
 }

 /** Ring position of the {@code replica}-th replica of {@code node}. */
 private int ringPosition(T node, int replica) {
   return hashFunction.hash(node.toString() + replica);
 }
}
numberOfReplicas的经验值在100-200之间，这就是一个物理节点对应多少个虚拟节点，如果我们把环形拉直，其实就是每个
节点在数组中的位置，物理节点很少，比如10个物理节点，如果平均分布在Integer.MIN_VALUE到Integer.MAX_VALUE之间，那么每个节点间的区间
大约有2^29这么大，假如某一时间段的一些key的hash正好在这一范围，那么它们就被聚集到某一台物理节点上。在采用了虚拟节点
后，每个物理节点对应的虚拟节点和其它物理节点对应的虚拟节点是平均交叉分布的，极大地减少了节点区间带来的分布聚集。
以下是一个简单实现的测试：

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

/**
 * 类NodeManager.java的实现描述：
 * 
 * @author axman 2011-3-30 下午09:24:38
 */
public class NodeManager {
    private static final char[] hex = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
    public static class Node {
        // node's fields.
        private String ipAddr;
        private int    port;
        private String nodeName;
        public Node(String ipAddr, int port, String nodeName){
            this.ipAddr = ipAddr;
            this.port = port;
            this.nodeName = nodeName;
        }
        public String toString() {
            return nodeName + ":<" + ipAddr + ":" + port + ">";
        }
    }
    private final SortedMap<Integer, Node> nodeMap = new TreeMap<Integer, Node>();
    private int[]                          hashVal;
    private int                            virtualNums;
    public NodeManager(Node[] nodes, int virtualNums){
        this.virtualNums = virtualNums;
        for (Node node : nodes) {
            for (int i = 0; i < this.virtualNums; i++) {
                nodeMap.put(hash(node.toString() + i), node);
            }
        }
        Integer[] tmp = nodeMap.keySet().toArray(new Integer[0]);
        hashVal = copy(tmp);
        // 初始化，add,remove方法n天，n月，n年才会调用一次，所以用一点点开销转化为int[]为下面每秒
        // 千万次调用提升性能。
    }
    public void add(Node node) {
        for (int i = 0; i < this.virtualNums; i++) {
            nodeMap.put(hash(node.toString() + i), node);
        }
        Integer[] tmp = nodeMap.keySet().toArray(new Integer[0]);
        hashVal = copy(tmp);
    }
    public void remove(Node node) {
        for (int i = 0; i < this.virtualNums; i++) {
            nodeMap.remove((hash(node.toString() + i)));
        }
        Integer[] tmp = nodeMap.keySet().toArray(new Integer[0]);
        hashVal = copy(tmp);
    }
    /**
     * 环形中查找下一节点就是在有序整数数组中查找一个大于等于当前值的元素，完全的二分查找。 仅是返回值的判断条件不同。
     * 
     * @param key
     * @return
     */
    public Node getNode(String key) {
        int hash = hash(key);
        int low = 0, high = hashVal.length - 1;
        while (low <= high) {
            int mid = (low + high) / 2;
            int midVal = hashVal[mid];
            if (midVal >= hash) {
                if (mid == 0 || hashVal[mid - 1] <= hash) {
                    return nodeMap.get(midVal);
                }
                high = mid - 1;
            } else {
                low = mid + 1;
            }
        }
        return nodeMap.get(hashVal[0]);
    }
    private int[] copy(Integer[] src) {
        int[] tmp = new int[src.length];
        for (int i = 0; i < tmp.length; i++)
            tmp[i] = src[i];
        return tmp;
    }
    private int hash(String key) {
        try {
            java.security.MessageDigest md5 = java.security.MessageDigest.getInstance("MD5");
            md5.update(key.getBytes());
            byte[] data = md5.digest();
            char[] charArr = new char[32];
            for (int i = 0; i < data.length; i++) {
                charArr[i * 2] = hex[data[i] >>> 4 & 0xF];
                charArr[i * 2 + 1] = hex[data[i] & 0xF];
            }
            return new String(charArr).hashCode();
        } catch (Exception e) {
            return Integer.MIN_VALUE;
        }
    }
    public static void main(String[] args) {
        Node[] nodes = new Node[10];
        Map<Node, List<String>> map = new HashMap<Node, List<String>>();
        Map<Node, List<String>> map1 = new HashMap<Node, List<String>>();
  
        //make node
        for (int i = 0; i < nodes.length; i++) {
            nodes[i] = new Node("10.1.33.2" + i, 80 + i, "mynode" + i);
        }
        NodeManager nm = new NodeManager(nodes, 150);
        
        //make kesy
        String[] keys = new String[10000];
        for (int i = 0; i < keys.length; i++) {
            keys[i] = "key" + (i * 17) + "ss"+i*19;
        }
        
        //make result 1
        for (String key : keys) {
            Node n = nm.getNode(key);
            List<String> l = map.get(n);
            if (l == null) {
                l = new ArrayList<String>();
                map.put(n, l);
            }
            l.add(key);
        }
        
        //make result2 ,add node
   
        nm.add(new Node("10.1.3.30", 90, "mynode10"));
        
        for (String key : keys) {
            Node n = nm.getNode(key);
            List<String> l = map1.get(n);
            if (l == null) {
                l = new ArrayList<String>();
                map1.put(n, l);
            }
            l.add(key);
        }
        
        //print difference
        for (Iterator<Node> i = map.keySet().iterator(); i.hasNext();) {
            Node key = i.next();
            List<String> l = map.get(key);
            List<String> l1 = map1.get(key);
            System.out.println(key+":"+l.size()+","+l1.size()+","+((l.size()-l1.size())*1d/l.size()));
            //这个差值的平均值应该是n/(m+n)
        }
    }
}

分享到：

Ext JS 4 Beta 1发布了 | 速度真快，ExtJS第5个预览版发布

2011-03-30 10:49
浏览 407
评论(0)
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Consistent Hashing

Implementation

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Consistent Hashing

Implementation

评论

发表评论

相关推荐

最近访客更多访客>>