使用bitset实现毫秒级查询(二)

JAVA学习网 2017-10-25 08:41:02

上一篇中我们了解了bitset索引的基本用法,本篇开始学习bitset索引更新及一些复杂查询。

1.bitset索引更新

  因为我们的数据是在系统启动时全部加载进内存,所以当数据库数据发生变化时要实时通知到内存,可以使用消息队列的方式实现:将新增或者修改的数据写入kafka,然后在索引服务中从kafka中读出数据更新索引.
在UserIndexStore类中增加更新索引方法:

/**
     * 更新索引
     * @param user
     */
    public void updateIndex(User user) {
        String name = user.getName();
        Integer userIndex = this.nameIndexMap.get(name);
        if (userIndex != null) {
            clear(userIndex);//清除bitset对应位置的值
            update(user, userIndex);
            this.userMap.put(userIndex, user);
            this.nameIndexMap.put(user.getName(), userIndex);
        } else {
            //新增时会有并发问题
            synchronized (this) {
                int index = this.userMap.size() + 1;
                createIndex(user, index);
            }
        }

    }

    private void update(User user, Integer userIndex) {
        getAddress().update(user.getAddress(), userIndex);
        getAge().update(user.getAge(), userIndex);
        getGender().update(user.getGender(), userIndex);

    }

    private void clear(Integer index) {
        getAddress().clear(index);
        getAge().clear(index);
        getGender().clear(index);
    }

在BitSetIndexModel类中增加clear()方法

    /**
     * 对第i位置0
     * @param i
     */
    public void clear(Integer i) {
        for (BitSet bs : bsList) {
            if (bs != null && i < bs.length()) {
                bs.clear(i);
            }
        }
    }
2.bitset进阶查询

'>=','<=',between and
在BitSetIndexModel类中增加如下方法:

public List<String> getHigherList(String str) {
        List<String> newlist = new ArrayList<String>();
        newlist.add(str);
        newlist.addAll(list);
        // 排序
        Collections.sort(newlist);
        // 查找str在list中的位置
        int fromIndex = Collections.binarySearch(newlist, str);
        if (fromIndex >= 0) {
            // 如果map中不包含当前值则 index后移一位
            if (map.get(str) == null) {
                fromIndex++;
            }
            return newlist.subList(fromIndex, newlist.size());
        } else {
            return new ArrayList<String>();
        }
    }

    public List<String> getLowerList(String str) {
        List<String> newlist = new ArrayList<String>();
        newlist.add(str);
        newlist.addAll(list);
        // 排序
        Collections.sort(newlist);
        // 查找str在list中的位置
        int endIndex = Collections.binarySearch(newlist, str);
        if (endIndex >= 0) {
            return newlist.subList(0, endIndex + 1);
        } else {
            return new ArrayList<String>();
        }
    }

    @SuppressWarnings("unchecked")
    public <T extends Comparable<? super T>> List<T> getRange(T min, T max, Comparator<? super T> c) {
        List<T> newlist = new ArrayList<T>();
        for (String s : list) {
            newlist.add((T) (s));
        }
        Collections.sort(newlist);
        // 查找str在list中的位置
        int fromIndex = minBinarySearch(newlist, min, c);
        int endIndex = maxBinarySearch(newlist, max, c);
        if (fromIndex >= 0 && endIndex <= list.size() - 1) {
            if (fromIndex == endIndex) {
                return newlist.subList(fromIndex, endIndex + 1);
            }
            return newlist.subList(fromIndex, ++endIndex);
        } else {
            return new ArrayList<T>();
        }
    }

    /**
     *
     * @param list
     * @param key
     * @return
     */
    private static <T> int maxBinarySearch(List<T> list, T key, Comparator<? super T> c) {
        int low = 0;
        int high = list.size() - 1;
        int mid = 0;
        while (low <= high) {
            mid = (low + high) >>> 1;
            T midVal = list.get(mid);
            int cmp = c.compare(midVal, key);
            if (cmp < 0) {
                low = mid + 1;
            } else if (cmp > 0) {
                high = mid - 1;
            } else {
                return mid; // key found
            }
        }
        if (mid == low) {
            return high;
        } else {
            return mid;
        }
    }

    private static <T> int minBinarySearch(List<T> list, T key, Comparator<? super T> c) {
        int low = 0;
        int high = list.size() - 1;
        int mid = 0;
        while (low <= high) {
            mid = (low + high) >>> 1;
            T midVal = list.get(mid);
            int cmp = c.compare(midVal, key);
            if (cmp < 0) {
                low = mid + 1;
            } else if (cmp > 0) {
                high = mid - 1;
            } else {
                return mid; // key found
            }
        }
        if (high == mid) {
            return low;
        } else {
            return mid;
        }
    }

在UserIndexStore中增加以下方法

/**
     * 查询年龄大于等于指定值的user索引
     * @param age
     * @return
     */
    public BitSet findUserByAgeHigher(String age) {
        BitSetIndexModel indexModel = getAge();
        List<String> strs = indexModel.getHigherList(age);
        BitSet bitset = null;
        for (String str : strs) {
            bitset = indexModel.or(str, bitset);
        }
        return bitset;
    }

    /**
     * 查询age小于等于指定值的user索引
     * @param age
     * @return
     */
    public BitSet findUserByAgeLower(String age) {
        BitSetIndexModel indexModel = getAge();
        List<String> strs = indexModel.getLowerList(age);
        BitSet bitset = null;
        for (String str : strs) {
            bitset = indexModel.or(str, bitset);
        }
        return bitset;
    }

    /**
     * 查询age在某两个值区间内的user索引
     * @param min
     * @param max
     * @return
     */
    public BitSet findUserByAgeBetweenAnd(String min, String max) {
        BitSetIndexModel indexModel = getAge();
        List<String> strs = indexModel.getRange(min, max, new Comparator<Object>() {
            @Override
            public int compare(Object o1, Object o2) {

                return Integer.valueOf(o1 == null ? "0" : o1.toString()).compareTo(Integer.valueOf(o2 == null ? "0" : o2.toString()));
            }
        });
        BitSet bitset = null;
        for (String str : strs) {
            bitset = indexModel.or(str, bitset);
        }
        return bitset;
    }
测试,查询年龄在16-17之间的北京女孩。
public class BitSetTestRange {

    public static void main(String[] args) {
        List<User> users = buildData();
        UserIndexStore.getInstance().createIndex(users);
        ExecutorService executorService = Executors.newFixedThreadPool(50);
        int num = 2000;
        long begin1 = System.currentTimeMillis();
        for (int i = 0; i < num; i++) {
            Runnable syncRunnable = new Runnable() {
                @Override
                public void run() {
                    BitSet bs = UserIndexStore.getInstance().query("北京", "girl", null);
                    BitSet ageBs = UserIndexStore.getInstance().findUserByAgeBetweenAnd("16", "17");
                    bs.and(ageBs);
                    for (Integer index : BitSetIndexModel.getRealIndexs(bs)) {
                        UserIndexStore.getInstance().findUser(index);
                    }
                }
            };
            executorService.execute(syncRunnable);
        }
        executorService.shutdown();
        while (true) {
            try {
                if (executorService.awaitTermination(1, TimeUnit.SECONDS)) {
                    System.err.println("单次查询时间为:" + (System.currentTimeMillis() - begin1) / num + "ms");
                    break;
                }
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    private static List<User> buildData() {
        String[] addresss = { "北京", "上海", "深圳" };
        String[] ages = { "16", "17", "18" };
        List<User> users = new ArrayList<>();
        for (int i = 0; i < 1000000; i++) {
            User user = new User();
            user.setName("name" + i);
            int rand = ThreadLocalRandom.current().nextInt(3);
            user.setAddress(addresss[ThreadLocalRandom.current().nextInt(3)]);
            user.setGender((rand & 1) == 0 ? "girl" : "boy");
            user.setAge(ages[ThreadLocalRandom.current().nextInt(3)]);
            users.add(user);
        }
        return users;
    }

}
单次查询时间为:22ms

相比"="查询,区间查询速度慢了一些,但还在预期之内。
***

总结

  以上就实现了一个bitset索引,支持索引创建,更新,查询。并且因为没有传统的网络传输和磁盘io,所以速度非常快,基本上响应时间都在10ms以内。如果需要我可以在下一篇使用spring cloud搭建一个较完整的demo,供大家参考使用。

阅读(750) 评论(0)