HBase Java简单示例

添加时间:2013-7-19 点击量:

HBase采用Java实现，原生客户端也是Java实现，其他语言需要通过Thrift接口服务间接访问HBase的数据。

HBase作为大数据存储数据库，其写入能力非常强，加上HBase本身就脱胎于Hadoop，故和Hadoop的兼容性极好，非常适合存储半规则（半结构化）数据（灵活、可扩展性强、大数据存储）。基于Hadoop的MapReduce + HBase存储，非常适合处理大数据。

HBase基本使用示例：

import java.io.IOException; 

import java.util.ArrayList; 

import java.util.List; 

 

import org.apache.hadoop.conf.Configuration; 

import org.apache.hadoop.hbase.HBaseConfiguration; 

import org.apache.hadoop.hbase.HColumnDescriptor; 

import org.apache.hadoop.hbase.HTableDescriptor; 

import org.apache.hadoop.hbase.KeyValue; 

import org.apache.hadoop.hbase.MasterNotRunningException; 

import org.apache.hadoop.hbase.ZooKeeperConnectionException; 

import org.apache.hadoop.hbase.client.Delete; 

import org.apache.hadoop.hbase.client.Get; 

import org.apache.hadoop.hbase.client.HBaseAdmin; 

import org.apache.hadoop.hbase.client.HTable; 

import org.apache.hadoop.hbase.client.HTablePool; 

import org.apache.hadoop.hbase.client.Put; 

import org.apache.hadoop.hbase.client.Result; 

import org.apache.hadoop.hbase.client.ResultScanner; 

import org.apache.hadoop.hbase.client.Scan; 

import org.apache.hadoop.hbase.filter.Filter; 

import org.apache.hadoop.hbase.filter.FilterList; 

import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; 

import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; 

import org.apache.hadoop.hbase.util.Bytes; 

 

public class HbaseTest { 

 

    public static Configuration configuration; 

    static { 

        configuration = HBaseConfiguration.create（）; 

        configuration.set（hbase.zookeeper.property.clientPort， 2181）; 

        configuration.set（hbase.zookeeper.quorum， 192.168.1.100）; 

        configuration.set（hbase.master， 192.168.1.100:600000）; 

    } 

 

    public static void main（String[] args） { 

        // createTable（wujintao）; 

        // Data（wujintao）; 

        // QueryAll（wujintao）; 

        // QueryByCondition1（wujintao）; 

        // QueryByCondition2（wujintao）; 

        //QueryByCondition3（wujintao）; 

        //Row（wujintao，abcdef）; 

        ByCondition（wujintao，abcdef）; 

    } 

 

     

    public static void createTable（String tableName） { 

        System.out.println（start create table ......）; 

        try { 

            HBaseAdmin hBaseAdmin = new HBaseAdmin（configuration）; 

            if （hBaseAdmin.tableExists（tableName）） {// 若是存在要创建的表，那么先删除，再创建 

                hBaseAdmin.disableTable（tableName）; 

                hBaseAdmin.Table（tableName）; 

                System.out.println（tableName +  is exist，detele....）; 

            } 

            HTableDescriptor tableDescriptor = new HTableDescriptor（tableName）; 

            tableDescriptor.addFamily（new HColumnDescriptor（column1））; 

            tableDescriptor.addFamily（new HColumnDescriptor（column2））; 

            tableDescriptor.addFamily（new HColumnDescriptor（column3））; 

            hBaseAdmin.createTable（tableDescriptor）; 

        } catch （MasterNotRunningException e） { 

            e.printStackTrace（）; 

        } catch （ZooKeeperConnectionException e） { 

            e.printStackTrace（）; 

        } catch （IOException e） { 

            e.printStackTrace（）; 

        } 

        System.out.println（end create table ......）; 

    } 

 

     

    public static void Data（String tableName） { 

        System.out.println（start  data ......）; 

        HTablePool pool = new HTablePool（configuration， 1000）; 

        HTable table = （HTable） pool.getTable（tableName）; 

        Put put = new Put（112233bbbcccc.getBytes（））;// 一个PUT代表一行数据，再NEW一个PUT默示第二行数据，每行一个独一的ROWKEY，此处rowkey为put机关办法中传入的值 

        put.add（column1.getBytes（）， null， aaa.getBytes（））;// 本行数据的第一列 

        put.add（column2.getBytes（）， null， bbb.getBytes（））;// 本行数据的第三列 

        put.add（column3.getBytes（）， null， ccc.getBytes（））;// 本行数据的第三列 

        try { 

            table.put（put）; 

        } catch （IOException e） { 

            e.printStackTrace（）; 

        } 

        System.out.println（end  data ......）; 

    } 

 

     

    public static void dropTable（String tableName） { 

        try { 

            HBaseAdmin admin = new HBaseAdmin（configuration）; 

            admin.disableTable（tableName）; 

            admin.Table（tableName）; 

        } catch （MasterNotRunningException e） { 

            e.printStackTrace（）; 

        } catch （ZooKeeperConnectionException e） { 

            e.printStackTrace（）; 

        } catch （IOException e） { 

            e.printStackTrace（）; 

        } 

 

    } 

     

     public static void Row（String tablename， String rowkey）  { 

        try { 

            HTable table = new HTable（configuration， tablename）; 

            List list = new ArrayList（）; 

            Delete d1 = new Delete（rowkey.getBytes（））; 

            list.add（d1）; 

             

            table.（list）; 

            System.out.println（删除行成功!）; 

             

        } catch （IOException e） { 

            e.printStackTrace（）; 

        } 

         

 

    } 

 

      

     public static void ByCondition（String tablename， String rowkey）  { 

            //今朝还没有发明有效的API可以或许实现按照非rowkey的前提删除这个功能能，还有清空表全部数据的API操纵 

 

    } 

 

 

     

    public static void QueryAll（String tableName） { 

        HTablePool pool = new HTablePool（configuration， 1000）; 

        HTable table = （HTable） pool.getTable（tableName）; 

        try { 

            ResultScanner rs = table.getScanner（new Scan（））; 

            for （Result r : rs） { 

                System.out.println（获获得rowkey: + new String（r.getRow（）））; 

                for （KeyValue keyValue : r.raw（）） { 

                    System.out.println（列： + new String（keyValue.getFamily（）） 

                            + ====值: + new String（keyValue.getValue（）））; 

                } 

            } 

        } catch （IOException e） { 

            e.printStackTrace（）; 

        } 

    } 

 

     

    public static void QueryByCondition1（String tableName） { 

 

        HTablePool pool = new HTablePool（configuration， 1000）; 

        HTable table = （HTable） pool.getTable（tableName）; 

        try { 

            Get scan = new Get（abcdef.getBytes（））;// 按照rowkey查询 

            Result r = table.get（scan）; 

            System.out.println（获获得rowkey: + new String（r.getRow（）））; 

            for （KeyValue keyValue : r.raw（）） { 

                System.out.println（列： + new String（keyValue.getFamily（）） 

                        + ====值: + new String（keyValue.getValue（）））; 

            } 

        } catch （IOException e） { 

            e.printStackTrace（）; 

        } 

    } 

 

     

    public static void QueryByCondition2（String tableName） { 

 

        try { 

            HTablePool pool = new HTablePool（configuration， 1000）; 

            HTable table = （HTable） pool.getTable（tableName）; 

            Filter filter = new SingleColumnValueFilter（Bytes 

                    .toBytes（column1）， null， CompareOp.EQUAL， Bytes 

                    .toBytes（aaa））; // 当列column1的值为aaa时进行查询 

            Scan s = new Scan（）; 

            s.setFilter（filter）; 

            ResultScanner rs = table.getScanner（s）; 

            for （Result r : rs） { 

                System.out.println（获获得rowkey: + new String（r.getRow（）））; 

                for （KeyValue keyValue : r.raw（）） { 

                    System.out.println（列： + new String（keyValue.getFamily（）） 

                            + ====值: + new String（keyValue.getValue（）））; 

                } 

            } 

        } catch （Exception e） { 

            e.printStackTrace（）; 

        } 

 

    } 

 

     

    public static void QueryByCondition3（String tableName） { 

 

        try { 

            HTablePool pool = new HTablePool（configuration， 1000）; 

            HTable table = （HTable） pool.getTable（tableName）; 

 

            List<Filter> filters = new ArrayList<Filter>（）; 

 

            Filter filter1 = new SingleColumnValueFilter（Bytes 

                    .toBytes（column1）， null， CompareOp.EQUAL， Bytes 

                    .toBytes（aaa））; 

            filters.add（filter1）; 

 

            Filter filter2 = new SingleColumnValueFilter（Bytes 

                    .toBytes（column2）， null， CompareOp.EQUAL， Bytes 

                    .toBytes（bbb））; 

            filters.add（filter2）; 

 

            Filter filter3 = new SingleColumnValueFilter（Bytes 

                    .toBytes（column3）， null， CompareOp.EQUAL， Bytes 

                    .toBytes（ccc））; 

            filters.add（filter3）; 

 

            FilterList filterList1 = new FilterList（filters）; 

 

            Scan scan = new Scan（）; 

            scan.setFilter（filterList1）; 

            ResultScanner rs = table.getScanner（scan）; 

            for （Result r : rs） { 

                System.out.println（获获得rowkey: + new String（r.getRow（）））; 

                for （KeyValue keyValue : r.raw（）） { 

                    System.out.println（列： + new String（keyValue.getFamily（）） 

                            + ====值: + new String（keyValue.getValue（）））; 

                } 

            } 

            rs.close（）; 

 

        } catch （Exception e） { 

            e.printStackTrace（）; 

        } 

 

    } 

 

}

Hbase数据获取示例：

/*
 * Need Packages:
 *   commons-codec-1.4.jar
 *   commons-logging-1.1.1.jar
 *   hadoop-0.20.2-core.jar
 *   hbase-0.90.2.jar
 *   log4j-1.2.16.jar
 *   zookeeper-3.3.2.jar
 */



import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.KeyValue;

import org.apache.hadoop.hbase.client.Get;

import org.apache.hadoop.hbase.client.HTable;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.client.ResultScanner;

import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;

import org.apache.hadoop.hbase.filter.FilterList;

import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;

import org.apache.hadoop.hbase.util.Bytes;



public class HbaseSelecter

{

    public static Configuration configuration = null;

    static

    {

        configuration = HBaseConfiguration.create（）;

        //configuration.set（hbase.master， 192.168.0.201:60000）;

        configuration.set（hbase.zookeeper.quorum， idc01-hd-nd-03，idc01-hd-nd-04，idc01-hd-nd-05）;

        //configuration.set（hbase.zookeeper.property.clientPort， 2181）;

    }



    public static void RowKey（String tablename， String rowKey） throws IOException

    {

        HTable table = new HTable（configuration， tablename）;

        Get g = new Get（rowKey.getBytes（））;

        Result rs = table.get（g）;



        for （KeyValue kv : rs.raw（））

        {

            System.out.println（-------------------- + new String（kv.getRow（）） + ----------------------------）;

            System.out.println（Column Family:  + new String（kv.getFamily（）））;

            System.out.println（Column       : + new String（kv.getQualifier（）））;

            System.out.println（value        :  + new String（kv.getValue（）））;

        }

    }



    public static void RowKeyFamily（String tablename， String rowKey， String family） throws IOException

    {

        HTable table = new HTable（configuration， tablename）;

        Get g = new Get（rowKey.getBytes（））;

        g.addFamily（Bytes.toBytes（family））;

        Result rs = table.get（g）;

        for （KeyValue kv : rs.raw（））

        {

            System.out.println（-------------------- + new String（kv.getRow（）） + ----------------------------）;

            System.out.println（Column Family:  + new String（kv.getFamily（）））;

            System.out.println（Column       : + new String（kv.getQualifier（）））;

            System.out.println（value        :  + new String（kv.getValue（）））;

        }

    }



    public static void RowKeyFamilyColumn（String tablename， String rowKey， String family， String column）

            throws IOException

    {

        HTable table = new HTable（configuration， tablename）;

        Get g = new Get（rowKey.getBytes（））;

        g.addColumn（family.getBytes（）， column.getBytes（））;



        Result rs = table.get（g）;



        for （KeyValue kv : rs.raw（））

        {

            System.out.println（-------------------- + new String（kv.getRow（）） + ----------------------------）;

            System.out.println（Column Family:  + new String（kv.getFamily（）））;

            System.out.println（Column       : + new String（kv.getQualifier（）））;

            System.out.println（value        :  + new String（kv.getValue（）））;

        }

    }



    public static void Filter（String tablename， List<String> arr） throws IOException

    {

        HTable table = new HTable（configuration， tablename）;

        Scan scan = new Scan（）;// 实例化一个遍历器

        FilterList filterList = new FilterList（）; // 过滤器List



        for （String v : arr）

        { // 下标0为列簇，1为列名，3为前提

            String[] wheres = v.split（，）;



            filterList.addFilter（new SingleColumnValueFilter（// 过滤器

                    wheres[0].getBytes（）， wheres[1].getBytes（），



                    CompareOp.EQUAL，// 各个前提之间是 and 的关系

                    wheres[2].getBytes（）））;

        }

        scan.setFilter（filterList）;

        ResultScanner ResultScannerFilterList = table.getScanner（scan）;

        for （Result rs = ResultScannerFilterList.next（）; rs != null; rs = ResultScannerFilterList.next（））

        {

            for （KeyValue kv : rs.list（））

            {

                System.out.println（-------------------- + new String（kv.getRow（）） + ----------------------------）;

                System.out.println（Column Family:  + new String（kv.getFamily（）））;

                System.out.println（Column       : + new String（kv.getQualifier（）））;

                System.out.println（value        :  + new String（kv.getValue（）））;

            }

        }

    }



    public static void main（String[] args） throws Exception

    {

        if（args.length < 2）{

            System.out.println（Usage: HbaseSelecter table key）;

            System.exit（-1）;

        }



        System.out.println（Table:  + args[0] +  ， key:  + args[1]）;

        RowKey（args[0]， args[1]）;



        /

        System.out.println（------------------------行键  查询----------------------------------）;

        RowKey（b2c， yihaodian1002865）;

        RowKey（b2c， yihaodian1003396）;



        System.out.println（------------------------行键+列簇 查询----------------------------------）;

        RowKeyFamily（riapguh， 用户A， user）;

        RowKeyFamily（riapguh， 用户B， user）;



        System.out.println（------------------------行键+列簇+列名 查询----------------------------------）;

        RowKeyFamilyColumn（riapguh， 用户A， user， user_code）;

        RowKeyFamilyColumn（riapguh， 用户B， user， user_code）;



        System.out.println（------------------------前提 查询----------------------------------）;

        List<String> arr = new ArrayList<String>（）;

        arr.add（dpt，dpt_code，d_001）;

        arr.add（user，user_code，u_0001）;

        Filter（riapguh， arr）;

        /

    }

}

HBase 导出特定列示例（少量数据）：

/*
 * Need Packages:
 *   commons-codec-1.4.jar
 *   commons-logging-1.1.1.jar
 *   hadoop-0.20.2-core.jar
 *   hbase-0.90.2.jar
 *   log4j-1.2.16.jar
 *   zookeeper-3.3.2.jar
 *
 * Example: javac -classpath ./:/data/chenzhenjing/code/panama/lib/hbase-0.90.2.jar:/data/chenzhenjing/code/panama/lib/hadoop-core-0.20-append-for-hbase.jar:/data/chenzhenjing/code/panama/lib/commons-logging-1.0.4.jar:/data/chenzhenjing/code/panama/lib/commons-lang-2.4.jar:/data/chenzhenjing/code/panama/lib/commons-io-1.2.jar:/data/chenzhenjing/code/panama/lib/zookeeper-3.3.2.jar:/data/chenzhenjing/code/panama/lib/log4j-1.2.15.jar:/data/chenzhenjing/code/panama/lib/commons-codec-1.3.jar   DiffHbase.java
 */



import java.io.BufferedReader;

import java.io.File;

import java.io.IOException;

import java.io.FileInputStream;

import java.io.InputStreamReader;

import java.io.FileOutputStream;

import java.io.OutputStreamWriter;

import java.io.StringReader;

import java.text.SimpleDateFormat;

import java.util.Date;



import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.KeyValue;

import org.apache.hadoop.hbase.client.Get;

import org.apache.hadoop.hbase.client.HTable;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.client.ResultScanner;

import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;

import org.apache.hadoop.hbase.filter.FilterList;

import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;

import org.apache.hadoop.hbase.util.Bytes;



class ColumnUtils {



    public static byte[] getFamily（String column）{

        return getBytes（column， 0）;

    }



    public static byte[] getQualifier（String column）{

        return getBytes（column， 1）;

    }



    private static byte[] getBytes（String column ， int offset）{

        String[] split = column.split（:）;

        return Bytes.toBytes（offset > split.length -1 ？ split[0] :split[offset]）;

    }

}



public class DiffHbase

{

    public static Configuration configuration = null;

    static

    {

        configuration = HBaseConfiguration.create（）;

        configuration.set（hbase.zookeeper.quorum， idc01-hd-ds-01，idc01-hd-ds-02，idc01-hd-ds-03）;

    }



    public static void RowKey（String tablename， String rowKey） throws IOException

    {

        HTable table = new HTable（configuration， tablename）;

        Get g = new Get（rowKey.getBytes（））;

        Result rs = table.get（g）;



        for （KeyValue kv : rs.raw（））

        {

            System.out.println（-------------------- + new String（kv.getRow（）） + ----------------------------）;

            System.out.println（Column Family:  + new String（kv.getFamily（）））;

            System.out.println（Column       : + new String（kv.getQualifier（）） + t）;

            System.out.println（value        :  + new String（kv.getValue（）））;

        }

    }



    public static void RowKeyFamily（String tablename， String rowKey， String family） throws IOException

    {

        HTable table = new HTable（configuration， tablename）;

        Get g = new Get（rowKey.getBytes（））;

        g.addFamily（Bytes.toBytes（family））;

        Result rs = table.get（g）;

        for （KeyValue kv : rs.raw（））

        {

            System.out.println（-------------------- + new String（kv.getRow（）） + ----------------------------）;

            System.out.println（Column Family:  + new String（kv.getFamily（）））;

            System.out.println（Column       : + new String（kv.getQualifier（）） + t）;

            System.out.println（value        :  + new String（kv.getValue（）））;

        }

    }



    public static void RowKeyFamilyColumn（String tablename， String rowKey， String family， String column）

        throws IOException

    {

        HTable table = new HTable（configuration， tablename）;

        Get g = new Get（rowKey.getBytes（））;

        g.addColumn（family.getBytes（）， column.getBytes（））;



        Result rs = table.get（g）;



        for （KeyValue kv : rs.raw（））

        {

            System.out.println（-------------------- + new String（kv.getRow（）） + ----------------------------）;

            System.out.println（Column Family:  + new String（kv.getFamily（）））;

            System.out.println（Column       : + new String（kv.getQualifier（）） + t）;

            System.out.println（value        :  + new String（kv.getValue（）））;

        }

    }







    private static final String USAGE = Usage: DiffHbase [-o outfile] tablename infile filterColumns...;



    /

      Prints the usage message and exists the program.

      

      ＠param message  The message to print first.

     /

    private static void printUsage（String message） {

        System.err.println（message）;

        System.err.println（USAGE）;

        throw new RuntimeException（USAGE）;

    }



    private static void PrintId（String id， Result rs）{

        String value = Bytes.toString（ rs.getValue（ColumnUtils.getFamily（info:url）， ColumnUtils.getQualifier（info:url）））;

        if（value == null）{

            System.out.println（ id + \tNULL）;

        }else{

            System.out.println（ id + \t + value）;

        }

    }



    private static void WriteId（String id， Result rs， FileOutputStream os）{

        String value = Bytes.toString（ rs.getValue（ColumnUtils.getFamily（info:url）， ColumnUtils.getQualifier（info:url）））;

        try{

            if（value == null）{

                os.write（ （id + \tNULL\n）.getBytes（））;

            }else{

                os.write（ （id + \t + value + \n）.getBytes（））;

            }

        }

        catch （IOException e） {

            e.printStackTrace（）;

        }

    }



    private static void PrintRow（String id， Result rs）{



        System.out.println（-------------------- + id + ----------------------------）;

        for （KeyValue kv : rs.raw（））

        {

            System.out.println（new String（kv.getFamily（）） + : + new String（kv.getQualifier（）） +  :  + new String（kv.getValue（）））;

        }

    }



    public static void main（String[] args） throws Exception

    { 

        if （args.length < 3） {

            printUsage（Too few arguments）;

        }



        String outfile = null;

        String tablename = args[0];

        String dictfile  = args[1];

        int skilLen = 2;



        if（ args[0].equals（-o））{

            outfile = args[1];

            tablename = args[2];

            dictfile  = args[3];

            skilLen = 4;

        }



        HTable table = new HTable（configuration， tablename）;



        String[] filterColumns = new String[args.length - skilLen];

        System.arraycopy（args， skilLen， filterColumns， 0， args.length - skilLen）;



        System.out.println（filterColumns: ）;

        for（int i=0; i<filterColumns.length; ++i）{

            System.out.println（\t + filterColumns[i]）;

        }



        FileOutputStream os = null;

        if（outfile != null）{

            os = new FileOutputStream（outfile）;

        }

        

        int count = 0;

        SimpleDateFormat df = new SimpleDateFormat（yyyy-MM-dd HH:mm:ss）;//设置日期格局



        File srcFile = new File（dictfile）;

        FileInputStream in = new FileInputStream（srcFile）;

        InputStreamReader isr = new InputStreamReader（in）;

        BufferedReader br = new BufferedReader（isr）;

        String read = null;

        while （（read = br.readLine（）） != null） {

            String[] split = read.trim（）.split（\\s）;   // space split

            if（ split.length < 1 ）{

                System.out.println（Error line:  + read）;

                continue;

            }



            if（ ++count ％ 1000 == 0）{

                System.out.println（df.format（new Date（）） +  :  + count +  rows processed. ）;  // new Date（）为获取当前体系时候

            }

            // System.out.println（ROWKEY: + split[0]）;



            Get g = new Get（split[0].getBytes（））;

            Result rs = table.get（g）;

            if（ rs == null）{

                System.out.println（No Result for  + split[0]）;

                continue;

            }



            for（int i=0; i<filterColumns.length; ++i）{

                String value = Bytes.toString（rs.getValue（ColumnUtils.getFamily（filterColumns[i]）， ColumnUtils.getQualifier（filterColumns[i]）））;

                if（value == null）{

                    if（ os == null）{

                        PrintId（split[0]， rs）;

                    }else{

                        WriteId（split[0]， rs， os）;

                    }



                    // PrintRow（split[0]， rs）;

                    break;

                }

            }

        }



        br.close（）;

        isr.close（）;

        in.close（）;



    }

}

HBase MapReduce示例：全库扫描（大量数据）：

package com.hbase.mapreduce;



import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;



import org.apache.hadoop.io.Text;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.HConstants;

import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper;

import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.util.GenericOptionsParser;



import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;                                                                      

import org.apache.hadoop.hbase.filter.CompareFilter;                                                                                

import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;                                                                      

import org.apache.hadoop.hbase.filter.BinaryComparator;                                                                             

import org.apache.hadoop.hbase.util.Bytes; 



import com.goodhope.utils.ColumnUtils;



public class ExportHbase {

    private static final String INFOCATEGORY = info:storecategory;



    private static final String USAGE = Usage: ExportHbase  +

        -r <numReduceTasks> -indexConf <iconfFile>\n +

        -indexDir <indexDir> -webSite <amazon> [-need <true> -isVisible -startTime <long>] -table <tableName> -columns <columnName1>  +

        [<columnName2> ...];



    /

      Prints the usage message and exists the program.

      

      ＠param message  The message to print first.

     /

    private static void printUsage（String message） {

        System.err.println（message）;

        System.err.println（USAGE）;

        throw new RuntimeException（USAGE）;

    }



    /

      Creates a new job.

      ＠param conf 

      

      ＠param args  The command line arguments.

      ＠throws IOException When reading the configuration fails.

     /

    public static Job createSubmittableJob（Configuration conf， String[] args） 

        throws IOException {

        if （args.length < 7） {

            printUsage（Too few arguments）;

        }



        int numReduceTasks = 1;

        String iconfFile = null;

        String indexDir = null;

        String tableName = null;

        String website = null;

        String need = ;

        String expectShopGrade = ;

        String dino = 6;

        String isdebug = 0;

        long debugThreshold = 10000;

        String debugThresholdStr = Long.toString（debugThreshold）;

        String queue = offline;



        long endTime =  Long.MAX_VALUE;

        int maxversions = 1;

        long startTime = System.currentTimeMillis（） - 282460601000l;

        long distartTime = System.currentTimeMillis（） - 302460601000l;

        long diusedTime = System.currentTimeMillis（） - 302460601000l;

        String startTimeStr = Long.toString（startTime）;

        String diusedTimeStr = Long.toString（diusedTime）;

        String quorum = null;



        String isVisible = ;

        List<String> columns = new ArrayList<String>（） ;  



        boolean bFilter = false;



        // parse args

        for （int i = 0; i < args.length - 1; i++） {

            if （-r.equals（args[i]）） {

                numReduceTasks = Integer.parseInt（args[++i]）;

            } else if （-indexConf.equals（args[i]）） {

                iconfFile = args[++i];

            } else if （-indexDir.equals（args[i]）） {

                indexDir = args[++i];

            } else if （-table.equals（args[i]）） {

                tableName = args[++i];

            } else if （-webSite.equals（args[i]）） {

                website = args[++i];

            } else if （-startTime.equals（args[i]）） {

                startTimeStr = args[++i];

                startTime = Long.parseLong（startTimeStr）;

            } else if （-need.equals（args[i]）） {

                need = args[++i];

            } else if （-isVisible.equals（args[i]）） {

                isVisible = true;

            } else if （-shopgrade.equals（args[i]）） {

                expectShopGrade = args[++i]; 

            } else if （-queue.equals（args[i]）） {

                queue = args[++i];

            } else if （-dino.equals（args[i]）） {

                dino = args[++i];

            } else if （-maxversions.equals（args[i]）） {

                maxversions = Integer.parseInt（args[++i]）;

            } else if （-distartTime.equals（args[i]）） {

                distartTime = Long.parseLong（args[++i]）; 

            } else if （-diendTime.equals（args[i]）） {

                endTime = Long.parseLong（args[++i]）;

            } else if （-diusedTime.equals（args[i]）） {

                diusedTimeStr = args[++i];

                diusedTime = Long.parseLong（diusedTimeStr）;

            } else if （-quorum.equals（args[i]）） {

                quorum = args[++i];

            } else if （-filter.equals（args[i]）） {

                bFilter = true;

            } else if （-columns.equals（args[i]）） {

                columns.add（args[++i]）;

                while （i + 1 < args.length && !args[i + 1].startsWith（-）） {

                    String columnname = args[++i];

                    columns.add（columnname）;

                    System.out.println（args column----:  + columnname）;

                }

            } else if （-debugThreshold.equals（args[i]）） {

                isdebug = 1;

                debugThresholdStr = args[++i];

                debugThreshold =  Long.parseLong（ debugThresholdStr ）;

            }

            else {

                printUsage（Unsupported option  + args[i]）;

            }

        }



        if （distartTime > endTime） {

            printUsage（distartTime must <= diendTime）;  

        }



        if （indexDir == null || tableName == null || columns.isEmpty（）） {

            printUsage（Index directory， table name and at least one column must  +

                    be specified）;

        }



        if （iconfFile != null） {

            // set index configuration content  a file

            String content = readContent（iconfFile）;

            conf.set（hbase.index.conf， content）;

            conf.set（hbase.website.name， website）;

            conf.set（hbase.need.productDB， need）;

            conf.set（hbase.expect.shopgrade， expectShopGrade）;

            conf.set（hbase.di.no， dino）;

            conf.set（hbase.expect.item.visible， isVisible）;

            conf.set（hbase.index.startTime， startTimeStr）;

            conf.set（hbase.index.diusedTime， diusedTimeStr）;

            conf.set（hbase.index.debugThreshold， debugThresholdStr）;

            conf.set（hbase.index.debug， isdebug）;

            if （quorum != null） {

                conf.set（hbase.zookeeper.quorum， quorum）;

            }

            String temp = ;

            for （String column : columns） {

                temp = temp + column + |;

            }

            temp = temp.substring（0， temp.length（） - 1）;

            conf.set（hbase.index.column， temp）;

            System.out.println（hbase.index.column:  + temp）;

        }





        Job job = new Job（conf， export data  table  + tableName）;

        （（JobConf） job.getConfiguration（））.setQueueName（queue）;



        // number of indexes to partition into

        job.setNumReduceTasks（numReduceTasks）;

        Scan scan = new Scan（）;

        scan.setCacheBlocks（false）;



        // limit scan range

        scan.setTimeRange（distartTime， endTime）;

        //  scan.setMaxVersions（maxversions）;

        scan.setMaxVersions（1）;



        / limit scan columns /

        for （String column : columns） {

            scan.addColumn（ColumnUtils.getFamily（column）， ColumnUtils.getQualifier（column））;

            scan.addFamily（ColumnUtils.getFamily（column））;

        }



        // set filter

        if（ bFilter ）{

            System.out.println（only export guangtaobao data. ）;

            SingleColumnValueFilter filter = new SingleColumnValueFilter（

                    Bytes.toBytes（info），

                    Bytes.toBytes（producttype），

                    CompareFilter.CompareOp.EQUAL，

                    new BinaryComparator（Bytes.toBytes（guangtaobao）） ）;

            filter.setFilterIfMissing（true）;

            scan.setFilter（filter）;

        }



        TableMapReduceUtil.initTableMapperJob（tableName， scan， ExportHbaseMapper.class，

                Text.class， Text.class， job）;

        // job.setReducerClass（ExportHbaseReducer.class）;

        FileOutputFormat.setOutputPath（job， new Path（indexDir））;





        return job;

    }



    /

      Reads xml file of indexing configurations.  The xml format is similar to

      hbase-default.xml and hadoop-default.xml. For an example configuration，

      see the <code>createIndexConfContent</code> method in TestTableIndex.

      

      ＠param fileName  The file to read.

      ＠return XML configuration read  file.

      ＠throws IOException When the XML is broken.

     /

    private static String readContent（String fileName） throws IOException {

        File file = new File（fileName）;

        int length = （int） file.length（）;

        if （length == 0） {

            printUsage（Index configuration file  + fileName +  does not exist）;

        }



        int bytesRead = 0;

        byte[] bytes = new byte[length];

        FileInputStream fis = new FileInputStream（file）;



        try {

            // read entire file into content

            while （bytesRead < length） {

                int read = fis.read（bytes， bytesRead， length - bytesRead）;

                if （read > 0） {

                    bytesRead += read;

                } else {

                    break;

                }

            }

        } finally {

            fis.close（）;

        }



        return new String（bytes， 0， bytesRead， HConstants.UTF8_ENCODING）;

    }



    /

      The main entry point.

      

      ＠param args  The command line arguments.

      ＠throws Exception When running the job fails.

     /

    public static void main（String[] args） throws Exception {

        Configuration conf = HBaseConfiguration.create（）;

        String[] otherArgs = 

            new GenericOptionsParser（conf， args）.getRemainingArgs（）;

        Job job = createSubmittableJob（conf， otherArgs）;

        System.exit（job.waitForCompletion（true） ？ 0 : 1）;

    }



}



//////////////////////////////////////////////////////////



package com.hbase.mapreduce;



import java.io.IOException;

import java.util.List;

import java.util.ArrayList;

import java.lang.String;

import java.lang.StringBuffer;



import org.apache.hadoop.io.Text;

import org.apache.hadoop.conf.Configurable;

import org.apache.hadoop.conf.Configuration;

import org.apache.commons.lang.StringUtils;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.TableMapper;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.KeyValue;



import com.goodhope.utils.ColumnUtils;





/

  Pass the given key and record as-is to the reduce phase.

 /

＠SuppressWarnings（deprecation）

public class ExportHbaseMapper extends TableMapper<Text，Text> implements Configurable {

    private static final Text keyTEXT = new Text（）;

    private static final Text SENDTEXT = new Text（）;



    private Configuration conf = null;



    private long startTime = 0;

    List<String> columnMap = null;



    private long rCount = 0;

    private long errCount = 0;

    private int  debug  = 0;

    private long thresCount  = 10000;



    public void map（ImmutableBytesWritable key， Result value， Context context） throws IOException， InterruptedException {



        rCount++;



        String itemid = Bytes.toString（key.get（））;

        if （itemid.contains（&）） {

            context.getCounter（Error， rowkey contains \&\）.increment（1）;

            return;

        }



        StringBuffer outstr = new StringBuffer（）;

        for （String col : columnMap） {



            String tmp = Bytes.toString（value.getValue（ColumnUtils.getFamily（col）， ColumnUtils.getQualifier（col）））;

            if （tmp == null）{

                context.getCounter（Error， col+ No value in hbase）.increment（1）;

                

                errCount++;

                if（ debug > 0 && （errCount ％ thresCount == 0））{

                    System.err.println（ itemid + : doesnt has  + col +  data!）;

                }



                outstr.append（NULL + \t）;

            }else{

                if（ tmp.contains（guangtaobao） ）{

                    outstr.append（1 + \t）;

                }else{

                    outstr.append（tmp.trim（） + \t）;

                }

            }

        }



        if （ ! outstr.toString（）.isEmpty（） ） {



            SENDTEXT.set（ outstr.toString（） ）;

            keyTEXT.set（itemid）;

            context.write（keyTEXT， SENDTEXT）;



            if（ debug > 0 && （rCount ％ thresCount10000 == 0））{

                System.out.println（ SENDTEXT.toString（） + keyTEXT.toString（） ）;

            }

        }

        else

        {

            context.getCounter（Error， No Colume output）.increment（1）;

            return;

        }

    }



    /

      Returns the current configuration.

      

      ＠return The current configuration.

      ＠see org.apache.hadoop.conf.Configurable＃getConf（）

     /

    ＠Override

        public Configuration getConf（） {

            return conf;

        }



    /

      Sets the configuration. This is used to set up the index configuration.

      

      ＠param configuration

                 The configuration to set.

      ＠see org.apache.hadoop.conf.Configurable＃setConf（org.apache.hadoop.conf.Configuration）

     /

    ＠Override

        public void setConf（Configuration configuration） {

            this.conf = configuration;

            

            startTime = Long.parseLong（conf.get（hbase.index.startTime））;

            thresCount = Long.parseLong（conf.get（hbase.index.debugThreshold））;

            debug = Integer.parseInt（conf.get（hbase.index.debug））;



            String[] columns = conf.get（hbase.index.column）.split（\\|）;



            columnMap = new ArrayList<String>（）;

            for （String column : columns） {

                System.out.println（Output column:  + column）;



                columnMap.add（column）;

            }



        }



}





//////////////////////////////////////////////////////////



package com.hbase.utils;



import org.apache.hadoop.hbase.util.Bytes;



public class ColumnUtils {



        public static byte[] getFamily（String column）{

                return getBytes（column， 0）;

        }



        public static byte[] getQualifier（String column）{

                return getBytes（column， 1）;

        }



        private static byte[] getBytes（String column ， int offset）{

                String[] split = column.split（:）;

                return Bytes.toBytes（offset > split.length -1 ？ split[0] :split[offset]）;

        }

}

文艺不是炫耀，不是花哨空洞的文字堆砌，不是一张又一张的逆光照片，不是将旅行的意义转化为名牌包和明信片的物质展示；很多时候它甚至完全不美——它嘶吼、扭曲，它会痛苦地抽搐，它常常无言地沉默。——艾小柯《文艺是一种信仰》

分享到：

相关文章

按版本划分

按功能划分

企业管理软件

HBase Java简单示例

Hbase基本应用示例：

Hbase数据获取示例：

Hbase 导出特定列示例（小量数据）:

Hbase Mapreduce示例：全库扫描（大量数据）：

按版本划分

按功能划分

企业管理软件

HBase Java简单示例

Hbase基本应用示例：

Hbase数据获取示例：

Hbase 导出特定列示例（小量数据）:

Hbase Mapreduce示例：全库扫描（大量数据）：

Hbase 导出特定列示例（小量数据）: