????HBase ?????????
????HBase ?????????????????????????е?????????????????洢??????????????????·????? Google ??????????”BigTable”??
????HDFS ? HBase ?????洢????MapReduce ???????????????ZooKeeper ?????Э??????? failover???Ч?????????????????Pig ?? Hive ? HBase ?????????????????????????????????????? join ?????Sqoop ??????? RDBMS ??????????
????HBase ??????? where ??????Order by ???????????????? Rowkey ???????? range ????????????????? HBase ???? API ?????????????
????HBase ?? Rowkey ???????е???????????????????????з???????????????????м???????м???Χ?????????????????????м?????????洢???????λ?????????????????????? int ?????????1??10??100??11??12??2??20…??906??…??
????ColumnFamily ??“????”?????? schema ????????????壬???????????????壬???????????????“ColumnFamily??qualifier”????????????????????????????????????????е??
????Cell ??????к????????????洢?????????????洢??????????
????Timestamp ?????????汾 Cell ????????64 λ?????????汾????????????????????У??μ?????汾??????檔
????Hbase ???з???????????? N ?? Region????????????????? Region????????????Region ????????????????? Region ??????? Server ??????????????????? Server??
????Region ?? ColumnFamily ????? Store??Store ?С?洢???????????????????????????? Store ????????е? memstore ??????? disk ??? HFile??


????? 1 ?? HBase ??????????????????????????????檔

????HBase ???? API ???
???????????????????????? JDBC ??HBase client ???????????????????????? API???????????????? HBase ????????????紴???????????????????????Ρ???????????????????????嵥 1 ?????????????????????????????????????????????????????????????????
?????嵥 1.HBase API ?????????????
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
????public class HBaseUtil {
????private Configuration conf = null;
????private HBaseAdmin admin = null;
????protected HBaseUtil(Configuration conf) throws IOException {
????this.conf = conf;
????this.admin = new HBaseAdmin(conf);
????}
????public boolean existsTable(String table)
????throws IOException {
????return admin.tableExists(table);
????}
????public void createTable(String table?? byte[][] splitKeys?? String... colfams)
????throws IOException {
????HTableDescriptor desc = new HTableDescriptor(table);
????for (String cf : colfams) {
????HColumnDescriptor coldef = new HColumnDescriptor(cf);
????desc.addFamily(coldef);
????}
????if (splitKeys != null) {
????admin.createTable(desc?? splitKeys);
????} else {
????admin.createTable(desc);
????}
????}
????public void disableTable(String table) throws IOException {
????admin.disableTable(table);
????}
????public void dropTable(String table) throws IOException {
????if (existsTable(table)) {
????disableTable(table);
????admin.deleteTable(table);
????}
????}
????public void fillTable(String table?? int startRow?? int endRow?? int numCols??
????int pad?? boolean setTimestamp?? boolean random??
????String... colfams) throws IOException {
????HTable tbl = new HTable(conf?? table);
????for (int row = startRow; row <= endRow; row++) {
????for (int col = 0; col < numCols; col++) {
????Put put = new Put(Bytes.toBytes("row-"));
????for (String cf : colfams) {
????String colName = "col-";
????String val = "val-";
????if (setTimestamp) {
????put.add(Bytes.toBytes(cf)?? Bytes.toBytes(colName)??
????col?? Bytes.toBytes(val));
????} else {
????put.add(Bytes.toBytes(cf)?? Bytes.toBytes(colName)??
????Bytes.toBytes(val));
????}
????}
????tbl.put(put);
????}
????}
????tbl.close();
????}
????public void put(String table?? String row?? String fam?? String qual??
????String val) throws IOException {
????HTable tbl = new HTable(conf?? table);
????Put put = new Put(Bytes.toBytes(row));
????put.add(Bytes.toBytes(fam)?? Bytes.toBytes(qual)?? Bytes.toBytes(val));
????tbl.put(put);
????tbl.close();
????}
????public void put(String table?? String row?? String fam?? String qual?? long ts??
????String val) throws IOException {
????HTable tbl = new HTable(conf?? table);
????Put put = new Put(Bytes.toBytes(row));
????put.add(Bytes.toBytes(fam)?? Bytes.toBytes(qual)?? ts?? Bytes.toBytes(val));
????tbl.put(put);
????tbl.close();
????}
????public void put(String table?? String[] rows?? String[] fams?? String[] quals??
????long[] ts?? String[] vals) throws IOException {
????HTable tbl = new HTable(conf?? table);
????for (String row : rows) {
????Put put = new Put(Bytes.toBytes(row));
????for (String fam : fams) {
????int v = 0;
????for (String qual : quals) {
????String val = vals[v < vals.length ? v : vals.length];
????long t = ts[v < ts.length ? v : ts.length - 1];
????put.add(Bytes.toBytes(fam)?? Bytes.toBytes(qual)?? t??
????Bytes.toBytes(val));
????v++;
????}
????}
????tbl.put(put);
????}
????tbl.close();
????}
????public void dump(String table?? String[] rows?? String[] fams?? String[] quals)
????throws IOException {
????HTable tbl = new HTable(conf?? table);
????List<Get> gets = new ArrayList<Get>();
????for (String row : rows) {
????Get get = new Get(Bytes.toBytes(row));
????get.setMaxVersions();
????if (fams != null) {
????for (String fam : fams) {
????for (String qual : quals) {
????get.addColumn(Bytes.toBytes(fam)?? Bytes.toBytes(qual));
????}
????}
????}
????gets.add(get);
????}
????Result[] results = tbl.get(gets);
????for (Result result : results) {
????for (KeyValue kv : result.raw()) {
????System.out.println("KV: " + kv +
????"?? Value: " + Bytes.toString(kv.getValue()));
????}
????}
????}
????private static void scan(int caching?? int batch) throws IOException {
????HTable table = null;
????final int[] counters = {0?? 0};
????Scan scan = new Scan();
????scan.setCaching(caching); // co ScanCacheBatchExample-1-Set Set caching and batch parameters.
????scan.setBatch(batch);
????ResultScanner scanner = table.getScanner(scan);
????for (Result result : scanner) {
????counters[1]++; // co ScanCacheBatchExample-2-Count Count the number of Results available.
????}
????scanner.close();
????System.out.println("Caching: " + caching + "?? Batch: " + batch +
????"?? Results: " + counters[1] + "?? RPCs: " + counters[0]);
????}
????}
??????????? API ???? HBaseAdmin ???????????? Scan ?????????
????HBase ??????????????Σ?HRegion->HStore->[HFile??HFile??...??MemStore]??
?????? HBase ?У?????????ж?? Column Family??????? Scan ???????У???? Column Family(Store) ????????????? StoreScanner ????????? Store ???????????????е? MemStore ???????? HFile ???????????? StoreScanner ?????????? MemStoreScanner ?? N ?? StoreFileScanner ??????????????????
?????????????е???????????2??裺
?????????????????? Store
??????????? Store????? Store ????????? HFile ??????е? MemStore
???????????????????????ɡ?RegionScanner ?????????????? StoreScanner ???????????? RegionScanner ???????? KeyValueHeap storeHeap ???????? StoreScanner ?????????е????????°????? HFile ?? MemStore ????? StoreFileScanner ?? MemStoreScanner??????????????Ч??????????????????С??????????????????????
??????????? seekScanners() ????Щ StoreFileScanner ?? MemStoreScanner ?????? seek??seek ????? KeyValue ???seek ???????? seek ????? KeyValue???????? KeyValue ????????? seek ????? KeyValue ?????????
????Scan???÷????????
????scan.addFamily()/scan.addColumn()?????????? Family ?? Column???????е????κ? addFamily ?? Column?????????е? Columns??
????scan.setMaxVersions()????????汾??????????????κβ??????? setMaxVersions?????????е?汾??????????? setMaxVersions?????????μ?汾.??
????scan.setTimeRange()??????????????С???????????????Χ??? Cell ??????????
????scan.setTimeStamp()???????????
????scan.setFilter()????? Filter ??????????????????
????scan.setStartRow()???????????С?????????????????????
????scan.setStopRow()????????????У????????У???
????scan.setCaching()????δ????????????????????? RPC????
????scan.setBatch()?????????? Cell ?????????????????й????????????? OutofMemory ??????????????
????HBase ????????
????HBase ????????????????????????С?????????????????????????????????????????????????????д???????????????????????·???????? HBase ?????????
??????????
???????????£?????? HBase ????????????????? Region ???????????????????????е? HBase ????????????? Region д??????????? Region ???????????з??????????????д?????????????????????Щ??? Regions????????????д?? HBase ??????? Region ????????????????????????????
????Rowkey ???
????HBase ?? Rowkey ??????????洢????????? Rowkey ???????????????????????????????????洢????飬???????????????????????顣
????????Rowkey ?????????????????鯔???????????д?? Rowkey????????? reverse ??????? Rowkey????? Rowkey ???????????????????и????????? RegionServer ??????????????????????????????????? RegionServer ????????????????????? table ????з????????
????????ColumnFamily ????
????????????????????? ColumnFamily???? Hbase ??????????????? 2~3 ?? ColumnFamily ?????????? ColumnFamily ?? flush ???????????? ColumnFamily ????????Ч??????? flush?????????????????? I/O??
??????????? (setCaching)
????????????????????? HColumnDescriptor.setInMemory(true) ?????? RegionServer ??????У????????????? cache ???С?
????????洢??????
????????????????????? HColumnDescriptor.setTimeToLive(int timeToLive) ????????????洢??????????????????????????
???????????
?????? RegionServer ???? 10~1000 ?? Regions????? Region ?? 1~2G?????? Server ??? 10G????? 1000*2G=2TB?????? 3 ???????? 6TB??????????? 3 ?? 2TB ?????????? 12 ?? 500G ?????????????????????????????????????????????????????????????????
???????????????? RegionServer ????
???????????????????????£??????á??????? HBase ?? conf ???μ? hbase-env.sh ?????? export HBASE_REGIONSERVER_OPTS="-Xmx16000m $HBASE_REGIONSERVER_OPTS"
???????? 16000m ?????? RegionServer ??????С??
????д??????????
?????????????????????????д???????????????????????????????????÷?????????? hdfs-site.xml ?????? hbase ?? conf ???£??????????????????????? dfs.replication ????????????????????????????е? HBase ????????Ч????????????????д HBase ?????? HBase ??????????????????????????????????????屸?????????? 3?????????????????????????Ч??
????WAL???д?????
????????????????? HBase ??д??????ò?????д????????????????????????????????????????? (???????? RegionServer ???)????????????????? WAL ????? Java API д????????? Put ????? WAL?????? Put.setWriteToWAL(boolean)??
????????д
????HBase ?? Put ????????????????????????????????д???????????????翪????????????? Java API ???????????? Put ??????? Put ?б???????? HTable ?? Put(Put ?б?) ??????????д??
???????????δ???????????????
??????????????????????????????????????????????????????????????????檔???????????????????
?????? HBase ?? conf ????????н??????? hbase.client.scanner.caching??
??????????? HTable.setScannerCaching(int scannerCaching) ?????????
??????????? Scan.setCaching(int caching) ???????á??????????????????
????RegionServer ???????? IO ?????
????????? IO ????????????????????????????? Big Put ???? (?????????? Put ?????????? cache ?? Scan???????? Big Put) ?? RegionServer ????????????????
???????? IO ??????????????????????????TPS ??? (??????????? (TransactionPerSecond)) ??????????????????????????????????ο???
?????? hbase-site.xml ???????????????? hbase.regionserver.handler.count??
????Region ??С????
??????????? hbase.hregion.max.filesize?????????????? hbase-site.xml.??????С 256M??
???????? RegionServer ????? Region ???洢??????? Region ????????????? Region ????? split ???С?? Region??С Region ?? split ?? compaction ?????????? Region ?? compact С Region ??? StoreFile ?????????????????? split ?? compaction ?????????????????????С Region ????? split?? compaction???????????????????Region ?????????????????????鷳?????????????Щ HBase ?? bug????? 512M ???μ????С Region???? Region ????????? split ?? compaction?????????? compact ?? split ???????????????????????д???????????
?????????? Region ??ζ????? StoreFile??compaction ?????????????????????????ó????У??????????????????????????? compact ?? split???????????? split ?? compaction???????????????????????д?????compaction ???????????split ?????????????????????????????????????????????????????? 100G?????????????? split(RegionServer ?????δ???? 100G ?? Region ?? split)??????? RegionSplitter ????????????? split ?????? split????? split ????????????????????? split ??????????????????????????? online ??????á???淽?棬С Region ?????? memstore ???С?????????? Region ??????С?????У???????? flush ? app ?? IO wait ???????С???? StoreFile ?????????????