【Java】 Reading an HFile in Java
Author: / Published on 2017/4/25 / 714
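The class below opens an HFile directly from HDFS with HFile.createReader, walks every KeyValue with an HFileScanner, and collects simple key/value length and row-size statistics along the way, much like HBase's own HFilePrettyPrinter. A small helper, getStoreFiles, also shows how to enumerate the store files under a region directory.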
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;

public class HFileReaderTest {

    private List<Path> files = new ArrayList<Path>();
    private static Configuration conf;
    private int count = 0;

    // Option flags mirroring HFilePrettyPrinter; only a subset is used below.
    private boolean verbose;
    private boolean printValue = true;
    private boolean printKey = true;
    private boolean shouldPrintMeta;
    private boolean printBlocks;
    private boolean printStats = true;
    private boolean checkRow = true;
    private boolean checkFamily = true;
    private boolean isSeekToRow = false;
    private static byte[] row = new byte[1024];

    public void readHfile(Path fileName) throws IOException {
        conf = HBaseConfiguration.create();
        // conf.set("fs.default.name", "hdfs://192.168.0.30:8020");

        // Alternative: resolve every store file of a region and process each of them:
        //   byte[][] hri = HRegionInfo.parseRegionName(Bytes.toBytes(regionName));
        //   Path rootDir = FSUtils.getRootDir(conf);
        //   Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
        //   Path regionDir = new Path(tableDir, HRegionInfo.encodeRegionName(Bytes.toBytes(regionName)));
        //   files.addAll(getStoreFiles(FileSystem.get(conf), regionDir));
        //   for (Path file : files) { processFile(file); }

        processFile(fileName);
    }

    private void processFile(Path file) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        HFileScanner scanner = reader.getScanner(false, false, false);
        System.out.println("================" + scanner);

        // The scanner must be positioned before the first cell can be read.
        scanner.seekTo();
        System.out.println("==========" + scanner.getKeyValue());

        KeyValueStatsCollector fileStats = new KeyValueStatsCollector();
        // Pass null as the target row so the whole file is scanned.
        scanKeysValues(file, fileStats, scanner, null);
        fileStats.finish();
        System.out.println(fileStats);

        reader.close();
    }

    // Collects all store files (HFiles) found under the column family directories of a region.
    static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException {
        List<Path> res = new ArrayList<Path>();
        PathFilter dirFilter = new FSUtils.DirFilter(fs);
        FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
        for (FileStatus dir : familyDirs) {
            FileStatus[] files = fs.listStatus(dir.getPath());
            for (FileStatus file : files) {
                if (!file.isDir()) {
                    res.add(file.getPath());
                }
            }
        }
        return res;
    }

    // Simple min/max/mean accumulator for long values.
    private static class LongStats {
        private long min = Long.MAX_VALUE;
        private long max = Long.MIN_VALUE;
        private long sum = 0;
        private long count = 0;

        void collect(long d) {
            if (d < min) min = d;
            if (d > max) max = d;
            sum += d;
            count++;
        }

        public String toString() {
            return "count: " + count + "\tmin: " + min + "\tmax: " + max
                    + "\tmean: " + ((double) sum / count);
        }
    }

    // Aggregates per-cell and per-row statistics while the file is scanned.
    private static class KeyValueStatsCollector {
        LongStats keyLen = new LongStats();
        LongStats valLen = new LongStats();
        LongStats rowSizeBytes = new LongStats();
        LongStats rowSizeCols = new LongStats();

        long curRowBytes = 0;
        long curRowCols = 0;
        byte[] biggestRow = null;
        private KeyValue prevKV = null;
        private long maxRowBytes = 0;

        public void collect(KeyValue kv) {
            keyLen.collect(kv.getKeyLength());
            valLen.collect(kv.getValueLength());
            if (prevKV != null && KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
                // new row
                collectRow();
            }
            curRowBytes += kv.getLength();
            curRowCols++;
            prevKV = kv;
        }

        private void collectRow() {
            rowSizeBytes.collect(curRowBytes);
            rowSizeCols.collect(curRowCols);
            if (curRowBytes > maxRowBytes && prevKV != null) {
                biggestRow = prevKV.getRow();
            }
            curRowBytes = 0;
            curRowCols = 0;
        }

        public void finish() {
            if (curRowCols > 0) {
                collectRow();
            }
        }

        @Override
        public String toString() {
            if (prevKV == null) return "no data available for statistics";
            return "Key length: " + keyLen + "\n"
                    + "Val length: " + valLen + "\n"
                    + "Row size (bytes): " + rowSizeBytes + "\n"
                    + "Row size (columns): " + rowSizeCols + "\n"
                    + "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
        }
    }

    private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
            HFileScanner scanner, byte[] row) throws IOException {
        KeyValue pkv = null;
        do {
            KeyValue kv = scanner.getKeyValue();
            System.out.println("kv=========" + kv);
            // Stop (or skip forward) once the optional target row is reached.
            if (row != null && row.length != 0) {
                int result = Bytes.compareTo(kv.getRow(), row);
                if (result > 0) {
                    break;
                } else if (result < 0) {
                    continue;
                }
            }
            // collect stats
            if (printStats) {
                fileStats.collect(kv);
            }
            // dump key value
            if (printKey) {
                System.out.print("K: " + kv);
                if (printValue) {
                    System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
                }
                System.out.println();
            }
            // check if rows are in order
            if (checkRow && pkv != null) {
                if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
                    System.err.println("WARNING, previous row is greater than"
                            + " current row\n\tfilename -> " + file
                            + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
                            + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
                }
            }
            // check if families are consistent
            if (checkFamily) {
                String fam = Bytes.toString(kv.getFamily());
                if (!file.toString().contains(fam)) {
                    System.err.println("WARNING, filename does not match kv family,"
                            + "\n\tfilename -> " + file
                            + "\n\tkeyvalue -> " + Bytes.toStringBinary(kv.getKey()));
                }
                if (pkv != null && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
                    System.err.println("WARNING, previous kv has different family"
                            + " compared to current key\n\tfilename -> " + file
                            + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
                            + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
                }
            }
            pkv = kv;
            ++count;
        } while (scanner.next());
    }

    public static void main(String[] args) throws IOException {
        // Equivalent CLI: org.apache.hadoop.hbase.io.hfile.HFile -f <file name> -p
        Path path = new Path("hdfs://namenode1:8020/test/hfile/family/a40c34f130ff473999237870c8b6c2a4");
        // Path path = new Path("/lixun/hfile/family/046ad43575fa4d06a21b414cf51305f3");
        new HFileReaderTest().readHfile(path);

        // The same file can also be dumped with HBase's built-in pretty printer.
        args = new String[] { "-f",
                "hdfs://namenode1:8020/test/hfile/family/a40c34f130ff473999237870c8b6c2a4", "-p" };
        HFilePrettyPrinter prettyPrinter = new HFilePrettyPrinter();
        System.exit(prettyPrinter.run(args));
    }
}
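If only the raw cells are needed, the heart of the reader is the seekTo()/next() loop. Below is a minimal, self-contained sketch of that idiom, assuming the same 0.9x-era HBase KeyValue API used above; the class name HFileDump and the dump helper are illustrative only, not part of the original code.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;

public class HFileDump {
    // Print every row/value pair of a single HFile; conf and file come from the caller.
    static void dump(Configuration conf, Path file) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
        try {
            HFileScanner scanner = reader.getScanner(false, false, false);
            if (scanner.seekTo()) {                      // position at the first cell; false means the file is empty
                do {
                    KeyValue kv = scanner.getKeyValue(); // cell the scanner is currently on
                    System.out.println(Bytes.toStringBinary(kv.getRow())
                            + " -> " + Bytes.toStringBinary(kv.getValue()));
                } while (scanner.next());                // advance; false once the last cell has been read
            }
        } finally {
            reader.close();
        }
    }
}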