Java Code for Hadoop HDFS File Operations
1. Create a directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // mkdirs creates a directory; fs.create(path) would create an empty file instead
        fs.mkdirs(path);
        fs.close();
    }
}
2. Delete a directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // the boolean enables recursive deletion, which is required for non-empty directories
        fs.delete(path, true);
        fs.close();
    }
}
3. Write a file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FSDataOutputStream out = fs.create(path);
        // writeUTF prepends a two-byte length header; use out.write(text.getBytes()) for plain text
        out.writeUTF("da jia hao,cai shi zhen de hao!");
        // close the stream so the data is flushed before the FileSystem is closed
        out.close();
        fs.close();
    }
}
4. Read a file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        if (fs.exists(path)) {
            FSDataInputStream is = fs.open(path);
            FileStatus status = fs.getFileStatus(path);
            // size the buffer from the file length and read the whole file
            byte[] buffer = new byte[(int) status.getLen()];
            is.readFully(0, buffer);
            is.close();
            fs.close();
            // buffer.toString() would only print the array reference; build a String from the bytes
            System.out.println(new String(buffer));
        }
    }
}
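As a variant of the buffer-based read above (not part of the original post), the file contents can also be streamed straight to standard output with Hadoop's IOUtils helper; the path is the same placeholder used above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CatFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(new Path("/user/hadoop/hdfs/xxxx.txt"));
        try {
            // copy the stream to stdout in 4 KB chunks; 'false' leaves closing to the finally block
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
            fs.close();
        }
    }
}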
5. Upload a local file to HDFS
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("/home/hadoop/xxxx.txt");
        Path dst = new Path("/user/hadoop/hdfs/");
        fs.copyFromLocalFile(src, dst);
        fs.close();
    }
}
6. Delete a file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        // 'false' because deleting a single file needs no recursion
        fs.delete(path, false);
        fs.close();
    }
}
7. List all subdirectories and files under a given directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {
    static Configuration conf = new Configuration();

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop");
        getFile(path, fs);
        //fs.close();
    }

    // recursively print every file under the given directory
    public static void getFile(Path path, FileSystem fs) throws IOException {
        FileStatus[] fileStatus = fs.listStatus(path);
        for (int i = 0; i < fileStatus.length; i++) {
            if (fileStatus[i].isDir()) {
                Path p = new Path(fileStatus[i].getPath().toString());
                getFile(p, fs);
            } else {
                System.out.println(fileStatus[i].getPath().toString());
            }
        }
    }
}
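A possible alternative (not in the original post): on Hadoop 2.x and later, FileSystem.listFiles can walk the directory tree without hand-written recursion; the path below is the same placeholder as above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesRecursive {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // 'true' asks the FileSystem to recurse into subdirectories for us
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/user/hadoop"), true);
        while (it.hasNext()) {
            System.out.println(it.next().getPath().toString());
        }
        fs.close();
    }
}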
8. Find where a file is located in the HDFS cluster
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getFileLocal();
    }

    /**
     * Find where the blocks of a file are located in the HDFS cluster.
     */
    public static void getFileLocal() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
        for (int i = 0; i < locations.length; i++) {
            // each block may be replicated on several hosts, so print all of them
            // (indexing hosts with the block index, as in the original listing, is a bug)
            String[] hosts = locations[i].getHosts();
            for (String host : hosts) {
                System.out.println("block_" + i + "_location:" + host);
            }
        }
    }
}
9. Get the names of all nodes in the HDFS cluster
package com.hadoop.file;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getHDFSNode();
    }

    /**
     * Print the name of every DataNode in the HDFS cluster.
     */
    public static void getHDFSNode() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();
        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Node:" + dataNodeStats[i].getHostName());
        }
    }
}
When operating on FileSystem in a pseudo-distributed environment, an exception can occur.
The Java code is as follows:
FileSystem fs = FileSystem.get(conf);
The exception thrown is as follows:
Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in, expected: file:///
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
    at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357)
    at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245)
    at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125)
    at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283)
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356)
    at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23)
Solution:
Place Hadoop's core-site.xml and hdfs-site.xml in the current project (under the bin folder of the Eclipse workspace directory) so they are picked up on the classpath.
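If copying the XML files into the project is inconvenient, a hedged alternative is to point the Configuration at the cluster programmatically; the file locations below are assumptions, and the NameNode address is the one shown in the stack trace above, so substitute your own values.

Configuration conf = new Configuration();
// either load the cluster configuration files explicitly (paths are assumptions)...
conf.addResource(new Path("/usr/local/hadoop/conf/core-site.xml"));
conf.addResource(new Path("/usr/local/hadoop/conf/hdfs-site.xml"));
// ...or set the default filesystem directly (the key is fs.default.name on Hadoop 1.x, fs.defaultFS on 2.x and later)
conf.set("fs.defaultFS", "hdfs://localhost:9000");
FileSystem fs = FileSystem.get(conf);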
Summary:
Because we are accessing a remote HDFS, the FileSystem must be obtained via a URI.
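A minimal sketch of that, assuming the NameNode address hdfs://localhost:9000 taken from the stack trace above:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

Configuration conf = new Configuration();
// pass the HDFS URI explicitly so the call does not fall back to the local file:/// filesystem
FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);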
Original: http://www.cnblogs.com/wuzhenquan/p/3617751.html