【大数据开发】HDFS——客户端API文件操作

    科技2024-08-12  31

    文章目录

    准备工作一、API之文件系统对象二、API之文件上传三、API之文件下载四、API之创建目录五、API之删除目录六、API之重命名七、IOUtil上传文件八、IOUtil下载文件九、API之文件状态

    准备工作

    依赖

    <?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.qfedu.bigdata</groupId> <artifactId>NZ2002Demo</artifactId> <version>1.0</version> <dependencies> <!-- jdk依赖 --> <dependency> <groupId>jdk.tools</groupId> <artifactId>jdk.tools</artifactId> <version>1.8.0</version> <scope>system</scope> <systemPath>${env.JAVA_HOME}/lib/tools.jar</systemPath> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>2.7.6</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>2.7.6</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>2.7.6</version> </dependency> <!-- https://mvnrepository.com/artifact/junit/junit --> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> <configuration> <source>1.8</source> <!-- 默认是1.5,不能写成1.8.0 --> <target>1.8</target> </configuration> </plugin> </plugins> </build> </project>

    一、API之文件系统对象

    @Test public void testGetFileSystem() throws IOException { //创建配置对象,用于加载配置信息(四个默认的配置文件:core-default.xml,hdfs-default.xml,mapred-default.xml,yarn-default.xml) Configuration conf = new Configuration(); //修改fs.defaultFS属性的值 conf.set("fs.defaultFS","hdfs://192.168.10.101:8020"); //使用FileSystem类的静态方法get(Configuration conf);返回fs.defaultFS定义的文件系统 FileSystem fs = FileSystem.get(conf); System.out.println("文件系统对象的类型名:"+fs.getClass().getName()); }

    二、API之文件上传

    @Test public void testFileUpload() throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS","hdfs://192.168.10.101:8020"); FileSystem fs = FileSystem.get(conf); //将本地的一个文件D:/file1,上传到HDFS上 /file1 //1. 使用Path描述两个文件 Path localPath = new Path("D:/file1"); Path hdfsPath = new Path("/file1"); //2.调用上传方法 fs.copyFromLocalFile(localPath,hdfsPath); //3.关闭 fs.close(); System.out.println("上传成功"); }

    三、API之文件下载

    @Test public void testFileDownload() throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS","hdfs://192.168.10.101:8020"); FileSystem fs = FileSystem.get(conf); //从HDFS上下载一个文件/file1,下载到本地 D:/file2 //1. 使用Path描述两个文件 Path hdfsfile = new Path("/file1"); Path local = new Path("D:/file2"); //2. 调用下载方法进行下载 fs.copyToLocalFile(hdfsfile,local); fs.close(); System.out.println("下载成功"); }

    四、API之创建目录

    @Test public void testMkdir() throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS","hdfs://192.168.10.101:8020"); FileSystem fs = FileSystem.get(conf); //1. 测试创建目录,描述一个目录 Path hdfsfile = new Path("/dir1"); //2. 调用创建目录的方法 fs.mkdirs(hdfsfile); fs.close(); System.out.println("创建成功"); }

    五、API之删除目录

    @Test public void testDelete() throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS","hdfs://192.168.10.101:8020"); FileSystem fs = FileSystem.get(conf); //1. 测试删除目录,描述一个目录 Path hdfsfile = new Path("/dir1"); //2. 调用创建目录的方法 fs.delete(hdfsfile,true); fs.close(); System.out.println("删除成功"); }

    六、API之重命名

    @Test public void testRename() throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS","hdfs://192.168.10.101:8020"); FileSystem fs = FileSystem.get(conf); //1. 测试重命名,将file1改为file01 Path oldName = new Path("/file1"); Path newName = new Path("/file01"); //2.调用重命名方法 fs.rename(oldName,newName); fs.close(); System.out.println("命名成功"); }

    七、IOUtil上传文件

    @Test public void putFile() throws IOException, URISyntaxException { //1 连接HDFS 文件系统 Configuration conf=new Configuration(); //获得文件系统 FileSystem fs=FileSystem.get(new URI("hdfs://182.168.10.101:8020"),conf); // 创建输入流,读取输入文件 FileInputStream input=new FileInputStream(new File("c://a.txt")); // 创建输出流 FSDataOutputStream out=fs.create(new Path("/gg.txt")); //IO的流拷贝 IOUtils.copyBytes(input, out, conf); //关闭资源 IOUtils.closeStream(input); IOUtils.closeStream(out); System.out.println("上传完毕"); }

    八、IOUtil下载文件

    @Test public void getFile() throws IOException, URISyntaxException { // 1 连接HDFS 文件系统 Configuration conf = new Configuration(); // 获得文件系统 FileSystem fs = FileSystem.get(new URI("hdfs://192.168.10.101:8020"), conf); // 获取输入流 从HDFS上读取 FSDataInputStream input=fs.open(new Path("/gg.txt")); // 获取输出流 FileOutputStream out=new FileOutputStream(new File("c://gg.txt")); //流拷贝 IOUtils.copyBytes(input, out, conf); //关闭流 IOUtils.closeStream(input); IOUtils.closeStream(out); System.out.println("下载完成"); }

    九、API之文件状态

    @Test public void testFileStatus() throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS","hdfs://192.168.10.101:8020"); FileSystem fs = FileSystem.get(conf); //1. 描述你要读取的文件 /file02 Path path = new Path("/file02"); //获取文件的状态信息 RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(path); while(it.hasNext()){ // 取出对象 LocatedFileStatus status = it.next(); System.out.println("name:"+status.getPath()); //获取位置 BlockLocation[] locate = status.getBlockLocations(); for(BlockLocation bl:locate){ System.out.println("当前块的所有副本位置:"+Arrays.toString(bl.getHosts())); System.out.println("当前块大小:"+bl.getLength()); System.out.println("当前块的副本的ip地址信息:"+Arrays.toString(bl.getNames())); } System.out.println("系统的块大小:"+status.getBlockSize()); System.out.println("文件总长度:"+status.getLen()); } }

    Processed: 0.010, SQL: 8