免费视频教程 https://www.51doit.com/ 或者联系博主微信 17710299606

使用idea创建java项目

添加hdp需要的jar包

或者创建maven项目添加依赖

<!-- Hadoop client libraries used by the examples below (common, HDFS and the
     MapReduce client modules); every artifact is pinned to version 2.8.5. -->
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.8.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.8.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.8.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
        <version>2.8.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-common</artifactId>
        <version>2.8.5</version>
    </dependency>
</dependencies>

0 在windows中配置HDP环境

注意: 从HDFS中下载内容到windows系统 , 跨系统操作,需要在本地安装hdp的环境

在windows系统环境变量中配置HADOOP_HOME

0.1 解压到指定的目录中

0.2 配置系统环境变量

1 入门程序 (创建文件夹) mkdirs

package com._51doit.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

/**
 * Entry-level HDFS client demo: connects to the cluster and creates a nested directory.
 */
public class ClientDemo1 {
    /**
     * Creates the directory {@code /doit16/hdp} on the HDFS instance at
     * {@code hdfs://linux01:9000}, acting as user {@code root}, and prints whether
     * {@code mkdirs} reported success.
     *
     * @param args unused
     * @throws Exception if the URI cannot be parsed or the HDFS call fails
     */
    public static void main(String[] args) throws Exception {
        // Location of the HDFS instance to talk to.
        URI uri = new URI("hdfs://linux01:9000");
        // Client-side configuration object.
        Configuration conf = new Configuration();
        // User name the client operates as.
        String username = "root";

        // try-with-resources: the client is closed even when mkdirs throws.
        try (FileSystem fs = FileSystem.newInstance(uri, conf, username)) {
            // Path is HDFS's abstraction of a file-system path.
            Path path = new Path("/doit16/hdp");

            // Create the directory hierarchy.
            if (fs.mkdirs(path)) {
                System.out.println("创建成功");
            } else {
                System.out.println("创建失败");
            }
        }
    }
}

2 上传文件 copyFromLocalFile

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

/**
 * Uploads a local file into HDFS with {@code copyFromLocalFile}.
 */
public class Upload2Hdfs {
    /**
     * Copies {@code d://hadoop-2.8.1.zip} from the local disk to {@code /doit16/hdp/}
     * on {@code hdfs://linux01:9000} as user {@code root}.
     *
     * @param args unused
     * @throws Exception if the URI cannot be parsed or the copy fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // try-with-resources: the client is closed even when the copy throws.
        try (FileSystem fs = FileSystem.newInstance(new URI("hdfs://linux01:9000"), conf, "root")) {
            Path src = new Path("d://hadoop-2.8.1.zip"); // local file to upload
            Path dis = new Path("/doit16/hdp/");         // destination inside HDFS

            // Four-argument overload: (delSrc, overwrite, src, dst).
            // NOTE(review): delSrc=true removes the local source after the copy and
            // overwrite=true replaces an existing destination - confirm that deleting
            // the local file is intended. The two-argument overload
            // fs.copyFromLocalFile(src, dis) is the simpler alternative; it returns nothing.
            fs.copyFromLocalFile(true, true, src, dis);
        }
    }
}

3 下载

import com._51doit.utils.DoitUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


/**
 * Downloads a file from HDFS to the local disk with {@code copyToLocalFile}.
 */
public class DownLoad {

    /**
     * Copies {@code /doit16/hdp/hadoop-2.8.1.zip} from HDFS into {@code d://}.
     *
     * @param args unused
     * @throws Exception if obtaining the client or the copy fails
     */
    public static void main(String[] args) throws Exception {
        // DoitUtils.getFs() is a project helper that supplies the HDFS client.
        // try-with-resources: the client is closed even when the copy throws.
        try (FileSystem fs = DoitUtils.getFs()) {
            Path p1 = new Path("/doit16/hdp/hadoop-2.8.1.zip"); // source inside HDFS
            Path p2 = new Path("d://");                         // local destination

            // Four-argument overload: (delSrc, src, dst, useRawLocalFileSystem).
            // delSrc=false keeps the HDFS copy; the last flag selects the raw local
            // file system for the write. fs.copyToLocalFile(p1, p2) is the
            // two-argument alternative.
            fs.copyToLocalFile(false, p1, p2, true);
        }
    }
}

4 删除

import com._51doit.utils.DoitUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


/**
 * Deletes a path from HDFS after checking that it exists.
 */
public class DeleteDemo {
    /**
     * Recursively deletes {@code /doit16} and prints whether the path was missing,
     * deleted, or could not be deleted.
     *
     * @param args unused
     * @throws Exception if obtaining the client or an HDFS call fails
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources: the client is closed even when exists/delete throws.
        try (FileSystem fs = DoitUtils.getFs()) {
            Path path = new Path("/doit16");

            // Nothing to delete: report it and stop.
            if (!fs.exists(path)) {
                System.out.println("删除的内容不存在");
                return;
            }

            // Second argument true = delete recursively.
            if (fs.delete(path, true)) {
                System.out.println("删除内容成功");
            } else {
                System.out.println("删除内容失败");
            }
        }
    }
}

5 移动+重命名

5.1 重命名

        // Rename /a.txt to /aa.txt. Giving a destination in a different directory
        // would move the file instead. `fs` is the FileSystem client obtained earlier.
        Path path1 = new Path("/a.txt");
        Path path2 = new Path("/aa.txt");
        fs.rename(path1 , path2);

6 读取数据

import com._51doit.utils.DoitUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.BufferedReader;
import java.io.InputStreamReader;


/**
 * Reads a text file from HDFS line by line and prints it to standard output.
 */
public class ReadData {
    /**
     * Opens {@code /app.txt} on HDFS and prints every line.
     *
     * @param args unused
     * @throws Exception if obtaining the client, opening or reading the file fails
     */
    public static void main(String[] args) throws Exception {
        // Resources are closed in reverse order (reader, stream, client), even when
        // reading throws part-way through.
        try (FileSystem fs = DoitUtils.getFs();
             // Input stream over the HDFS file; fin.seek(offset) would allow random reads.
             FSDataInputStream fin = fs.open(new Path("/app.txt"));
             // Decode explicitly as UTF-8 rather than the platform default charset,
             // so the output does not depend on the machine the client runs on.
             BufferedReader br = new BufferedReader(
                     new InputStreamReader(fin, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            // Read until end of file.
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}

7 写数据

HDFS分布式文件系统,存储的一般都是一些静态文件数据, 不太建议向文件中写数据, 不支持随机写数据 , 支持追加内容

一次存储多次读取

import com._51doit.utils.DoitUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;


/**
 * Writes text to a file on HDFS, either by creating/overwriting it or by appending.
 */
public class WriteData {
    /**
     * Creates {@code /app.txt} (overwriting an existing file) and writes two lines to it.
     *
     * @param args unused
     * @throws Exception if obtaining the client or writing fails
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources: stream and client are closed even when a write throws.
        try (FileSystem fs = DoitUtils.getFs();
             // Second argument true = overwrite the file if it already exists.
             FSDataOutputStream fout = fs.create(new Path("/app.txt"), true)) {
            // Encode explicitly as UTF-8 instead of the platform default charset.
            fout.write("hello boys \n".getBytes(java.nio.charset.StandardCharsets.UTF_8));
            fout.write("hello girl \n".getBytes(java.nio.charset.StandardCharsets.UTF_8));
        }
    }

    /**
     * Appends one line of text to {@code /app.txt}; the file must already exist.
     *
     * @param fs HDFS client to use; it is not closed by this method
     * @throws IOException if opening the file for append or writing fails
     */
    private static void appendData(FileSystem fs) throws IOException {
        // try-with-resources: the append stream is closed even when the write throws.
        try (FSDataOutputStream fout = fs.append(new Path("/app.txt"))) {
            fout.write("\nhello tom jim cat \n".getBytes(java.nio.charset.StandardCharsets.UTF_8));
        }
    }
}

8 查看指定目录下的内容 / 查看文件信息

import com._51doit.utils.DoitUtils;
import org.apache.hadoop.fs.*;
import java.io.IOException;


/**
 * Demonstrates listing the contents of an HDFS directory and inspecting file
 * and block metadata.
 */
public class ListContents {
    /**
     * Lists the direct children (files and directories) of {@code /} and
     * distinguishes files from everything else; both branches are placeholders.
     *
     * @param args unused
     * @throws Exception if obtaining the client or the listing fails
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources: the client is closed even when the listing throws.
        try (FileSystem fs = DoitUtils.getFs()) {
            FileStatus[] status = fs.listStatus(new Path("/"));
            for (FileStatus fileStatus : status) {
                // Ask the already-fetched FileStatus instead of fs.isFile(path), which
                // would look the entry up again; fileStatus.getPath() gives its path
                // and fileStatus.isDirectory() is the complementary check.
                if (fileStatus.isFile()) {
                    // A file: read its metadata here.
                } else {
                    // Not a file: descend into it here.
                }
            }
        }
    }

    /**
     * Recursively walks every file under {@code /} and prints, for each block of
     * each file and each host returned for that block, a line of the form
     * {@code name--blockLength:blockOffset:host}.
     *
     * @param fs HDFS client to use; it is not closed by this method
     * @throws IOException if listing the files fails
     */
    private static void myListFile(FileSystem fs) throws IOException {
        // Second argument true = recurse into sub-directories.
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            String name = file.getPath().getName(); // file name without parent path
            // Further per-file metadata is available via file.getLen() (size),
            // file.getBlockSize() (block size) and file.getReplication() (replica count).

            // Visit every block of the file.
            for (BlockLocation blockLocation : file.getBlockLocations()) {
                long length = blockLocation.getLength(); // actual size of this block
                long offset = blockLocation.getOffset(); // start offset of the block in the file
                // One entry per host returned for this block.
                for (String host : blockLocation.getHosts()) {
                    System.out.println(name+"--"+length+":"+offset+":"+host);
                }
            }
        }
    }
}

9 配置对象详解

9.1 所有的操作有默认参数的

副本3 物理切块128M

9.2 可以在代码中设置

// Values set in code on the Configuration object, overriding the defaults
// mentioned above (replication 3, block size 128M).
Configuration conf = new Configuration();
// Replica count requested for files written with this configuration.
conf.set("dfs.replication","5");
// Block size requested for files written with this configuration.
conf.set("dfs.blocksize","32M");

9.3 默认会读取本项目中的配置文件

默认会读取classpath下的配置文件: 先加载jar包中自带的默认配置 core-default.xml、hdfs-default.xml , 再加载项目中的 core-site.xml、hdfs-site.xml 等 xxx-site.xml , 后者会覆盖前者中的同名参数

9.4 配置参数生效的优先级

代码中 > 项目的配置文件中 > 默认配置参数

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;
import java.net.URISyntaxException;


/**
 * Shows configuration values set in code being used for an upload.
 */
public class ConfDetail {
    /**
     * Sets {@code dfs.replication} to 5 and {@code dfs.blocksize} to 32M in code, then
     * uploads {@code d://a.json} to {@code /z.json} on {@code hdfs://linux01:9000}
     * as user {@code root}.
     *
     * @param args unused
     * @throws Exception if the URI cannot be parsed or the upload fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Values set here take priority over configuration files and built-in defaults.
        conf.set("dfs.replication","5");
        conf.set("dfs.blocksize","32M");
        URI uri = new URI("hdfs://linux01:9000");

        // try-with-resources: the client is closed even when the upload throws.
        try (FileSystem fs = FileSystem.newInstance(uri, conf, "root")) {
            fs.copyFromLocalFile(new Path("d://a.json") , new Path("/z.json"));
        }
    }
}

• Docker安装与应用	• 打印乘法口诀就这?不会就你不会吧!
• VMware的网络模式	• 新手入门linux常用命令（二）
• 史上最完整的大数据学习（四） Zookeeper 完结	• 记录hive无法创建表的问题

• Esp8266天猫精灵_RGB灯_非点灯平台	• STM32F103 串口1和串口3对发数据配合蓝牙模块
• TMS570学习【1】了解什么是TMS570	• 新闻稿 \| Qt公司收购froglogic公司以巩固市场领
• [Java]SpringBoot2整合mqtt服务器EMQ实现消息订	• 苹果群控投屏同步操作原理及运用的平台APP分享

• Esp8266天猫精灵_RGB灯_非点灯平台	• STM32F103 串口1和串口3对发数据配合蓝牙模块
• TMS570学习【1】了解什么是TMS570	• 新闻稿 \| Qt公司收购froglogic公司以巩固市场领
• [Java]SpringBoot2整合mqtt服务器EMQ实现消息订	• 苹果群控投屏同步操作原理及运用的平台APP分享
• STM32查询式按键输入[直接用寄存器]	• Ubuntu系统 USB设备端口绑定
• 2021-04-14 第四次按键输入实验	• Flutter扫码功能完美实现

hadoop详细文档(六) hadoop之HDFS的java客户端(附带详细讲解视频)