wget https://apache.claz.org/druid/0.20.2/apache-druid-0.20.2-bin.tar.gz
The default port 8081 may already be occupied by Spark, in which case that process needs to be killed first.
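One way to find and kill whatever is holding the port (a sketch; substitute the PID that lsof reports):
lsof -i :8081
kill <pid-from-lsof>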
bin/start-nano-quickstart
and the cluster is up. Then open
http://10.227.26.77:8888/
The default metadata store is also Derby.
To stop the cluster, run bin/service --down

In Load data, pick the bundled example data and click Next all the way through, which produces the spec below:
{
  "spec": {
    "type": "index_parallel",
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "http",
        "uris": [
          "https://druid.apache.org/data/wikipedia.json.gz"
        ]
      },
      "inputFormat": {
        "type": "json"
      }
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      }
    },
    "dataSchema": {
      "dataSource": "wikipedia",
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "DAY",
        "queryGranularity": "HOUR",
        "rollup": true
      },
      "timestampSpec": {
        "column": "timestamp",
        "format": "iso"
      },
      "dimensionsSpec": {
        "dimensions": [
          "channel",
          "cityName",
          "comment",
          "countryIsoCode",
          "countryName",
          "diffUrl",
          "flags",
          "isAnonymous",
          "isMinor",
          "isNew",
          "isRobot",
          "isUnpatrolled",
          "metroCode",
          "namespace",
          "page",
          "regionIsoCode",
          "regionName",
          "user"
        ]
      },
      "metricsSpec": [
        {
          "name": "count",
          "type": "count"
        },
        {
          "name": "sum_added",
          "type": "longSum",
          "fieldName": "added"
        },
        {
          "name": "sum_commentLength",
          "type": "longSum",
          "fieldName": "commentLength"
        },
        {
          "name": "sum_deleted",
          "type": "longSum",
          "fieldName": "deleted"
        },
        {
          "name": "sum_delta",
          "type": "longSum",
          "fieldName": "delta"
        },
        {
          "name": "sum_deltaBucket",
          "type": "longSum",
          "fieldName": "deltaBucket"
        }
      ]
    }
  },
  "type": "index_parallel"
}
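
The spec can also be submitted without the console by POSTing it to the task endpoint, which the router proxies on port 8888 (a sketch; the file name wikipedia-index.json is my own):
curl -X POST -H 'Content-Type: application/json' -d @wikipedia-index.json http://10.227.26.77:8888/druid/indexer/v1/task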

Prepare some data to ingest:
{"user":"user1","dim1":"3","dim2":"1","dim3":"2","cost":59}
{"user":"user2","dim1":"3","dim2":"1","dim3":"2","cost":159}
{"user":"user5","dim1":"2","dim2":"1","dim3":"2","cost":59}

Druid's logs live under var/sv, one file per service (coordinator, broker, and so on).
Ingestion failed, and router.log contained the following exception:
Exception while polling for rules
org.apache.druid.java.util.common.IOE: No known server
at org.apache.druid.discovery.DruidLeaderClient.getCurrentKnownLeader(DruidLeaderClient.java:274) ~[druid-server-0.20.2.jar:0.20.2]
at org.apache.druid.discovery.DruidLeaderClient.makeRequest(DruidLeaderClient.java:129) ~[druid-server-0.20.2.jar:0.20.2]

Following a fix found online, I deleted the var directory and restarted; on the next ingestion attempt the router reported a different error:
org.apache.druid.java.util.common.ISE: No default server found
I cleared the var directory once more. As it turns out, simply choosing a new spec in Load data is all that's needed.
If our data looks like this:
{"timestamp":"2021-01-01","user":"user1","dim1":"3","dim2":"1","dim3":"2","cost":59}
{"timestamp":"2021-01-01","user":"user2","dim1":"3","dim2":"2","dim3":"1","cost":159}
{"timestamp":"2021-01-02","user":"user5","dim1":"2","dim2":"3","dim3":"3","cost":59}
{"timestamp":"2021-01-02","user":"user8","dim1":"3","dim2":"4","dim3":"2","cost":359}
{"timestamp":"2021-01-03","user":"user2","dim1":"3","dim2":"1","dim3":"1","cost":459}
{"timestamp":"2021-01-03","user":"user5","dim1":"2","dim2":"2","dim3":"3","cost":659}
At the Configure schema step we can designate which fields are metrics and which are dimensions; metric fields get aggregated under rollup. For the data above, auto-detection treats user as a dimension and everything else as metrics.
Manually switch dim1/dim2/dim3 over to dimensions, with string as the dimension type.
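For this data the resulting schema fragment looks roughly like the following (a sketch; sum_cost matches the metric name the queries below aggregate):
"dimensionsSpec": {
  "dimensions": ["user", "dim1", "dim2", "dim3"]
},
"metricsSpec": [
  { "name": "count", "type": "count" },
  { "name": "sum_cost", "type": "longSum", "fieldName": "cost" }
]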
Strictly speaking, the data should carry a timestamp field (the column can be named anything; Druid is, after all, a time-series database). If it has none, a default timestamp of 2010-01-01 is generated, effectively putting every row at the same instant.
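When no timestamp column exists, the console falls back to a constant; the generated timestampSpec looks roughly like this (a sketch; the placeholder column name is what the console emits, and 2010-01-01 matches the timestamps seen in the query results below):
"timestampSpec": {
  "column": "!!!_no_such_column_!!!",
  "missingValue": "2010-01-01T00:00:00Z"
}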

Accessing Druid with druidry

<dependency>
    <groupId>in.zapr.druid</groupId>
    <artifactId>druidry</artifactId>
    <version>3.1</version>
</dependency>
<dependency>
    <groupId>org.glassfish.jersey.containers</groupId>
    <artifactId>jersey-container-servlet</artifactId>
    <version>2.26</version>
</dependency>
<dependency>
    <groupId>org.glassfish.jersey.ext</groupId>
    <artifactId>jersey-bean-validation</artifactId>
    <version>2.26</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.glassfish.jersey.inject/project -->
<dependency>
    <groupId>org.glassfish.jersey.inject</groupId>
    <artifactId>project</artifactId>
    <version>2.26</version>
    <type>pom</type>
</dependency>

Queries should be sent to the broker.
We can see the broker's port is 8082,
so we can POST directly to 10.227.26.77:8082/druid/v2/?pretty with a body like:

{
  "queryType": "groupBy",
  "dataSource": "inline_data",
  "granularity": "day",
  "dimensions": ["dim1"],
  "aggregations": [{
    "type": "longSum",
    "name": "sum_cost",
    "fieldName": "sum_cost"
  }],
  "intervals": ["2010-01-01T00:00:00.000/2022-01-03T00:00:00.000"]
}
Testing shows the intervals also work without milliseconds:
"intervals": [ "2010-01-01T00:00:00/2022-01-03T00:00:00"]
The response:
[
  {
    "version": "v1",
    "timestamp": "2010-01-01T00:00:00.000Z",
    "event": {
      "dim1": null,
      "sum_cost": 1077
    }
  },
  {
    "version": "v1",
    "timestamp": "2021-01-01T00:00:00.000Z",
    "event": {
      "dim1": "3",
      "sum_cost": 218
    }
  },
  {
    "version": "v1",
    "timestamp": "2021-01-02T00:00:00.000Z",
    "event": {
      "dim1": "2",
      "sum_cost": 59
    }
  },
  {
    "version": "v1",
    "timestamp": "2021-01-02T00:00:00.000Z",
    "event": {
      "dim1": "3",
      "sum_cost": 359
    }
  },
  {
    "version": "v1",
    "timestamp": "2021-01-03T00:00:00.000Z",
    "event": {
      "dim1": "2",
      "sum_cost": 659
    }
  },
  {
    "version": "v1",
    "timestamp": "2021-01-03T00:00:00.000Z",
    "event": {
      "dim1": "3",
      "sum_cost": 459
    }
  }
]

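The same groupBy can be issued through druidry. Note this version uses granularity ALL rather than day, so the sums collapse into a single bucket per dim1 value: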
import in.zapr.druid.druidry.aggregator.LongSumAggregator;
import in.zapr.druid.druidry.client.DruidClient;
import in.zapr.druid.druidry.client.DruidConfiguration;
import in.zapr.druid.druidry.client.DruidJerseyClient;
import in.zapr.druid.druidry.dataSource.TableDataSource;
import in.zapr.druid.druidry.dimension.SimpleDimension;
import in.zapr.druid.druidry.granularity.PredefinedGranularity;
import in.zapr.druid.druidry.granularity.SimpleGranularity;
import in.zapr.druid.druidry.query.DruidQuery;
import in.zapr.druid.druidry.query.aggregation.DruidGroupByQuery;
import in.zapr.druid.druidry.query.config.Context;
import in.zapr.druid.druidry.query.config.Interval;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.TimeZone;
import org.glassfish.jersey.client.ClientConfig;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class DruidTest {

  @org.junit.Test
  public void test1() throws Exception {

    // Point the client at the broker's native query endpoint (port 8082).
    DruidConfiguration configuration =
        DruidConfiguration.builder()
            .host("10.227.26.77")
            .port(8082)
            .endpoint("druid/v2/?pretty")
            .build();

    DruidClient druidClient = new DruidJerseyClient(configuration, new ClientConfig());

    // druidry's Interval takes joda-time DateTimes, so convert from java.time.
    DateTimeZone dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(ZoneId.of("UTC")));
    DateTimeFormatter dtf =
        DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(ZoneId.of("UTC"));
    DateTime dateTime1 =
        new DateTime(
            ZonedDateTime.parse("2010-01-01 00:00:00", dtf).toInstant().toEpochMilli(),
            dateTimeZone);
    DateTime dateTime2 =
        new DateTime(
            ZonedDateTime.parse("2022-01-01 00:00:00", dtf).toInstant().toEpochMilli(),
            dateTimeZone);

    Context context = Context.builder().timeoutInMilliSeconds(60000L).queryId("logId").build();

    // groupBy on dim1 over the whole interval, summing the rolled-up sum_cost metric.
    DruidQuery druidQuery =
        DruidGroupByQuery.builder()
            .aggregators(Collections.singletonList(new LongSumAggregator("sum_cost", "sum_cost")))
            .dimensions(Collections.singletonList(new SimpleDimension("dim1")))
            // .dimensions(Collections.emptyList())
            .intervals(Collections.singletonList(new Interval(dateTime1, dateTime2)))
            .granularity(new SimpleGranularity(PredefinedGranularity.ALL))
            .dataSource(new TableDataSource("inline_data"))
            .context(context)
            .build();

    druidClient.connect();
    String druidResponsesJson = druidClient.query(druidQuery);
    System.out.println(druidResponsesJson);
    druidClient.close();
  }
}