ElasticSearch学习笔记-Python API使用

news/2024/7/7 5:56:16 标签: ElasticSearch, Python

ElasticSearch学习笔记-Python API使用

1. 参考资料

  1. Elasticsearch 开发指南
  2. Elasticsearch 官网
  3. CSDN文章
  4. ElasticSearch博客园
  5. Python操作ElasticSearch
  6. ElasticSearch Python API官网

2. 代码实例
通过Python简单操作ElasticSearch

# -.- coding:utf-8 -.-
import json
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk


class ElasticObj:
    """Small helper bound to one Elasticsearch index.

    Wraps an ``Elasticsearch`` client and exposes create / bulk-index /
    delete / get / search helpers that print their results to stdout.
    """

    def __init__(self, index_name, index_type, index_conf, ip="127.0.0.1"):
        """
        Store the index settings and open a client connection.

        :param index_name: name of the index to operate on
        :param index_type: document type used for bulk/get/delete/search calls
        :param index_conf: body passed to ``indices.create`` (settings/mappings)
        :param ip: host handed to the ``Elasticsearch`` constructor
        """
        self.index_name = index_name
        self.index_type = index_type
        self.index_conf = index_conf
        # Connection without credentials.
        self.es = Elasticsearch(hosts=[ip])
        # Connection with credentials:
        # self.es = Elasticsearch([ip], http_auth=('elastic', 'password'), port=9200)

    @staticmethod
    def _print_hit(hit):
        """Print the date/source/link/keyword/title fields of one search hit."""
        source = hit['_source']
        print('%s %s %s %s %s' % (source['date'], source['source'], source['link'],
                                  source['keyword'], source['title']))

    def create_index(self):
        """
        Create the index with ``self.index_conf`` unless it already exists.

        :return: None (the creation response is printed)
        """
        # Use truthiness rather than ``is not True``: the existence check may
        # return a truthy object that is not the ``True`` singleton.
        if not self.es.indices.exists(index=self.index_name):
            res = self.es.indices.create(index=self.index_name, body=self.index_conf)
            print(res)

    def bulk_index_data(self, in_data):
        """
        Store a batch of documents with the ``bulk`` helper.

        :param in_data: iterable of dicts, each with an ``id`` and a ``data`` entry
        :return: None (the number of performed actions is printed)
        """
        actions = [
            {
                "_index": self.index_name,
                "_type": self.index_type,
                "_id": line['id'],  # _id may also be omitted to let ES generate one
                "_source": line['data']
            }
            for line in in_data
        ]
        # Send the whole batch in one bulk request.
        success, _ = bulk(self.es, actions, index=self.index_name, raise_on_error=True)
        print('Performed %d actions' % success)

    def delete_index_data(self, in_id):
        """
        Delete a single document from the index.

        :param in_id: id of the document to delete
        :return: None (the delete response is printed)
        """
        res = self.es.delete(index=self.index_name, doc_type=self.index_type, id=in_id)
        print(res)

    def get_data_id(self, in_id):
        """
        Fetch a single document by id and print its ``_source``.

        :param in_id: id of the document to fetch
        :return: None
        """
        res = self.es.get(index=self.index_name, doc_type=self.index_type, id=in_id)
        print(res['_source'])

        print('------------------------------------------------------------------')
        # NOTE: the response is only read through '_source' here. The earlier
        # loop over res['hits']['hits'] was removed because it raised KeyError
        # on this response.

    def get_data_by_body(self):
        """
        Run a ``match`` query on the ``keyword`` field and print every hit.

        :return: None
        """
        # doc = {'query': {'match_all': {}}}
        doc = {
            "query": {
                "match": {
                    "keyword": "电视"
                }
            }
        }
        _searched = self.es.search(index=self.index_name, doc_type=self.index_type, body=doc)

        for hit in _searched['hits']['hits']:
            self._print_hit(hit)

    def get_data_by_all(self):
        """
        Run a ``match_all`` query on the index and print the response as JSON.

        :return: None
        """
        res = self.es.search(index=self.index_name, body={"query": {"match_all": {}}})
        log_info = json.dumps(res, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '))
        print(log_info)


# Names of the integer fields used both in the index mapping and in the
# sample documents built by adproxy_es_operate().
COUNTS = "counts"
T50, T70, T99, T100 = "T50ms", "T70ms", "T99ms", "T100ms"

def adproxy_es_operate(es_host="192.168.37.133", es_index="adproxy_log",
                       es_index_type="adproxy_log_type"):
    """
    Demo run: create the index, bulk-load four sample documents, dump all hits.

    The defaults reproduce the values that used to be hard-coded, so calling
    the function without arguments behaves as before.

    :param es_host: host passed to ``ElasticObj`` as ``ip``
    :param es_index: name of the index to create and fill
    :param es_index_type: document type used as the mapping key and for indexing
    :return: None
    """
    properties = {
        "desc": {
            "type": "text"
        },
        "data_time": {
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss,SSS"
        },
    }
    # Every counter field shares the same integer mapping.
    for field in (COUNTS, T50, T70, T99, T100):
        properties[field] = {"type": "integer"}

    es_index_conf = {
        'settings': {
            # just one shard, no replicas for testing
            'number_of_shards': 1,
            'number_of_replicas': 0,
        },
        "mappings": {
            es_index_type: {
                "properties": properties
            }
        }
    }
    es_obj = ElasticObj(es_index, es_index_type, es_index_conf, ip=es_host)
    es_obj.create_index()
    data_info = [
        {
            "id": 1234567,
            "data": {
                "desc": "TBAD5",
                "data_time": "2019-05-08 12:30:00,000",
                COUNTS: 24756,
                T50: 14175,
                T70: 5305,
                T99: 2523,
                T100: 2753
            }
        },
        {
            "id": 12345678,
            "data": {
                "desc": "TBAD5",
                "data_time": "2019-05-07 12:35:00,000",
                COUNTS: 24533,
                T50: 14339,
                T70: 5379,
                T99: 2513,
                T100: 14339
            }
        },
        {
            "id": 123456789,
            "data": {
                "desc": "TBAD5",
                "data_time": "2019-05-08 12:35:00,000",
                COUNTS: 24533,
                T50: 14339,
                T70: 5379,
                T99: 2513,
                T100: 14339
            }
        },
        {
            "id": 1234567890,
            "data": {
                "desc": "TBAD5",
                "data_time": "2019-05-09 12:35:00,000",
                COUNTS: 24533,
                T50: 14339,
                T70: 5379,
                T99: 2513,
                T100: 14339
            }
        },
    ]
    es_obj.bulk_index_data(data_info)
    es_obj.get_data_by_all()


# Run the demo only when the file is executed as a script, not on import.
if __name__ == '__main__':
    adproxy_es_operate()


http://www.niftyadmin.cn/n/765276.html

相关文章

Python2.7学习笔记-中文编码通用转码函数

Python2.7 中文编码通用转码函数Python2.7学习笔记-中文编码通用转码函数Python2.7学习笔记-中文编码通用转码函数 # encoding handler begin def getCoding(strInput):"""获取编码格式"""if isinstance(strInput, unicode):return "unico…

ONE STM32单片机使用寄存器点亮LED灯

ONE 使用寄存器点亮LED灯(一)寄存器1. 寄存器是什么?2. 寄存器的作用是什么?(二)利用寄存器点亮LED灯1. 利用寄存器点亮LED灯的步骤是什么?1.1 定义寄存器:1.2 代码书写:…

STM32固件库文件分析

STM32固件库文件分析(一)汇编编写的启动文件(二)时钟配置文件(三)外设相关的(四)内核相关的(五)头文件的配置文件(六) 专门存放中断服务函数的c文…

stm32进行固件库点亮led灯

固件库点亮LED灯(一)main()(二)bsp_led.c()(三)bsp_led.h32固件库编程点亮led灯:在已经建好工程的基础上,进行建立文件,包括bsp文件,是…

按键检测 LED灯

【TOC】(按键检测) one bsp_ked.h #ifndef _BSP_KEY_H #define _BSP_KEY_H#include"stm32f10x.h"//更换IO口,只需要更改宏定义即可 #define KEY1_G_GPIO_PIN GPIO_Pin_0 //PA0 #define KEY1_G_GPIO_PORT GPIOA …

PCB基础知识

PCBPCB规则信号线的粗细回路规则串扰控制禁止走线区布局立创快捷键PCB规则 信号线的粗细 一般设计为12mil(0.2mm左右),电源线最小25mil,最好30mil,因为电流越大,需要的线宽越宽,电流线比较大&…

GIT分支操作_查看分支_创建分支_切换分支_合并分支_获取远程分支更改

GIT分支操作 一、GIT分支概念二、git分支操作1、查看分支2、新建分支3、切换分支(1)查看一个分支内的提交对HEAD的影响(2)查看切换分支对HEAD的影响4、合并分支(1) 子模块(2)提交当前分支到主分支5、获取远程分支更改一、GIT分支概念 对于GIT的分支概念,我们这样参照下图来理…

GIT使用_GIT版本切换

GIT版本切换 一、版本回退的概念1、版本操作2、版本切换的实质3、版本切换后再次提交二、如何进行版本回退1、查看版本号1.1 `git log`1.2`git log --pretty=oneline`1.3 `git log --oneline`1.4 `git log -n 3`1.5 `git log --graph`1.6 `git log --oneline --graph`1.7 `git …