Dalam salah satu proyek, kami perlu mengimpor data dari sistem pihak ketiga ke arsitektur layanan mikro. Apache NiFi dipilih sebagai alatnya. Sebagai percobaan pertama, dipilih impor Daftar Negara Terpadu Badan Hukum (USRLE) milik Layanan Pajak Federal.
Artikel sebelumnya menjelaskan cara mengonversi XML ke JSON menggunakan skema AVRO.
Artikel ini menjelaskan cara mengonversi JSON menggunakan spesifikasi JOLT.
Prosesor dan pengontrol yang digunakan
Membagi JSON menjadi Beberapa Bagian
FlowFile yang diperoleh pada tahap sebelumnya berisi JSON berupa larik ekstrak USRLE untuk berbagai organisasi. Pertama, mari kita pisahkan larik tersebut sehingga setiap FlowFile berisi satu ekstrak saja.
Untuk itu kami menggunakan prosesor SplitJson. Pada pengaturannya, Anda perlu menentukan ekspresi JsonPath yang digunakan untuk memisahkan JSON menjadi beberapa bagian. Dalam kasus ini: $.*
Dokumentasi JsonPath di sini
Anda bisa berlatih di sini
Konversi JSON
JSON yang dihasilkan memiliki struktur yang terlalu kompleks untuk disimpan dan diproses lebih lanjut. Lebih baik menggabungkan alamat dan nama lengkap masing-masing menjadi satu baris, serta memindahkan beberapa elemen ke tingkat yang lebih tinggi dalam hierarki.
JSON sebelum transformasi
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"name" : {
"fullName" : " ",
"shortName" : ""
},
"address" : {
"addressRF" : {
"region" : {
"type" : "",
"name" : ""
},
"district" : null,
"town" : {
"type" : "",
"name" : ""
},
"settlement" : null,
"street" : {
"type" : "",
"name" : ""
},
"index" : "143500",
"regionCode" : "50",
"kladr" : "500000570000011",
"house" : null,
"building" : null,
"apartment" : null
}
},
"termination" : null,
"capital" : null,
"manageOrg" : null,
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012"
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ],
"founders" : {
"founderULRF" : null,
"founderULForeign" : null,
"founderFL" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012"
},
"capitalPart" : {
"nominal" : 20000.0,
"size" : {
"percent" : 50.0,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789021"
},
"capitalPart" : {
"nominal" : 20000.0,
"size" : {
"percent" : 50.0,
"decimalPart" : null,
"simplePart" : null
}
}
} ],
"founderGov" : null,
"founderPIF" : null
},
"capitalPart" : null,
"holderReestrAO" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
Untuk mengonversi JSON, kami menggunakan prosesor JoltTransformJSON.
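Pengaturan:
Jolt Transformation DSL - jenis transformasi yang digunakan. Chain - rangkaian beberapa operasi yang dijalankan berurutan.
Jolt Specification - spesifikasi transformasi.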
JOLT
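Spesifikasi ini menggunakan dua jenis operasi - shift dan modify-overwrite-beta. Daftar fungsi yang tersedia untuk modify-overwrite-beta dapat dilihat di kode sumber Modifier.java. Spesifikasi dapat dicoba di jolt-demo.appspot.com.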
JOLT
[
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
},
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founders": {
"founderFL": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founderGov": {
"*": {
"founderImplFL": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
}
},
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
]
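modify-overwrite-beta digunakan dua kali, karena operasi kedua memakai hasil dari operasi pertama.
Dengan demikian, spesifikasi berupa larik yang terdiri dari tiga operasi: dua operasi modify-overwrite-beta dan satu operasi shift. Setiap operasi memiliki nama - operation - dan spesifikasinya - spec.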
, .
modify-overwrite-beta
. , . , .
.
Langkah pertama (operasi modify-overwrite-beta pertama) - menggabungkan type dan name untuk elemen region, district, town, settlement, dan street dengan fungsi "=concat(@(type), ' ', @(name))".
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
}
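Sebagai contoh, perhatikan baris "region": "=concat(@(type), ' ', @(name))".
Sebelum transformasi, region adalah objek dengan elemen type dan name. Setelah transformasi, region menjadi string gabungan keduanya. Karena type berada di dalam region itu sendiri, cukup ditulis @(type).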
Langkah kedua (operasi modify-overwrite-beta kedua) - membentuk alamat lengkap sebagai satu string dalam elemen baru value.
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
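Di sini, berbeda dengan langkah sebelumnya, digunakan bentuk @(1,index).
Angka 1 berarti elemen index dicari satu tingkat di atas elemen saat ini. Karena value berada di dalam addressRF, naik satu tingkat berarti pencarian dilakukan di addressRF, dan addressRF memang memiliki elemen index.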
, =
concat
, @(1,index)
.
fias - elemen baru yang ditambahkan dengan nilai null.
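Selain itu, sebelum shift dibentuk elemen fio (nama lengkap).
Di sini digunakan tanda "*".
Tanda ini cocok dengan kunci apa pun; karena director adalah larik, aturan tersebut diterapkan pada setiap elemennya.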
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
shift
Sebelum operasi shift dijalankan, JSON terlihat seperti berikut.
JSON
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"name" : {
"fullName" : " ",
"shortName" : ""
},
"address" : {
"addressRF" : {
"region" : " ",
"district" : " ",
"town" : " ",
"settlement" : " ",
"street" : " ",
"index" : "143500",
"regionCode" : "50",
"kladr" : "500000570000011",
"house" : null,
"building" : null,
"apartment" : null,
"value" : "143500, , , , , , , , ",
"fias" : null
}
},
"termination" : null,
"capital" : null,
"manageOrg" : null,
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ],
"founders" : {
"founderULRF" : null,
"founderULForeign" : null,
"founderFL" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789021",
"fio" : " "
},
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
} ],
"founderGov" : null,
"founderPIF" : null
},
"capitalPart" : null,
"holderReestrAO" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
, - , , , . , modify-overwrite-beta , . , shift - , .
shift
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
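Operasi shift memindahkan elemen dari JSON masukan ke jalur baru pada JSON keluaran. Di sisi kiri ditulis kunci yang dicari, di sisi kanan - jalur tujuannya. Dalam jalur tujuan dapat digunakan tanda &.
Tanda & menyatakan nama kunci yang cocok pada tingkat saat ini, sama dengan &0.
&1 menyatakan nama kunci satu tingkat di atasnya, dan seterusnya.
Tanda & juga dapat digabungkan dengan teks lain, misalnya pre-&-post.
Jika & bernilai name, hasilnya adalah pre-name-post.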
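Aturan pertama - "reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&".
Elemen-elemen ini disalin ke keluaran tanpa perubahan, dengan nama yang sama. Tanda | berarti "atau", sehingga satu aturan berlaku untuk semua kunci yang disebutkan.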
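Elemen fullName dan shortName dinaikkan satu tingkat dengan aturan "name": { "*": "&" }.
Tanda "*" cocok dengan semua elemen yang berada di dalam name.
Tanda "&" menempatkan setiap elemen tersebut di tingkat teratas dengan nama aslinya.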
- .
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
}
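Di sini hanya sebagian elemen yang diambil. Jalur tujuannya - "&2.&".
Hitung tingkat dari elemen saat ini ke atas: &2 adalah address, sedangkan & adalah nama elemen saat ini. &1 adalah addressRF, dan tingkat ini dilewati. Dengan demikian hasilnya: address.kladr, address.regionCode, address.value, dan address.fias. Berikutnya - bagian director dari JSON.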
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ]
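Elemen lastName, firstName, dan patronymic dibuang.
Elemen inn dan fio dinaikkan satu tingkat.
Elemen ogrnip, typeCode, dan typeName dinaikkan satu tingkat.
Elemen name diganti namanya menjadi position.
Elemen disqualification tetap tidak berubah.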
- , , - . , &
- [&]
.
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
}
Pertama, fio dan inn. Jalur tujuannya adalah &3[&2].&.
Hitung tingkat dari elemen saat ini ke atas: &3 - director, [&2] - indeks elemen larik, & - fio atau inn.
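Berikutnya, name di dalam position. &3 - director, [&2] - indeks elemen larik, &1 - position.
Tanda & tidak digunakan di akhir jalur, sehingga nilai name disimpan dengan nama baru, yaitu position.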
Elemen lainnya di dalam position cukup dinaikkan satu tingkat. Elemen disqualification tetap tidak berubah.
Selanjutnya, konstruksi serupa digunakan.
Contoh
Terakhir, berikut ini saya sertakan kembali JSON asli, spesifikasi JOLT, dan JSON yang dihasilkan.
JSON mentah
{
"reportDate": "2020-05-20",
"ogrn": "1234567890123",
"ogrnDate": "2002-12-30",
"inn": "1234567890",
"kpp": "123456789",
"opfCode": "12300",
"opfName": " ",
"name": {
"fullName": " ",
"shortName": ""
},
"address": {
"addressRF": {
"region": {
"type": "",
"name": ""
},
"district": null,
"town": {
"type": "",
"name": ""
},
"settlement": null,
"street": {
"type": "",
"name": ""
},
"index": "143500",
"regionCode": "50",
"kladr": "500000570000011",
"house": null,
"building": null,
"apartment": null
}
},
"termination": null,
"capital": null,
"manageOrg": null,
"director": [
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789012"
},
"position": {
"ogrnip": null,
"typeCode": "02",
"typeName": " ",
"name": " "
},
"disqualification": null
}
],
"founders": {
"founderULRF": null,
"founderULForeign": null,
"founderFL": [
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789012"
},
"capitalPart": {
"nominal": 20000,
"size": {
"percent": 50,
"decimalPart": null,
"simplePart": null
}
}
},
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789021"
},
"capitalPart": {
"nominal": 20000,
"size": {
"percent": 50,
"decimalPart": null,
"simplePart": null
}
}
}
],
"founderGov": null,
"founderPIF": null
},
"capitalPart": null,
"holderReestrAO": null,
"okved": {
"mainOkved": {
"code": "47.11",
"name": " , , "
},
"addOkved": null
}
}
Spesifikasi JOLT
[
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
},
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founders": {
"founderFL": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founderGov": {
"*": {
"founderImplFL": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
}
},
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
]
JSON yang dihasilkan
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"fullName" : " ",
"shortName" : "",
"address" : {
"kladr" : "500000570000011",
"regionCode" : "50",
"value" : "143500, , , , , , , , ",
"fias" : null
},
"capital" : null,
"director" : [ {
"fio" : " ",
"inn" : "123456789012",
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"position" : " ",
"disqualification" : null
} ],
"founders" : {
"founderFL" : [ {
"fio" : " ",
"inn" : "123456789012",
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fio" : " ",
"inn" : "123456789021",
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
} ]
},
"capitalPart" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
Lebih lanjut
Selanjutnya, JSON yang dihasilkan perlu disimpan di suatu tempat untuk digunakan lebih lanjut. Namun, hal itu berada di luar cakupan artikel ini; silakan gunakan cara yang paling nyaman bagi Anda.