Impor Daftar Badan Hukum Negara Terpadu dari Layanan Pajak Federal menggunakan Apache NiFi. Langkah 3 - Mengonversi JSON menggunakan JOLT

Dalam salah satu proyek, diperlukan proses impor data dari sistem pihak ketiga ke arsitektur layanan mikro. Apache NiFi dipilih sebagai alatnya. Impor Daftar Badan Hukum Negara Terpadu dari Layanan Pajak Federal dipilih sebagai subjek percobaan pertama.





Artikel sebelumnya menjelaskan cara mengonversi XML ke JSON menggunakan skema AVRO.





Artikel ini menjelaskan cara mengonversi JSON menggunakan spesifikasi JOLT.





Prosesor dan pengontrol yang digunakan

Membagi JSON menjadi Beberapa Bagian

FlowFile yang diperoleh pada tahap sebelumnya berisi JSON dengan larik ekstrak Daftar Badan Hukum Negara Terpadu (USRLE) untuk berbagai organisasi. Pertama, mari kita pisahkan menjadi beberapa bagian sehingga setiap FlowFile berisi satu ekstrak.





Untuk itu kami menggunakan prosesor SplitJson. Dalam pengaturannya, Anda perlu menentukan ekspresi JsonPath untuk memisahkan JSON menjadi beberapa bagian. Dalam hal ini, ekspresinya adalah $.*





Dokumentasi JsonPath di sini





Anda bisa berlatih di sini





Konversi JSON

JSON yang dihasilkan memiliki struktur yang terlalu kompleks untuk disimpan dan diproses selanjutnya. Lebih baik menggabungkan alamat dan nama lengkap masing-masing menjadi satu baris, serta memindahkan beberapa elemen ke tingkat yang lebih tinggi dalam hierarki.





JSON sebelum transformasi
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "name" : {
    "fullName" : "   ",
    "shortName" : ""
  },
  "address" : {
    "addressRF" : {
      "region" : {
        "type" : "",
        "name" : ""
      },
      "district" : null,
      "town" : {
        "type" : "",
        "name" : ""
      },
      "settlement" : null,
      "street" : {
        "type" : "",
        "name" : ""
      },
      "index" : "143500",
      "regionCode" : "50",
      "kladr" : "500000570000011",
      "house" : null,
      "building" : null,
      "apartment" : null
    }
  },
  "termination" : null,
  "capital" : null,
  "manageOrg" : null,
  "director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012"
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ],
  "founders" : {
    "founderULRF" : null,
    "founderULForeign" : null,
    "founderFL" : [ {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789012"
      },
      "capitalPart" : {
        "nominal" : 20000.0,
        "size" : {
          "percent" : 50.0,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789021"
      },
      "capitalPart" : {
        "nominal" : 20000.0,
        "size" : {
          "percent" : 50.0,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ],
    "founderGov" : null,
    "founderPIF" : null
  },
  "capitalPart" : null,
  "holderReestrAO" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



Untuk mengonversi JSON, kami menggunakan prosesor JoltTransformJSON.





:





  • Jolt Transformation DSL - . Chain -





  • Jolt Specification - .





JOLT

, - .





.





- shift - modify-overwrite-beta. . Modifier.java, . jolt-demo.appspot.com . .





JOLT
[
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			},
			"founders": {
				"founderFL": {
					"*": {
						"fl": {
							"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
						}
					}
				},
				"founderGov": {
					"*": {
						"founderImplFL": {
							"fl": {
								"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
							}
						}
					}
				}
			}
		}
	},
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
		}
	},
	{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
]
      
      



modify-overwrite-beta , .. .





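Spesifikasi ini terdiri dari tiga operasi: dua operasi modify-overwrite-beta dan satu operasi shift. Setiap operasi memiliki dua elemen - operation dan spec.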





, .





modify-overwrite-beta

. , . , .





.





Operasi pertama (modify-overwrite-beta) menggabungkan elemen type dan name pada region, district, town, settlement, dan street menggunakan ekspresi "=concat(@(type), ' ', @(name))".





"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			}
      
      



. , "region": "=concat(@(type), ' ', @(name))",



: region, type name. region, @(type)



.





Operasi kedua (modify-overwrite-beta) membentuk alamat lengkap dalam elemen value.





"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
      
      



, @(1,index)



. , index . .. value addressRF, addressRF index.





, =



concat



, @(1,index)



.





fias - elemen baru yang ditambahkan dengan nilai null.





. shift .





. "*"



. , .. director , .





"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			}
      
      



shift

shift JSON.





JSON
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "name" : {
    "fullName" : "   ",
    "shortName" : ""
  },
  "address" : {
    "addressRF" : {
      "region" : " ",
      "district" : " ",
      "town" : " ",
      "settlement" : " ",
      "street" : " ",
      "index" : "143500",
      "regionCode" : "50",
      "kladr" : "500000570000011",
      "house" : null,
      "building" : null,
      "apartment" : null,
      "value" : "143500,  ,  ,  ,  ,  , , , ",
      "fias" : null
    }
  },
  "termination" : null,
  "capital" : null,
  "manageOrg" : null,
  "director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012",
      "fio" : "  "
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ],
  "founders" : {
    "founderULRF" : null,
    "founderULForeign" : null,
    "founderFL" : [ {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789012",
        "fio" : "  "
      },
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789021",
        "fio" : "  "
      },
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ],
    "founderGov" : null,
    "founderPIF" : null
  },
  "capitalPart" : null,
  "holderReestrAO" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



, - , , , . , modify-overwrite-beta , . , shift - , .





shift
{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
      
      



shift . , , , . , . &



. , , &0



. . &1



, .. &



- , pre-&-post



. .. &



name, pre-name-post. . .





- "reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&"



. , . |



.





fullName shortName "name": { "*": "&" }



.

"*"



, , name



.

"&"



, .





- .





"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			}
      
      



. . - "&2.&"



. , , . &2



address, &



- . &1



addressRF, . .. : address.kladr, address.regionCode, address.value address.fias. JSON.









"director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012",
      "fio" : "  "
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ]
      
      



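lastName, firstName, dan patronymic tidak disertakan dalam hasil.

inn dan fio dipindahkan satu tingkat ke atas.

ogrnip, typeCode, dan typeName dipindahkan satu tingkat ke atas.

name menggantikan position.

disqualification tetap tidak berubah.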





- , , - . , &



- [&]



.





"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			}
      
      



, fio inn. &3[&2].&



. . : &3



- director, [&2]



- , &



- fio inn.





name position. &3



- director, [&2]



- , &1



- position. &



, name , position.





Elemen lainnya di dalam position cukup dinaikkan satu tingkat. disqualification tetap tidak berubah.





Selanjutnya, konstruksi serupa digunakan.





Contoh

Terakhir, saya tampilkan kembali JSON asli, spesifikasi JOLT, dan JSON yang dihasilkan.





JSON mentah
{
  "reportDate": "2020-05-20",
  "ogrn": "1234567890123",
  "ogrnDate": "2002-12-30",
  "inn": "1234567890",
  "kpp": "123456789",
  "opfCode": "12300",
  "opfName": "   ",
  "name": {
    "fullName": "   ",
    "shortName": ""
  },
  "address": {
    "addressRF": {
      "region": {
        "type": "",
        "name": ""
      },
      "district": null,
      "town": {
        "type": "",
        "name": ""
      },
      "settlement": null,
      "street": {
        "type": "",
        "name": ""
      },
      "index": "143500",
      "regionCode": "50",
      "kladr": "500000570000011",
      "house": null,
      "building": null,
      "apartment": null
    }
  },
  "termination": null,
  "capital": null,
  "manageOrg": null,
  "director": [
    {
      "fl": {
        "lastName": "",
        "firstName": "",
        "patronymic": "",
        "inn": "123456789012"
      },
      "position": {
        "ogrnip": null,
        "typeCode": "02",
        "typeName": "  ",
        "name": " "
      },
      "disqualification": null
    }
  ],
  "founders": {
    "founderULRF": null,
    "founderULForeign": null,
    "founderFL": [
      {
        "fl": {
          "lastName": "",
          "firstName": "",
          "patronymic": "",
          "inn": "123456789012"
        },
        "capitalPart": {
          "nominal": 20000,
          "size": {
            "percent": 50,
            "decimalPart": null,
            "simplePart": null
          }
        }
      },
      {
        "fl": {
          "lastName": "",
          "firstName": "",
          "patronymic": "",
          "inn": "123456789021"
        },
        "capitalPart": {
          "nominal": 20000,
          "size": {
            "percent": 50,
            "decimalPart": null,
            "simplePart": null
          }
        }
      }
    ],
    "founderGov": null,
    "founderPIF": null
  },
  "capitalPart": null,
  "holderReestrAO": null,
  "okved": {
    "mainOkved": {
      "code": "47.11",
      "name": "    ,  ,      "
    },
    "addOkved": null
  }
}
      
      



Spesifikasi JOLT
[
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			},
			"founders": {
				"founderFL": {
					"*": {
						"fl": {
							"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
						}
					}
				},
				"founderGov": {
					"*": {
						"founderImplFL": {
							"fl": {
								"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
							}
						}
					}
				}
			}
		}
	},
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
		}
	},
	{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
]

      
      



JSON yang dihasilkan
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "fullName" : "   ",
  "shortName" : "",
  "address" : {
    "kladr" : "500000570000011",
    "regionCode" : "50",
    "value" : "143500,  ,  ,  ,  ,  , , , ",
    "fias" : null
  },
  "capital" : null,
  "director" : [ {
    "fio" : "  ",
    "inn" : "123456789012",
    "ogrnip" : null,
    "typeCode" : "02",
    "typeName" : "  ",
    "position" : " ",
    "disqualification" : null
  } ],
  "founders" : {
    "founderFL" : [ {
      "fio" : "  ",
      "inn" : "123456789012",
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fio" : "  ",
      "inn" : "123456789021",
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ]
  },
  "capitalPart" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



Lebih lanjut

Selanjutnya, JSON yang dihasilkan harus ditempatkan di suatu tempat untuk penyimpanan dan penggunaan lebih lanjut. Namun, hal itu sudah berada di luar cakupan artikel ini. Silakan gunakan cara yang paling nyaman bagi Anda.








All Articles