Sample reporting queries for Watson Knowledge Catalog
Check the following use cases for reporting and the example SQL queries to create the reports.
- Number of governance artifacts grouped by the type of the artifact
- Number of catalog assets grouped by type
- Number of catalog assets that are grouped by tag and type of asset
- List of catalog assets that do not have a description
- List of governance artifacts that do not have a description
- Business term distribution across assets in catalog
- List of catalog assets with no business terms assigned
- List of catalog assets with a particular user assigned as a collaborator
- Percentage of governance artifacts with stewards assigned
- List of related terms of a particular business term
- Hierarchy for a particular top-level category
- Number of policies that are assigned to a particular governance rule
- Trend of data quality scores of a catalog asset over a period of time
Number of governance artifacts grouped by the type of the artifact
SELECT
ARTIFACT_TYPE,
COUNT(*) TOTAL
FROM
GOVERNANCE_ARTIFACTS
GROUP BY
ARTIFACT_TYPE
Number of catalog assets grouped by type
SELECT
ASSET_TYPE,
COUNT(*) AS TOTAL
FROM
CONTAINER_ASSETS
GROUP BY
ASSET_TYPE
Number of catalog assets that are grouped by tag and type of asset
SELECT
TAG_NAME,
ASSET_TYPE,
COUNT(*) TOTAL
FROM
ASSET_TAGS,
CONTAINER_ASSETS
WHERE
ASSET_TAGS.ASSET_ID = CONTAINER_ASSETS.ASSET_ID
GROUP BY
TAG_NAME,
ASSET_TYPE
List of catalog assets that do not have a description
SELECT
*
FROM
CONTAINER_ASSETS
WHERE
DESCRIPTION IS NULL
OR DESCRIPTION = ''
AND CONTAINER_TYPE = 'catalog'
List of governance artifacts that do not have a description
SELECT
*
FROM
GOVERNANCE_ARTIFACTS
WHERE
DESCRIPTION IS NULL
Business term distribution across assets in catalog
SELECT
GA.NAME,
AGGR_TABLE.TOTAL
FROM
GOVERNANCE_ARTIFACTS AS GA
LEFT OUTER JOIN (
SELECT
ASSOCIATED_ARTIFACT_ID,
COUNT(*) AS TOTAL
FROM
GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS
WHERE
ASSOCIATED_ARTIFACT_TYPE = 'glossary_term'
GROUP BY
ASSOCIATED_ARTIFACT_ID) AS AGGR_TABLE ON
GA.ARTIFACT_ID = AGGR_TABLE.ASSOCIATED_ARTIFACT_ID
WHERE
GA.ARTIFACT_TYPE = 'glossary_term'
List of catalog assets with no business terms assigned
SELECT
HEADER,
COUNT(*) AS TOTAL
FROM
(
SELECT
CASE
WHEN GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS.ASSOCIATED_ARTIFACT_ID IS NULL THEN 'NO TERMS'
ELSE 'HAS ATLEAST ONE TERM'
END AS HEADER
FROM
CONTAINERS,
CONTAINER_ASSETS
LEFT OUTER JOIN GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS ON
CONTAINER_ASSETS.ASSET_ID = GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS.ASSET_ID
AND GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS.ASSOCIATED_ARTIFACT_TYPE = 'glossary_term'
WHERE
CONTAINERS.CONTAINER_ID = CONTAINER_ASSETS.CONTAINER_ID
AND CONTAINERS.CONTAINER_TYPE = 'catalog')
GROUP BY
HEADER
List of catalog assets with a particular user assigned as a collaborator
SELECT
CONTAINER_ASSETS.CONTAINER_ID,
CONTAINER_ASSETS.ASSET_ID,
CONTAINER_ASSETS.NAME,
CONTAINER_ASSETS.ASSET_TYPE
FROM
ASSET_COLLABORATORS,
CONTAINER_ASSETS
WHERE
USER_ID = 'IBMid-50DW59BCGD'
AND CONTAINER_ASSETS.CONTAINER_TYPE = 'catalog'
AND CONTAINER_ASSETS.ASSET_ID = ASSET_COLLABORATORS.ASSET_ID
AND CONTAINER_ASSETS.CONTAINER_ID = ASSET_COLLABORATORS.CONTAINER_ID
Percentage of governance artifacts with stewards assigned
SELECT
COUNT(*) AS TOTAL_COUNT,
ARTIFACT_WITH_STEWARD_COUNT
FROM
GOVERNANCE_ARTIFACTS
LEFT OUTER JOIN (
SELECT
COUNT(*) AS ARTIFACT_WITH_STEWARD_COUNT
FROM
GOVERNANCE_ARTIFACTS
LEFT OUTER JOIN GOVERNANCE_ARTIFACT_STEWARDS ON
GOVERNANCE_ARTIFACTS.ARTIFACT_ID = GOVERNANCE_ARTIFACT_STEWARDS.ARTIFACT_ID
WHERE
GOVERNANCE_ARTIFACT_STEWARDS.USER_ID IS NOT NULL) ON
1 = 1
GROUP BY
ARTIFACT_WITH_STEWARD_COUNT
List of related terms of a particular business term
SELECT
TEMPTABLE.NAME AS END1_NAME, RELATIONSHIP_TYPE, SGA.NAME AS END2_NAME
FROM
(
SELECT
GA.NAME, BTA.END2_ARTIFACT_ID, BTA.RELATIONSHIP_TYPE
FROM
BUSINESS_TERM_ASSOCIATIONS AS BTA
LEFT OUTER JOIN GOVERNANCE_ARTIFACTS AS GA ON
GA.ARTIFACT_ID = BTA.END1_ARTIFACT_ID ORDER BY BTA.END1_ARTIFACT_ID) AS TEMPTABLE
LEFT OUTER JOIN GOVERNANCE_ARTIFACTS AS SGA ON
TEMPTABLE.END2_ARTIFACT_ID = SGA.ARTIFACT_ID
Hierarchy for a particular top-level category
WITH RPL (END1_CATEGORY_ID,
END2_CATEGORY_ID,
DEPTH) AS
(
SELECT
ROOT.END1_CATEGORY_ID,
ROOT.END2_CATEGORY_ID,
0
FROM
CATEGORY_ASSOCIATIONS ROOT
WHERE
ROOT.END1_CATEGORY_ID = 'b11e64a2-5466-4c60-bfc5-121c7a80703d'
UNION ALL
SELECT
CHILD.END1_CATEGORY_ID,
CHILD.END2_CATEGORY_ID,
PARENT.DEPTH + 1
FROM
RPL PARENT,
CATEGORY_ASSOCIATIONS CHILD
WHERE
PARENT.END2_CATEGORY_ID = CHILD.END1_CATEGORY_ID
AND DEPTH < 1000)
SELECT
TEMP.DEPTH,
TEMP.NAME AS END1_NAME,
CATEGORIES.NAME AS END2_NAME,
TEMP.END1_CATEGORY_ID AS END1,
TEMP.END2_CATEGORY_ID AS END2
FROM
(
SELECT
END1_CATEGORY_ID,
NAME,
END2_CATEGORY_ID,
DEPTH
FROM
RPL
LEFT OUTER JOIN CATEGORIES ON
RPL.END1_CATEGORY_ID = CATEGORIES.CATEGORY_ID) AS TEMP
LEFT OUTER JOIN CATEGORIES ON
CATEGORIES.CATEGORY_ID = TEMP.END2_CATEGORY_ID
ORDER BY
DEPTH;
Number of policies that are assigned to a particular governance rule
SELECT
ENFORCEMENT_RULES.NAME,
COUNT
FROM
(
SELECT
ENFORCEMENT_RULES.RULE_ID AS RULE_ID,
COUNT(ARTIFACT_ENFORCEMENT_RULE_ASSOCIATIONS.ARTIFACT_ID) AS COUNT
FROM
ENFORCEMENT_RULES
LEFT OUTER JOIN ARTIFACT_ENFORCEMENT_RULE_ASSOCIATIONS ON
ENFORCEMENT_RULES.RULE_ID = ARTIFACT_ENFORCEMENT_RULE_ASSOCIATIONS.RULE_ID
GROUP BY
ENFORCEMENT_RULES.RULE_ID) AS TEMPTABLE
LEFT OUTER JOIN ENFORCEMENT_RULES ON
ENFORCEMENT_RULES.RULE_ID = TEMPTABLE.RULE_ID
Trend of data quality scores of a catalog asset over a period of time
For Db2:
SELECT
CA.NAME AS ASSET_NAME,
TEMP.TECH_START AS UPDATE_TIME,
TEMP.TECH_START,
TEMP.ASSET_ID,
TEMP.TECH_END,
QUALITY_SCORE
FROM
CONTAINER_ASSETS AS CA,
(
SELECT
CONTAINER_DATA_ASSETS.ASSET_ID,
CONTAINER_DATA_ASSETS.QUALITY_SCORE,
CONTAINER_DATA_ASSETS.TECH_START,
CONTAINER_DATA_ASSETS.TECH_END
FROM
CONTAINER_DATA_ASSETS FOR SYSTEM_TIME
FROM
'2021-08-19-00.00.00.000000000000' TO '9999-12-30-00.00.00.000000000000'
WHERE
CONTAINER_DATA_ASSETS.ASSET_ID = '85a89a9c-8fd3-4306-8931-cd15c41ae1d4'
AND QUALITY_SCORE IS NOT NULL) AS TEMP
WHERE
TEMP.ASSET_ID = CA.ASSET_ID
-- NOT NULL check is needed to ensure intermediate state during update that contains no quality score does not show up
For PostgreSQL:
select
CA.NAME as ASSET_NAME,
TEMP.TECH_START as UPDATE_TIME,
TEMP.TECH_START,
TEMP.ASSET_ID,
TEMP.TECH_END,
TEMP.QUALITY_SCORE
from
CONTAINER_ASSETS as CA,
(
select
CONTAINER_DATA_ASSETS.ASSET_ID,
CONTAINER_DATA_ASSETS.QUALITY_SCORE,
CONTAINER_DATA_ASSETS.TECH_START,
CONTAINER_DATA_ASSETS.TECH_END
from
CONTAINER_DATA_ASSETS
where
CONTAINER_DATA_ASSETS.ASSET_ID = '130ab82a-c3a4-4fbc-880b-51bc49517ebe'
and CONTAINER_DATA_ASSETS.QUALITY_SCORE is not null
union
select
HIST_CONTAINER_DATA_ASSETS.ASSET_ID,
HIST_CONTAINER_DATA_ASSETS.QUALITY_SCORE,
HIST_CONTAINER_DATA_ASSETS.TECH_START,
HIST_CONTAINER_DATA_ASSETS.TECH_END
from
HIST_CONTAINER_DATA_ASSETS
where
HIST_CONTAINER_DATA_ASSETS.ASSET_ID = '130ab82a-c3a4-4fbc-880b-51bc49517ebe'
and HIST_CONTAINER_DATA_ASSETS.QUALITY_SCORE is not null
) as temp
where
TEMP.ASSET_ID = CA.ASSET_ID
Parent topic: Setting up reporting for Watson Knowledge Catalog