0 / 0
Sample reporting queries for Watson Knowledge Catalog
Sample reporting queries for Watson Knowledge Catalog

Sample reporting queries for Watson Knowledge Catalog

Check the following use cases for reporting and the example SQL queries to create the reports.

Number of governance artifacts grouped by the type of the artifact

SELECT
        ARTIFACT_TYPE,
        COUNT(*) TOTAL
FROM
        GOVERNANCE_ARTIFACTS
GROUP BY
        ARTIFACT_TYPE

Number of catalog assets grouped by type

SELECT
        ASSET_TYPE,
        COUNT(*) AS TOTAL
FROM
        CONTAINER_ASSETS
GROUP BY
        ASSET_TYPE

Number of catalog assets that are grouped by tag and type of asset

SELECT
        TAG_NAME,
        ASSET_TYPE,
        COUNT(*) TOTAL
FROM
        ASSET_TAGS,
        CONTAINER_ASSETS
WHERE
        ASSET_TAGS.ASSET_ID = CONTAINER_ASSETS.ASSET_ID
GROUP BY
        TAG_NAME,
        ASSET_TYPE

List of catalog assets that do not have a description

SELECT
        *
FROM
        CONTAINER_ASSETS
WHERE
        DESCRIPTION IS NULL
        OR DESCRIPTION = ''
        AND CONTAINER_TYPE = 'catalog'

List of governance artifacts that do not have a description

SELECT
        *
FROM
       GOVERNANCE_ARTIFACTS 
WHERE
       DESCRIPTION IS NULL

Business term distribution across assets in catalog

SELECT
        GA.NAME,
        AGGR_TABLE.TOTAL
FROM
        GOVERNANCE_ARTIFACTS AS GA
LEFT OUTER JOIN (
        SELECT
                ASSOCIATED_ARTIFACT_ID,
                COUNT(*) AS TOTAL
        FROM
                GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS
        WHERE
                ASSOCIATED_ARTIFACT_TYPE = 'glossary_term'
        GROUP BY
                ASSOCIATED_ARTIFACT_ID) AS AGGR_TABLE ON
        GA.ARTIFACT_ID = AGGR_TABLE.ASSOCIATED_ARTIFACT_ID
WHERE
        GA.ARTIFACT_TYPE = 'glossary_term'

List of catalog assets with no business terms assigned

SELECT
        HEADER,
        COUNT(*) AS TOTAL
FROM
        (
        SELECT
                CASE
                        WHEN GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS.ASSOCIATED_ARTIFACT_ID IS NULL THEN 'NO TERMS'
                        ELSE 'HAS ATLEAST ONE TERM'
                END AS HEADER
        FROM
                CONTAINERS,
                CONTAINER_ASSETS
        LEFT OUTER JOIN GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS ON
                CONTAINER_ASSETS.ASSET_ID = GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS.ASSET_ID
                AND GOVERNANCE_ARTIFACT_CONTAINER_ASSOCIATIONS.ASSOCIATED_ARTIFACT_TYPE = 'glossary_term'
        WHERE
                CONTAINERS.CONTAINER_ID = CONTAINER_ASSETS.CONTAINER_ID
                AND CONTAINERS.CONTAINER_TYPE = 'catalog')
GROUP BY
        HEADER

List of catalog assets with a particular user assigned as a collaborator

SELECT
        CONTAINER_ASSETS.CONTAINER_ID,
        CONTAINER_ASSETS.ASSET_ID,
        CONTAINER_ASSETS.NAME,
        CONTAINER_ASSETS.ASSET_TYPE
FROM
        ASSET_COLLABORATORS,
        CONTAINER_ASSETS
WHERE
        USER_ID = 'IBMid-50DW59BCGD'
        AND CONTAINER_ASSETS.CONTAINER_TYPE = 'catalog'
        AND CONTAINER_ASSETS.ASSET_ID = ASSET_COLLABORATORS.ASSET_ID
        AND CONTAINER_ASSETS.CONTAINER_ID = ASSET_COLLABORATORS.CONTAINER_ID

Percentage of governance artifacts with stewards assigned

SELECT
        COUNT(*) AS TOTAL_COUNT,
        ARTIFACT_WITH_STEWARD_COUNT
FROM
        GOVERNANCE_ARTIFACTS
LEFT OUTER JOIN (
        SELECT
                COUNT(*) AS ARTIFACT_WITH_STEWARD_COUNT
        FROM
                GOVERNANCE_ARTIFACTS
        LEFT OUTER JOIN GOVERNANCE_ARTIFACT_STEWARDS ON
                GOVERNANCE_ARTIFACTS.ARTIFACT_ID = GOVERNANCE_ARTIFACT_STEWARDS.ARTIFACT_ID
        WHERE
                GOVERNANCE_ARTIFACT_STEWARDS.USER_ID IS NOT NULL) ON
        1 = 1
GROUP BY
        ARTIFACT_WITH_STEWARD_COUNT

List of related terms of a particular business term

SELECT
        TEMPTABLE.NAME AS END1_NAME, RELATIONSHIP_TYPE, SGA.NAME AS END2_NAME
FROM
        (
        SELECT
                GA.NAME, BTA.END2_ARTIFACT_ID, BTA.RELATIONSHIP_TYPE
        FROM
                BUSINESS_TERM_ASSOCIATIONS AS BTA
        LEFT OUTER JOIN GOVERNANCE_ARTIFACTS AS GA ON
                GA.ARTIFACT_ID = BTA.END1_ARTIFACT_ID ORDER BY BTA.END1_ARTIFACT_ID) AS TEMPTABLE
        LEFT OUTER JOIN GOVERNANCE_ARTIFACTS AS SGA ON
                TEMPTABLE.END2_ARTIFACT_ID = SGA.ARTIFACT_ID

Hierarchy for a particular top-level category

WITH RPL (END1_CATEGORY_ID,
END2_CATEGORY_ID,
DEPTH) AS
     (
SELECT
        ROOT.END1_CATEGORY_ID,
        ROOT.END2_CATEGORY_ID,
        0

FROM
        CATEGORY_ASSOCIATIONS ROOT

WHERE
        ROOT.END1_CATEGORY_ID = 'b11e64a2-5466-4c60-bfc5-121c7a80703d'

UNION ALL

SELECT
        CHILD.END1_CATEGORY_ID,
        CHILD.END2_CATEGORY_ID,
        PARENT.DEPTH + 1

FROM
        RPL PARENT,
        CATEGORY_ASSOCIATIONS CHILD

WHERE
        PARENT.END2_CATEGORY_ID = CHILD.END1_CATEGORY_ID
        AND DEPTH < 1000)
SELECT
        TEMP.DEPTH,
        TEMP.NAME AS END1_NAME,
        CATEGORIES.NAME AS END2_NAME,
        TEMP.END1_CATEGORY_ID AS END1,
          TEMP.END2_CATEGORY_ID AS END2
FROM
        (
        SELECT
                END1_CATEGORY_ID,
                NAME,
                END2_CATEGORY_ID,
                DEPTH

        FROM
                RPL
        LEFT OUTER JOIN CATEGORIES ON
                RPL.END1_CATEGORY_ID = CATEGORIES.CATEGORY_ID) AS TEMP
LEFT OUTER JOIN CATEGORIES ON
        CATEGORIES.CATEGORY_ID = TEMP.END2_CATEGORY_ID
ORDER BY
        DEPTH;

Number of policies that are assigned to a particular governance rule

SELECT
        ENFORCEMENT_RULES.NAME,
        COUNT
FROM
        (
        SELECT
                ENFORCEMENT_RULES.RULE_ID AS RULE_ID,
                COUNT(ARTIFACT_ENFORCEMENT_RULE_ASSOCIATIONS.ARTIFACT_ID) AS COUNT
        FROM
                ENFORCEMENT_RULES
        LEFT OUTER JOIN ARTIFACT_ENFORCEMENT_RULE_ASSOCIATIONS ON
                ENFORCEMENT_RULES.RULE_ID = ARTIFACT_ENFORCEMENT_RULE_ASSOCIATIONS.RULE_ID
        GROUP BY
                ENFORCEMENT_RULES.RULE_ID) AS TEMPTABLE
LEFT OUTER JOIN ENFORCEMENT_RULES ON
        ENFORCEMENT_RULES.RULE_ID = TEMPTABLE.RULE_ID

Trend of data quality scores of a catalog asset over a period of time

For Db2:

SELECT
        CA.NAME AS ASSET_NAME,
        TEMP.TECH_START AS UPDATE_TIME,
        TEMP.TECH_START,
        TEMP.ASSET_ID,
        TEMP.TECH_END,
        QUALITY_SCORE
FROM
        CONTAINER_ASSETS AS CA,
        (
        SELECT
                CONTAINER_DATA_ASSETS.ASSET_ID,
                CONTAINER_DATA_ASSETS.QUALITY_SCORE,
                CONTAINER_DATA_ASSETS.TECH_START,
                CONTAINER_DATA_ASSETS.TECH_END
        FROM
                CONTAINER_DATA_ASSETS FOR SYSTEM_TIME
        FROM
                '2021-08-19-00.00.00.000000000000' TO '9999-12-30-00.00.00.000000000000'
        WHERE
                CONTAINER_DATA_ASSETS.ASSET_ID = '85a89a9c-8fd3-4306-8931-cd15c41ae1d4'
                AND QUALITY_SCORE IS NOT NULL) AS TEMP
WHERE
        TEMP.ASSET_ID = CA.ASSET_ID
        -- NOT NULL check is needed to ensure intermediate state during update that contains no quality score does not show up

For PostgreSQL:

select
        CA.NAME as ASSET_NAME,
        TEMP.TECH_START as UPDATE_TIME,
        TEMP.TECH_START,
        TEMP.ASSET_ID,
        TEMP.TECH_END,
        TEMP.QUALITY_SCORE
from
        CONTAINER_ASSETS as CA,
        (
                select
                        CONTAINER_DATA_ASSETS.ASSET_ID,
                        CONTAINER_DATA_ASSETS.QUALITY_SCORE,
                        CONTAINER_DATA_ASSETS.TECH_START,
                        CONTAINER_DATA_ASSETS.TECH_END
                from
                        CONTAINER_DATA_ASSETS
                where
                        CONTAINER_DATA_ASSETS.ASSET_ID = '130ab82a-c3a4-4fbc-880b-51bc49517ebe'
                        and CONTAINER_DATA_ASSETS.QUALITY_SCORE is not null
        union
                select
                        HIST_CONTAINER_DATA_ASSETS.ASSET_ID,
                        HIST_CONTAINER_DATA_ASSETS.QUALITY_SCORE,
                        HIST_CONTAINER_DATA_ASSETS.TECH_START,
                        HIST_CONTAINER_DATA_ASSETS.TECH_END
                from
                        HIST_CONTAINER_DATA_ASSETS
                where
                        HIST_CONTAINER_DATA_ASSETS.ASSET_ID = '130ab82a-c3a4-4fbc-880b-51bc49517ebe'
                        and HIST_CONTAINER_DATA_ASSETS.QUALITY_SCORE is not null
                ) as temp

where
        TEMP.ASSET_ID = CA.ASSET_ID

Parent topic: Setting up reporting for Watson Knowledge Catalog