PostgreSQL: sum of values connected via a LATERAL join

I am trying to clean up data in a PostgreSQL table where some records contain a large number of profanities in the email_address column (the records in question were entered by agitated users out of frustration over a bug that has since been fixed):

    ┌───────────────────┐
    │   email_address   │
    ├───────────────────┤
    │ foo@go.bar.me.net │
    │ foo@foo.com       │
    │ foo@example.com   │
    │ baz@example.com   │
    │ barred@qux.com    │
    └───────────────────┘

Desired query output

I would like to create a query that annotates each row of the data table with a profanity score and orders the records by that score, so that a person can go through the annotated data (presented in a web application) and take the necessary actions:

    ┌───────────────────┬───────┐
    │   email_address   │ score │
    ├───────────────────┼───────┤
    │ foo@foo.com       │    18 │
    │ foo@go.bar.me.net │    14 │
    │ foo@example.com   │     9 │
    │ baz@example.com   │     3 │
    │ barred@qux.com    │     0 │
    └───────────────────┴───────┘

Attempt # 1

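So far I have created a table containing a list of profanity regular expressions (2 or more languages are involved), each with a score, to be matched against email_address. The profanities table: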

    ┌──────────────────┬───────┐
    │ profanity_regexp │ score │
    ├──────────────────┼───────┤
    │ foo              │     9 │
    │ bar(?!red)       │     5 │
    │ baz              │     3 │
    └──────────────────┴───────┘

Then I used a LATERAL join with regexp_matches to extract all profanities from each email_address (into a single array):

-- Collect every profanity hit for each e-mail address into a single array.
-- regexp_matches with the 'g' flag returns one text[] row per match ('i' makes
-- the match case-insensitive). Addresses without any hit produce no join rows
-- and therefore do not appear in the result.
SELECT
    data.email_address,
    array_agg(matches) AS profanities_found
FROM
    data
    CROSS JOIN profanities AS p
    -- The pattern column is profanity_regexp, as in the profanities table.
    CROSS JOIN LATERAL regexp_matches(data.email_address, p.profanity_regexp, 'gi') AS matches
GROUP BY
    data.email_address;

Result:

    ┌───────────────────┬───────────────────┐
    │   email_address   │ profanities_found │
    ├───────────────────┼───────────────────┤
    │ foo@foo.com       │ {{foo},{foo}}     │
    │ foo@example.com   │ {{foo}}           │
    │ foo@go.bar.me.net │ {{foo},{bar}}     │
    │ baz@example.com   │ {{baz}}           │
    └───────────────────┴───────────────────┘

SUB-SELECT

I also managed to get an array of profanity score subtotals for each record with this SQL:

-- For each e-mail address, build an array with one score subtotal per row of
-- profanities: subtotal = score * number of case-insensitive matches of that
-- row's pattern in the address (0 when the pattern does not match).
-- The array subquery has no ORDER BY, so element order is not guaranteed.
SELECT
    data.email_address,
    array(
        SELECT
            p.score * (
                -- One row per match, so count(*) is the number of matches.
                SELECT count(*)
                FROM regexp_matches(data.email_address, p.profanity_regexp, 'gi')
            )
        FROM profanities AS p
    ) AS prof
FROM data;

This (correctly) produces the following:

    ┌───────────────────┬──────────┐
    │   email_address   │   prof   │
    ├───────────────────┼──────────┤
    │ foo@go.bar.me.net │ {9,5,0}  │
    │ foo@foo.com       │ {18,0,0} │
    │ foo@example.com   │ {9,0,0}  │
    │ baz@example.com   │ {0,0,3}  │
    │ barred@qux.com    │ {0,0,0}  │
    └───────────────────┴──────────┘

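How can I sum the values connected via the LATERAL join to get the desired output?

Is there another strategy I could use to get the desired result?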


http://sqlfiddle.com/#!17/6685c/4

+4
3

Following a hint from @daurnimator, I realized that LATERAL joins can be nested. The final query:

-- Annotate every e-mail address with its total profanity score:
-- the sum over all profanities rows of score * number of matches.
-- The total is computed in a correlated scalar subquery, so every row of
-- data is returned, with 0 when nothing matches.
SELECT
    data.email_address,
    (
        SELECT
            coalesce(sum(profanities.score * hits.match_count), 0)
        FROM
            profanities
            CROSS JOIN LATERAL (
                -- Count match rows rather than taking array_length of each
                -- match: array_length is the number of capture groups, which
                -- over-scores any pattern with two or more groups.
                -- Cast to integer (the type array_length returns) so the
                -- type of the resulting score is unchanged.
                SELECT
                    count(*)::integer AS match_count
                FROM
                    regexp_matches(
                        data.email_address,
                        profanities.profanity_regexp,
                        'gi'
                    )
            ) AS hits
    ) AS score
FROM
    data;

The result is a plain sub-select, so it can easily be used with Django's RawSQL and then sorted with order_by('-score').

0

Wrap your query in an outer select. Unnest the array of subtotals, and sum it.

-- Total profanity score per e-mail address, computed in two steps:
-- the derived table builds an array of per-pattern subtotals
-- (score * number of case-insensitive matches), and the outer query
-- unnests that array and sums it.
SELECT
    prof.email_address,
    (
        -- SUM over an empty array (no rows in profanities) is NULL;
        -- report 0 instead so every address gets a numeric score.
        SELECT coalesce(sum(s), 0)
        FROM unnest(prof.profanity_score_subtotals) AS s
    ) AS sum_prof
FROM (
    SELECT
        data.email_address,
        array(
            SELECT
                p.score * (
                    -- One row per match, so count(*) is the number of matches.
                    SELECT count(*)
                    FROM regexp_matches(data.email_address, p.profanity_regexp, 'gi')
                )
            FROM profanities AS p
        ) AS profanity_score_subtotals
    FROM data
) AS prof;
+1

For some reason Postgres does not allow set-returning functions in a WHERE clause, so two LATERAL joins are needed:

-- Annotate every e-mail address with its total profanity score using two
-- nested LATERAL joins. The outer lateral subquery is an aggregate without
-- GROUP BY, so it yields exactly one row per address and no address is lost;
-- addresses without any match get a score of 0.
SELECT
    data.email_address,
    t.score
FROM
    data
    CROSS JOIN LATERAL (
        SELECT
            coalesce(sum(profanities.score * hits.match_count), 0) AS score
        FROM
            profanities
            CROSS JOIN LATERAL (
                -- Count match rows rather than taking array_length of each
                -- match: array_length is the number of capture groups, which
                -- over-scores any pattern with two or more groups.
                -- Cast to integer (the type array_length returns) so the
                -- type of the resulting score is unchanged.
                SELECT
                    count(*)::integer AS match_count
                FROM
                    regexp_matches(
                        data.email_address,
                        profanities.profanity_regexp,
                        'gi'
                    )
            ) AS hits
    ) AS t;
+1

Source: https://habr.com/ru/post/1694571/


All Articles