This is a typical situation when you start building a database from data that is still stored, for example, in a log file. There is a solution - as usual, but it is not very fast. Perhaps you can write a log message handler to process messages as they arrive; if the flow (messages / second) is not too large, you will not notice the overhead, especially if you can forget about writing the message in a flat text file.
First, normalize the schema — to third normal form (3NF). Move the repeating value (the event type) into its own lookup table (event_type). (Strictly speaking, a 2NF-style argument could be made for leaving tiny, stable code sets inline — things like ISO codes or M/F (male/female) flags — but for an open-ended set such as event types, 3NF is the right choice.)
For example, suppose event_type is char(20). An int reference takes only 4 bytes, whereas 1000 event_type values stored as char(20) occupy about 20 kB. Replacing the repeated string with a small integer key saves space and speeds up comparisons. Likewise, store the date as a timestamp rather than text: it is compact (4 or 8 bytes depending on configuration) and supports proper date arithmetic (ranges, sorting).
Second, write a function that parses each message and inserts it, so ingestion is a single call per message.
Here is a sketch of such a function (you could equally implement the parsing in another language, e.g. Python):
-- Parse one comma-separated log message ("<timestamp>,<event type>,<text>"),
-- resolve (or create) the event_type row, insert the log_message row,
-- and return the new log_message.id.
-- STRICT: returns NULL immediately if the input message is NULL.
CREATE FUNCTION ingest_log_message(mess text) RETURNS int AS $$
DECLARE
parts text[];
et_id int;
log_id int;
BEGIN
-- parts[1] = timestamp, parts[2] = event type, parts[3] = message text.
parts := regexp_split_to_array(mess, ',');
-- plpgsql binds variables as parameters in static SQL, so no quoting is
-- needed here. (The original called quote_literal() on the values, which
-- stored the quote characters as part of the data and made this lookup
-- never match — creating a duplicate event_type row for every message.)
SELECT id INTO et_id
FROM event_type
WHERE type_text = parts[2];
IF NOT FOUND THEN
-- NOTE(review): SELECT-then-INSERT is racy under concurrent writers.
-- With a UNIQUE constraint on event_type.type_text, prefer
-- INSERT ... ON CONFLICT (type_text) DO NOTHING plus a re-SELECT.
INSERT INTO event_type (type_text)
VALUES (parts[2])
RETURNING id INTO et_id;
END IF;
INSERT INTO log_message (dt, et, msg)
VALUES (parts[1]::timestamp, et_id, parts[3])
RETURNING id INTO log_id;
RETURN log_id;
END; $$ LANGUAGE plpgsql STRICT;
The tables look like this:
-- Lookup table: one row per distinct event type.
CREATE TABLE event_type (
id serial PRIMARY KEY,
-- type_text is the natural lookup key: NOT NULL and UNIQUE so duplicates
-- cannot creep in and INSERT ... ON CONFLICT (type_text) becomes possible.
type_text char(20) NOT NULL,
CONSTRAINT event_type_type_text_key UNIQUE (type_text)
);
-- One row per ingested log message; et references the event_type lookup.
CREATE TABLE log_message (
id serial PRIMARY KEY,
dt timestamp NOT NULL,
-- Fixed: the original was missing the comma after this column definition,
-- which made the CREATE TABLE a syntax error.
et integer NOT NULL REFERENCES event_type,
msg text
);
Call the function with a plain SELECT; it returns the id of the newly inserted row:
-- Ingest one message; the result is the id of the new log_message row.
SELECT * FROM ingest_log_message(the_message);
A note on quote_literal(): it is NOT needed (and is actively harmful) in static SQL inside plpgsql, because variables are bound as parameters — there is no SQL-injection risk, and calling quote_literal() would store the quote characters as part of the data. quote_literal() is only for building dynamic SQL strings executed with EXECUTE.
Naturally, adapt the parsing and the column definitions to your actual log format.