-- Title : [PGS 9.x] PostgreSQL Tips & Tricks
-- Reference : postgres.cz
-- Key word :
********************************************************************************/
How to unnest an array with element indexes
Sometimes we need to unnest a array with indexes. There are more ways how to do it:
Author: Pavel Stěhule (PostgreSQL 9.1)
CREATE OR REPLACE FUNCTION unnest_rownum(anyarray) RETURNS TABLE (id int, element anyelement) AS $$ BEGIN id := 1; FOREACH element IN array $1 LOOP RETURN NEXT; id := id + 1; END LOOP; RETURN; END $$ LANGUAGE plpgsql; postgres=# select * from unnest_rownum(ARRAY['A','B','C']); id | element ----+--------- 1 | A 2 | B 3 | C (3 rows)
Author: Tom Lane (PostgreSQL 9.3)
SELECT i, arraycol[i] FROM tab, LATERAL generate_subscripts(arraycol, 1) as i;
9.4 supports clause WITH ORDINALITY:
postgres=# SELECT * FROM unnest(ARRAY['A','D','C']) WITH ORDINALITY; unnest | ordinality --------+------------ A | 1 D | 2 C | 3 (3 rows)
Allow only one NULL in column
Author: Pavel Stěhule
UNIQUE constraint ensures unique values in column(s). But NULL is not a comparable value and unique constraint ignores these values. Sometime we would to allow only one NULL in column. There is a simply solution based on partial index and functional (here only constant) index.
postgres=# CREATE TABLE omega(a int UNIQUE); CREATE TABLE postgres=# INSERT INTO omega VALUES(NULL); INSERT 0 1 postgres=# INSERT INTO omega VALUES(NULL); INSERT 0 1 postgres=# TRUNCATE omega; TRUNCATE TABLE postgres=# CREATE UNIQUE INDEX ON omega ((1)) WHERE a IS NULL; CREATE INDEX postgres=# INSERT INTO omega VALUES(NULL); INSERT 0 1 postgres=# INSERT INTO omega VALUES(NULL); ERROR: duplicate key value violates unique constraint "omega_expr_idx" DETAIL: Key ((1))=(1) already exists. postgres=# UPDATE omega SET a = 10; UPDATE 1 postgres=# INSERT INTO omega VALUES(NULL); INSERT 0 1 Time: 7.601 ms
Bytea to BLOB conversion
Author: unknown
Store bytea string to LO object.
CREATE OR REPLACE FUNCTION make_lo(bytea) RETURNS oid AS $$ DECLARE loid oid; fd integer; bytes integer; BEGIN loid := lo_creat(-1); fd := lo_open(loid, 131072); bytes := lowrite(fd, $1); IF (bytes != LENGTH($1)) THEN RAISE EXCEPTION 'Not all data copied to blob'; END IF; PERFORM lo_close(fd); RETURN loid; END; $$ LANGUAGE plpgsql STRICT;
Only one column can hold NULL
Author: Daniel Vérité
How we can calculate how much columns hold a NULL value? Nice example of variadic function usage.
CREATE OR REPLACE FUNCTION public.null_count(VARIADIC anyarray) RETURNS integer LANGUAGE sql AS $function$ SELECT sum(CASE WHEN v IS NULL THEN 1 ELSE 0 END)::int FROM unnest($1) g(v) $function$
Usage:
CREATE TABLE xxx( a int, b int, c int, CHECK (null_count(a,b,c) <= 1))
Overwriting a application name in psql (for psql based script identification)
Author: Erik Rijkers
Application_name is one of connection params, that we can attach to log event and later we can to identify a SQL statement producent. There are more possibilities how to do it (one is a example of connection string in url format).
[pavel@localhost ~]$ PGAPPNAME=splunk psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" postgres application_name ------------------ splunk (1 row) [pavel@localhost ~]$ psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" "dbname=postgres application_name=splunk" application_name ------------------ splunk (1 row) [pavel@localhost ~]$ psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" postgresql://localhost/postgres?application_name=splunk application_name ------------------ splunk (1 row)
Getting N unique most similar values with usage KNNsearch and pg_tgrm module
Author: Tom Lane
SELECT DISTINCT ON (village_name <-> 'Benešov') village_name, village_name<->'Benešov' FROM villages ORDER BY village_name <-> 'Benešov' LIMIT 10; village_name | ?column? -----------------------+---------- Benešov | 0 Benešovice | 0.416667 Dolní Benešov | 0.428571 Benešov u Semil | 0.5 Benešov nad Černou | 0.578947 Benešov nad Ploučnicí | 0.636364 Benecko | 0.666667 Benetice | 0.692308 Bečov | 0.727273 Bezkov | 0.75 (10 rows) postgres=# EXPLAIN SELECT DISTINCT ON (village_name <-> 'Benešov') village_name, village_name<->'Benešov' FROM obce ORDER BY village_name <-> 'Benešov' LIMIT 10; QUERY PLAN ------------------------------------------------------------------------------------------------- Limit (cost=0.00..2.76 rows=10 width=10) -> Unique (cost=0.00..1474.75 rows=5341 width=10) -> Index Scan using villages_village_name_idx on obce (cost=0.00..1433.14 rows=16644 width=10) Order By: (village_name <-> 'Benešov'::text) (4 rows)
Using custom casting when you migrate from Oracle to PostgreSQL
Author: Pavel Stěhule
Oracle doesn't support boolean data type, and developers use a varchar(1) or number(1) instead. It is a issue in PostgreSQL because this width is too less for storing a strings "true" or "false". But we can redefine a default casting (there is used a ugly hack - direct update of system tables under superuser rights). This solution is fast hack - changing to PostgreSQL's boolean is much better.
CREATE OR REPLACE FUNCTION public.boolcast_varchar(boolean) RETURNS character varying LANGUAGE sql AS $$ SELECT CASE WHEN $1 THEN 't' ELSE 'f' END $$ UPDATE pg_cast SET castfunc='boolcast_varchar'::regproc, castcontext='i' WHERE castsource=16 and casttarget=1043; CREATE TABLE test(a varchar(1)); INSERT INTO test VALUES(true);
JSON creating
Author: Merlin
select row_to_json(t1) from ( select 'joe' as username, (select project from (values(1, 'prj1')) as project(project_id, project_name)) as project ) t1;
A array's fields trimming
Author: a_horse_with_no_name
postgres=# select * from g; a -------------------------------- {" ssss ssss","sss ssss "} {" ssss ssssaaaa "} (2 rows) postgres=# select row_number() over(), unnest(a) e from g; row_number | e ------------+----------------- 1 | ssss ssss 1 | sss ssss 2 | ssss ssssaaaa (3 rows) postgres=# select array_agg(trim(x.e)) from (select row_number() over() rn, unnest(a) e from g) x group by rn; array_agg --------------------------- {"ssss ssss","sss ssss"} {"ssss ssssaaaa"} (2 rows)
Attention: This trick depends on imlementation - and should not work in future releases. If you use a 9.3 and newer PostgreSQL, use a LATERAL join instead.
Simply query result processing in bash
Author: Pavel Stěhule
psql has not complete macro language, but we can use bash and we can use features, that provides bash:
pavel ~ $ psql postgres -A -t --field-separator=" " \ > -c "copy (select 1,'Ahoj Svete', i > from generate_series(1,3) g(i)) > to stdout delimiter ' '" | \ > while read var1 var2 var3; > do > echo "a=$var1,b=$var2,c=$var3 "; > done a=1,b=Ahoj Svete,c=1 a=1,b=Ahoj Svete,c=2 a=1,b=Ahoj Svete,c=3
Similar solution designed by Hubert Depesz Lubaczewski:
psql -qAtX -c "copy (select * from t) to stdout" | while IFS=$'\t' read -r a b c; do echo -e "a=[$a] b=[$b] c=[$c]"; done
Glue comment to query
Author: Thomas Kellerer
When you need append notice to query, you can use nested comment:
SELECT /* my comments, that I would to see in PostgreSQL log */ a, b, c FROM mytab;
Logout all connections
Author: Pavel Stěhule
Execute query as user postgres:
postgres=# select * from pg_stat_activity ; datid | datname | pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start | -------+----------+-------+----------+----------+------------------+-------------+-----------------+-------------+-------------------------------+ 12894 | postgres | 21091 | 10 | postgres | psql | | | -1 | 2012-03-06 09:16:02.466983+01 | 12894 | postgres | 21103 | 16384 | pavel | psql | | | -1 | 2012-03-06 09:17:02.827352+01 | 12894 | postgres | 21104 | 16384 | pavel | psql | | | -1 | 2012-03-06 09:17:12.176979+01 | (3 rows) postgres=# select pg_terminate_backend(pid) from pg_stat_activity where pid <> pg_backend_pid() ; pg_terminate_backend ---------------------- t t (2 rows) postgres=# select * from pg_stat_activity ; datid | datname | pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start | -------+----------+-------+----------+----------+------------------+-------------+-----------------+-------------+-------------------------------+ 12894 | postgres | 21091 | 10 | postgres | psql | | | -1 | 2012-03-06 09:16:02.466983+01 | (1 row)
Taking first unlocked row from table
Sometimes we need first unlocked row (queue implementation). We can use following trick:
postgres=# select * from queue; id | processed | cmd ----+-----------+------- 1 | f | task1 2 | f | task2 3 | f | task2 4 | f | task3 (4 rows)
Usage:
-- consument1 postgres=# begin; BEGIN postgres=# select * from queue where not processed and pg_try_advisory_xact_lock(id) for update limit 1; id | processed | cmd ----+-----------+------- 1 | f | task1 (1 row) postgres=# update queue set processed = true where id = 1; UPDATE 1 postgres=# commit; COMMIT -- consument2 postgres=# begin; BEGIN postgres=# select * from queue where not processed and pg_try_advisory_xact_lock(id) for update limit 1; id | processed | cmd ----+-----------+------- 2 | f | task2 (1 row) postgres=# update queue set processed = true where id = 2; UPDATE 1 postgres=# commit; COMMIT
With advisory locks we can implement unblocked queue parallel processing.
Searching gaps in time series with window analytic functions
Source: Magnus Hagander http://blog.hagander.net/archives/203-Finding-gaps-in-partitioned-sequences.html
Function lag can be used for any sorted series:
SELECT * FROM ( SELECT gropid, year, month, seq, seq-lag(seq,1) OVER (PARTITION BY groupid, year, month ORDER BY seq) AS gap FROM mytable ) AS t WHERE NOT (t.gap=1) ORDER BY groupid, year, month, seq
Delete duplicate rows with window analytic functions
We can use a window function for very effective removing of duplicate rows:
DELETE FROM tab WHERE id IN (SELECT id FROM (SELECT row_number() OVER (PARTITION BY column_with_duplicate_values), id FROM tab) x WHERE x.row_number > 1);
Some PostgreSQL's optimized version (with ctid):
DELETE FROM tab WHERE ctid = ANY(ARRAY(SELECT ctid FROM (SELECT row_number() OVER (PARTITION BY column_with_duplicate_values), ctid FROM tab) x WHERE x.row_number > 1));
LIKE to list of patterns
PostgreSQL cannot to use (I)LIKE together list of patterns (in conformance with ANSI SQL):
-- this is not valid WHERE somecolumn LIKE ('%some%','%someelse%')
But we can use arrays or regular expressions:
-- this is valid WHERE somecolumn ~~* any(array['%some%', '%someelse']));
Author: Tom Lane
Enforcing unique name for cursor in PL/pgSQL
PL/pgSQL cursors uses PostgreSQL SQL cursors. When we use PL/pgSQL cursor, then PL/pgSQL runtime creates SQL cursor with same name. This behave is unwanted sometimes. But when we know, so PL/pgSQL cursor is +/- text variable with SQL name, we can assign NULL to this variable. Then PL/pgSQL runtime cannot to use a predefined name and it will generate a unique name:
DECLARE mycursor CURSOR FOR SELECT * FROM int4_tbl WHERE f1 > id; newid INTEGER; out TEXT; BEGIN out := id::text || ' '; mycursor := null; OPEN mycursor; ..
Conversion BLOB to Bytea
Source: http://snipplr.com/view/62576/convert-oid-to-bytea/
CREATE OR REPLACE FUNCTION merge_oid(val oid) RETURNS bytea AS $$ DECLARE merged bytea; arr bytea; BEGIN FOR arr IN SELECT DATA FROM pg_largeobject WHERE loid = val ORDER BY pageno LOOP IF merged IS NULL THEN merged := arr; ELSE merged := merged || arr; END IF; END LOOP; RETURN merged; END $$ LANGUAGE plpgsql;
Function for decoding of url code
Author: Marti Raudsepp
CREATE OR REPLACE FUNCTION url_decode(input text) RETURNS text LANGUAGE plpgsql IMMUTABLE STRICT AS $$ DECLARE bin bytea = ''; byte text; BEGIN FOR byte IN (select (regexp_matches(input, '(%..|.)', 'g'))[1]) LOOP IF length(byte) = 3 THEN bin = bin || decode(substring(byte, 2, 2), 'hex'); ELSE bin = bin || byte::bytea; END IF; END LOOP; RETURN convert_from(bin, 'utf8'); END $$;
Usage:
ohs=# select url_decode('Hell%C3%B6%20World%21'); url_decode ────────────── Hellö World! (1 row)
Another version (by Marc Mamin)
CREATE OR REPLACE FUNCTION urldecode_arr(url text) RETURNS text AS $$ BEGIN RETURN (WITH str AS (SELECT CASE WHEN $1 ~ '^%[0-9a-fA-F][0-9a-fA-F]' THEN array[''] END || regexp_split_to_array ($1, '(%[0-9a-fA-F][0-9a-fA-F])+', 'i') plain, ARRAY(SELECT (regexp_matches ($1, '((?:%[0-9a-fA-F][0-9a-fA-F])+)', 'gi'))[1]) encoded) SELECT coalesce(string_agg(plain[i] || coalesce( convert_from(decode(replace(encoded[i], '%',''), 'hex'), 'utf8'), ''), ''), $1) FROM str, (SELECT generate_series(1, array_upper(encoded,1) + 2) i FROM str) blah); END $$ LANGUAGE plpgsql IMMUTABLE STRICT;
Emacs's configuration for PLpgSQL syntax highlighting when function is edited from console
Author: Peter Eisentraut
modify file .profile
PSQL_EDITOR=emacs; export PSQL_EDITOR.
modify file .emacs
(add-to-list 'auto-mode-alist '("/psql.edit.[0-9]+\\'" . sql-mode))
Cast bytea to text
When we need cast content of bytea variable to adequate text value directly (content of bytea is char codes), then following function is solution:
CREATE OR REPLACE FUNCTION bytea_to_text(bytea) RETURNS text AS $$ SELECT convert_from($1, current_setting('server_encoding')) $$ LANGUAGE sql; xxx=# SELECT bytea_to_text('žluťoučký kůň se napil žluté vody'::bytea); bytea_to_text ─────────────────────────────────── žluťoučký kůň se napil žluté vody (1 row)
Simply age calculation from birth date
postgres=# SELECT CURRENT_DATE; date ------------ 2011-09-20 (1 row) postgres=# SELECT EXTRACT(YEAR FROM age('1972-08-20'::date)); date_part ----------- 39 (1 row)
Is current server master or slave?
When we need to identify server's state, then we can use a function:
SELECT pg_is_in_recovery();
Author: Simon Riggs
Dynamic modification some record's field
PL/pgSQL can modify record's field only when field specification is static. Some fast C libraries are generally available, but when we have no necessary rights, then we cannot use it. Then we can use function:
CREATE OR REPLACE FUNCTION x.setfield2(anyelement, text, text) RETURNS anyelement LANGUAGE plpgsql AS $function$ DECLARE _name text; _values text[]; _value text; _attnum int; BEGIN FOR _name, _attnum IN SELECT a.attname, a.attnum FROM pg_catalog.pg_attribute a WHERE a.attrelid = (SELECT typrelid FROM pg_type WHERE oid = pg_typeof($1)::oid) LOOP IF _name = $2 THEN _value := $3; ELSE EXECUTE 'SELECT (($1).' || quote_ident(_name) || ')::text' INTO _value USING $1; END IF; _values[_attnum] := COALESCE('"' || replace(replace(_value, '"', '""'), '''', '''''') || '"', ''); END LOOP; EXECUTE 'SELECT (' || pg_typeof($1)::text || '''(' || array_to_string(_values,',') || ')'').*' INTO $1; RETURN $1; END; $function$; postgres=# select setfield2(mypoint '(10,)', 'b', '33'); setfield2 ─────────── (10,33) (1 row) Time: 9,480 ms
Author: Pavel Stěhule
Erwin Brandstetter designed better (more simply, faster) solution:
-- Erwin 1 CREATE OR REPLACE FUNCTION x.setfield3(anyelement, text, text) RETURNS anyelement AS $body$ DECLARE _list text; BEGIN _list := ( SELECT string_agg(x.fld, ',') FROM ( SELECT CASE WHEN a.attname = $2 THEN quote_literal($3) ELSE quote_ident(a.attname) END AS fld FROM pg_catalog.pg_attribute a WHERE a.attrelid = (SELECT typrelid FROM pg_type WHERE oid = pg_typeof($1)::oid) ORDER BY a.attnum ) x ); EXECUTE ' SELECT ' || _list || ' FROM (SELECT $1.*) x' USING $1 INTO $1; RETURN $1; END; $body$ LANGUAGE plpgsql;
or modernised final version
CREATE FUNCTION f_setfield(INOUT _comp_val anyelement, _field text, _val text) RETURNS anyelement AS $func$ BEGIN EXECUTE 'SELECT ' || array_to_string(ARRAY( SELECT CASE WHEN attname = _field THEN '$2' ELSE '($1).' || quote_ident(attname) END AS fld FROM pg_catalog.pg_attribute WHERE attrelid = pg_typeof(_comp_val)::text::regclass AND attnum > 0 AND attisdropped = FALSE ORDER BY attnum ), ',') USING _comp_val, _val INTO _comp_val; END $func$ LANGUAGE plpgsql;
Iteration over RECORD variable inside trigger
Author: Tom Lane (for PostgreSQL 8.4 and higher)
CREATE OR REPLACE FUNCTION dynamic_trigger() RETURNS TRIGGER LANGUAGE plpgsql AS $$ DECLARE ri RECORD; t TEXT; BEGIN RAISE NOTICE E'\n Operation: %\n Schema: %\n Table: %', TG_OP, TG_TABLE_SCHEMA, TG_TABLE_NAME; FOR ri IN SELECT ordinal_position, column_name, data_type FROM information_schema.columns WHERE table_schema = quote_ident(TG_TABLE_SCHEMA) AND table_name = quote_ident(TG_TABLE_NAME) ORDER BY ordinal_position LOOP EXECUTE 'SELECT ($1).' || ri.column_name || '::text' INTO t USING NEW; RAISE NOTICE E'Column\n number: %\n name: %\n type: %\n value: %.', ri.ordinal_position, ri.column_name, ri.data_type, t; END LOOP; RETURN NEW; END; $$;
Note: This technique needs one dynamic SQL per one record's field - functions from PL Toolbox library or from Hstore contrib module are more effective.
Enforcing some chars in to_char function
Sometimes we needs to show some chars, that are same like special chars (has some semantic) from mask. Text in quotes (in format mask) is not interpreted and it is showed without changes:
postgres=# select to_char(current_timestamp,'YYYY-MM-DDTHH24:MI:SS'); to_char ----------------------- 2010-02-07THH24:38:10 (1 row) postgres=# select to_char(current_timestamp,'YYYY-MM-DD"T"HH24:MI:SS'); to_char --------------------- 2010-02-07T07:38:22 (1 row)
Author: Milen A. Radev
Barrier against to unwanted conversion char(n) to text
Almost all string functions have parameters of text type. PostgreSQL ensures automatic conversion from char(n) to varchar. But there are a few cases where this behave causes problems - mainly for older applications, where char(n) type was used (there is a clean advice - don't use this type). In some legacy application is comparison between char(n) type and literal constant with spaces and it doesn't work in PostgreSQL now:
SELECT ... WHERE substr(somecolumn,1,4) = 'ab ';
It doesn't work, because substr function has text parameter - and returns text and 'ab' is not equal to 'ab '. But we can overload substr function with char(n) datatype (it is possible because varchar and char uses same binary format) - and it doesn't do unwanted conversion from char(n) to text:
create or replace function substr(character, int, int) returns character as $$ select substr($1::cstring::text,$2,$3) $$ language sql;
Tome Lane wrote more effective proposal:
create function substr(char,int,int) returns char strict immutable language internal as 'text_substr' ;
It is little bit dirty trick, but 100% functional.
postgres=# create table f(a character(5)); CREATE TABLE postgres=# insert into f values('a'),('ab'),('abc'); INSERT 0 3 postgres=# select * from f; a ------- a ab abc (3 rows) postgres=# select * from f where substr(a,1,3) = 'a '; a ------- a (1 row) postgres=# select * from f where substr(a,1,3) = 'ab '; a ------- ab (1 row)
Get functions's DDL command
PostgreSQL allows simple way to get full source code of any function:
postgres=# select oid from pg_proc where proname = 'fu'; oid ------- 16389 (1 row) postgres=# select pg_get_functiondef(16389); pg_get_functiondef --------------------------------------------------------------------- CREATE OR REPLACE FUNCTION public.fu(anyelement) RETURNS anyelement LANGUAGE plpgsql AS $function$ begin raise notice '%', $1; return $1; end; $function$+ (1 row)
from Russian alphabet (Cyrillic) to ASCII
source: http://leha.papulovskiy.ru/blog/
CREATE OR REPLACE FUNCTION urltranslit(text) RETURNS text as $$ SELECT regexp_replace( replace( replace( replace( replace( replace( replace( replace( translate( lower($1), 'абвгдеёзийклмнопрстуфхыэъь', 'abvgdeezijklmnoprstufhye' ), 'ж', 'zh' ), 'ц', 'ts' ), 'ч', 'ch' ), 'ш', 'sh' ), 'щ', 'sch' ), 'ю', 'yu' ), 'я', 'ya' ) , '[^a-z]+', '-', 'g' ) $$ LANGUAGE SQL;