Skip to content

Upload csv files to postgresql database

Upload csv files to postgresql database published on Комментариев к записи Upload csv files to postgresql database нет

here is my drafts of how-to automate upload a lot of csv files to database ( use text fieds because there is no need to mach all types of columns in this case )

 for i in $(ls -1 *.csv); do echo "create table $i (" ; echo $(head -1 $i | sed 's/,/ text,\n/g' ); echo "text );" ;echo copy $i | awk -F "." '{print $1}'; echo "from '/u01/import/$i' DELIMITER ',' CSV HEADER;" ; done

postgresql script to find excess indexes

postgresql script to find excess indexes published on Комментариев к записи postgresql script to find excess indexes нет

here how excess indexes can be found, excess mean that there is a single column index which is covered by first value of complex index

select * from (
select count(*),pg_size_pretty(pg_relation_size(t.table_name)),t.table_name,t.indkey_names[1],min(array_length(t.indkey_names,1)) min_index_elements_cnt from (
SELECT i.relname as indname,
       i.relowner as indowner,
       idx.indrelid::regclass table_name,
       idx.indkey,
       ARRAY(
       SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
       FROM generate_subscripts(idx.indkey, 1) as k
       ORDER BY k
       ) as indkey_names,
       idx.indexprs IS NOT NULL as indexprs,
       idx.indpred IS NOT NULL as indpred
FROM   pg_index as idx
JOIN   pg_class as i
ON     i.oid = idx.indexrelid
where idx.indrelid::text not like 'pg_toast%' 
and pg_relation_size(indexrelid::regclass)>100*1024*1024 ) t
group by t.indkey_names[1],t.table_name
having count (*) >1
) tt where min_index_elements_cnt =1;

oracle how to get disk event waits estimates

oracle how to get disk event waits estimates published on Комментариев к записи oracle how to get disk event waits estimates нет

base took from ASH Architecture and Advanced Usage — RMOUG

select event,round (est_dbtime_ms/
CASE WHEN est_waits=0 THEN est_dbtime_ms ELSE est_waits END
,1) as est_avg_latency_ms,time#
from (
      select event,round(sum(case when time_waited >0 then greatest(1,1000000/time_waited) else 0 end )) as est_waits,
      sum(1000) as est_dbtime_ms ,TRUNC( SAMPLE_TIME, 'MI') - mod( EXTRACT(minute FROM  SAMPLE_TIME), 1) /(24 * 60) as time#
      from v$active_session_history where  wait_class ='User I/O' 
      group by event,TRUNC( SAMPLE_TIME, 'MI') - mod( EXTRACT(minute FROM  SAMPLE_TIME), 1) /(24 * 60) 
) order by 3 desc;

postgresql configure separate directory for config files

postgresql configure separate directory for config files published on 1 комментарий к записи postgresql configure separate directory for config files

first of all we need to create directory for config files outside $PGDATA:
in my case this is /u01/postgres/9.5/main/conf/

next we need to modify service and add option with path of postgresql.conf

vim /usr/lib/systemd/system/postgresql-9.5.service
[Unit]
Description=PostgreSQL 9.5 database server
After=syslog.target
After=network.target

[Service]
Type=forking

User=postgres
Group=postgres

# Note: avoid inserting whitespace in these Environment= lines, or you may
# break postgresql-setup.

# Location of database directory
Environment=PGDATA=/u01/postgres/9.5/main/pgdata

# Where to send early-startup messages from the server (before the logging
# options of postgresql.conf take effect)
# This is normally controlled by the global default set by systemd
# StandardOutput=syslog

# Disable OOM kill on the postmaster
OOMScoreAdjust=-1000

ExecStartPre=/usr/pgsql-9.5/bin/postgresql95-check-db-dir ${PGDATA}
ExecStart=/usr/pgsql-9.5/bin/pg_ctl start -D ${PGDATA} -s -w -t 300 -o "-c config_file=/u01/postgres/9.5/main/conf/postgresql.conf"
ExecStop=/usr/pgsql-9.5/bin/pg_ctl stop -D ${PGDATA} -s -m fast
ExecReload=/usr/pgsql-9.5/bin/pg_ctl reload -D ${PGDATA} -s

# Give a reasonable amount of time for the server to start up/shut down
TimeoutSec=300

[Install]
WantedBy=multi-user.target

in config file we modify location of other config files:

data_directory='/u01/postgres/9.5/main/pgdata'
hba_file='/u01/postgres/9.5/main/conf/pg_hba.conf'
ident_file='/u01/postgres/9.5/main/conf/pg_ident.conf'

postgresql query to find duplicate indexes

postgresql query to find duplicate indexes published on Комментариев к записи postgresql query to find duplicate indexes нет
     
SELECT sub.table, pg_size_pretty(SUM(pg_relation_size(idx))::BIGINT) AS SIZE,
     (array_agg(idx))[1] AS idx1, (array_agg(idx))[2] AS idx2,
     (array_agg(idx))[3] AS idx3, (array_agg(idx))[4] AS idx4 
            FROM (  SELECT indrelid::regclass as table,indexrelid::regclass AS idx, (indrelid::text ||E'\n'|| indclass::text ||E'\n'|| indkey::text ||E'\n'|| 
                    COALESCE(indexprs::text,'')||E'\n' || COALESCE(indpred::text,'')) AS KEY 
                    FROM pg_index
                     where  pg_relation_size(indexrelid::regclass)>100*1024*1024 ) sub 
GROUP BY sub.table, KEY HAVING COUNT(*)>1 
ORDER BY SUM(pg_relation_size(idx)) DESC;

sample output

       table       |  size  |            idx1             |                idx2                | idx3 | idx4
-------------------+--------+-----------------------------+------------------------------------+------+------
 mdm_record_column | 797 MB | mdm_record_column_column_fk | mdm_record_column_record_fk        |      |
 fin_bill_generate | 300 MB | fin_bill_generate_fk        | fin_bill_generate_spec_item_id_idx |      |

also usefull to look at usage stat on this indexes

SELECT 
    relid::regclass AS table, 
    indexrelid::regclass AS index, 
    pg_size_pretty(pg_relation_size(indexrelid::regclass)) AS index_size, 
    idx_tup_read, 
    idx_tup_fetch, 
    idx_scan,
    pg_get_indexdef(pg_index.indexrelid) as indexdef
FROM 
    pg_stat_user_indexes 
    JOIN pg_index USING (indexrelid) 
WHERE 
     (relid::regclass)::text ='mdm_record_column' order by idx_scan desc ;

       table       |              index              | index_size | idx_tup_read | idx_tup_fetch | idx_scan |                                         indexdef
-------------------+---------------------------------+------------+--------------+---------------+----------+-------------------------------------------------------------------------------------------
 mdm_record_column | mdm_record_column_record_fk     | 399 MB     |       758024 |        758024 |     2992 | CREATE INDEX mdm_record_column_record_fk ON mdm_record_column USING btree (column_id)
 mdm_record_column | mdm_record_column_record_id_idx | 399 MB     |         1922 |          1922 |      442 | CREATE INDEX mdm_record_column_record_id_idx ON mdm_record_column USING btree (record_id)
 mdm_record_column | mdm_record_column_pk            | 399 MB     |            0 |             0 |        0 | CREATE UNIQUE INDEX mdm_record_column_pk ON mdm_record_column USING btree (id)
 mdm_record_column | mdm_record_column_column_fk     | 399 MB     |            0 |             0 |        0 | CREATE INDEX mdm_record_column_column_fk ON mdm_record_column USING btree (column_id)

PgSQL Indexes and «LIKE»

PgSQL Indexes and «LIKE» published on Комментариев к записи PgSQL Indexes and «LIKE» нет

Original from Paul Ramsey

Hi all, because English is not my native language, that’s why I will write as little as possible =)
here is the case, when we write a query with like and get Seq Scan (full table scan), instead index scan this material can be usefull for you

create table tt as   select s, md5(random()::text) from generate_Series(1,990000) s;
 create index on tt(md5);
 show LC_COLLATE;
 lc_collate
-------------
 en_US.UTF-8

 explain analyze select * from tt where md5 like 'a6b90b58a652b8e1bd01bbe2%';
                                               QUERY PLAN
--------------------------------------------------------------------------------------------------------
 Seq Scan on tt  (cost=0.00..20625.00 rows=4950 width=36) (actual time=132.559..132.559 rows=0 loops=1)
   Filter: (md5 ~~ 'a6b90b58a652b8e1bd01bbe2%'::text)
   Rows Removed by Filter: 990000
 Planning time: 0.203 ms
 Execution time: 132.583 ms

if we create same index but with text_pattern_ops we can get good improvement:

 create index on tt(md5 text_pattern_ops);

 explain analyze select * from tt where md5 like 'a6b90b58a652b8e1bd01bbe2%';
                                                    QUERY PLAN
------------------------------------------------------------------------------------------------------------------
 Index Scan using tt_md5_idx1 on tt  (cost=0.42..8.45 rows=99 width=37) (actual time=0.022..0.022 rows=0 loops=1)
   Index Cond: ((md5 >= 'a6b90b58a652b8e1bd01bbe2'::text) AND (md5 < 'a6b90b58a652b8e1bd01bbe3'::text))
   Filter: (md5 ~~ 'a6b90b58a652b8e1bd01bbe2%'::text)
 Planning time: 0.403 ms
 Execution time: 0.043 ms

ORA-42016: shape of interim table does not match specified column mapping

ORA-42016: shape of interim table does not match specified column mapping published on 1 комментарий к записи ORA-42016: shape of interim table does not match specified column mapping

try to make table partitioned:
create dummy table:

  CREATE TABLE USER."PERS_DATA_OPERATION_LOG_REDEF" 
   (	"ID" NUMBER NOT NULL ENABLE, 
	"OBJECT_OID" NUMBER NOT NULL ENABLE, 
	"OBJECT_SNILS" VARCHAR2(256), 
	"OBJECT_FULL_NAME" VARCHAR2(4000), 
	"SUBJECT_OID" NUMBER, 
	"SUBJECT_SNILS" VARCHAR2(256), 
	"SUBJECT_FULL_NAME" VARCHAR2(4000), 
	"CRT_ON" TIMESTAMP (6) , 
	"OPERATION_TYPE" VARCHAR2(256), 
	 CHECK (OPERATION_TYPE IN ('VIEW','PASS_RESET','REMOVE','BGIR_RESTART')) ENABLE, 
	 CONSTRAINT "PERS_DATA_OPERATION_LOG_PK1" PRIMARY KEY ("ID")
   ) 
     PARTITION BY RANGE ("CRT_ON")    INTERVAL(NUMTOYMINTERVAL(1, 'MONTH'))
     (partition p0 VALUES LESS THAN (TIMESTAMP' 2011-01-01 00:00:00') ) ;

start redef and get error:

exec dbms_redefinition.can_redef_table ('USER','PERS_DATA_OPERATION_LOG',dbms_redefinition.cons_use_pk);
   
exec  DBMS_REDEFINITION.start_redef_table ('USER','PERS_DATA_OPERATION_LOG','PERS_DATA_OPERATION_LOG_REDEF',NULL,DBMS_REDEFINITION.cons_use_pk);

ORA-42016: shape of interim table does not match specified column mapping

this is because original table have a timestamp with local timezone format
how to fix it:

select listagg(column_name,',') within  group  ( order by column_id) from dba_tab_columns where table_name='PERS_DATA_OPERATION_LOG';

begin 
DBMS_REDEFINITION.start_redef_table(
    uname        => 'USER',
    orig_table   => 'PERS_DATA_OPERATION_LOG',
    int_table    => 'PERS_DATA_OPERATION_LOG_REDEF',
    col_mapping =>'ID,OBJECT_OID,OBJECT_SNILS,OBJECT_FULL_NAME,SUBJECT_OID,SUBJECT_SNILS,SUBJECT_FULL_NAME,to_timestamp(crt_on) crt_on,OPERATION_TYPE',
    options_flag => DBMS_REDEFINITION.cons_use_pk);
end;



SET SERVEROUTPUT ON
DECLARE
 l_num_errors PLS_INTEGER;
BEGIN
 DBMS_REDEFINITION.copy_table_dependents(
   uname             => 'USER',
   orig_table        => 'PERS_DATA_OPERATION_LOG',
   int_table         => 'PERS_DATA_OPERATION_LOG_REDEF',
   copy_indexes      => DBMS_REDEFINITION.cons_orig_params, -- Non-Default
   copy_triggers     => TRUE,  -- Default
   copy_constraints  => TRUE,  -- Default
   copy_privileges   => TRUE,  -- Default
   ignore_errors     => true,
   num_errors        => l_num_errors);
 DBMS_OUTPUT.put_line('l_num_errors=' || l_num_errors);
END;
/
exec DBMS_REDEFINITION.SYNC_INTERIM_TABLE('USER','PERS_DATA_OPERATION_LOG','PERS_DATA_OPERATION_LOG_REDEF');

EXEC DBMS_REDEFINITION.finish_redef_table('USER', 'PERS_DATA_OPERATION_LOG', 'PERS_DATA_OPERATION_LOG_REDEF' );

Primary Sidebar

Яндекс.Метрика