Saturday, November 27, 2021

Creating TPCH data for Oracle database using dbgen

3:24 PM Posted by Dilli Raj Maharjan No comments

The Transaction Processing Council Ad-hoc (TPCH) is a decision support benchmark. It consists of a suite of business-oriented ad-hoc queries and concurrent data modifications. The queries and the data populating the database have been chosen to have broad industry-wide relevance. Reference: http://www.tpc.org. These data can be used for benchmarking and the test propose. These data are very much real with relations. In my case, I am using a docker container with Oracle database 19c to load TPCH data.


Run docker container from Oracle 19c image.
docker run -it -d --privileged --hostname dbgen_demo --name dbgen_demo \
-p 1521:1521 -v /Users/maharjan/Docker/dbgendata:/opt/oracle/oradata oracle/database:19.3.0-ee



Run bash in the docker container as the oracle user
docker exec -it dbgen_demo /bin/bash

Run bash in the docker container as the oracle user
docker exec -it --user root dbgen_demo /bin/bash

Install Linux packages required for the dbgen tool
yum -y install which git gcc tree vi



Download the dbgen source code from git location
git clone https://github.com/electrum/tpch-dbgen.git











Change directory to tpch-dbgen directory and check makefile.suite
cd tpch-dbgen/
grep -v ^# makefile.suite


Open the file with your favorite editor.
vi makefile.suite

Make following changes
CC = gcc                # Name of the compiler
DATABASE = ORACLE       # Database product Type
MACHINE = LINUX         # OS Type
WORKLOAD = TPCH         # Transaction Processing Council Ad-hoc/decision support benchmark (computer performance) 


# Execute make executable to install
make -f makefile.suite


Generate the tpch files
mkdir data
cd data
cp ../dbgen .
cp ../dists.dss .


Execute dbgen for the appropriate dbsize factor -s 1 is 1GB size
./dbgen -s 1


List the files using the tree command.
tree



Create oracle user tpch

sqlplus "/ as sysdba" <<EOF
alter session set container=ORCLPDB1;
create user tpch identified by tpch default tablespace users quota unlimited on users;
grant connect,resource to tpch;
grant create any directory to tpch;
EOF


Create required tables. Following block of SQL will create external tables referring to the 
files with data and the real tables.

sqlplus tpch/tpch@ORCLPDB1<<EOF
create or replace directory data as '/home/oracle/tpch-dbgen/data';
CREATE TABLE tpch.part_ext
(
    p_partkey       NUMBER(10),
    p_name          VARCHAR2(55),
    p_mfgr          CHAR(25),
    p_brand         CHAR(10),
    p_type          VARCHAR2(25),
    p_size          INTEGER,
    p_container     CHAR(10),
    p_retailprice   NUMBER,
    p_comment       VARCHAR2(23)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY data
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('part.tbl'));

CREATE TABLE tpch.part
(
    p_partkey       NUMBER(10) NOT NULL,
    p_name          VARCHAR2(55) NOT NULL,
    p_mfgr          CHAR(25) NOT NULL,
    p_brand         CHAR(10) NOT NULL,
    p_type          VARCHAR2(25) NOT NULL,
    p_size          INTEGER NOT NULL,
    p_container     CHAR(10) NOT NULL,
    p_retailprice   NUMBER NOT NULL,
    p_comment       VARCHAR2(23) NOT NULL
);


CREATE TABLE tpch.supplier_ext
(
    s_suppkey     NUMBER(10),
    s_name        CHAR(25),
    s_address     VARCHAR2(64),
    s_nationkey   NUMBER(10),
    s_phone       CHAR(15),
    s_acctbal     NUMBER,
    s_comment     VARCHAR2(128)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY data
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('supplier.tbl'));

CREATE TABLE tpch.supplier
(
    s_suppkey     NUMBER(10, 0) NOT NULL,
    s_name        CHAR(25) NOT NULL,
    s_address     VARCHAR2(64) NOT NULL,
    s_nationkey   NUMBER(10) NOT NULL,
    s_phone       CHAR(15) NOT NULL,
    s_acctbal     NUMBER NOT NULL,
    s_comment     VARCHAR2(128) NOT NULL
);

CREATE TABLE tpch.partsupp_ext
(
    ps_partkey      NUMBER(10),
    ps_suppkey      NUMBER(10),
    ps_availqty     INTEGER,
    ps_supplycost   NUMBER,
    ps_comment      VARCHAR2(256)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY data
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('partsupp.tbl'));

CREATE TABLE tpch.partsupp
(
    ps_partkey      NUMBER(10) NOT NULL,
    ps_suppkey      NUMBER(10) NOT NULL,
    ps_availqty     INTEGER NOT NULL,
    ps_supplycost   NUMBER NOT NULL,
    ps_comment      VARCHAR2(256) NOT NULL
);

CREATE TABLE tpch.customer_ext
(
    c_custkey      NUMBER(10),
    c_name         VARCHAR2(32),
    c_address      VARCHAR2(64),
    c_nationkey    NUMBER(10),
    c_phone        CHAR(15),
    c_acctbal      NUMBER,
    c_mktsegment   CHAR(10),
    c_comment      VARCHAR2(128)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY data
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('customer.tbl'));

CREATE TABLE tpch.customer
(
    c_custkey      NUMBER(10) NOT NULL,
    c_name         VARCHAR2(32) NOT NULL,
    c_address      VARCHAR2(64) NOT NULL,
    c_nationkey    NUMBER(10) NOT NULL,
    c_phone        CHAR(15) NOT NULL,
    c_acctbal      NUMBER NOT NULL,
    c_mktsegment   CHAR(10) NOT NULL,
    c_comment      VARCHAR2(128) NOT NULL
);

CREATE TABLE tpch.orders_ext
(
    o_orderkey        NUMBER(10),
    o_custkey         NUMBER(10),
    o_orderstatus     CHAR(1),
    o_totalprice      NUMBER,
    o_orderdate       CHAR(10),
    o_orderpriority   CHAR(15),
    o_clerk           CHAR(15),
    o_shippriority    INTEGER,
    o_comment         VARCHAR2(128)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY data
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('orders.tbl'));

CREATE TABLE tpch.orders
(
    o_orderkey        NUMBER(10) NOT NULL,
    o_custkey         NUMBER(10) NOT NULL,
    o_orderstatus     CHAR(1) NOT NULL,
    o_totalprice      NUMBER NOT NULL,
    o_orderdate       DATE NOT NULL,
    o_orderpriority   CHAR(15) NOT NULL,
    o_clerk           CHAR(15) NOT NULL,
    o_shippriority    INTEGER NOT NULL,
    o_comment         VARCHAR2(128) NOT NULL
);


CREATE TABLE tpch.lineitem_ext
(
    l_orderkey        NUMBER(10),
    l_partkey         NUMBER(10),
    l_suppkey         NUMBER(10),
    l_linenumber      INTEGER,
    l_quantity        NUMBER,
    l_extendedprice   NUMBER,
    l_discount        NUMBER,
    l_tax             NUMBER,
    l_returnflag      CHAR(1),
    l_linestatus      CHAR(1),
    l_shipdate        CHAR(10),
    l_commitdate      CHAR(10),
    l_receiptdate     CHAR(10),
    l_shipinstruct    CHAR(25),
    l_shipmode        CHAR(10),
    l_comment         VARCHAR2(64)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY data
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('lineitem.tbl'));

CREATE TABLE tpch.lineitem
(
    l_orderkey        NUMBER(10),
    l_partkey         NUMBER(10),
    l_suppkey         NUMBER(10),
    l_linenumber      INTEGER,
    l_quantity        NUMBER,
    l_extendedprice   NUMBER,
    l_discount        NUMBER,
    l_tax             NUMBER,
    l_returnflag      CHAR(1),
    l_linestatus      CHAR(1),
    l_shipdate        DATE,
    l_commitdate      DATE,
    l_receiptdate     DATE,
    l_shipinstruct    CHAR(25),
    l_shipmode        CHAR(10),
    l_comment         VARCHAR2(64)
);

CREATE TABLE tpch.nation_ext
(
    n_nationkey   NUMBER(10),
    n_name        CHAR(25),
    n_regionkey   NUMBER(10),
    n_comment     VARCHAR(256)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY data
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('nation.tbl'));

CREATE TABLE tpch.nation
(
    n_nationkey   NUMBER(10),
    n_name        CHAR(25),
    n_regionkey   NUMBER(10),
    n_comment     VARCHAR(256)
);

CREATE TABLE tpch.region_ext
(
    r_regionkey   NUMBER(10),
    r_name        CHAR(25),
    r_comment     VARCHAR(256)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY data
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('region.tbl'));

CREATE TABLE tpch.region
(
    r_regionkey   NUMBER(10),
    r_name        CHAR(25),
    r_comment     VARCHAR(256)
);

EOF



Insert into real tables from the external tables

sqlplus tpch/tpch@ORCLPDB1<<EOF
ALTER SESSION SET nls_date_format='YYYY-MM-DD';
INSERT /*+ APPEND */ INTO  tpch.part     SELECT * FROM tpch.part_ext;
INSERT /*+ APPEND */ INTO  tpch.supplier SELECT * FROM tpch.supplier_ext;
INSERT /*+ APPEND */ INTO  tpch.partsupp SELECT * FROM tpch.partsupp_ext;
INSERT /*+ APPEND */ INTO  tpch.customer SELECT * FROM tpch.customer_ext;
INSERT /*+ APPEND */ INTO  tpch.orders   SELECT * FROM tpch.orders_ext;
INSERT /*+ APPEND */ INTO  tpch.lineitem SELECT * FROM tpch.lineitem_ext;
INSERT /*+ APPEND */ INTO  tpch.nation   SELECT * FROM tpch.nation_ext;
INSERT /*+ APPEND */ INTO  tpch.region   SELECT * FROM tpch.region_ext;
EOF


Add constraints to the table.

sqlplus tpch/tpch@ORCLPDB1<<EOF
ALTER TABLE tpch.part ADD CONSTRAINT pk_part PRIMARY KEY(p_partkey);
ALTER TABLE tpch.supplier ADD CONSTRAINT pk_supplier PRIMARY KEY(s_suppkey);
ALTER TABLE tpch.partsupp ADD CONSTRAINT pk_partsupp PRIMARY KEY(ps_partkey, ps_suppkey);
ALTER TABLE tpch.customer ADD CONSTRAINT pk_customer PRIMARY KEY(c_custkey);
ALTER TABLE tpch.orders ADD CONSTRAINT pk_orders PRIMARY KEY(o_orderkey);
ALTER TABLE tpch.lineitem ADD CONSTRAINT pk_lineitem PRIMARY KEY(l_linenumber, l_orderkey);
ALTER TABLE tpch.nation ADD CONSTRAINT pk_nation PRIMARY KEY(n_nationkey);
ALTER TABLE tpch.region ADD CONSTRAINT pk_region PRIMARY KEY(r_regionkey);
ALTER TABLE tpch.partsupp ADD CONSTRAINT fk_partsupp_part FOREIGN KEY(ps_partkey) REFERENCES tpch.part(p_partkey);
ALTER TABLE tpch.partsupp ADD CONSTRAINT fk_partsupp_supplier FOREIGN KEY(ps_suppkey) REFERENCES tpch.supplier(s_suppkey);
ALTER TABLE tpch.customer ADD CONSTRAINT fk_customer_nation FOREIGN KEY(c_nationkey) REFERENCES tpch.nation(n_nationkey);
ALTER TABLE tpch.orders ADD CONSTRAINT fk_orders_customer FOREIGN KEY(o_custkey) REFERENCES tpch.customer(c_custkey);
ALTER TABLE tpch.lineitem ADD CONSTRAINT fk_lineitem_order FOREIGN KEY(l_orderkey) REFERENCES tpch.orders(o_orderkey);
ALTER TABLE tpch.lineitem ADD CONSTRAINT fk_lineitem_part FOREIGN KEY(l_partkey) REFERENCES tpch.part(p_partkey);
ALTER TABLE tpch.lineitem ADD CONSTRAINT fk_lineitem_supplier FOREIGN KEY(l_suppkey) REFERENCES tpch.supplier(s_suppkey);
ALTER TABLE tpch.lineitem ADD CONSTRAINT fk_lineitem_partsupp FOREIGN KEY(l_partkey, l_suppkey) REFERENCES tpch.partsupp(ps_partkey, ps_suppkey);
ALTER TABLE tpch.part ADD CONSTRAINT chk_part_partkey CHECK(p_partkey >= 0);
ALTER TABLE tpch.supplier ADD CONSTRAINT chk_supplier_suppkey CHECK(s_suppkey >= 0);
ALTER TABLE tpch.customer ADD CONSTRAINT chk_customer_custkey CHECK(c_custkey >= 0);
ALTER TABLE tpch.partsupp ADD CONSTRAINT chk_partsupp_partkey CHECK(ps_partkey >= 0);
ALTER TABLE tpch.region ADD CONSTRAINT chk_region_regionkey CHECK(r_regionkey >= 0);
ALTER TABLE tpch.nation ADD CONSTRAINT chk_nation_nationkey CHECK(n_nationkey >= 0);
ALTER TABLE tpch.part ADD CONSTRAINT chk_part_size CHECK(p_size >= 0);
ALTER TABLE tpch.part ADD CONSTRAINT chk_part_retailprice CHECK(p_retailprice >= 0);
ALTER TABLE tpch.partsupp ADD CONSTRAINT chk_partsupp_availqty CHECK(ps_availqty >= 0);
ALTER TABLE tpch.partsupp ADD CONSTRAINT chk_partsupp_supplycost CHECK(ps_supplycost >= 0);
ALTER TABLE tpch.orders ADD CONSTRAINT chk_orders_totalprice CHECK(o_totalprice >= 0);
ALTER TABLE tpch.lineitem ADD CONSTRAINT chk_lineitem_quantity CHECK(l_quantity >= 0);
ALTER TABLE tpch.lineitem ADD CONSTRAINT chk_lineitem_extendedprice CHECK(l_extendedprice >= 0);
ALTER TABLE tpch.lineitem ADD CONSTRAINT chk_lineitem_tax CHECK(l_tax >= 0);
ALTER TABLE tpch.lineitem ADD CONSTRAINT chk_lineitem_discount CHECK(l_discount >= 0.00 AND l_discount <= 1.00);
ALTER TABLE tpch.lineitem ADD CONSTRAINT chk_lineitem_ship_rcpt CHECK(l_shipdate <= l_receiptdate);
EOF


Count the number of rows in the tables

sqlplus tpch/tpch@ORCLPDB1<<EOF
select count(*) from region ;
select count(*) from nation ;
select count(*) from lineitem ;
select count(*) from orders ;
select count(*) from customer ;
select count(*) from partsupp ;
select count(*) from supplier ;
select count(*) from part ;
EOF


Drop external tables once data is loaded to the table.

sqlplus tpch/tpch@ORCLPDB1<<EOF
drop table region_ext purge;
drop table nation_ext purge;
drop table lineitem_ext purge;
drop table orders_ext purge;
drop table customer_ext purge;
drop table partsupp_ext purge;
drop table supplier_ext purge;
drop table part_ext purge;
EOF


Command to drop all the tables

sqlplus tpch/tpch@ORCLPDB1<<EOF
drop table region purge;
drop table nation purge;
drop table lineitem purge;
drop table orders purge;
drop table customer purge;
drop table partsupp purge;
drop table supplier purge;
drop table part purge;
drop table region_ext purge;
drop table nation_ext purge;
drop table lineitem_ext purge;
drop table orders_ext purge;
drop table customer_ext purge;
drop table partsupp_ext purge;
drop table supplier_ext purge;
drop table part_ext purge;
EOF

0 comments:

Post a Comment