<structfield>reads</structfield> <type>bigint</type>
</para>
<para>
- Number of read operations, each of the size specified in
- <varname>op_bytes</varname>.
+ Number of read operations.
+ </para>
+ </entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry">
+ <para role="column_definition">
+ <structfield>read_bytes</structfield> <type>numeric</type>
+ </para>
+ <para>
+ The total size of read operations in bytes.
</para>
</entry>
</row>
<structfield>writes</structfield> <type>bigint</type>
</para>
<para>
- Number of write operations, each of the size specified in
- <varname>op_bytes</varname>.
+ Number of write operations.
+ </para>
+ </entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry">
+ <para role="column_definition">
+ <structfield>write_bytes</structfield> <type>numeric</type>
+ </para>
+ <para>
+ The total size of write operations in bytes.
</para>
</entry>
</row>
<structfield>writebacks</structfield> <type>bigint</type>
</para>
<para>
- Number of units of size <varname>op_bytes</varname> which the process
- requested the kernel write out to permanent storage.
+ Number of units of size <symbol>BLCKSZ</symbol> (typically 8kB) which
+ the process requested the kernel write out to permanent storage.
</para>
</entry>
</row>
<structfield>extends</structfield> <type>bigint</type>
</para>
<para>
- Number of relation extend operations, each of the size specified in
- <varname>op_bytes</varname>.
+ Number of relation extend operations.
</para>
</entry>
</row>
<row>
<entry role="catalog_table_entry">
<para role="column_definition">
- <structfield>extend_time</structfield> <type>double precision</type>
+ <structfield>extend_bytes</structfield> <type>numeric</type>
</para>
<para>
- Time spent in extend operations in milliseconds (if
- <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+ The total size of relation extend operations in bytes.
</para>
</entry>
</row>
<row>
<entry role="catalog_table_entry">
<para role="column_definition">
- <structfield>op_bytes</structfield> <type>bigint</type>
- </para>
- <para>
- The number of bytes per unit of I/O read, written, or extended.
+ <structfield>extend_time</structfield> <type>double precision</type>
</para>
<para>
- Relation data reads, writes, and extends are done in
- <varname>block_size</varname> units, derived from the build-time
- parameter <symbol>BLCKSZ</symbol>, which is <literal>8192</literal> by
- default.
+ Time spent in extend operations in milliseconds (if
+ <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
</para>
</entry>
</row>
b.object,
b.context,
b.reads,
+ b.read_bytes,
b.read_time,
b.writes,
+ b.write_bytes,
b.write_time,
b.writebacks,
b.writeback_time,
b.extends,
+ b.extend_bytes,
b.extend_time,
- b.op_bytes,
b.hits,
b.evictions,
b.reuses,
}
if (*foundPtr)
{
- pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1);
+ pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0);
if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageHit;
io_start = pgstat_prepare_io_time(track_io_timing);
smgrreadv(operation->smgr, forknum, io_first_block, io_pages, io_buffers_len);
pgstat_count_io_op_time(io_object, io_context, IOOP_READ, io_start,
- io_buffers_len);
+ 1, io_buffers_len * BLCKSZ);
/* Verify each block we read, and terminate the I/O. */
for (int j = 0; j < io_buffers_len; ++j)
* pinners or erroring out.
*/
pgstat_count_io_op(IOOBJECT_RELATION, io_context,
- from_ring ? IOOP_REUSE : IOOP_EVICT, 1);
+ from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0);
}
/*
UnlockRelationForExtension(bmr.rel, ExclusiveLock);
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
- io_start, extend_by);
+ io_start, 1, extend_by * BLCKSZ);
/* Set BM_VALID, terminate IO, and wake up any waiters */
for (uint32 i = 0; i < extend_by; i++)
* of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
*/
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
- IOOP_WRITE, io_start, 1);
+ IOOP_WRITE, io_start, 1, BLCKSZ);
pgBufferUsage.shared_blks_written++;
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION,
IOCONTEXT_NORMAL, IOOP_WRITE,
- io_start, 1);
+ io_start, 1, BLCKSZ);
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
* blocks of permanent relations.
*/
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
- IOOP_WRITEBACK, io_start, wb_context->nr_pending);
+ IOOP_WRITEBACK, io_start, wb_context->nr_pending, 0);
wb_context->nr_pending = 0;
}
/* Temporary table I/O does not use Buffer Access Strategies */
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL,
- IOOP_WRITE, io_start, 1);
+ IOOP_WRITE, io_start, 1, BLCKSZ);
/* Mark not-dirty now in case we error out below */
buf_state &= ~BM_DIRTY;
ClearBufferTag(&bufHdr->tag);
buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
- pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EVICT, 1);
+
+ pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EVICT, 1, 0);
}
return BufferDescriptorGetBuffer(bufHdr);
smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND,
- io_start, extend_by);
+ io_start, 1, extend_by * BLCKSZ);
for (uint32 i = 0; i < extend_by; i++)
{
* backend fsyncs.
*/
pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
- IOOP_FSYNC, io_start, 1);
+ IOOP_FSYNC, io_start, 1, 0);
}
}
FileClose(file);
pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
- IOOP_FSYNC, io_start, 1);
+ IOOP_FSYNC, io_start, 1, 0);
errno = save_errno;
return result;
bktype_shstats->counts[io_object][io_context][io_op] +=
pending_io->counts[io_object][io_context][io_op];
+ bktype_shstats->bytes[io_object][io_context][io_op] +=
+ pending_io->bytes[io_object][io_context][io_op];
time = pending_io->pending_times[io_object][io_context][io_op];
static PgStat_PendingIO PendingIOStats;
static bool have_iostats = false;
+/*
+ * Check if an IOOp is tracked in bytes. This relies on the ordering of IOOp
+ * defined in pgstat.h: every operation tracked in bytes must sort at or after
+ * IOOP_EXTEND. Make sure to update this check when changing the elements of
+ * that enum.
+ *
+ * Note that io_op is evaluated twice, so do not pass an expression with side
+ * effects.
+ */
+#define pgstat_is_ioop_tracked_in_bytes(io_op) \
+ ((io_op) < IOOP_NUM_TYPES && (io_op) >= IOOP_EXTEND)
/*
* Check that stats have not been counted for any combination of IOObject,
}
void
-pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
+pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op,
+ uint32 cnt, uint64 bytes)
{
Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
Assert((unsigned int) io_op < IOOP_NUM_TYPES);
+ Assert(pgstat_is_ioop_tracked_in_bytes(io_op) || bytes == 0);
Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
if (pgstat_tracks_backend_bktype(MyBackendType))
entry_ref = pgstat_prep_backend_pending(MyProcNumber);
entry_ref->pending_io.counts[io_object][io_context][io_op] += cnt;
+ entry_ref->pending_io.bytes[io_object][io_context][io_op] += bytes;
}
PendingIOStats.counts[io_object][io_context][io_op] += cnt;
+ PendingIOStats.bytes[io_object][io_context][io_op] += bytes;
have_iostats = true;
}
*/
void
pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
- instr_time start_time, uint32 cnt)
+ instr_time start_time, uint32 cnt, uint64 bytes)
{
if (track_io_timing)
{
}
}
- pgstat_count_io_op(io_object, io_context, io_op, cnt);
+ pgstat_count_io_op(io_object, io_context, io_op, cnt, bytes);
}
PgStat_IO *
bktype_shstats->counts[io_object][io_context][io_op] +=
PendingIOStats.counts[io_object][io_context][io_op];
+ bktype_shstats->bytes[io_object][io_context][io_op] +=
+ PendingIOStats.bytes[io_object][io_context][io_op];
+
time = PendingIOStats.pending_times[io_object][io_context][io_op];
bktype_shstats->times[io_object][io_context][io_op] +=
IO_COL_OBJECT,
IO_COL_CONTEXT,
IO_COL_READS,
+ IO_COL_READ_BYTES,
IO_COL_READ_TIME,
IO_COL_WRITES,
+ IO_COL_WRITE_BYTES,
IO_COL_WRITE_TIME,
IO_COL_WRITEBACKS,
IO_COL_WRITEBACK_TIME,
IO_COL_EXTENDS,
+ IO_COL_EXTEND_BYTES,
IO_COL_EXTEND_TIME,
- IO_COL_CONVERSION,
IO_COL_HITS,
IO_COL_EVICTIONS,
IO_COL_REUSES,
pg_unreachable();
}
+/*
+ * Map an IOOp to the output column that reports its total size in bytes.
+ *
+ * Only extends, reads and writes have such a column; for every other IOOp
+ * IO_COL_INVALID is returned.
+ */
+static io_stat_col
+pgstat_get_io_byte_index(IOOp io_op)
+{
+ switch (io_op)
+ {
+ /* operations without a byte column */
+ case IOOP_EVICT:
+ case IOOP_FSYNC:
+ case IOOP_HIT:
+ case IOOP_REUSE:
+ case IOOP_WRITEBACK:
+ return IO_COL_INVALID;
+
+ /* operations reported in bytes */
+ case IOOP_EXTEND:
+ return IO_COL_EXTEND_BYTES;
+ case IOOP_READ:
+ return IO_COL_READ_BYTES;
+ case IOOP_WRITE:
+ return IO_COL_WRITE_BYTES;
+ }
+
+ /* only reached when io_op matches none of the cases above */
+ elog(ERROR, "unrecognized IOOp value: %d", io_op);
+ pg_unreachable();
+}
+
/*
* Get the number of the column containing IO times for the specified IOOp.
- * This function encodes our assumption that IO time for an IOOp is displayed
- * in the view in the column directly after the IOOp counts. If an op has no
- * associated time, IO_COL_INVALID is returned.
+ * If an op has no associated time, IO_COL_INVALID is returned.
*/
static io_stat_col
pgstat_get_io_time_index(IOOp io_op)
switch (io_op)
{
case IOOP_READ:
+ return IO_COL_READ_TIME;
case IOOP_WRITE:
+ return IO_COL_WRITE_TIME;
case IOOP_WRITEBACK:
+ return IO_COL_WRITEBACK_TIME;
case IOOP_EXTEND:
+ return IO_COL_EXTEND_TIME;
case IOOP_FSYNC:
- return pgstat_get_io_op_index(io_op) + 1;
+ return IO_COL_FSYNC_TIME;
case IOOP_EVICT:
case IOOP_HIT:
case IOOP_REUSE:
else
nulls[IO_COL_RESET_TIME] = true;
- /*
- * Hard-code this to the value of BLCKSZ for now. Future values
- * could include XLOG_BLCKSZ, once WAL IO is tracked, and constant
- * multipliers, once non-block-oriented IO (e.g. temporary file
- * IO) is tracked.
- */
- values[IO_COL_CONVERSION] = Int64GetDatum(BLCKSZ);
-
for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
{
int op_idx = pgstat_get_io_op_index(io_op);
int time_idx = pgstat_get_io_time_index(io_op);
+ int byte_idx = pgstat_get_io_byte_index(io_op);
/*
* Some combinations of BackendType and IOOp, of IOContext and
else
nulls[op_idx] = true;
- /* not every operation is timed */
- if (time_idx == IO_COL_INVALID)
- continue;
-
if (!nulls[op_idx])
{
- PgStat_Counter time =
- bktype_stats->times[io_obj][io_context][io_op];
+ /* not every operation is timed */
+ if (time_idx != IO_COL_INVALID)
+ {
+ PgStat_Counter time =
+ bktype_stats->times[io_obj][io_context][io_op];
+
+ values[time_idx] = Float8GetDatum(pg_stat_us_to_ms(time));
+ }
- values[time_idx] = Float8GetDatum(pg_stat_us_to_ms(time));
+ /* not every IO is tracked in bytes */
+ if (byte_idx != IO_COL_INVALID)
+ {
+ /* far larger than the 20 digits a 64-bit unsigned value can need */
+ char buf[256];
+ /* same type as the bytes[] counters, so UINT64_FORMAT matches */
+ uint64 byte =
+ bktype_stats->bytes[io_obj][io_context][io_op];
+
+ /* Convert to numeric by way of its decimal text representation */
+ snprintf(buf, sizeof buf, UINT64_FORMAT, byte);
+ values[byte_idx] = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(buf),
+ ObjectIdGetDatum(0),
+ Int32GetDatum(-1));
+ }
}
else
- nulls[time_idx] = true;
+ {
+ if (time_idx != IO_COL_INVALID)
+ nulls[time_idx] = true;
+ if (byte_idx != IO_COL_INVALID)
+ nulls[byte_idx] = true;
+ }
}
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202501101
+#define CATALOG_VERSION_NO 202501141
#endif
proname => 'pg_stat_get_io', prorows => '30', proretset => 't',
provolatile => 'v', proparallel => 'r', prorettype => 'record',
proargtypes => '',
- proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}',
- proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
- proargnames => '{backend_type,object,context,reads,read_time,writes,write_time,writebacks,writeback_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
+ proallargtypes => '{text,text,text,int8,numeric,float8,int8,numeric,float8,int8,float8,int8,numeric,float8,int8,int8,int8,int8,float8,timestamptz}',
+ proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+ proargnames => '{backend_type,object,context,reads,read_bytes,read_time,writes,write_bytes,write_time,writebacks,writeback_time,extends,extend_bytes,extend_time,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
prosrc => 'pg_stat_get_io' },
{ oid => '8806', descr => 'statistics: backend IO statistics',
proname => 'pg_stat_get_backend_io', prorows => '5', proretset => 't',
provolatile => 'v', proparallel => 'r', prorettype => 'record',
proargtypes => 'int4',
- proallargtypes => '{int4,text,text,text,int8,float8,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}',
- proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
- proargnames => '{backend_pid,backend_type,object,context,reads,read_time,writes,write_time,writebacks,writeback_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
+ proallargtypes => '{int4,text,text,text,int8,numeric,float8,int8,numeric,float8,int8,float8,int8,numeric,float8,int8,int8,int8,int8,float8,timestamptz}',
+ proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+ proargnames => '{backend_pid,backend_type,object,context,reads,read_bytes,read_time,writes,write_bytes,write_time,writebacks,writeback_time,extends,extend_bytes,extend_time,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
prosrc => 'pg_stat_get_backend_io' },
{ oid => '1136', descr => 'statistics: information about WAL activity',
#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1)
+/*
+ * Enumeration of IO operations.
+ *
+ * This enum categorizes IO operations into two groups, depending on whether
+ * they are tracked in bytes.
+ *
+ * Keep IOOP_EXTEND as the first and IOOP_WRITE as the last member of the
+ * tracked-in-bytes group, and keep the two groups in this order:
+ * pgstat_is_ioop_tracked_in_bytes() and IOOP_NUM_TYPES depend on it.
+ */
typedef enum IOOp
{
+ /* IOs not tracked in bytes */
IOOP_EVICT,
- IOOP_EXTEND,
IOOP_FSYNC,
IOOP_HIT,
- IOOP_READ,
IOOP_REUSE,
- IOOP_WRITE,
IOOP_WRITEBACK,
+
+ /* IOs tracked in bytes */
+ IOOP_EXTEND,
+ IOOP_READ,
+ IOOP_WRITE,
} IOOp;
-#define IOOP_NUM_TYPES (IOOP_WRITEBACK + 1)
+#define IOOP_NUM_TYPES (IOOP_WRITE + 1)
typedef struct PgStat_BktypeIO
{
+ uint64 bytes[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
} PgStat_BktypeIO;
typedef struct PgStat_PendingIO
{
+ uint64 bytes[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
} PgStat_PendingIO;
extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
BackendType bktype);
extern void pgstat_count_io_op(IOObject io_object, IOContext io_context,
- IOOp io_op, uint32 cnt);
+ IOOp io_op, uint32 cnt, uint64 bytes);
extern instr_time pgstat_prepare_io_time(bool track_io_guc);
extern void pgstat_count_io_op_time(IOObject io_object, IOContext io_context,
- IOOp io_op, instr_time start_time, uint32 cnt);
+ IOOp io_op, instr_time start_time,
+ uint32 cnt, uint64 bytes);
extern PgStat_IO *pgstat_fetch_stat_io(void);
extern const char *pgstat_get_io_context_name(IOContext io_context);
object,
context,
reads,
+ read_bytes,
read_time,
writes,
+ write_bytes,
write_time,
writebacks,
writeback_time,
extends,
+ extend_bytes,
extend_time,
- op_bytes,
hits,
evictions,
reuses,
fsyncs,
fsync_time,
stats_reset
- FROM pg_stat_get_io() b(backend_type, object, context, reads, read_time, writes, write_time, writebacks, writeback_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset);
+ FROM pg_stat_get_io() b(backend_type, object, context, reads, read_bytes, read_time, writes, write_bytes, write_time, writebacks, writeback_time, extends, extend_bytes, extend_time, hits, evictions, reuses, fsyncs, fsync_time, stats_reset);
pg_stat_progress_analyze| SELECT s.pid,
s.datid,
d.datname,