@@ -534,7 +534,16 @@ static void DoFlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object
534
534
IOContext io_context , XLogRecPtr buffer_lsn );
535
535
static void FlushBuffer (BufferDesc * buf , SMgrRelation reln ,
536
536
IOObject io_object , IOContext io_context );
537
- static void CleanVictimBuffer (BufferDesc * bufdesc , uint32 * buf_state ,
537
+ static BufferDesc * NextStratBufToFlush (BufferAccessStrategy strategy ,
538
+ Buffer sweep_end ,
539
+ XLogRecPtr * lsn ,
540
+ int * sweep_cursor );
541
+ static BufferDesc * PrepareOrRejectEagerFlushBuffer (Buffer bufnum , BlockNumber require ,
542
+ RelFileLocator * rlocator ,
543
+ bool skip_pinned ,
544
+ XLogRecPtr * max_lsn );
545
+ static void CleanVictimBuffer (BufferAccessStrategy strategy ,
546
+ BufferDesc * bufdesc , uint32 * buf_state ,
538
547
bool from_ring , IOContext io_context );
539
548
static void FindAndDropRelationBuffers (RelFileLocator rlocator ,
540
549
ForkNumber forkNum ,
@@ -2420,7 +2429,7 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
2420
2429
}
2421
2430
2422
2431
/* Content lock is released inside CleanVictimBuffer */
2423
- CleanVictimBuffer (buf_hdr , & buf_state , from_ring , io_context );
2432
+ CleanVictimBuffer (strategy , buf_hdr , & buf_state , from_ring , io_context );
2424
2433
}
2425
2434
2426
2435
if (buf_state & BM_VALID )
@@ -4254,6 +4263,40 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
4254
4263
DoFlushBuffer (buf , reln , io_object , io_context , lsn );
4255
4264
}
4256
4265
4266
/*
 * Returns the buffer descriptor of the buffer containing the next block we
 * should eagerly flush or NULL when there are no further buffers to consider
 * writing out.
 *
 * strategy is the buffer access strategy whose ring we are sweeping;
 * sweep_end is the buffer at which the sweep stops (the original victim,
 * which the caller handles itself).  On success, *lsn may be raised to the
 * page LSN of the returned buffer and *sweep_cursor is advanced past it.
 * The returned buffer is pinned, share-locked, and has I/O started; the
 * caller is responsible for completing the flush and releasing it.
 */
static BufferDesc *
NextStratBufToFlush(BufferAccessStrategy strategy,
					Buffer sweep_end,
					XLogRecPtr *lsn, int *sweep_cursor)
{
	Buffer		bufnum;
	BufferDesc *bufdesc;

	while ((bufnum =
			StrategySweepNextBuffer(strategy, sweep_cursor)) != sweep_end)
	{
		/*
		 * For BAS_BULKWRITE, once you hit an InvalidBuffer, the remaining
		 * buffers in the ring will be invalid.
		 */
		if (!BufferIsValid(bufnum))
			break;

		/*
		 * No block-number or relation constraint here: any dirty,
		 * unpinned ring member whose WAL is already flushed is eligible
		 * (hence InvalidBlockNumber / NULL / skip_pinned = true).
		 */
		if ((bufdesc = PrepareOrRejectEagerFlushBuffer(bufnum,
													   InvalidBlockNumber,
													   NULL,
													   true,
													   lsn)) != NULL)
			return bufdesc;
	}

	return NULL;
}
4299
+
4257
4300
/*
 * Prepare and write out a dirty victim buffer.
 *
 * When the buffer came from a strategy ring that supports eager flushing,
 * also sweep the ring and opportunistically write out other dirty buffers
 * that can be flushed without a WAL flush, amortizing the cost of cleaning
 * the victim.
 *
 * bufdesc and buf_state may be modified.
 */
static void
CleanVictimBuffer(BufferAccessStrategy strategy,
				  BufferDesc *bufdesc, uint32 *buf_state,
				  bool from_ring, IOContext io_context)
{

	XLogRecPtr	max_lsn = InvalidXLogRecPtr;
	LWLock	   *content_lock;
	bool		first_buffer = true;

	Assert(*buf_state & BM_DIRTY);

	/* Set up this victim buffer to be flushed */
	if (!PrepareFlushBuffer(bufdesc, buf_state, &max_lsn))
		return;

	if (from_ring && StrategySupportsEagerFlush(strategy))
	{
		/* Sweep the whole ring, stopping when we come back to the victim */
		Buffer		sweep_end = BufferDescriptorGetBuffer(bufdesc);
		int			cursor = StrategySweepStart(strategy);

		/* Clean victim buffer and find more to flush opportunistically */
		do
		{
			DoFlushBuffer(bufdesc, NULL, IOOBJECT_RELATION, io_context, max_lsn);
			content_lock = BufferDescriptorGetContentLock(bufdesc);
			LWLockRelease(content_lock);
			ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
										  &bufdesc->tag);
			/* We leave the first buffer pinned for the caller */
			if (!first_buffer)
				UnpinBuffer(bufdesc);
			first_buffer = false;
		} while ((bufdesc = NextStratBufToFlush(strategy, sweep_end,
												&max_lsn, &cursor)) != NULL);
	}
	else
	{
		/* Single-buffer path: flush the victim and hand it back pinned */
		DoFlushBuffer(bufdesc, NULL, IOOBJECT_RELATION, io_context, max_lsn);
		content_lock = BufferDescriptorGetContentLock(bufdesc);
		LWLockRelease(content_lock);
		ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
									  &bufdesc->tag);
	}
}
4354
+
4355
+ /*
4356
+ * Prepare bufdesc for eager flushing.
4357
+ *
4358
+ * Given bufnum, return the block -- the pointer to the block data in memory
4359
+ * -- which we will opportunistically flush or NULL if this buffer does not
4360
+ * contain a block that should be flushed.
4361
+ *
4362
+ * require is the BlockNumber required by the caller. Some callers may require
4363
+ * a specific BlockNumber to be in bufnum because they are assembling a
4364
+ * contiguous run of blocks.
4365
+ *
4366
+ * If the caller needs the block to be from a specific relation, rlocator will
4367
+ * be provided.
4368
+ */
4369
+ BufferDesc *
4370
+ PrepareOrRejectEagerFlushBuffer (Buffer bufnum , BlockNumber require ,
4371
+ RelFileLocator * rlocator , bool skip_pinned ,
4372
+ XLogRecPtr * max_lsn )
4373
+ {
4374
+ BufferDesc * bufdesc ;
4375
+ uint32 buf_state ;
4376
+ XLogRecPtr lsn ;
4377
+ BlockNumber blknum ;
4378
+ LWLock * content_lock ;
4379
+
4380
+ if (!BufferIsValid (bufnum ))
4381
+ return NULL ;
4382
+
4383
+ Assert (!BufferIsLocal (bufnum ));
4384
+
4385
+ bufdesc = GetBufferDescriptor (bufnum - 1 );
4386
+
4387
+ /* Block may need to be in a specific relation */
4388
+ if (rlocator &&
4389
+ !RelFileLocatorEquals (BufTagGetRelFileLocator (& bufdesc -> tag ),
4390
+ * rlocator ))
4391
+ return NULL ;
4392
+
4393
+ /* Must do this before taking the buffer header spinlock */
4394
+ ResourceOwnerEnlarge (CurrentResourceOwner );
4395
+ ReservePrivateRefCountEntry ();
4396
+
4397
+ buf_state = LockBufHdr (bufdesc );
4398
+
4399
+ if (!(buf_state & BM_DIRTY ) || !(buf_state & BM_VALID ))
4400
+ goto except_unlock_header ;
4401
+
4402
+ /* We don't eagerly flush buffers used by others */
4403
+ if (skip_pinned &&
4404
+ (BUF_STATE_GET_REFCOUNT (buf_state ) > 0 ||
4405
+ BUF_STATE_GET_USAGECOUNT (buf_state ) > 1 ))
4406
+ goto except_unlock_header ;
4407
+
4408
+ /* Get page LSN while holding header lock */
4409
+ lsn = BufferGetLSN (bufdesc );
4410
+
4411
+ PinBuffer_Locked (bufdesc );
4412
+ CheckBufferIsPinnedOnce (bufnum );
4413
+
4414
+ blknum = BufferGetBlockNumber (bufnum );
4415
+ Assert (BlockNumberIsValid (blknum ));
4416
+
4417
+ /* If we'll have to flush WAL to flush the block, we're done */
4418
+ if (buf_state & BM_PERMANENT && XLogNeedsFlush (lsn ))
4419
+ goto except_unpin_buffer ;
4420
+
4421
+ /* We only include contiguous blocks in the run */
4422
+ if (BlockNumberIsValid (require ) && blknum != require )
4423
+ goto except_unpin_buffer ;
4424
+
4281
4425
content_lock = BufferDescriptorGetContentLock (bufdesc );
4426
+ if (!LWLockConditionalAcquire (content_lock , LW_SHARED ))
4427
+ goto except_unpin_buffer ;
4428
+
4429
+ /*
4430
+ * Now that we have the content lock, we need to recheck if we need to
4431
+ * flush WAL.
4432
+ */
4433
+ buf_state = LockBufHdr (bufdesc );
4434
+ lsn = BufferGetLSN (bufdesc );
4435
+ UnlockBufHdr (bufdesc , buf_state );
4436
+
4437
+ if (buf_state & BM_PERMANENT && XLogNeedsFlush (lsn ))
4438
+ goto except_unlock_content ;
4439
+
4440
+ /* Try to start an I/O operation */
4441
+ if (!StartBufferIO (bufdesc , false, true))
4442
+ goto except_unlock_content ;
4443
+
4444
+ if (lsn > * max_lsn )
4445
+ * max_lsn = lsn ;
4446
+ buf_state = LockBufHdr (bufdesc );
4447
+ buf_state &= ~BM_JUST_DIRTIED ;
4448
+ UnlockBufHdr (bufdesc , buf_state );
4449
+
4450
+ return bufdesc ;
4451
+
4452
+ except_unlock_content :
4282
4453
LWLockRelease (content_lock );
4283
- ScheduleBufferTagForWriteback (& BackendWritebackContext , io_context ,
4284
- & bufdesc -> tag );
4454
+
4455
+ except_unpin_buffer :
4456
+ UnpinBuffer (bufdesc );
4457
+ return NULL ;
4458
+
4459
+ except_unlock_header :
4460
+ UnlockBufHdr (bufdesc , buf_state );
4461
+ return NULL ;
4285
4462
}
4286
4463
4287
4464
/*
0 commit comments