@@ -529,8 +529,13 @@ static inline BufferDesc *BufferAlloc(SMgrRelation smgr,
 static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress);
 static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete);
 static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context);
+static bool PrepareFlushBuffer(BufferDesc *bufdesc, uint32 *buf_state, XLogRecPtr *lsn);
+static void DoFlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
+						  IOContext io_context, XLogRecPtr buffer_lsn);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
 						IOObject io_object, IOContext io_context);
+static void CleanVictimBuffer(BufferDesc *bufdesc, uint32 *buf_state,
+							  bool from_ring, IOContext io_context);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
 									   ForkNumber forkNum,
 									   BlockNumber nForkBlock,
@@ -2414,12 +2419,8 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
 			continue;
 		}
 
-		/* OK, do the I/O */
-		FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
-		LWLockRelease(content_lock);
-
-		ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
-									  &buf_hdr->tag);
+		/* Content lock is released inside CleanVictimBuffer */
+		CleanVictimBuffer(buf_hdr, &buf_state, from_ring, io_context);
 	}
 
 	if (buf_state & BM_VALID)
@@ -4246,53 +4247,66 @@ static void
 FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 			IOContext io_context)
 {
-	XLogRecPtr	recptr;
-	ErrorContextCallback errcallback;
-	instr_time	io_start;
-	Block		bufBlock;
-	char	   *bufToWrite;
 	uint32		buf_state;
+	XLogRecPtr	lsn;
 
-	/*
-	 * Try to start an I/O operation.  If StartBufferIO returns false, then
-	 * someone else flushed the buffer before we could, so we need not do
-	 * anything.
-	 */
-	if (!StartBufferIO(buf, false, false))
-		return;
+	if (PrepareFlushBuffer(buf, &buf_state, &lsn))
+		DoFlushBuffer(buf, reln, io_object, io_context, lsn);
+}
 
-	/* Setup error traceback support for ereport() */
-	errcallback.callback = shared_buffer_write_error_callback;
-	errcallback.arg = buf;
-	errcallback.previous = error_context_stack;
-	error_context_stack = &errcallback;
+/*
+ * Prepare and write out a dirty victim buffer.
+ *
+ * The buffer must be pinned, the content lock must be held exclusively, and
+ * the buffer header spinlock must not be held. The exclusive lock is released
+ * and the buffer is returned pinned but not locked.
+ *
+ * bufdesc and buf_state may be modified.
+ */
+static void
+CleanVictimBuffer(BufferDesc *bufdesc, uint32 *buf_state,
+				  bool from_ring, IOContext io_context)
+{
 
-	/* Find smgr relation for buffer */
-	if (reln == NULL)
-		reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
+	XLogRecPtr	max_lsn = InvalidXLogRecPtr;
+	LWLock	   *content_lock;
 
-	TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
-										buf->tag.blockNum,
-										reln->smgr_rlocator.locator.spcOid,
-										reln->smgr_rlocator.locator.dbOid,
-										reln->smgr_rlocator.locator.relNumber);
+	Assert(*buf_state & BM_DIRTY);
 
-	buf_state = LockBufHdr(buf);
+	/* Set up this victim buffer to be flushed */
+	if (!PrepareFlushBuffer(bufdesc, buf_state, &max_lsn))
+		return;
 
+	DoFlushBuffer(bufdesc, NULL, IOOBJECT_RELATION, io_context, max_lsn);
+	content_lock = BufferDescriptorGetContentLock(bufdesc);
+	LWLockRelease(content_lock);
+	ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
+								  &bufdesc->tag);
+}
+
+/*
+ * Prepare the buffer described by bufdesc for writing. buf_state and lsn are
+ * output parameters. Returns true if the buffer actually needs writing and
+ * false otherwise. All three parameters may be modified.
+ */
+static bool
+PrepareFlushBuffer(BufferDesc *bufdesc, uint32 *buf_state, XLogRecPtr *lsn)
+{
 	/*
-	 * Run PageGetLSN while holding header lock, since we don't have the
-	 * buffer locked exclusively in all cases.
+	 * Try to start an I/O operation.  If StartBufferIO returns false, then
+	 * someone else flushed the buffer before we could, so we need not do
+	 * anything.
 	 */
-	recptr = BufferGetLSN(buf);
+	if (!StartBufferIO(bufdesc, false, false))
+		return false;
 
-	/* To check if block content changes while flushing. - vadim 01/17/97 */
-	buf_state &= ~BM_JUST_DIRTIED;
-	UnlockBufHdr(buf, buf_state);
+	*lsn = InvalidXLogRecPtr;
+	*buf_state = LockBufHdr(bufdesc);
 
 	/*
-	 * Force XLOG flush up to buffer's LSN.  This implements the basic WAL
-	 * rule that log updates must hit disk before any of the data-file changes
-	 * they describe do.
+	 * Record the buffer's LSN. We will force XLOG flush up to buffer's LSN.
+	 * This implements the basic WAL rule that log updates must hit disk
+	 * before any of the data-file changes they describe do.
 	 *
 	 * However, this rule does not apply to unlogged relations, which will be
 	 * lost after a crash anyway.  Most unlogged relation pages do not bear
@@ -4305,9 +4319,50 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 	 * happen, attempting to flush WAL through that location would fail, with
 	 * disastrous system-wide consequences.  To make sure that can't happen,
 	 * skip the flush if the buffer isn't permanent.
+	 *
+	 * We must hold the buffer header lock when examining the page LSN, since
+	 * we don't have the buffer exclusively locked in all cases.
 	 */
-	if (buf_state & BM_PERMANENT)
-		XLogFlush(recptr);
+	if (*buf_state & BM_PERMANENT)
+		*lsn = BufferGetLSN(bufdesc);
+
+	/* To check if block content changes while flushing. - vadim 01/17/97 */
+	*buf_state &= ~BM_JUST_DIRTIED;
+	UnlockBufHdr(bufdesc, *buf_state);
+	return true;
+}
+
+/*
+ * Actually do the write I/O to clean a buffer. buf and reln may be modified.
+ */
+static void
+DoFlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
+			  IOContext io_context, XLogRecPtr buffer_lsn)
+{
+	ErrorContextCallback errcallback;
+	instr_time	io_start;
+	Block		bufBlock;
+	char	   *bufToWrite;
+
+	/* Setup error traceback support for ereport() */
+	errcallback.callback = shared_buffer_write_error_callback;
+	errcallback.arg = buf;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
+	/* Find smgr relation for buffer */
+	if (reln == NULL)
+		reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
+
+	TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
+										buf->tag.blockNum,
+										reln->smgr_rlocator.locator.spcOid,
+										reln->smgr_rlocator.locator.dbOid,
+										reln->smgr_rlocator.locator.relNumber);
+
+	/* Force XLOG flush up to buffer's LSN */
+	if (!XLogRecPtrIsInvalid(buffer_lsn))
+		XLogFlush(buffer_lsn);
 
 	/*
 	 * Now it's safe to write the buffer to disk.  Note that no one else should
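
For orientation, here is a simplified, self-contained C sketch of the control flow these hunks introduce: PrepareFlushBuffer() claims the I/O and captures the page LSN under the buffer header lock, DoFlushBuffer() flushes WAL and writes the block, and FlushBuffer() becomes the composition of the two (CleanVictimBuffer() composes them the same way for the eviction path). Every type and helper below is a stub standing in for the real bufmgr primitive of the same or similar name, so this models the division of labor only, not the actual locking, pinning, or I/O.

/*
 * Simplified model of the refactored flush path; stubs, not bufmgr.c code.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t XLogRecPtr;
#define InvalidXLogRecPtr	((XLogRecPtr) 0)

#define BM_DIRTY			(1U << 0)
#define BM_PERMANENT		(1U << 1)
#define BM_JUST_DIRTIED		(1U << 2)

typedef struct BufferDesc
{
	uint32_t	state;		/* stand-in for the atomic buf_state word */
	XLogRecPtr	page_lsn;	/* stand-in for PageGetLSN() on the page */
} BufferDesc;

/* Stub primitives; the real ones live in bufmgr.c and xlog.c. */
static bool StartBufferIO(BufferDesc *buf) { (void) buf; return true; }
static uint32_t LockBufHdr(BufferDesc *buf) { return buf->state; }
static void UnlockBufHdr(BufferDesc *buf, uint32_t s) { buf->state = s; }
static void XLogFlush(XLogRecPtr lsn) { printf("XLogFlush(%llu)\n", (unsigned long long) lsn); }
static void WriteBlock(BufferDesc *buf) { (void) buf; printf("write data block\n"); }

/*
 * Step 1: claim the I/O and capture the LSN under the header lock, clearing
 * BM_JUST_DIRTIED so a concurrent re-dirtying is detectable later.
 */
static bool
PrepareFlushBuffer(BufferDesc *bufdesc, uint32_t *buf_state, XLogRecPtr *lsn)
{
	if (!StartBufferIO(bufdesc))
		return false;			/* someone else flushed it first */

	*lsn = InvalidXLogRecPtr;
	*buf_state = LockBufHdr(bufdesc);
	if (*buf_state & BM_PERMANENT)
		*lsn = bufdesc->page_lsn;	/* WAL-before-data rule applies */
	*buf_state &= ~BM_JUST_DIRTIED;
	UnlockBufHdr(bufdesc, *buf_state);
	return true;
}

/* Step 2: flush WAL up to the captured LSN, then write the block. */
static void
DoFlushBuffer(BufferDesc *buf, XLogRecPtr buffer_lsn)
{
	if (buffer_lsn != InvalidXLogRecPtr)
		XLogFlush(buffer_lsn);
	WriteBlock(buf);
}

/* FlushBuffer() is now just the composition of the two steps. */
static void
FlushBuffer(BufferDesc *buf)
{
	uint32_t	buf_state;
	XLogRecPtr	lsn;

	if (PrepareFlushBuffer(buf, &buf_state, &lsn))
		DoFlushBuffer(buf, lsn);
}

int
main(void)
{
	BufferDesc	victim = {BM_DIRTY | BM_PERMANENT, 42};

	FlushBuffer(&victim);		/* prints XLogFlush(42), then the write */
	return 0;
}

Presumably the value of separating preparation from the write is that a caller can prepare a buffer (or, later, several buffers) before performing the writes; in the hunks shown, the split is used to give the victim-eviction path its own wrapper, CleanVictimBuffer(), which also takes over releasing the content lock and scheduling writeback.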