// Copyright 2014 The PDFium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "public/fpdf_flatten.h" #include #include #include #include #include #include "constants/annotation_common.h" #include "constants/annotation_flags.h" #include "constants/page_object.h" #include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h" #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/page/cpdf_pageobject.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" #include "core/fpdfapi/parser/fpdf_parser_utility.h" #include "core/fpdfdoc/cpdf_annot.h" #include "core/fxcrt/fx_string_wrappers.h" #include "fpdfsdk/cpdfsdk_helpers.h" #include "third_party/base/notreached.h" enum FPDF_TYPE { MAX, MIN }; enum FPDF_VALUE { TOP, LEFT, RIGHT, BOTTOM }; namespace { bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) { constexpr float kMinSize = 0.000001f; if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize) return false; if (rcPage.IsEmpty()) return true; constexpr float kMinBorderSize = 10.000001f; return rect.left - rcPage.left >= -kMinBorderSize && rect.right - rcPage.right <= kMinBorderSize && rect.top - rcPage.top <= kMinBorderSize && rect.bottom - rcPage.bottom >= -kMinBorderSize; } void GetContentsRect(CPDF_Document* pDoc, RetainPtr pDict, std::vector* pRectArray) { auto pPDFPage = pdfium::MakeRetain(pDoc, pDict); pPDFPage->ParseContent(); for (const auto& pPageObject : *pPDFPage) { const CFX_FloatRect& rc = pPageObject->GetRect(); if (IsValidRect(rc, pDict->GetRectFor(pdfium::page_object::kMediaBox))) pRectArray->push_back(rc); } } void ParserStream(const CPDF_Dictionary* pPageDic, CPDF_Dictionary* pStream, std::vector* pRectArray, std::vector* pObjectArray) { if (!pStream) return; CFX_FloatRect rect; if (pStream->KeyExist("Rect")) rect = pStream->GetRectFor("Rect"); else if (pStream->KeyExist("BBox")) rect = pStream->GetRectFor("BBox"); if (IsValidRect(rect, pPageDic->GetRectFor(pdfium::page_object::kMediaBox))) pRectArray->push_back(rect); pObjectArray->push_back(pStream); } int ParserAnnots(CPDF_Document* pSourceDoc, RetainPtr pPageDic, std::vector* pRectArray, std::vector* pObjectArray, int nUsage) { if (!pSourceDoc) return FLATTEN_FAIL; GetContentsRect(pSourceDoc, pPageDic, pRectArray); RetainPtr pAnnots = pPageDic->GetArrayFor("Annots"); if (!pAnnots) return FLATTEN_NOTHINGTODO; CPDF_ArrayLocker locker(pAnnots); for (const auto& pAnnot : locker) { RetainPtr pAnnotDict = ToDictionary(pAnnot->GetMutableDirect()); if (!pAnnotDict) continue; ByteString sSubtype = pAnnotDict->GetByteStringFor(pdfium::annotation::kSubtype); if (sSubtype == "Popup") continue; int nAnnotFlag = pAnnotDict->GetIntegerFor("F"); if (nAnnotFlag & pdfium::annotation_flags::kHidden) continue; bool bParseStream; if (nUsage == FLAT_NORMALDISPLAY) bParseStream = !(nAnnotFlag & pdfium::annotation_flags::kInvisible); else bParseStream = !!(nAnnotFlag & pdfium::annotation_flags::kPrint); if (bParseStream) ParserStream(pPageDic.Get(), pAnnotDict.Get(), pRectArray, pObjectArray); } return FLATTEN_SUCCESS; } float GetMinMaxValue(const std::vector& array, FPDF_TYPE type, FPDF_VALUE value) { if (array.empty()) return 0.0f; size_t nRects = array.size(); std::vector pArray(nRects); switch (value) { case LEFT: for (size_t i = 0; i < nRects; i++) pArray[i] = array[i].left; break; case TOP: for (size_t i = 0; i < nRects; i++) pArray[i] = array[i].top; break; case RIGHT: for (size_t i = 0; i < nRects; i++) pArray[i] = array[i].right; break; case BOTTOM: for (size_t i = 0; i < nRects; i++) pArray[i] = array[i].bottom; break; default: NOTREACHED_NORETURN(); return 0.0f; } float fRet = pArray[0]; if (type == MAX) { for (size_t i = 1; i < nRects; i++) fRet = std::max(fRet, pArray[i]); } else { for (size_t i = 1; i < nRects; i++) fRet = std::min(fRet, pArray[i]); } return fRet; } CFX_FloatRect CalculateRect(std::vector* pRectArray) { CFX_FloatRect rcRet; rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT); rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP); rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT); rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM); return rcRet; } ByteString GenerateFlattenedContent(const ByteString& key) { return "q 1 0 0 1 0 0 cm /" + key + " Do Q"; } RetainPtr NewIndirectContentsStreamReference( CPDF_Document* pDocument, const ByteString& contents) { auto pNewContents = pDocument->NewIndirect(pDocument->New()); pNewContents->SetData(contents.raw_span()); return pNewContents->MakeReference(pDocument); } void SetPageContents(const ByteString& key, CPDF_Dictionary* pPage, CPDF_Document* pDocument) { RetainPtr pContentsArray = pPage->GetMutableArrayFor(pdfium::page_object::kContents); RetainPtr pContentsStream = pPage->GetMutableStreamFor(pdfium::page_object::kContents); if (!pContentsStream && !pContentsArray) { if (!key.IsEmpty()) { pPage->SetFor(pdfium::page_object::kContents, NewIndirectContentsStreamReference( pDocument, GenerateFlattenedContent(key))); } return; } pPage->ConvertToIndirectObjectFor(pdfium::page_object::kContents, pDocument); if (pContentsArray) { pContentsArray->InsertAt( 0, NewIndirectContentsStreamReference(pDocument, "q")); pContentsArray->Append(NewIndirectContentsStreamReference(pDocument, "Q")); } else { ByteString sStream = "q\n"; { auto pAcc = pdfium::MakeRetain(pContentsStream); pAcc->LoadAllDataFiltered(); sStream += ByteString(pAcc->GetSpan()); sStream += "\nQ"; } pContentsStream->SetDataAndRemoveFilter(sStream.raw_span()); pContentsArray = pDocument->NewIndirect(); pContentsArray->AppendNew(pDocument, pContentsStream->GetObjNum()); pPage->SetNewFor(pdfium::page_object::kContents, pDocument, pContentsArray->GetObjNum()); } if (!key.IsEmpty()) { pContentsArray->Append(NewIndirectContentsStreamReference( pDocument, GenerateFlattenedContent(key))); } } CFX_Matrix GetMatrix(const CFX_FloatRect& rcAnnot, const CFX_FloatRect& rcStream, const CFX_Matrix& matrix) { if (rcStream.IsEmpty()) return CFX_Matrix(); CFX_FloatRect rcTransformed = matrix.TransformRect(rcStream); rcTransformed.Normalize(); float a = rcAnnot.Width() / rcTransformed.Width(); float d = rcAnnot.Height() / rcTransformed.Height(); float e = rcAnnot.left - rcTransformed.left * a; float f = rcAnnot.bottom - rcTransformed.bottom * d; return CFX_Matrix(a, 0.0f, 0.0f, d, e, f); } } // namespace FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) { CPDF_Page* pPage = CPDFPageFromFPDFPage(page); if (!page) return FLATTEN_FAIL; CPDF_Document* pDocument = pPage->GetDocument(); RetainPtr pPageDict = pPage->GetMutableDict(); if (!pDocument) return FLATTEN_FAIL; std::vector ObjectArray; std::vector RectArray; int iRet = ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag); if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL) return iRet; CFX_FloatRect rcMerger = CalculateRect(&RectArray); CFX_FloatRect rcOriginalMB = pPageDict->GetRectFor(pdfium::page_object::kMediaBox); if (pPageDict->KeyExist(pdfium::page_object::kCropBox)) rcOriginalMB = pPageDict->GetRectFor(pdfium::page_object::kCropBox); rcOriginalMB.Normalize(); if (rcOriginalMB.IsEmpty()) rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f); CFX_FloatRect rcOriginalCB; if (pPageDict->KeyExist(pdfium::page_object::kCropBox)) { rcOriginalCB = pPageDict->GetRectFor(pdfium::page_object::kCropBox); rcOriginalCB.Normalize(); } if (rcOriginalCB.IsEmpty()) rcOriginalCB = rcOriginalMB; rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left); rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right); rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom); rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top); pPageDict->SetRectFor(pdfium::page_object::kMediaBox, rcOriginalMB); pPageDict->SetRectFor(pdfium::page_object::kCropBox, rcOriginalCB); RetainPtr pRes = pPageDict->GetOrCreateDictFor(pdfium::page_object::kResources); auto pNewXObject = pDocument->NewIndirect(pDocument->New()); RetainPtr pPageXObject = pRes->GetOrCreateDictFor("XObject"); ByteString key; if (!ObjectArray.empty()) { int i = 0; while (i < INT_MAX) { ByteString sKey = ByteString::Format("FFT%d", i); if (!pPageXObject->KeyExist(sKey)) { key = std::move(sKey); break; } ++i; } } SetPageContents(key, pPageDict.Get(), pDocument); RetainPtr pNewXORes; if (!key.IsEmpty()) { pPageXObject->SetNewFor(key, pDocument, pNewXObject->GetObjNum()); RetainPtr pNewOXbjectDic = pNewXObject->GetMutableDict(); pNewXORes = pNewOXbjectDic->SetNewFor("Resources"); pNewOXbjectDic->SetNewFor("Type", "XObject"); pNewOXbjectDic->SetNewFor("Subtype", "Form"); pNewOXbjectDic->SetNewFor("FormType", 1); pNewOXbjectDic->SetRectFor("BBox", rcOriginalCB); } for (size_t i = 0; i < ObjectArray.size(); ++i) { CPDF_Dictionary* pAnnotDict = ObjectArray[i]; if (!pAnnotDict) continue; CFX_FloatRect rcAnnot = pAnnotDict->GetRectFor(pdfium::annotation::kRect); rcAnnot.Normalize(); ByteString sAnnotState = pAnnotDict->GetByteStringFor("AS"); RetainPtr pAnnotAP = pAnnotDict->GetMutableDictFor(pdfium::annotation::kAP); if (!pAnnotAP) continue; RetainPtr pAPStream = pAnnotAP->GetMutableStreamFor("N"); if (!pAPStream) { RetainPtr pAPDict = pAnnotAP->GetMutableDictFor("N"); if (!pAPDict) continue; if (!sAnnotState.IsEmpty()) { pAPStream = pAPDict->GetMutableStreamFor(sAnnotState); } else { if (pAPDict->size() > 0) { CPDF_DictionaryLocker locker(pAPDict); RetainPtr pFirstObj = locker.begin()->second; if (pFirstObj) { if (pFirstObj->IsReference()) pFirstObj = pFirstObj->GetMutableDirect(); if (!pFirstObj->IsStream()) continue; pAPStream.Reset(pFirstObj->AsMutableStream()); } } } } if (!pAPStream) continue; RetainPtr pAPDict = pAPStream->GetDict(); CFX_FloatRect rcStream; if (pAPDict->KeyExist("Rect")) rcStream = pAPDict->GetRectFor("Rect"); else if (pAPDict->KeyExist("BBox")) rcStream = pAPDict->GetRectFor("BBox"); rcStream.Normalize(); if (rcStream.IsEmpty()) continue; RetainPtr pObj = pAPStream; if (pObj->IsInline()) { pObj = pObj->Clone(); pDocument->AddIndirectObject(pObj); } RetainPtr pObjDict = pObj->GetMutableDict(); if (pObjDict) { pObjDict->SetNewFor("Type", "XObject"); pObjDict->SetNewFor("Subtype", "Form"); } RetainPtr pXObject = pNewXORes->GetOrCreateDictFor("XObject"); ByteString sFormName = ByteString::Format("F%d", i); pXObject->SetNewFor(sFormName, pDocument, pObj->GetObjNum()); ByteString sStream; { auto pAcc = pdfium::MakeRetain(pNewXObject); pAcc->LoadAllDataFiltered(); sStream = ByteString(pAcc->GetSpan()); } CFX_Matrix matrix = pAPDict->GetMatrixFor("Matrix"); CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix); m.b = 0; m.c = 0; fxcrt::ostringstream buf; WriteMatrix(buf, m); ByteString str(buf); sStream += ByteString::Format("q %s cm /%s Do Q\n", str.c_str(), sFormName.c_str()); pNewXObject->SetDataAndRemoveFilter(sStream.raw_span()); } pPageDict->RemoveFor("Annots"); return FLATTEN_SUCCESS; }