7
mirror of https://gitlab.com/kicad/code/kicad.git synced 2025-04-14 12:09:35 +00:00

Update embedded files hash to use Murmur3

SHA256 is fine for one-offs but for large libraries where we might be
running the hash on hundreds of files, the speed difference is
appreciable.  We don't require cryptographic hashing, just a check that
the original file hasn't been corrupted, so Murmur3 satisfies our basic
requirement.
This commit is contained in:
Seth Hillbrand 2024-08-19 14:28:55 -07:00
parent 89849ccff1
commit 5ba50c26a9
8 changed files with 100 additions and 28 deletions

View File

@ -33,6 +33,7 @@
#include <embedded_files.h>
#include <kiid.h>
#include <mmh3_hash.h>
#include <paths.h>
@ -199,7 +200,7 @@ void EMBEDDED_FILES::WriteEmbeddedFiles( OUTPUTFORMATTER& aOut, int aNestLevel,
aOut.Print( aNestLevel + 2, ")\n" ); // Close data
}
aOut.Print( aNestLevel + 2, "(checksum \"%s\")\n", file.data_sha.c_str() );
aOut.Print( aNestLevel + 2, "(checksum \"%s\")\n", file.data_hash.c_str() );
aOut.Print( aNestLevel + 1, ")\n" ); // Close file
}
@ -232,7 +233,9 @@ EMBEDDED_FILES::RETURN_CODE EMBEDDED_FILES::CompressAndEncode( EMBEDDED_FILE& aF
return RETURN_CODE::OUT_OF_MEMORY;
}
picosha2::hash256_hex_string( aFile.decompressedData, aFile.data_sha );
MMH3_HASH hash( EMBEDDED_FILES::Seed() );
hash.add( aFile.decompressedData );
aFile.data_hash = hash.digest().ToString();
return RETURN_CODE::OK;
}
@ -285,11 +288,19 @@ EMBEDDED_FILES::RETURN_CODE EMBEDDED_FILES::DecompressAndDecode( EMBEDDED_FILE&
}
aFile.decompressedData.resize( decompressedSize );
std::string test_hash;
std::string new_hash;
std::string new_sha;
picosha2::hash256_hex_string( aFile.decompressedData, new_sha );
MMH3_HASH hash( EMBEDDED_FILES::Seed() );
hash.add( aFile.decompressedData );
new_hash = hash.digest().ToString();
if( new_sha != aFile.data_sha )
if( aFile.data_hash.length() == 64 )
picosha2::hash256_hex_string( aFile.decompressedData, test_hash );
else
test_hash = new_hash;
if( test_hash != aFile.data_hash )
{
wxLogTrace( wxT( "KICAD_EMBED" ),
wxT( "%s:%s:%d\n * Checksum error in embedded file '%s'" ),
@ -298,6 +309,8 @@ EMBEDDED_FILES::RETURN_CODE EMBEDDED_FILES::DecompressAndDecode( EMBEDDED_FILE&
return RETURN_CODE::CHECKSUM_ERROR;
}
aFile.data_hash = new_hash;
return RETURN_CODE::OK;
}
@ -355,7 +368,7 @@ void EMBEDDED_FILES_PARSER::ParseEmbedded( EMBEDDED_FILES* aFiles )
if( !IsSymbol( token ) )
Expecting( "checksum data" );
file->data_sha = CurStr();
file->data_hash = CurStr();
NeedRIGHT();
break;
@ -434,9 +447,7 @@ void EMBEDDED_FILES_PARSER::ParseEmbedded( EMBEDDED_FILES* aFiles )
{
if( !file->compressedEncodedData.empty() )
{
EMBEDDED_FILES::DecompressAndDecode( *file );
if( !file->Validate() )
if( EMBEDDED_FILES::DecompressAndDecode( *file ) == EMBEDDED_FILES::RETURN_CODE::CHECKSUM_ERROR )
THROW_PARSE_ERROR( "Checksum error in embedded file " + file->name, CurSource(),
CurLine(), CurLineNumber(), CurOffset() );
}
@ -469,9 +480,9 @@ wxFileName EMBEDDED_FILES::GetTemporaryFileName( const wxString& aName ) const
wxFileName inputName( aName );
// Store the cache file name using the data SHA to allow for shared data between
// Store the cache file name using the data hash to allow for shared data between
// multiple projects using the same files as well as deconflicting files with the same name
cacheFile.SetName( "kicad_embedded_" + it->second->data_sha );
cacheFile.SetName( "kicad_embedded_" + it->second->data_hash );
cacheFile.SetExt( inputName.GetExt() );
if( cacheFile.FileExists() && cacheFile.IsFileReadable() )

View File

@ -50,8 +50,8 @@
//#define SEXPR_SYMBOL_LIB_FILE_VERSION 20220914 // Don't save property ID
//#define SEXPR_SYMBOL_LIB_FILE_VERSION 20230620 // ki_description -> Description Field
//#define SEXPR_SYMBOL_LIB_FILE_VERSION 20231120 // generator_version; V8 cleanups
#define SEXPR_SYMBOL_LIB_FILE_VERSION 20240529 // Embedded Files
//#define SEXPR_SYMBOL_LIB_FILE_VERSION 20240529 // Embedded Files
#define SEXPR_SYMBOL_LIB_FILE_VERSION 20240819 // Embedded Files - Update hash algorithm to Murmur3
/**
* Schematic file version.
*/
@ -109,4 +109,5 @@
//#define SEXPR_SCHEMATIC_FILE_VERSION 20240602 // Sheet attributes
//#define SEXPR_SCHEMATIC_FILE_VERSION 20240620 // Embedded Files
//#define SEXPR_SCHEMATIC_FILE_VERSION 20240716 // Multiple netclass assignments
#define SEXPR_SCHEMATIC_FILE_VERSION 20240812 // Netclass color highlighting
//#define SEXPR_SCHEMATIC_FILE_VERSION 20240812 // Netclass color highlighting
#define SEXPR_SCHEMATIC_FILE_VERSION 20240819 // Embedded Files - Update hash algorithm to Murmur3

View File

@ -1502,7 +1502,7 @@ void SCH_SCREEN::FixupEmbeddedData()
{
embeddedFile->compressedEncodedData = file->compressedEncodedData;
embeddedFile->decompressedData = file->decompressedData;
embeddedFile->data_sha = file->data_sha;
embeddedFile->data_hash = file->data_hash;
embeddedFile->is_valid = file->is_valid;
}
}

View File

@ -26,9 +26,10 @@
#include <wx/filename.h>
#include <embedded_files_lexer.h>
#include <wildcards_and_files_ext.h>
#include <richio.h>
#include <mmh3_hash.h>
#include <picosha2.h>
#include <richio.h>
#include <wildcards_and_files_ext.h>
class EMBEDDED_FILES
{
@ -50,11 +51,22 @@ public:
{}
bool Validate()
{
MMH3_HASH hash( EMBEDDED_FILES::Seed() );
hash.add( decompressedData );
is_valid = ( hash.digest().ToString() == data_hash );
return is_valid;
}
// This is the old way of validating the file. It is deprecated and retained only
// to validate files that were previously embedded.
//
// Computes the SHA-256 hex string of decompressedData via picosha2 and compares it
// with the stored checksum string.  The comparison result is stored in is_valid and
// returned to the caller.
bool Validate_SHA256()
{
std::string new_sha;
picosha2::hash256_hex_string( decompressedData, new_sha );
// NOTE(review): the two assignments below look like the before/after lines of the
// data_sha -> data_hash member rename shown together; presumably only the data_hash
// comparison is meant to remain -- confirm against the real header.
is_valid = ( new_sha == data_sha );
is_valid = ( new_sha == data_hash );
return is_valid;
}
@ -68,7 +80,7 @@ public:
bool is_valid;
std::string compressedEncodedData;
std::vector<char> decompressedData;
std::string data_sha;
std::string data_hash;
};
enum class RETURN_CODE : int
@ -216,6 +228,11 @@ public:
return m_embedFonts;
}
// Seed value handed to MMH3_HASH wherever embedded-file data is hashed.
// NOTE(review): checksums written to files are computed with this seed, so changing
// the value would presumably invalidate previously stored checksums -- confirm.
static uint32_t Seed()
{
    constexpr uint32_t embeddedFilesSeed = 0xABBA2345;
    return embeddedFilesSeed;
}
private:
std::map<wxString, EMBEDDED_FILE*> m_files;
std::vector<wxString> m_fontFiles;

View File

@ -70,6 +70,38 @@ public:
len = 0;
}
// Feed a raw byte buffer into the hash.
//
// @param data   pointer to the first byte to hash
// @param length number of bytes available at data
//
// Every complete 16-byte chunk is copied into blocks and processed immediately with
// hashBlock().  Any leftover bytes (1..15) are copied to the start of blocks, followed
// by zero bytes up to the next 4-byte boundary, and len is advanced by the padded size.
//
// NOTE(review): data is always written starting at offset 0 of blocks, so this
// presumably expects len to be a multiple of 16 on entry -- confirm against callers.
// NOTE(review): a leftover that is already a multiple of 4 still gets 4 zero bytes
// appended, and for a 12..15 byte leftover the padded size is exactly 16, so len
// lands on a multiple of 16 without hashBlock() having run on that chunk.  Verify
// that digest() still accounts for those bytes.  Behaviour is intentionally left
// untouched here because changing it would alter the resulting hash values.
FORCE_INLINE void addData( const uint8_t* data, size_t length )
{
    const size_t fullChunks = length / 16;
    const size_t tailBytes = length % 16;

    for( size_t chunk = 0; chunk < fullChunks; ++chunk )
    {
        memcpy( blocks, data, 16 );
        hashBlock();
        data += 16;
        len += 16;
    }

    if( tailBytes == 0 )
        return;

    memcpy( blocks, data, tailBytes );

    // Same value as 4 - ( tailBytes + 4 ) % 4: always in the range 1..4.
    const size_t padBytes = 4 - tailBytes % 4;

    memset( reinterpret_cast<uint8_t*>( blocks ) + tailBytes, 0, padBytes );
    len += tailBytes + padBytes;
}
// Hash the bytes of a string by forwarding its buffer and length to addData().
FORCE_INLINE void add( const std::string& input )
{
    const auto* bytes = reinterpret_cast<const uint8_t*>( input.data() );
    const size_t byteCount = input.length();

    addData( bytes, byteCount );
}
// Hash the contents of a char vector by forwarding its storage and element count
// to addData().
FORCE_INLINE void add( const std::vector<char>& input )
{
    const auto* bytes = reinterpret_cast<const uint8_t*>( input.data() );
    const size_t byteCount = input.size();

    addData( bytes, byteCount );
}
FORCE_INLINE void add( int32_t input )
{
blocks[( len % 16 ) / 4] = input;

View File

@ -974,7 +974,7 @@ void BOARD::FixupEmbeddedData()
{
embeddedFile->compressedEncodedData = file->compressedEncodedData;
embeddedFile->decompressedData = file->decompressedData;
embeddedFile->data_sha = file->data_sha;
embeddedFile->data_hash = file->data_hash;
embeddedFile->is_valid = file->is_valid;
}
}

View File

@ -160,7 +160,8 @@ class PCB_IO_KICAD_SEXPR; // forward decl
//#define SEXPR_BOARD_FILE_VERSION 20240609 // Add 'tenting' keyword
//#define SEXPR_BOARD_FILE_VERSION 20240617 // Table angles
//#define SEXPR_BOARD_FILE_VERSION 20240703 // User layer types
#define SEXPR_BOARD_FILE_VERSION 20240706 // Embedded Files
//#define SEXPR_BOARD_FILE_VERSION 20240706 // Embedded Files
#define SEXPR_BOARD_FILE_VERSION 20240819 // Embedded Files - Update hash algorithm to Murmur3
#define BOARD_FILE_HOST_VERSION 20200825 ///< Earlier files than this include the host tag
#define LEGACY_ARC_FORMATTING 20210925 ///< These were the last to use old arc formatting

View File

@ -19,7 +19,7 @@
#include <magic_enum.hpp>
#include <boost/test/unit_test.hpp>
#include <picosha2.h>
#include <mmh3_hash.h>
#include <embedded_files.h>
#include <random>
@ -34,7 +34,9 @@ BOOST_AUTO_TEST_CASE( CompressAndEncode_OK )
std::string data = "Hello, World!";
file.decompressedData.assign(data.begin(), data.end());
picosha2::hash256_hex_string(file.decompressedData, file.data_sha);
MMH3_HASH hash( EMBEDDED_FILES::Seed() );
hash.add( file.decompressedData );
file.data_hash = hash.digest().ToString();
EMBEDDED_FILES::RETURN_CODE result = EMBEDDED_FILES::CompressAndEncode(file);
BOOST_CHECK_EQUAL(result, EMBEDDED_FILES::RETURN_CODE::OK);
@ -47,7 +49,9 @@ BOOST_AUTO_TEST_CASE( DecompressAndDecode_OK )
std::string data = "Hello, World!";
file.decompressedData.assign( data.begin(), data.end() );
picosha2::hash256_hex_string( file.decompressedData, file.data_sha );
MMH3_HASH hash( EMBEDDED_FILES::Seed() );
hash.add( file.decompressedData );
file.data_hash = hash.digest().ToString();
EMBEDDED_FILES::RETURN_CODE result = EMBEDDED_FILES::CompressAndEncode( file );
BOOST_CHECK_EQUAL( result, EMBEDDED_FILES::RETURN_CODE::OK );
@ -64,7 +68,9 @@ BOOST_AUTO_TEST_CASE( DecompressAndDecode_OK )
file.decompressedData.assign( data.begin(), data.end() );
picosha2::hash256_hex_string( file.decompressedData, file.data_sha );
hash.reset();
hash.add( file.decompressedData );
file.data_hash = hash.digest().ToString();
result = EMBEDDED_FILES::CompressAndEncode( file );
BOOST_CHECK_EQUAL( result, EMBEDDED_FILES::RETURN_CODE::OK );
@ -80,7 +86,9 @@ BOOST_AUTO_TEST_CASE( DecompressAndDecode_OK )
data += static_cast<char>( i % 256 );
file.decompressedData.assign( data.begin(), data.end() );
picosha2::hash256_hex_string( file.decompressedData, file.data_sha );
hash.reset();
hash.add( file.decompressedData );
file.data_hash = hash.digest().ToString();
result = EMBEDDED_FILES::CompressAndEncode( file );
BOOST_CHECK_EQUAL( result, EMBEDDED_FILES::RETURN_CODE::OK );
@ -99,7 +107,9 @@ BOOST_AUTO_TEST_CASE( DecompressAndDecode_OK )
data += static_cast<char>( rng() % 256 );
file.decompressedData.assign( data.begin(), data.end() );
picosha2::hash256_hex_string( file.decompressedData, file.data_sha );
hash.reset();
hash.add( file.decompressedData );
file.data_hash = hash.digest().ToString();
result = EMBEDDED_FILES::CompressAndEncode( file );
BOOST_CHECK_EQUAL( result, EMBEDDED_FILES::RETURN_CODE::OK );
@ -120,7 +130,7 @@ BOOST_AUTO_TEST_CASE( DecompressAndDecode_ChecksumError )
BOOST_CHECK_EQUAL(result, EMBEDDED_FILES::RETURN_CODE::OK);
// Modify the checksum
file.data_sha[0] = 'x';
file.data_hash[0] = 'x';
result = EMBEDDED_FILES::DecompressAndDecode(file);
BOOST_CHECK_EQUAL(result, EMBEDDED_FILES::RETURN_CODE::CHECKSUM_ERROR);