DWF

DWF

Reply
New Member
Lin_1983_Zheng
Posts: 1
Registered: ‎01-27-2013
Message 1 of 1 (484 Views)

How to extract URLs from a DWF file, especially non-English URLs?

484 Views, 0 Replies
01-28-2013 12:00 AM

Hello, experts,

    I am trying to extract all the URL information from a specified DWF file. However, when processing a DWF file that contains Chinese characters, it always fails. I have pasted my code fragment below:

 

// Builds a handler that reads W2D data from rStream.
//
// Registers the URL and text callbacks, switches the file to read mode,
// stores `this` as the stream user data and then opens the file right away;
// the destructor performs the matching close().
//
// rStream     - input stream the W2D data is read from; only a reference is
//               kept, so it has to stay alive as long as this handler does.
// bHtmlOutput - not used by this constructor.
DwfW2DHandler::DwfW2DHandler( DWFInputStream& rStream, bool bHtmlOutput )
    : rW2DStream( rStream )
    , m_iUrlIndex( 0 )
{
    static_cast<void>( bHtmlOutput );   // accepted for interface compatibility only

    // Hook up the callbacks before opening so that nothing is missed.
    set_url_action( HandleURL );
    set_text_action( HandleText );
    set_file_mode( WT_File::File_Read );
    set_stream_user_data( this );
    open();
}

// Closes the file that the constructor opened.
DwfW2DHandler::~DwfW2DHandler()
{
    this->close();
}


// Stream hook the derived class has to provide: called when the stream is
// closed. This handler has nothing to release here, so it only reports success.
WT_Result DwfW2DHandler::process_stream_close( void )
{
    return WT_Result::Success;
}
// Stream hook the derived class has to provide: end-seek request.
// No repositioning is performed; the call simply reports success.
WT_Result DwfW2DHandler::process_stream_end_seek()
{
    return WT_Result::Success;
}

// Stream hook the derived class has to provide: called when the stream is
// opened. Records how many bytes the input stream reports as available at
// this point; process_stream_tell() later subtracts the remaining byte count
// from this value to derive the current position.
WT_Result DwfW2DHandler::process_stream_open( void )
{
    //heuristics().set_allow_binary_data(true);
    //heuristics().set_allow_data_compression(true);
    nBytesAvailable = rW2DStream.available();
    //wcout<< L"DwfW2DHandler::process_stream_open nBytesAvailable = "<<nBytesAvailable<<endl;
    return WT_Result::Success;
}
// Stream hook the derived class has to provide: reads up to desired_bytes
// bytes from the wrapped input stream into buffer.
//
// desired_bytes - maximum number of bytes to read.
// bytes_read    - receives the number of bytes actually read; it is set to 0
//                 when the read fails so the caller never sees a stale value.
// buffer        - destination buffer supplied by the caller.
//
// Returns WT_Result::Success, or WT_Result::Internal_Error if the underlying
// stream throws.
WT_Result DwfW2DHandler::process_stream_read( int desired_bytes, int& bytes_read, void* buffer )
{
    bytes_read = 0;
    //wcout << L"Enter DwfW2DHandler::process_stream_read desired_bytes = "<<desired_bytes<<endl;
    try
    {
        bytes_read = static_cast<int>( rW2DStream.read( buffer, desired_bytes ) );
    }
    catch (...)
    {
        // Any stream exception is mapped onto a toolkit error code.
        return WT_Result::Internal_Error;
    }
    //wcout << L"Exit DwfW2DHandler::process_stream_read bytes_read ="<<bytes_read<<endl;
    return WT_Result::Success;
}

// Stream hook the derived class has to provide: moves the read position of
// the wrapped input stream by distance bytes relative to the current position.
//
// distance      - offset to move by, relative to the current position.
// amount_seeked - receives the value returned by the stream's seek(); it is
//                 set to 0 when the seek fails.
//
// Returns WT_Result::Success, or WT_Result::Internal_Error if the underlying
// stream throws.
//
// NOTE(review): this stores whatever DWFInputStream::seek() returns. Confirm
// against the DWF Toolkit documentation that this is the distance actually
// moved and not, e.g., the previous stream offset.
WT_Result DwfW2DHandler::process_stream_seek( int distance, int& amount_seeked )
{
    amount_seeked = 0;
    try
    {
        amount_seeked = static_cast<int>( rW2DStream.seek( SEEK_CUR, distance ) );
    }
    catch (...)
    {
        // Any stream exception is mapped onto a toolkit error code.
        return WT_Result::Internal_Error;
    }
    return WT_Result::Success;
}
// Stream hook the derived class has to provide: reports the current read
// position, computed as the byte count recorded in process_stream_open()
// minus the bytes the stream still reports as available.
//
// current_file_pointer_position - receives the computed position.
//
// Returns WT_Result::Success, or WT_Result::Toolkit_Usage_Error when the
// output pointer is null.
WT_Result DwfW2DHandler::process_stream_tell( unsigned long *current_file_pointer_position )
{
    if ( current_file_pointer_position == NULL )
    {
        return WT_Result::Toolkit_Usage_Error;
    }

    // Convert straight to the destination type; going through int first would
    // truncate (and sign-extend) positions that do not fit in an int.
    *current_file_pointer_position =
        static_cast<unsigned long>( nBytesAvailable - rW2DStream.available() );

    return WT_Result::Success;
}

// Stream hook the derived class has to provide: write request.
// The constructor puts the file in WT_File::File_Read mode, so writing is
// never valid here; both arguments are ignored and a usage error is returned.
WT_Result DwfW2DHandler::process_stream_write( int size, void const* buffer )
{
    static_cast<void>( size );
    static_cast<void>( buffer );
    return WT_Result::Toolkit_Usage_Error;
}

// Drives the parse: keeps calling process_next_object() for as long as it
// returns Success or Unsupported_DWF_Opcode (unsupported opcodes are skipped
// rather than treated as fatal). The URL and text callbacks registered in the
// constructor do the actual work.
//
// Returns the first result that is neither Success nor Unsupported_DWF_Opcode,
// i.e. the result that ended the loop.
WT_Result DwfW2DHandler::process()
{
    //wcout << L"Enter DwfW2DHandler::process"<<endl;
    WT_Result result = WT_Result::Success;

    do
    {
        result = process_next_object();

        /*
        Debugging aid kept from the original: inspect each object as it is parsed.

        //wcout << L"DwfW2DHandler::process process next object success"<<endl;
        WT_Object const* pCurrentObj = current_object();
        wcout << L"DwfW2DHandler::process id ="<<pCurrentObj->object_id()<<endl;
        if ( ( pCurrentObj->object_id() == WT_Object::URL_ID ) ||
        ( pCurrentObj->object_id() == WT_Object::URL_List_ID ) )
        {
        //wcout << L"DwfW2DHandler::process URL List object found"<<endl;
        }

        if ( pCurrentObj->object_id() == WT_Object::Text_ID )
        {
        WT_String const & rTextString = ( ( WT_Text* )pCurrentObj )->string();
        wchar_t* pTextString = WT_String::to_wchar( rTextString.length(), rTextString.unicode() );
        pTextString = pTextString ? pTextString : L"NULL";
        //wcout <<L"Text String ="<<pTextString<<endl;
        }
        */
    }
    while ( WT_Result::Success == result ||
            WT_Result::Unsupported_DWF_Opcode == result );

    return result;
}


WT_Result DwfW2DHandler::HandleURL( WT_URL& rUrl, WT_File& rFile )
{
//wcout << L"Enter DwfW2DHandler::HandleURL"<<endl;
DwfW2DHandler& rThisHandler = ( DwfW2DHandler& )rFile;
WT_URL_List urlList = rUrl.url();

WT_URL_Item* pUrlItem = NULL;
for ( ;( ( pUrlItem = urlList.url_item_from_index( rThisHandler.m_iUrlIndex ) ) != NULL ); rThisHandler.m_iUrlIndex++ )
{
WT_String& rUrl = pUrlItem->address();
wchar_t* pUrlString = WT_String::to_wchar( rUrl.length(), rUrl.unicode() );

//wcout <<L"Length="<<rUrl.length()<<endl;
wcout << L"[Machine-readable URL=" << pUrlString<<L"]"<<endl;
delete [] pUrlString;

//if friendly_name then append that otherwise put the address as URL text
if( pUrlItem->friendly_name() )
{
//wcout << L"friendly name exist"<<endl;
WT_String& rFriendlyNameString = pUrlItem->friendly_name();
wchar_t* pFriendlyName = WT_String::to_wchar( rFriendlyNameString.length(), rFriendlyNameString.unicode() );
//delete when you are done here
//wcout<<L"Length="<<rFriendlyNameString.length()<<endl;
wcout << L"[Human-readable URL=" << pFriendlyName<<L"]"<<endl;
delete[] pFriendlyName;
}


}

return WT_Result::smileyfrustrated:uccess;
}

// Text callback registered via set_text_action() in the constructor.
//
// Converts the text object's string to a wchar_t buffer; the statement that
// would print it is currently commented out. The buffer is released before
// returning - it is freed with delete[], the same way HandleURL() frees the
// buffers it gets from WT_String::to_wchar().
//
// rText - text object handed over by the toolkit.
// rFile - the file being processed (not used here).
//
// Always returns WT_Result::Success.
WT_Result DwfW2DHandler::HandleText( WT_Text& rText, WT_File& rFile )
{
    static_cast<void>( rFile );

    //wcout << L"Enter DwfW2DHandler::HandleText"<<endl;
    WT_String const & rTextString = rText.string();
    wchar_t* pTextString = WT_String::to_wchar( rTextString.length(), rTextString.unicode() );
    //wcout <<L"pTextString="<<pTextString<<endl;

    delete[] pTextString;   // previously leaked on every text object; delete[] on null is a no-op
    return WT_Result::Success;
}

 


According to my testing, the program simply exits when HandleText is called if the text is Chinese. Anyway, the code is long and boring; thanks for reading and for your help.

 

best regards

Post to the Community

Have questions about Autodesk products? Ask the community.

New Post
Announcements
Do you have 60 seconds to spare? The Autodesk Community Team is revamping our site ranking system and we want your feedback! Please click here to launch the 5 question survey. As always your input is greatly appreciated.