Orderblogger-importer-blogitem.php000064400000013336147600314460012350 0ustar00get_item_tags(SIMPLEPIE_NAMESPACE_ATOMPUB, 'control')) && !empty($control[0]['child'][SIMPLEPIE_NAMESPACE_ATOMPUB]['draft'][0]['data'])) { $draft = ('yes' == $control[0]['child'][SIMPLEPIE_NAMESPACE_ATOMPUB]['draft'][0]['data']); } return $draft; }

//Tried using date functions from http://core.trac.wordpress.org/attachment/ticket/7652/7652-separate.diff
//but ended up with 1970s dates so returned to Otto's version which is much simplified

/**
 * Read the Atom 1.0 "updated" element of this item.
 *
 * @return string|null Date formatted by convert_date(), or null when the
 *                     element is missing or its value cannot be parsed.
 */
function get_updated() {
    $tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'updated');
    if (!isset($tags[0]['data'])) {
        return null;
    }
    return $this->convert_date($tags[0]['data']);
}

/**
 * Read the Atom 1.0 "published" element of this item.
 *
 * @return string|null Date formatted by convert_date(), or null when the
 *                     element is missing or its value cannot be parsed.
 */
function get_published() {
    $tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'published');
    if (!isset($tags[0]['data'])) {
        return null;
    }
    return $this->convert_date($tags[0]['data']);
}

/**
 * Return the geo tags of this item, see http://codex.wordpress.org/Geodata
 *
 * The "point" element carries two numbers (e.g. "55.6760968 12.5683371") and
 * the optional "featurename" element carries the address
 * (e.g. "Rådhuspladsen 3, 1550 Copenhagen, Denmark").
 *
 * @return array|null array('geo_latitude' => float, 'geo_longitude' => float,
 *                    'geo_address' => string|null), or null when the item has
 *                    no parsable point.
 */
function get_geotags() {
    $point = $this->get_item_tags(SIMPLEPIE_NAMESPACE_GEOTAG, 'point');

    // Without lat long we can't have a valid location. The match result is
    // checked so that an unparsable point is not reported as 0,0.
    if (!isset($point[0]['data'])
        || !preg_match('/([0-9.-]+).+?([0-9.-]+)/', $point[0]['data'], $matches)) {
        return null;
    }

    $address = $this->get_item_tags(SIMPLEPIE_NAMESPACE_GEOTAG, 'featurename');

    return array(
        'geo_latitude'  => (float) $matches[1],
        'geo_longitude' => (float) $matches[2],
        'geo_address'   => isset($address[0]['data']) ? $address[0]['data'] : null,
    );
}

/**
 * Convert an ISO 8601 timestamp from the feed into a 'Y-m-d H:i:s' string
 * shifted by the site's 'gmt_offset' option.
 *
 * @param string $date Timestamp such as 2013-10-24T08:15:00.000+01:00.
 * @return string|null Converted date, or null when $date does not match the
 *                     expected pattern.
 */
function convert_date($date) {
    $pattern = '#([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?:\.[0-9]+)?(Z|[\+|\-][0-9]{2,4}){0,1}#';
    if (!preg_match($pattern, (string) $date, $date_bits)) {
        return null;
    }

    // The timezone group is optional; when it is absent preg_match() leaves
    // index 7 unset, which is treated as 'Z' (zero offset).
    $timezone = isset($date_bits[7]) ? $date_bits[7] : 'Z';
    $offset = iso8601_timezone_to_offset($timezone);

    $timestamp = gmmktime(
        (int) $date_bits[4],
        (int) $date_bits[5],
        (int) $date_bits[6],
        (int) $date_bits[2],
        (int) $date_bits[3],
        (int) $date_bits[1]
    );
    $timestamp -= $offset; // Convert from Blogger local time to GMT
    $timestamp += get_option('gmt_offset') * 3600; // Convert from GMT to WP local time

    return gmdate('Y-m-d H:i:s', (int) $timestamp);
}

/**
 * Return the raw Atom id of this item.
 *
 * Don't Sanitize the ID, the default get_id was cleaning our IDs and that
 * meant that nested comments did not work.
 *
 * @param bool   $hash Unused here; kept for signature compatibility.
 * @param string $fn   Unused here; kept for signature compatibility.
 * @return string|null The id text, or null when the item has no id element.
 */
function get_id($hash = false, $fn = 'md5') {
    $tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'id');
    if ($tags) {
        return $tags[0]['data'];
    }
    return null;
}

/**
 * Prefiltered links: collect the links of this item for the given relations.
 *
 * @param array|string $rel One relation name or a list of them. A plain
 *                          string (including the default) is treated as a
 *                          one-element list instead of being iterated as-is.
 * @return array List of array('rel' => string, 'href' => string).
 */
function get_links($rel = 'alternate') {
    $mylinks = array();
    foreach ((array) $rel as $type) {
        $links = parent::get_links($type);
        if (is_null($links)) {
            continue;
        }
        foreach ($links as $link) {
            $mylinks[] = array('rel' => $type, 'href' => $link);
        }
    }
    return $mylinks;
}

/**
 * Preprocessed categories: reduce the parent's category objects to their terms.
 *
 * @return array List of category term values; empty when the item has none.
 */
function get_categories() {
    $mycats = array();
    $cats = parent::get_categories();
    if (is_null($cats)) {
        return $mycats;
    }
    foreach ($cats as $cat) {
        $mycats[] = $cat->term;
    }
    return $mycats;
}

//What is the source of this item e.g. a comment linked to a post
//10/3/2014 Added error handling for where the comment links to a post that no longer exists on blogger.
/**
 * Return the "source" attribute of this item's thread "in-reply-to" element.
 *
 * When several in-reply-to elements carry a source, the last one wins.
 *
 * @return string|null The source, or null when the item has no in-reply-to
 *                     element or none of them has a source attribute.
 */
function get_source() {
    // Initialised up front so an in-reply-to element without a source
    // attribute yields null instead of reading an undefined variable.
    $source = null;

    $replies = $this->get_item_tags(SIMPLEPIE_NAMESPACE_THREAD, 'in-reply-to');
    if (is_null($replies)) {
        return null;
    }

    foreach ($replies as $reply) {
        if (isset($reply['attribs']['']['source'])) {
            $source = $reply['attribs']['']['source'];
        }
    }

    return $source;
}
} } ?>blogger-entry.php000064400000010213147600314460010037 0ustar00links as $link) { // save the self link as meta if ($link['rel'] == 'self') { $postself = $link['href']; $parts = parse_url($link['href']); $this->old_permalink = $parts['path']; } // get the old URI for the page when available if ($link['rel'] == 'alternate') { $parts = parse_url($link['href']); $this->bookmark = $parts['path']; } // save the replies feed link as meta (ignore the comment form one) if ($link['rel'] == 'replies' && false === strpos($link['href'], '#comment-form')) { $this->postreplies = $link['href']; } } }

/**
 * Insert this entry as a WordPress post and attach its Blogger meta data.
 *
 * Clean up of the content is part of the sanitize class and the duplicate
 * check is the caller's job (see post_exists()).
 *
 * @return int|WP_Error The new post ID, or the WP_Error from wp_insert_post().
 */
function import() {
    $post = array(
        'post_date'     => $this->published,
        'post_content'  => $this->content,
        'post_author'   => $this->author,
        'post_title'    => $this->title,
        'post_status'   => $this->isDraft ? 'draft' : 'publish',
        //AGC:24/10/2013 Turn off the pingbacks
        'post_pingback' => Blogger_Importer::POST_PINGBACK,
    );

    // Ask for a WP_Error on failure; without the second argument
    // wp_insert_post() returns 0 and the check below could never trigger,
    // so the meta data would have been written against post 0.
    $post_id = wp_insert_post($post, true);
    if (is_wp_error($post_id)) {
        return $post_id;
    }

    wp_create_categories(array_map('addslashes', $this->categories), $post_id);

    add_post_meta($post_id, 'blogger_blog', $this->blogurl, true);
    add_post_meta($post_id, 'blogger_author', $this->bloggerauthor, true);

    // Drafts have no public address, so only published posts get a permalink.
    if (!$this->isDraft && isset($this->bookmark)) {
        add_post_meta($post_id, 'blogger_permalink', $this->bookmark, true);
    }
    add_post_meta($post_id, 'blogger_internal', $this->old_permalink, true);

    if (isset($this->geotags)) {
        add_post_meta($post_id, 'geo_latitude', $this->geotags['geo_latitude']);
        add_post_meta($post_id, 'geo_longitude', $this->geotags['geo_longitude']);
        add_post_meta($post_id, 'geo_public', 1);
        if (isset($this->geotags['geo_address'])) {
            add_post_meta($post_id, 'geo_address', $this->geotags['geo_address']);
        }
    }

    return $post_id;
}

/**
 * Look for an already imported post matching this entry.
 *
 * Tries the self link path first and falls back to the alternate link path.
 *
 * @return int The existing post ID, or 0 when none was found.
 */
function post_exists() {
    $found = $this->get_post_by_oldID($this->old_permalink);
    if ($found == 0 && isset($this->bookmark)) {
        $found = $this->get_post_by_oldID($this->bookmark);
    }
    return $found;
}

/**
 * Find the post whose 'blogger_internal' meta equals the given value.
 *
 * @param string $oldID Value stored in the 'blogger_internal' meta.
 * @return int The post ID, or 0 when nothing matches.
 */
function get_post_by_oldID($oldID) {
    //Check to see if this post has been loaded already
    //Can we use get_posts for this?
    global $wpdb;

    // The placeholder is left unquoted, as wpdb::prepare() expects and as
    // image_exists() in this plugin already does.
    $query = "SELECT post_id FROM $wpdb->postmeta m inner join $wpdb->posts p on p.ID = m.post_id where meta_key = 'blogger_internal' and meta_value = %s and p.post_type = 'post' LIMIT 0 , 1";

    return (int) $wpdb->get_var($wpdb->prepare($query, $oldID));
}
} } blogger-importer-sanitize.php000064400000012662147600314460012375 0ustar00 'href', 'area' => 'href', 'blockquote' => 'cite', 'del' => 'cite', 'form' => 'action', 'img' => array('longdesc', 'src'), 'input' => 'src', 'ins' => 'cite', 'q' => 'cite'); public function __construct() { parent::__construct(); } function _normalize_tag($matches) { return '<' . 
strtolower($matches[1]); } function sanitize($data, $type, $base = '') { //Simplified function $data = trim($data); // Normalise tags (string replacement is case sensitive) $data = preg_replace_callback('|<(/?[A-Z]+)|', array(&$this, '_normalize_tag'), $data); // Remappings $data = str_replace('
', '
', $data); $data = str_replace('
', '
', $data); //Workshopshed: > Workshopshed: $data = preg_replace('|()(?).*(.*)()|', '$2', $data); //N.B. Don't strip comments as blogger uses which is the same as WordPress // Comments might also contain section targetting e.g. //Now clean up foreach ($this->strip_htmltags as $tag) { $pcre = "/<($tag)" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\/$tag" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\/)?>)/siU'; while (preg_match($pcre, $data)) { $data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data); } } foreach ($this->strip_attributes as $attrib) { $data = preg_replace('/(<[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*)' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . trim($attrib) . '(?:\s*=\s*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\x09\x0A\x0B\x0C\x0D\x20\x22\x27\x3E][^\x09\x0A\x0B\x0C\x0D\x20\x3E]*)?))?' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>/', '\1\2\3>', $data); } // Replace relative URLs $this->base = $base; foreach ($this->replace_url_attributes as $element => $attributes) { $data = $this->replace_urls($data, $element, $attributes); } // Images are handled as a separate step as we need to download them // Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data $data = trim($data); return $data; } function replace_urls($data, $tag, $attributes) { //This seems to do nothing at all!? 
if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) { $elements = SimplePie_Misc::get_element($tag, $data); foreach ($elements as $element) { if (is_array($attributes)) { foreach ($attributes as $attribute) { if (isset($element['attribs'][$attribute]['data'])) { $element['attribs'][$attribute]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$attribute]['data'], $this->base); $new_element = SimplePie_Misc::element_implode($element); $data = str_replace($element['full'], $new_element, $data); $element['full'] = $new_element; } } } elseif (isset($element['attribs'][$attributes]['data'])) { $element['attribs'][$attributes]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$attributes]['data'], $this->base); $data = str_replace($element['full'], SimplePie_Misc::element_implode($element), $data); } } } return $data; } //Latest SimplePie checks for this function public function set_registry(SimplePie_Registry $registry) { parent::set_registry($registry); } } ?>uninstall.php000064400000000427147600314460007276 0ustar00=' ) ) { require_once ABSPATH . WPINC . '/class-simplepie.php'; } else { require_once ABSPATH . WPINC . '/class-feed.php'; } // Custom classes used by importer require_once dirname( __FILE__ ) . '/blogger-importer-sanitize.php'; require_once dirname( __FILE__ ) . '/blogger-importer-blogitem.php'; require_once dirname( __FILE__ ) . '/blogger-entry.php'; require_once dirname( __FILE__ ) . '/comment-entry.php'; if ( ! class_exists( 'WP_Importer' ) ) { $class_wp_importer = ABSPATH . 
'wp-admin/includes/class-wp-importer.php'; if ( file_exists( $class_wp_importer ) ) require $class_wp_importer; } /** * Blogger Importer class for managing the import process of a XML file * */ if ( !class_exists( 'Blogger_Importer' ) ) { class Blogger_Importer extends WP_Importer { const IMPORT_IMG = true; // Should we import the images (boolean) const LARGE_IMAGE_SIZE = '1024'; // The size of large images downloaded (string) const POST_PINGBACK = 0; // Turn off the post pingback, set to 1 to re-enabled(bool) private $id = null; // XML attachment ID private $author_mapping = array(); private $authors = array(); private $comments_done = 0; private $comments_skipped = 0; private $host = null; private $images_done = 0; private $images_progress = 0; private $images_skipped = 0; private $import_data = null; private $links_done = 0; private $links_progress = 0; private $posts_done = 0; private $posts_skipped = 0; private $processed_authors = array(); private $version = null; /** * Registered callback function for the Blogger Importer * * Manages the three separate stages of the XML import process */ function dispatch() { $this->header(); $step = empty( $_GET['step'] ) ? 0 : (int) $_GET['step']; switch ( $step ) { case 0: $this->greet(); break; case 1: check_admin_referer( 'import-upload' ); if ( $this->handle_upload() ) $this->import_options(); break; case 2: check_admin_referer( 'import-blogger' ); $this->id = (int) $_POST['import_id']; $file = get_attached_file( $this->id ); set_time_limit(0); $this->import( $file ); break; } $this->footer(); } /** * The main controller for the actual import stage. 
* * @param string $file Path to the XML file for importing */ function import( $file ) { add_filter( 'http_request_timeout', array( &$this, 'bump_request_timeout' ) ); $this->import_start( $file ); $this->get_author_mapping(); wp_suspend_cache_invalidation( true ); $this->process_posts(); $this->process_comments(); if (Blogger_Importer::IMPORT_IMG) { $this->process_images(); } $this->process_links(); wp_suspend_cache_invalidation( false ); // update incorrect/missing information in the DB //$this->backfill_parents(); //$this->backfill_attachment_urls(); //$this->remap_featured_images(); $this->import_end(); } /** * Parses the XML file and prepares us for the task of processing parsed data * * @param string $file Path to the XML file for importing */ function import_start( $file ) { if ( ! is_file($file) ) { echo '

' . __( 'Sorry, there has been an error.', 'blogger-importer' ) . '
'; echo __( 'The file does not exist, please try again.', 'blogger-importer' ) . '

'; $this->footer(); die(); } $import_data = $this->parse( $file ); if ( is_wp_error( $import_data ) ) { echo '

' . __( 'Sorry, there has been an error.', 'blogger-importer' ) . '
'; echo esc_html( $import_data->get_error_message() ) . '

'; $this->footer(); die(); } $this->import_data = $import_data; // $links = $import_data->get_links('alternate'); $this->host = parse_url($links[0], PHP_URL_HOST); $this->images_progress = 0; $this->images_skipped = 0; $this->links_done = 0; $this->links_progress = 0; wp_defer_term_counting( true ); wp_defer_comment_counting( true ); do_action( 'import_start' ); } /** * Performs post-import cleanup of files and the cache */ function import_end() { wp_import_cleanup( $this->id ); wp_cache_flush(); foreach ( get_taxonomies() as $tax ) { delete_option( "{$tax}_children" ); _get_term_hierarchy( $tax ); } wp_defer_term_counting( false ); wp_defer_comment_counting( false ); echo '

' . __( 'All done.', 'blogger-importer' ) . ' ' . __( 'Have fun!', 'blogger-importer' ) . '' . '

'; echo '

' . __( 'Remember to update the passwords and roles of imported users.', 'blogger-importer' ) . '

'; do_action( 'import_end' ); } /** * Handles the WXR upload and initial parsing of the file to prepare for * displaying author import options * * @return bool False if error uploading or invalid file, true otherwise */ function handle_upload() { $file = wp_import_handle_upload(); if ( isset( $file['error'] ) ) { echo '

' . __( 'Sorry, there has been an error.', 'blogger-importer' ) . '
'; echo esc_html( $file['error'] ) . '

'; return false; } else if ( ! file_exists( $file['file'] ) ) { echo '

' . __( 'Sorry, there has been an error.', 'blogger-importer' ) . '
'; printf( __( 'The export file could not be found at %s. It is likely that this was caused by a permissions problem.', 'blogger-importer' ), esc_html( $file['file'] ) ); echo '

'; return false; } $this->id = (int) $file['id']; $import_data = $file['file']; if ( is_wp_error( $import_data ) ) { echo '

' . __( 'Sorry, there has been an error.', 'blogger-importer' ) . '
'; echo esc_html( $import_data->get_error_message() ) . '

'; return false; } $this->get_authors_from_import( $import_data ); return true; } /** * Retrieve authors from parsed data * * @param array $import_data Data returned by a WXR parser */ function get_authors_from_import( $import_data ) { $feed = $this->parse($import_data); $authors = $feed->get_authors(); foreach ($authors as $author) { $login = sanitize_user( $author->get_name(), true ); $this->authors[$login] = array( 'author_login' => $author->get_name(), 'author_display_name' => $author->get_name(), 'author_email' => $author->get_email(), ); } } /** * Display pre-import options, author importing/mapping and option to * fetch attachments */ function import_options() { $j = 0; ?>
authors ) ) : ?>

admins entries.', 'blogger-importer' ); ?>

allow_create_users() ) : ?>

    authors as $author ) : ?>
  1. author_select( $j++, $author ); ?>

' . esc_html( $author['author_display_name'] ); echo '
'; $create_users = $this->allow_create_users(); if ( $create_users ) { _e( 'as a new user:', 'blogger-importer' ); $value = esc_attr( sanitize_user( $author['author_login'], true ) ); echo '
'; } _e( 'or assign posts to an existing user:', 'blogger-importer' ); wp_dropdown_users( array( 'name' => "user_map[$n]", 'multi' => true, 'show_option_all' => __( '- Select -', 'blogger-importer' ) ) ); echo ''; }

/**
 * Map old author logins to local user IDs based on decisions made
 * in import options form. Can map to an existing user, create a new user
 * or falls back to the current user in case of error with either of the previous
 *
 * Reads $_POST['imported_authors'], $_POST['user_map'] and $_POST['user_new']
 * and fills $this->author_mapping (keyed by sanitized login) and
 * $this->processed_authors (keyed by old author id, when one is known).
 */
function get_author_mapping() {
    if ( ! isset( $_POST['imported_authors'] ) )
        return;

    $create_users = $this->allow_create_users();

    foreach ( (array) $_POST['imported_authors'] as $i => $old_login ) {
        // Multisite adds strtolower to sanitize_user. Need to sanitize here to stop breakage in process_posts.
        $santized_old_login = sanitize_user( $old_login, true );
        $known = isset( $this->authors[$old_login] ) ? $this->authors[$old_login] : array();
        $old_id = isset( $known['author_id'] ) ? intval( $known['author_id'] ) : false;

        if ( ! empty( $_POST['user_map'][$i] ) ) {
            $user = get_userdata( intval( $_POST['user_map'][$i] ) );
            if ( isset( $user->ID ) ) {
                if ( $old_id )
                    $this->processed_authors[$old_id] = $user->ID;
                $this->author_mapping[$santized_old_login] = $user->ID;
            }
        } else if ( $create_users ) {
            // Reset per author: otherwise, when neither branch below creates
            // a user, the ID created for the previous author would be reused.
            $user_id = null;

            if ( ! empty( $_POST['user_new'][$i] ) ) {
                $user_id = wp_create_user( $_POST['user_new'][$i], wp_generate_password() );
            } else if ( $this->version != '1.0' ) {
                $user_data = array(
                    'user_login' => $old_login,
                    'user_pass' => wp_generate_password(),
                    'user_email' => isset( $known['author_email'] ) ? $known['author_email'] : '',
                    'display_name' => isset( $known['author_display_name'] ) ? $known['author_display_name'] : '',
                    'first_name' => isset( $known['author_first_name'] ) ? $known['author_first_name'] : '',
                    'last_name' => isset( $known['author_last_name'] ) ? $known['author_last_name'] : '',
                );
                $user_id = wp_insert_user( $user_data );
            }

            if ( is_wp_error( $user_id ) ) {
                if ( array_key_exists( $old_login, $this->authors ) ) {
                    printf( __( 'Failed to create new user for %s. Their posts will be attributed to the current user.', 'blogger-importer' ), esc_html( $this->authors[$old_login]['author_display_name'] ) );
                } else {
                    printf( __( 'Failed to create new user. Their posts will be attributed to the current user.', 'blogger-importer' ) );
                }
                if ( defined('IMPORT_DEBUG') && IMPORT_DEBUG )
                    echo ' ' . $user_id->get_error_message();
                echo '
';
            } else if ( null !== $user_id ) {
                if ( $old_id )
                    $this->processed_authors[$old_id] = $user_id;
                $this->author_mapping[$santized_old_login] = $user_id;
            }
        }

        // failsafe: if the user_id was invalid, default to the current user
        if ( ! isset( $this->author_mapping[$santized_old_login] ) ) {
            if ( $old_id )
                $this->processed_authors[$old_id] = (int) get_current_user_id();
            $this->author_mapping[$santized_old_login] = (int) get_current_user_id();
        }
    }
}

/**
 * Create new posts based on import information
 *
 * Walks every item of the parsed feed, keeps the ones whose categories mark
 * them as Blogger posts, and imports each one unless post_exists() finds it.
 * Updates $this->posts_done and $this->posts_skipped.
 */
function process_posts() {
    $feed = $this->import_data;

    foreach ( $feed->get_items() as $item ) {
        // check that it is actually a post first: the entry kind is one of its category terms
        $is_post = false;
        $cats = $item->get_categories();
        foreach ( $cats as $cat ) {
            if ( $cat == 'http://schemas.google.com/blogger/2008/kind#post' ) {
                $is_post = true;
                break;
            }
        }

        // only import posts for now
        if ( ! $is_post ) {
            continue;
        }

        $blogentry = new BloggerEntry();
        $blogentry->blogurl = $this->host;
        $blogentry->id = $item->get_id();
        $blogentry->published = $item->get_published();
        $blogentry->updated = $item->get_updated();
        $blogentry->isDraft = $item->get_draft_status();
        $blogentry->title = $item->get_title();
        $blogentry->content = $item->get_content();
        $blogentry->geotags = $item->get_geotags();

        // map the post author; an entry without an author element falls
        // through to the current user instead of calling a method on null
        $item_author = $item->get_author();
        $author_name = $item_author ? $item_author->get_name() : '';
        $blogentry->bloggerauthor = sanitize_user( $author_name, true );
        if ( isset( $this->author_mapping[$blogentry->bloggerauthor] ) )
            $blogentry->author = $this->author_mapping[$blogentry->bloggerauthor];
        else
            $blogentry->author = (int) get_current_user_id();

        $blogentry->links = $item->get_links( array( 'replies', 'edit', 'self', 'alternate' ) );
        $blogentry->parselinks();

        // Terms under schemas.google.com describe the entry kind, not a user category
        foreach ( $cats as $cat ) {
            if ( false === strpos( $cat, 'http://schemas.google.com' ) ) {
                $blogentry->categories[] = $cat;
            }
        }

        // Checks for duplicates
        $post_id = $blogentry->post_exists();
        if ( $post_id != 0 ) {
            $this->posts_skipped++;
        } else {
            //Unique new post so import it
            $post_id = $blogentry->import();
            $this->posts_done++;
        }
    }
}

/**
 * Create new comments based on import information
 *
 * Walks every item of the parsed feed, keeps the ones whose categories mark
 * them as Blogger comments, resolves the post they belong to and imports each
 * one unless it already exists. Comments whose post cannot be found are
 * counted as skipped. Updates $this->comments_done and $this->comments_skipped.
 */
function process_comments() {
    $feed = $this->import_data;

    foreach ( $feed->get_items() as $item ) {
        // check that it is actually a comment first: the entry kind is one of its category terms
        $is_comment = false;
        $cats = $item->get_categories();
        foreach ( $cats as $cat ) {
            if ( $cat == 'http://schemas.google.com/blogger/2008/kind#comment' ) {
                $is_comment = true;
                break;
            }
        }

        // we only import comments here
        if ( ! $is_comment ) {
            continue;
        }

        $commententry = new CommentEntry();
        $commententry->id = $item->get_id();
        $commententry->updated = $item->get_updated();
        $commententry->content = $item->get_content();

        // Fetch the author once and tolerate items without one
        $item_author = $item->get_author();
        $commententry->author = $item_author ? $item_author->get_name() : null;
        $commententry->authoruri = $item_author ? $item_author->get_link() : null;
        $commententry->authoremail = $item_author ? $item_author->get_email() : null;

        // get_source() reads the in-reply-to element; the former direct read
        // of $replyto[0][...] was overwritten right away and warned when the
        // element was missing, so it is gone.
        $commententry->source = $item->get_source();

        $parts = parse_url( (string) $commententry->source );
        //Will be something like this '/feeds/417730729915399755/posts/default/8397846992898424746'
        $commententry->old_post_permalink = isset( $parts['path'] ) ? $parts['path'] : '';

        $bloggerentry = new BloggerEntry();
        $commententry->post_ID = $bloggerentry->get_post_by_oldID( $commententry->old_post_permalink );

        //Get the links
        $commententry->links = $item->get_links( array( 'edit', 'self', 'alternate', 'related' ) );
        $commententry->parselinks();

        // Nested comment?
        if ( isset( $commententry->related ) ) {
            $commententry->parentcommentid = $commententry->get_comment_by_oldID( $commententry->related );
        }

        //Perhaps could log errors here?
        if ( $commententry->post_ID == 0 ) {
            $this->comments_skipped++;
            continue;
        }

        // Checks for duplicates
        if ( $comment_id = $commententry->exists() ) {
            $this->comments_skipped++;
        } else {
            $comment_id = $commententry->import();
            $this->comments_done++;
        }
    }
}
/* * Search for either a linked image or a non linked image within the supplied html * or * Ref: http://www.the-art-of-web.com/php/parse-links/ * "]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>" * http://wordpress.org/extend/plugins/blogger-image-import/ * "]+href\=([\"'`])(.*)\\1[^<]*?]*src\=([\"'`])(.*)\\3[^>]*>" */ function get_images($content) { $highrez = array(); $lowrez = array(); //First images with links //Might be nice to expand this top try and get Alt and/or Title attributes for use as description $regexp = "]*href\=([\"'`])([^> ]*?)\\1[^<]*?]*src\=([\"'`])([^\> ]*?)\\3[^>]*>"; if (preg_match_all("/$regexp/siU", $content, $matches1, PREG_SET_ORDER)) { //http://www.techrepublic.com/article/17-useful-functions-for-manipulating-arrays-in-php/5792851 foreach ($matches1 as $match) { if ($this->isurlimage($match[2])) { $highrez[$match[4]] = $match[2]; } else { $lowrez[$match[4]] = ''; } } } //Now any image (n.b. 
this overlaps the previous set) $regexp = "]*src\=([\"'`])([^\> ]*?)\\1[^>]*>"; if (preg_match_all("/$regexp/siU", $content, $matches2, PREG_SET_ORDER)) { foreach ($matches2 as $match) { $lowrez[$match[2]] = ''; } } //Remove any rows from this second set that are already in the first set and merge two sets of results $images = array_merge($lowrez, $highrez); return $images; } /** * Update all of the images in the posts that have already been imported */ function process_images() { $postsprocessed = $this->images_progress; if ($postsprocessed == 0) { $imagesprocessed = 0; $imagesskipped = 0; } else { $imagesprocessed = $this->images_done; $imagesskipped = $this->images_skipped; } $batchsize = 20; $loadedposts = get_posts( array( 'meta_key' => 'blogger_blog', 'meta_value' => $this->host, 'posts_per_page' => $batchsize, 'offset' => $postsprocessed, 'post_status' => array('draft', 'publish', 'future') )); //Stop if nothing left if (count($loadedposts) == 0) { return true; } foreach($loadedposts as $importedpost) { $importedcontent = $importedpost->post_content; $author = get_post_meta($importedpost->ID, 'blogger_author', true); $img_count = 0; //Count of images for this post foreach($this->get_images($importedcontent) as $lowrez => $highrez) { if (!$this->image_filter($lowrez)) { //Pass null for description so that the default (filename) is used, might be good to use Alt tag instead? $newcontent = $this->import_image($importedpost->ID, $lowrez, $highrez, null, $img_count, $importedcontent, $this->host, $author); if (!is_wp_error($newcontent)) { $importedcontent = $newcontent; $img_count++; } else { $imagesskipped++; } } } $imagesprocessed += $img_count; $importedpost->post_content = $importedcontent; wp_update_post($importedpost); $postsprocessed++; $this->images_done = $imagesprocessed; $this->images_progress = $postsprocessed; $this->images_skipped = $imagesskipped; } unset($loadedposts); return; } function image_urlremap($url, $large) { /* Fix problem urls e.g. 
change https://lh4.googleusercontent.com/-nt66qhxzDyY/TZOD-RhTYMI/AAAAAAAACd4/Elzm1smRFb4/s800-h/Ski%2520Trip.jpg to to https://lh4.googleusercontent.com/-nt66qhxzDyY/TZOD-RhTYMI/AAAAAAAACd4/Elzm1smRFb4/s800/Ski%2520Trip.jpg Could use a apply_filter here to allow users to add their own tweeks */ $pattern = '/(\/)(s\d*)-h(\/)/i'; $replacement = '$1$2$3'; $img = preg_replace($pattern, $replacement, $url); /* Strip out ? and # on the end of files */ $pattern = '/(.*)[#\?].*/i'; $replacement = '$1'; $img = preg_replace($pattern, $replacement, $img); if ($large) { // For images on blogger we can swap /sXXX/ with for example /s1600/ to get a larger file. // Use a standardised large size so we can control quality vs filesize. $pattern = '/(\/)(s\d*)(\/)/i'; $replacement = '$1s'.Blogger_Importer::LARGE_IMAGE_SIZE. '$3'; $img = preg_replace($pattern, $replacement, $img); } return $img; } function image_filter($url) { // Do we exclude this particular image? // Don't include images that are already loaded onto this site // Could use a apply_filter here to allow users to add their own tweeks return (substr($url, 0, strlen(site_url())) == site_url()); } function import_image($post_id, $lowrez, $highrez, $description, $imgcount, $postcontent, $blogname, $author) { /* Import a new image unless we specifically filter it out or if it has already been downloaded on another page. Based on http://wordpress.stackexchange.com/questions//media-sideload-image-file-name and the tumblr-importer Simple filename cleaning as characters such as +, % cause issues ref: http://wordpress.org/extend/plugins/uploadplus/ It's processing links of a form similar to these as provided by the "get_images" function or If the high resolution (linked) file is not an image then the low resolution version is downloaded. 
*/ $lowrez_old = $lowrez; $highrez_old = $highrez; $highrezispage = false; $lowrez = $this->image_urlremap($lowrez, false); if ($lowrez == '') return new WP_Error('Not an image', $message = __('Lowrez not an image', 'blogger-importer'), $data = array($lowrez_old, $highrez_old)); if ($highrez != '') { $highrez = $this->image_urlremap($highrez, true); } else { $highrez = $this->image_urlremap($lowrez, true); } if (!$att_id = $this->image_exists($lowrez)) { //Option to add a timeout to download_url, but don't use the wp_remote_get timeout as that's much shorter than the default here of 300s $tmp = @download_url($highrez); if (is_wp_error($tmp)) { @unlink($tmp); // clean up, copied this from other examples but how is this supposed to work if $tmp is an error?? //Don't exit as can still try the small image } // If the highrez was not an image then try the lowrex if (!$this->is_image($tmp, $highrez)) { $highrezispage = true; //That image was not valid $tmp = @download_url($lowrez); // Option to add a timeout here if (is_wp_error($tmp)) { @unlink($tmp); // clean up return $tmp; // output wp_error } if (!$this->is_image($tmp, $lowrez)) { @unlink($tmp); // clean up None of items are actually images, for example might be a single pixel, deliberately filtered out or a 404 error? 
return new WP_Error('No Images', __('None of the images are valid', 'blogger-importer'), $data = array($lowrez_old, $highrez_old)); } } $new_name = preg_replace('/[^A-Za-z0-9._ ]/i', '-', basename($lowrez)); $file_array = array('name' => $new_name, 'tmp_name' => $tmp); if ( empty( $description ) ) { $description = $new_name; } $att_id = media_handle_sideload($file_array, $post_id, $description); if (is_wp_error($att_id)) { @unlink($file_array['tmp_name']); return $att_id; } // Link attachment upto old url, store the author so we can replace it later add_post_meta($att_id, 'blogger_permalink', $lowrez); add_post_meta($att_id, 'blogger_blog', $blogname, true); add_post_meta($att_id, 'blogger_author', $author, true); if ($highrezispage) //Not an image so store so we can link later add_post_meta($att_id, 'blogger_largeimgispage', true); } else { //Image already exists, check if the high rez one was valid last time $tmp = get_post_meta($att_id, 'blogger_largeimgispage', true); if ($tmp == true) $highrezispage = true; } //Always treat picassa webs as image so they get replaced with the new High rez link if (substr($highrez, 0, 27) == 'http://picasaweb.google.com') $highrezispage = false; //Replace the image strings if (!$highrezispage && $highrez_old != '') { $imagesrc = wp_get_attachment_image_src($att_id, 'full'); $highrez_new = reset($imagesrc); $postcontent = str_replace($highrez_old, $highrez_new, $postcontent); } $imagesrc = wp_get_attachment_image_src($att_id, 'medium'); $lowrez_new = reset($imagesrc); $postcontent = str_replace($lowrez_old, $lowrez_new, $postcontent); //Set the first image to be the post thumbnail (zero index) if ($imgcount == 0) { set_post_thumbnail($post_id, $att_id); } //media handle sideload moves the file so there should be no temp file left but cleanup just incase. 
@unlink($tmp); // incase something goes wrong
        if ($postcontent == '') {
            return new WP_Error('Empty Content', __("Attempting to write back empty content", 'blogger-importer'), $data = array($lowrez_old, $highrez_old));
        }
        return $postcontent;
    }

    /**
     * Is the downloaded file really an image?
     *
     * A URL can look like an image but download as something else, perhaps a html page.
     * Tracking images of 1 pixel square are filtered out as well.
     * wp_check_filetype_and_ext and wp_match_mime_types were found to give false positives,
     * so the check relies on getimagesize() instead.
     *
     * @param string $file     Path of the downloaded file to inspect
     * @param string $filename Not used by this check
     * @return bool True when getimagesize() succeeds and both dimensions are larger than 1
     */
    function is_image($file, $filename) {
        $imgstats = @getimagesize($file);
        if (!$imgstats) {
            return false;
        }
        return (($imgstats[0] > 1) && ($imgstats[1] > 1));
    }

    /**
     * Look up an attachment whose 'blogger_permalink' meta value equals the given URL.
     *
     * @param string $lowrez URL stored in the 'blogger_permalink' meta of the attachment
     * @return mixed Whatever $wpdb->get_var() returns for the ID column of the first matching row
     */
    function image_exists($lowrez) {
        global $wpdb;
        return $wpdb->get_var($wpdb->prepare("SELECT ID FROM $wpdb->posts p INNER JOIN $wpdb->postmeta m ON p.ID = m.post_id AND meta_key = 'blogger_permalink' WHERE post_type = 'attachment' AND meta_value = %s LIMIT 0 , 1", $lowrez));
    }

    /**
     * Update the links in one batch of imported posts so they point at this site.
     *
     * Reads the batch offset from $this->links_progress and writes the running totals back to
     * $this->links_progress and $this->links_done after every post.
     *
     * @return true|WP_Error|null True when no posts are left to process, WP_Error when a rewrite
     *                            would produce empty content, null after a batch has been processed
     */
    function process_links() {
        global $wpdb;

        $postsprocessed = $this->links_progress;
        if ($postsprocessed == 0) {
            $linksprocessed = 0;
        } else {
            $linksprocessed = $this->links_done;
        }
        $batchsize = 20;

        $oldurlsearch = $this->host;
        // Strip a leading "www." (4 characters); the pattern below matches an optional www. prefix itself
        if (substr($oldurlsearch, 0, 4) == 'www.') {
            $oldurlsearch = substr($oldurlsearch, 4);
        }
        $oldurlsearch = str_replace('.', '\.', $oldurlsearch);

        $blogspot = stripos($oldurlsearch, '\.blogspot\.');
        if ($blogspot !== false) {
            //Blogspot addresses can be international e.g. myblog.blogspot.com, myblog.blogspot.com.au or myblog.blogspot.co.uk or myblog.blogspot.de both resolve to the same blog.
            //See http://www.searchenginejournal.com/google-blogger-url-censorship/39724/
            // 12 is the length of the escaped '\.blogspot\.' needle, so everything after it is replaced by a TLD wildcard
            $oldurlsearch = substr($oldurlsearch, 0, $blogspot + 12). '[\w\.]{2,6}';
        }

        $loadedposts = get_posts(array('meta_key' => 'blogger_blog', 'meta_value' => $this->host, 'posts_per_page' => $batchsize, 'offset' => $postsprocessed, 'post_status' => array('draft', 'publish', 'future')));

        //Stop if nothing left
        if (count($loadedposts) == 0) {
            return true;
        }

        foreach($loadedposts as $importedpost) {
            $importedcontent = $importedpost->post_content;

            // NOTE(review): this pattern starts at "]*href=" yet ends with a closing anchor tag, so it presumably
            // lost an opening anchor-tag prefix at some point - confirm against the upstream plugin source.
            $regexp = ']*href=([\"\'`])(https?:\/\/(?:www\.)*'.$oldurlsearch.'\/)([^\" >]*?)\1[^>]*>(.*)<\/a>';
            if (preg_match_all("/$regexp/siU", $importedcontent, $matches, PREG_SET_ORDER)) {
                foreach($matches as $match) {
                    $HostURL = substr($match[2], 0, strlen($match[2]) - 1); //e.g. http://minitemp.blogspot.co.uk
                    $PageURL = '/'.$match[3]; //e.g. '/2011/04/what-happens-if-blog-title-is-really.html'

                    // Placeholders are left unquoted; $wpdb->prepare() adds the quoting itself
                    $sql = $wpdb->prepare("SELECT post_id FROM $wpdb->postmeta m inner join $wpdb->posts p on p.id = m.post_id and post_type = 'post' where meta_key = %s and meta_value = %s ", 'blogger_permalink', $PageURL);
                    $linkpostid = $wpdb->get_var($sql);

                    if ($linkpostid != 0) {
                        $NewURL = get_permalink($linkpostid);
                    } else {
                        // Page not found, update content with just the new domain
                        $NewURL = site_url($PageURL);
                    }

                    $importedcontent = str_replace($HostURL.$PageURL, $NewURL, $importedcontent);
                    $linksprocessed++;
                }

                if ($importedcontent == '') {
                    return new WP_Error('Empty Content', __("Attempting to write back empty content", 'blogger-importer'));
                }

                $importedpost->post_content = $importedcontent;
                wp_update_post($importedpost);
            }

            $postsprocessed++;
            //For some reason the intermediate values are not getting written, is it that the options are cached hence not read back?
            $this->links_done = $linksprocessed;
            $this->links_progress = $postsprocessed;
        }

        unset($loadedposts);
        return;
    }

    /**
     * Does the URL look like an image? Picasaweb links are always treated as images.
     *
     * @param string $srcurl URL to test
     * @return bool|int True for picasaweb links, otherwise the preg_match() result for the file extension test
     */
    function isurlimage($srcurl) {
        //Process picasaweb links and files that are images
        if (substr($srcurl, 0, 27) == 'http://picasaweb.google.com')
            return true;

        return preg_match('/(?i)\.(jpe?g|png|gif|bmp)$/i', $srcurl);
    }

    /**
     * Parse the atom file
     *
     * @param string $file Path to XML file for parsing
     * @return SimplePie object of the feed
     */
    function parse( $file ) {
        $data = file_get_contents( $file );

        // parse the feed
        $feed = new SimplePie();

        //set_xxxx methods depreciated (and not working?) replaced with get_registry as per docs
        $reg = $feed->get_registry();
        $reg->register('Sanitize', 'Blogger_Importer_Sanitize');
        $feed->sanitize = $reg->create('Sanitize'); //Should not really need to do this but there seems to be an issue with the SimplePie class?
        $reg->register('Item', 'WP_SimplePie_Blog_Item');

        $feed->set_raw_data( $data );
        $feed->init();

        return $feed;
    }

    // Display import page title
    function header() {
        echo '
'; if ( version_compare( get_bloginfo( 'version' ), '3.8.0', '<' ) ) { screen_icon(); } echo '

' . __( 'Import Blogger', 'blogger-importer' ) . '

'; } // Close div.wrap function footer() { echo '
'; } /** * Display introductory text and file upload form */ function greet() { echo '
'; echo '

'.__( 'Howdy! Upload your Blogger Exported XML file and we’ll import the contents from it into this site.', 'blogger-importer' ).'

'; echo '

'.__( 'Choose an XML file to upload, then click Upload file and import.', 'blogger-importer' ).'

'; wp_import_upload_form( 'admin.php?import=blogger&step=1' ); echo '
';
    }

    /**
     * Decide if the given meta key maps to information we will want to import
     *
     * @param string $key The meta key to check
     * @return string|bool The key if we do want to import, false if not
     */
    function is_valid_meta_key( $key ) {
        // skip attachment metadata since we'll regenerate it from scratch
        // skip _edit_lock as not relevant for import
        // strict comparison so that only these exact key strings are rejected
        if ( in_array( $key, array( '_wp_attached_file', '_wp_attachment_metadata', '_edit_lock' ), true ) ) {
            return false;
        }
        return $key;
    }

    /**
     * Decide whether or not the importer is allowed to create users.
     * Default is true, can be filtered via import_allow_create_users
     *
     * @return bool True if creating users is allowed
     */
    function allow_create_users() {
        return apply_filters( 'import_allow_create_users', true );
    }

    /**
     * Added to http_request_timeout filter to force timeout at 60 seconds during import
     *
     * @param mixed $val Incoming value from the filter, ignored
     * @return int 60
     */
    function bump_request_timeout( $val ) {
        return 60;
    }

    /**
     * Return the difference in length between two strings
     *
     * @param string $a First string
     * @param string $b Second string
     * @return int strlen($b) minus strlen($a), so it is positive when $b is the longer string
     */
    function cmpr_strlen( $a, $b ) {
        return strlen($b) - strlen($a);
    }
}
} // class_exists( 'Blogger_Importer' )

function blogger_importer_init() {
    load_plugin_textdomain( 'blogger-importer', false, dirname( plugin_basename( __FILE__ ) ) .
'/languages/' ); /** * WordPress Importer object for registering the import callback * @global WP_Import $wp_import */ $GLOBALS['wp_import'] = new Blogger_Importer(); register_importer('blogger', __('Blogger', 'blogger-importer'), __('Import posts, comments, and categories from a Blogger blog and migrate authors to WordPress users.', 'blogger-importer'), array( $GLOBALS['wp_import'], 'dispatch' ) ); } add_action( 'admin_init', 'blogger_importer_init' ); readme.txt000064400000034324147600314460006555 0ustar00=== Blogger Importer === Contributors: wordpressdotorg, Otto42, Workshopshed, SergeyBiryukov, rmccue Donate link: Tags: importer, blogger Requires at least: 3.0 Tested up to: 6.6 Stable tag: 0.9.3 License: GPLv2 or later Imports posts, images, comments, and categories (blogger tags) from a Blogger blog then migrates authors to WordPress users. == Description == The Blogger Importer imports your blog data from a Google Blogger site into a WordPress.org installation. = Items imported = * Categories * Posts (published, scheduled and draft) * Comments (not spam) * Images = Items not imported = * Pages * Widgets/Widget Data * Templates/Theme * Comment and author Avatars == Installation == 1. Upload the `blogger-importer` folder to the `/wp-content/plugins/` directory 1. Activate the plugin through the 'Plugins' menu in WordPress = Prerequisites = The importer connects your server to the blogger server to copy across the posts. For this to work you need to have connectivity from the server to the internet and also have at least one of the remote access protocols enabled, e.g. curl, streams or fsockopen. You can use the Core Control plugin to test if these are working correctly. The importer connects to Google over a secure connection so OpenSSL needs to be enabled on your server. The importer uses the SimplePie classes to read and process the data from blogger so you will need the php-xml module installed on your webserver. 
= Preparation = It is strongly recommended that you **disable all other plugins and caching** during the import. This will ensure that the information transfers across as smoothly as possible and that posts and comments are correctly transferred. = How to use = 1. On your Blogger account, visit the Settings->Other page, and locate the "Export Blog" option. This will download an XML file containing your posts and comments. 2. In WordPress, the Blogger Importer is available from the Tools->Import menu. 3. Upload the XML file to WordPress. 4. The posts will be read and you will be given the option to map the authors of the posts appropriately. 5. Allow the import to finish. 6. If the import fails halfway, you can simply retry. Already imported posts will be skipped and not duplicated. == Frequently Asked Questions == = How do I re-import? = Simply upload the XML file again. Already imported posts will be skipped and not duplicated. = Once I've imported the posts do I need to keep the plugin? = No, you can remove the plugin once you've completed your migration. = How do I know which posts were imported? = Each of the posts loaded is tagged with meta tags indicating where the posts were loaded from. The permalink will be set to the visible URL if the post was published or the internal ID if it was still a draft or scheduled post * blogger_author * blogger_blog * blogger_permalink = After importing there are a lot of categories = Blogger does not distinguish between tags and categories so you will likely want to review what was imported and then use the categories to tags converter = What about pages? = This importer does not handle blogger pages, you will need to manually transfer them. = What about images? = This version of the importer imports these too, but you can disable this via a setting in the blogger-importer.php file. Tracking images of size 1x1 are not processed.
If you wish to specifically exclude other images you could code something for the image_filter function. = What size are the images? = The importer will attempt to download a large version of the file if one is available. This is controlled by the setting "LARGE_IMAGE_SIZE" and defaults to a width of 1024. The display size of the images is the "medium" size of images as defined on WordPress. You can change this in advance if you want to show a different size. = How do I know what images are skipped? = If you hover over the progress bar for images it will tell you how many images are skipped. To see the filenames of these images you will need to enable WordPress debugging to log to file. See https://wordpress.org/documentation/article/debugging-in-wordpress/ = What about future posts? = The scheduled posts will be transferred and will be published as specified. However, Blogger and WordPress handle drafts differently, WordPress does not support dates on draft posts so you will need to use a plugin if you wish to plan your writing schedule. = Are the permalinks the same? = No, WordPress and Blogger handle the permalinks differently. However, it is possible to use the redirection plugin or your .htaccess file to map the old URLs across to the new URLs. = My posts and comments moved across but some things are stripped out = The importer uses the SimplePie classes to process the data, these in turn use a Simplepie_Sanitize class to remove potentially malicious code from the source data. If the php-xml module is not installed then this may result in your entire comment text being stripped out and the error "PHP Warning: DOMDocument not found, unable to use sanitizer" to appear in your logs. = The comments don't have avatars = This is a known limitation of the data that is provided from Blogger. The WordPress system uses Gravatar to provide the images for the comment avatars. This relies on the email of the person making the comment.
Blogger does not provide the email address in the data feed so WordPress does not display the correct images. You can manually update, or script a change to, the comment email addresses to work around this issue. = It does not seem to be processing the images = The most common reasons for this are lack of memory and timeouts, these should appear in your error log. Also check you've not run out of disk space on your server. Because WordPress stores the files in multiple resolutions one image might take up as much as 250kb spread across 5 files of different sizes. = How do I make the images bigger or smaller? / My images are fuzzy = The importer will attempt to download a large version of images but it displays them on the blog at the medium size. If you go into your settings->media options then you can display a different size "medium" image by default. You can't make this bigger than the file that has been downloaded which is where the next setting comes in. The default size for the large images is 1024, you can change this to an even larger size by changing the following line in the blogger-importer.php file. const LARGE_IMAGE_SIZE = '1024'; The file downloaded won't be bigger than the original file so if it was only 800x600 to start with then it won't be any bigger than that. If your original blog has hardcoded width and height values that are larger than the medium size settings then that might result in your images becoming fuzzy. = I've run out of disk space processing the images = The importer is designed to download the high resolution images where they are available. You can either disable the downloading of images or you can change the constant LARGE_IMAGE_SIZE string in the blogger-importer.php file to swap the links with a smaller image.
== Reference == * https://www.simplepie.org/ The following were referenced for implementing the images and links * https://wordpress.org/plugins/remote-images-grabber/ * http://notions.okuda.ca/wordpress-plugins/blogger-image-import/ * https://wordpress.org/plugins/cache-images/ * https://wordpress.org/plugins/tumblr-importer/ * https://core.trac.wordpress.org/ticket/14525 * https://wpengineer.com/1735/easier-better-solutions-to-get-pictures-on-your-posts/ * https://web.archive.org/web/20211121020918/http://www.velvetblues.com/web-development-blog/wordpress-plugin-update-urls/ * http://wordpress.stackexchange.com/questions//media-sideload-image-file-name (not working) * https://code.tutsplus.com/a-guide-to-the-wordpress-http-api-the-basics--wp-25125t == Known Issues == * Some users have reported that their IFrames are stripped out of the post content. * Requests for better performance of larger transfers and transfers of images * Review of behavior when it re-imports, particularly are the counts correct * Review using get_posts or get_comments with the appropriate parameters to get the counts and exists instead of using SQL * Incorrect notice, PHP Notice: The data could not be converted to UTF-8. You MUST have either the iconv or mbstring extension installed. This occurs even when Iconv is installed, could be related to Blogger reporting 0 comments * When the importer is running it's not possible to stop it using the stop button * Blogger's count of comments includes those not linked to a post e.g. the post has been deleted. == Filters and Actions == These actions and filters have been added so that you can extend the functionality of the importer without needing to modify the code. Action - import_start - This is run when the import starts processing the records for a new blog Action - import_done - This is run when the import finishes processing the records for a blog.
Filter - blogger_importer_congrats - Passes the list of options shown to the user when the blog is complete, options can be added or removed. == Changelog == = 0.9.3 = * Add support for WordPress 6.6.2 * Add support for PHP 8.3 = 0.9.2 = * Add support for WordPress 6.2 = 0.9.1 = * Add support for WordPress 6.1 = 0.9 = * Complete rewrite to use XML files instead. = 0.8 = * Fixed issue with the authors form not showing the list of authors for a blog * Simplified check for duplicate comments * Code simplified for get_authors and get_author_form * Fixed issue with wpdb prepare and integer keys by switching to a sub select query * Make comment handling more robust * Simplified functions to reduce messages in the log = 0.7 = * Fixed issue with drafts not being imported in the right state * Added extra error handling for get_oauth_link to stop blank tokens being sent to the form * Restructured code to keep similar steps in single function and to allow testing of components to be done * Re-incorporated the "congrats" function and provided a sensible list of what to do next * Add a geo_public flag to posts with geotags * Dropped _normalize_tag after confirming that it's handled by SimplePie * Added image handling https://core.trac.wordpress.org/ticket/4010 * Added setting author on images * Added error handling in get_oauth_link() as suggested by daniel_henrique ref https://core.trac.wordpress.org/ticket/21163 * Added a check for OpenSSL as suggested by digitalsensus * Fixed issue with SimplePie sanitizer not getting set in WordPress 3.5 * Added filter for the congrats function 'blogger_importer_congrats' so other plugins can add in new options * Converted manual HTML table to WP_LIST_TABLE * Moved inline Javascript to separate file to aid debugging and testing * Wrapped data sent to Javascript in I18n functions. * Fixed timeout error in the Javascript, timeouts were not being used.
* Suppress post revisions when importing so that DB does not grow * Added processing of internal links * Added uninstall.php to remove options on uninstall * Added a timeout value to all of the wp_remote_get calls as people have reported timeout issues * Added a setting to control the large images downloaded from blogger. * Stopped logging all the post and comment IDs in arrays and storing in option this improved the importing of very large blogs * Fixed issue with comment_author_IP notice * Code restructuring to use classes for blog objects * Changed AJAX calls to use technique described here https://codex.wordpress.org/AJAX_in_Plugins#Ajax_on_the_Administration_Side * Added AdminURL to the greet function rather than hardcoded path * Defaulted to turn off post pingbacks * Fix to stop it counting pingbacks, issue reported by realdoublebee * Retrofitted Security enhancement from 0.6, nonce added to form buttons on main screen * Security enhancement, nonce added to form button on authors screen * Updated POT file * Greek Translation from Stergatou Eleni https://buddypress.org/community/members/lenasterg/ = 0.6 = * Security enhancement, nonce added to form button on main screen = 0.5 = * Merged in fix by SergeyBiryukov https://core.trac.wordpress.org/ticket/16012 * Merged in rmccue change to get_total_results to also use SimplePie from https://core.trac.wordpress.org/attachment/ticket/7652/7652-blogger.diff * Reviewed in rmccue's changes in https://core.trac.wordpress.org/attachment/ticket/7652/7652-separate.diff issues with date handling functions so skipped those * Moved SimplePie functions in new class WP_SimplePie_Blog_Item incorporating get_draft_status and get_updated and convert date * Tested comments from source blog GMT-8, destination London (currently GMT-1), comment dates transferred correctly.
* Fixed typo in oauth_get * Added screen_icon() to all pages * Added GeoTags as per spec on https://codex.wordpress.org/Geodata * Change by Otto42, rmccue to use Simplepie XML processing rather than Atomparser, https://core.trac.wordpress.org/ticket/14525 ref: https://core.trac.wordpress.org/attachment/ticket/7652/7652-blogger.diff this also fixes https://core.trac.wordpress.org/ticket/15560 * Change by Otto42 to use OAuth rather than AuthSub authentication, should make authentication more reliable * Fix by Andy from Workshopshed to load comments and nested comments correctly * Fix by Andy from Workshopshed to correctly pass the blogger start-index and max-results parameters to oAuth functions and to process more than one batch https://core.trac.wordpress.org/ticket/19096 * Fix by Andy from Workshopshed error about incorrect enqueuing of scripts also changed styles to work the same * Change by Andy from Workshopshed testing in debug mode and wrapped ajax return into a function to suppress debug messages * Fix by Andy from Workshopshed notices for undefined variables. * Change by Andy from Workshopshed Added tooltip to results table to show numbers of posts and comments skipped (duplicates / missing key) * Fix by Andy from Workshopshed incorrectly checking for duplicates based on only the date and username, this gave false positives when large numbers of comments, particularly anonymous ones. = 0.4 = * Fix for tracking images being added by Blogger to non-authenticated feeds https://core.trac.wordpress.org/ticket/17623 = 0.3 = * Bugfix for 403 Invalid AuthSub Token https://core.trac.wordpress.org/ticket/14629 = 0.1 = * Initial release == Upgrade Notice == = 0.8 = Some bug fixes and simplified code see change log. 
comment-entry.php000064400000006733147600314460010074 0ustar00links as $link) { if ($link['rel'] == 'alternate') { $parts = parse_url($link['href']); if (isset($parts['fragment'])){ $this->old_permalink = $parts['fragment']; } } //Parent post for nested links if ($link['rel'] == 'related') { $parts = parse_url($link['href']); $this->related = $parts['path']; } if ($link['rel'] == 'self') { $parts = parse_url($link['href']); $this->self = $parts['path']; } } } function import() { $comment_author = $this->author; $comment_author_url = $this->authoruri; $comment_author_email = $this->authoremail; $comment_date = $this->updated; $comment_content = $this->content; $comment_post_ID = $this->post_ID; $comment_author_IP = '127.0.0.1'; //Blogger does not supply the IP so default this // Clean up content // Simplepie does some cleaning but does not do these. $comment_content = str_replace('
', '
', $comment_content); $comment_content = str_replace('
', '
', $comment_content);

            $comment_parent = isset($this->parentcommentid) ? $this->parentcommentid : 0;

            $comment = compact('comment_post_ID', 'comment_author', 'comment_author_url', 'comment_author_email','comment_author_IP','comment_date', 'comment_content', 'comment_parent');

            $comment = wp_filter_comment($comment);
            $comment_id = wp_insert_comment($comment);

            //links of the form /feeds/417730729915399755/8397846992898424746/comments/default/7732208643735403000
            add_comment_meta($comment_id, 'blogger_internal', $this->self, true);

            return $comment_id;
        }

        /**
         * Has this comment been imported already?
         * The lookup is keyed on the 'self' feed path stored in the 'blogger_internal' comment meta.
         *
         * @return int Comment id, or 0 when no comment carries that path
         */
        function exists() {
            return ($this->get_comment_by_oldID($this->self));
        }

        /**
         * Find the comment whose 'blogger_internal' meta value equals the given Blogger path.
         *
         * @param string $oldID Value stored in the 'blogger_internal' comment meta
         * @return int Comment id, or 0 when the query returns nothing (the result is cast to int)
         */
        function get_comment_by_oldID($oldID) {
            //Check to see if this post has been loaded already
            //Can we use get_comments for this?
            global $wpdb;

            // The %s placeholder is left unquoted; $wpdb->prepare() adds the quoting itself
            $query = "SELECT c.comment_id FROM $wpdb->commentmeta m inner join $wpdb->comments c on c.comment_ID = m.comment_id where meta_key = 'blogger_internal' and meta_value = %s LIMIT 0 , 1";
            $c = (int) $wpdb->get_var( $wpdb->prepare($query, $oldID) );

            return $c;
        }
    }
}
?>