Brutal insert speed optimization by bypassing the ORM completely

master
Ondřej Hruška 7 years ago
parent 13b22f7cb4
commit c76fc21820
Signed by: MightyPork
GPG Key ID: 2C5FD5035250423D
  1. 28
      app/Http/Controllers/TableController.php
  2. 17
      app/Models/Proposal.php
  3. 28
      app/Tables/Changeset.php
  4. 9
      app/Tables/Column.php

@ -14,6 +14,7 @@ use App\Tables\CXMacroExporter;
use App\Tables\JsonExporter;
use App\Tables\PhpExporter;
use App\Tables\RowNumerator;
use DB;
use Illuminate\Http\Request;
use Illuminate\Validation\Rule;
use MightyPork\Exceptions\NotApplicableException;
@ -243,7 +244,7 @@ class TableController extends Controller
$rowsToInsert = null;
$rowNumerator = null;
try {
$rowsToInsert = (new Changeset)->csvToRowsArray($columns, $dataCsvLines, true, false)->all();
$rowsToInsert = (new Changeset)->csvToRowsArray($columns, $dataCsvLines, false)->all();
} catch (\Exception $e) {
return $this->backWithErrors(['data' => $e->getMessage()]);
}
@ -270,17 +271,32 @@ class TableController extends Controller
/** @var Table $table */
$table = null;
\DB::transaction(function () use ($revisionFields, $tableFields, $rowsToInsert, &$table) {
$revision = Revision::create($revisionFields);
$newRevision = Revision::create($revisionFields);
$tableFields['revision_id'] = $revision->id; // current revision (not-null constraint on this FK)
$tableFields['revision_id'] = $newRevision->id; // current revision (not-null constraint on this FK)
$table = Table::create($tableFields);
// Attach the revision to the table, set as current
$table->revisions()->attach($revision);
$table->revision()->associate($revision);
$table->revisions()->attach($newRevision);
$table->revision()->associate($newRevision);
// Spawn rows, linked to the revision
$revision->rows()->createMany($rowsToInsert);
if (!empty($rowsToInsert)) {
// this replicates the code in Proposal->toRevision
$conn = DB::connection();
$prepared = $conn->getPdo()->prepare('INSERT INTO rows (data) VALUES (?)');
foreach (array_chunk($rowsToInsert, 10000) as $i => $chunk) {
$ids = [];
foreach ($chunk as $newRow) {
$prepared->execute([json_encode($newRow)]);
$ids[] = $conn->getPdo()->lastInsertId();
}
$qms = rtrim(str_repeat('('.$newRevision->id.', ?),', count($ids)), ',');
$conn->statement('INSERT INTO revision_row_pivot (revision_id, row_id) VALUES '.$qms.';',
$ids);
}
}
});
return redirect($table->viewRoute);

@ -193,11 +193,20 @@ class Proposal extends BaseModel
// --- Insert new rows ---
if ($changeset->newRows) {
$newRowData = [];
foreach ($changeset->newRows as $newRow) {
$newRowData[] = new Row(['data' => $newRow]);
$conn = DB::connection();
$prepared = $conn->getPdo()->prepare('INSERT INTO rows (data) VALUES (?)');
foreach (array_chunk($changeset->newRows, 10000) as $i => $chunk) {
$ids = [];
foreach ($chunk as $newRow) {
$prepared->execute([json_encode($newRow)]);
$ids[] = $conn->getPdo()->lastInsertId();
}
$qms = rtrim(str_repeat('('.$newRevision->id.', ?),', count($ids)), ',');
$conn->statement('INSERT INTO revision_row_pivot (revision_id, row_id) VALUES '.$qms.';',
$ids);
}
$newRevision->rows()->saveMany($newRowData);
}
$newRevision->update(['row_count' => $newRevision->rows()->count()]);

@ -573,10 +573,10 @@ class Changeset
* so it can be inserted directly into DB
* @return Collection
*/
public function csvToRowsArray($columns, $csvArray, $forTableInsert, $useDraftIds)
public function csvToRowsArray($columns, $csvArray, $useDraftIds)
{
/** @var Collection $rows */
$rows = collect($csvArray)->map(function ($row) use ($columns, $forTableInsert) {
$rows = collect($csvArray)->map(function ($row) use ($columns) {
if (count($row) == 0 || count($row) == 1 && $row[0] == '') return null; // discard empty lines
if (count($row) != count($columns)) {
throw new NotApplicableException("All rows must have " . count($columns) . " fields.");
@ -594,11 +594,7 @@ class Changeset
$data[$col->id] = $col->cast(trim($val));
}
if ($forTableInsert) {
return ['data' => $data];
} else {
return $data;
}
return $data;
})->filter();
if ($useDraftIds) {
@ -607,18 +603,10 @@ class Changeset
$rowNumerator = new RowNumerator(count($csvArray));
}
if ($forTableInsert) {
return $rows->map(function ($row) use (&$rowNumerator) {
$row['data']['_id'] = $rowNumerator->next();
return $row;
});
}
else {
return $rows->map(function ($row) use (&$rowNumerator) {
$row['_id'] = $rowNumerator->next();
return $row;
});
}
return $rows->map(function ($row) use (&$rowNumerator) {
$row['_id'] = $rowNumerator->next();
return $row;
});
}
/**
@ -655,7 +643,7 @@ class Changeset
/** @var Column[] $columns */
$columns = $columns->values()->all();
$rows = self::csvToRowsArray($columns, $csvArray, false, true)
$rows = self::csvToRowsArray($columns, $csvArray, true)
->keyBy('_id');
// using '+' to avoid renumbering

@ -187,20 +187,21 @@ class Column implements JsonSerializable, Arrayable
{
switch ($this->type) {
case 'int':
if (is_null($value)) return 0;
if (is_null($value) || $value === "") return 0;
if (is_int($value)) return $value;
if (is_bool($value)) return (int)$value;
if (is_float($value)) return round($value);
if (is_numeric($value)) return intval($value);
throw new SimpleValidationException($this->id, "Could not convert value \"$value\" to int!");
case 'float':
if (is_null($value)) return 0;
if (is_int($value) || is_float($value)) return (float)$value;
if (is_null($value) || $value === "") return 0;
if (is_int($value) || is_float($value) || is_bool($value)) return (float)$value;
if (is_numeric($value)) return floatval($value);
throw new SimpleValidationException($this->id, "Could not convert value \"$value\" to float!");
case 'bool':
if (is_null($value)) return false;
if (is_null($value) || $value === "") return false;
return Utils::parseBool($value);
case 'string':

Loading…
Cancel
Save